npm - @johntalton/json-tokenizer - Versions diffs - 1.0.0 - Mend

@johntalton/json-tokenizer 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (4) hide show

package/LICENSE ADDED Viewed

@@ -0,0 +1,21 @@
+MIT License
+Copyright (c) 2026 John
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

package/README.md ADDED Viewed

@@ -0,0 +1,101 @@
+# JSON Tokenizer
+Generator function that tokenizes a string according to the JSON format.
+- Uses Generator based API
+- Produces tokens for all input text (including error tokens)
+- Uses `Intl.Segmenter` with `'grapheme'` granularity
+- Segmenter locale can be customized (default: `'en-US'`)
+- Accepts an `AbortSignal` to control termination
+- Makes a best effort to match the restrictions of `JSON.parse`
+## Example
+Basic initialization and iteration:
+```js
+import { JSONTokenizer } from '@johntalton/json-tokenizer'
+const signal = AbortSignal.timeout(100)
+const text = '{ }'
+for(const token of JSONTokenizer.tokenize(text, { signal })) {
+  const { type, value } = token
+  // ...
+}
+```
+The following shows the token stream produced from a valid JSON text:
+```js
+import { JSONTokenizer } from '@johntalton/json-tokenizer'
+const text = JSON.stringify({
+  team: 'Mystery Inc',
+  members: [ 'Fred', 'Daphne', 'Velma', 'Shaggy', 'Scooby' ],
+  aired: 1969
+}, undefined, '\t')
+const stream = JSONTokenizer.tokenize(text)
+for(const token of stream) {
+  console.log(token)
+}
+/*
+{ type: 'object-open', value: '{' }
+{ type: 'whitespace', value: '\n\t' }
+{ type: 'open-key-quote', value: '"' }
+{ type: 'key', value: 'team' }
+{ type: 'close-key-quote', value: '"' }
+{ type: 'colon', value: ':' }
+{ type: 'whitespace', value: ' ' }
+{ type: 'open-string-quote', value: '"' }
+{ type: 'string', value: 'Mystery Inc' }
+{ type: 'close-string-quote', value: '"' }
+{ type: 'object-member-comma', value: ',' }
+{ type: 'whitespace', value: '\n\t' }
+{ type: 'open-key-quote', value: '"' }
+{ type: 'key', value: 'members' }
+{ type: 'close-key-quote', value: '"' }
+{ type: 'colon', value: ':' }
+{ type: 'whitespace', value: ' ' }
+{ type: 'array-open', value: '[' }
+{ type: 'whitespace', value: '\n\t\t' }
+{ type: 'open-string-quote', value: '"' }
+{ type: 'string', value: 'Fred' }
+{ type: 'close-string-quote', value: '"' }
+{ type: 'array-element-comma', value: ',' }
+{ type: 'whitespace', value: '\n\t\t' }
+{ type: 'open-string-quote', value: '"' }
+{ type: 'string', value: 'Daphne' }
+{ type: 'close-string-quote', value: '"' }
+{ type: 'array-element-comma', value: ',' }
+{ type: 'whitespace', value: '\n\t\t' }
+{ type: 'open-string-quote', value: '"' }
+{ type: 'string', value: 'Velma' }
+{ type: 'close-string-quote', value: '"' }
+{ type: 'array-element-comma', value: ',' }
+{ type: 'whitespace', value: '\n\t\t' }
+{ type: 'open-string-quote', value: '"' }
+{ type: 'string', value: 'Shaggy' }
+{ type: 'close-string-quote', value: '"' }
+{ type: 'array-element-comma', value: ',' }
+{ type: 'whitespace', value: '\n\t\t' }
+{ type: 'open-string-quote', value: '"' }
+{ type: 'string', value: 'Scooby' }
+{ type: 'close-string-quote', value: '"' }
+{ type: 'whitespace', value: '\n\t' }
+{ type: 'array-close', value: ']' }
+{ type: 'object-member-comma', value: ',' }
+{ type: 'whitespace', value: '\n\t' }
+{ type: 'open-key-quote', value: '"' }
+{ type: 'key', value: 'aired' }
+{ type: 'close-key-quote', value: '"' }
+{ type: 'colon', value: ':' }
+{ type: 'whitespace', value: ' ' }
+{ type: 'number', value: '1969' }
+{ type: 'whitespace', value: '\n' }
+{ type: 'object-close', value: '}' }
+{ type: 'eof', value: '' }
+*/
+```

package/package.json ADDED Viewed

@@ -0,0 +1,15 @@
+{
+  "name": "@johntalton/json-tokenizer",
+  "type": "module",
+  "version": "1.0.0",
+  "license": "MIT",
+  "exports": {
+    ".": "./src/index.js"
+  },
+  "files": [
+    "src/*.js"
+  ],
+  "repository": {
+    "url": "git+https://github.com/johntalton/json-tokenizer.git"
+  }
+}

package/src/index.js ADDED Viewed

@@ -0,0 +1,799 @@
+/**
+ * Options accepted by `JSONTokenizer.tokenize`.
+ * @typedef {Object} TokenizerOptions
+ * @property {string|undefined} [locale = 'en-US'] Locale handed to `Intl.Segmenter`; `DEFAULT_LOCALE` is used when nullish.
+ * @property {AbortSignal|undefined} [signal] Checked at the top of every tokenizer loop pass; once aborted the generator stops.
+ * @property {boolean|undefined} [debug] When exactly `true`, each loop pass logs the current segment, state, stack and accumulator with `console.log`.
+ */
+export const TOKEN = { // `type` values of the tokens yielded by JSONTokenizer.tokenize
+	ERROR: 'error', // input that did not fit what the tokenizer expected at that point
+	EOF: 'eof', // yielded last, with an empty value, once the input is exhausted
+	WHITESPACE: 'whitespace', // a run of space, \r, \n and \t
+	// Array / Elements
+	ARRAY_OPEN: 'array-open',
+	ARRAY_CLOSE: 'array-close',
+	ARRAY_CLOSE_IMMEDIATE: 'array-close-immediate', // `]` met while still in the just-opened-array state (no element was started)
+	ARRAY_ELEMENT_COMMA: 'array-element-comma',
+	// Object
+	OBJECT_OPEN: 'object-open',
+	OBJECT_CLOSE: 'object-close',
+	OBJECT_CLOSE_IMMEDIATE: 'object-close-immediate', // `}` met while still in the just-opened-object state (no member was started)
+	OBJECT_MEMBER_COMMA: 'object-member-comma',
+	OBJECT_KEY_OPEN: 'open-key-quote',
+	OBJECT_KEY: 'key', // key text between the quotes, exactly as written (escapes are not decoded)
+	OBJECT_KEY_CLOSE: 'close-key-quote',
+	OBJECT_COLON: 'colon',
+	// String
+	STRING_OPEN: 'open-string-quote',
+	STRING: 'string', // string text between the quotes, exactly as written (escapes are not decoded)
+	STRING_CLOSE: 'close-string-quote',
+	// Primitives
+	TRUE: 'true',
+	FALSE: 'false',
+	NULL: 'null',
+	// Number
+	NUMBER: 'number' // the number's source text; the token value stays a string
+}
+export const EMPTY = '' // empty string: reset value of the tokenizer's accumulator and value of tokens that carry no text
+export const STATE = { // state names of the JSONTokenizer.tokenize state machine; also the entries of its return-state stack
+	ELEMENTS: 'es', // inside an array, between elements
+	ELEMENT: 'e', // start of an element: optional whitespace, then a value
+	ELEMENT_AFTER: 'e_', // just after a value: optional whitespace, then `}`, `]` or `,`
+	VALUE: 'v', // looking at the first segment of a value
+	WS: 'w', // accumulating a whitespace run
+	OBJ_OPEN: 'oo', // just after `{`
+	OBJ_CLOSE: 'oc', // expecting the `}` of an object that has members
+	ARY_OPEN: 'ao', // just after `[`
+	ARY_CLOSE: 'ac', // expecting the `]` of an array that has elements
+	MEMBERS: 'ms', // inside an object, between members
+	MEMBER: 'm', // start of a member: optional whitespace, then the key's opening quote
+	KEY: 'key', // inside a member key
+	MEMBER_KEY_AFTER: 'mka', // after a key: optional whitespace, then `:`
+	STR: 'str', // inside a string value
+	COLON: ':', // NOTE(review): no case in tokenize's switch uses this state in the code visible here
+	NULL: 'null', // NOTE(review): no case in tokenize's switch uses this state; keywords are matched inline in VALUE
+	TRUE: 'true', // NOTE(review): as above, not used by tokenize's switch
+	FALSE: 'false', // NOTE(review): as above, not used by tokenize's switch
+	NUMBER: 'num', // after a leading `-`
+	NUMBER_INT: 'numi', // reading integer digits
+	NUMBER_INT19: 'num19', // expecting a first digit 1-9
+	NUMBER_INT_AFTER: 'num_', // integer part complete: `.`, `e`/`E`, or the number ends
+	NUMBER_EXPONENT_SIGN: 'esign', // after `e`/`E`: optional `+` or `-`
+	NUMBER_EXPONENT: 'exp', // reading further exponent digits
+	NUMBER_EXPONENT_FIRST: 'expf', // expecting the first exponent digit
+	NUMBER_DECIMAL_FIRST: 'numdf', // expecting the first digit after `.`
+	NUMBER_DECIMAL: 'numd', // reading further fraction digits
+	U_HEX4: 'hex' // reading the four segments that follow `\u`
+}
+export const ESCAPE_CHARS = [ // segments accepted directly after a backslash inside a string or key (see isValueEscapeChar)
+	'"', '\\', '/', 'b', 'f', 'n', 'r', 't', 'u'
+]
+export const DEFAULT_LOCALE = 'en-US' // locale given to Intl.Segmenter when options.locale is nullish
+export class JSONTokenizer {
+	/**
+	 * Reports whether `str` is listed in ESCAPE_CHARS, i.e. whether it is
+	 * accepted as the segment directly following a backslash in a string or key.
+	 * @param {string} str
+	 * @returns {boolean}
+	 */
+	static isValueEscapeChar(str) {
+		return ESCAPE_CHARS.includes(str)
+	}
+	/**
+	 * Reports whether the first code point of `str` lies in U+0020..U+10FFFF.
+	 * Only the first code point is examined; an empty string gives false.
+	 * @param {string} str
+	 * @returns {boolean}
+	*/
+	static isValidChar(str) {
+		// accepted range: 0020 - 10FFFF (code points below U+0020 are rejected)
+		const codePoint = str.codePointAt(0)
+		if(codePoint === undefined) { return false }
+		if(codePoint < 0x0020) { return false }
+		if(codePoint > 0x10FFFF) { return false }
+		return true
+	}
+	/**
+	 * Generator that splits `str` into grapheme segments with `Intl.Segmenter`
+	 * and yields `{ type, value }` tokens whose `type` comes from TOKEN.
+	 * It runs a state machine over STATE with an explicit stack of return
+	 * states: a state is pushed before a sub-state runs and popped when the
+	 * sub-state finishes. Branches that do not advance the segment iterator
+	 * hand the same segment to the next state.
+	 *
+	 * Unexpected input is reported through TOKEN.ERROR tokens. When the input
+	 * is exhausted a final TOKEN.EOF token is yielded. An empty `str` yields a
+	 * single TOKEN.ERROR token and no TOKEN.EOF. If `options.signal` is found
+	 * aborted at the top of a loop pass, the generator stops without TOKEN.EOF.
+	 *
+	 * NOTE(review): when the input ends while a number or a whitespace run is
+	 * being read, the return state pushed for it is still on the stack, so the
+	 * end-of-input check also yields an empty-valued TOKEN.ERROR before
+	 * TOKEN.EOF (e.g. for the inputs `1` or `{} `) - confirm this is intended.
+	 *
+	 * @param {string} str
+	 * @param {TokenizerOptions} [options]
+	 * @yields {{ type: string, value: string }}
+	 * @throws {Error} from the OBJ_CLOSE, ARY_CLOSE and unknown-state branches
+	*/
+	static *tokenize(str, options) {
+		const debug = (options?.debug ?? false) === true
+		const signal = options?.signal
+		const locale = options?.locale ?? DEFAULT_LOCALE
+		const seg = new Intl.Segmenter(locale, { granularity: 'grapheme' })
+		const segments = seg.segment(str)
+		using segmentIter = segments[Symbol.iterator]() // `using` declaration - presumably needs runtime support for explicit resource management; TODO confirm target runtimes
+		/** @type {Array<string>} return-state stack */
+		const stack = []
+		/** @type {string|undefined} current state; becomes undefined when stack.pop() runs on an empty stack */
+		let state = STATE.ELEMENT
+		let next = segmentIter.next() // current segment; advanced only by branches that consume it
+		let accumulator = EMPTY // text collected for the token currently being built
+		// empty input: report a single error token and stop (no eof token)
+		if(next.done) {
+			yield { type: TOKEN.ERROR, value: EMPTY }
+			return
+		}
+		// main loop: each pass performs one state transition for the current segment
+		while(true) {
+			if(signal?.aborted) {
+				// aborted: stop right away; pending accumulator text and the eof token are not emitted
+				break
+			}
+			// input exhausted: leftover accumulator text and unfinished nesting are reported as errors, then eof
+			if(next.done) {
+				if(accumulator !== EMPTY) { yield { type: TOKEN.ERROR, value: accumulator } }
+				if(stack.length !== 0) { yield { type: TOKEN.ERROR, value: EMPTY }}
+				yield { type: TOKEN.EOF, value: EMPTY }
+				break
+			}
+			// the return-state stack ran dry while input remains: everything left becomes one error token
+			if(state === undefined) {
+				if(accumulator !== EMPTY) { yield { type: TOKEN.ERROR, value: accumulator } }
+				if(!next.done) {
+					accumulator = EMPTY
+					while(!next.done) {
+						accumulator += next.value.segment
+						next = segmentIter.next()
+					}
+					yield { type: TOKEN.ERROR, value: accumulator }
+				}
+				yield { type: TOKEN.EOF, value: EMPTY }
+				break
+			}
+			// optional trace of the machine before each transition
+			if(debug) {
+				console.log({
+					seg: next.value.segment,
+					state, stack:
+					stack.join(','),
+					accumulator
+				})
+			}
+			// dispatch on the current state
+			switch(state) {
+				case STATE.ELEMENTS: // between array elements
+					switch(next.value.segment) {
+						case ']': // not consumed here; the popped state sees the same `]`
+							state = stack.pop()
+							break
+						case ',':
+							yield { type: TOKEN.ARRAY_ELEMENT_COMMA, value: next.value.segment }
+							stack.push(STATE.ELEMENTS)
+							state = STATE.ELEMENT
+							next = segmentIter.next()
+							break
+						default: // anything else starts an element; come back to ELEMENTS afterwards
+							stack.push(STATE.ELEMENTS)
+							state = STATE.ELEMENT
+							break
+					}
+					break
+				case STATE.ELEMENT: // optional leading whitespace, then a value
+					switch(next.value.segment) {
+						case ' ': case '\r': case '\n': case '\t':
+							stack.push(STATE.ELEMENT)
+							state = STATE.WS
+							break
+						default:
+							stack.push(STATE.ELEMENT_AFTER)
+							state = STATE.VALUE
+							break
+					}
+					break
+				case STATE.ELEMENT_AFTER: // just after a value: optional whitespace, then a closer or comma
+					switch(next.value.segment) {
+						case ' ': case '\r': case '\n': case '\t':
+							stack.push(STATE.ELEMENT_AFTER)
+							state = STATE.WS
+							break
+						case '}': // `}`, `]` and `,` are not consumed; the popped state handles them
+							state = stack.pop()
+							break
+						case ']':
+							state = stack.pop()
+							break
+						case ',':
+							state = stack.pop()
+							break
+						default: // unexpected segment: report it, consume it, stay in this state
+							yield { type: TOKEN.ERROR, value: next.value.segment }
+							next = segmentIter.next()
+							break
+					}
+					break
+				case STATE.WS: // collect a whitespace run into one token
+					switch(next.value.segment) {
+						case ' ': case '\r': case '\n': case '\t':
+							accumulator += next.value.segment
+							next = segmentIter.next()
+							if(next.done) { // run ends with the input: emit it now (the return state stays on the stack)
+								yield { type: TOKEN.WHITESPACE, value: accumulator }
+								accumulator = EMPTY
+							}
+							break
+						default: // run ended: emit it and return to the state that pushed itself
+							yield { type: TOKEN.WHITESPACE, value: accumulator }
+							accumulator = EMPTY
+							state = stack.pop()
+							break
+					}
+					break
+				case STATE.VALUE: // decide the kind of value from its first segment
+					switch(next.value.segment) {
+						case '{':
+							yield { type: TOKEN.OBJECT_OPEN, value: next.value.segment }
+							state = STATE.OBJ_OPEN
+							next = segmentIter.next()
+							break
+						case '[':
+							yield { type: TOKEN.ARRAY_OPEN, value: next.value.segment }
+							state = STATE.ARY_OPEN
+							next = segmentIter.next()
+							break
+						case '"':
+							yield { type: TOKEN.STRING_OPEN, value: next.value.segment }
+							accumulator = EMPTY
+							state = STATE.STR
+							next = segmentIter.next()
+							break
+						case 't': // literal `true`, matched one segment at a time; on a mismatch the prefix read so far becomes an error token and the mismatching segment is left for the popped state
+							accumulator = 't'
+							next = segmentIter.next()
+							if(next.done || next.value.segment !== 'r') {
+								yield { type: TOKEN.ERROR, value: accumulator }
+								accumulator = EMPTY
+								state = stack.pop()
+								break
+							}
+							accumulator += next.value?.segment
+							next = segmentIter.next()
+							if(next.done || next.value.segment !== 'u') {
+								yield { type: TOKEN.ERROR, value: accumulator }
+								accumulator = EMPTY
+								state = stack.pop()
+								break
+							}
+							accumulator += next.value?.segment
+							next = segmentIter.next()
+							if(next.done || next.value.segment !== 'e') {
+								yield { type: TOKEN.ERROR, value: accumulator }
+								accumulator = EMPTY
+								state = stack.pop()
+								break
+							}
+							accumulator += next.value?.segment
+							yield { type: TOKEN.TRUE, value: accumulator }
+							accumulator = EMPTY
+							next = segmentIter.next()
+							state = stack.pop()
+							break
+						case 'f': // literal `false`, same scheme as `true`
+							accumulator = 'f'
+							next = segmentIter.next()
+							if(next.done || next.value.segment !== 'a') {
+								yield { type: TOKEN.ERROR, value: accumulator }
+								accumulator = EMPTY
+								state = stack.pop()
+								break
+							}
+							accumulator += next.value?.segment
+							next = segmentIter.next()
+							if(next.done || next.value.segment !== 'l') {
+								yield { type: TOKEN.ERROR, value: accumulator }
+								accumulator = EMPTY
+								state = stack.pop()
+								break
+							}
+							accumulator += next.value?.segment
+							next = segmentIter.next()
+							if(next.done || next.value.segment !== 's') {
+								yield { type: TOKEN.ERROR, value: accumulator }
+								accumulator = EMPTY
+								state = stack.pop()
+								break
+							}
+							accumulator += next.value?.segment
+							next = segmentIter.next()
+							if(next.done || next.value.segment !== 'e') {
+								yield { type: TOKEN.ERROR, value: accumulator }
+								accumulator = EMPTY
+								state = stack.pop()
+								break
+							}
+							accumulator += next.value?.segment
+							yield { type: TOKEN.FALSE, value: accumulator }
+							accumulator = EMPTY
+							next = segmentIter.next()
+							state = stack.pop()
+							break
+						case 'n': // literal `null`, same scheme as `true`
+							accumulator = 'n'
+							next = segmentIter.next()
+							if(next.done || next.value.segment !== 'u') {
+								yield { type: TOKEN.ERROR, value: accumulator }
+								accumulator = EMPTY
+								state = stack.pop()
+								break
+							}
+							accumulator += next.value?.segment
+							next = segmentIter.next()
+							if(next.done || next.value.segment !== 'l') {
+								yield { type: TOKEN.ERROR, value: accumulator }
+								accumulator = EMPTY
+								state = stack.pop()
+								break
+							}
+							accumulator += next.value?.segment
+							next = segmentIter.next()
+							if(next.done || next.value.segment !== 'l') {
+								yield { type: TOKEN.ERROR, value: accumulator }
+								accumulator = EMPTY
+								state = stack.pop()
+								break
+							}
+							accumulator += next.value?.segment
+							yield { type: TOKEN.NULL, value: accumulator }
+							accumulator = EMPTY
+							next = segmentIter.next()
+							state = stack.pop()
+							break
+						case '-': // sign is kept in the accumulator; NUMBER decides what may follow
+							accumulator = next.value.segment
+							state = STATE.NUMBER
+							next = segmentIter.next()
+							break
+						case '0': // a leading zero goes straight to NUMBER_INT_AFTER, so no further integer digits are read
+							accumulator = next.value.segment
+							state = STATE.NUMBER_INT_AFTER
+							next = segmentIter.next()
+							break
+						case '1': case '2': case '3':
+						case '4': case '5': case '6':
+						case '7': case '8': case '9': // digit is not consumed here; NUMBER_INT accumulates it
+							accumulator = EMPTY
+							state = STATE.NUMBER_INT
+							break
+						default:
+							// segment cannot start a value: report it, consume it, stay in VALUE
+							yield { type: TOKEN.ERROR, value: next.value.segment }
+							next = segmentIter.next()
+							break
+					}
+					break
+				case STATE.NUMBER: // after a leading `-`
+					switch(next.value.segment) {
+						case '0':
+							accumulator += next.value.segment
+							state = STATE.NUMBER_INT_AFTER
+							next = segmentIter.next()
+							break
+						default: // not consumed; NUMBER_INT19 checks for a digit 1-9
+							state = STATE.NUMBER_INT19
+							break
+					}
+					break
+				case STATE.NUMBER_INT19: // first integer digit must be 1-9
+					switch(next.value.segment) {
+						case '1': case '2': case '3':
+						case '4': case '5': case '6':
+						case '7': case '8': case '9':
+							accumulator += next.value.segment
+							state = STATE.NUMBER_INT
+							next = segmentIter.next()
+							break
+						default: // error token holds the text so far plus the offending segment; state is unchanged
+							accumulator += next.value.segment
+							yield { type: TOKEN.ERROR, value: accumulator }
+							accumulator = EMPTY
+							next = segmentIter.next()
+							break
+					}
+					break
+				case STATE.NUMBER_INT: // integer digits
+					switch(next.value.segment) {
+						case '0':
+						case '1': case '2': case '3':
+						case '4': case '5': case '6':
+						case '7': case '8': case '9':
+							accumulator += next.value.segment
+							next = segmentIter.next()
+							if(next.done) { // number ends with the input: emit it now (the return state stays on the stack)
+								yield { type: TOKEN.NUMBER, value: accumulator }
+								accumulator = EMPTY
+							}
+							break
+						default: // not consumed; NUMBER_INT_AFTER looks at the same segment
+							state = STATE.NUMBER_INT_AFTER
+							break
+					}
+					break
+				case STATE.NUMBER_INT_AFTER: // integer part complete. NOTE(review): no end-of-input flush here, so a number such as `0` ending the input is reported by the exhausted-input check as an error - confirm
+					switch(next.value.segment) {
+						case '.':
+							accumulator += next.value.segment
+							next = segmentIter.next()
+							state = STATE.NUMBER_DECIMAL_FIRST
+							break
+						case 'e':
+						case 'E':
+							accumulator += next.value.segment
+							state = STATE.NUMBER_EXPONENT_SIGN
+							next = segmentIter.next()
+							break
+						default: // number is finished; the current segment is left for the popped state
+							yield { type: TOKEN.NUMBER, value: accumulator }
+							accumulator = EMPTY
+							state = stack.pop()
+							break
+					}
+ 					break
+				case STATE.NUMBER_DECIMAL_FIRST: // at least one digit must follow `.`
+					switch(next.value.segment) {
+						case '0':
+						case '1': case '2': case '3':
+						case '4': case '5': case '6':
+						case '7': case '8': case '9':
+							accumulator += next.value.segment
+							next = segmentIter.next()
+							state = STATE.NUMBER_DECIMAL
+							if(next.done) {
+								yield { type: TOKEN.NUMBER, value: accumulator }
+								accumulator = EMPTY
+							}
+							break
+						default: // error token holds the text so far plus the offending segment; state is unchanged
+							accumulator += next.value.segment
+							yield { type: TOKEN.ERROR, value: accumulator}
+							accumulator = EMPTY
+							next = segmentIter.next()
+							break
+					}
+					break
+				case STATE.NUMBER_DECIMAL: // further fraction digits, optional exponent
+					switch(next.value.segment) {
+						case '0':
+						case '1': case '2': case '3':
+						case '4': case '5': case '6':
+						case '7': case '8': case '9':
+							accumulator += next.value.segment
+							next = segmentIter.next()
+							if(next.done) {
+								yield { type: TOKEN.NUMBER, value: accumulator }
+								accumulator = EMPTY
+							}
+							break
+						case 'e':
+						case 'E':
+							accumulator += next.value.segment
+							state = STATE.NUMBER_EXPONENT_SIGN
+							next = segmentIter.next()
+							break
+						default:
+							yield { type: TOKEN.NUMBER, value: accumulator }
+							accumulator = EMPTY
+							state = stack.pop()
+							break
+					}
+					break
+				case STATE.NUMBER_EXPONENT_SIGN: // optional sign after `e`/`E`
+					switch(next.value.segment) {
+						case '+':
+							accumulator += next.value.segment
+							next = segmentIter.next()
+							state = STATE.NUMBER_EXPONENT_FIRST
+							break
+						case '-':
+							accumulator += next.value.segment
+							next = segmentIter.next()
+							state = STATE.NUMBER_EXPONENT_FIRST
+							break
+						default: // no sign; the segment is left for NUMBER_EXPONENT_FIRST
+							state = STATE.NUMBER_EXPONENT_FIRST
+							break
+					}
+					break
+				case STATE.NUMBER_EXPONENT_FIRST: // at least one exponent digit is required
+					switch(next.value.segment) {
+						case '0':
+						case '1': case '2': case '3':
+						case '4': case '5': case '6':
+						case '7': case '8': case '9':
+							accumulator += next.value.segment
+							next = segmentIter.next()
+							state = STATE.NUMBER_EXPONENT
+							break
+						default: // error token holds the text so far plus the offending segment; state is unchanged
+							accumulator += next.value.segment
+							yield { type: TOKEN.ERROR, value: accumulator }
+							accumulator = EMPTY
+							next = segmentIter.next()
+							break
+					}
+					break
+				case STATE.NUMBER_EXPONENT: // further exponent digits. NOTE(review): unlike NUMBER_INT / NUMBER_DECIMAL there is no end-of-input flush, so an exponent that ends the input is reported by the exhausted-input check as an error - confirm
+					switch(next.value.segment) {
+						case '0':
+						case '1': case '2': case '3':
+						case '4': case '5': case '6':
+						case '7': case '8': case '9':
+							accumulator += next.value.segment
+							next = segmentIter.next()
+							break
+						default:
+							yield { type: TOKEN.NUMBER, value: accumulator }
+							accumulator = EMPTY
+							state = stack.pop()
+							break
+					}
+					break
+				case STATE.OBJ_OPEN: // just after `{`
+					switch(next.value.segment) {
+						case ' ': case '\r': case '\n': case '\t':
+							stack.push(STATE.OBJ_OPEN)
+							state = STATE.WS
+							break
+						case '}': // object closed before any member was started
+							yield { type: TOKEN.OBJECT_CLOSE_IMMEDIATE, value: next.value.segment }
+							next = segmentIter.next()
+							state = stack.pop()
+							break
+						default: // members follow; OBJ_CLOSE is queued to handle the closing `}`
+							stack.push(STATE.OBJ_CLOSE)
+							state = STATE.MEMBERS
+							break
+					}
+					break
+				case STATE.OBJ_CLOSE: // expecting the closing `}`
+					switch(next.value.segment) {
+						case '}':
+							yield { type: TOKEN.OBJECT_CLOSE, value: next.value.segment }
+							next = segmentIter.next()
+							state = stack.pop()
+							break
+						default:
+							// todo - NOTE(review): this branch throws instead of yielding TOKEN.ERROR like other unexpected input
+							throw new Error('expecting object close')
+							break
+					}
+					break
+				case STATE.MEMBERS: // between object members
+					switch(next.value.segment){
+						case '}': // not consumed here; the popped state sees the same `}`
+							state = stack.pop()
+							break
+						case ',':
+							yield { type: TOKEN.OBJECT_MEMBER_COMMA, value: next.value.segment }
+							stack.push(STATE.MEMBERS)
+							state = STATE.MEMBER
+							next = segmentIter.next()
+							break
+						default:
+							stack.push(STATE.MEMBERS)
+							state = STATE.MEMBER
+							break
+					}
+					break
+				case STATE.MEMBER: // optional whitespace, then the key's opening quote
+					switch(next.value.segment) {
+						case ' ': case '\r': case '\n': case '\t':
+							stack.push(STATE.MEMBER)
+							state = STATE.WS
+							break
+						case '"':
+							yield { type: TOKEN.OBJECT_KEY_OPEN, value: next.value.segment }
+							stack.push(STATE.MEMBER_KEY_AFTER)
+							state = STATE.KEY
+							next = segmentIter.next()
+							break
+						default: // unexpected segment: report it, consume it, stay in this state
+							yield { type: TOKEN.ERROR, value: next.value.segment }
+							next = segmentIter.next()
+							break
+					}
+					break
+				case STATE.MEMBER_KEY_AFTER: // after the key: optional whitespace, then `:`
+					switch(next.value.segment) {
+						case ' ': case '\r': case '\n': case '\t':
+							stack.push(STATE.MEMBER_KEY_AFTER)
+							state = STATE.WS
+							break
+						case ':': // the member's value is read as an element
+							yield { type: TOKEN.OBJECT_COLON, value: next.value.segment }
+							next = segmentIter.next()
+							state = STATE.ELEMENT
+							break
+						default:
+							yield { type: TOKEN.ERROR, value: next.value.segment }
+							next = segmentIter.next()
+							break
+					}
+					break
+				case STATE.ARY_OPEN: // just after `[`
+					switch(next.value.segment) {
+						case ' ': case '\r': case '\n': case '\t':
+							stack.push(STATE.ARY_OPEN)
+							state = STATE.WS
+							break
+						case ']': // array closed before any element was started
+							yield { type: TOKEN.ARRAY_CLOSE_IMMEDIATE, value: next.value.segment }
+							next = segmentIter.next()
+							state = stack.pop()
+							break
+						case ',': // a comma before any element is an error; state is unchanged
+							yield { type: TOKEN.ERROR, value: next.value.segment}
+							next = segmentIter.next()
+							break
+						default: // elements follow; ARY_CLOSE is queued to handle the closing `]`
+							stack.push(STATE.ARY_CLOSE)
+							state = STATE.ELEMENTS
+							break
+					}
+					break
+				case STATE.ARY_CLOSE: // expecting the closing `]`
+					switch(next.value.segment) {
+						case ']':
+							yield { type: TOKEN.ARRAY_CLOSE, value: next.value.segment }
+							next = segmentIter.next()
+							state = stack.pop()
+							break
+						default:
+							// todo - NOTE(review): this branch throws instead of yielding TOKEN.ERROR like other unexpected input
+							throw new Error('expecting array close')
+							break
+					}
+					break
+				case STATE.KEY: // keys and string values share one scanner; only the emitted token types differ
+				case STATE.STR:
+					switch(next.value.segment) {
+						case '"': // closing quote: emit the collected text, then the quote itself
+							if(state === STATE.KEY) {
+								yield { type: TOKEN.OBJECT_KEY, value: accumulator }
+								yield { type: TOKEN.OBJECT_KEY_CLOSE, value: next.value.segment }
+							}
+							else {
+								yield { type: TOKEN.STRING, value: accumulator }
+								yield { type: TOKEN.STRING_CLOSE, value: next.value.segment }
+							}
+							accumulator = EMPTY
+							next = segmentIter.next()
+							state = stack.pop()
+							break
+						case '\\': // escape: the backslash and the segment after it are kept verbatim in the accumulator
+							accumulator += next.value.segment
+							next = segmentIter.next()
+							if(next.done) { // input ended right after the backslash
+								yield { type: TOKEN.ERROR, value: accumulator }
+								accumulator = EMPTY
+								state = stack.pop()
+								break
+							}
+							accumulator += next.value.segment
+							if(!JSONTokenizer.isValueEscapeChar(next.value?.segment)) { // unknown escape: text collected so far is emitted as an error and scanning continues
+								yield { type: TOKEN.ERROR, value: accumulator }
+								accumulator = EMPTY
+							}
+							if(next.value.segment === 'u') { // `\u`: read four more segments, then return to KEY/STR
+								stack.push(state)
+								state = STATE.U_HEX4
+							}
+							next = segmentIter.next()
+							break
+						default: // ordinary segment; if isValidChar rejects it, the text collected so far (including it) is emitted as an error
+							accumulator += next.value.segment
+							if(!JSONTokenizer.isValidChar(next.value.segment)) {
+								yield { type: TOKEN.ERROR, value: accumulator }
+								accumulator = EMPTY
+							}
+							next = segmentIter.next()
+							break
+					}
+					break
+				case STATE.U_HEX4: // the four segments after `\u`. NOTE(review): only the first is checked against the hex digits; the second to fourth are appended unchecked - confirm
+					switch(next.value.segment) {
+						case '0':
+						case '1': case '2': case '3':
+						case '4': case '5': case '6':
+						case '7': case '8': case '9':
+						case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
+						case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
+							accumulator += next.value.segment
+							// second segment; input ending here yields the collected text as an error
+							next = segmentIter.next()
+							if(next.done) {
+								yield { type: TOKEN.ERROR, value: accumulator }
+								accumulator = EMPTY
+								state = stack.pop()
+								break
+							}
+							accumulator += next.value?.segment
+							// third segment
+							next = segmentIter.next()
+							if(next.done) {
+								yield { type: TOKEN.ERROR, value: accumulator }
+								accumulator = EMPTY
+								state = stack.pop()
+								break
+							}
+							accumulator += next.value?.segment
+							// fourth segment
+							next = segmentIter.next()
+							if(next.done) {
+								yield { type: TOKEN.ERROR, value: accumulator }
+								accumulator = EMPTY
+								state = stack.pop()
+								break
+							}
+							accumulator += next.value?.segment
+							state = stack.pop()
+							next = segmentIter.next()
+							break
+						default: // first segment is not a hex digit: error token with the collected text; state is unchanged
+							accumulator += next.value.segment
+							yield { type: TOKEN.ERROR, value: accumulator }
+							accumulator = EMPTY
+							next = segmentIter.next()
+							break
+					}
+					break
+				default:
+					// todo - reached for any state value without a case above
+					throw new Error(`unknown state ${state}`)
+			}
+		}
+	}
+}