npm - @johntalton/json-tokenizer - Versions diffs - 1.0.1 → 1.1.0 - Mend

@johntalton/json-tokenizer 1.0.1 → 1.1.0

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (3)

package/README.md CHANGED Viewed

@@ -4,10 +4,9 @@ Generator function that tokenizes string based on JSON format.
 - Uses Generator based API
 - Produces tokens for all input text (including error tokens)
-- Uses `Intl.Segmenter` with `'grapheme'` granularity
-- Segmenter locale can be custom set (default: `'en-US'`)
 - Allows for `AbortSignal` to control termination
 - Best effort to match `JSON.parse` restriction
+- Start and End position for errors
 ## Example
@@ -42,60 +41,60 @@ for(const token of stream) {
   console.log(token)
 }
 /*
-{ type: 'object-open', value: '{' }
-{ type: 'whitespace', value: '\n\t' }
-{ type: 'open-key-quote', value: '"' }
-{ type: 'key', value: 'team' }
-{ type: 'close-key-quote', value: '"' }
-{ type: 'colon', value: ':' }
-{ type: 'whitespace', value: ' ' }
-{ type: 'open-string-quote', value: '"' }
-{ type: 'string', value: 'Mystery Inc' }
-{ type: 'close-string-quote', value: '"' }
-{ type: 'object-member-comma', value: ',' }
-{ type: 'whitespace', value: '\n\t' }
-{ type: 'open-key-quote', value: '"' }
-{ type: 'key', value: 'members' }
-{ type: 'close-key-quote', value: '"' }
-{ type: 'colon', value: ':' }
-{ type: 'whitespace', value: ' ' }
-{ type: 'array-open', value: '[' }
-{ type: 'whitespace', value: '\n\t\t' }
-{ type: 'open-string-quote', value: '"' }
-{ type: 'string', value: 'Fred' }
-{ type: 'close-string-quote', value: '"' }
-{ type: 'array-element-comma', value: ',' }
-{ type: 'whitespace', value: '\n\t\t' }
-{ type: 'open-string-quote', value: '"' }
-{ type: 'string', value: 'Daphne' }
-{ type: 'close-string-quote', value: '"' }
-{ type: 'array-element-comma', value: ',' }
-{ type: 'whitespace', value: '\n\t\t' }
-{ type: 'open-string-quote', value: '"' }
-{ type: 'string', value: 'Velma' }
-{ type: 'close-string-quote', value: '"' }
-{ type: 'array-element-comma', value: ',' }
-{ type: 'whitespace', value: '\n\t\t' }
-{ type: 'open-string-quote', value: '"' }
-{ type: 'string', value: 'Shaggy' }
-{ type: 'close-string-quote', value: '"' }
-{ type: 'array-element-comma', value: ',' }
-{ type: 'whitespace', value: '\n\t\t' }
-{ type: 'open-string-quote', value: '"' }
-{ type: 'string', value: 'Scooby' }
-{ type: 'close-string-quote', value: '"' }
-{ type: 'whitespace', value: '\n\t' }
-{ type: 'array-close', value: ']' }
-{ type: 'object-member-comma', value: ',' }
-{ type: 'whitespace', value: '\n\t' }
-{ type: 'open-key-quote', value: '"' }
-{ type: 'key', value: 'aired' }
-{ type: 'close-key-quote', value: '"' }
-{ type: 'colon', value: ':' }
-{ type: 'whitespace', value: ' ' }
-{ type: 'number', value: '1969' }
-{ type: 'whitespace', value: '\n' }
-{ type: 'object-close', value: '}' }
-{ type: 'eof', value: '' }
+{ type: 'object-open', value: '{', start: 0, end: 0 }
+{ type: 'whitespace', value: '\n\t', start: 1, end: 2 }
+{ type: 'open-key-quote', value: '"', start: 3, end: 3 }
+{ type: 'key', value: 'team', start: 4, end: 7 }
+{ type: 'close-key-quote', value: '"', start: 8, end: 8 }
+{ type: 'colon', value: ':', start: 9, end: 9 }
+{ type: 'whitespace', value: ' ', start: 10, end: 10 }
+{ type: 'open-string-quote', value: '"', start: 11, end: 11 }
+{ type: 'string', value: 'Mystery Inc', start: 12, end: 22 }
+{ type: 'close-string-quote', value: '"', start: 23, end: 23 }
+{ type: 'object-member-comma', value: ',', start: 24, end: 24 }
+{ type: 'whitespace', value: '\n\t', start: 25, end: 26 }
+{ type: 'open-key-quote', value: '"', start: 27, end: 27 }
+{ type: 'key', value: 'members', start: 28, end: 34 }
+{ type: 'close-key-quote', value: '"', start: 35, end: 35 }
+{ type: 'colon', value: ':', start: 36, end: 36 }
+{ type: 'whitespace', value: ' ', start: 37, end: 37 }
+{ type: 'array-open', value: '[', start: 38, end: 38 }
+{ type: 'whitespace', value: '\n\t\t', start: 39, end: 41 }
+{ type: 'open-string-quote', value: '"', start: 42, end: 42 }
+{ type: 'string', value: 'Fred', start: 43, end: 46 }
+{ type: 'close-string-quote', value: '"', start: 47, end: 47 }
+{ type: 'array-element-comma', value: ',', start: 48, end: 48 }
+{ type: 'whitespace', value: '\n\t\t', start: 49, end: 51 }
+{ type: 'open-string-quote', value: '"', start: 52, end: 52 }
+{ type: 'string', value: 'Daphne', start: 53, end: 58 }
+{ type: 'close-string-quote', value: '"', start: 59, end: 59 }
+{ type: 'array-element-comma', value: ',', start: 60, end: 60 }
+{ type: 'whitespace', value: '\n\t\t', start: 61, end: 63 }
+{ type: 'open-string-quote', value: '"', start: 64, end: 64 }
+{ type: 'string', value: 'Velma', start: 65, end: 69 }
+{ type: 'close-string-quote', value: '"', start: 70, end: 70 }
+{ type: 'array-element-comma', value: ',', start: 71, end: 71 }
+{ type: 'whitespace', value: '\n\t\t', start: 72, end: 74 }
+{ type: 'open-string-quote', value: '"', start: 75, end: 75 }
+{ type: 'string', value: 'Shaggy', start: 76, end: 81 }
+{ type: 'close-string-quote', value: '"', start: 82, end: 82 }
+{ type: 'array-element-comma', value: ',', start: 83, end: 83 }
+{ type: 'whitespace', value: '\n\t\t', start: 84, end: 86 }
+{ type: 'open-string-quote', value: '"', start: 87, end: 87 }
+{ type: 'string', value: 'Scooby', start: 88, end: 93 }
+{ type: 'close-string-quote', value: '"', start: 94, end: 94 }
+{ type: 'whitespace', value: '\n\t', start: 95, end: 96 }
+{ type: 'array-close', value: ']', start: 97, end: 97 }
+{ type: 'object-member-comma', value: ',', start: 98, end: 98 }
+{ type: 'whitespace', value: '\n\t', start: 99, end: 100 }
+{ type: 'open-key-quote', value: '"', start: 101, end: 101 }
+{ type: 'key', value: 'aired', start: 102, end: 106 }
+{ type: 'close-key-quote', value: '"', start: 107, end: 107 }
+{ type: 'colon', value: ':', start: 108, end: 108 }
+{ type: 'whitespace', value: ' ', start: 109, end: 109 }
+{ type: 'number', value: '1969', start: 110, end: 113 }
+{ type: 'whitespace', value: '\n', start: 114, end: 114 }
+{ type: 'object-close', value: '}', start: 115, end: 115 }
+{ type: 'eof', value: '', start: null, end: 116 }
 */
 ```

package/package.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "name": "@johntalton/json-tokenizer",
   "type": "module",
-  "version": "1.0.1",
+  "version": "1.1.0",
   "license": "MIT",
   "exports": {
     ".": "./src/index.js"

package/src/index.js CHANGED Viewed

@@ -1,44 +1,87 @@
 /**
  * @typedef {Object} TokenizerOptions
- * @property {string|undefined} [locale = 'en-US']
  * @property {AbortSignal|undefined} [signal]
  * @property {boolean|undefined} [debug]
  */
-export const TOKEN = {
-	ERROR: 'error',
-	EOF: 'eof',
-	WHITESPACE: 'whitespace',
+/**
+ * @typedef {Object} TokenBase
+ * @property {string} type
+ * @property {string} value
+ * @property {number|null} start
+ * @property {number|null} end
+ */
+/**
+ * @typedef {Object} ErrorTokenBase
+ * @property {'error'} type
+ * @property {string} cause
+ */
+/** @typedef {TokenBase & ErrorTokenBase} ErrorToken */
+/** @typedef {TokenBase & { type: 'eof' }} EOFToken */
+/** @typedef {TokenBase & { type: 'whitespace' }} WhitespaceToken */
+/** @typedef {TokenBase & { type: 'array-open' }} ArrayOpenToken */
+/** @typedef {TokenBase & { type: 'array-close' }} ArrayCloseToken */
+/** @typedef {TokenBase & { type: 'array-close-immediate' }} ArrayCloseImmediateToken */
+/** @typedef {TokenBase & { type: 'array-element-comma' }} ArrayCommaToken */
+/** @typedef {TokenBase & { type: 'object-open' }} ObjectOpenToken */
+/** @typedef {TokenBase & { type: 'object-close' }} ObjectCloseToken */
+/** @typedef {TokenBase & { type: 'object-close-immediate' }} ObjectCloseImmediateToken */
+/** @typedef {TokenBase & { type: 'object-member-comma' }} ObjectCommaToken */
+/** @typedef {TokenBase & { type: 'open-key-quote' }} OpenKeyQuoteToken */
+/** @typedef {TokenBase & { type: 'key' }} KeyToken */
+/** @typedef {TokenBase & { type: 'close-key-quote' }} CloseKeyQuoteToken */
+/** @typedef {TokenBase & { type: 'colon' }} ColonToken */
+/** @typedef {TokenBase & { type: 'open-string-quote' }} OpenStringQuoteToken */
+/** @typedef {TokenBase & { type: 'string' }} StringToken */
+/** @typedef {TokenBase & { type: 'close-string-quote' }} CloseStringQuoteToken */
+/** @typedef {TokenBase & { type: 'true' }} TrueToken */
+/** @typedef {TokenBase & { type: 'false' }} FalseToken */
+/** @typedef {TokenBase & { type: 'null' }} NULLToken */
+/** @typedef {TokenBase & { type: 'number' }} NumberToken */
+/** @typedef {EOFToken|WhitespaceToken|ErrorToken|ArrayOpenToken|ArrayCloseToken|ArrayCloseImmediateToken|ArrayCommaToken|ObjectOpenToken|ObjectCloseToken|ObjectCloseImmediateToken|ObjectCommaToken|OpenKeyQuoteToken|KeyToken|CloseKeyQuoteToken|ColonToken|OpenStringQuoteToken|StringToken|CloseStringQuoteToken|TrueToken|FalseToken|NULLToken|NumberToken} Token */
+/**
+ * @typedef {Object} AccumulationState
+ * @property {string} value
+ * @property {number|null} start
+ * @property {number|null} end
+ */
+export const TOKEN_ERROR = 'error'
+export const TOKEN_EOF = 'eof'
+export const TOKEN_WHITESPACE = 'whitespace'
 	// Array / Elements
-	ARRAY_OPEN: 'array-open',
-	ARRAY_CLOSE: 'array-close',
-	ARRAY_CLOSE_IMMEDIATE: 'array-close-immediate',
-	ARRAY_ELEMENT_COMMA: 'array-element-comma',
+export const TOKEN_ARRAY_OPEN = 'array-open'
+export const TOKEN_ARRAY_CLOSE = 'array-close'
+export const TOKEN_ARRAY_CLOSE_IMMEDIATE = 'array-close-immediate'
+export const TOKEN_ARRAY_ELEMENT_COMMA = 'array-element-comma'
 	// Object
-	OBJECT_OPEN: 'object-open',
-	OBJECT_CLOSE: 'object-close',
-	OBJECT_CLOSE_IMMEDIATE: 'object-close-immediate',
-	OBJECT_MEMBER_COMMA: 'object-member-comma',
-	OBJECT_KEY_OPEN: 'open-key-quote',
-	OBJECT_KEY: 'key',
-	OBJECT_KEY_CLOSE: 'close-key-quote',
-	OBJECT_COLON: 'colon',
+export const TOKEN_OBJECT_OPEN = 'object-open'
+export const TOKEN_OBJECT_CLOSE = 'object-close'
+export const TOKEN_OBJECT_CLOSE_IMMEDIATE = 'object-close-immediate'
+export const TOKEN_OBJECT_MEMBER_COMMA = 'object-member-comma'
+export const TOKEN_OBJECT_KEY_OPEN = 'open-key-quote'
+export const TOKEN_OBJECT_KEY = 'key'
+export const TOKEN_OBJECT_KEY_CLOSE = 'close-key-quote'
+export const TOKEN_OBJECT_COLON = 'colon'
 	// String
-	STRING_OPEN: 'open-string-quote',
-	STRING: 'string',
-	STRING_CLOSE: 'close-string-quote',
+export const TOKEN_STRING_OPEN = 'open-string-quote'
+export const TOKEN_STRING = 'string'
+export const TOKEN_STRING_CLOSE = 'close-string-quote'
 	// Primitives
-	TRUE: 'true',
-	FALSE: 'false',
-	NULL: 'null',
+export const TOKEN_TRUE = 'true'
+export const TOKEN_FALSE = 'false'
+export const TOKEN_NULL = 'null'
 	// Number
-	NUMBER: 'number'
-}
+export const TOKEN_NUMBER = 'number'
 export const EMPTY = ''
@@ -53,6 +96,7 @@ export const STATE = {
 	ARY_OPEN: 'ao',
 	ARY_CLOSE: 'ac',
 	MEMBERS: 'ms',
+	MEMBERS_CONTINUE: 'msc',
 	MEMBER: 'm',
 	KEY: 'key',
 	MEMBER_KEY_AFTER: 'mka',
@@ -73,11 +117,64 @@ export const STATE = {
 	U_HEX4: 'hex'
 }
+export const EXIT_STATES = [ STATE.ELEMENT_AFTER ]
 export const ESCAPE_CHARS = [
 	'"', '\\', '/', 'b', 'f', 'n', 'r', 't', 'u'
 ]
-export const DEFAULT_LOCALE = 'en-US'
+export const HEX_CHARS = [
+	'1', '2', '3', '4', '5', '6', '7', '8', '9', '0',
+	'a', 'b', 'c', 'd', 'e', 'f',
+	'A', 'B', 'C', 'D', 'E', 'F',
+]
+export class Accumulator {
+	/**
+	 * @param {AccumulationState} accumulationState
+	 */
+	static #reset(accumulationState) {
+		accumulationState.value = EMPTY
+		accumulationState.start = null
+		accumulationState.end = null
+	}
+	/**
+	 * @param {AccumulationState} accumulationState
+	 * @param {number} start
+	 */
+	static start(accumulationState, start) {
+		accumulationState.value = EMPTY
+		accumulationState.start = start
+		accumulationState.end = start
+	}
+	/**
+	 * @param {AccumulationState} accumulationState
+	 * @param {string} value
+	 * @param {number} end
+	 */
+	static accumulate(accumulationState, value, end) {
+		accumulationState.value += value
+		accumulationState.end = end
+	}
+	/**
+	 * @param {AccumulationState} accumulationState
+	 */
+	static end(accumulationState) {
+		const result = { ...accumulationState }
+		Accumulator.#reset(accumulationState)
+		return result
+	}
+	/**
+	 * @param {AccumulationState} accumulationState
+	 */
+	static empty(accumulationState) {
+		return accumulationState.value === EMPTY
+	}
+}
 export class JSONTokenizer {
 	/**
@@ -100,63 +197,107 @@ export class JSONTokenizer {
 		return true
 	}
+	/**
+	 * @param {string} str
+	 */
+	static isValidHEX(str) {
+		return HEX_CHARS.includes(str)
+	}
 	/**
 	 * @param {string} str
 	 * @param {TokenizerOptions} [options]
+	 * @return {Generator<Token, undefined, undefined>}
 	*/
 	static *tokenize(str, options) {
 		const debug = (options?.debug ?? false) === true
 		const signal = options?.signal
-		const locale = options?.locale ?? DEFAULT_LOCALE
-		const seg = new Intl.Segmenter(locale, { granularity: 'grapheme' })
-		const segments = seg.segment(str)
-		using segmentIter = segments[Symbol.iterator]()
+		using segmentIter = Iterator.from(str)
+			.map((item, index) => ({
+				segment: item,
+				done: false,
+				index
+			}))
 		/** @type {Array<string>} */
-		const stack = []
+		const stack = [ ]
 		/** @type {string|undefined} */
 		let state = STATE.ELEMENT
 		let next = segmentIter.next()
-		let accumulator = EMPTY
+		/** @type {AccumulationState} */
+		const accumulatorState = {
+			value: EMPTY,
+			start: null,
+			end: null
+		}
+		let line = 1
 		//
 		if(next.done) {
-			yield { type: TOKEN.ERROR, value: EMPTY }
+			yield { type: TOKEN_ERROR, value: EMPTY, start: 0, end: 0, cause: 'empty' }
 			return
 		}
 		//
 		while(true) {
 			if(signal?.aborted) {
-				//
+				yield { type: TOKEN_EOF, value: EMPTY, start: null, end: null }
 				break
 			}
 			//
 			if(next.done) {
-				if(accumulator !== EMPTY) { yield { type: TOKEN.ERROR, value: accumulator } }
-				if(stack.length !== 0) { yield { type: TOKEN.ERROR, value: EMPTY }}
-				yield { type: TOKEN.EOF, value: EMPTY }
+				if(state === undefined) {
+					yield { type: TOKEN_ERROR, value: EMPTY, start: null, end: null, cause: 'undefined state' }
+				}
+				//
+				if(state !== undefined && !EXIT_STATES.includes(state)) {
+					yield { type: TOKEN_ERROR, value: EMPTY, start: null, end: str.length, cause: `invalid exit state (${state})` }
+				}
+				//
+				if(!Accumulator.empty(accumulatorState)) {
+					yield { type: TOKEN_ERROR, ...Accumulator.end(accumulatorState), cause: 'accumulator not empty (done)' }
+				}
+				//
+				if(stack.length !== 0) {
+					yield { type: TOKEN_ERROR, value: EMPTY, start: null, end: str.length, cause: 'stack not empty' }
+				}
+				//
+				yield { type: TOKEN_EOF, value: EMPTY, start: null, end: str.length }
 				break
 			}
 			//
 			if(state === undefined) {
-				if(accumulator !== EMPTY) { yield { type: TOKEN.ERROR, value: accumulator } }
+				//
+				if(!Accumulator.empty(accumulatorState)) {
+					yield { type: TOKEN_ERROR, ...Accumulator.end(accumulatorState), cause: 'accumulator not empty (stack)' }
+				}
+				//
 				if(!next.done) {
-					accumulator = EMPTY
+					Accumulator.start(accumulatorState, next.value.index)
 					while(!next.done) {
-						accumulator += next.value.segment
+						if(signal?.aborted) { break }
+						Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
 						next = segmentIter.next()
 					}
-					yield { type: TOKEN.ERROR, value: accumulator }
+					yield { type: TOKEN_ERROR, ...Accumulator.end(accumulatorState), cause: 'accumulator not empty (not done)' }
 				}
-				yield { type: TOKEN.EOF, value: EMPTY }
+				//
+				yield { type: TOKEN_EOF, value: EMPTY, start: null, end: null }
 				break
 			}
@@ -164,9 +305,9 @@ export class JSONTokenizer {
 			if(debug) {
 				console.log({
 					seg: next.value.segment,
-					state, stack:
-					stack.join(','),
-					accumulator
+					state,
+					stack: stack.join(','),
+					...accumulatorState
 				})
 			}
@@ -178,7 +319,7 @@ export class JSONTokenizer {
 							state = stack.pop()
 							break
 						case ',':
-							yield { type: TOKEN.ARRAY_ELEMENT_COMMA, value: next.value.segment }
+							yield { type: TOKEN_ARRAY_ELEMENT_COMMA, value: next.value.segment, start: next.value.index, end: next.value.index }
 							stack.push(STATE.ELEMENTS)
 							state = STATE.ELEMENT
 							next = segmentIter.next()
@@ -192,6 +333,7 @@ export class JSONTokenizer {
 				case STATE.ELEMENT:
 					switch(next.value.segment) {
 						case ' ': case '\r': case '\n': case '\t':
+							Accumulator.start(accumulatorState, next.value.index)
 							stack.push(STATE.ELEMENT)
 							state = STATE.WS
 							break
@@ -204,6 +346,7 @@ export class JSONTokenizer {
 				case STATE.ELEMENT_AFTER:
 					switch(next.value.segment) {
 						case ' ': case '\r': case '\n': case '\t':
+							Accumulator.start(accumulatorState, next.value.index)
 							stack.push(STATE.ELEMENT_AFTER)
 							state = STATE.WS
 							break
@@ -217,7 +360,7 @@ export class JSONTokenizer {
 							state = stack.pop()
 							break
 						default:
-							yield { type: TOKEN.ERROR, value: next.value.segment }
+							yield { type: TOKEN_ERROR, value: next.value.segment, start: next.value.index, end: next.value.index, cause: 'expect close, comma or whitespace' }
 							next = segmentIter.next()
 							break
 					}
@@ -225,16 +368,20 @@ export class JSONTokenizer {
 				case STATE.WS:
 					switch(next.value.segment) {
 						case ' ': case '\r': case '\n': case '\t':
-							accumulator += next.value.segment
+							if(next.value.segment === '\n') {
+								line += 1
+							}
+							Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
 							next = segmentIter.next()
 							if(next.done) {
-								yield { type: TOKEN.WHITESPACE, value: accumulator }
-								accumulator = EMPTY
+								yield { type: TOKEN_WHITESPACE, ...Accumulator.end(accumulatorState) }
+								state = stack.pop()
 							}
 							break
 						default:
-							yield { type: TOKEN.WHITESPACE, value: accumulator }
-							accumulator = EMPTY
+							yield { type: TOKEN_WHITESPACE, ...Accumulator.end(accumulatorState) }
 							state = stack.pop()
 							break
 					}
@@ -242,157 +389,150 @@ export class JSONTokenizer {
 				case STATE.VALUE:
 					switch(next.value.segment) {
 						case '{':
-							yield { type: TOKEN.OBJECT_OPEN, value: next.value.segment }
+							yield { type: TOKEN_OBJECT_OPEN, value: next.value.segment, start: next.value.index, end: next.value.index }
 							state = STATE.OBJ_OPEN
 							next = segmentIter.next()
 							break
 						case '[':
-							yield { type: TOKEN.ARRAY_OPEN, value: next.value.segment }
+							yield { type: TOKEN_ARRAY_OPEN, value: next.value.segment, start: next.value.index, end: next.value.index }
 							state = STATE.ARY_OPEN
 							next = segmentIter.next()
 							break
 						case '"':
-							yield { type: TOKEN.STRING_OPEN, value: next.value.segment }
-							accumulator = EMPTY
+							yield { type: TOKEN_STRING_OPEN, value: next.value.segment, start: next.value.index, end: next.value.index }
 							state = STATE.STR
 							next = segmentIter.next()
+							if(!next.done) { Accumulator.start(accumulatorState, next.value.index) }
 							break
 						case 't':
-							accumulator = 't'
+							Accumulator.start(accumulatorState, next.value.index)
+							Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
 							next = segmentIter.next()
 							if(next.done || next.value.segment !== 'r') {
-								yield { type: TOKEN.ERROR, value: accumulator }
-								accumulator = EMPTY
+								yield { type: TOKEN_ERROR, ...Accumulator.end(accumulatorState), cause: 'expected true literal (r)' }
 								state = stack.pop()
 								break
 							}
-							accumulator += next.value?.segment
+							Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
 							next = segmentIter.next()
 							if(next.done || next.value.segment !== 'u') {
-								yield { type: TOKEN.ERROR, value: accumulator }
-								accumulator = EMPTY
+								yield { type: TOKEN_ERROR, ...Accumulator.end(accumulatorState), cause: 'expected true literal (u)' }
 								state = stack.pop()
 								break
 							}
-							accumulator += next.value?.segment
+							Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
 							next = segmentIter.next()
 							if(next.done || next.value.segment !== 'e') {
-								yield { type: TOKEN.ERROR, value: accumulator }
-								accumulator = EMPTY
+								yield { type: TOKEN_ERROR, ...Accumulator.end(accumulatorState), cause: 'expected true literal (e)' }
 								state = stack.pop()
 								break
 							}
-							accumulator += next.value?.segment
+							Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
-							yield { type: TOKEN.TRUE, value: accumulator }
-							accumulator = EMPTY
+							yield { type: TOKEN_TRUE, ...Accumulator.end(accumulatorState) }
 							next = segmentIter.next()
 							state = stack.pop()
 							break
 						case 'f':
-							accumulator = 'f'
+							Accumulator.start(accumulatorState, next.value.index)
+							Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
 							next = segmentIter.next()
 							if(next.done || next.value.segment !== 'a') {
-								yield { type: TOKEN.ERROR, value: accumulator }
-								accumulator = EMPTY
+								yield { type: TOKEN_ERROR, ...Accumulator.end(accumulatorState), cause: 'expected false literal (a)' }
 								state = stack.pop()
 								break
 							}
-							accumulator += next.value?.segment
+							Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
 							next = segmentIter.next()
 							if(next.done || next.value.segment !== 'l') {
-								yield { type: TOKEN.ERROR, value: accumulator }
-								accumulator = EMPTY
+								yield { type: TOKEN_ERROR, ...Accumulator.end(accumulatorState), cause: 'expected false literal (l)' }
 								state = stack.pop()
 								break
 							}
-							accumulator += next.value?.segment
+							Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
 							next = segmentIter.next()
 							if(next.done || next.value.segment !== 's') {
-								yield { type: TOKEN.ERROR, value: accumulator }
-								accumulator = EMPTY
+								yield { type: TOKEN_ERROR, ...Accumulator.end(accumulatorState), cause: 'expected false literal (s)' }
 								state = stack.pop()
 								break
 							}
-							accumulator += next.value?.segment
+							Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
 							next = segmentIter.next()
 							if(next.done || next.value.segment !== 'e') {
-								yield { type: TOKEN.ERROR, value: accumulator }
-								accumulator = EMPTY
+								yield { type: TOKEN_ERROR, ...Accumulator.end(accumulatorState), cause: 'expected false literal (e)' }
 								state = stack.pop()
 								break
 							}
-							accumulator += next.value?.segment
+							Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
-							yield { type: TOKEN.FALSE, value: accumulator }
-							accumulator = EMPTY
+							yield { type: TOKEN_FALSE, ...Accumulator.end(accumulatorState) }
 							next = segmentIter.next()
 							state = stack.pop()
 							break
 						case 'n':
-							accumulator = 'n'
+							Accumulator.start(accumulatorState, next.value.index)
+							Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
 							next = segmentIter.next()
 							if(next.done || next.value.segment !== 'u') {
-								yield { type: TOKEN.ERROR, value: accumulator }
-								accumulator = EMPTY
+								yield { type: TOKEN_ERROR, ...Accumulator.end(accumulatorState), cause: 'expected null literal (u)' }
 								state = stack.pop()
 								break
 							}
-							accumulator += next.value?.segment
+							Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
 							next = segmentIter.next()
 							if(next.done || next.value.segment !== 'l') {
-								yield { type: TOKEN.ERROR, value: accumulator }
-								accumulator = EMPTY
+								yield { type: TOKEN_ERROR, ...Accumulator.end(accumulatorState), cause: 'expected null literal (l)' }
 								state = stack.pop()
 								break
 							}
-							accumulator += next.value?.segment
+							Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
 							next = segmentIter.next()
 							if(next.done || next.value.segment !== 'l') {
-								yield { type: TOKEN.ERROR, value: accumulator }
-								accumulator = EMPTY
+								yield { type: TOKEN_ERROR, ...Accumulator.end(accumulatorState), cause: 'expected null literal (l)' }
 								state = stack.pop()
 								break
 							}
-							accumulator += next.value?.segment
+							Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
-							yield { type: TOKEN.NULL, value: accumulator }
-							accumulator = EMPTY
+							yield { type: TOKEN_NULL, ...Accumulator.end(accumulatorState) }
 							next = segmentIter.next()
 							state = stack.pop()
 							break
 						case '-':
-							accumulator = next.value.segment
+							Accumulator.start(accumulatorState, next.value.index)
+							Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
 							state = STATE.NUMBER
 							next = segmentIter.next()
 							break
 						case '0':
-							accumulator = next.value.segment
+							Accumulator.start(accumulatorState, next.value.index)
+							Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
 							state = STATE.NUMBER_INT_AFTER
 							next = segmentIter.next()
 							break
 						case '1': case '2': case '3':
 						case '4': case '5': case '6':
 						case '7': case '8': case '9':
-							accumulator = EMPTY
+							Accumulator.start(accumulatorState, next.value.index)
 							state = STATE.NUMBER_INT
 							break
 						default:
 							//
-							yield { type: TOKEN.ERROR, value: next.value.segment }
+							yield { type: TOKEN_ERROR, value: next.value.segment, start: next.value.index, end: next.value.index, cause: 'unexpected value' }
 							next = segmentIter.next()
 							break
 					}
@@ -400,7 +540,7 @@ export class JSONTokenizer {
 				case STATE.NUMBER:
 					switch(next.value.segment) {
 						case '0':
-							accumulator += next.value.segment
+							Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
 							state = STATE.NUMBER_INT_AFTER
 							next = segmentIter.next()
 							break
@@ -414,14 +554,13 @@ export class JSONTokenizer {
 						case '1': case '2': case '3':
 						case '4': case '5': case '6':
 						case '7': case '8': case '9':
-							accumulator += next.value.segment
+							Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
 							state = STATE.NUMBER_INT
 							next = segmentIter.next()
 							break
 						default:
-							accumulator += next.value.segment
-							yield { type: TOKEN.ERROR, value: accumulator }
-							accumulator = EMPTY
+							Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
+							yield { type: TOKEN_ERROR, ...Accumulator.end(accumulatorState), cause: 'invalid number format' }
 							next = segmentIter.next()
 							break
 					}
@@ -432,12 +571,12 @@ export class JSONTokenizer {
 						case '1': case '2': case '3':
 						case '4': case '5': case '6':
 						case '7': case '8': case '9':
-							accumulator += next.value.segment
+							Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
 							next = segmentIter.next()
 							if(next.done) {
-								yield { type: TOKEN.NUMBER, value: accumulator }
-								accumulator = EMPTY
+								yield { type: TOKEN_NUMBER, ...Accumulator.end(accumulatorState) }
+								state = stack.pop()
 							}
 							break
 						default:
@@ -448,19 +587,18 @@ export class JSONTokenizer {
 				case STATE.NUMBER_INT_AFTER:
 					switch(next.value.segment) {
 						case '.':
-							accumulator += next.value.segment
+							Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
 							next = segmentIter.next()
 							state = STATE.NUMBER_DECIMAL_FIRST
 							break
 						case 'e':
 						case 'E':
-							accumulator += next.value.segment
+							Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
 							state = STATE.NUMBER_EXPONENT_SIGN
 							next = segmentIter.next()
 							break
 						default:
-							yield { type: TOKEN.NUMBER, value: accumulator }
-							accumulator = EMPTY
+							yield { type: TOKEN_NUMBER, ...Accumulator.end(accumulatorState) }
 							state = stack.pop()
 							break
 					}
@@ -471,20 +609,20 @@ export class JSONTokenizer {
 						case '1': case '2': case '3':
 						case '4': case '5': case '6':
 						case '7': case '8': case '9':
-							accumulator += next.value.segment
+							Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
 							next = segmentIter.next()
 							state = STATE.NUMBER_DECIMAL
 							if(next.done) {
-								yield { type: TOKEN.NUMBER, value: accumulator }
-								accumulator = EMPTY
+								yield { type: TOKEN_NUMBER, ...Accumulator.end(accumulatorState) }
+								state = stack.pop()
 							}
 							break
 						default:
-							accumulator += next.value.segment
-							yield { type: TOKEN.ERROR, value: accumulator}
-							accumulator = EMPTY
+							Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
+							yield { type: TOKEN_ERROR, ...Accumulator.end(accumulatorState), cause: 'invalid decimal format' }
 							next = segmentIter.next()
+							state = stack.pop()
 							break
 					}
 					break
@@ -494,23 +632,22 @@ export class JSONTokenizer {
 						case '1': case '2': case '3':
 						case '4': case '5': case '6':
 						case '7': case '8': case '9':
-							accumulator += next.value.segment
+							Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
 							next = segmentIter.next()
 							if(next.done) {
-								yield { type: TOKEN.NUMBER, value: accumulator }
-								accumulator = EMPTY
+								yield { type: TOKEN_NUMBER, ...Accumulator.end(accumulatorState) }
+								state = stack.pop()
 							}
 							break
 						case 'e':
 						case 'E':
-							accumulator += next.value.segment
+							Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
 							state = STATE.NUMBER_EXPONENT_SIGN
 							next = segmentIter.next()
 							break
 						default:
-							yield { type: TOKEN.NUMBER, value: accumulator }
-							accumulator = EMPTY
+							yield { type: TOKEN_NUMBER, ...Accumulator.end(accumulatorState) }
 							state = stack.pop()
 							break
 					}
@@ -518,12 +655,12 @@ export class JSONTokenizer {
 				case STATE.NUMBER_EXPONENT_SIGN:
 					switch(next.value.segment) {
 						case '+':
-							accumulator += next.value.segment
+							Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
 							next = segmentIter.next()
 							state = STATE.NUMBER_EXPONENT_FIRST
 							break
 						case '-':
-							accumulator += next.value.segment
+							Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
 							next = segmentIter.next()
 							state = STATE.NUMBER_EXPONENT_FIRST
 							break
@@ -538,15 +675,15 @@ export class JSONTokenizer {
 						case '1': case '2': case '3':
 						case '4': case '5': case '6':
 						case '7': case '8': case '9':
-							accumulator += next.value.segment
+							Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
 							next = segmentIter.next()
 							state = STATE.NUMBER_EXPONENT
 							break
 						default:
-							accumulator += next.value.segment
-							yield { type: TOKEN.ERROR, value: accumulator }
-							accumulator = EMPTY
+							Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
+							yield { type: TOKEN_ERROR, ...Accumulator.end(accumulatorState), cause: 'invalid exponent format' }
 							next = segmentIter.next()
+							state = stack.pop()
 							break
 					}
 					break
@@ -556,12 +693,11 @@ export class JSONTokenizer {
 						case '1': case '2': case '3':
 						case '4': case '5': case '6':
 						case '7': case '8': case '9':
-							accumulator += next.value.segment
+							Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
 							next = segmentIter.next()
 							break
 						default:
-							yield { type: TOKEN.NUMBER, value: accumulator }
-							accumulator = EMPTY
+							yield { type: TOKEN_NUMBER, ...Accumulator.end(accumulatorState) }
 							state = stack.pop()
 							break
 					}
@@ -569,11 +705,12 @@ export class JSONTokenizer {
 				case STATE.OBJ_OPEN:
 					switch(next.value.segment) {
 						case ' ': case '\r': case '\n': case '\t':
+							Accumulator.start(accumulatorState, next.value.index)
 							stack.push(STATE.OBJ_OPEN)
 							state = STATE.WS
 							break
 						case '}':
-							yield { type: TOKEN.OBJECT_CLOSE_IMMEDIATE, value: next.value.segment }
+							yield { type: TOKEN_OBJECT_CLOSE_IMMEDIATE, value: next.value.segment, start: next.value.index, end: next.value.index }
 							next = segmentIter.next()
 							state = stack.pop()
 							break
@@ -586,7 +723,7 @@ export class JSONTokenizer {
 				case STATE.OBJ_CLOSE:
 					switch(next.value.segment) {
 						case '}':
-							yield { type: TOKEN.OBJECT_CLOSE, value: next.value.segment }
+							yield { type: TOKEN_OBJECT_CLOSE, value: next.value.segment, start: next.value.index, end: next.value.index }
 							next = segmentIter.next()
 							state = stack.pop()
 							break
@@ -597,36 +734,49 @@ export class JSONTokenizer {
 					}
 					break
 				case STATE.MEMBERS:
+					switch(next.value.segment){
+						case '}':
+							state = stack.pop()
+							break
+						default:
+							stack.push(STATE.MEMBERS_CONTINUE)
+							state = STATE.MEMBER
+							break
+					}
+					break
+				case STATE.MEMBERS_CONTINUE:
 					switch(next.value.segment){
 						case '}':
 							state = stack.pop()
 							break
 						case ',':
-							yield { type: TOKEN.OBJECT_MEMBER_COMMA, value: next.value.segment }
-							stack.push(STATE.MEMBERS)
+							yield { type: TOKEN_OBJECT_MEMBER_COMMA, value: next.value.segment, start: next.value.index, end: next.value.index }
+							stack.push(STATE.MEMBERS_CONTINUE)
 							state = STATE.MEMBER
 							next = segmentIter.next()
 							break
 						default:
-							stack.push(STATE.MEMBERS)
-							state = STATE.MEMBER
+							yield { type: TOKEN_ERROR, value: next.value.segment, start: next.value.index, end: next.value.index, cause: 'expecting comma or close object' }
+							state = stack.pop()
 							break
 					}
 					break
 				case STATE.MEMBER:
 					switch(next.value.segment) {
 						case ' ': case '\r': case '\n': case '\t':
+							Accumulator.start(accumulatorState, next.value.index)
 							stack.push(STATE.MEMBER)
 							state = STATE.WS
 							break
 						case '"':
-							yield { type: TOKEN.OBJECT_KEY_OPEN, value: next.value.segment }
+							yield { type: TOKEN_OBJECT_KEY_OPEN, value: next.value.segment, start: next.value.index, end: next.value.index }
 							stack.push(STATE.MEMBER_KEY_AFTER)
 							state = STATE.KEY
 							next = segmentIter.next()
+							if(!next.done) { Accumulator.start(accumulatorState, next.value.index) }
 							break
 						default:
-							yield { type: TOKEN.ERROR, value: next.value.segment }
+							yield { type: TOKEN_ERROR, value: next.value.segment, start: next.value.index, end: next.value.index, cause: 'expected member key or whitespace' }
 							next = segmentIter.next()
 							break
 					}
@@ -634,17 +784,18 @@ export class JSONTokenizer {
 				case STATE.MEMBER_KEY_AFTER:
 					switch(next.value.segment) {
 						case ' ': case '\r': case '\n': case '\t':
+							Accumulator.start(accumulatorState, next.value.index)
 							stack.push(STATE.MEMBER_KEY_AFTER)
 							state = STATE.WS
 							break
 						case ':':
-							yield { type: TOKEN.OBJECT_COLON, value: next.value.segment }
+							yield { type: TOKEN_OBJECT_COLON, value: next.value.segment, start: next.value.index, end: next.value.index }
 							next = segmentIter.next()
 							state = STATE.ELEMENT
 							break
 						default:
-							yield { type: TOKEN.ERROR, value: next.value.segment }
+							yield { type: TOKEN_ERROR, value: next.value.segment, start: next.value.index, end: next.value.index, cause: 'expected whitespace or colon' }
 							next = segmentIter.next()
 							break
 					}
@@ -652,16 +803,17 @@ export class JSONTokenizer {
 				case STATE.ARY_OPEN:
 					switch(next.value.segment) {
 						case ' ': case '\r': case '\n': case '\t':
+							Accumulator.start(accumulatorState, next.value.index)
 							stack.push(STATE.ARY_OPEN)
 							state = STATE.WS
 							break
 						case ']':
-							yield { type: TOKEN.ARRAY_CLOSE_IMMEDIATE, value: next.value.segment }
+							yield { type: TOKEN_ARRAY_CLOSE_IMMEDIATE, value: next.value.segment, start: next.value.index, end: next.value.index }
 							next = segmentIter.next()
 							state = stack.pop()
 							break
 						case ',':
-							yield { type: TOKEN.ERROR, value: next.value.segment}
+							yield { type: TOKEN_ERROR, value: next.value.segment, start: next.value.index, end: next.value.index, cause: 'unexpected array comma' }
 							next = segmentIter.next()
 							break
 						default:
@@ -673,7 +825,7 @@ export class JSONTokenizer {
 				case STATE.ARY_CLOSE:
 					switch(next.value.segment) {
 						case ']':
-							yield { type: TOKEN.ARRAY_CLOSE, value: next.value.segment }
+							yield { type: TOKEN_ARRAY_CLOSE, value: next.value.segment, start: next.value.index, end: next.value.index }
 							next = segmentIter.next()
 							state = stack.pop()
 							break
@@ -688,34 +840,31 @@ export class JSONTokenizer {
 					switch(next.value.segment) {
 						case '"':
 							if(state === STATE.KEY) {
-								yield { type: TOKEN.OBJECT_KEY, value: accumulator }
-								yield { type: TOKEN.OBJECT_KEY_CLOSE, value: next.value.segment }
+								yield { type: TOKEN_OBJECT_KEY, ...Accumulator.end(accumulatorState) }
+								yield { type: TOKEN_OBJECT_KEY_CLOSE, value: next.value.segment, start: next.value.index, end: next.value.index }
 							}
 							else {
-								yield { type: TOKEN.STRING, value: accumulator }
-								yield { type: TOKEN.STRING_CLOSE, value: next.value.segment }
+								yield { type: TOKEN_STRING, ...Accumulator.end(accumulatorState) }
+								yield { type: TOKEN_STRING_CLOSE, value: next.value.segment, start: next.value.index, end: next.value.index }
 							}
-							accumulator = EMPTY
 							next = segmentIter.next()
 							state = stack.pop()
 							break
 						case '\\':
-							accumulator += next.value.segment
+							Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
 							next = segmentIter.next()
 							if(next.done) {
-								yield { type: TOKEN.ERROR, value: accumulator }
-								accumulator = EMPTY
+								yield { type: TOKEN_ERROR, ...Accumulator.end(accumulatorState), cause: 'invalid escape sequence' }
 								state = stack.pop()
 								break
 							}
-							accumulator += next.value.segment
+							Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
 							if(!JSONTokenizer.isValueEscapeChar(next.value?.segment)) {
-								yield { type: TOKEN.ERROR, value: accumulator }
-								accumulator = EMPTY
+								yield { type: TOKEN_ERROR, ...Accumulator.end(accumulatorState), cause: 'invalid escape character' }
 							}
 							if(next.value.segment === 'u') {
@@ -726,11 +875,10 @@ export class JSONTokenizer {
 							next = segmentIter.next()
 							break
 						default:
-							accumulator += next.value.segment
+							Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
 							if(!JSONTokenizer.isValidChar(next.value.segment)) {
-								yield { type: TOKEN.ERROR, value: accumulator }
-								accumulator = EMPTY
+								yield { type: TOKEN_ERROR, ...Accumulator.end(accumulatorState), cause: 'invalid character in string literal' }
 							}
 							next = segmentIter.next()
@@ -746,52 +894,68 @@ export class JSONTokenizer {
 						case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
 						case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
-							accumulator += next.value.segment
+							Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
 							// second
 							next = segmentIter.next()
 							if(next.done) {
-								yield { type: TOKEN.ERROR, value: accumulator }
-								accumulator = EMPTY
+								yield { type: TOKEN_ERROR, ...Accumulator.end(accumulatorState), cause: 'invalid escape sequence (2nd done)' }
 								state = stack.pop()
 								break
 							}
-							accumulator += next.value?.segment
+							Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
+							if(!JSONTokenizer.isValidHEX(next.value.segment)) {
+								yield { type: TOKEN_ERROR, ...Accumulator.end(accumulatorState), cause: 'invalid escape sequence (2nd)' }
+								state = stack.pop()
+								next = segmentIter.next()
+								break
+							}
 							// third
 							next = segmentIter.next()
 							if(next.done) {
-								yield { type: TOKEN.ERROR, value: accumulator }
-								accumulator = EMPTY
+								yield { type: TOKEN_ERROR, ...Accumulator.end(accumulatorState), cause: 'invalid escape sequence (3rd done)' }
+								state = stack.pop()
+								break
+							}
+							Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
+							if(!JSONTokenizer.isValidHEX(next.value.segment)) {
+								yield { type: TOKEN_ERROR, ...Accumulator.end(accumulatorState), cause: 'invalid escape sequence (3rd)' }
 								state = stack.pop()
+								next = segmentIter.next()
 								break
 							}
-							accumulator += next.value?.segment
 							// fourth
 							next = segmentIter.next()
 							if(next.done) {
-								yield { type: TOKEN.ERROR, value: accumulator }
-								accumulator = EMPTY
+								yield { type: TOKEN_ERROR, ...Accumulator.end(accumulatorState), cause: 'invalid escape sequence (4th done)' }
 								state = stack.pop()
 								break
 							}
-							accumulator += next.value?.segment
+							Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
+							if(!JSONTokenizer.isValidHEX(next.value.segment)) {
+								yield { type: TOKEN_ERROR, ...Accumulator.end(accumulatorState), cause: 'invalid escape sequence (4th)' }
+								state = stack.pop()
+								next = segmentIter.next()
+								break
+							}
 							state = stack.pop()
 							next = segmentIter.next()
 							break
 						default:
-							accumulator += next.value.segment
-							yield { type: TOKEN.ERROR, value: accumulator }
-							accumulator = EMPTY
+							Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
+							yield { type: TOKEN_ERROR, ...Accumulator.end(accumulatorState), cause: 'invalid escape sequence' }
+							state = stack.pop()
 							next = segmentIter.next()
 							break
 					}
 					break
 				default:
-					// todo
-					throw new Error(`unknown state ${state}`)
+					yield { type: TOKEN_ERROR, ...Accumulator.end(accumulatorState), cause: `unknown state ${state}` }
+					break
 			}
 		}
 	}