@johntalton/json-tokenizer 1.0.1 → 1.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3) hide show
  1. package/README.md +68 -58
  2. package/package.json +11 -1
  3. package/src/index.js +360 -178
package/README.md CHANGED
@@ -4,10 +4,9 @@ Generator function that tokenizes string based on JSON format.
4
4
 
5
5
  - Uses Generator based API
6
6
  - Produces tokens for all input text (including error tokens)
7
- - Uses `Intl.Segmenter` with `'grapheme'` granularity
8
- - Segmenter locale can be custom set (default: `'en-US'`)
9
7
  - Allows for `AbortSignal` to control termination
10
8
  - Best effort to match `JSON.parse` restrictions
9
+ - Start and end positions for every token (including errors)
11
10
 
12
11
  ## Example
13
12
 
@@ -42,60 +41,71 @@ for(const token of stream) {
42
41
  console.log(token)
43
42
  }
44
43
  /*
45
- { type: 'object-open', value: '{' }
46
- { type: 'whitespace', value: '\n\t' }
47
- { type: 'open-key-quote', value: '"' }
48
- { type: 'key', value: 'team' }
49
- { type: 'close-key-quote', value: '"' }
50
- { type: 'colon', value: ':' }
51
- { type: 'whitespace', value: ' ' }
52
- { type: 'open-string-quote', value: '"' }
53
- { type: 'string', value: 'Mystery Inc' }
54
- { type: 'close-string-quote', value: '"' }
55
- { type: 'object-member-comma', value: ',' }
56
- { type: 'whitespace', value: '\n\t' }
57
- { type: 'open-key-quote', value: '"' }
58
- { type: 'key', value: 'members' }
59
- { type: 'close-key-quote', value: '"' }
60
- { type: 'colon', value: ':' }
61
- { type: 'whitespace', value: ' ' }
62
- { type: 'array-open', value: '[' }
63
- { type: 'whitespace', value: '\n\t\t' }
64
- { type: 'open-string-quote', value: '"' }
65
- { type: 'string', value: 'Fred' }
66
- { type: 'close-string-quote', value: '"' }
67
- { type: 'array-element-comma', value: ',' }
68
- { type: 'whitespace', value: '\n\t\t' }
69
- { type: 'open-string-quote', value: '"' }
70
- { type: 'string', value: 'Daphne' }
71
- { type: 'close-string-quote', value: '"' }
72
- { type: 'array-element-comma', value: ',' }
73
- { type: 'whitespace', value: '\n\t\t' }
74
- { type: 'open-string-quote', value: '"' }
75
- { type: 'string', value: 'Velma' }
76
- { type: 'close-string-quote', value: '"' }
77
- { type: 'array-element-comma', value: ',' }
78
- { type: 'whitespace', value: '\n\t\t' }
79
- { type: 'open-string-quote', value: '"' }
80
- { type: 'string', value: 'Shaggy' }
81
- { type: 'close-string-quote', value: '"' }
82
- { type: 'array-element-comma', value: ',' }
83
- { type: 'whitespace', value: '\n\t\t' }
84
- { type: 'open-string-quote', value: '"' }
85
- { type: 'string', value: 'Scooby' }
86
- { type: 'close-string-quote', value: '"' }
87
- { type: 'whitespace', value: '\n\t' }
88
- { type: 'array-close', value: ']' }
89
- { type: 'object-member-comma', value: ',' }
90
- { type: 'whitespace', value: '\n\t' }
91
- { type: 'open-key-quote', value: '"' }
92
- { type: 'key', value: 'aired' }
93
- { type: 'close-key-quote', value: '"' }
94
- { type: 'colon', value: ':' }
95
- { type: 'whitespace', value: ' ' }
96
- { type: 'number', value: '1969' }
97
- { type: 'whitespace', value: '\n' }
98
- { type: 'object-close', value: '}' }
99
- { type: 'eof', value: '' }
44
+ { type: 'object-open', value: '{', start: 0, end: 0 }
45
+ { type: 'whitespace', value: '\n\t', start: 1, end: 2 }
46
+ { type: 'open-key-quote', value: '"', start: 3, end: 3 }
47
+ { type: 'key', value: 'team', start: 4, end: 7 }
48
+ { type: 'close-key-quote', value: '"', start: 8, end: 8 }
49
+ { type: 'colon', value: ':', start: 9, end: 9 }
50
+ { type: 'whitespace', value: ' ', start: 10, end: 10 }
51
+ { type: 'open-string-quote', value: '"', start: 11, end: 11 }
52
+ { type: 'string', value: 'Mystery Inc', start: 12, end: 22 }
53
+ { type: 'close-string-quote', value: '"', start: 23, end: 23 }
54
+ { type: 'object-member-comma', value: ',', start: 24, end: 24 }
55
+ { type: 'whitespace', value: '\n\t', start: 25, end: 26 }
56
+ { type: 'open-key-quote', value: '"', start: 27, end: 27 }
57
+ { type: 'key', value: 'members', start: 28, end: 34 }
58
+ { type: 'close-key-quote', value: '"', start: 35, end: 35 }
59
+ { type: 'colon', value: ':', start: 36, end: 36 }
60
+ { type: 'whitespace', value: ' ', start: 37, end: 37 }
61
+ { type: 'array-open', value: '[', start: 38, end: 38 }
62
+ { type: 'whitespace', value: '\n\t\t', start: 39, end: 41 }
63
+ { type: 'open-string-quote', value: '"', start: 42, end: 42 }
64
+ { type: 'string', value: 'Fred', start: 43, end: 46 }
65
+ { type: 'close-string-quote', value: '"', start: 47, end: 47 }
66
+ { type: 'array-element-comma', value: ',', start: 48, end: 48 }
67
+ { type: 'whitespace', value: '\n\t\t', start: 49, end: 51 }
68
+ { type: 'open-string-quote', value: '"', start: 52, end: 52 }
69
+ { type: 'string', value: 'Daphne', start: 53, end: 58 }
70
+ { type: 'close-string-quote', value: '"', start: 59, end: 59 }
71
+ { type: 'array-element-comma', value: ',', start: 60, end: 60 }
72
+ { type: 'whitespace', value: '\n\t\t', start: 61, end: 63 }
73
+ { type: 'open-string-quote', value: '"', start: 64, end: 64 }
74
+ { type: 'string', value: 'Velma', start: 65, end: 69 }
75
+ { type: 'close-string-quote', value: '"', start: 70, end: 70 }
76
+ { type: 'array-element-comma', value: ',', start: 71, end: 71 }
77
+ { type: 'whitespace', value: '\n\t\t', start: 72, end: 74 }
78
+ { type: 'open-string-quote', value: '"', start: 75, end: 75 }
79
+ { type: 'string', value: 'Shaggy', start: 76, end: 81 }
80
+ { type: 'close-string-quote', value: '"', start: 82, end: 82 }
81
+ { type: 'array-element-comma', value: ',', start: 83, end: 83 }
82
+ { type: 'whitespace', value: '\n\t\t', start: 84, end: 86 }
83
+ { type: 'open-string-quote', value: '"', start: 87, end: 87 }
84
+ { type: 'string', value: 'Scooby', start: 88, end: 93 }
85
+ { type: 'close-string-quote', value: '"', start: 94, end: 94 }
86
+ { type: 'whitespace', value: '\n\t', start: 95, end: 96 }
87
+ { type: 'array-close', value: ']', start: 97, end: 97 }
88
+ { type: 'object-member-comma', value: ',', start: 98, end: 98 }
89
+ { type: 'whitespace', value: '\n\t', start: 99, end: 100 }
90
+ { type: 'open-key-quote', value: '"', start: 101, end: 101 }
91
+ { type: 'key', value: 'aired', start: 102, end: 106 }
92
+ { type: 'close-key-quote', value: '"', start: 107, end: 107 }
93
+ { type: 'colon', value: ':', start: 108, end: 108 }
94
+ { type: 'whitespace', value: ' ', start: 109, end: 109 }
95
+ { type: 'number', value: '1969', start: 110, end: 113 }
96
+ { type: 'whitespace', value: '\n', start: 114, end: 114 }
97
+ { type: 'object-close', value: '}', start: 115, end: 115 }
98
+ { type: 'eof', value: '', start: null, end: 116 }
100
99
  */
101
- ```
100
+ ```
101
+
102
+ ## Tests
103
+
104
+ A simple set of tests for coverage exists within the repo.
105
+
106
+ For more complete and varied validation against in-the-wild JSON, the tokenizer has also been tested against the following suites:
107
+
108
+ - https://github.com/nst/JSONTestSuite
109
+ - https://github.com/nlohmann/json_test_data
110
+ - https://github.com/open-source-parsers/jsoncpp
111
+ - any others that can be found :P
package/package.json CHANGED
@@ -1,15 +1,25 @@
1
1
  {
2
2
  "name": "@johntalton/json-tokenizer",
3
3
  "type": "module",
4
- "version": "1.0.1",
4
+ "version": "1.1.1",
5
5
  "license": "MIT",
6
+ "engines": {
7
+ "node": ">=22.0.0"
8
+ },
6
9
  "exports": {
7
10
  ".": "./src/index.js"
8
11
  },
9
12
  "files": [
10
13
  "src/*.js"
11
14
  ],
15
+ "scripts": {
16
+ "test": "node --test test/**",
17
+ "coverage": "c8 -r lcov -r text node --test test/**"
18
+ },
12
19
  "repository": {
13
20
  "url": "git+https://github.com/johntalton/json-tokenizer.git"
21
+ },
22
+ "dependencies": {
23
+ "c8": "^10.1.3"
14
24
  }
15
25
  }
package/src/index.js CHANGED
@@ -1,44 +1,87 @@
1
1
  /**
2
2
  * @typedef {Object} TokenizerOptions
3
- * @property {string|undefined} [locale = 'en-US']
4
3
  * @property {AbortSignal|undefined} [signal]
5
4
  * @property {boolean|undefined} [debug]
6
5
  */
7
6
 
8
- export const TOKEN = {
9
- ERROR: 'error',
10
- EOF: 'eof',
11
- WHITESPACE: 'whitespace',
7
+ /**
8
+ * @typedef {Object} TokenBase
9
+ * @property {string} type
10
+ * @property {string} value
11
+ * @property {number|null} start
12
+ * @property {number|null} end
13
+ */
14
+
15
+ /**
16
+ * @typedef {Object} ErrorTokenBase
17
+ * @property {'error'} type
18
+ * @property {string} cause
19
+ */
20
+ /** @typedef {TokenBase & ErrorTokenBase} ErrorToken */
21
+ /** @typedef {TokenBase & { type: 'eof' }} EOFToken */
22
+ /** @typedef {TokenBase & { type: 'whitespace' }} WhitespaceToken */
23
+ /** @typedef {TokenBase & { type: 'array-open' }} ArrayOpenToken */
24
+ /** @typedef {TokenBase & { type: 'array-close' }} ArrayCloseToken */
25
+ /** @typedef {TokenBase & { type: 'array-close-immediate' }} ArrayCloseImmediateToken */
26
+ /** @typedef {TokenBase & { type: 'array-element-comma' }} ArrayCommaToken */
27
+ /** @typedef {TokenBase & { type: 'object-open' }} ObjectOpenToken */
28
+ /** @typedef {TokenBase & { type: 'object-close' }} ObjectCloseToken */
29
+ /** @typedef {TokenBase & { type: 'object-close-immediate' }} ObjectCloseImmediateToken */
30
+ /** @typedef {TokenBase & { type: 'object-member-comma' }} ObjectCommaToken */
31
+ /** @typedef {TokenBase & { type: 'open-key-quote' }} OpenKeyQuoteToken */
32
+ /** @typedef {TokenBase & { type: 'key' }} KeyToken */
33
+ /** @typedef {TokenBase & { type: 'close-key-quote' }} CloseKeyQuoteToken */
34
+ /** @typedef {TokenBase & { type: 'colon' }} ColonToken */
35
+ /** @typedef {TokenBase & { type: 'open-string-quote' }} OpenStringQuoteToken */
36
+ /** @typedef {TokenBase & { type: 'string' }} StringToken */
37
+ /** @typedef {TokenBase & { type: 'close-string-quote' }} CloseStringQuoteToken */
38
+ /** @typedef {TokenBase & { type: 'true' }} TrueToken */
39
+ /** @typedef {TokenBase & { type: 'false' }} FalseToken */
40
+ /** @typedef {TokenBase & { type: 'null' }} NULLToken */
41
+ /** @typedef {TokenBase & { type: 'number' }} NumberToken */
42
+
43
+ /** @typedef {EOFToken|WhitespaceToken|ErrorToken|ArrayOpenToken|ArrayCloseToken|ArrayCloseImmediateToken|ArrayCommaToken|ObjectOpenToken|ObjectCloseToken|ObjectCloseImmediateToken|ObjectCommaToken|OpenKeyQuoteToken|KeyToken|CloseKeyQuoteToken|ColonToken|OpenStringQuoteToken|StringToken|CloseStringQuoteToken|TrueToken|FalseToken|NULLToken|NumberToken} Token */
44
+
45
+ /**
46
+ * @typedef {Object} AccumulationState
47
+ * @property {string} value
48
+ * @property {number|null} start
49
+ * @property {number|null} end
50
+ */
51
+
52
+ export const TOKEN_ERROR = 'error'
53
+ export const TOKEN_EOF = 'eof'
54
+ export const TOKEN_WHITESPACE = 'whitespace'
12
55
 
13
56
  // Array / Elements
14
- ARRAY_OPEN: 'array-open',
15
- ARRAY_CLOSE: 'array-close',
16
- ARRAY_CLOSE_IMMEDIATE: 'array-close-immediate',
17
- ARRAY_ELEMENT_COMMA: 'array-element-comma',
57
+ export const TOKEN_ARRAY_OPEN = 'array-open'
58
+ export const TOKEN_ARRAY_CLOSE = 'array-close'
59
+ export const TOKEN_ARRAY_CLOSE_IMMEDIATE = 'array-close-immediate'
60
+ export const TOKEN_ARRAY_ELEMENT_COMMA = 'array-element-comma'
18
61
 
19
62
  // Object
20
- OBJECT_OPEN: 'object-open',
21
- OBJECT_CLOSE: 'object-close',
22
- OBJECT_CLOSE_IMMEDIATE: 'object-close-immediate',
23
- OBJECT_MEMBER_COMMA: 'object-member-comma',
24
- OBJECT_KEY_OPEN: 'open-key-quote',
25
- OBJECT_KEY: 'key',
26
- OBJECT_KEY_CLOSE: 'close-key-quote',
27
- OBJECT_COLON: 'colon',
63
+ export const TOKEN_OBJECT_OPEN = 'object-open'
64
+ export const TOKEN_OBJECT_CLOSE = 'object-close'
65
+ export const TOKEN_OBJECT_CLOSE_IMMEDIATE = 'object-close-immediate'
66
+ export const TOKEN_OBJECT_MEMBER_COMMA = 'object-member-comma'
67
+ export const TOKEN_OBJECT_KEY_OPEN = 'open-key-quote'
68
+ export const TOKEN_OBJECT_KEY = 'key'
69
+ export const TOKEN_OBJECT_KEY_CLOSE = 'close-key-quote'
70
+ export const TOKEN_OBJECT_COLON = 'colon'
28
71
 
29
72
  // String
30
- STRING_OPEN: 'open-string-quote',
31
- STRING: 'string',
32
- STRING_CLOSE: 'close-string-quote',
73
+ export const TOKEN_STRING_OPEN = 'open-string-quote'
74
+ export const TOKEN_STRING = 'string'
75
+ export const TOKEN_STRING_CLOSE = 'close-string-quote'
33
76
 
34
77
  // Primitives
35
- TRUE: 'true',
36
- FALSE: 'false',
37
- NULL: 'null',
78
+ export const TOKEN_TRUE = 'true'
79
+ export const TOKEN_FALSE = 'false'
80
+ export const TOKEN_NULL = 'null'
38
81
 
39
82
  // Number
40
- NUMBER: 'number'
41
- }
83
+ export const TOKEN_NUMBER = 'number'
84
+
42
85
 
43
86
  export const EMPTY = ''
44
87
 
@@ -53,6 +96,7 @@ export const STATE = {
53
96
  ARY_OPEN: 'ao',
54
97
  ARY_CLOSE: 'ac',
55
98
  MEMBERS: 'ms',
99
+ MEMBERS_CONTINUE: 'msc',
56
100
  MEMBER: 'm',
57
101
  KEY: 'key',
58
102
  MEMBER_KEY_AFTER: 'mka',
@@ -73,11 +117,64 @@ export const STATE = {
73
117
  U_HEX4: 'hex'
74
118
  }
75
119
 
120
+ export const EXIT_STATES = [ STATE.ELEMENT_AFTER ]
121
+
76
122
  export const ESCAPE_CHARS = [
77
123
  '"', '\\', '/', 'b', 'f', 'n', 'r', 't', 'u'
78
124
  ]
79
125
 
80
- export const DEFAULT_LOCALE = 'en-US'
126
+ export const HEX_CHARS = [
127
+ '1', '2', '3', '4', '5', '6', '7', '8', '9', '0',
128
+ 'a', 'b', 'c', 'd', 'e', 'f',
129
+ 'A', 'B', 'C', 'D', 'E', 'F',
130
+ ]
131
+
132
+ export class Accumulator {
133
+ /**
134
+ * @param {AccumulationState} accumulationState
135
+ */
136
+ static #reset(accumulationState) {
137
+ accumulationState.value = EMPTY
138
+ accumulationState.start = null
139
+ accumulationState.end = null
140
+ }
141
+
142
+ /**
143
+ * @param {AccumulationState} accumulationState
144
+ * @param {number} start
145
+ */
146
+ static start(accumulationState, start) {
147
+ accumulationState.value = EMPTY
148
+ accumulationState.start = start
149
+ accumulationState.end = start
150
+ }
151
+
152
+ /**
153
+ * @param {AccumulationState} accumulationState
154
+ * @param {string} value
155
+ * @param {number} end
156
+ */
157
+ static accumulate(accumulationState, value, end) {
158
+ accumulationState.value += value
159
+ accumulationState.end = end
160
+ }
161
+
162
+ /**
163
+ * @param {AccumulationState} accumulationState
164
+ */
165
+ static end(accumulationState) {
166
+ const result = { ...accumulationState }
167
+ Accumulator.#reset(accumulationState)
168
+ return result
169
+ }
170
+
171
+ /**
172
+ * @param {AccumulationState} accumulationState
173
+ */
174
+ static empty(accumulationState) {
175
+ return accumulationState.value === EMPTY
176
+ }
177
+ }
81
178
 
82
179
  export class JSONTokenizer {
83
180
  /**
@@ -100,63 +197,107 @@ export class JSONTokenizer {
100
197
  return true
101
198
  }
102
199
 
200
+ /**
201
+ * @param {string} str
202
+ */
203
+ static isValidHEX(str) {
204
+ return HEX_CHARS.includes(str)
205
+ }
206
+
103
207
  /**
104
208
  * @param {string} str
105
209
  * @param {TokenizerOptions} [options]
210
+ * @return {Generator<Token, undefined, undefined>}
106
211
  */
107
212
  static *tokenize(str, options) {
108
213
  const debug = (options?.debug ?? false) === true
109
214
  const signal = options?.signal
110
- const locale = options?.locale ?? DEFAULT_LOCALE
111
215
 
112
- const seg = new Intl.Segmenter(locale, { granularity: 'grapheme' })
113
- const segments = seg.segment(str)
114
- using segmentIter = segments[Symbol.iterator]()
216
+ const segmentIter = Iterator.from(str)
217
+ .map((item, index) => ({
218
+ segment: item,
219
+ done: false,
220
+ index
221
+ }))
115
222
 
116
223
  /** @type {Array<string>} */
117
- const stack = []
224
+ const stack = [ ]
118
225
 
119
226
  /** @type {string|undefined} */
120
227
  let state = STATE.ELEMENT
121
228
 
122
229
  let next = segmentIter.next()
123
230
 
124
- let accumulator = EMPTY
231
+ /** @type {AccumulationState} */
232
+ const accumulatorState = {
233
+ value: EMPTY,
234
+ start: null,
235
+ end: null
236
+ }
237
+
238
+ let line = 1
125
239
 
126
240
  //
127
241
  if(next.done) {
128
- yield { type: TOKEN.ERROR, value: EMPTY }
242
+ yield { type: TOKEN_ERROR, value: EMPTY, start: 0, end: 0, cause: 'empty' }
129
243
  return
130
244
  }
131
245
 
132
246
  //
133
247
  while(true) {
134
248
  if(signal?.aborted) {
135
- //
249
+ yield { type: TOKEN_EOF, value: EMPTY, start: null, end: null }
136
250
  break
137
251
  }
138
252
 
139
253
  //
140
254
  if(next.done) {
141
- if(accumulator !== EMPTY) { yield { type: TOKEN.ERROR, value: accumulator } }
142
- if(stack.length !== 0) { yield { type: TOKEN.ERROR, value: EMPTY }}
143
- yield { type: TOKEN.EOF, value: EMPTY }
255
+ if(state === undefined) {
256
+ yield { type: TOKEN_ERROR, value: EMPTY, start: null, end: null, cause: 'undefined state' }
257
+ }
258
+
259
+ //
260
+ if(state !== undefined && !EXIT_STATES.includes(state)) {
261
+ yield { type: TOKEN_ERROR, value: EMPTY, start: null, end: str.length, cause: `invalid exit state (${state})` }
262
+ }
263
+
264
+ //
265
+ if(!Accumulator.empty(accumulatorState)) {
266
+ yield { type: TOKEN_ERROR, ...Accumulator.end(accumulatorState), cause: 'accumulator not empty (done)' }
267
+ }
268
+
269
+ //
270
+ if(stack.length !== 0) {
271
+ yield { type: TOKEN_ERROR, value: EMPTY, start: null, end: str.length, cause: 'stack not empty' }
272
+ }
273
+
274
+ //
275
+ yield { type: TOKEN_EOF, value: EMPTY, start: null, end: str.length }
144
276
  break
145
277
  }
146
278
 
147
279
  //
148
280
  if(state === undefined) {
149
- if(accumulator !== EMPTY) { yield { type: TOKEN.ERROR, value: accumulator } }
281
+ //
282
+ if(!Accumulator.empty(accumulatorState)) {
283
+ yield { type: TOKEN_ERROR, ...Accumulator.end(accumulatorState), cause: 'accumulator not empty (stack)' }
284
+ }
285
+
286
+ //
150
287
  if(!next.done) {
151
- accumulator = EMPTY
288
+ Accumulator.start(accumulatorState, next.value.index)
289
+
152
290
  while(!next.done) {
153
- accumulator += next.value.segment
291
+ if(signal?.aborted) { break }
292
+ Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
154
293
  next = segmentIter.next()
155
294
  }
156
- yield { type: TOKEN.ERROR, value: accumulator }
295
+
296
+ yield { type: TOKEN_ERROR, ...Accumulator.end(accumulatorState), cause: 'accumulator not empty (not done)' }
157
297
  }
158
298
 
159
- yield { type: TOKEN.EOF, value: EMPTY }
299
+ //
300
+ yield { type: TOKEN_EOF, value: EMPTY, start: null, end: null }
160
301
  break
161
302
  }
162
303
 
@@ -164,9 +305,9 @@ export class JSONTokenizer {
164
305
  if(debug) {
165
306
  console.log({
166
307
  seg: next.value.segment,
167
- state, stack:
168
- stack.join(','),
169
- accumulator
308
+ state,
309
+ stack: stack.join(','),
310
+ ...accumulatorState
170
311
  })
171
312
  }
172
313
 
@@ -178,7 +319,7 @@ export class JSONTokenizer {
178
319
  state = stack.pop()
179
320
  break
180
321
  case ',':
181
- yield { type: TOKEN.ARRAY_ELEMENT_COMMA, value: next.value.segment }
322
+ yield { type: TOKEN_ARRAY_ELEMENT_COMMA, value: next.value.segment, start: next.value.index, end: next.value.index }
182
323
  stack.push(STATE.ELEMENTS)
183
324
  state = STATE.ELEMENT
184
325
  next = segmentIter.next()
@@ -192,6 +333,7 @@ export class JSONTokenizer {
192
333
  case STATE.ELEMENT:
193
334
  switch(next.value.segment) {
194
335
  case ' ': case '\r': case '\n': case '\t':
336
+ Accumulator.start(accumulatorState, next.value.index)
195
337
  stack.push(STATE.ELEMENT)
196
338
  state = STATE.WS
197
339
  break
@@ -204,6 +346,7 @@ export class JSONTokenizer {
204
346
  case STATE.ELEMENT_AFTER:
205
347
  switch(next.value.segment) {
206
348
  case ' ': case '\r': case '\n': case '\t':
349
+ Accumulator.start(accumulatorState, next.value.index)
207
350
  stack.push(STATE.ELEMENT_AFTER)
208
351
  state = STATE.WS
209
352
  break
@@ -217,24 +360,29 @@ export class JSONTokenizer {
217
360
  state = stack.pop()
218
361
  break
219
362
  default:
220
- yield { type: TOKEN.ERROR, value: next.value.segment }
363
+ yield { type: TOKEN_ERROR, value: next.value.segment, start: next.value.index, end: next.value.index, cause: 'expect close, comma or whitespace' }
221
364
  next = segmentIter.next()
365
+ state = stack.pop()
222
366
  break
223
367
  }
224
368
  break
225
369
  case STATE.WS:
226
370
  switch(next.value.segment) {
227
371
  case ' ': case '\r': case '\n': case '\t':
228
- accumulator += next.value.segment
372
+
373
+ if(next.value.segment === '\n') {
374
+ line += 1
375
+ }
376
+
377
+ Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
229
378
  next = segmentIter.next()
230
379
  if(next.done) {
231
- yield { type: TOKEN.WHITESPACE, value: accumulator }
232
- accumulator = EMPTY
380
+ yield { type: TOKEN_WHITESPACE, ...Accumulator.end(accumulatorState) }
381
+ state = stack.pop()
233
382
  }
234
383
  break
235
384
  default:
236
- yield { type: TOKEN.WHITESPACE, value: accumulator }
237
- accumulator = EMPTY
385
+ yield { type: TOKEN_WHITESPACE, ...Accumulator.end(accumulatorState) }
238
386
  state = stack.pop()
239
387
  break
240
388
  }
@@ -242,157 +390,154 @@ export class JSONTokenizer {
242
390
  case STATE.VALUE:
243
391
  switch(next.value.segment) {
244
392
  case '{':
245
- yield { type: TOKEN.OBJECT_OPEN, value: next.value.segment }
393
+ yield { type: TOKEN_OBJECT_OPEN, value: next.value.segment, start: next.value.index, end: next.value.index }
246
394
  state = STATE.OBJ_OPEN
247
395
  next = segmentIter.next()
248
396
  break
249
397
  case '[':
250
- yield { type: TOKEN.ARRAY_OPEN, value: next.value.segment }
398
+ yield { type: TOKEN_ARRAY_OPEN, value: next.value.segment, start: next.value.index, end: next.value.index }
251
399
  state = STATE.ARY_OPEN
252
400
  next = segmentIter.next()
253
401
  break
254
402
  case '"':
255
- yield { type: TOKEN.STRING_OPEN, value: next.value.segment }
256
- accumulator = EMPTY
403
+ yield { type: TOKEN_STRING_OPEN, value: next.value.segment, start: next.value.index, end: next.value.index }
257
404
  state = STATE.STR
258
405
  next = segmentIter.next()
406
+ if(!next.done) { Accumulator.start(accumulatorState, next.value.index) }
259
407
  break
260
408
  case 't':
261
- accumulator = 't'
409
+ Accumulator.start(accumulatorState, next.value.index)
410
+ Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
262
411
 
263
412
  next = segmentIter.next()
264
413
  if(next.done || next.value.segment !== 'r') {
265
- yield { type: TOKEN.ERROR, value: accumulator }
266
- accumulator = EMPTY
414
+ yield { type: TOKEN_ERROR, ...Accumulator.end(accumulatorState), cause: 'expected true literal (r)' }
267
415
  state = stack.pop()
268
416
  break
269
417
  }
270
- accumulator += next.value?.segment
418
+ Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
271
419
 
272
420
  next = segmentIter.next()
273
421
  if(next.done || next.value.segment !== 'u') {
274
- yield { type: TOKEN.ERROR, value: accumulator }
275
- accumulator = EMPTY
422
+ yield { type: TOKEN_ERROR, ...Accumulator.end(accumulatorState), cause: 'expected true literal (u)' }
276
423
  state = stack.pop()
277
424
  break
278
425
  }
279
- accumulator += next.value?.segment
426
+ Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
280
427
 
281
428
  next = segmentIter.next()
282
429
  if(next.done || next.value.segment !== 'e') {
283
- yield { type: TOKEN.ERROR, value: accumulator }
284
- accumulator = EMPTY
430
+ yield { type: TOKEN_ERROR, ...Accumulator.end(accumulatorState), cause: 'expected true literal (e)' }
285
431
  state = stack.pop()
286
432
  break
287
433
  }
288
- accumulator += next.value?.segment
434
+ Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
289
435
 
290
- yield { type: TOKEN.TRUE, value: accumulator }
291
- accumulator = EMPTY
436
+ yield { type: TOKEN_TRUE, ...Accumulator.end(accumulatorState) }
292
437
 
293
438
  next = segmentIter.next()
294
439
  state = stack.pop()
295
440
  break
296
441
  case 'f':
297
- accumulator = 'f'
442
+ Accumulator.start(accumulatorState, next.value.index)
443
+ Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
298
444
 
299
445
  next = segmentIter.next()
300
446
  if(next.done || next.value.segment !== 'a') {
301
- yield { type: TOKEN.ERROR, value: accumulator }
302
- accumulator = EMPTY
447
+ yield { type: TOKEN_ERROR, ...Accumulator.end(accumulatorState), cause: 'expected false literal (a)' }
303
448
  state = stack.pop()
304
449
  break
305
450
  }
306
- accumulator += next.value?.segment
451
+ Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
307
452
 
308
453
  next = segmentIter.next()
309
454
  if(next.done || next.value.segment !== 'l') {
310
- yield { type: TOKEN.ERROR, value: accumulator }
311
- accumulator = EMPTY
455
+ yield { type: TOKEN_ERROR, ...Accumulator.end(accumulatorState), cause: 'expected false literal (l)' }
312
456
  state = stack.pop()
313
457
  break
314
458
  }
315
- accumulator += next.value?.segment
459
+ Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
316
460
 
317
461
  next = segmentIter.next()
318
462
  if(next.done || next.value.segment !== 's') {
319
- yield { type: TOKEN.ERROR, value: accumulator }
320
- accumulator = EMPTY
463
+ yield { type: TOKEN_ERROR, ...Accumulator.end(accumulatorState), cause: 'expected false literal (s)' }
321
464
  state = stack.pop()
322
465
  break
323
466
  }
324
- accumulator += next.value?.segment
467
+ Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
325
468
 
326
469
  next = segmentIter.next()
327
470
  if(next.done || next.value.segment !== 'e') {
328
- yield { type: TOKEN.ERROR, value: accumulator }
329
- accumulator = EMPTY
471
+ yield { type: TOKEN_ERROR, ...Accumulator.end(accumulatorState), cause: 'expected false literal (e)' }
330
472
  state = stack.pop()
331
473
  break
332
474
  }
333
- accumulator += next.value?.segment
475
+ Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
334
476
 
335
- yield { type: TOKEN.FALSE, value: accumulator }
336
- accumulator = EMPTY
477
+ yield { type: TOKEN_FALSE, ...Accumulator.end(accumulatorState) }
337
478
 
338
479
  next = segmentIter.next()
339
480
  state = stack.pop()
340
481
  break
341
482
  case 'n':
342
- accumulator = 'n'
483
+ Accumulator.start(accumulatorState, next.value.index)
484
+ Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
343
485
 
344
486
  next = segmentIter.next()
345
487
  if(next.done || next.value.segment !== 'u') {
346
- yield { type: TOKEN.ERROR, value: accumulator }
347
- accumulator = EMPTY
488
+ yield { type: TOKEN_ERROR, ...Accumulator.end(accumulatorState), cause: 'expected null literal (u)' }
348
489
  state = stack.pop()
349
490
  break
350
491
  }
351
- accumulator += next.value?.segment
492
+ Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
352
493
 
353
494
  next = segmentIter.next()
354
495
  if(next.done || next.value.segment !== 'l') {
355
- yield { type: TOKEN.ERROR, value: accumulator }
356
- accumulator = EMPTY
496
+ yield { type: TOKEN_ERROR, ...Accumulator.end(accumulatorState), cause: 'expected null literal (l)' }
357
497
  state = stack.pop()
358
498
  break
359
499
  }
360
- accumulator += next.value?.segment
500
+ Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
361
501
 
362
502
  next = segmentIter.next()
363
503
  if(next.done || next.value.segment !== 'l') {
364
- yield { type: TOKEN.ERROR, value: accumulator }
365
- accumulator = EMPTY
504
+ yield { type: TOKEN_ERROR, ...Accumulator.end(accumulatorState), cause: 'expected null literal (l)' }
366
505
  state = stack.pop()
367
506
  break
368
507
  }
369
- accumulator += next.value?.segment
508
+ Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
370
509
 
371
- yield { type: TOKEN.NULL, value: accumulator }
372
- accumulator = EMPTY
510
+ yield { type: TOKEN_NULL, ...Accumulator.end(accumulatorState) }
373
511
 
374
512
  next = segmentIter.next()
375
513
  state = stack.pop()
376
514
  break
377
515
  case '-':
378
- accumulator = next.value.segment
516
+ Accumulator.start(accumulatorState, next.value.index)
517
+ Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
379
518
  state = STATE.NUMBER
380
519
  next = segmentIter.next()
381
520
  break
382
521
  case '0':
383
- accumulator = next.value.segment
522
+ Accumulator.start(accumulatorState, next.value.index)
523
+ Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
524
+
384
525
  state = STATE.NUMBER_INT_AFTER
385
526
  next = segmentIter.next()
527
+ if(next.done) {
528
+ yield { type: TOKEN_NUMBER, ...Accumulator.end(accumulatorState) }
529
+ state = stack.pop()
530
+ }
386
531
  break
387
532
  case '1': case '2': case '3':
388
533
  case '4': case '5': case '6':
389
534
  case '7': case '8': case '9':
390
- accumulator = EMPTY
535
+ Accumulator.start(accumulatorState, next.value.index)
391
536
  state = STATE.NUMBER_INT
392
537
  break
393
538
  default:
394
539
  //
395
- yield { type: TOKEN.ERROR, value: next.value.segment }
540
+ yield { type: TOKEN_ERROR, value: next.value.segment, start: next.value.index, end: next.value.index, cause: 'unexpected value' }
396
541
  next = segmentIter.next()
397
542
  break
398
543
  }
@@ -400,7 +545,7 @@ export class JSONTokenizer {
400
545
  case STATE.NUMBER:
401
546
  switch(next.value.segment) {
402
547
  case '0':
403
- accumulator += next.value.segment
548
+ Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
404
549
  state = STATE.NUMBER_INT_AFTER
405
550
  next = segmentIter.next()
406
551
  break
@@ -414,14 +559,13 @@ export class JSONTokenizer {
414
559
  case '1': case '2': case '3':
415
560
  case '4': case '5': case '6':
416
561
  case '7': case '8': case '9':
417
- accumulator += next.value.segment
562
+ Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
418
563
  state = STATE.NUMBER_INT
419
564
  next = segmentIter.next()
420
565
  break
421
566
  default:
422
- accumulator += next.value.segment
423
- yield { type: TOKEN.ERROR, value: accumulator }
424
- accumulator = EMPTY
567
+ Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
568
+ yield { type: TOKEN_ERROR, ...Accumulator.end(accumulatorState), cause: 'invalid number format' }
425
569
  next = segmentIter.next()
426
570
  break
427
571
  }
@@ -432,12 +576,12 @@ export class JSONTokenizer {
432
576
  case '1': case '2': case '3':
433
577
  case '4': case '5': case '6':
434
578
  case '7': case '8': case '9':
435
- accumulator += next.value.segment
579
+ Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
436
580
  next = segmentIter.next()
437
581
 
438
582
  if(next.done) {
439
- yield { type: TOKEN.NUMBER, value: accumulator }
440
- accumulator = EMPTY
583
+ yield { type: TOKEN_NUMBER, ...Accumulator.end(accumulatorState) }
584
+ state = stack.pop()
441
585
  }
442
586
  break
443
587
  default:
@@ -448,19 +592,18 @@ export class JSONTokenizer {
448
592
  case STATE.NUMBER_INT_AFTER:
449
593
  switch(next.value.segment) {
450
594
  case '.':
451
- accumulator += next.value.segment
595
+ Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
452
596
  next = segmentIter.next()
453
597
  state = STATE.NUMBER_DECIMAL_FIRST
454
598
  break
455
599
  case 'e':
456
600
  case 'E':
457
- accumulator += next.value.segment
601
+ Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
458
602
  state = STATE.NUMBER_EXPONENT_SIGN
459
603
  next = segmentIter.next()
460
604
  break
461
605
  default:
462
- yield { type: TOKEN.NUMBER, value: accumulator }
463
- accumulator = EMPTY
606
+ yield { type: TOKEN_NUMBER, ...Accumulator.end(accumulatorState) }
464
607
  state = stack.pop()
465
608
  break
466
609
  }
@@ -471,20 +614,20 @@ export class JSONTokenizer {
471
614
  case '1': case '2': case '3':
472
615
  case '4': case '5': case '6':
473
616
  case '7': case '8': case '9':
474
- accumulator += next.value.segment
617
+ Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
475
618
  next = segmentIter.next()
476
619
  state = STATE.NUMBER_DECIMAL
477
620
 
478
621
  if(next.done) {
479
- yield { type: TOKEN.NUMBER, value: accumulator }
480
- accumulator = EMPTY
622
+ yield { type: TOKEN_NUMBER, ...Accumulator.end(accumulatorState) }
623
+ state = stack.pop()
481
624
  }
482
625
  break
483
626
  default:
484
- accumulator += next.value.segment
485
- yield { type: TOKEN.ERROR, value: accumulator}
486
- accumulator = EMPTY
627
+ Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
628
+ yield { type: TOKEN_ERROR, ...Accumulator.end(accumulatorState), cause: 'invalid decimal format' }
487
629
  next = segmentIter.next()
630
+ state = stack.pop()
488
631
  break
489
632
  }
490
633
  break
@@ -494,23 +637,22 @@ export class JSONTokenizer {
494
637
  case '1': case '2': case '3':
495
638
  case '4': case '5': case '6':
496
639
  case '7': case '8': case '9':
497
- accumulator += next.value.segment
640
+ Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
498
641
  next = segmentIter.next()
499
642
 
500
643
  if(next.done) {
501
- yield { type: TOKEN.NUMBER, value: accumulator }
502
- accumulator = EMPTY
644
+ yield { type: TOKEN_NUMBER, ...Accumulator.end(accumulatorState) }
645
+ state = stack.pop()
503
646
  }
504
647
  break
505
648
  case 'e':
506
649
  case 'E':
507
- accumulator += next.value.segment
650
+ Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
508
651
  state = STATE.NUMBER_EXPONENT_SIGN
509
652
  next = segmentIter.next()
510
653
  break
511
654
  default:
512
- yield { type: TOKEN.NUMBER, value: accumulator }
513
- accumulator = EMPTY
655
+ yield { type: TOKEN_NUMBER, ...Accumulator.end(accumulatorState) }
514
656
  state = stack.pop()
515
657
  break
516
658
  }
@@ -518,12 +660,12 @@ export class JSONTokenizer {
518
660
  case STATE.NUMBER_EXPONENT_SIGN:
519
661
  switch(next.value.segment) {
520
662
  case '+':
521
- accumulator += next.value.segment
663
+ Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
522
664
  next = segmentIter.next()
523
665
  state = STATE.NUMBER_EXPONENT_FIRST
524
666
  break
525
667
  case '-':
526
- accumulator += next.value.segment
668
+ Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
527
669
  next = segmentIter.next()
528
670
  state = STATE.NUMBER_EXPONENT_FIRST
529
671
  break
@@ -538,15 +680,20 @@ export class JSONTokenizer {
538
680
  case '1': case '2': case '3':
539
681
  case '4': case '5': case '6':
540
682
  case '7': case '8': case '9':
541
- accumulator += next.value.segment
683
+ Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
542
684
  next = segmentIter.next()
543
685
  state = STATE.NUMBER_EXPONENT
686
+
687
+ if(next.done) {
688
+ yield { type: TOKEN_NUMBER, ...Accumulator.end(accumulatorState) }
689
+ state = stack.pop()
690
+ }
544
691
  break
545
692
  default:
546
- accumulator += next.value.segment
547
- yield { type: TOKEN.ERROR, value: accumulator }
548
- accumulator = EMPTY
693
+ Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
694
+ yield { type: TOKEN_ERROR, ...Accumulator.end(accumulatorState), cause: 'invalid exponent format' }
549
695
  next = segmentIter.next()
696
+ state = stack.pop()
550
697
  break
551
698
  }
552
699
  break
@@ -556,12 +703,16 @@ export class JSONTokenizer {
556
703
  case '1': case '2': case '3':
557
704
  case '4': case '5': case '6':
558
705
  case '7': case '8': case '9':
559
- accumulator += next.value.segment
706
+ Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
560
707
  next = segmentIter.next()
708
+
709
+ if(next.done) {
710
+ yield { type: TOKEN_NUMBER, ...Accumulator.end(accumulatorState) }
711
+ state = stack.pop()
712
+ }
561
713
  break
562
714
  default:
563
- yield { type: TOKEN.NUMBER, value: accumulator }
564
- accumulator = EMPTY
715
+ yield { type: TOKEN_NUMBER, ...Accumulator.end(accumulatorState) }
565
716
  state = stack.pop()
566
717
  break
567
718
  }
@@ -569,11 +720,12 @@ export class JSONTokenizer {
569
720
  case STATE.OBJ_OPEN:
570
721
  switch(next.value.segment) {
571
722
  case ' ': case '\r': case '\n': case '\t':
723
+ Accumulator.start(accumulatorState, next.value.index)
572
724
  stack.push(STATE.OBJ_OPEN)
573
725
  state = STATE.WS
574
726
  break
575
727
  case '}':
576
- yield { type: TOKEN.OBJECT_CLOSE_IMMEDIATE, value: next.value.segment }
728
+ yield { type: TOKEN_OBJECT_CLOSE_IMMEDIATE, value: next.value.segment, start: next.value.index, end: next.value.index }
577
729
  next = segmentIter.next()
578
730
  state = stack.pop()
579
731
  break
@@ -586,47 +738,62 @@ export class JSONTokenizer {
586
738
  case STATE.OBJ_CLOSE:
587
739
  switch(next.value.segment) {
588
740
  case '}':
589
- yield { type: TOKEN.OBJECT_CLOSE, value: next.value.segment }
741
+ yield { type: TOKEN_OBJECT_CLOSE, value: next.value.segment, start: next.value.index, end: next.value.index }
590
742
  next = segmentIter.next()
591
743
  state = stack.pop()
592
744
  break
593
745
  default:
594
- // todo
595
- throw new Error('expecting object close')
746
+ yield { type: TOKEN_ERROR, value: next.value.segment, start: next.value.index, end: next.value.index, cause: 'expected object close' }
747
+ next = segmentIter.next()
748
+ state = stack.pop()
596
749
  break
597
750
  }
598
751
  break
599
752
  case STATE.MEMBERS:
753
+ switch(next.value.segment){
754
+ case '}':
755
+ state = stack.pop()
756
+ break
757
+ default:
758
+ stack.push(STATE.MEMBERS_CONTINUE)
759
+ state = STATE.MEMBER
760
+ break
761
+ }
762
+ break
763
+ case STATE.MEMBERS_CONTINUE:
600
764
  switch(next.value.segment){
601
765
  case '}':
602
766
  state = stack.pop()
603
767
  break
604
768
  case ',':
605
- yield { type: TOKEN.OBJECT_MEMBER_COMMA, value: next.value.segment }
606
- stack.push(STATE.MEMBERS)
769
+ yield { type: TOKEN_OBJECT_MEMBER_COMMA, value: next.value.segment, start: next.value.index, end: next.value.index }
770
+ stack.push(STATE.MEMBERS_CONTINUE)
607
771
  state = STATE.MEMBER
608
772
  next = segmentIter.next()
609
773
  break
610
774
  default:
611
- stack.push(STATE.MEMBERS)
612
- state = STATE.MEMBER
775
+ yield { type: TOKEN_ERROR, value: next.value.segment, start: next.value.index, end: next.value.index, cause: 'expecting comma or close object' }
776
+ state = stack.pop()
777
+ next = segmentIter.next()
613
778
  break
614
779
  }
615
780
  break
616
781
  case STATE.MEMBER:
617
782
  switch(next.value.segment) {
618
783
  case ' ': case '\r': case '\n': case '\t':
784
+ Accumulator.start(accumulatorState, next.value.index)
619
785
  stack.push(STATE.MEMBER)
620
786
  state = STATE.WS
621
787
  break
622
788
  case '"':
623
- yield { type: TOKEN.OBJECT_KEY_OPEN, value: next.value.segment }
789
+ yield { type: TOKEN_OBJECT_KEY_OPEN, value: next.value.segment, start: next.value.index, end: next.value.index }
624
790
  stack.push(STATE.MEMBER_KEY_AFTER)
625
791
  state = STATE.KEY
626
792
  next = segmentIter.next()
793
+ if(!next.done) { Accumulator.start(accumulatorState, next.value.index) }
627
794
  break
628
795
  default:
629
- yield { type: TOKEN.ERROR, value: next.value.segment }
796
+ yield { type: TOKEN_ERROR, value: next.value.segment, start: next.value.index, end: next.value.index, cause: 'expected member key or whitespace' }
630
797
  next = segmentIter.next()
631
798
  break
632
799
  }
@@ -634,17 +801,18 @@ export class JSONTokenizer {
634
801
  case STATE.MEMBER_KEY_AFTER:
635
802
  switch(next.value.segment) {
636
803
  case ' ': case '\r': case '\n': case '\t':
804
+ Accumulator.start(accumulatorState, next.value.index)
637
805
  stack.push(STATE.MEMBER_KEY_AFTER)
638
806
  state = STATE.WS
639
807
  break
640
808
  case ':':
641
- yield { type: TOKEN.OBJECT_COLON, value: next.value.segment }
809
+ yield { type: TOKEN_OBJECT_COLON, value: next.value.segment, start: next.value.index, end: next.value.index }
642
810
  next = segmentIter.next()
643
811
 
644
812
  state = STATE.ELEMENT
645
813
  break
646
814
  default:
647
- yield { type: TOKEN.ERROR, value: next.value.segment }
815
+ yield { type: TOKEN_ERROR, value: next.value.segment, start: next.value.index, end: next.value.index, cause: 'expected whitespace or colon' }
648
816
  next = segmentIter.next()
649
817
  break
650
818
  }
@@ -652,16 +820,17 @@ export class JSONTokenizer {
652
820
  case STATE.ARY_OPEN:
653
821
  switch(next.value.segment) {
654
822
  case ' ': case '\r': case '\n': case '\t':
823
+ Accumulator.start(accumulatorState, next.value.index)
655
824
  stack.push(STATE.ARY_OPEN)
656
825
  state = STATE.WS
657
826
  break
658
827
  case ']':
659
- yield { type: TOKEN.ARRAY_CLOSE_IMMEDIATE, value: next.value.segment }
828
+ yield { type: TOKEN_ARRAY_CLOSE_IMMEDIATE, value: next.value.segment, start: next.value.index, end: next.value.index }
660
829
  next = segmentIter.next()
661
830
  state = stack.pop()
662
831
  break
663
832
  case ',':
664
- yield { type: TOKEN.ERROR, value: next.value.segment}
833
+ yield { type: TOKEN_ERROR, value: next.value.segment, start: next.value.index, end: next.value.index, cause: 'unexpected array comma' }
665
834
  next = segmentIter.next()
666
835
  break
667
836
  default:
@@ -673,13 +842,14 @@ export class JSONTokenizer {
673
842
  case STATE.ARY_CLOSE:
674
843
  switch(next.value.segment) {
675
844
  case ']':
676
- yield { type: TOKEN.ARRAY_CLOSE, value: next.value.segment }
845
+ yield { type: TOKEN_ARRAY_CLOSE, value: next.value.segment, start: next.value.index, end: next.value.index }
677
846
  next = segmentIter.next()
678
847
  state = stack.pop()
679
848
  break
680
849
  default:
681
- // todo
682
- throw new Error('expecting array close')
850
+ yield { type: TOKEN_ERROR, value: next.value.segment, start: next.value.index, end: next.value.index, cause: 'expected array close' }
851
+ next = segmentIter.next()
852
+ state = stack.pop()
683
853
  break
684
854
  }
685
855
  break
@@ -688,34 +858,31 @@ export class JSONTokenizer {
688
858
  switch(next.value.segment) {
689
859
  case '"':
690
860
  if(state === STATE.KEY) {
691
- yield { type: TOKEN.OBJECT_KEY, value: accumulator }
692
- yield { type: TOKEN.OBJECT_KEY_CLOSE, value: next.value.segment }
861
+ yield { type: TOKEN_OBJECT_KEY, ...Accumulator.end(accumulatorState) }
862
+ yield { type: TOKEN_OBJECT_KEY_CLOSE, value: next.value.segment, start: next.value.index, end: next.value.index }
693
863
  }
694
864
  else {
695
- yield { type: TOKEN.STRING, value: accumulator }
696
- yield { type: TOKEN.STRING_CLOSE, value: next.value.segment }
865
+ yield { type: TOKEN_STRING, ...Accumulator.end(accumulatorState) }
866
+ yield { type: TOKEN_STRING_CLOSE, value: next.value.segment, start: next.value.index, end: next.value.index }
697
867
  }
698
868
 
699
- accumulator = EMPTY
700
869
  next = segmentIter.next()
701
870
  state = stack.pop()
702
871
  break
703
872
  case '\\':
704
- accumulator += next.value.segment
873
+ Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
705
874
 
706
875
  next = segmentIter.next()
707
876
  if(next.done) {
708
- yield { type: TOKEN.ERROR, value: accumulator }
709
- accumulator = EMPTY
877
+ yield { type: TOKEN_ERROR, ...Accumulator.end(accumulatorState), cause: 'invalid escape sequence' }
710
878
  state = stack.pop()
711
879
  break
712
880
  }
713
881
 
714
- accumulator += next.value.segment
882
+ Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
715
883
 
716
884
  if(!JSONTokenizer.isValueEscapeChar(next.value?.segment)) {
717
- yield { type: TOKEN.ERROR, value: accumulator }
718
- accumulator = EMPTY
885
+ yield { type: TOKEN_ERROR, ...Accumulator.end(accumulatorState), cause: 'invalid escape character' }
719
886
  }
720
887
 
721
888
  if(next.value.segment === 'u') {
@@ -726,11 +893,10 @@ export class JSONTokenizer {
726
893
  next = segmentIter.next()
727
894
  break
728
895
  default:
729
- accumulator += next.value.segment
896
+ Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
730
897
 
731
898
  if(!JSONTokenizer.isValidChar(next.value.segment)) {
732
- yield { type: TOKEN.ERROR, value: accumulator }
733
- accumulator = EMPTY
899
+ yield { type: TOKEN_ERROR, ...Accumulator.end(accumulatorState), cause: 'invalid character in string literal' }
734
900
  }
735
901
 
736
902
  next = segmentIter.next()
@@ -746,52 +912,68 @@ export class JSONTokenizer {
746
912
  case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
747
913
  case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
748
914
 
749
- accumulator += next.value.segment
915
+ Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
750
916
 
751
917
  // second
752
918
  next = segmentIter.next()
753
919
  if(next.done) {
754
- yield { type: TOKEN.ERROR, value: accumulator }
755
- accumulator = EMPTY
920
+ yield { type: TOKEN_ERROR, ...Accumulator.end(accumulatorState), cause: 'invalid escape sequence (2nd done)' }
756
921
  state = stack.pop()
757
922
  break
758
923
  }
759
- accumulator += next.value?.segment
924
+ Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
925
+ if(!JSONTokenizer.isValidHEX(next.value.segment)) {
926
+ yield { type: TOKEN_ERROR, ...Accumulator.end(accumulatorState), cause: 'invalid escape sequence (2nd)' }
927
+ state = stack.pop()
928
+ next = segmentIter.next()
929
+ break
930
+ }
760
931
 
761
932
  // third
762
933
  next = segmentIter.next()
763
934
  if(next.done) {
764
- yield { type: TOKEN.ERROR, value: accumulator }
765
- accumulator = EMPTY
935
+ yield { type: TOKEN_ERROR, ...Accumulator.end(accumulatorState), cause: 'invalid escape sequence (3rd done)' }
936
+ state = stack.pop()
937
+ break
938
+ }
939
+ Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
940
+ if(!JSONTokenizer.isValidHEX(next.value.segment)) {
941
+ yield { type: TOKEN_ERROR, ...Accumulator.end(accumulatorState), cause: 'invalid escape sequence (3rd)' }
766
942
  state = stack.pop()
943
+ next = segmentIter.next()
767
944
  break
768
945
  }
769
- accumulator += next.value?.segment
770
946
 
771
947
  // fourth
772
948
  next = segmentIter.next()
773
949
  if(next.done) {
774
- yield { type: TOKEN.ERROR, value: accumulator }
775
- accumulator = EMPTY
950
+ yield { type: TOKEN_ERROR, ...Accumulator.end(accumulatorState), cause: 'invalid escape sequence (4th done)' }
951
+ state = stack.pop()
952
+ break
953
+ }
954
+ Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
955
+ if(!JSONTokenizer.isValidHEX(next.value.segment)) {
956
+ yield { type: TOKEN_ERROR, ...Accumulator.end(accumulatorState), cause: 'invalid escape sequence (4th)' }
776
957
  state = stack.pop()
958
+ next = segmentIter.next()
777
959
  break
778
960
  }
779
- accumulator += next.value?.segment
780
961
 
781
962
  state = stack.pop()
782
963
  next = segmentIter.next()
783
964
  break
784
965
  default:
785
- accumulator += next.value.segment
786
- yield { type: TOKEN.ERROR, value: accumulator }
787
- accumulator = EMPTY
966
+ Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
967
+ yield { type: TOKEN_ERROR, ...Accumulator.end(accumulatorState), cause: 'invalid escape sequence' }
968
+
969
+ state = stack.pop()
788
970
  next = segmentIter.next()
789
971
  break
790
972
  }
791
973
  break
792
974
  default:
793
- // todo
794
- throw new Error(`unknown state ${state}`)
975
+ yield { type: TOKEN_ERROR, ...Accumulator.end(accumulatorState), cause: `unknown state ${state}` }
976
+ break
795
977
  }
796
978
  }
797
979
  }