@johntalton/json-tokenizer 1.0.1 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3)
  1. package/README.md +56 -57
  2. package/package.json +1 -1
  3. package/src/index.js +338 -174
package/README.md CHANGED
@@ -4,10 +4,9 @@ Generator function that tokenizes string based on JSON format.
4
4
 
5
5
  - Uses Generator based API
6
6
  - Produces tokens for all input text (including error tokens)
7
- - Uses `Intl.Segmenter` with `'grapheme'` granularity
8
- - Segmenter locale can be custom set (default: `'en-US'`)
9
7
  - Allows for `AbortSignal` to control termination
10
8
  - Best effort to match `JSON.parse` restriction
9
+ - Start and End position for errors
11
10
 
12
11
  ## Example
13
12
 
@@ -42,60 +41,60 @@ for(const token of stream) {
42
41
  console.log(token)
43
42
  }
44
43
  /*
45
- { type: 'object-open', value: '{' }
46
- { type: 'whitespace', value: '\n\t' }
47
- { type: 'open-key-quote', value: '"' }
48
- { type: 'key', value: 'team' }
49
- { type: 'close-key-quote', value: '"' }
50
- { type: 'colon', value: ':' }
51
- { type: 'whitespace', value: ' ' }
52
- { type: 'open-string-quote', value: '"' }
53
- { type: 'string', value: 'Mystery Inc' }
54
- { type: 'close-string-quote', value: '"' }
55
- { type: 'object-member-comma', value: ',' }
56
- { type: 'whitespace', value: '\n\t' }
57
- { type: 'open-key-quote', value: '"' }
58
- { type: 'key', value: 'members' }
59
- { type: 'close-key-quote', value: '"' }
60
- { type: 'colon', value: ':' }
61
- { type: 'whitespace', value: ' ' }
62
- { type: 'array-open', value: '[' }
63
- { type: 'whitespace', value: '\n\t\t' }
64
- { type: 'open-string-quote', value: '"' }
65
- { type: 'string', value: 'Fred' }
66
- { type: 'close-string-quote', value: '"' }
67
- { type: 'array-element-comma', value: ',' }
68
- { type: 'whitespace', value: '\n\t\t' }
69
- { type: 'open-string-quote', value: '"' }
70
- { type: 'string', value: 'Daphne' }
71
- { type: 'close-string-quote', value: '"' }
72
- { type: 'array-element-comma', value: ',' }
73
- { type: 'whitespace', value: '\n\t\t' }
74
- { type: 'open-string-quote', value: '"' }
75
- { type: 'string', value: 'Velma' }
76
- { type: 'close-string-quote', value: '"' }
77
- { type: 'array-element-comma', value: ',' }
78
- { type: 'whitespace', value: '\n\t\t' }
79
- { type: 'open-string-quote', value: '"' }
80
- { type: 'string', value: 'Shaggy' }
81
- { type: 'close-string-quote', value: '"' }
82
- { type: 'array-element-comma', value: ',' }
83
- { type: 'whitespace', value: '\n\t\t' }
84
- { type: 'open-string-quote', value: '"' }
85
- { type: 'string', value: 'Scooby' }
86
- { type: 'close-string-quote', value: '"' }
87
- { type: 'whitespace', value: '\n\t' }
88
- { type: 'array-close', value: ']' }
89
- { type: 'object-member-comma', value: ',' }
90
- { type: 'whitespace', value: '\n\t' }
91
- { type: 'open-key-quote', value: '"' }
92
- { type: 'key', value: 'aired' }
93
- { type: 'close-key-quote', value: '"' }
94
- { type: 'colon', value: ':' }
95
- { type: 'whitespace', value: ' ' }
96
- { type: 'number', value: '1969' }
97
- { type: 'whitespace', value: '\n' }
98
- { type: 'object-close', value: '}' }
99
- { type: 'eof', value: '' }
44
+ { type: 'object-open', value: '{', start: 0, end: 0 }
45
+ { type: 'whitespace', value: '\n\t', start: 1, end: 2 }
46
+ { type: 'open-key-quote', value: '"', start: 3, end: 3 }
47
+ { type: 'key', value: 'team', start: 4, end: 7 }
48
+ { type: 'close-key-quote', value: '"', start: 8, end: 8 }
49
+ { type: 'colon', value: ':', start: 9, end: 9 }
50
+ { type: 'whitespace', value: ' ', start: 10, end: 10 }
51
+ { type: 'open-string-quote', value: '"', start: 11, end: 11 }
52
+ { type: 'string', value: 'Mystery Inc', start: 12, end: 22 }
53
+ { type: 'close-string-quote', value: '"', start: 23, end: 23 }
54
+ { type: 'object-member-comma', value: ',', start: 24, end: 24 }
55
+ { type: 'whitespace', value: '\n\t', start: 25, end: 26 }
56
+ { type: 'open-key-quote', value: '"', start: 27, end: 27 }
57
+ { type: 'key', value: 'members', start: 28, end: 34 }
58
+ { type: 'close-key-quote', value: '"', start: 35, end: 35 }
59
+ { type: 'colon', value: ':', start: 36, end: 36 }
60
+ { type: 'whitespace', value: ' ', start: 37, end: 37 }
61
+ { type: 'array-open', value: '[', start: 38, end: 38 }
62
+ { type: 'whitespace', value: '\n\t\t', start: 39, end: 41 }
63
+ { type: 'open-string-quote', value: '"', start: 42, end: 42 }
64
+ { type: 'string', value: 'Fred', start: 43, end: 46 }
65
+ { type: 'close-string-quote', value: '"', start: 47, end: 47 }
66
+ { type: 'array-element-comma', value: ',', start: 48, end: 48 }
67
+ { type: 'whitespace', value: '\n\t\t', start: 49, end: 51 }
68
+ { type: 'open-string-quote', value: '"', start: 52, end: 52 }
69
+ { type: 'string', value: 'Daphne', start: 53, end: 58 }
70
+ { type: 'close-string-quote', value: '"', start: 59, end: 59 }
71
+ { type: 'array-element-comma', value: ',', start: 60, end: 60 }
72
+ { type: 'whitespace', value: '\n\t\t', start: 61, end: 63 }
73
+ { type: 'open-string-quote', value: '"', start: 64, end: 64 }
74
+ { type: 'string', value: 'Velma', start: 65, end: 69 }
75
+ { type: 'close-string-quote', value: '"', start: 70, end: 70 }
76
+ { type: 'array-element-comma', value: ',', start: 71, end: 71 }
77
+ { type: 'whitespace', value: '\n\t\t', start: 72, end: 74 }
78
+ { type: 'open-string-quote', value: '"', start: 75, end: 75 }
79
+ { type: 'string', value: 'Shaggy', start: 76, end: 81 }
80
+ { type: 'close-string-quote', value: '"', start: 82, end: 82 }
81
+ { type: 'array-element-comma', value: ',', start: 83, end: 83 }
82
+ { type: 'whitespace', value: '\n\t\t', start: 84, end: 86 }
83
+ { type: 'open-string-quote', value: '"', start: 87, end: 87 }
84
+ { type: 'string', value: 'Scooby', start: 88, end: 93 }
85
+ { type: 'close-string-quote', value: '"', start: 94, end: 94 }
86
+ { type: 'whitespace', value: '\n\t', start: 95, end: 96 }
87
+ { type: 'array-close', value: ']', start: 97, end: 97 }
88
+ { type: 'object-member-comma', value: ',', start: 98, end: 98 }
89
+ { type: 'whitespace', value: '\n\t', start: 99, end: 100 }
90
+ { type: 'open-key-quote', value: '"', start: 101, end: 101 }
91
+ { type: 'key', value: 'aired', start: 102, end: 106 }
92
+ { type: 'close-key-quote', value: '"', start: 107, end: 107 }
93
+ { type: 'colon', value: ':', start: 108, end: 108 }
94
+ { type: 'whitespace', value: ' ', start: 109, end: 109 }
95
+ { type: 'number', value: '1969', start: 110, end: 113 }
96
+ { type: 'whitespace', value: '\n', start: 114, end: 114 }
97
+ { type: 'object-close', value: '}', start: 115, end: 115 }
98
+ { type: 'eof', value: '', start: null, end: 116 }
100
99
  */
101
100
  ```
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@johntalton/json-tokenizer",
3
3
  "type": "module",
4
- "version": "1.0.1",
4
+ "version": "1.1.0",
5
5
  "license": "MIT",
6
6
  "exports": {
7
7
  ".": "./src/index.js"
package/src/index.js CHANGED
@@ -1,44 +1,87 @@
1
1
  /**
2
2
  * @typedef {Object} TokenizerOptions
3
- * @property {string|undefined} [locale = 'en-US']
4
3
  * @property {AbortSignal|undefined} [signal]
5
4
  * @property {boolean|undefined} [debug]
6
5
  */
7
6
 
8
- export const TOKEN = {
9
- ERROR: 'error',
10
- EOF: 'eof',
11
- WHITESPACE: 'whitespace',
7
+ /**
8
+ * @typedef {Object} TokenBase
9
+ * @property {string} type
10
+ * @property {string} value
11
+ * @property {number|null} start
12
+ * @property {number|null} end
13
+ */
14
+
15
+ /**
16
+ * @typedef {Object} ErrorTokenBase
17
+ * @property {'error'} type
18
+ * @property {string} cause
19
+ */
20
+ /** @typedef {TokenBase & ErrorTokenBase} ErrorToken */
21
+ /** @typedef {TokenBase & { type: 'eof' }} EOFToken */
22
+ /** @typedef {TokenBase & { type: 'whitespace' }} WhitespaceToken */
23
+ /** @typedef {TokenBase & { type: 'array-open' }} ArrayOpenToken */
24
+ /** @typedef {TokenBase & { type: 'array-close' }} ArrayCloseToken */
25
+ /** @typedef {TokenBase & { type: 'array-close-immediate' }} ArrayCloseImmediateToken */
26
+ /** @typedef {TokenBase & { type: 'array-element-comma' }} ArrayCommaToken */
27
+ /** @typedef {TokenBase & { type: 'object-open' }} ObjectOpenToken */
28
+ /** @typedef {TokenBase & { type: 'object-close' }} ObjectCloseToken */
29
+ /** @typedef {TokenBase & { type: 'object-close-immediate' }} ObjectCloseImmediateToken */
30
+ /** @typedef {TokenBase & { type: 'object-member-comma' }} ObjectCommaToken */
31
+ /** @typedef {TokenBase & { type: 'open-key-quote' }} OpenKeyQuoteToken */
32
+ /** @typedef {TokenBase & { type: 'key' }} KeyToken */
33
+ /** @typedef {TokenBase & { type: 'close-key-quote' }} CloseKeyQuoteToken */
34
+ /** @typedef {TokenBase & { type: 'colon' }} ColonToken */
35
+ /** @typedef {TokenBase & { type: 'open-string-quote' }} OpenStringQuoteToken */
36
+ /** @typedef {TokenBase & { type: 'string' }} StringToken */
37
+ /** @typedef {TokenBase & { type: 'close-string-quote' }} CloseStringQuoteToken */
38
+ /** @typedef {TokenBase & { type: 'true' }} TrueToken */
39
+ /** @typedef {TokenBase & { type: 'false' }} FalseToken */
40
+ /** @typedef {TokenBase & { type: 'null' }} NULLToken */
41
+ /** @typedef {TokenBase & { type: 'number' }} NumberToken */
42
+
43
+ /** @typedef {EOFToken|WhitespaceToken|ErrorToken|ArrayOpenToken|ArrayCloseToken|ArrayCloseImmediateToken|ArrayCommaToken|ObjectOpenToken|ObjectCloseToken|ObjectCloseImmediateToken|ObjectCommaToken|OpenKeyQuoteToken|KeyToken|CloseKeyQuoteToken|ColonToken|OpenStringQuoteToken|StringToken|CloseStringQuoteToken|TrueToken|FalseToken|NULLToken|NumberToken} Token */
44
+
45
+ /**
46
+ * @typedef {Object} AccumulationState
47
+ * @property {string} value
48
+ * @property {number|null} start
49
+ * @property {number|null} end
50
+ */
51
+
52
+ export const TOKEN_ERROR = 'error'
53
+ export const TOKEN_EOF = 'eof'
54
+ export const TOKEN_WHITESPACE = 'whitespace'
12
55
 
13
56
  // Array / Elements
14
- ARRAY_OPEN: 'array-open',
15
- ARRAY_CLOSE: 'array-close',
16
- ARRAY_CLOSE_IMMEDIATE: 'array-close-immediate',
17
- ARRAY_ELEMENT_COMMA: 'array-element-comma',
57
+ export const TOKEN_ARRAY_OPEN = 'array-open'
58
+ export const TOKEN_ARRAY_CLOSE = 'array-close'
59
+ export const TOKEN_ARRAY_CLOSE_IMMEDIATE = 'array-close-immediate'
60
+ export const TOKEN_ARRAY_ELEMENT_COMMA = 'array-element-comma'
18
61
 
19
62
  // Object
20
- OBJECT_OPEN: 'object-open',
21
- OBJECT_CLOSE: 'object-close',
22
- OBJECT_CLOSE_IMMEDIATE: 'object-close-immediate',
23
- OBJECT_MEMBER_COMMA: 'object-member-comma',
24
- OBJECT_KEY_OPEN: 'open-key-quote',
25
- OBJECT_KEY: 'key',
26
- OBJECT_KEY_CLOSE: 'close-key-quote',
27
- OBJECT_COLON: 'colon',
63
+ export const TOKEN_OBJECT_OPEN = 'object-open'
64
+ export const TOKEN_OBJECT_CLOSE = 'object-close'
65
+ export const TOKEN_OBJECT_CLOSE_IMMEDIATE = 'object-close-immediate'
66
+ export const TOKEN_OBJECT_MEMBER_COMMA = 'object-member-comma'
67
+ export const TOKEN_OBJECT_KEY_OPEN = 'open-key-quote'
68
+ export const TOKEN_OBJECT_KEY = 'key'
69
+ export const TOKEN_OBJECT_KEY_CLOSE = 'close-key-quote'
70
+ export const TOKEN_OBJECT_COLON = 'colon'
28
71
 
29
72
  // String
30
- STRING_OPEN: 'open-string-quote',
31
- STRING: 'string',
32
- STRING_CLOSE: 'close-string-quote',
73
+ export const TOKEN_STRING_OPEN = 'open-string-quote'
74
+ export const TOKEN_STRING = 'string'
75
+ export const TOKEN_STRING_CLOSE = 'close-string-quote'
33
76
 
34
77
  // Primitives
35
- TRUE: 'true',
36
- FALSE: 'false',
37
- NULL: 'null',
78
+ export const TOKEN_TRUE = 'true'
79
+ export const TOKEN_FALSE = 'false'
80
+ export const TOKEN_NULL = 'null'
38
81
 
39
82
  // Number
40
- NUMBER: 'number'
41
- }
83
+ export const TOKEN_NUMBER = 'number'
84
+
42
85
 
43
86
  export const EMPTY = ''
44
87
 
@@ -53,6 +96,7 @@ export const STATE = {
53
96
  ARY_OPEN: 'ao',
54
97
  ARY_CLOSE: 'ac',
55
98
  MEMBERS: 'ms',
99
+ MEMBERS_CONTINUE: 'msc',
56
100
  MEMBER: 'm',
57
101
  KEY: 'key',
58
102
  MEMBER_KEY_AFTER: 'mka',
@@ -73,11 +117,64 @@ export const STATE = {
73
117
  U_HEX4: 'hex'
74
118
  }
75
119
 
120
+ export const EXIT_STATES = [ STATE.ELEMENT_AFTER ]
121
+
76
122
  export const ESCAPE_CHARS = [
77
123
  '"', '\\', '/', 'b', 'f', 'n', 'r', 't', 'u'
78
124
  ]
79
125
 
80
- export const DEFAULT_LOCALE = 'en-US'
126
+ export const HEX_CHARS = [
127
+ '1', '2', '3', '4', '5', '6', '7', '8', '9', '0',
128
+ 'a', 'b', 'c', 'd', 'e', 'f',
129
+ 'A', 'B', 'C', 'D', 'E', 'F',
130
+ ]
131
+
132
+ export class Accumulator {
133
+ /**
134
+ * @param {AccumulationState} accumulationState
135
+ */
136
+ static #reset(accumulationState) {
137
+ accumulationState.value = EMPTY
138
+ accumulationState.start = null
139
+ accumulationState.end = null
140
+ }
141
+
142
+ /**
143
+ * @param {AccumulationState} accumulationState
144
+ * @param {number} start
145
+ */
146
+ static start(accumulationState, start) {
147
+ accumulationState.value = EMPTY
148
+ accumulationState.start = start
149
+ accumulationState.end = start
150
+ }
151
+
152
+ /**
153
+ * @param {AccumulationState} accumulationState
154
+ * @param {string} value
155
+ * @param {number} end
156
+ */
157
+ static accumulate(accumulationState, value, end) {
158
+ accumulationState.value += value
159
+ accumulationState.end = end
160
+ }
161
+
162
+ /**
163
+ * @param {AccumulationState} accumulationState
164
+ */
165
+ static end(accumulationState) {
166
+ const result = { ...accumulationState }
167
+ Accumulator.#reset(accumulationState)
168
+ return result
169
+ }
170
+
171
+ /**
172
+ * @param {AccumulationState} accumulationState
173
+ */
174
+ static empty(accumulationState) {
175
+ return accumulationState.value === EMPTY
176
+ }
177
+ }
81
178
 
82
179
  export class JSONTokenizer {
83
180
  /**
@@ -100,63 +197,107 @@ export class JSONTokenizer {
100
197
  return true
101
198
  }
102
199
 
200
+ /**
201
+ * @param {string} str
202
+ */
203
+ static isValidHEX(str) {
204
+ return HEX_CHARS.includes(str)
205
+ }
206
+
103
207
  /**
104
208
  * @param {string} str
105
209
  * @param {TokenizerOptions} [options]
210
+ * @return {Generator<Token, undefined, undefined>}
106
211
  */
107
212
  static *tokenize(str, options) {
108
213
  const debug = (options?.debug ?? false) === true
109
214
  const signal = options?.signal
110
- const locale = options?.locale ?? DEFAULT_LOCALE
111
215
 
112
- const seg = new Intl.Segmenter(locale, { granularity: 'grapheme' })
113
- const segments = seg.segment(str)
114
- using segmentIter = segments[Symbol.iterator]()
216
+ using segmentIter = Iterator.from(str)
217
+ .map((item, index) => ({
218
+ segment: item,
219
+ done: false,
220
+ index
221
+ }))
115
222
 
116
223
  /** @type {Array<string>} */
117
- const stack = []
224
+ const stack = [ ]
118
225
 
119
226
  /** @type {string|undefined} */
120
227
  let state = STATE.ELEMENT
121
228
 
122
229
  let next = segmentIter.next()
123
230
 
124
- let accumulator = EMPTY
231
+ /** @type {AccumulationState} */
232
+ const accumulatorState = {
233
+ value: EMPTY,
234
+ start: null,
235
+ end: null
236
+ }
237
+
238
+ let line = 1
125
239
 
126
240
  //
127
241
  if(next.done) {
128
- yield { type: TOKEN.ERROR, value: EMPTY }
242
+ yield { type: TOKEN_ERROR, value: EMPTY, start: 0, end: 0, cause: 'empty' }
129
243
  return
130
244
  }
131
245
 
132
246
  //
133
247
  while(true) {
134
248
  if(signal?.aborted) {
135
- //
249
+ yield { type: TOKEN_EOF, value: EMPTY, start: null, end: null }
136
250
  break
137
251
  }
138
252
 
139
253
  //
140
254
  if(next.done) {
141
- if(accumulator !== EMPTY) { yield { type: TOKEN.ERROR, value: accumulator } }
142
- if(stack.length !== 0) { yield { type: TOKEN.ERROR, value: EMPTY }}
143
- yield { type: TOKEN.EOF, value: EMPTY }
255
+ if(state === undefined) {
256
+ yield { type: TOKEN_ERROR, value: EMPTY, start: null, end: null, cause: 'undefined state' }
257
+ }
258
+
259
+ //
260
+ if(state !== undefined && !EXIT_STATES.includes(state)) {
261
+ yield { type: TOKEN_ERROR, value: EMPTY, start: null, end: str.length, cause: `invalid exit state (${state})` }
262
+ }
263
+
264
+ //
265
+ if(!Accumulator.empty(accumulatorState)) {
266
+ yield { type: TOKEN_ERROR, ...Accumulator.end(accumulatorState), cause: 'accumulator not empty (done)' }
267
+ }
268
+
269
+ //
270
+ if(stack.length !== 0) {
271
+ yield { type: TOKEN_ERROR, value: EMPTY, start: null, end: str.length, cause: 'stack not empty' }
272
+ }
273
+
274
+ //
275
+ yield { type: TOKEN_EOF, value: EMPTY, start: null, end: str.length }
144
276
  break
145
277
  }
146
278
 
147
279
  //
148
280
  if(state === undefined) {
149
- if(accumulator !== EMPTY) { yield { type: TOKEN.ERROR, value: accumulator } }
281
+ //
282
+ if(!Accumulator.empty(accumulatorState)) {
283
+ yield { type: TOKEN_ERROR, ...Accumulator.end(accumulatorState), cause: 'accumulator not empty (stack)' }
284
+ }
285
+
286
+ //
150
287
  if(!next.done) {
151
- accumulator = EMPTY
288
+ Accumulator.start(accumulatorState, next.value.index)
289
+
152
290
  while(!next.done) {
153
- accumulator += next.value.segment
291
+ if(signal?.aborted) { break }
292
+ Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
154
293
  next = segmentIter.next()
155
294
  }
156
- yield { type: TOKEN.ERROR, value: accumulator }
295
+
296
+ yield { type: TOKEN_ERROR, ...Accumulator.end(accumulatorState), cause: 'accumulator not empty (not done)' }
157
297
  }
158
298
 
159
- yield { type: TOKEN.EOF, value: EMPTY }
299
+ //
300
+ yield { type: TOKEN_EOF, value: EMPTY, start: null, end: null }
160
301
  break
161
302
  }
162
303
 
@@ -164,9 +305,9 @@ export class JSONTokenizer {
164
305
  if(debug) {
165
306
  console.log({
166
307
  seg: next.value.segment,
167
- state, stack:
168
- stack.join(','),
169
- accumulator
308
+ state,
309
+ stack: stack.join(','),
310
+ ...accumulatorState
170
311
  })
171
312
  }
172
313
 
@@ -178,7 +319,7 @@ export class JSONTokenizer {
178
319
  state = stack.pop()
179
320
  break
180
321
  case ',':
181
- yield { type: TOKEN.ARRAY_ELEMENT_COMMA, value: next.value.segment }
322
+ yield { type: TOKEN_ARRAY_ELEMENT_COMMA, value: next.value.segment, start: next.value.index, end: next.value.index }
182
323
  stack.push(STATE.ELEMENTS)
183
324
  state = STATE.ELEMENT
184
325
  next = segmentIter.next()
@@ -192,6 +333,7 @@ export class JSONTokenizer {
192
333
  case STATE.ELEMENT:
193
334
  switch(next.value.segment) {
194
335
  case ' ': case '\r': case '\n': case '\t':
336
+ Accumulator.start(accumulatorState, next.value.index)
195
337
  stack.push(STATE.ELEMENT)
196
338
  state = STATE.WS
197
339
  break
@@ -204,6 +346,7 @@ export class JSONTokenizer {
204
346
  case STATE.ELEMENT_AFTER:
205
347
  switch(next.value.segment) {
206
348
  case ' ': case '\r': case '\n': case '\t':
349
+ Accumulator.start(accumulatorState, next.value.index)
207
350
  stack.push(STATE.ELEMENT_AFTER)
208
351
  state = STATE.WS
209
352
  break
@@ -217,7 +360,7 @@ export class JSONTokenizer {
217
360
  state = stack.pop()
218
361
  break
219
362
  default:
220
- yield { type: TOKEN.ERROR, value: next.value.segment }
363
+ yield { type: TOKEN_ERROR, value: next.value.segment, start: next.value.index, end: next.value.index, cause: 'expect close, comma or whitespace' }
221
364
  next = segmentIter.next()
222
365
  break
223
366
  }
@@ -225,16 +368,20 @@ export class JSONTokenizer {
225
368
  case STATE.WS:
226
369
  switch(next.value.segment) {
227
370
  case ' ': case '\r': case '\n': case '\t':
228
- accumulator += next.value.segment
371
+
372
+ if(next.value.segment === '\n') {
373
+ line += 1
374
+ }
375
+
376
+ Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
229
377
  next = segmentIter.next()
230
378
  if(next.done) {
231
- yield { type: TOKEN.WHITESPACE, value: accumulator }
232
- accumulator = EMPTY
379
+ yield { type: TOKEN_WHITESPACE, ...Accumulator.end(accumulatorState) }
380
+ state = stack.pop()
233
381
  }
234
382
  break
235
383
  default:
236
- yield { type: TOKEN.WHITESPACE, value: accumulator }
237
- accumulator = EMPTY
384
+ yield { type: TOKEN_WHITESPACE, ...Accumulator.end(accumulatorState) }
238
385
  state = stack.pop()
239
386
  break
240
387
  }
@@ -242,157 +389,150 @@ export class JSONTokenizer {
242
389
  case STATE.VALUE:
243
390
  switch(next.value.segment) {
244
391
  case '{':
245
- yield { type: TOKEN.OBJECT_OPEN, value: next.value.segment }
392
+ yield { type: TOKEN_OBJECT_OPEN, value: next.value.segment, start: next.value.index, end: next.value.index }
246
393
  state = STATE.OBJ_OPEN
247
394
  next = segmentIter.next()
248
395
  break
249
396
  case '[':
250
- yield { type: TOKEN.ARRAY_OPEN, value: next.value.segment }
397
+ yield { type: TOKEN_ARRAY_OPEN, value: next.value.segment, start: next.value.index, end: next.value.index }
251
398
  state = STATE.ARY_OPEN
252
399
  next = segmentIter.next()
253
400
  break
254
401
  case '"':
255
- yield { type: TOKEN.STRING_OPEN, value: next.value.segment }
256
- accumulator = EMPTY
402
+ yield { type: TOKEN_STRING_OPEN, value: next.value.segment, start: next.value.index, end: next.value.index }
257
403
  state = STATE.STR
258
404
  next = segmentIter.next()
405
+ if(!next.done) { Accumulator.start(accumulatorState, next.value.index) }
259
406
  break
260
407
  case 't':
261
- accumulator = 't'
408
+ Accumulator.start(accumulatorState, next.value.index)
409
+ Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
262
410
 
263
411
  next = segmentIter.next()
264
412
  if(next.done || next.value.segment !== 'r') {
265
- yield { type: TOKEN.ERROR, value: accumulator }
266
- accumulator = EMPTY
413
+ yield { type: TOKEN_ERROR, ...Accumulator.end(accumulatorState), cause: 'expected true literal (r)' }
267
414
  state = stack.pop()
268
415
  break
269
416
  }
270
- accumulator += next.value?.segment
417
+ Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
271
418
 
272
419
  next = segmentIter.next()
273
420
  if(next.done || next.value.segment !== 'u') {
274
- yield { type: TOKEN.ERROR, value: accumulator }
275
- accumulator = EMPTY
421
+ yield { type: TOKEN_ERROR, ...Accumulator.end(accumulatorState), cause: 'expected true literal (u)' }
276
422
  state = stack.pop()
277
423
  break
278
424
  }
279
- accumulator += next.value?.segment
425
+ Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
280
426
 
281
427
  next = segmentIter.next()
282
428
  if(next.done || next.value.segment !== 'e') {
283
- yield { type: TOKEN.ERROR, value: accumulator }
284
- accumulator = EMPTY
429
+ yield { type: TOKEN_ERROR, ...Accumulator.end(accumulatorState), cause: 'expected true literal (e)' }
285
430
  state = stack.pop()
286
431
  break
287
432
  }
288
- accumulator += next.value?.segment
433
+ Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
289
434
 
290
- yield { type: TOKEN.TRUE, value: accumulator }
291
- accumulator = EMPTY
435
+ yield { type: TOKEN_TRUE, ...Accumulator.end(accumulatorState) }
292
436
 
293
437
  next = segmentIter.next()
294
438
  state = stack.pop()
295
439
  break
296
440
  case 'f':
297
- accumulator = 'f'
441
+ Accumulator.start(accumulatorState, next.value.index)
442
+ Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
298
443
 
299
444
  next = segmentIter.next()
300
445
  if(next.done || next.value.segment !== 'a') {
301
- yield { type: TOKEN.ERROR, value: accumulator }
302
- accumulator = EMPTY
446
+ yield { type: TOKEN_ERROR, ...Accumulator.end(accumulatorState), cause: 'expected false literal (a)' }
303
447
  state = stack.pop()
304
448
  break
305
449
  }
306
- accumulator += next.value?.segment
450
+ Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
307
451
 
308
452
  next = segmentIter.next()
309
453
  if(next.done || next.value.segment !== 'l') {
310
- yield { type: TOKEN.ERROR, value: accumulator }
311
- accumulator = EMPTY
454
+ yield { type: TOKEN_ERROR, ...Accumulator.end(accumulatorState), cause: 'expected false literal (l)' }
312
455
  state = stack.pop()
313
456
  break
314
457
  }
315
- accumulator += next.value?.segment
458
+ Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
316
459
 
317
460
  next = segmentIter.next()
318
461
  if(next.done || next.value.segment !== 's') {
319
- yield { type: TOKEN.ERROR, value: accumulator }
320
- accumulator = EMPTY
462
+ yield { type: TOKEN_ERROR, ...Accumulator.end(accumulatorState), cause: 'expected false literal (s)' }
321
463
  state = stack.pop()
322
464
  break
323
465
  }
324
- accumulator += next.value?.segment
466
+ Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
325
467
 
326
468
  next = segmentIter.next()
327
469
  if(next.done || next.value.segment !== 'e') {
328
- yield { type: TOKEN.ERROR, value: accumulator }
329
- accumulator = EMPTY
470
+ yield { type: TOKEN_ERROR, ...Accumulator.end(accumulatorState), cause: 'expected false literal (e)' }
330
471
  state = stack.pop()
331
472
  break
332
473
  }
333
- accumulator += next.value?.segment
474
+ Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
334
475
 
335
- yield { type: TOKEN.FALSE, value: accumulator }
336
- accumulator = EMPTY
476
+ yield { type: TOKEN_FALSE, ...Accumulator.end(accumulatorState) }
337
477
 
338
478
  next = segmentIter.next()
339
479
  state = stack.pop()
340
480
  break
341
481
  case 'n':
342
- accumulator = 'n'
482
+ Accumulator.start(accumulatorState, next.value.index)
483
+ Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
343
484
 
344
485
  next = segmentIter.next()
345
486
  if(next.done || next.value.segment !== 'u') {
346
- yield { type: TOKEN.ERROR, value: accumulator }
347
- accumulator = EMPTY
487
+ yield { type: TOKEN_ERROR, ...Accumulator.end(accumulatorState), cause: 'expected null literal (u)' }
348
488
  state = stack.pop()
349
489
  break
350
490
  }
351
- accumulator += next.value?.segment
491
+ Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
352
492
 
353
493
  next = segmentIter.next()
354
494
  if(next.done || next.value.segment !== 'l') {
355
- yield { type: TOKEN.ERROR, value: accumulator }
356
- accumulator = EMPTY
495
+ yield { type: TOKEN_ERROR, ...Accumulator.end(accumulatorState), cause: 'expected null literal (l)' }
357
496
  state = stack.pop()
358
497
  break
359
498
  }
360
- accumulator += next.value?.segment
499
+ Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
361
500
 
362
501
  next = segmentIter.next()
363
502
  if(next.done || next.value.segment !== 'l') {
364
- yield { type: TOKEN.ERROR, value: accumulator }
365
- accumulator = EMPTY
503
+ yield { type: TOKEN_ERROR, ...Accumulator.end(accumulatorState), cause: 'expected null literal (l)' }
366
504
  state = stack.pop()
367
505
  break
368
506
  }
369
- accumulator += next.value?.segment
507
+ Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
370
508
 
371
- yield { type: TOKEN.NULL, value: accumulator }
372
- accumulator = EMPTY
509
+ yield { type: TOKEN_NULL, ...Accumulator.end(accumulatorState) }
373
510
 
374
511
  next = segmentIter.next()
375
512
  state = stack.pop()
376
513
  break
377
514
  case '-':
378
- accumulator = next.value.segment
515
+ Accumulator.start(accumulatorState, next.value.index)
516
+ Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
379
517
  state = STATE.NUMBER
380
518
  next = segmentIter.next()
381
519
  break
382
520
  case '0':
383
- accumulator = next.value.segment
521
+ Accumulator.start(accumulatorState, next.value.index)
522
+ Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
523
+
384
524
  state = STATE.NUMBER_INT_AFTER
385
525
  next = segmentIter.next()
386
526
  break
387
527
  case '1': case '2': case '3':
388
528
  case '4': case '5': case '6':
389
529
  case '7': case '8': case '9':
390
- accumulator = EMPTY
530
+ Accumulator.start(accumulatorState, next.value.index)
391
531
  state = STATE.NUMBER_INT
392
532
  break
393
533
  default:
394
534
  //
395
- yield { type: TOKEN.ERROR, value: next.value.segment }
535
+ yield { type: TOKEN_ERROR, value: next.value.segment, start: next.value.index, end: next.value.index, cause: 'unexpected value' }
396
536
  next = segmentIter.next()
397
537
  break
398
538
  }
@@ -400,7 +540,7 @@ export class JSONTokenizer {
400
540
  case STATE.NUMBER:
401
541
  switch(next.value.segment) {
402
542
  case '0':
403
- accumulator += next.value.segment
543
+ Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
404
544
  state = STATE.NUMBER_INT_AFTER
405
545
  next = segmentIter.next()
406
546
  break
@@ -414,14 +554,13 @@ export class JSONTokenizer {
414
554
  case '1': case '2': case '3':
415
555
  case '4': case '5': case '6':
416
556
  case '7': case '8': case '9':
417
- accumulator += next.value.segment
557
+ Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
418
558
  state = STATE.NUMBER_INT
419
559
  next = segmentIter.next()
420
560
  break
421
561
  default:
422
- accumulator += next.value.segment
423
- yield { type: TOKEN.ERROR, value: accumulator }
424
- accumulator = EMPTY
562
+ Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
563
+ yield { type: TOKEN_ERROR, ...Accumulator.end(accumulatorState), cause: 'invalid number format' }
425
564
  next = segmentIter.next()
426
565
  break
427
566
  }
@@ -432,12 +571,12 @@ export class JSONTokenizer {
432
571
  case '1': case '2': case '3':
433
572
  case '4': case '5': case '6':
434
573
  case '7': case '8': case '9':
435
- accumulator += next.value.segment
574
+ Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
436
575
  next = segmentIter.next()
437
576
 
438
577
  if(next.done) {
439
- yield { type: TOKEN.NUMBER, value: accumulator }
440
- accumulator = EMPTY
578
+ yield { type: TOKEN_NUMBER, ...Accumulator.end(accumulatorState) }
579
+ state = stack.pop()
441
580
  }
442
581
  break
443
582
  default:
@@ -448,19 +587,18 @@ export class JSONTokenizer {
448
587
  case STATE.NUMBER_INT_AFTER:
449
588
  switch(next.value.segment) {
450
589
  case '.':
451
- accumulator += next.value.segment
590
+ Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
452
591
  next = segmentIter.next()
453
592
  state = STATE.NUMBER_DECIMAL_FIRST
454
593
  break
455
594
  case 'e':
456
595
  case 'E':
457
- accumulator += next.value.segment
596
+ Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
458
597
  state = STATE.NUMBER_EXPONENT_SIGN
459
598
  next = segmentIter.next()
460
599
  break
461
600
  default:
462
- yield { type: TOKEN.NUMBER, value: accumulator }
463
- accumulator = EMPTY
601
+ yield { type: TOKEN_NUMBER, ...Accumulator.end(accumulatorState) }
464
602
  state = stack.pop()
465
603
  break
466
604
  }
@@ -471,20 +609,20 @@ export class JSONTokenizer {
471
609
  case '1': case '2': case '3':
472
610
  case '4': case '5': case '6':
473
611
  case '7': case '8': case '9':
474
- accumulator += next.value.segment
612
+ Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
475
613
  next = segmentIter.next()
476
614
  state = STATE.NUMBER_DECIMAL
477
615
 
478
616
  if(next.done) {
479
- yield { type: TOKEN.NUMBER, value: accumulator }
480
- accumulator = EMPTY
617
+ yield { type: TOKEN_NUMBER, ...Accumulator.end(accumulatorState) }
618
+ state = stack.pop()
481
619
  }
482
620
  break
483
621
  default:
484
- accumulator += next.value.segment
485
- yield { type: TOKEN.ERROR, value: accumulator}
486
- accumulator = EMPTY
622
+ Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
623
+ yield { type: TOKEN_ERROR, ...Accumulator.end(accumulatorState), cause: 'invalid decimal format' }
487
624
  next = segmentIter.next()
625
+ state = stack.pop()
488
626
  break
489
627
  }
490
628
  break
@@ -494,23 +632,22 @@ export class JSONTokenizer {
494
632
  case '1': case '2': case '3':
495
633
  case '4': case '5': case '6':
496
634
  case '7': case '8': case '9':
497
- accumulator += next.value.segment
635
+ Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
498
636
  next = segmentIter.next()
499
637
 
500
638
  if(next.done) {
501
- yield { type: TOKEN.NUMBER, value: accumulator }
502
- accumulator = EMPTY
639
+ yield { type: TOKEN_NUMBER, ...Accumulator.end(accumulatorState) }
640
+ state = stack.pop()
503
641
  }
504
642
  break
505
643
  case 'e':
506
644
  case 'E':
507
- accumulator += next.value.segment
645
+ Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
508
646
  state = STATE.NUMBER_EXPONENT_SIGN
509
647
  next = segmentIter.next()
510
648
  break
511
649
  default:
512
- yield { type: TOKEN.NUMBER, value: accumulator }
513
- accumulator = EMPTY
650
+ yield { type: TOKEN_NUMBER, ...Accumulator.end(accumulatorState) }
514
651
  state = stack.pop()
515
652
  break
516
653
  }
@@ -518,12 +655,12 @@ export class JSONTokenizer {
518
655
  case STATE.NUMBER_EXPONENT_SIGN:
519
656
  switch(next.value.segment) {
520
657
  case '+':
521
- accumulator += next.value.segment
658
+ Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
522
659
  next = segmentIter.next()
523
660
  state = STATE.NUMBER_EXPONENT_FIRST
524
661
  break
525
662
  case '-':
526
- accumulator += next.value.segment
663
+ Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
527
664
  next = segmentIter.next()
528
665
  state = STATE.NUMBER_EXPONENT_FIRST
529
666
  break
@@ -538,15 +675,15 @@ export class JSONTokenizer {
538
675
  case '1': case '2': case '3':
539
676
  case '4': case '5': case '6':
540
677
  case '7': case '8': case '9':
541
- accumulator += next.value.segment
678
+ Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
542
679
  next = segmentIter.next()
543
680
  state = STATE.NUMBER_EXPONENT
544
681
  break
545
682
  default:
546
- accumulator += next.value.segment
547
- yield { type: TOKEN.ERROR, value: accumulator }
548
- accumulator = EMPTY
683
+ Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
684
+ yield { type: TOKEN_ERROR, ...Accumulator.end(accumulatorState), cause: 'invalid exponent format' }
549
685
  next = segmentIter.next()
686
+ state = stack.pop()
550
687
  break
551
688
  }
552
689
  break
@@ -556,12 +693,11 @@ export class JSONTokenizer {
556
693
  case '1': case '2': case '3':
557
694
  case '4': case '5': case '6':
558
695
  case '7': case '8': case '9':
559
- accumulator += next.value.segment
696
+ Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
560
697
  next = segmentIter.next()
561
698
  break
562
699
  default:
563
- yield { type: TOKEN.NUMBER, value: accumulator }
564
- accumulator = EMPTY
700
+ yield { type: TOKEN_NUMBER, ...Accumulator.end(accumulatorState) }
565
701
  state = stack.pop()
566
702
  break
567
703
  }
@@ -569,11 +705,12 @@ export class JSONTokenizer {
569
705
  case STATE.OBJ_OPEN:
570
706
  switch(next.value.segment) {
571
707
  case ' ': case '\r': case '\n': case '\t':
708
+ Accumulator.start(accumulatorState, next.value.index)
572
709
  stack.push(STATE.OBJ_OPEN)
573
710
  state = STATE.WS
574
711
  break
575
712
  case '}':
576
- yield { type: TOKEN.OBJECT_CLOSE_IMMEDIATE, value: next.value.segment }
713
+ yield { type: TOKEN_OBJECT_CLOSE_IMMEDIATE, value: next.value.segment, start: next.value.index, end: next.value.index }
577
714
  next = segmentIter.next()
578
715
  state = stack.pop()
579
716
  break
@@ -586,7 +723,7 @@ export class JSONTokenizer {
586
723
  case STATE.OBJ_CLOSE:
587
724
  switch(next.value.segment) {
588
725
  case '}':
589
- yield { type: TOKEN.OBJECT_CLOSE, value: next.value.segment }
726
+ yield { type: TOKEN_OBJECT_CLOSE, value: next.value.segment, start: next.value.index, end: next.value.index }
590
727
  next = segmentIter.next()
591
728
  state = stack.pop()
592
729
  break
@@ -597,36 +734,49 @@ export class JSONTokenizer {
597
734
  }
598
735
  break
599
736
  case STATE.MEMBERS:
737
+ switch(next.value.segment){
738
+ case '}':
739
+ state = stack.pop()
740
+ break
741
+ default:
742
+ stack.push(STATE.MEMBERS_CONTINUE)
743
+ state = STATE.MEMBER
744
+ break
745
+ }
746
+ break
747
+ case STATE.MEMBERS_CONTINUE:
600
748
  switch(next.value.segment){
601
749
  case '}':
602
750
  state = stack.pop()
603
751
  break
604
752
  case ',':
605
- yield { type: TOKEN.OBJECT_MEMBER_COMMA, value: next.value.segment }
606
- stack.push(STATE.MEMBERS)
753
+ yield { type: TOKEN_OBJECT_MEMBER_COMMA, value: next.value.segment, start: next.value.index, end: next.value.index }
754
+ stack.push(STATE.MEMBERS_CONTINUE)
607
755
  state = STATE.MEMBER
608
756
  next = segmentIter.next()
609
757
  break
610
758
  default:
611
- stack.push(STATE.MEMBERS)
612
- state = STATE.MEMBER
759
+ yield { type: TOKEN_ERROR, value: next.value.segment, start: next.value.index, end: next.value.index, cause: 'expecting comma or close object' }
760
+ state = stack.pop()
613
761
  break
614
762
  }
615
763
  break
616
764
  case STATE.MEMBER:
617
765
  switch(next.value.segment) {
618
766
  case ' ': case '\r': case '\n': case '\t':
767
+ Accumulator.start(accumulatorState, next.value.index)
619
768
  stack.push(STATE.MEMBER)
620
769
  state = STATE.WS
621
770
  break
622
771
  case '"':
623
- yield { type: TOKEN.OBJECT_KEY_OPEN, value: next.value.segment }
772
+ yield { type: TOKEN_OBJECT_KEY_OPEN, value: next.value.segment, start: next.value.index, end: next.value.index }
624
773
  stack.push(STATE.MEMBER_KEY_AFTER)
625
774
  state = STATE.KEY
626
775
  next = segmentIter.next()
776
+ if(!next.done) { Accumulator.start(accumulatorState, next.value.index) }
627
777
  break
628
778
  default:
629
- yield { type: TOKEN.ERROR, value: next.value.segment }
779
+ yield { type: TOKEN_ERROR, value: next.value.segment, start: next.value.index, end: next.value.index, cause: 'expected member key or whitespace' }
630
780
  next = segmentIter.next()
631
781
  break
632
782
  }
@@ -634,17 +784,18 @@ export class JSONTokenizer {
634
784
  case STATE.MEMBER_KEY_AFTER:
635
785
  switch(next.value.segment) {
636
786
  case ' ': case '\r': case '\n': case '\t':
787
+ Accumulator.start(accumulatorState, next.value.index)
637
788
  stack.push(STATE.MEMBER_KEY_AFTER)
638
789
  state = STATE.WS
639
790
  break
640
791
  case ':':
641
- yield { type: TOKEN.OBJECT_COLON, value: next.value.segment }
792
+ yield { type: TOKEN_OBJECT_COLON, value: next.value.segment, start: next.value.index, end: next.value.index }
642
793
  next = segmentIter.next()
643
794
 
644
795
  state = STATE.ELEMENT
645
796
  break
646
797
  default:
647
- yield { type: TOKEN.ERROR, value: next.value.segment }
798
+ yield { type: TOKEN_ERROR, value: next.value.segment, start: next.value.index, end: next.value.index, cause: 'expected whitespace or colon' }
648
799
  next = segmentIter.next()
649
800
  break
650
801
  }
@@ -652,16 +803,17 @@ export class JSONTokenizer {
652
803
  case STATE.ARY_OPEN:
653
804
  switch(next.value.segment) {
654
805
  case ' ': case '\r': case '\n': case '\t':
806
+ Accumulator.start(accumulatorState, next.value.index)
655
807
  stack.push(STATE.ARY_OPEN)
656
808
  state = STATE.WS
657
809
  break
658
810
  case ']':
659
- yield { type: TOKEN.ARRAY_CLOSE_IMMEDIATE, value: next.value.segment }
811
+ yield { type: TOKEN_ARRAY_CLOSE_IMMEDIATE, value: next.value.segment, start: next.value.index, end: next.value.index }
660
812
  next = segmentIter.next()
661
813
  state = stack.pop()
662
814
  break
663
815
  case ',':
664
- yield { type: TOKEN.ERROR, value: next.value.segment}
816
+ yield { type: TOKEN_ERROR, value: next.value.segment, start: next.value.index, end: next.value.index, cause: 'unexpected array comma' }
665
817
  next = segmentIter.next()
666
818
  break
667
819
  default:
@@ -673,7 +825,7 @@ export class JSONTokenizer {
673
825
  case STATE.ARY_CLOSE:
674
826
  switch(next.value.segment) {
675
827
  case ']':
676
- yield { type: TOKEN.ARRAY_CLOSE, value: next.value.segment }
828
+ yield { type: TOKEN_ARRAY_CLOSE, value: next.value.segment, start: next.value.index, end: next.value.index }
677
829
  next = segmentIter.next()
678
830
  state = stack.pop()
679
831
  break
@@ -688,34 +840,31 @@ export class JSONTokenizer {
688
840
  switch(next.value.segment) {
689
841
  case '"':
690
842
  if(state === STATE.KEY) {
691
- yield { type: TOKEN.OBJECT_KEY, value: accumulator }
692
- yield { type: TOKEN.OBJECT_KEY_CLOSE, value: next.value.segment }
843
+ yield { type: TOKEN_OBJECT_KEY, ...Accumulator.end(accumulatorState) }
844
+ yield { type: TOKEN_OBJECT_KEY_CLOSE, value: next.value.segment, start: next.value.index, end: next.value.index }
693
845
  }
694
846
  else {
695
- yield { type: TOKEN.STRING, value: accumulator }
696
- yield { type: TOKEN.STRING_CLOSE, value: next.value.segment }
847
+ yield { type: TOKEN_STRING, ...Accumulator.end(accumulatorState) }
848
+ yield { type: TOKEN_STRING_CLOSE, value: next.value.segment, start: next.value.index, end: next.value.index }
697
849
  }
698
850
 
699
- accumulator = EMPTY
700
851
  next = segmentIter.next()
701
852
  state = stack.pop()
702
853
  break
703
854
  case '\\':
704
- accumulator += next.value.segment
855
+ Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
705
856
 
706
857
  next = segmentIter.next()
707
858
  if(next.done) {
708
- yield { type: TOKEN.ERROR, value: accumulator }
709
- accumulator = EMPTY
859
+ yield { type: TOKEN_ERROR, ...Accumulator.end(accumulatorState), cause: 'invalid escape sequence' }
710
860
  state = stack.pop()
711
861
  break
712
862
  }
713
863
 
714
- accumulator += next.value.segment
864
+ Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
715
865
 
716
866
  if(!JSONTokenizer.isValueEscapeChar(next.value?.segment)) {
717
- yield { type: TOKEN.ERROR, value: accumulator }
718
- accumulator = EMPTY
867
+ yield { type: TOKEN_ERROR, ...Accumulator.end(accumulatorState), cause: 'invalid escape character' }
719
868
  }
720
869
 
721
870
  if(next.value.segment === 'u') {
@@ -726,11 +875,10 @@ export class JSONTokenizer {
726
875
  next = segmentIter.next()
727
876
  break
728
877
  default:
729
- accumulator += next.value.segment
878
+ Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
730
879
 
731
880
  if(!JSONTokenizer.isValidChar(next.value.segment)) {
732
- yield { type: TOKEN.ERROR, value: accumulator }
733
- accumulator = EMPTY
881
+ yield { type: TOKEN_ERROR, ...Accumulator.end(accumulatorState), cause: 'invalid character in string literal' }
734
882
  }
735
883
 
736
884
  next = segmentIter.next()
@@ -746,52 +894,68 @@ export class JSONTokenizer {
746
894
  case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
747
895
  case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
748
896
 
749
- accumulator += next.value.segment
897
+ Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
750
898
 
751
899
  // second
752
900
  next = segmentIter.next()
753
901
  if(next.done) {
754
- yield { type: TOKEN.ERROR, value: accumulator }
755
- accumulator = EMPTY
902
+ yield { type: TOKEN_ERROR, ...Accumulator.end(accumulatorState), cause: 'invalid escape sequence (2nd done)' }
756
903
  state = stack.pop()
757
904
  break
758
905
  }
759
- accumulator += next.value?.segment
906
+ Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
907
+ if(!JSONTokenizer.isValidHEX(next.value.segment)) {
908
+ yield { type: TOKEN_ERROR, ...Accumulator.end(accumulatorState), cause: 'invalid escape sequence (2nd)' }
909
+ state = stack.pop()
910
+ next = segmentIter.next()
911
+ break
912
+ }
760
913
 
761
914
  // third
762
915
  next = segmentIter.next()
763
916
  if(next.done) {
764
- yield { type: TOKEN.ERROR, value: accumulator }
765
- accumulator = EMPTY
917
+ yield { type: TOKEN_ERROR, ...Accumulator.end(accumulatorState), cause: 'invalid escape sequence (3rd done)' }
918
+ state = stack.pop()
919
+ break
920
+ }
921
+ Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
922
+ if(!JSONTokenizer.isValidHEX(next.value.segment)) {
923
+ yield { type: TOKEN_ERROR, ...Accumulator.end(accumulatorState), cause: 'invalid escape sequence (3rd)' }
766
924
  state = stack.pop()
925
+ next = segmentIter.next()
767
926
  break
768
927
  }
769
- accumulator += next.value?.segment
770
928
 
771
929
  // fourth
772
930
  next = segmentIter.next()
773
931
  if(next.done) {
774
- yield { type: TOKEN.ERROR, value: accumulator }
775
- accumulator = EMPTY
932
+ yield { type: TOKEN_ERROR, ...Accumulator.end(accumulatorState), cause: 'invalid escape sequence (4th done)' }
776
933
  state = stack.pop()
777
934
  break
778
935
  }
779
- accumulator += next.value?.segment
936
+ Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
937
+ if(!JSONTokenizer.isValidHEX(next.value.segment)) {
938
+ yield { type: TOKEN_ERROR, ...Accumulator.end(accumulatorState), cause: 'invalid escape sequence (4th)' }
939
+ state = stack.pop()
940
+ next = segmentIter.next()
941
+ break
942
+ }
780
943
 
781
944
  state = stack.pop()
782
945
  next = segmentIter.next()
783
946
  break
784
947
  default:
785
- accumulator += next.value.segment
786
- yield { type: TOKEN.ERROR, value: accumulator }
787
- accumulator = EMPTY
948
+ Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
949
+ yield { type: TOKEN_ERROR, ...Accumulator.end(accumulatorState), cause: 'invalid escape sequence' }
950
+
951
+ state = stack.pop()
788
952
  next = segmentIter.next()
789
953
  break
790
954
  }
791
955
  break
792
956
  default:
793
- // todo
794
- throw new Error(`unknown state ${state}`)
957
+ yield { type: TOKEN_ERROR, ...Accumulator.end(accumulatorState), cause: `unknown state ${state}` }
958
+ break
795
959
  }
796
960
  }
797
961
  }