@johntalton/json-tokenizer 1.0.1 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +56 -57
- package/package.json +1 -1
- package/src/index.js +338 -174
package/README.md
CHANGED
|
@@ -4,10 +4,9 @@ Generator function that tokenizes string based on JSON format.
|
|
|
4
4
|
|
|
5
5
|
- Uses Generator-based API
|
|
6
6
|
- Produces tokens for all input text (including error tokens)
|
|
7
|
-
- Uses `Intl.Segmenter` with `'grapheme'` granularity
|
|
8
|
-
- Segmenter locale can be custom set (default: `'en-US'`)
|
|
9
7
|
- Allows for `AbortSignal` to control termination
|
|
10
8
|
- Best effort to match `JSON.parse` restrictions
|
|
9
|
+
- Start and end positions for every token (including errors)
|
|
11
10
|
|
|
12
11
|
## Example
|
|
13
12
|
|
|
@@ -42,60 +41,60 @@ for(const token of stream) {
|
|
|
42
41
|
console.log(token)
|
|
43
42
|
}
|
|
44
43
|
/*
|
|
45
|
-
{ type: 'object-open', value: '{' }
|
|
46
|
-
{ type: 'whitespace', value: '\n\t' }
|
|
47
|
-
{ type: 'open-key-quote', value: '"' }
|
|
48
|
-
{ type: 'key', value: 'team' }
|
|
49
|
-
{ type: 'close-key-quote', value: '"' }
|
|
50
|
-
{ type: 'colon', value: ':' }
|
|
51
|
-
{ type: 'whitespace', value: ' ' }
|
|
52
|
-
{ type: 'open-string-quote', value: '"' }
|
|
53
|
-
{ type: 'string', value: 'Mystery Inc' }
|
|
54
|
-
{ type: 'close-string-quote', value: '"' }
|
|
55
|
-
{ type: 'object-member-comma', value: ',' }
|
|
56
|
-
{ type: 'whitespace', value: '\n\t' }
|
|
57
|
-
{ type: 'open-key-quote', value: '"' }
|
|
58
|
-
{ type: 'key', value: 'members' }
|
|
59
|
-
{ type: 'close-key-quote', value: '"' }
|
|
60
|
-
{ type: 'colon', value: ':' }
|
|
61
|
-
{ type: 'whitespace', value: ' ' }
|
|
62
|
-
{ type: 'array-open', value: '[' }
|
|
63
|
-
{ type: 'whitespace', value: '\n\t\t' }
|
|
64
|
-
{ type: 'open-string-quote', value: '"' }
|
|
65
|
-
{ type: 'string', value: 'Fred' }
|
|
66
|
-
{ type: 'close-string-quote', value: '"' }
|
|
67
|
-
{ type: 'array-element-comma', value: ',' }
|
|
68
|
-
{ type: 'whitespace', value: '\n\t\t' }
|
|
69
|
-
{ type: 'open-string-quote', value: '"' }
|
|
70
|
-
{ type: 'string', value: 'Daphne' }
|
|
71
|
-
{ type: 'close-string-quote', value: '"' }
|
|
72
|
-
{ type: 'array-element-comma', value: ',' }
|
|
73
|
-
{ type: 'whitespace', value: '\n\t\t' }
|
|
74
|
-
{ type: 'open-string-quote', value: '"' }
|
|
75
|
-
{ type: 'string', value: 'Velma' }
|
|
76
|
-
{ type: 'close-string-quote', value: '"' }
|
|
77
|
-
{ type: 'array-element-comma', value: ',' }
|
|
78
|
-
{ type: 'whitespace', value: '\n\t\t' }
|
|
79
|
-
{ type: 'open-string-quote', value: '"' }
|
|
80
|
-
{ type: 'string', value: 'Shaggy' }
|
|
81
|
-
{ type: 'close-string-quote', value: '"' }
|
|
82
|
-
{ type: 'array-element-comma', value: ',' }
|
|
83
|
-
{ type: 'whitespace', value: '\n\t\t' }
|
|
84
|
-
{ type: 'open-string-quote', value: '"' }
|
|
85
|
-
{ type: 'string', value: 'Scooby' }
|
|
86
|
-
{ type: 'close-string-quote', value: '"' }
|
|
87
|
-
{ type: 'whitespace', value: '\n\t' }
|
|
88
|
-
{ type: 'array-close', value: ']' }
|
|
89
|
-
{ type: 'object-member-comma', value: ',' }
|
|
90
|
-
{ type: 'whitespace', value: '\n\t' }
|
|
91
|
-
{ type: 'open-key-quote', value: '"' }
|
|
92
|
-
{ type: 'key', value: 'aired' }
|
|
93
|
-
{ type: 'close-key-quote', value: '"' }
|
|
94
|
-
{ type: 'colon', value: ':' }
|
|
95
|
-
{ type: 'whitespace', value: ' ' }
|
|
96
|
-
{ type: 'number', value: '1969' }
|
|
97
|
-
{ type: 'whitespace', value: '\n' }
|
|
98
|
-
{ type: 'object-close', value: '}' }
|
|
99
|
-
{ type: 'eof', value: '' }
|
|
44
|
+
{ type: 'object-open', value: '{', start: 0, end: 0 }
|
|
45
|
+
{ type: 'whitespace', value: '\n\t', start: 1, end: 2 }
|
|
46
|
+
{ type: 'open-key-quote', value: '"', start: 3, end: 3 }
|
|
47
|
+
{ type: 'key', value: 'team', start: 4, end: 7 }
|
|
48
|
+
{ type: 'close-key-quote', value: '"', start: 8, end: 8 }
|
|
49
|
+
{ type: 'colon', value: ':', start: 9, end: 9 }
|
|
50
|
+
{ type: 'whitespace', value: ' ', start: 10, end: 10 }
|
|
51
|
+
{ type: 'open-string-quote', value: '"', start: 11, end: 11 }
|
|
52
|
+
{ type: 'string', value: 'Mystery Inc', start: 12, end: 22 }
|
|
53
|
+
{ type: 'close-string-quote', value: '"', start: 23, end: 23 }
|
|
54
|
+
{ type: 'object-member-comma', value: ',', start: 24, end: 24 }
|
|
55
|
+
{ type: 'whitespace', value: '\n\t', start: 25, end: 26 }
|
|
56
|
+
{ type: 'open-key-quote', value: '"', start: 27, end: 27 }
|
|
57
|
+
{ type: 'key', value: 'members', start: 28, end: 34 }
|
|
58
|
+
{ type: 'close-key-quote', value: '"', start: 35, end: 35 }
|
|
59
|
+
{ type: 'colon', value: ':', start: 36, end: 36 }
|
|
60
|
+
{ type: 'whitespace', value: ' ', start: 37, end: 37 }
|
|
61
|
+
{ type: 'array-open', value: '[', start: 38, end: 38 }
|
|
62
|
+
{ type: 'whitespace', value: '\n\t\t', start: 39, end: 41 }
|
|
63
|
+
{ type: 'open-string-quote', value: '"', start: 42, end: 42 }
|
|
64
|
+
{ type: 'string', value: 'Fred', start: 43, end: 46 }
|
|
65
|
+
{ type: 'close-string-quote', value: '"', start: 47, end: 47 }
|
|
66
|
+
{ type: 'array-element-comma', value: ',', start: 48, end: 48 }
|
|
67
|
+
{ type: 'whitespace', value: '\n\t\t', start: 49, end: 51 }
|
|
68
|
+
{ type: 'open-string-quote', value: '"', start: 52, end: 52 }
|
|
69
|
+
{ type: 'string', value: 'Daphne', start: 53, end: 58 }
|
|
70
|
+
{ type: 'close-string-quote', value: '"', start: 59, end: 59 }
|
|
71
|
+
{ type: 'array-element-comma', value: ',', start: 60, end: 60 }
|
|
72
|
+
{ type: 'whitespace', value: '\n\t\t', start: 61, end: 63 }
|
|
73
|
+
{ type: 'open-string-quote', value: '"', start: 64, end: 64 }
|
|
74
|
+
{ type: 'string', value: 'Velma', start: 65, end: 69 }
|
|
75
|
+
{ type: 'close-string-quote', value: '"', start: 70, end: 70 }
|
|
76
|
+
{ type: 'array-element-comma', value: ',', start: 71, end: 71 }
|
|
77
|
+
{ type: 'whitespace', value: '\n\t\t', start: 72, end: 74 }
|
|
78
|
+
{ type: 'open-string-quote', value: '"', start: 75, end: 75 }
|
|
79
|
+
{ type: 'string', value: 'Shaggy', start: 76, end: 81 }
|
|
80
|
+
{ type: 'close-string-quote', value: '"', start: 82, end: 82 }
|
|
81
|
+
{ type: 'array-element-comma', value: ',', start: 83, end: 83 }
|
|
82
|
+
{ type: 'whitespace', value: '\n\t\t', start: 84, end: 86 }
|
|
83
|
+
{ type: 'open-string-quote', value: '"', start: 87, end: 87 }
|
|
84
|
+
{ type: 'string', value: 'Scooby', start: 88, end: 93 }
|
|
85
|
+
{ type: 'close-string-quote', value: '"', start: 94, end: 94 }
|
|
86
|
+
{ type: 'whitespace', value: '\n\t', start: 95, end: 96 }
|
|
87
|
+
{ type: 'array-close', value: ']', start: 97, end: 97 }
|
|
88
|
+
{ type: 'object-member-comma', value: ',', start: 98, end: 98 }
|
|
89
|
+
{ type: 'whitespace', value: '\n\t', start: 99, end: 100 }
|
|
90
|
+
{ type: 'open-key-quote', value: '"', start: 101, end: 101 }
|
|
91
|
+
{ type: 'key', value: 'aired', start: 102, end: 106 }
|
|
92
|
+
{ type: 'close-key-quote', value: '"', start: 107, end: 107 }
|
|
93
|
+
{ type: 'colon', value: ':', start: 108, end: 108 }
|
|
94
|
+
{ type: 'whitespace', value: ' ', start: 109, end: 109 }
|
|
95
|
+
{ type: 'number', value: '1969', start: 110, end: 113 }
|
|
96
|
+
{ type: 'whitespace', value: '\n', start: 114, end: 114 }
|
|
97
|
+
{ type: 'object-close', value: '}', start: 115, end: 115 }
|
|
98
|
+
{ type: 'eof', value: '', start: null, end: 116 }
|
|
100
99
|
*/
|
|
101
100
|
```
|
package/package.json
CHANGED
package/src/index.js
CHANGED
|
@@ -1,44 +1,87 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* @typedef {Object} TokenizerOptions
|
|
3
|
-
* @property {string|undefined} [locale = 'en-US']
|
|
4
3
|
* @property {AbortSignal|undefined} [signal]
|
|
5
4
|
* @property {boolean|undefined} [debug]
|
|
6
5
|
*/
|
|
7
6
|
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
7
|
+
/**
|
|
8
|
+
* @typedef {Object} TokenBase
|
|
9
|
+
* @property {string} type
|
|
10
|
+
* @property {string} value
|
|
11
|
+
* @property {number|null} start
|
|
12
|
+
* @property {number|null} end
|
|
13
|
+
*/
|
|
14
|
+
|
|
15
|
+
/**
|
|
16
|
+
* @typedef {Object} ErrorTokenBase
|
|
17
|
+
* @property {'error'} type
|
|
18
|
+
* @property {string} cause
|
|
19
|
+
*/
|
|
20
|
+
/** @typedef {TokenBase & ErrorTokenBase} ErrorToken */
|
|
21
|
+
/** @typedef {TokenBase & { type: 'eof' }} EOFToken */
|
|
22
|
+
/** @typedef {TokenBase & { type: 'whitespace' }} WhitespaceToken */
|
|
23
|
+
/** @typedef {TokenBase & { type: 'array-open' }} ArrayOpenToken */
|
|
24
|
+
/** @typedef {TokenBase & { type: 'array-close' }} ArrayCloseToken */
|
|
25
|
+
/** @typedef {TokenBase & { type: 'array-close-immediate' }} ArrayCloseImmediateToken */
|
|
26
|
+
/** @typedef {TokenBase & { type: 'array-element-comma' }} ArrayCommaToken */
|
|
27
|
+
/** @typedef {TokenBase & { type: 'object-open' }} ObjectOpenToken */
|
|
28
|
+
/** @typedef {TokenBase & { type: 'object-close' }} ObjectCloseToken */
|
|
29
|
+
/** @typedef {TokenBase & { type: 'object-close-immediate' }} ObjectCloseImmediateToken */
|
|
30
|
+
/** @typedef {TokenBase & { type: 'object-member-comma' }} ObjectCommaToken */
|
|
31
|
+
/** @typedef {TokenBase & { type: 'open-key-quote' }} OpenKeyQuoteToken */
|
|
32
|
+
/** @typedef {TokenBase & { type: 'key' }} KeyToken */
|
|
33
|
+
/** @typedef {TokenBase & { type: 'close-key-quote' }} CloseKeyQuoteToken */
|
|
34
|
+
/** @typedef {TokenBase & { type: 'colon' }} ColonToken */
|
|
35
|
+
/** @typedef {TokenBase & { type: 'open-string-quote' }} OpenStringQuoteToken */
|
|
36
|
+
/** @typedef {TokenBase & { type: 'string' }} StringToken */
|
|
37
|
+
/** @typedef {TokenBase & { type: 'close-string-quote' }} CloseStringQuoteToken */
|
|
38
|
+
/** @typedef {TokenBase & { type: 'true' }} TrueToken */
|
|
39
|
+
/** @typedef {TokenBase & { type: 'false' }} FalseToken */
|
|
40
|
+
/** @typedef {TokenBase & { type: 'null' }} NULLToken */
|
|
41
|
+
/** @typedef {TokenBase & { type: 'number' }} NumberToken */
|
|
42
|
+
|
|
43
|
+
/** @typedef {EOFToken|WhitespaceToken|ErrorToken|ArrayOpenToken|ArrayCloseToken|ArrayCloseImmediateToken|ArrayCommaToken|ObjectOpenToken|ObjectCloseToken|ObjectCloseImmediateToken|ObjectCommaToken|OpenKeyQuoteToken|KeyToken|CloseKeyQuoteToken|ColonToken|OpenStringQuoteToken|StringToken|CloseStringQuoteToken|TrueToken|FalseToken|NULLToken|NumberToken} Token */
|
|
44
|
+
|
|
45
|
+
/**
|
|
46
|
+
* @typedef {Object} AccumulationState
|
|
47
|
+
* @property {string} value
|
|
48
|
+
* @property {number|null} start
|
|
49
|
+
* @property {number|null} end
|
|
50
|
+
*/
|
|
51
|
+
|
|
52
|
+
export const TOKEN_ERROR = 'error'
|
|
53
|
+
export const TOKEN_EOF = 'eof'
|
|
54
|
+
export const TOKEN_WHITESPACE = 'whitespace'
|
|
12
55
|
|
|
13
56
|
// Array / Elements
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
57
|
+
export const TOKEN_ARRAY_OPEN = 'array-open'
|
|
58
|
+
export const TOKEN_ARRAY_CLOSE = 'array-close'
|
|
59
|
+
export const TOKEN_ARRAY_CLOSE_IMMEDIATE = 'array-close-immediate'
|
|
60
|
+
export const TOKEN_ARRAY_ELEMENT_COMMA = 'array-element-comma'
|
|
18
61
|
|
|
19
62
|
// Object
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
63
|
+
export const TOKEN_OBJECT_OPEN = 'object-open'
|
|
64
|
+
export const TOKEN_OBJECT_CLOSE = 'object-close'
|
|
65
|
+
export const TOKEN_OBJECT_CLOSE_IMMEDIATE = 'object-close-immediate'
|
|
66
|
+
export const TOKEN_OBJECT_MEMBER_COMMA = 'object-member-comma'
|
|
67
|
+
export const TOKEN_OBJECT_KEY_OPEN = 'open-key-quote'
|
|
68
|
+
export const TOKEN_OBJECT_KEY = 'key'
|
|
69
|
+
export const TOKEN_OBJECT_KEY_CLOSE = 'close-key-quote'
|
|
70
|
+
export const TOKEN_OBJECT_COLON = 'colon'
|
|
28
71
|
|
|
29
72
|
// String
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
73
|
+
export const TOKEN_STRING_OPEN = 'open-string-quote'
|
|
74
|
+
export const TOKEN_STRING = 'string'
|
|
75
|
+
export const TOKEN_STRING_CLOSE = 'close-string-quote'
|
|
33
76
|
|
|
34
77
|
// Primitives
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
78
|
+
export const TOKEN_TRUE = 'true'
|
|
79
|
+
export const TOKEN_FALSE = 'false'
|
|
80
|
+
export const TOKEN_NULL = 'null'
|
|
38
81
|
|
|
39
82
|
// Number
|
|
40
|
-
|
|
41
|
-
|
|
83
|
+
export const TOKEN_NUMBER = 'number'
|
|
84
|
+
|
|
42
85
|
|
|
43
86
|
export const EMPTY = ''
|
|
44
87
|
|
|
@@ -53,6 +96,7 @@ export const STATE = {
|
|
|
53
96
|
ARY_OPEN: 'ao',
|
|
54
97
|
ARY_CLOSE: 'ac',
|
|
55
98
|
MEMBERS: 'ms',
|
|
99
|
+
MEMBERS_CONTINUE: 'msc',
|
|
56
100
|
MEMBER: 'm',
|
|
57
101
|
KEY: 'key',
|
|
58
102
|
MEMBER_KEY_AFTER: 'mka',
|
|
@@ -73,11 +117,64 @@ export const STATE = {
|
|
|
73
117
|
U_HEX4: 'hex'
|
|
74
118
|
}
|
|
75
119
|
|
|
120
|
+
export const EXIT_STATES = [ STATE.ELEMENT_AFTER ]
|
|
121
|
+
|
|
76
122
|
export const ESCAPE_CHARS = [
|
|
77
123
|
'"', '\\', '/', 'b', 'f', 'n', 'r', 't', 'u'
|
|
78
124
|
]
|
|
79
125
|
|
|
80
|
-
export const
|
|
126
|
+
export const HEX_CHARS = [
|
|
127
|
+
'1', '2', '3', '4', '5', '6', '7', '8', '9', '0',
|
|
128
|
+
'a', 'b', 'c', 'd', 'e', 'f',
|
|
129
|
+
'A', 'B', 'C', 'D', 'E', 'F',
|
|
130
|
+
]
|
|
131
|
+
|
|
132
|
+
export class Accumulator {
|
|
133
|
+
/**
|
|
134
|
+
* @param {AccumulationState} accumulationState
|
|
135
|
+
*/
|
|
136
|
+
static #reset(accumulationState) {
|
|
137
|
+
accumulationState.value = EMPTY
|
|
138
|
+
accumulationState.start = null
|
|
139
|
+
accumulationState.end = null
|
|
140
|
+
}
|
|
141
|
+
|
|
142
|
+
/**
|
|
143
|
+
* @param {AccumulationState} accumulationState
|
|
144
|
+
* @param {number} start
|
|
145
|
+
*/
|
|
146
|
+
static start(accumulationState, start) {
|
|
147
|
+
accumulationState.value = EMPTY
|
|
148
|
+
accumulationState.start = start
|
|
149
|
+
accumulationState.end = start
|
|
150
|
+
}
|
|
151
|
+
|
|
152
|
+
/**
|
|
153
|
+
* @param {AccumulationState} accumulationState
|
|
154
|
+
* @param {string} value
|
|
155
|
+
* @param {number} end
|
|
156
|
+
*/
|
|
157
|
+
static accumulate(accumulationState, value, end) {
|
|
158
|
+
accumulationState.value += value
|
|
159
|
+
accumulationState.end = end
|
|
160
|
+
}
|
|
161
|
+
|
|
162
|
+
/**
|
|
163
|
+
* @param {AccumulationState} accumulationState
|
|
164
|
+
*/
|
|
165
|
+
static end(accumulationState) {
|
|
166
|
+
const result = { ...accumulationState }
|
|
167
|
+
Accumulator.#reset(accumulationState)
|
|
168
|
+
return result
|
|
169
|
+
}
|
|
170
|
+
|
|
171
|
+
/**
|
|
172
|
+
* @param {AccumulationState} accumulationState
|
|
173
|
+
*/
|
|
174
|
+
static empty(accumulationState) {
|
|
175
|
+
return accumulationState.value === EMPTY
|
|
176
|
+
}
|
|
177
|
+
}
|
|
81
178
|
|
|
82
179
|
export class JSONTokenizer {
|
|
83
180
|
/**
|
|
@@ -100,63 +197,107 @@ export class JSONTokenizer {
|
|
|
100
197
|
return true
|
|
101
198
|
}
|
|
102
199
|
|
|
200
|
+
/**
|
|
201
|
+
* @param {string} str
|
|
202
|
+
*/
|
|
203
|
+
static isValidHEX(str) {
|
|
204
|
+
return HEX_CHARS.includes(str)
|
|
205
|
+
}
|
|
206
|
+
|
|
103
207
|
/**
|
|
104
208
|
* @param {string} str
|
|
105
209
|
* @param {TokenizerOptions} [options]
|
|
210
|
+
* @return {Generator<Token, undefined, undefined>}
|
|
106
211
|
*/
|
|
107
212
|
static *tokenize(str, options) {
|
|
108
213
|
const debug = (options?.debug ?? false) === true
|
|
109
214
|
const signal = options?.signal
|
|
110
|
-
const locale = options?.locale ?? DEFAULT_LOCALE
|
|
111
215
|
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
216
|
+
using segmentIter = Iterator.from(str)
|
|
217
|
+
.map((item, index) => ({
|
|
218
|
+
segment: item,
|
|
219
|
+
done: false,
|
|
220
|
+
index
|
|
221
|
+
}))
|
|
115
222
|
|
|
116
223
|
/** @type {Array<string>} */
|
|
117
|
-
const stack = []
|
|
224
|
+
const stack = [ ]
|
|
118
225
|
|
|
119
226
|
/** @type {string|undefined} */
|
|
120
227
|
let state = STATE.ELEMENT
|
|
121
228
|
|
|
122
229
|
let next = segmentIter.next()
|
|
123
230
|
|
|
124
|
-
|
|
231
|
+
/** @type {AccumulationState} */
|
|
232
|
+
const accumulatorState = {
|
|
233
|
+
value: EMPTY,
|
|
234
|
+
start: null,
|
|
235
|
+
end: null
|
|
236
|
+
}
|
|
237
|
+
|
|
238
|
+
let line = 1
|
|
125
239
|
|
|
126
240
|
//
|
|
127
241
|
if(next.done) {
|
|
128
|
-
yield { type:
|
|
242
|
+
yield { type: TOKEN_ERROR, value: EMPTY, start: 0, end: 0, cause: 'empty' }
|
|
129
243
|
return
|
|
130
244
|
}
|
|
131
245
|
|
|
132
246
|
//
|
|
133
247
|
while(true) {
|
|
134
248
|
if(signal?.aborted) {
|
|
135
|
-
|
|
249
|
+
yield { type: TOKEN_EOF, value: EMPTY, start: null, end: null }
|
|
136
250
|
break
|
|
137
251
|
}
|
|
138
252
|
|
|
139
253
|
//
|
|
140
254
|
if(next.done) {
|
|
141
|
-
if(
|
|
142
|
-
|
|
143
|
-
|
|
255
|
+
if(state === undefined) {
|
|
256
|
+
yield { type: TOKEN_ERROR, value: EMPTY, start: null, end: null, cause: 'undefined state' }
|
|
257
|
+
}
|
|
258
|
+
|
|
259
|
+
//
|
|
260
|
+
if(state !== undefined && !EXIT_STATES.includes(state)) {
|
|
261
|
+
yield { type: TOKEN_ERROR, value: EMPTY, start: null, end: str.length, cause: `invalid exit state (${state})` }
|
|
262
|
+
}
|
|
263
|
+
|
|
264
|
+
//
|
|
265
|
+
if(!Accumulator.empty(accumulatorState)) {
|
|
266
|
+
yield { type: TOKEN_ERROR, ...Accumulator.end(accumulatorState), cause: 'accumulator not empty (done)' }
|
|
267
|
+
}
|
|
268
|
+
|
|
269
|
+
//
|
|
270
|
+
if(stack.length !== 0) {
|
|
271
|
+
yield { type: TOKEN_ERROR, value: EMPTY, start: null, end: str.length, cause: 'stack not empty' }
|
|
272
|
+
}
|
|
273
|
+
|
|
274
|
+
//
|
|
275
|
+
yield { type: TOKEN_EOF, value: EMPTY, start: null, end: str.length }
|
|
144
276
|
break
|
|
145
277
|
}
|
|
146
278
|
|
|
147
279
|
//
|
|
148
280
|
if(state === undefined) {
|
|
149
|
-
|
|
281
|
+
//
|
|
282
|
+
if(!Accumulator.empty(accumulatorState)) {
|
|
283
|
+
yield { type: TOKEN_ERROR, ...Accumulator.end(accumulatorState), cause: 'accumulator not empty (stack)' }
|
|
284
|
+
}
|
|
285
|
+
|
|
286
|
+
//
|
|
150
287
|
if(!next.done) {
|
|
151
|
-
|
|
288
|
+
Accumulator.start(accumulatorState, next.value.index)
|
|
289
|
+
|
|
152
290
|
while(!next.done) {
|
|
153
|
-
|
|
291
|
+
if(signal?.aborted) { break }
|
|
292
|
+
Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
|
|
154
293
|
next = segmentIter.next()
|
|
155
294
|
}
|
|
156
|
-
|
|
295
|
+
|
|
296
|
+
yield { type: TOKEN_ERROR, ...Accumulator.end(accumulatorState), cause: 'accumulator not empty (not done)' }
|
|
157
297
|
}
|
|
158
298
|
|
|
159
|
-
|
|
299
|
+
//
|
|
300
|
+
yield { type: TOKEN_EOF, value: EMPTY, start: null, end: null }
|
|
160
301
|
break
|
|
161
302
|
}
|
|
162
303
|
|
|
@@ -164,9 +305,9 @@ export class JSONTokenizer {
|
|
|
164
305
|
if(debug) {
|
|
165
306
|
console.log({
|
|
166
307
|
seg: next.value.segment,
|
|
167
|
-
state,
|
|
168
|
-
stack.join(','),
|
|
169
|
-
|
|
308
|
+
state,
|
|
309
|
+
stack: stack.join(','),
|
|
310
|
+
...accumulatorState
|
|
170
311
|
})
|
|
171
312
|
}
|
|
172
313
|
|
|
@@ -178,7 +319,7 @@ export class JSONTokenizer {
|
|
|
178
319
|
state = stack.pop()
|
|
179
320
|
break
|
|
180
321
|
case ',':
|
|
181
|
-
yield { type:
|
|
322
|
+
yield { type: TOKEN_ARRAY_ELEMENT_COMMA, value: next.value.segment, start: next.value.index, end: next.value.index }
|
|
182
323
|
stack.push(STATE.ELEMENTS)
|
|
183
324
|
state = STATE.ELEMENT
|
|
184
325
|
next = segmentIter.next()
|
|
@@ -192,6 +333,7 @@ export class JSONTokenizer {
|
|
|
192
333
|
case STATE.ELEMENT:
|
|
193
334
|
switch(next.value.segment) {
|
|
194
335
|
case ' ': case '\r': case '\n': case '\t':
|
|
336
|
+
Accumulator.start(accumulatorState, next.value.index)
|
|
195
337
|
stack.push(STATE.ELEMENT)
|
|
196
338
|
state = STATE.WS
|
|
197
339
|
break
|
|
@@ -204,6 +346,7 @@ export class JSONTokenizer {
|
|
|
204
346
|
case STATE.ELEMENT_AFTER:
|
|
205
347
|
switch(next.value.segment) {
|
|
206
348
|
case ' ': case '\r': case '\n': case '\t':
|
|
349
|
+
Accumulator.start(accumulatorState, next.value.index)
|
|
207
350
|
stack.push(STATE.ELEMENT_AFTER)
|
|
208
351
|
state = STATE.WS
|
|
209
352
|
break
|
|
@@ -217,7 +360,7 @@ export class JSONTokenizer {
|
|
|
217
360
|
state = stack.pop()
|
|
218
361
|
break
|
|
219
362
|
default:
|
|
220
|
-
yield { type:
|
|
363
|
+
yield { type: TOKEN_ERROR, value: next.value.segment, start: next.value.index, end: next.value.index, cause: 'expect close, comma or whitespace' }
|
|
221
364
|
next = segmentIter.next()
|
|
222
365
|
break
|
|
223
366
|
}
|
|
@@ -225,16 +368,20 @@ export class JSONTokenizer {
|
|
|
225
368
|
case STATE.WS:
|
|
226
369
|
switch(next.value.segment) {
|
|
227
370
|
case ' ': case '\r': case '\n': case '\t':
|
|
228
|
-
|
|
371
|
+
|
|
372
|
+
if(next.value.segment === '\n') {
|
|
373
|
+
line += 1
|
|
374
|
+
}
|
|
375
|
+
|
|
376
|
+
Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
|
|
229
377
|
next = segmentIter.next()
|
|
230
378
|
if(next.done) {
|
|
231
|
-
yield { type:
|
|
232
|
-
|
|
379
|
+
yield { type: TOKEN_WHITESPACE, ...Accumulator.end(accumulatorState) }
|
|
380
|
+
state = stack.pop()
|
|
233
381
|
}
|
|
234
382
|
break
|
|
235
383
|
default:
|
|
236
|
-
yield { type:
|
|
237
|
-
accumulator = EMPTY
|
|
384
|
+
yield { type: TOKEN_WHITESPACE, ...Accumulator.end(accumulatorState) }
|
|
238
385
|
state = stack.pop()
|
|
239
386
|
break
|
|
240
387
|
}
|
|
@@ -242,157 +389,150 @@ export class JSONTokenizer {
|
|
|
242
389
|
case STATE.VALUE:
|
|
243
390
|
switch(next.value.segment) {
|
|
244
391
|
case '{':
|
|
245
|
-
yield { type:
|
|
392
|
+
yield { type: TOKEN_OBJECT_OPEN, value: next.value.segment, start: next.value.index, end: next.value.index }
|
|
246
393
|
state = STATE.OBJ_OPEN
|
|
247
394
|
next = segmentIter.next()
|
|
248
395
|
break
|
|
249
396
|
case '[':
|
|
250
|
-
yield { type:
|
|
397
|
+
yield { type: TOKEN_ARRAY_OPEN, value: next.value.segment, start: next.value.index, end: next.value.index }
|
|
251
398
|
state = STATE.ARY_OPEN
|
|
252
399
|
next = segmentIter.next()
|
|
253
400
|
break
|
|
254
401
|
case '"':
|
|
255
|
-
yield { type:
|
|
256
|
-
accumulator = EMPTY
|
|
402
|
+
yield { type: TOKEN_STRING_OPEN, value: next.value.segment, start: next.value.index, end: next.value.index }
|
|
257
403
|
state = STATE.STR
|
|
258
404
|
next = segmentIter.next()
|
|
405
|
+
if(!next.done) { Accumulator.start(accumulatorState, next.value.index) }
|
|
259
406
|
break
|
|
260
407
|
case 't':
|
|
261
|
-
|
|
408
|
+
Accumulator.start(accumulatorState, next.value.index)
|
|
409
|
+
Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
|
|
262
410
|
|
|
263
411
|
next = segmentIter.next()
|
|
264
412
|
if(next.done || next.value.segment !== 'r') {
|
|
265
|
-
yield { type:
|
|
266
|
-
accumulator = EMPTY
|
|
413
|
+
yield { type: TOKEN_ERROR, ...Accumulator.end(accumulatorState), cause: 'expected true literal (r)' }
|
|
267
414
|
state = stack.pop()
|
|
268
415
|
break
|
|
269
416
|
}
|
|
270
|
-
|
|
417
|
+
Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
|
|
271
418
|
|
|
272
419
|
next = segmentIter.next()
|
|
273
420
|
if(next.done || next.value.segment !== 'u') {
|
|
274
|
-
yield { type:
|
|
275
|
-
accumulator = EMPTY
|
|
421
|
+
yield { type: TOKEN_ERROR, ...Accumulator.end(accumulatorState), cause: 'expected true literal (u)' }
|
|
276
422
|
state = stack.pop()
|
|
277
423
|
break
|
|
278
424
|
}
|
|
279
|
-
|
|
425
|
+
Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
|
|
280
426
|
|
|
281
427
|
next = segmentIter.next()
|
|
282
428
|
if(next.done || next.value.segment !== 'e') {
|
|
283
|
-
yield { type:
|
|
284
|
-
accumulator = EMPTY
|
|
429
|
+
yield { type: TOKEN_ERROR, ...Accumulator.end(accumulatorState), cause: 'expected true literal (e)' }
|
|
285
430
|
state = stack.pop()
|
|
286
431
|
break
|
|
287
432
|
}
|
|
288
|
-
|
|
433
|
+
Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
|
|
289
434
|
|
|
290
|
-
yield { type:
|
|
291
|
-
accumulator = EMPTY
|
|
435
|
+
yield { type: TOKEN_TRUE, ...Accumulator.end(accumulatorState) }
|
|
292
436
|
|
|
293
437
|
next = segmentIter.next()
|
|
294
438
|
state = stack.pop()
|
|
295
439
|
break
|
|
296
440
|
case 'f':
|
|
297
|
-
|
|
441
|
+
Accumulator.start(accumulatorState, next.value.index)
|
|
442
|
+
Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
|
|
298
443
|
|
|
299
444
|
next = segmentIter.next()
|
|
300
445
|
if(next.done || next.value.segment !== 'a') {
|
|
301
|
-
yield { type:
|
|
302
|
-
accumulator = EMPTY
|
|
446
|
+
yield { type: TOKEN_ERROR, ...Accumulator.end(accumulatorState), cause: 'expected false literal (a)' }
|
|
303
447
|
state = stack.pop()
|
|
304
448
|
break
|
|
305
449
|
}
|
|
306
|
-
|
|
450
|
+
Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
|
|
307
451
|
|
|
308
452
|
next = segmentIter.next()
|
|
309
453
|
if(next.done || next.value.segment !== 'l') {
|
|
310
|
-
yield { type:
|
|
311
|
-
accumulator = EMPTY
|
|
454
|
+
yield { type: TOKEN_ERROR, ...Accumulator.end(accumulatorState), cause: 'expected false literal (l)' }
|
|
312
455
|
state = stack.pop()
|
|
313
456
|
break
|
|
314
457
|
}
|
|
315
|
-
|
|
458
|
+
Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
|
|
316
459
|
|
|
317
460
|
next = segmentIter.next()
|
|
318
461
|
if(next.done || next.value.segment !== 's') {
|
|
319
|
-
yield { type:
|
|
320
|
-
accumulator = EMPTY
|
|
462
|
+
yield { type: TOKEN_ERROR, ...Accumulator.end(accumulatorState), cause: 'expected false literal (s)' }
|
|
321
463
|
state = stack.pop()
|
|
322
464
|
break
|
|
323
465
|
}
|
|
324
|
-
|
|
466
|
+
Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
|
|
325
467
|
|
|
326
468
|
next = segmentIter.next()
|
|
327
469
|
if(next.done || next.value.segment !== 'e') {
|
|
328
|
-
yield { type:
|
|
329
|
-
accumulator = EMPTY
|
|
470
|
+
yield { type: TOKEN_ERROR, ...Accumulator.end(accumulatorState), cause: 'expected false literal (e)' }
|
|
330
471
|
state = stack.pop()
|
|
331
472
|
break
|
|
332
473
|
}
|
|
333
|
-
|
|
474
|
+
Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
|
|
334
475
|
|
|
335
|
-
yield { type:
|
|
336
|
-
accumulator = EMPTY
|
|
476
|
+
yield { type: TOKEN_FALSE, ...Accumulator.end(accumulatorState) }
|
|
337
477
|
|
|
338
478
|
next = segmentIter.next()
|
|
339
479
|
state = stack.pop()
|
|
340
480
|
break
|
|
341
481
|
case 'n':
|
|
342
|
-
|
|
482
|
+
Accumulator.start(accumulatorState, next.value.index)
|
|
483
|
+
Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
|
|
343
484
|
|
|
344
485
|
next = segmentIter.next()
|
|
345
486
|
if(next.done || next.value.segment !== 'u') {
|
|
346
|
-
yield { type:
|
|
347
|
-
accumulator = EMPTY
|
|
487
|
+
yield { type: TOKEN_ERROR, ...Accumulator.end(accumulatorState), cause: 'expected null literal (u)' }
|
|
348
488
|
state = stack.pop()
|
|
349
489
|
break
|
|
350
490
|
}
|
|
351
|
-
|
|
491
|
+
Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
|
|
352
492
|
|
|
353
493
|
next = segmentIter.next()
|
|
354
494
|
if(next.done || next.value.segment !== 'l') {
|
|
355
|
-
yield { type:
|
|
356
|
-
accumulator = EMPTY
|
|
495
|
+
yield { type: TOKEN_ERROR, ...Accumulator.end(accumulatorState), cause: 'expected null literal (l)' }
|
|
357
496
|
state = stack.pop()
|
|
358
497
|
break
|
|
359
498
|
}
|
|
360
|
-
|
|
499
|
+
Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
|
|
361
500
|
|
|
362
501
|
next = segmentIter.next()
|
|
363
502
|
if(next.done || next.value.segment !== 'l') {
|
|
364
|
-
yield { type:
|
|
365
|
-
accumulator = EMPTY
|
|
503
|
+
yield { type: TOKEN_ERROR, ...Accumulator.end(accumulatorState), cause: 'expected null literal (l)' }
|
|
366
504
|
state = stack.pop()
|
|
367
505
|
break
|
|
368
506
|
}
|
|
369
|
-
|
|
507
|
+
Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
|
|
370
508
|
|
|
371
|
-
yield { type:
|
|
372
|
-
accumulator = EMPTY
|
|
509
|
+
yield { type: TOKEN_NULL, ...Accumulator.end(accumulatorState) }
|
|
373
510
|
|
|
374
511
|
next = segmentIter.next()
|
|
375
512
|
state = stack.pop()
|
|
376
513
|
break
|
|
377
514
|
case '-':
|
|
378
|
-
|
|
515
|
+
Accumulator.start(accumulatorState, next.value.index)
|
|
516
|
+
Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
|
|
379
517
|
state = STATE.NUMBER
|
|
380
518
|
next = segmentIter.next()
|
|
381
519
|
break
|
|
382
520
|
case '0':
|
|
383
|
-
|
|
521
|
+
Accumulator.start(accumulatorState, next.value.index)
|
|
522
|
+
Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
|
|
523
|
+
|
|
384
524
|
state = STATE.NUMBER_INT_AFTER
|
|
385
525
|
next = segmentIter.next()
|
|
386
526
|
break
|
|
387
527
|
case '1': case '2': case '3':
|
|
388
528
|
case '4': case '5': case '6':
|
|
389
529
|
case '7': case '8': case '9':
|
|
390
|
-
|
|
530
|
+
Accumulator.start(accumulatorState, next.value.index)
|
|
391
531
|
state = STATE.NUMBER_INT
|
|
392
532
|
break
|
|
393
533
|
default:
|
|
394
534
|
//
|
|
395
|
-
yield { type:
|
|
535
|
+
yield { type: TOKEN_ERROR, value: next.value.segment, start: next.value.index, end: next.value.index, cause: 'unexpected value' }
|
|
396
536
|
next = segmentIter.next()
|
|
397
537
|
break
|
|
398
538
|
}
|
|
@@ -400,7 +540,7 @@ export class JSONTokenizer {
|
|
|
400
540
|
case STATE.NUMBER:
|
|
401
541
|
switch(next.value.segment) {
|
|
402
542
|
case '0':
|
|
403
|
-
|
|
543
|
+
Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
|
|
404
544
|
state = STATE.NUMBER_INT_AFTER
|
|
405
545
|
next = segmentIter.next()
|
|
406
546
|
break
|
|
@@ -414,14 +554,13 @@ export class JSONTokenizer {
|
|
|
414
554
|
case '1': case '2': case '3':
|
|
415
555
|
case '4': case '5': case '6':
|
|
416
556
|
case '7': case '8': case '9':
|
|
417
|
-
|
|
557
|
+
Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
|
|
418
558
|
state = STATE.NUMBER_INT
|
|
419
559
|
next = segmentIter.next()
|
|
420
560
|
break
|
|
421
561
|
default:
|
|
422
|
-
|
|
423
|
-
yield { type:
|
|
424
|
-
accumulator = EMPTY
|
|
562
|
+
Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
|
|
563
|
+
yield { type: TOKEN_ERROR, ...Accumulator.end(accumulatorState), cause: 'invalid number format' }
|
|
425
564
|
next = segmentIter.next()
|
|
426
565
|
break
|
|
427
566
|
}
|
|
@@ -432,12 +571,12 @@ export class JSONTokenizer {
|
|
|
432
571
|
case '1': case '2': case '3':
|
|
433
572
|
case '4': case '5': case '6':
|
|
434
573
|
case '7': case '8': case '9':
|
|
435
|
-
|
|
574
|
+
Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
|
|
436
575
|
next = segmentIter.next()
|
|
437
576
|
|
|
438
577
|
if(next.done) {
|
|
439
|
-
yield { type:
|
|
440
|
-
|
|
578
|
+
yield { type: TOKEN_NUMBER, ...Accumulator.end(accumulatorState) }
|
|
579
|
+
state = stack.pop()
|
|
441
580
|
}
|
|
442
581
|
break
|
|
443
582
|
default:
|
|
@@ -448,19 +587,18 @@ export class JSONTokenizer {
|
|
|
448
587
|
case STATE.NUMBER_INT_AFTER:
|
|
449
588
|
switch(next.value.segment) {
|
|
450
589
|
case '.':
|
|
451
|
-
|
|
590
|
+
Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
|
|
452
591
|
next = segmentIter.next()
|
|
453
592
|
state = STATE.NUMBER_DECIMAL_FIRST
|
|
454
593
|
break
|
|
455
594
|
case 'e':
|
|
456
595
|
case 'E':
|
|
457
|
-
|
|
596
|
+
Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
|
|
458
597
|
state = STATE.NUMBER_EXPONENT_SIGN
|
|
459
598
|
next = segmentIter.next()
|
|
460
599
|
break
|
|
461
600
|
default:
|
|
462
|
-
yield { type:
|
|
463
|
-
accumulator = EMPTY
|
|
601
|
+
yield { type: TOKEN_NUMBER, ...Accumulator.end(accumulatorState) }
|
|
464
602
|
state = stack.pop()
|
|
465
603
|
break
|
|
466
604
|
}
|
|
@@ -471,20 +609,20 @@ export class JSONTokenizer {
|
|
|
471
609
|
case '1': case '2': case '3':
|
|
472
610
|
case '4': case '5': case '6':
|
|
473
611
|
case '7': case '8': case '9':
|
|
474
|
-
|
|
612
|
+
Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
|
|
475
613
|
next = segmentIter.next()
|
|
476
614
|
state = STATE.NUMBER_DECIMAL
|
|
477
615
|
|
|
478
616
|
if(next.done) {
|
|
479
|
-
yield { type:
|
|
480
|
-
|
|
617
|
+
yield { type: TOKEN_NUMBER, ...Accumulator.end(accumulatorState) }
|
|
618
|
+
state = stack.pop()
|
|
481
619
|
}
|
|
482
620
|
break
|
|
483
621
|
default:
|
|
484
|
-
|
|
485
|
-
yield { type:
|
|
486
|
-
accumulator = EMPTY
|
|
622
|
+
Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
|
|
623
|
+
yield { type: TOKEN_ERROR, ...Accumulator.end(accumulatorState), cause: 'invalid decimal format' }
|
|
487
624
|
next = segmentIter.next()
|
|
625
|
+
state = stack.pop()
|
|
488
626
|
break
|
|
489
627
|
}
|
|
490
628
|
break
|
|
@@ -494,23 +632,22 @@ export class JSONTokenizer {
|
|
|
494
632
|
case '1': case '2': case '3':
|
|
495
633
|
case '4': case '5': case '6':
|
|
496
634
|
case '7': case '8': case '9':
|
|
497
|
-
|
|
635
|
+
Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
|
|
498
636
|
next = segmentIter.next()
|
|
499
637
|
|
|
500
638
|
if(next.done) {
|
|
501
|
-
yield { type:
|
|
502
|
-
|
|
639
|
+
yield { type: TOKEN_NUMBER, ...Accumulator.end(accumulatorState) }
|
|
640
|
+
state = stack.pop()
|
|
503
641
|
}
|
|
504
642
|
break
|
|
505
643
|
case 'e':
|
|
506
644
|
case 'E':
|
|
507
|
-
|
|
645
|
+
Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
|
|
508
646
|
state = STATE.NUMBER_EXPONENT_SIGN
|
|
509
647
|
next = segmentIter.next()
|
|
510
648
|
break
|
|
511
649
|
default:
|
|
512
|
-
yield { type:
|
|
513
|
-
accumulator = EMPTY
|
|
650
|
+
yield { type: TOKEN_NUMBER, ...Accumulator.end(accumulatorState) }
|
|
514
651
|
state = stack.pop()
|
|
515
652
|
break
|
|
516
653
|
}
|
|
@@ -518,12 +655,12 @@ export class JSONTokenizer {
|
|
|
518
655
|
case STATE.NUMBER_EXPONENT_SIGN:
|
|
519
656
|
switch(next.value.segment) {
|
|
520
657
|
case '+':
|
|
521
|
-
|
|
658
|
+
Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
|
|
522
659
|
next = segmentIter.next()
|
|
523
660
|
state = STATE.NUMBER_EXPONENT_FIRST
|
|
524
661
|
break
|
|
525
662
|
case '-':
|
|
526
|
-
|
|
663
|
+
Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
|
|
527
664
|
next = segmentIter.next()
|
|
528
665
|
state = STATE.NUMBER_EXPONENT_FIRST
|
|
529
666
|
break
|
|
@@ -538,15 +675,15 @@ export class JSONTokenizer {
|
|
|
538
675
|
case '1': case '2': case '3':
|
|
539
676
|
case '4': case '5': case '6':
|
|
540
677
|
case '7': case '8': case '9':
|
|
541
|
-
|
|
678
|
+
Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
|
|
542
679
|
next = segmentIter.next()
|
|
543
680
|
state = STATE.NUMBER_EXPONENT
|
|
544
681
|
break
|
|
545
682
|
default:
|
|
546
|
-
|
|
547
|
-
yield { type:
|
|
548
|
-
accumulator = EMPTY
|
|
683
|
+
Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
|
|
684
|
+
yield { type: TOKEN_ERROR, ...Accumulator.end(accumulatorState), cause: 'invalid exponent format' }
|
|
549
685
|
next = segmentIter.next()
|
|
686
|
+
state = stack.pop()
|
|
550
687
|
break
|
|
551
688
|
}
|
|
552
689
|
break
|
|
@@ -556,12 +693,11 @@ export class JSONTokenizer {
|
|
|
556
693
|
case '1': case '2': case '3':
|
|
557
694
|
case '4': case '5': case '6':
|
|
558
695
|
case '7': case '8': case '9':
|
|
559
|
-
|
|
696
|
+
Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
|
|
560
697
|
next = segmentIter.next()
|
|
561
698
|
break
|
|
562
699
|
default:
|
|
563
|
-
yield { type:
|
|
564
|
-
accumulator = EMPTY
|
|
700
|
+
yield { type: TOKEN_NUMBER, ...Accumulator.end(accumulatorState) }
|
|
565
701
|
state = stack.pop()
|
|
566
702
|
break
|
|
567
703
|
}
|
|
@@ -569,11 +705,12 @@ export class JSONTokenizer {
|
|
|
569
705
|
case STATE.OBJ_OPEN:
|
|
570
706
|
switch(next.value.segment) {
|
|
571
707
|
case ' ': case '\r': case '\n': case '\t':
|
|
708
|
+
Accumulator.start(accumulatorState, next.value.index)
|
|
572
709
|
stack.push(STATE.OBJ_OPEN)
|
|
573
710
|
state = STATE.WS
|
|
574
711
|
break
|
|
575
712
|
case '}':
|
|
576
|
-
yield { type:
|
|
713
|
+
yield { type: TOKEN_OBJECT_CLOSE_IMMEDIATE, value: next.value.segment, start: next.value.index, end: next.value.index }
|
|
577
714
|
next = segmentIter.next()
|
|
578
715
|
state = stack.pop()
|
|
579
716
|
break
|
|
@@ -586,7 +723,7 @@ export class JSONTokenizer {
|
|
|
586
723
|
case STATE.OBJ_CLOSE:
|
|
587
724
|
switch(next.value.segment) {
|
|
588
725
|
case '}':
|
|
589
|
-
yield { type:
|
|
726
|
+
yield { type: TOKEN_OBJECT_CLOSE, value: next.value.segment, start: next.value.index, end: next.value.index }
|
|
590
727
|
next = segmentIter.next()
|
|
591
728
|
state = stack.pop()
|
|
592
729
|
break
|
|
@@ -597,36 +734,49 @@ export class JSONTokenizer {
|
|
|
597
734
|
}
|
|
598
735
|
break
|
|
599
736
|
case STATE.MEMBERS:
|
|
737
|
+
switch(next.value.segment){
|
|
738
|
+
case '}':
|
|
739
|
+
state = stack.pop()
|
|
740
|
+
break
|
|
741
|
+
default:
|
|
742
|
+
stack.push(STATE.MEMBERS_CONTINUE)
|
|
743
|
+
state = STATE.MEMBER
|
|
744
|
+
break
|
|
745
|
+
}
|
|
746
|
+
break
|
|
747
|
+
case STATE.MEMBERS_CONTINUE:
|
|
600
748
|
switch(next.value.segment){
|
|
601
749
|
case '}':
|
|
602
750
|
state = stack.pop()
|
|
603
751
|
break
|
|
604
752
|
case ',':
|
|
605
|
-
yield { type:
|
|
606
|
-
stack.push(STATE.
|
|
753
|
+
yield { type: TOKEN_OBJECT_MEMBER_COMMA, value: next.value.segment, start: next.value.index, end: next.value.index }
|
|
754
|
+
stack.push(STATE.MEMBERS_CONTINUE)
|
|
607
755
|
state = STATE.MEMBER
|
|
608
756
|
next = segmentIter.next()
|
|
609
757
|
break
|
|
610
758
|
default:
|
|
611
|
-
|
|
612
|
-
state =
|
|
759
|
+
yield { type: TOKEN_ERROR, value: next.value.segment, start: next.value.index, end: next.value.index, cause: 'expecting comma or close object' }
|
|
760
|
+
state = stack.pop()
|
|
613
761
|
break
|
|
614
762
|
}
|
|
615
763
|
break
|
|
616
764
|
case STATE.MEMBER:
|
|
617
765
|
switch(next.value.segment) {
|
|
618
766
|
case ' ': case '\r': case '\n': case '\t':
|
|
767
|
+
Accumulator.start(accumulatorState, next.value.index)
|
|
619
768
|
stack.push(STATE.MEMBER)
|
|
620
769
|
state = STATE.WS
|
|
621
770
|
break
|
|
622
771
|
case '"':
|
|
623
|
-
yield { type:
|
|
772
|
+
yield { type: TOKEN_OBJECT_KEY_OPEN, value: next.value.segment, start: next.value.index, end: next.value.index }
|
|
624
773
|
stack.push(STATE.MEMBER_KEY_AFTER)
|
|
625
774
|
state = STATE.KEY
|
|
626
775
|
next = segmentIter.next()
|
|
776
|
+
if(!next.done) { Accumulator.start(accumulatorState, next.value.index) }
|
|
627
777
|
break
|
|
628
778
|
default:
|
|
629
|
-
yield { type:
|
|
779
|
+
yield { type: TOKEN_ERROR, value: next.value.segment, start: next.value.index, end: next.value.index, cause: 'expected member key or whitespace' }
|
|
630
780
|
next = segmentIter.next()
|
|
631
781
|
break
|
|
632
782
|
}
|
|
@@ -634,17 +784,18 @@ export class JSONTokenizer {
|
|
|
634
784
|
case STATE.MEMBER_KEY_AFTER:
|
|
635
785
|
switch(next.value.segment) {
|
|
636
786
|
case ' ': case '\r': case '\n': case '\t':
|
|
787
|
+
Accumulator.start(accumulatorState, next.value.index)
|
|
637
788
|
stack.push(STATE.MEMBER_KEY_AFTER)
|
|
638
789
|
state = STATE.WS
|
|
639
790
|
break
|
|
640
791
|
case ':':
|
|
641
|
-
yield { type:
|
|
792
|
+
yield { type: TOKEN_OBJECT_COLON, value: next.value.segment, start: next.value.index, end: next.value.index }
|
|
642
793
|
next = segmentIter.next()
|
|
643
794
|
|
|
644
795
|
state = STATE.ELEMENT
|
|
645
796
|
break
|
|
646
797
|
default:
|
|
647
|
-
yield { type:
|
|
798
|
+
yield { type: TOKEN_ERROR, value: next.value.segment, start: next.value.index, end: next.value.index, cause: 'expected whitespace or colon' }
|
|
648
799
|
next = segmentIter.next()
|
|
649
800
|
break
|
|
650
801
|
}
|
|
@@ -652,16 +803,17 @@ export class JSONTokenizer {
|
|
|
652
803
|
case STATE.ARY_OPEN:
|
|
653
804
|
switch(next.value.segment) {
|
|
654
805
|
case ' ': case '\r': case '\n': case '\t':
|
|
806
|
+
Accumulator.start(accumulatorState, next.value.index)
|
|
655
807
|
stack.push(STATE.ARY_OPEN)
|
|
656
808
|
state = STATE.WS
|
|
657
809
|
break
|
|
658
810
|
case ']':
|
|
659
|
-
yield { type:
|
|
811
|
+
yield { type: TOKEN_ARRAY_CLOSE_IMMEDIATE, value: next.value.segment, start: next.value.index, end: next.value.index }
|
|
660
812
|
next = segmentIter.next()
|
|
661
813
|
state = stack.pop()
|
|
662
814
|
break
|
|
663
815
|
case ',':
|
|
664
|
-
yield { type:
|
|
816
|
+
yield { type: TOKEN_ERROR, value: next.value.segment, start: next.value.index, end: next.value.index, cause: 'unexpected array comma' }
|
|
665
817
|
next = segmentIter.next()
|
|
666
818
|
break
|
|
667
819
|
default:
|
|
@@ -673,7 +825,7 @@ export class JSONTokenizer {
|
|
|
673
825
|
case STATE.ARY_CLOSE:
|
|
674
826
|
switch(next.value.segment) {
|
|
675
827
|
case ']':
|
|
676
|
-
yield { type:
|
|
828
|
+
yield { type: TOKEN_ARRAY_CLOSE, value: next.value.segment, start: next.value.index, end: next.value.index }
|
|
677
829
|
next = segmentIter.next()
|
|
678
830
|
state = stack.pop()
|
|
679
831
|
break
|
|
@@ -688,34 +840,31 @@ export class JSONTokenizer {
|
|
|
688
840
|
switch(next.value.segment) {
|
|
689
841
|
case '"':
|
|
690
842
|
if(state === STATE.KEY) {
|
|
691
|
-
yield { type:
|
|
692
|
-
yield { type:
|
|
843
|
+
yield { type: TOKEN_OBJECT_KEY, ...Accumulator.end(accumulatorState) }
|
|
844
|
+
yield { type: TOKEN_OBJECT_KEY_CLOSE, value: next.value.segment, start: next.value.index, end: next.value.index }
|
|
693
845
|
}
|
|
694
846
|
else {
|
|
695
|
-
yield { type:
|
|
696
|
-
yield { type:
|
|
847
|
+
yield { type: TOKEN_STRING, ...Accumulator.end(accumulatorState) }
|
|
848
|
+
yield { type: TOKEN_STRING_CLOSE, value: next.value.segment, start: next.value.index, end: next.value.index }
|
|
697
849
|
}
|
|
698
850
|
|
|
699
|
-
accumulator = EMPTY
|
|
700
851
|
next = segmentIter.next()
|
|
701
852
|
state = stack.pop()
|
|
702
853
|
break
|
|
703
854
|
case '\\':
|
|
704
|
-
|
|
855
|
+
Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
|
|
705
856
|
|
|
706
857
|
next = segmentIter.next()
|
|
707
858
|
if(next.done) {
|
|
708
|
-
yield { type:
|
|
709
|
-
accumulator = EMPTY
|
|
859
|
+
yield { type: TOKEN_ERROR, ...Accumulator.end(accumulatorState), cause: 'invalid escape sequence' }
|
|
710
860
|
state = stack.pop()
|
|
711
861
|
break
|
|
712
862
|
}
|
|
713
863
|
|
|
714
|
-
|
|
864
|
+
Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
|
|
715
865
|
|
|
716
866
|
if(!JSONTokenizer.isValueEscapeChar(next.value?.segment)) {
|
|
717
|
-
yield { type:
|
|
718
|
-
accumulator = EMPTY
|
|
867
|
+
yield { type: TOKEN_ERROR, ...Accumulator.end(accumulatorState), cause: 'invalid escape character' }
|
|
719
868
|
}
|
|
720
869
|
|
|
721
870
|
if(next.value.segment === 'u') {
|
|
@@ -726,11 +875,10 @@ export class JSONTokenizer {
|
|
|
726
875
|
next = segmentIter.next()
|
|
727
876
|
break
|
|
728
877
|
default:
|
|
729
|
-
|
|
878
|
+
Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
|
|
730
879
|
|
|
731
880
|
if(!JSONTokenizer.isValidChar(next.value.segment)) {
|
|
732
|
-
yield { type:
|
|
733
|
-
accumulator = EMPTY
|
|
881
|
+
yield { type: TOKEN_ERROR, ...Accumulator.end(accumulatorState), cause: 'invalid character in string literal' }
|
|
734
882
|
}
|
|
735
883
|
|
|
736
884
|
next = segmentIter.next()
|
|
@@ -746,52 +894,68 @@ export class JSONTokenizer {
|
|
|
746
894
|
case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
|
|
747
895
|
case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
|
|
748
896
|
|
|
749
|
-
|
|
897
|
+
Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
|
|
750
898
|
|
|
751
899
|
// second
|
|
752
900
|
next = segmentIter.next()
|
|
753
901
|
if(next.done) {
|
|
754
|
-
yield { type:
|
|
755
|
-
accumulator = EMPTY
|
|
902
|
+
yield { type: TOKEN_ERROR, ...Accumulator.end(accumulatorState), cause: 'invalid escape sequence (2nd done)' }
|
|
756
903
|
state = stack.pop()
|
|
757
904
|
break
|
|
758
905
|
}
|
|
759
|
-
|
|
906
|
+
Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
|
|
907
|
+
if(!JSONTokenizer.isValidHEX(next.value.segment)) {
|
|
908
|
+
yield { type: TOKEN_ERROR, ...Accumulator.end(accumulatorState), cause: 'invalid escape sequence (2nd)' }
|
|
909
|
+
state = stack.pop()
|
|
910
|
+
next = segmentIter.next()
|
|
911
|
+
break
|
|
912
|
+
}
|
|
760
913
|
|
|
761
914
|
// third
|
|
762
915
|
next = segmentIter.next()
|
|
763
916
|
if(next.done) {
|
|
764
|
-
yield { type:
|
|
765
|
-
|
|
917
|
+
yield { type: TOKEN_ERROR, ...Accumulator.end(accumulatorState), cause: 'invalid escape sequence (3rd done)' }
|
|
918
|
+
state = stack.pop()
|
|
919
|
+
break
|
|
920
|
+
}
|
|
921
|
+
Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
|
|
922
|
+
if(!JSONTokenizer.isValidHEX(next.value.segment)) {
|
|
923
|
+
yield { type: TOKEN_ERROR, ...Accumulator.end(accumulatorState), cause: 'invalid escape sequence (3rd)' }
|
|
766
924
|
state = stack.pop()
|
|
925
|
+
next = segmentIter.next()
|
|
767
926
|
break
|
|
768
927
|
}
|
|
769
|
-
accumulator += next.value?.segment
|
|
770
928
|
|
|
771
929
|
// fourth
|
|
772
930
|
next = segmentIter.next()
|
|
773
931
|
if(next.done) {
|
|
774
|
-
yield { type:
|
|
775
|
-
accumulator = EMPTY
|
|
932
|
+
yield { type: TOKEN_ERROR, ...Accumulator.end(accumulatorState), cause: 'invalid escape sequence (4th done)' }
|
|
776
933
|
state = stack.pop()
|
|
777
934
|
break
|
|
778
935
|
}
|
|
779
|
-
|
|
936
|
+
Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
|
|
937
|
+
if(!JSONTokenizer.isValidHEX(next.value.segment)) {
|
|
938
|
+
yield { type: TOKEN_ERROR, ...Accumulator.end(accumulatorState), cause: 'invalid escape sequence (4th)' }
|
|
939
|
+
state = stack.pop()
|
|
940
|
+
next = segmentIter.next()
|
|
941
|
+
break
|
|
942
|
+
}
|
|
780
943
|
|
|
781
944
|
state = stack.pop()
|
|
782
945
|
next = segmentIter.next()
|
|
783
946
|
break
|
|
784
947
|
default:
|
|
785
|
-
|
|
786
|
-
yield { type:
|
|
787
|
-
|
|
948
|
+
Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
|
|
949
|
+
yield { type: TOKEN_ERROR, ...Accumulator.end(accumulatorState), cause: 'invalid escape sequence' }
|
|
950
|
+
|
|
951
|
+
state = stack.pop()
|
|
788
952
|
next = segmentIter.next()
|
|
789
953
|
break
|
|
790
954
|
}
|
|
791
955
|
break
|
|
792
956
|
default:
|
|
793
|
-
|
|
794
|
-
|
|
957
|
+
yield { type: TOKEN_ERROR, ...Accumulator.end(accumulatorState), cause: `unknown state ${state}` }
|
|
958
|
+
break
|
|
795
959
|
}
|
|
796
960
|
}
|
|
797
961
|
}
|