@johntalton/json-tokenizer 1.0.1 → 1.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +68 -58
- package/package.json +11 -1
- package/src/index.js +360 -178
package/README.md
CHANGED
|
@@ -4,10 +4,9 @@ Generator function that tokenizes string based on JSON format.
|
|
|
4
4
|
|
|
5
5
|
- Uses Generator based API
|
|
6
6
|
- Produces tokens for all input text (including error tokens)
|
|
7
|
-
- Uses `Intl.Segmenter` with `'grapheme'` granularity
|
|
8
|
-
- Segmenter locale can be custom set (default: `'en-US'`)
|
|
9
7
|
- Allows for `AbortSignal` to control termination
|
|
10
8
|
- Best effort to match `JSON.parse` restrictions
|
|
9
|
+
- Start and end positions for errors
|
|
11
10
|
|
|
12
11
|
## Example
|
|
13
12
|
|
|
@@ -42,60 +41,71 @@ for(const token of stream) {
|
|
|
42
41
|
console.log(token)
|
|
43
42
|
}
|
|
44
43
|
/*
|
|
45
|
-
{ type: 'object-open', value: '{' }
|
|
46
|
-
{ type: 'whitespace', value: '\n\t' }
|
|
47
|
-
{ type: 'open-key-quote', value: '"' }
|
|
48
|
-
{ type: 'key', value: 'team' }
|
|
49
|
-
{ type: 'close-key-quote', value: '"' }
|
|
50
|
-
{ type: 'colon', value: ':' }
|
|
51
|
-
{ type: 'whitespace', value: ' ' }
|
|
52
|
-
{ type: 'open-string-quote', value: '"' }
|
|
53
|
-
{ type: 'string', value: 'Mystery Inc' }
|
|
54
|
-
{ type: 'close-string-quote', value: '"' }
|
|
55
|
-
{ type: 'object-member-comma', value: ',' }
|
|
56
|
-
{ type: 'whitespace', value: '\n\t' }
|
|
57
|
-
{ type: 'open-key-quote', value: '"' }
|
|
58
|
-
{ type: 'key', value: 'members' }
|
|
59
|
-
{ type: 'close-key-quote', value: '"' }
|
|
60
|
-
{ type: 'colon', value: ':' }
|
|
61
|
-
{ type: 'whitespace', value: ' ' }
|
|
62
|
-
{ type: 'array-open', value: '[' }
|
|
63
|
-
{ type: 'whitespace', value: '\n\t\t' }
|
|
64
|
-
{ type: 'open-string-quote', value: '"' }
|
|
65
|
-
{ type: 'string', value: 'Fred' }
|
|
66
|
-
{ type: 'close-string-quote', value: '"' }
|
|
67
|
-
{ type: 'array-element-comma', value: ',' }
|
|
68
|
-
{ type: 'whitespace', value: '\n\t\t' }
|
|
69
|
-
{ type: 'open-string-quote', value: '"' }
|
|
70
|
-
{ type: 'string', value: 'Daphne' }
|
|
71
|
-
{ type: 'close-string-quote', value: '"' }
|
|
72
|
-
{ type: 'array-element-comma', value: ',' }
|
|
73
|
-
{ type: 'whitespace', value: '\n\t\t' }
|
|
74
|
-
{ type: 'open-string-quote', value: '"' }
|
|
75
|
-
{ type: 'string', value: 'Velma' }
|
|
76
|
-
{ type: 'close-string-quote', value: '"' }
|
|
77
|
-
{ type: 'array-element-comma', value: ',' }
|
|
78
|
-
{ type: 'whitespace', value: '\n\t\t' }
|
|
79
|
-
{ type: 'open-string-quote', value: '"' }
|
|
80
|
-
{ type: 'string', value: 'Shaggy' }
|
|
81
|
-
{ type: 'close-string-quote', value: '"' }
|
|
82
|
-
{ type: 'array-element-comma', value: ',' }
|
|
83
|
-
{ type: 'whitespace', value: '\n\t\t' }
|
|
84
|
-
{ type: 'open-string-quote', value: '"' }
|
|
85
|
-
{ type: 'string', value: 'Scooby' }
|
|
86
|
-
{ type: 'close-string-quote', value: '"' }
|
|
87
|
-
{ type: 'whitespace', value: '\n\t' }
|
|
88
|
-
{ type: 'array-close', value: ']' }
|
|
89
|
-
{ type: 'object-member-comma', value: ',' }
|
|
90
|
-
{ type: 'whitespace', value: '\n\t' }
|
|
91
|
-
{ type: 'open-key-quote', value: '"' }
|
|
92
|
-
{ type: 'key', value: 'aired' }
|
|
93
|
-
{ type: 'close-key-quote', value: '"' }
|
|
94
|
-
{ type: 'colon', value: ':' }
|
|
95
|
-
{ type: 'whitespace', value: ' ' }
|
|
96
|
-
{ type: 'number', value: '1969' }
|
|
97
|
-
{ type: 'whitespace', value: '\n' }
|
|
98
|
-
{ type: 'object-close', value: '}' }
|
|
99
|
-
{ type: 'eof', value: '' }
|
|
44
|
+
{ type: 'object-open', value: '{', start: 0, end: 0 }
|
|
45
|
+
{ type: 'whitespace', value: '\n\t', start: 1, end: 2 }
|
|
46
|
+
{ type: 'open-key-quote', value: '"', start: 3, end: 3 }
|
|
47
|
+
{ type: 'key', value: 'team', start: 4, end: 7 }
|
|
48
|
+
{ type: 'close-key-quote', value: '"', start: 8, end: 8 }
|
|
49
|
+
{ type: 'colon', value: ':', start: 9, end: 9 }
|
|
50
|
+
{ type: 'whitespace', value: ' ', start: 10, end: 10 }
|
|
51
|
+
{ type: 'open-string-quote', value: '"', start: 11, end: 11 }
|
|
52
|
+
{ type: 'string', value: 'Mystery Inc', start: 12, end: 22 }
|
|
53
|
+
{ type: 'close-string-quote', value: '"', start: 23, end: 23 }
|
|
54
|
+
{ type: 'object-member-comma', value: ',', start: 24, end: 24 }
|
|
55
|
+
{ type: 'whitespace', value: '\n\t', start: 25, end: 26 }
|
|
56
|
+
{ type: 'open-key-quote', value: '"', start: 27, end: 27 }
|
|
57
|
+
{ type: 'key', value: 'members', start: 28, end: 34 }
|
|
58
|
+
{ type: 'close-key-quote', value: '"', start: 35, end: 35 }
|
|
59
|
+
{ type: 'colon', value: ':', start: 36, end: 36 }
|
|
60
|
+
{ type: 'whitespace', value: ' ', start: 37, end: 37 }
|
|
61
|
+
{ type: 'array-open', value: '[', start: 38, end: 38 }
|
|
62
|
+
{ type: 'whitespace', value: '\n\t\t', start: 39, end: 41 }
|
|
63
|
+
{ type: 'open-string-quote', value: '"', start: 42, end: 42 }
|
|
64
|
+
{ type: 'string', value: 'Fred', start: 43, end: 46 }
|
|
65
|
+
{ type: 'close-string-quote', value: '"', start: 47, end: 47 }
|
|
66
|
+
{ type: 'array-element-comma', value: ',', start: 48, end: 48 }
|
|
67
|
+
{ type: 'whitespace', value: '\n\t\t', start: 49, end: 51 }
|
|
68
|
+
{ type: 'open-string-quote', value: '"', start: 52, end: 52 }
|
|
69
|
+
{ type: 'string', value: 'Daphne', start: 53, end: 58 }
|
|
70
|
+
{ type: 'close-string-quote', value: '"', start: 59, end: 59 }
|
|
71
|
+
{ type: 'array-element-comma', value: ',', start: 60, end: 60 }
|
|
72
|
+
{ type: 'whitespace', value: '\n\t\t', start: 61, end: 63 }
|
|
73
|
+
{ type: 'open-string-quote', value: '"', start: 64, end: 64 }
|
|
74
|
+
{ type: 'string', value: 'Velma', start: 65, end: 69 }
|
|
75
|
+
{ type: 'close-string-quote', value: '"', start: 70, end: 70 }
|
|
76
|
+
{ type: 'array-element-comma', value: ',', start: 71, end: 71 }
|
|
77
|
+
{ type: 'whitespace', value: '\n\t\t', start: 72, end: 74 }
|
|
78
|
+
{ type: 'open-string-quote', value: '"', start: 75, end: 75 }
|
|
79
|
+
{ type: 'string', value: 'Shaggy', start: 76, end: 81 }
|
|
80
|
+
{ type: 'close-string-quote', value: '"', start: 82, end: 82 }
|
|
81
|
+
{ type: 'array-element-comma', value: ',', start: 83, end: 83 }
|
|
82
|
+
{ type: 'whitespace', value: '\n\t\t', start: 84, end: 86 }
|
|
83
|
+
{ type: 'open-string-quote', value: '"', start: 87, end: 87 }
|
|
84
|
+
{ type: 'string', value: 'Scooby', start: 88, end: 93 }
|
|
85
|
+
{ type: 'close-string-quote', value: '"', start: 94, end: 94 }
|
|
86
|
+
{ type: 'whitespace', value: '\n\t', start: 95, end: 96 }
|
|
87
|
+
{ type: 'array-close', value: ']', start: 97, end: 97 }
|
|
88
|
+
{ type: 'object-member-comma', value: ',', start: 98, end: 98 }
|
|
89
|
+
{ type: 'whitespace', value: '\n\t', start: 99, end: 100 }
|
|
90
|
+
{ type: 'open-key-quote', value: '"', start: 101, end: 101 }
|
|
91
|
+
{ type: 'key', value: 'aired', start: 102, end: 106 }
|
|
92
|
+
{ type: 'close-key-quote', value: '"', start: 107, end: 107 }
|
|
93
|
+
{ type: 'colon', value: ':', start: 108, end: 108 }
|
|
94
|
+
{ type: 'whitespace', value: ' ', start: 109, end: 109 }
|
|
95
|
+
{ type: 'number', value: '1969', start: 110, end: 113 }
|
|
96
|
+
{ type: 'whitespace', value: '\n', start: 114, end: 114 }
|
|
97
|
+
{ type: 'object-close', value: '}', start: 115, end: 115 }
|
|
98
|
+
{ type: 'eof', value: '', start: null, end: 116 }
|
|
100
99
|
*/
|
|
101
|
-
```
|
|
100
|
+
```
|
|
101
|
+
|
|
102
|
+
## Tests
|
|
103
|
+
|
|
104
|
+
A simple set of tests for coverage exists within the repo.
|
|
105
|
+
|
|
106
|
+
For more complete and varied validation against in-the-wild JSON, the tokenizer has also been tested against the following:
|
|
107
|
+
|
|
108
|
+
- https://github.com/nst/JSONTestSuite
|
|
109
|
+
- https://github.com/nlohmann/json_test_data
|
|
110
|
+
- https://github.com/open-source-parsers/jsoncpp
|
|
111
|
+
- any others that can be found :P
|
package/package.json
CHANGED
|
@@ -1,15 +1,25 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@johntalton/json-tokenizer",
|
|
3
3
|
"type": "module",
|
|
4
|
-
"version": "1.0.1",
|
|
4
|
+
"version": "1.1.1",
|
|
5
5
|
"license": "MIT",
|
|
6
|
+
"engines": {
|
|
7
|
+
"node": ">=22.0.0"
|
|
8
|
+
},
|
|
6
9
|
"exports": {
|
|
7
10
|
".": "./src/index.js"
|
|
8
11
|
},
|
|
9
12
|
"files": [
|
|
10
13
|
"src/*.js"
|
|
11
14
|
],
|
|
15
|
+
"scripts": {
|
|
16
|
+
"test": "node --test test/**",
|
|
17
|
+
"coverage": "c8 -r lcov -r text node --test test/**"
|
|
18
|
+
},
|
|
12
19
|
"repository": {
|
|
13
20
|
"url": "git+https://github.com/johntalton/json-tokenizer.git"
|
|
21
|
+
},
|
|
22
|
+
"dependencies": {
|
|
23
|
+
"c8": "^10.1.3"
|
|
14
24
|
}
|
|
15
25
|
}
|
package/src/index.js
CHANGED
|
@@ -1,44 +1,87 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* @typedef {Object} TokenizerOptions
|
|
3
|
-
* @property {string|undefined} [locale = 'en-US']
|
|
4
3
|
* @property {AbortSignal|undefined} [signal]
|
|
5
4
|
* @property {boolean|undefined} [debug]
|
|
6
5
|
*/
|
|
7
6
|
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
7
|
+
/**
|
|
8
|
+
* @typedef {Object} TokenBase
|
|
9
|
+
* @property {string} type
|
|
10
|
+
* @property {string} value
|
|
11
|
+
* @property {number|null} start
|
|
12
|
+
* @property {number|null} end
|
|
13
|
+
*/
|
|
14
|
+
|
|
15
|
+
/**
|
|
16
|
+
* @typedef {Object} ErrorTokenBase
|
|
17
|
+
* @property {'error'} type
|
|
18
|
+
* @property {string} cause
|
|
19
|
+
*/
|
|
20
|
+
/** @typedef {TokenBase & ErrorTokenBase} ErrorToken */
|
|
21
|
+
/** @typedef {TokenBase & { type: 'eof' }} EOFToken */
|
|
22
|
+
/** @typedef {TokenBase & { type: 'whitespace' }} WhitespaceToken */
|
|
23
|
+
/** @typedef {TokenBase & { type: 'array-open' }} ArrayOpenToken */
|
|
24
|
+
/** @typedef {TokenBase & { type: 'array-close' }} ArrayCloseToken */
|
|
25
|
+
/** @typedef {TokenBase & { type: 'array-close-immediate' }} ArrayCloseImmediateToken */
|
|
26
|
+
/** @typedef {TokenBase & { type: 'array-element-comma' }} ArrayCommaToken */
|
|
27
|
+
/** @typedef {TokenBase & { type: 'object-open' }} ObjectOpenToken */
|
|
28
|
+
/** @typedef {TokenBase & { type: 'object-close' }} ObjectCloseToken */
|
|
29
|
+
/** @typedef {TokenBase & { type: 'object-close-immediate' }} ObjectCloseImmediateToken */
|
|
30
|
+
/** @typedef {TokenBase & { type: 'object-member-comma' }} ObjectCommaToken */
|
|
31
|
+
/** @typedef {TokenBase & { type: 'open-key-quote' }} OpenKeyQuoteToken */
|
|
32
|
+
/** @typedef {TokenBase & { type: 'key' }} KeyToken */
|
|
33
|
+
/** @typedef {TokenBase & { type: 'close-key-quote' }} CloseKeyQuoteToken */
|
|
34
|
+
/** @typedef {TokenBase & { type: 'colon' }} ColonToken */
|
|
35
|
+
/** @typedef {TokenBase & { type: 'open-string-quote' }} OpenStringQuoteToken */
|
|
36
|
+
/** @typedef {TokenBase & { type: 'string' }} StringToken */
|
|
37
|
+
/** @typedef {TokenBase & { type: 'close-string-quote' }} CloseStringQuoteToken */
|
|
38
|
+
/** @typedef {TokenBase & { type: 'true' }} TrueToken */
|
|
39
|
+
/** @typedef {TokenBase & { type: 'false' }} FalseToken */
|
|
40
|
+
/** @typedef {TokenBase & { type: 'null' }} NULLToken */
|
|
41
|
+
/** @typedef {TokenBase & { type: 'number' }} NumberToken */
|
|
42
|
+
|
|
43
|
+
/** @typedef {EOFToken|WhitespaceToken|ErrorToken|ArrayOpenToken|ArrayCloseToken|ArrayCloseImmediateToken|ArrayCommaToken|ObjectOpenToken|ObjectCloseToken|ObjectCloseImmediateToken|ObjectCommaToken|OpenKeyQuoteToken|KeyToken|CloseKeyQuoteToken|ColonToken|OpenStringQuoteToken|StringToken|CloseStringQuoteToken|TrueToken|FalseToken|NULLToken|NumberToken} Token */
|
|
44
|
+
|
|
45
|
+
/**
|
|
46
|
+
* @typedef {Object} AccumulationState
|
|
47
|
+
* @property {string} value
|
|
48
|
+
* @property {number|null} start
|
|
49
|
+
* @property {number|null} end
|
|
50
|
+
*/
|
|
51
|
+
|
|
52
|
+
export const TOKEN_ERROR = 'error'
|
|
53
|
+
export const TOKEN_EOF = 'eof'
|
|
54
|
+
export const TOKEN_WHITESPACE = 'whitespace'
|
|
12
55
|
|
|
13
56
|
// Array / Elements
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
57
|
+
export const TOKEN_ARRAY_OPEN = 'array-open'
|
|
58
|
+
export const TOKEN_ARRAY_CLOSE = 'array-close'
|
|
59
|
+
export const TOKEN_ARRAY_CLOSE_IMMEDIATE = 'array-close-immediate'
|
|
60
|
+
export const TOKEN_ARRAY_ELEMENT_COMMA = 'array-element-comma'
|
|
18
61
|
|
|
19
62
|
// Object
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
63
|
+
export const TOKEN_OBJECT_OPEN = 'object-open'
|
|
64
|
+
export const TOKEN_OBJECT_CLOSE = 'object-close'
|
|
65
|
+
export const TOKEN_OBJECT_CLOSE_IMMEDIATE = 'object-close-immediate'
|
|
66
|
+
export const TOKEN_OBJECT_MEMBER_COMMA = 'object-member-comma'
|
|
67
|
+
export const TOKEN_OBJECT_KEY_OPEN = 'open-key-quote'
|
|
68
|
+
export const TOKEN_OBJECT_KEY = 'key'
|
|
69
|
+
export const TOKEN_OBJECT_KEY_CLOSE = 'close-key-quote'
|
|
70
|
+
export const TOKEN_OBJECT_COLON = 'colon'
|
|
28
71
|
|
|
29
72
|
// String
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
73
|
+
export const TOKEN_STRING_OPEN = 'open-string-quote'
|
|
74
|
+
export const TOKEN_STRING = 'string'
|
|
75
|
+
export const TOKEN_STRING_CLOSE = 'close-string-quote'
|
|
33
76
|
|
|
34
77
|
// Primitives
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
78
|
+
export const TOKEN_TRUE = 'true'
|
|
79
|
+
export const TOKEN_FALSE = 'false'
|
|
80
|
+
export const TOKEN_NULL = 'null'
|
|
38
81
|
|
|
39
82
|
// Number
|
|
40
|
-
|
|
41
|
-
|
|
83
|
+
export const TOKEN_NUMBER = 'number'
|
|
84
|
+
|
|
42
85
|
|
|
43
86
|
export const EMPTY = ''
|
|
44
87
|
|
|
@@ -53,6 +96,7 @@ export const STATE = {
|
|
|
53
96
|
ARY_OPEN: 'ao',
|
|
54
97
|
ARY_CLOSE: 'ac',
|
|
55
98
|
MEMBERS: 'ms',
|
|
99
|
+
MEMBERS_CONTINUE: 'msc',
|
|
56
100
|
MEMBER: 'm',
|
|
57
101
|
KEY: 'key',
|
|
58
102
|
MEMBER_KEY_AFTER: 'mka',
|
|
@@ -73,11 +117,64 @@ export const STATE = {
|
|
|
73
117
|
U_HEX4: 'hex'
|
|
74
118
|
}
|
|
75
119
|
|
|
120
|
+
export const EXIT_STATES = [ STATE.ELEMENT_AFTER ]
|
|
121
|
+
|
|
76
122
|
export const ESCAPE_CHARS = [
|
|
77
123
|
'"', '\\', '/', 'b', 'f', 'n', 'r', 't', 'u'
|
|
78
124
|
]
|
|
79
125
|
|
|
80
|
-
export const
|
|
126
|
+
export const HEX_CHARS = [
|
|
127
|
+
'1', '2', '3', '4', '5', '6', '7', '8', '9', '0',
|
|
128
|
+
'a', 'b', 'c', 'd', 'e', 'f',
|
|
129
|
+
'A', 'B', 'C', 'D', 'E', 'F',
|
|
130
|
+
]
|
|
131
|
+
|
|
132
|
+
export class Accumulator {
|
|
133
|
+
/**
|
|
134
|
+
* @param {AccumulationState} accumulationState
|
|
135
|
+
*/
|
|
136
|
+
static #reset(accumulationState) {
|
|
137
|
+
accumulationState.value = EMPTY
|
|
138
|
+
accumulationState.start = null
|
|
139
|
+
accumulationState.end = null
|
|
140
|
+
}
|
|
141
|
+
|
|
142
|
+
/**
|
|
143
|
+
* @param {AccumulationState} accumulationState
|
|
144
|
+
* @param {number} start
|
|
145
|
+
*/
|
|
146
|
+
static start(accumulationState, start) {
|
|
147
|
+
accumulationState.value = EMPTY
|
|
148
|
+
accumulationState.start = start
|
|
149
|
+
accumulationState.end = start
|
|
150
|
+
}
|
|
151
|
+
|
|
152
|
+
/**
|
|
153
|
+
* @param {AccumulationState} accumulationState
|
|
154
|
+
* @param {string} value
|
|
155
|
+
* @param {number} end
|
|
156
|
+
*/
|
|
157
|
+
static accumulate(accumulationState, value, end) {
|
|
158
|
+
accumulationState.value += value
|
|
159
|
+
accumulationState.end = end
|
|
160
|
+
}
|
|
161
|
+
|
|
162
|
+
/**
|
|
163
|
+
* @param {AccumulationState} accumulationState
|
|
164
|
+
*/
|
|
165
|
+
static end(accumulationState) {
|
|
166
|
+
const result = { ...accumulationState }
|
|
167
|
+
Accumulator.#reset(accumulationState)
|
|
168
|
+
return result
|
|
169
|
+
}
|
|
170
|
+
|
|
171
|
+
/**
|
|
172
|
+
* @param {AccumulationState} accumulationState
|
|
173
|
+
*/
|
|
174
|
+
static empty(accumulationState) {
|
|
175
|
+
return accumulationState.value === EMPTY
|
|
176
|
+
}
|
|
177
|
+
}
|
|
81
178
|
|
|
82
179
|
export class JSONTokenizer {
|
|
83
180
|
/**
|
|
@@ -100,63 +197,107 @@ export class JSONTokenizer {
|
|
|
100
197
|
return true
|
|
101
198
|
}
|
|
102
199
|
|
|
200
|
+
/**
|
|
201
|
+
* @param {string} str
|
|
202
|
+
*/
|
|
203
|
+
static isValidHEX(str) {
|
|
204
|
+
return HEX_CHARS.includes(str)
|
|
205
|
+
}
|
|
206
|
+
|
|
103
207
|
/**
|
|
104
208
|
* @param {string} str
|
|
105
209
|
* @param {TokenizerOptions} [options]
|
|
210
|
+
* @return {Generator<Token, undefined, undefined>}
|
|
106
211
|
*/
|
|
107
212
|
static *tokenize(str, options) {
|
|
108
213
|
const debug = (options?.debug ?? false) === true
|
|
109
214
|
const signal = options?.signal
|
|
110
|
-
const locale = options?.locale ?? DEFAULT_LOCALE
|
|
111
215
|
|
|
112
|
-
const
|
|
113
|
-
|
|
114
|
-
|
|
216
|
+
const segmentIter = Iterator.from(str)
|
|
217
|
+
.map((item, index) => ({
|
|
218
|
+
segment: item,
|
|
219
|
+
done: false,
|
|
220
|
+
index
|
|
221
|
+
}))
|
|
115
222
|
|
|
116
223
|
/** @type {Array<string>} */
|
|
117
|
-
const stack = []
|
|
224
|
+
const stack = [ ]
|
|
118
225
|
|
|
119
226
|
/** @type {string|undefined} */
|
|
120
227
|
let state = STATE.ELEMENT
|
|
121
228
|
|
|
122
229
|
let next = segmentIter.next()
|
|
123
230
|
|
|
124
|
-
|
|
231
|
+
/** @type {AccumulationState} */
|
|
232
|
+
const accumulatorState = {
|
|
233
|
+
value: EMPTY,
|
|
234
|
+
start: null,
|
|
235
|
+
end: null
|
|
236
|
+
}
|
|
237
|
+
|
|
238
|
+
let line = 1
|
|
125
239
|
|
|
126
240
|
//
|
|
127
241
|
if(next.done) {
|
|
128
|
-
yield { type:
|
|
242
|
+
yield { type: TOKEN_ERROR, value: EMPTY, start: 0, end: 0, cause: 'empty' }
|
|
129
243
|
return
|
|
130
244
|
}
|
|
131
245
|
|
|
132
246
|
//
|
|
133
247
|
while(true) {
|
|
134
248
|
if(signal?.aborted) {
|
|
135
|
-
|
|
249
|
+
yield { type: TOKEN_EOF, value: EMPTY, start: null, end: null }
|
|
136
250
|
break
|
|
137
251
|
}
|
|
138
252
|
|
|
139
253
|
//
|
|
140
254
|
if(next.done) {
|
|
141
|
-
if(
|
|
142
|
-
|
|
143
|
-
|
|
255
|
+
if(state === undefined) {
|
|
256
|
+
yield { type: TOKEN_ERROR, value: EMPTY, start: null, end: null, cause: 'undefined state' }
|
|
257
|
+
}
|
|
258
|
+
|
|
259
|
+
//
|
|
260
|
+
if(state !== undefined && !EXIT_STATES.includes(state)) {
|
|
261
|
+
yield { type: TOKEN_ERROR, value: EMPTY, start: null, end: str.length, cause: `invalid exit state (${state})` }
|
|
262
|
+
}
|
|
263
|
+
|
|
264
|
+
//
|
|
265
|
+
if(!Accumulator.empty(accumulatorState)) {
|
|
266
|
+
yield { type: TOKEN_ERROR, ...Accumulator.end(accumulatorState), cause: 'accumulator not empty (done)' }
|
|
267
|
+
}
|
|
268
|
+
|
|
269
|
+
//
|
|
270
|
+
if(stack.length !== 0) {
|
|
271
|
+
yield { type: TOKEN_ERROR, value: EMPTY, start: null, end: str.length, cause: 'stack not empty' }
|
|
272
|
+
}
|
|
273
|
+
|
|
274
|
+
//
|
|
275
|
+
yield { type: TOKEN_EOF, value: EMPTY, start: null, end: str.length }
|
|
144
276
|
break
|
|
145
277
|
}
|
|
146
278
|
|
|
147
279
|
//
|
|
148
280
|
if(state === undefined) {
|
|
149
|
-
|
|
281
|
+
//
|
|
282
|
+
if(!Accumulator.empty(accumulatorState)) {
|
|
283
|
+
yield { type: TOKEN_ERROR, ...Accumulator.end(accumulatorState), cause: 'accumulator not empty (stack)' }
|
|
284
|
+
}
|
|
285
|
+
|
|
286
|
+
//
|
|
150
287
|
if(!next.done) {
|
|
151
|
-
|
|
288
|
+
Accumulator.start(accumulatorState, next.value.index)
|
|
289
|
+
|
|
152
290
|
while(!next.done) {
|
|
153
|
-
|
|
291
|
+
if(signal?.aborted) { break }
|
|
292
|
+
Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
|
|
154
293
|
next = segmentIter.next()
|
|
155
294
|
}
|
|
156
|
-
|
|
295
|
+
|
|
296
|
+
yield { type: TOKEN_ERROR, ...Accumulator.end(accumulatorState), cause: 'accumulator not empty (not done)' }
|
|
157
297
|
}
|
|
158
298
|
|
|
159
|
-
|
|
299
|
+
//
|
|
300
|
+
yield { type: TOKEN_EOF, value: EMPTY, start: null, end: null }
|
|
160
301
|
break
|
|
161
302
|
}
|
|
162
303
|
|
|
@@ -164,9 +305,9 @@ export class JSONTokenizer {
|
|
|
164
305
|
if(debug) {
|
|
165
306
|
console.log({
|
|
166
307
|
seg: next.value.segment,
|
|
167
|
-
state,
|
|
168
|
-
stack.join(','),
|
|
169
|
-
|
|
308
|
+
state,
|
|
309
|
+
stack: stack.join(','),
|
|
310
|
+
...accumulatorState
|
|
170
311
|
})
|
|
171
312
|
}
|
|
172
313
|
|
|
@@ -178,7 +319,7 @@ export class JSONTokenizer {
|
|
|
178
319
|
state = stack.pop()
|
|
179
320
|
break
|
|
180
321
|
case ',':
|
|
181
|
-
yield { type:
|
|
322
|
+
yield { type: TOKEN_ARRAY_ELEMENT_COMMA, value: next.value.segment, start: next.value.index, end: next.value.index }
|
|
182
323
|
stack.push(STATE.ELEMENTS)
|
|
183
324
|
state = STATE.ELEMENT
|
|
184
325
|
next = segmentIter.next()
|
|
@@ -192,6 +333,7 @@ export class JSONTokenizer {
|
|
|
192
333
|
case STATE.ELEMENT:
|
|
193
334
|
switch(next.value.segment) {
|
|
194
335
|
case ' ': case '\r': case '\n': case '\t':
|
|
336
|
+
Accumulator.start(accumulatorState, next.value.index)
|
|
195
337
|
stack.push(STATE.ELEMENT)
|
|
196
338
|
state = STATE.WS
|
|
197
339
|
break
|
|
@@ -204,6 +346,7 @@ export class JSONTokenizer {
|
|
|
204
346
|
case STATE.ELEMENT_AFTER:
|
|
205
347
|
switch(next.value.segment) {
|
|
206
348
|
case ' ': case '\r': case '\n': case '\t':
|
|
349
|
+
Accumulator.start(accumulatorState, next.value.index)
|
|
207
350
|
stack.push(STATE.ELEMENT_AFTER)
|
|
208
351
|
state = STATE.WS
|
|
209
352
|
break
|
|
@@ -217,24 +360,29 @@ export class JSONTokenizer {
|
|
|
217
360
|
state = stack.pop()
|
|
218
361
|
break
|
|
219
362
|
default:
|
|
220
|
-
yield { type:
|
|
363
|
+
yield { type: TOKEN_ERROR, value: next.value.segment, start: next.value.index, end: next.value.index, cause: 'expect close, comma or whitespace' }
|
|
221
364
|
next = segmentIter.next()
|
|
365
|
+
state = stack.pop()
|
|
222
366
|
break
|
|
223
367
|
}
|
|
224
368
|
break
|
|
225
369
|
case STATE.WS:
|
|
226
370
|
switch(next.value.segment) {
|
|
227
371
|
case ' ': case '\r': case '\n': case '\t':
|
|
228
|
-
|
|
372
|
+
|
|
373
|
+
if(next.value.segment === '\n') {
|
|
374
|
+
line += 1
|
|
375
|
+
}
|
|
376
|
+
|
|
377
|
+
Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
|
|
229
378
|
next = segmentIter.next()
|
|
230
379
|
if(next.done) {
|
|
231
|
-
yield { type:
|
|
232
|
-
|
|
380
|
+
yield { type: TOKEN_WHITESPACE, ...Accumulator.end(accumulatorState) }
|
|
381
|
+
state = stack.pop()
|
|
233
382
|
}
|
|
234
383
|
break
|
|
235
384
|
default:
|
|
236
|
-
yield { type:
|
|
237
|
-
accumulator = EMPTY
|
|
385
|
+
yield { type: TOKEN_WHITESPACE, ...Accumulator.end(accumulatorState) }
|
|
238
386
|
state = stack.pop()
|
|
239
387
|
break
|
|
240
388
|
}
|
|
@@ -242,157 +390,154 @@ export class JSONTokenizer {
|
|
|
242
390
|
case STATE.VALUE:
|
|
243
391
|
switch(next.value.segment) {
|
|
244
392
|
case '{':
|
|
245
|
-
yield { type:
|
|
393
|
+
yield { type: TOKEN_OBJECT_OPEN, value: next.value.segment, start: next.value.index, end: next.value.index }
|
|
246
394
|
state = STATE.OBJ_OPEN
|
|
247
395
|
next = segmentIter.next()
|
|
248
396
|
break
|
|
249
397
|
case '[':
|
|
250
|
-
yield { type:
|
|
398
|
+
yield { type: TOKEN_ARRAY_OPEN, value: next.value.segment, start: next.value.index, end: next.value.index }
|
|
251
399
|
state = STATE.ARY_OPEN
|
|
252
400
|
next = segmentIter.next()
|
|
253
401
|
break
|
|
254
402
|
case '"':
|
|
255
|
-
yield { type:
|
|
256
|
-
accumulator = EMPTY
|
|
403
|
+
yield { type: TOKEN_STRING_OPEN, value: next.value.segment, start: next.value.index, end: next.value.index }
|
|
257
404
|
state = STATE.STR
|
|
258
405
|
next = segmentIter.next()
|
|
406
|
+
if(!next.done) { Accumulator.start(accumulatorState, next.value.index) }
|
|
259
407
|
break
|
|
260
408
|
case 't':
|
|
261
|
-
|
|
409
|
+
Accumulator.start(accumulatorState, next.value.index)
|
|
410
|
+
Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
|
|
262
411
|
|
|
263
412
|
next = segmentIter.next()
|
|
264
413
|
if(next.done || next.value.segment !== 'r') {
|
|
265
|
-
yield { type:
|
|
266
|
-
accumulator = EMPTY
|
|
414
|
+
yield { type: TOKEN_ERROR, ...Accumulator.end(accumulatorState), cause: 'expected true literal (r)' }
|
|
267
415
|
state = stack.pop()
|
|
268
416
|
break
|
|
269
417
|
}
|
|
270
|
-
|
|
418
|
+
Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
|
|
271
419
|
|
|
272
420
|
next = segmentIter.next()
|
|
273
421
|
if(next.done || next.value.segment !== 'u') {
|
|
274
|
-
yield { type:
|
|
275
|
-
accumulator = EMPTY
|
|
422
|
+
yield { type: TOKEN_ERROR, ...Accumulator.end(accumulatorState), cause: 'expected true literal (u)' }
|
|
276
423
|
state = stack.pop()
|
|
277
424
|
break
|
|
278
425
|
}
|
|
279
|
-
|
|
426
|
+
Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
|
|
280
427
|
|
|
281
428
|
next = segmentIter.next()
|
|
282
429
|
if(next.done || next.value.segment !== 'e') {
|
|
283
|
-
yield { type:
|
|
284
|
-
accumulator = EMPTY
|
|
430
|
+
yield { type: TOKEN_ERROR, ...Accumulator.end(accumulatorState), cause: 'expected true literal (e)' }
|
|
285
431
|
state = stack.pop()
|
|
286
432
|
break
|
|
287
433
|
}
|
|
288
|
-
|
|
434
|
+
Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
|
|
289
435
|
|
|
290
|
-
yield { type:
|
|
291
|
-
accumulator = EMPTY
|
|
436
|
+
yield { type: TOKEN_TRUE, ...Accumulator.end(accumulatorState) }
|
|
292
437
|
|
|
293
438
|
next = segmentIter.next()
|
|
294
439
|
state = stack.pop()
|
|
295
440
|
break
|
|
296
441
|
case 'f':
|
|
297
|
-
|
|
442
|
+
Accumulator.start(accumulatorState, next.value.index)
|
|
443
|
+
Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
|
|
298
444
|
|
|
299
445
|
next = segmentIter.next()
|
|
300
446
|
if(next.done || next.value.segment !== 'a') {
|
|
301
|
-
yield { type:
|
|
302
|
-
accumulator = EMPTY
|
|
447
|
+
yield { type: TOKEN_ERROR, ...Accumulator.end(accumulatorState), cause: 'expected false literal (a)' }
|
|
303
448
|
state = stack.pop()
|
|
304
449
|
break
|
|
305
450
|
}
|
|
306
|
-
|
|
451
|
+
Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
|
|
307
452
|
|
|
308
453
|
next = segmentIter.next()
|
|
309
454
|
if(next.done || next.value.segment !== 'l') {
|
|
310
|
-
yield { type:
|
|
311
|
-
accumulator = EMPTY
|
|
455
|
+
yield { type: TOKEN_ERROR, ...Accumulator.end(accumulatorState), cause: 'expected false literal (l)' }
|
|
312
456
|
state = stack.pop()
|
|
313
457
|
break
|
|
314
458
|
}
|
|
315
|
-
|
|
459
|
+
Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
|
|
316
460
|
|
|
317
461
|
next = segmentIter.next()
|
|
318
462
|
if(next.done || next.value.segment !== 's') {
|
|
319
|
-
yield { type:
|
|
320
|
-
accumulator = EMPTY
|
|
463
|
+
yield { type: TOKEN_ERROR, ...Accumulator.end(accumulatorState), cause: 'expected false literal (s)' }
|
|
321
464
|
state = stack.pop()
|
|
322
465
|
break
|
|
323
466
|
}
|
|
324
|
-
|
|
467
|
+
Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
|
|
325
468
|
|
|
326
469
|
next = segmentIter.next()
|
|
327
470
|
if(next.done || next.value.segment !== 'e') {
|
|
328
|
-
yield { type:
|
|
329
|
-
accumulator = EMPTY
|
|
471
|
+
yield { type: TOKEN_ERROR, ...Accumulator.end(accumulatorState), cause: 'expected false literal (e)' }
|
|
330
472
|
state = stack.pop()
|
|
331
473
|
break
|
|
332
474
|
}
|
|
333
|
-
|
|
475
|
+
Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
|
|
334
476
|
|
|
335
|
-
yield { type:
|
|
336
|
-
accumulator = EMPTY
|
|
477
|
+
yield { type: TOKEN_FALSE, ...Accumulator.end(accumulatorState) }
|
|
337
478
|
|
|
338
479
|
next = segmentIter.next()
|
|
339
480
|
state = stack.pop()
|
|
340
481
|
break
|
|
341
482
|
case 'n':
|
|
342
|
-
|
|
483
|
+
Accumulator.start(accumulatorState, next.value.index)
|
|
484
|
+
Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
|
|
343
485
|
|
|
344
486
|
next = segmentIter.next()
|
|
345
487
|
if(next.done || next.value.segment !== 'u') {
|
|
346
|
-
yield { type:
|
|
347
|
-
accumulator = EMPTY
|
|
488
|
+
yield { type: TOKEN_ERROR, ...Accumulator.end(accumulatorState), cause: 'expected null literal (u)' }
|
|
348
489
|
state = stack.pop()
|
|
349
490
|
break
|
|
350
491
|
}
|
|
351
|
-
|
|
492
|
+
Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
|
|
352
493
|
|
|
353
494
|
next = segmentIter.next()
|
|
354
495
|
if(next.done || next.value.segment !== 'l') {
|
|
355
|
-
yield { type:
|
|
356
|
-
accumulator = EMPTY
|
|
496
|
+
yield { type: TOKEN_ERROR, ...Accumulator.end(accumulatorState), cause: 'expected null literal (l)' }
|
|
357
497
|
state = stack.pop()
|
|
358
498
|
break
|
|
359
499
|
}
|
|
360
|
-
|
|
500
|
+
Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
|
|
361
501
|
|
|
362
502
|
next = segmentIter.next()
|
|
363
503
|
if(next.done || next.value.segment !== 'l') {
|
|
364
|
-
yield { type:
|
|
365
|
-
accumulator = EMPTY
|
|
504
|
+
yield { type: TOKEN_ERROR, ...Accumulator.end(accumulatorState), cause: 'expected null literal (l)' }
|
|
366
505
|
state = stack.pop()
|
|
367
506
|
break
|
|
368
507
|
}
|
|
369
|
-
|
|
508
|
+
Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
|
|
370
509
|
|
|
371
|
-
yield { type:
|
|
372
|
-
accumulator = EMPTY
|
|
510
|
+
yield { type: TOKEN_NULL, ...Accumulator.end(accumulatorState) }
|
|
373
511
|
|
|
374
512
|
next = segmentIter.next()
|
|
375
513
|
state = stack.pop()
|
|
376
514
|
break
|
|
377
515
|
case '-':
|
|
378
|
-
|
|
516
|
+
Accumulator.start(accumulatorState, next.value.index)
|
|
517
|
+
Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
|
|
379
518
|
state = STATE.NUMBER
|
|
380
519
|
next = segmentIter.next()
|
|
381
520
|
break
|
|
382
521
|
case '0':
|
|
383
|
-
|
|
522
|
+
Accumulator.start(accumulatorState, next.value.index)
|
|
523
|
+
Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
|
|
524
|
+
|
|
384
525
|
state = STATE.NUMBER_INT_AFTER
|
|
385
526
|
next = segmentIter.next()
|
|
527
|
+
if(next.done) {
|
|
528
|
+
yield { type: TOKEN_NUMBER, ...Accumulator.end(accumulatorState) }
|
|
529
|
+
state = stack.pop()
|
|
530
|
+
}
|
|
386
531
|
break
|
|
387
532
|
case '1': case '2': case '3':
|
|
388
533
|
case '4': case '5': case '6':
|
|
389
534
|
case '7': case '8': case '9':
|
|
390
|
-
|
|
535
|
+
Accumulator.start(accumulatorState, next.value.index)
|
|
391
536
|
state = STATE.NUMBER_INT
|
|
392
537
|
break
|
|
393
538
|
default:
|
|
394
539
|
//
|
|
395
|
-
yield { type:
|
|
540
|
+
yield { type: TOKEN_ERROR, value: next.value.segment, start: next.value.index, end: next.value.index, cause: 'unexpected value' }
|
|
396
541
|
next = segmentIter.next()
|
|
397
542
|
break
|
|
398
543
|
}
|
|
@@ -400,7 +545,7 @@ export class JSONTokenizer {
|
|
|
400
545
|
case STATE.NUMBER:
|
|
401
546
|
switch(next.value.segment) {
|
|
402
547
|
case '0':
|
|
403
|
-
|
|
548
|
+
Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
|
|
404
549
|
state = STATE.NUMBER_INT_AFTER
|
|
405
550
|
next = segmentIter.next()
|
|
406
551
|
break
|
|
@@ -414,14 +559,13 @@ export class JSONTokenizer {
|
|
|
414
559
|
case '1': case '2': case '3':
|
|
415
560
|
case '4': case '5': case '6':
|
|
416
561
|
case '7': case '8': case '9':
|
|
417
|
-
|
|
562
|
+
Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
|
|
418
563
|
state = STATE.NUMBER_INT
|
|
419
564
|
next = segmentIter.next()
|
|
420
565
|
break
|
|
421
566
|
default:
|
|
422
|
-
|
|
423
|
-
yield { type:
|
|
424
|
-
accumulator = EMPTY
|
|
567
|
+
Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
|
|
568
|
+
yield { type: TOKEN_ERROR, ...Accumulator.end(accumulatorState), cause: 'invalid number format' }
|
|
425
569
|
next = segmentIter.next()
|
|
426
570
|
break
|
|
427
571
|
}
|
|
@@ -432,12 +576,12 @@ export class JSONTokenizer {
|
|
|
432
576
|
case '1': case '2': case '3':
|
|
433
577
|
case '4': case '5': case '6':
|
|
434
578
|
case '7': case '8': case '9':
|
|
435
|
-
|
|
579
|
+
Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
|
|
436
580
|
next = segmentIter.next()
|
|
437
581
|
|
|
438
582
|
if(next.done) {
|
|
439
|
-
yield { type:
|
|
440
|
-
|
|
583
|
+
yield { type: TOKEN_NUMBER, ...Accumulator.end(accumulatorState) }
|
|
584
|
+
state = stack.pop()
|
|
441
585
|
}
|
|
442
586
|
break
|
|
443
587
|
default:
|
|
@@ -448,19 +592,18 @@ export class JSONTokenizer {
|
|
|
448
592
|
case STATE.NUMBER_INT_AFTER:
|
|
449
593
|
switch(next.value.segment) {
|
|
450
594
|
case '.':
|
|
451
|
-
|
|
595
|
+
Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
|
|
452
596
|
next = segmentIter.next()
|
|
453
597
|
state = STATE.NUMBER_DECIMAL_FIRST
|
|
454
598
|
break
|
|
455
599
|
case 'e':
|
|
456
600
|
case 'E':
|
|
457
|
-
|
|
601
|
+
Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
|
|
458
602
|
state = STATE.NUMBER_EXPONENT_SIGN
|
|
459
603
|
next = segmentIter.next()
|
|
460
604
|
break
|
|
461
605
|
default:
|
|
462
|
-
yield { type:
|
|
463
|
-
accumulator = EMPTY
|
|
606
|
+
yield { type: TOKEN_NUMBER, ...Accumulator.end(accumulatorState) }
|
|
464
607
|
state = stack.pop()
|
|
465
608
|
break
|
|
466
609
|
}
|
|
@@ -471,20 +614,20 @@ export class JSONTokenizer {
|
|
|
471
614
|
case '1': case '2': case '3':
|
|
472
615
|
case '4': case '5': case '6':
|
|
473
616
|
case '7': case '8': case '9':
|
|
474
|
-
|
|
617
|
+
Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
|
|
475
618
|
next = segmentIter.next()
|
|
476
619
|
state = STATE.NUMBER_DECIMAL
|
|
477
620
|
|
|
478
621
|
if(next.done) {
|
|
479
|
-
yield { type:
|
|
480
|
-
|
|
622
|
+
yield { type: TOKEN_NUMBER, ...Accumulator.end(accumulatorState) }
|
|
623
|
+
state = stack.pop()
|
|
481
624
|
}
|
|
482
625
|
break
|
|
483
626
|
default:
|
|
484
|
-
|
|
485
|
-
yield { type:
|
|
486
|
-
accumulator = EMPTY
|
|
627
|
+
Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
|
|
628
|
+
yield { type: TOKEN_ERROR, ...Accumulator.end(accumulatorState), cause: 'invalid decimal format' }
|
|
487
629
|
next = segmentIter.next()
|
|
630
|
+
state = stack.pop()
|
|
488
631
|
break
|
|
489
632
|
}
|
|
490
633
|
break
|
|
@@ -494,23 +637,22 @@ export class JSONTokenizer {
|
|
|
494
637
|
case '1': case '2': case '3':
|
|
495
638
|
case '4': case '5': case '6':
|
|
496
639
|
case '7': case '8': case '9':
|
|
497
|
-
|
|
640
|
+
Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
|
|
498
641
|
next = segmentIter.next()
|
|
499
642
|
|
|
500
643
|
if(next.done) {
|
|
501
|
-
yield { type:
|
|
502
|
-
|
|
644
|
+
yield { type: TOKEN_NUMBER, ...Accumulator.end(accumulatorState) }
|
|
645
|
+
state = stack.pop()
|
|
503
646
|
}
|
|
504
647
|
break
|
|
505
648
|
case 'e':
|
|
506
649
|
case 'E':
|
|
507
|
-
|
|
650
|
+
Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
|
|
508
651
|
state = STATE.NUMBER_EXPONENT_SIGN
|
|
509
652
|
next = segmentIter.next()
|
|
510
653
|
break
|
|
511
654
|
default:
|
|
512
|
-
yield { type:
|
|
513
|
-
accumulator = EMPTY
|
|
655
|
+
yield { type: TOKEN_NUMBER, ...Accumulator.end(accumulatorState) }
|
|
514
656
|
state = stack.pop()
|
|
515
657
|
break
|
|
516
658
|
}
|
|
@@ -518,12 +660,12 @@ export class JSONTokenizer {
|
|
|
518
660
|
case STATE.NUMBER_EXPONENT_SIGN:
|
|
519
661
|
switch(next.value.segment) {
|
|
520
662
|
case '+':
|
|
521
|
-
|
|
663
|
+
Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
|
|
522
664
|
next = segmentIter.next()
|
|
523
665
|
state = STATE.NUMBER_EXPONENT_FIRST
|
|
524
666
|
break
|
|
525
667
|
case '-':
|
|
526
|
-
|
|
668
|
+
Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
|
|
527
669
|
next = segmentIter.next()
|
|
528
670
|
state = STATE.NUMBER_EXPONENT_FIRST
|
|
529
671
|
break
|
|
@@ -538,15 +680,20 @@ export class JSONTokenizer {
|
|
|
538
680
|
case '1': case '2': case '3':
|
|
539
681
|
case '4': case '5': case '6':
|
|
540
682
|
case '7': case '8': case '9':
|
|
541
|
-
|
|
683
|
+
Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
|
|
542
684
|
next = segmentIter.next()
|
|
543
685
|
state = STATE.NUMBER_EXPONENT
|
|
686
|
+
|
|
687
|
+
if(next.done) {
|
|
688
|
+
yield { type: TOKEN_NUMBER, ...Accumulator.end(accumulatorState) }
|
|
689
|
+
state = stack.pop()
|
|
690
|
+
}
|
|
544
691
|
break
|
|
545
692
|
default:
|
|
546
|
-
|
|
547
|
-
yield { type:
|
|
548
|
-
accumulator = EMPTY
|
|
693
|
+
Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
|
|
694
|
+
yield { type: TOKEN_ERROR, ...Accumulator.end(accumulatorState), cause: 'invalid exponent format' }
|
|
549
695
|
next = segmentIter.next()
|
|
696
|
+
state = stack.pop()
|
|
550
697
|
break
|
|
551
698
|
}
|
|
552
699
|
break
|
|
@@ -556,12 +703,16 @@ export class JSONTokenizer {
|
|
|
556
703
|
case '1': case '2': case '3':
|
|
557
704
|
case '4': case '5': case '6':
|
|
558
705
|
case '7': case '8': case '9':
|
|
559
|
-
|
|
706
|
+
Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
|
|
560
707
|
next = segmentIter.next()
|
|
708
|
+
|
|
709
|
+
if(next.done) {
|
|
710
|
+
yield { type: TOKEN_NUMBER, ...Accumulator.end(accumulatorState) }
|
|
711
|
+
state = stack.pop()
|
|
712
|
+
}
|
|
561
713
|
break
|
|
562
714
|
default:
|
|
563
|
-
yield { type:
|
|
564
|
-
accumulator = EMPTY
|
|
715
|
+
yield { type: TOKEN_NUMBER, ...Accumulator.end(accumulatorState) }
|
|
565
716
|
state = stack.pop()
|
|
566
717
|
break
|
|
567
718
|
}
|
|
@@ -569,11 +720,12 @@ export class JSONTokenizer {
|
|
|
569
720
|
case STATE.OBJ_OPEN:
|
|
570
721
|
switch(next.value.segment) {
|
|
571
722
|
case ' ': case '\r': case '\n': case '\t':
|
|
723
|
+
Accumulator.start(accumulatorState, next.value.index)
|
|
572
724
|
stack.push(STATE.OBJ_OPEN)
|
|
573
725
|
state = STATE.WS
|
|
574
726
|
break
|
|
575
727
|
case '}':
|
|
576
|
-
yield { type:
|
|
728
|
+
yield { type: TOKEN_OBJECT_CLOSE_IMMEDIATE, value: next.value.segment, start: next.value.index, end: next.value.index }
|
|
577
729
|
next = segmentIter.next()
|
|
578
730
|
state = stack.pop()
|
|
579
731
|
break
|
|
@@ -586,47 +738,62 @@ export class JSONTokenizer {
|
|
|
586
738
|
case STATE.OBJ_CLOSE:
|
|
587
739
|
switch(next.value.segment) {
|
|
588
740
|
case '}':
|
|
589
|
-
yield { type:
|
|
741
|
+
yield { type: TOKEN_OBJECT_CLOSE, value: next.value.segment, start: next.value.index, end: next.value.index }
|
|
590
742
|
next = segmentIter.next()
|
|
591
743
|
state = stack.pop()
|
|
592
744
|
break
|
|
593
745
|
default:
|
|
594
|
-
|
|
595
|
-
|
|
746
|
+
yield { type: TOKEN_ERROR, value: next.value.segment, start: next.value.index, end: next.value.index, cause: 'expected object close' }
|
|
747
|
+
next = segmentIter.next()
|
|
748
|
+
state = stack.pop()
|
|
596
749
|
break
|
|
597
750
|
}
|
|
598
751
|
break
|
|
599
752
|
case STATE.MEMBERS:
|
|
753
|
+
switch(next.value.segment){
|
|
754
|
+
case '}':
|
|
755
|
+
state = stack.pop()
|
|
756
|
+
break
|
|
757
|
+
default:
|
|
758
|
+
stack.push(STATE.MEMBERS_CONTINUE)
|
|
759
|
+
state = STATE.MEMBER
|
|
760
|
+
break
|
|
761
|
+
}
|
|
762
|
+
break
|
|
763
|
+
case STATE.MEMBERS_CONTINUE:
|
|
600
764
|
switch(next.value.segment){
|
|
601
765
|
case '}':
|
|
602
766
|
state = stack.pop()
|
|
603
767
|
break
|
|
604
768
|
case ',':
|
|
605
|
-
yield { type:
|
|
606
|
-
stack.push(STATE.
|
|
769
|
+
yield { type: TOKEN_OBJECT_MEMBER_COMMA, value: next.value.segment, start: next.value.index, end: next.value.index }
|
|
770
|
+
stack.push(STATE.MEMBERS_CONTINUE)
|
|
607
771
|
state = STATE.MEMBER
|
|
608
772
|
next = segmentIter.next()
|
|
609
773
|
break
|
|
610
774
|
default:
|
|
611
|
-
|
|
612
|
-
state =
|
|
775
|
+
yield { type: TOKEN_ERROR, value: next.value.segment, start: next.value.index, end: next.value.index, cause: 'expecting comma or close object' }
|
|
776
|
+
state = stack.pop()
|
|
777
|
+
next = segmentIter.next()
|
|
613
778
|
break
|
|
614
779
|
}
|
|
615
780
|
break
|
|
616
781
|
case STATE.MEMBER:
|
|
617
782
|
switch(next.value.segment) {
|
|
618
783
|
case ' ': case '\r': case '\n': case '\t':
|
|
784
|
+
Accumulator.start(accumulatorState, next.value.index)
|
|
619
785
|
stack.push(STATE.MEMBER)
|
|
620
786
|
state = STATE.WS
|
|
621
787
|
break
|
|
622
788
|
case '"':
|
|
623
|
-
yield { type:
|
|
789
|
+
yield { type: TOKEN_OBJECT_KEY_OPEN, value: next.value.segment, start: next.value.index, end: next.value.index }
|
|
624
790
|
stack.push(STATE.MEMBER_KEY_AFTER)
|
|
625
791
|
state = STATE.KEY
|
|
626
792
|
next = segmentIter.next()
|
|
793
|
+
if(!next.done) { Accumulator.start(accumulatorState, next.value.index) }
|
|
627
794
|
break
|
|
628
795
|
default:
|
|
629
|
-
yield { type:
|
|
796
|
+
yield { type: TOKEN_ERROR, value: next.value.segment, start: next.value.index, end: next.value.index, cause: 'expected member key or whitespace' }
|
|
630
797
|
next = segmentIter.next()
|
|
631
798
|
break
|
|
632
799
|
}
|
|
@@ -634,17 +801,18 @@ export class JSONTokenizer {
|
|
|
634
801
|
case STATE.MEMBER_KEY_AFTER:
|
|
635
802
|
switch(next.value.segment) {
|
|
636
803
|
case ' ': case '\r': case '\n': case '\t':
|
|
804
|
+
Accumulator.start(accumulatorState, next.value.index)
|
|
637
805
|
stack.push(STATE.MEMBER_KEY_AFTER)
|
|
638
806
|
state = STATE.WS
|
|
639
807
|
break
|
|
640
808
|
case ':':
|
|
641
|
-
yield { type:
|
|
809
|
+
yield { type: TOKEN_OBJECT_COLON, value: next.value.segment, start: next.value.index, end: next.value.index }
|
|
642
810
|
next = segmentIter.next()
|
|
643
811
|
|
|
644
812
|
state = STATE.ELEMENT
|
|
645
813
|
break
|
|
646
814
|
default:
|
|
647
|
-
yield { type:
|
|
815
|
+
yield { type: TOKEN_ERROR, value: next.value.segment, start: next.value.index, end: next.value.index, cause: 'expected whitespace or colon' }
|
|
648
816
|
next = segmentIter.next()
|
|
649
817
|
break
|
|
650
818
|
}
|
|
@@ -652,16 +820,17 @@ export class JSONTokenizer {
|
|
|
652
820
|
case STATE.ARY_OPEN:
|
|
653
821
|
switch(next.value.segment) {
|
|
654
822
|
case ' ': case '\r': case '\n': case '\t':
|
|
823
|
+
Accumulator.start(accumulatorState, next.value.index)
|
|
655
824
|
stack.push(STATE.ARY_OPEN)
|
|
656
825
|
state = STATE.WS
|
|
657
826
|
break
|
|
658
827
|
case ']':
|
|
659
|
-
yield { type:
|
|
828
|
+
yield { type: TOKEN_ARRAY_CLOSE_IMMEDIATE, value: next.value.segment, start: next.value.index, end: next.value.index }
|
|
660
829
|
next = segmentIter.next()
|
|
661
830
|
state = stack.pop()
|
|
662
831
|
break
|
|
663
832
|
case ',':
|
|
664
|
-
yield { type:
|
|
833
|
+
yield { type: TOKEN_ERROR, value: next.value.segment, start: next.value.index, end: next.value.index, cause: 'unexpected array comma' }
|
|
665
834
|
next = segmentIter.next()
|
|
666
835
|
break
|
|
667
836
|
default:
|
|
@@ -673,13 +842,14 @@ export class JSONTokenizer {
|
|
|
673
842
|
case STATE.ARY_CLOSE:
|
|
674
843
|
switch(next.value.segment) {
|
|
675
844
|
case ']':
|
|
676
|
-
yield { type:
|
|
845
|
+
yield { type: TOKEN_ARRAY_CLOSE, value: next.value.segment, start: next.value.index, end: next.value.index }
|
|
677
846
|
next = segmentIter.next()
|
|
678
847
|
state = stack.pop()
|
|
679
848
|
break
|
|
680
849
|
default:
|
|
681
|
-
|
|
682
|
-
|
|
850
|
+
yield { type: TOKEN_ERROR, value: next.value.segment, start: next.value.index, end: next.value.index, cause: 'expected array close' }
|
|
851
|
+
next = segmentIter.next()
|
|
852
|
+
state = stack.pop()
|
|
683
853
|
break
|
|
684
854
|
}
|
|
685
855
|
break
|
|
@@ -688,34 +858,31 @@ export class JSONTokenizer {
|
|
|
688
858
|
switch(next.value.segment) {
|
|
689
859
|
case '"':
|
|
690
860
|
if(state === STATE.KEY) {
|
|
691
|
-
yield { type:
|
|
692
|
-
yield { type:
|
|
861
|
+
yield { type: TOKEN_OBJECT_KEY, ...Accumulator.end(accumulatorState) }
|
|
862
|
+
yield { type: TOKEN_OBJECT_KEY_CLOSE, value: next.value.segment, start: next.value.index, end: next.value.index }
|
|
693
863
|
}
|
|
694
864
|
else {
|
|
695
|
-
yield { type:
|
|
696
|
-
yield { type:
|
|
865
|
+
yield { type: TOKEN_STRING, ...Accumulator.end(accumulatorState) }
|
|
866
|
+
yield { type: TOKEN_STRING_CLOSE, value: next.value.segment, start: next.value.index, end: next.value.index }
|
|
697
867
|
}
|
|
698
868
|
|
|
699
|
-
accumulator = EMPTY
|
|
700
869
|
next = segmentIter.next()
|
|
701
870
|
state = stack.pop()
|
|
702
871
|
break
|
|
703
872
|
case '\\':
|
|
704
|
-
|
|
873
|
+
Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
|
|
705
874
|
|
|
706
875
|
next = segmentIter.next()
|
|
707
876
|
if(next.done) {
|
|
708
|
-
yield { type:
|
|
709
|
-
accumulator = EMPTY
|
|
877
|
+
yield { type: TOKEN_ERROR, ...Accumulator.end(accumulatorState), cause: 'invalid escape sequence' }
|
|
710
878
|
state = stack.pop()
|
|
711
879
|
break
|
|
712
880
|
}
|
|
713
881
|
|
|
714
|
-
|
|
882
|
+
Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
|
|
715
883
|
|
|
716
884
|
if(!JSONTokenizer.isValueEscapeChar(next.value?.segment)) {
|
|
717
|
-
yield { type:
|
|
718
|
-
accumulator = EMPTY
|
|
885
|
+
yield { type: TOKEN_ERROR, ...Accumulator.end(accumulatorState), cause: 'invalid escape character' }
|
|
719
886
|
}
|
|
720
887
|
|
|
721
888
|
if(next.value.segment === 'u') {
|
|
@@ -726,11 +893,10 @@ export class JSONTokenizer {
|
|
|
726
893
|
next = segmentIter.next()
|
|
727
894
|
break
|
|
728
895
|
default:
|
|
729
|
-
|
|
896
|
+
Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
|
|
730
897
|
|
|
731
898
|
if(!JSONTokenizer.isValidChar(next.value.segment)) {
|
|
732
|
-
yield { type:
|
|
733
|
-
accumulator = EMPTY
|
|
899
|
+
yield { type: TOKEN_ERROR, ...Accumulator.end(accumulatorState), cause: 'invalid character in string literal' }
|
|
734
900
|
}
|
|
735
901
|
|
|
736
902
|
next = segmentIter.next()
|
|
@@ -746,52 +912,68 @@ export class JSONTokenizer {
|
|
|
746
912
|
case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
|
|
747
913
|
case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
|
|
748
914
|
|
|
749
|
-
|
|
915
|
+
Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
|
|
750
916
|
|
|
751
917
|
// second
|
|
752
918
|
next = segmentIter.next()
|
|
753
919
|
if(next.done) {
|
|
754
|
-
yield { type:
|
|
755
|
-
accumulator = EMPTY
|
|
920
|
+
yield { type: TOKEN_ERROR, ...Accumulator.end(accumulatorState), cause: 'invalid escape sequence (2nd done)' }
|
|
756
921
|
state = stack.pop()
|
|
757
922
|
break
|
|
758
923
|
}
|
|
759
|
-
|
|
924
|
+
Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
|
|
925
|
+
if(!JSONTokenizer.isValidHEX(next.value.segment)) {
|
|
926
|
+
yield { type: TOKEN_ERROR, ...Accumulator.end(accumulatorState), cause: 'invalid escape sequence (2nd)' }
|
|
927
|
+
state = stack.pop()
|
|
928
|
+
next = segmentIter.next()
|
|
929
|
+
break
|
|
930
|
+
}
|
|
760
931
|
|
|
761
932
|
// third
|
|
762
933
|
next = segmentIter.next()
|
|
763
934
|
if(next.done) {
|
|
764
|
-
yield { type:
|
|
765
|
-
|
|
935
|
+
yield { type: TOKEN_ERROR, ...Accumulator.end(accumulatorState), cause: 'invalid escape sequence (3rd done)' }
|
|
936
|
+
state = stack.pop()
|
|
937
|
+
break
|
|
938
|
+
}
|
|
939
|
+
Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
|
|
940
|
+
if(!JSONTokenizer.isValidHEX(next.value.segment)) {
|
|
941
|
+
yield { type: TOKEN_ERROR, ...Accumulator.end(accumulatorState), cause: 'invalid escape sequence (3rd)' }
|
|
766
942
|
state = stack.pop()
|
|
943
|
+
next = segmentIter.next()
|
|
767
944
|
break
|
|
768
945
|
}
|
|
769
|
-
accumulator += next.value?.segment
|
|
770
946
|
|
|
771
947
|
// fourth
|
|
772
948
|
next = segmentIter.next()
|
|
773
949
|
if(next.done) {
|
|
774
|
-
yield { type:
|
|
775
|
-
|
|
950
|
+
yield { type: TOKEN_ERROR, ...Accumulator.end(accumulatorState), cause: 'invalid escape sequence (4th done)' }
|
|
951
|
+
state = stack.pop()
|
|
952
|
+
break
|
|
953
|
+
}
|
|
954
|
+
Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
|
|
955
|
+
if(!JSONTokenizer.isValidHEX(next.value.segment)) {
|
|
956
|
+
yield { type: TOKEN_ERROR, ...Accumulator.end(accumulatorState), cause: 'invalid escape sequence (4th)' }
|
|
776
957
|
state = stack.pop()
|
|
958
|
+
next = segmentIter.next()
|
|
777
959
|
break
|
|
778
960
|
}
|
|
779
|
-
accumulator += next.value?.segment
|
|
780
961
|
|
|
781
962
|
state = stack.pop()
|
|
782
963
|
next = segmentIter.next()
|
|
783
964
|
break
|
|
784
965
|
default:
|
|
785
|
-
|
|
786
|
-
yield { type:
|
|
787
|
-
|
|
966
|
+
Accumulator.accumulate(accumulatorState, next.value.segment, next.value.index)
|
|
967
|
+
yield { type: TOKEN_ERROR, ...Accumulator.end(accumulatorState), cause: 'invalid escape sequence' }
|
|
968
|
+
|
|
969
|
+
state = stack.pop()
|
|
788
970
|
next = segmentIter.next()
|
|
789
971
|
break
|
|
790
972
|
}
|
|
791
973
|
break
|
|
792
974
|
default:
|
|
793
|
-
|
|
794
|
-
|
|
975
|
+
yield { type: TOKEN_ERROR, ...Accumulator.end(accumulatorState), cause: `unknown state ${state}` }
|
|
976
|
+
break
|
|
795
977
|
}
|
|
796
978
|
}
|
|
797
979
|
}
|