functionalscript 0.0.326 → 0.0.330
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- package/json/tokenizer/index.js +70 -99
- package/json/tokenizer/test.js +4 -4
- package/package.json +1 -1
package/json/tokenizer/index.js
CHANGED
|
@@ -3,53 +3,29 @@ const operator = require('../../types/function/operator')
|
|
|
3
3
|
const { concat } = require('../../types/list')
|
|
4
4
|
const list = require('../../types/list')
|
|
5
5
|
|
|
6
|
-
/**
|
|
7
|
-
|
|
8
|
-
/** @typedef {{readonly kind: '}'}} RightBraceToken */
|
|
9
|
-
|
|
10
|
-
/** @typedef {{readonly kind: ':'}} ColonToken */
|
|
11
|
-
|
|
12
|
-
/** @typedef {{readonly kind: ','}} CommaToken */
|
|
13
|
-
|
|
14
|
-
/** @typedef {{readonly kind: '['}} LeftBracketToken */
|
|
15
|
-
|
|
16
|
-
/** @typedef {{readonly kind: ']'}} RightBracketToken */
|
|
17
|
-
|
|
18
|
-
/**
|
|
6
|
+
/**
|
|
19
7
|
* @typedef {{
|
|
20
8
|
* readonly kind: 'string'
|
|
21
9
|
* readonly value: string
|
|
22
|
-
* }} StringToken
|
|
10
|
+
* }} StringToken
|
|
23
11
|
* */
|
|
24
12
|
|
|
25
|
-
/**
|
|
13
|
+
/**
|
|
26
14
|
* @typedef {{
|
|
27
15
|
* readonly kind: 'number'
|
|
28
16
|
* readonly value: string
|
|
29
|
-
* }} NumberToken
|
|
17
|
+
* }} NumberToken
|
|
30
18
|
* */
|
|
31
19
|
|
|
32
|
-
/** @typedef {{readonly kind: 'true'}} TrueToken */
|
|
33
|
-
|
|
34
|
-
/** @typedef {{readonly kind: 'false'}} FalseToken */
|
|
35
|
-
|
|
36
|
-
/** @typedef {{readonly kind: 'null'}} NullToken */
|
|
37
|
-
|
|
38
20
|
/** @typedef {{readonly kind: 'error', message: ErrorMessage}} ErrorToken */
|
|
39
21
|
|
|
40
|
-
/**
|
|
22
|
+
/** @typedef {{readonly kind: '{' | '}' | ':' | ',' | '[' | ']' | 'true' | 'false' | 'null'}} SimpleToken */
|
|
23
|
+
|
|
24
|
+
/**
|
|
41
25
|
* @typedef {|
|
|
42
|
-
*
|
|
43
|
-
* RightBraceToken |
|
|
44
|
-
* ColonToken |
|
|
45
|
-
* CommaToken |
|
|
46
|
-
* LeftBracketToken |
|
|
47
|
-
* RightBracketToken |
|
|
26
|
+
* SimpleToken |
|
|
48
27
|
* StringToken |
|
|
49
28
|
* NumberToken |
|
|
50
|
-
* TrueToken |
|
|
51
|
-
* FalseToken |
|
|
52
|
-
* NullToken |
|
|
53
29
|
* ErrorToken
|
|
54
30
|
* } JsonToken
|
|
55
31
|
*/
|
|
@@ -74,7 +50,7 @@ const newLine = 0x0a
|
|
|
74
50
|
const carriageReturn = 0x0d
|
|
75
51
|
const space = 0x20
|
|
76
52
|
|
|
77
|
-
const
|
|
53
|
+
const backslash = 0x5c
|
|
78
54
|
const slash = 0x2f
|
|
79
55
|
const backspace = 0x08
|
|
80
56
|
const formfeed = 0x0c
|
|
@@ -93,7 +69,7 @@ const letterT = 0x74
|
|
|
93
69
|
const letterU = 0x75
|
|
94
70
|
const letterZ = 0x7a
|
|
95
71
|
|
|
96
|
-
/**
|
|
72
|
+
/**
|
|
97
73
|
* @typedef {|
|
|
98
74
|
* InitialState |
|
|
99
75
|
* ParseKeywordState |
|
|
@@ -103,10 +79,10 @@ const letterZ = 0x7a
|
|
|
103
79
|
* ParseNumberState |
|
|
104
80
|
* InvalidNumberState |
|
|
105
81
|
* EofState
|
|
106
|
-
* } TokenizerState
|
|
82
|
+
* } TokenizerState
|
|
107
83
|
*/
|
|
108
84
|
|
|
109
|
-
/**
|
|
85
|
+
/**
|
|
110
86
|
* @typedef {|
|
|
111
87
|
* 'invalid keyword' |
|
|
112
88
|
* '" are missing' |
|
|
@@ -115,7 +91,7 @@ const letterZ = 0x7a
|
|
|
115
91
|
* 'unexpected character' |
|
|
116
92
|
* 'invalid number' |
|
|
117
93
|
* 'eof'
|
|
118
|
-
* } ErrorMessage
|
|
94
|
+
* } ErrorMessage
|
|
119
95
|
*/
|
|
120
96
|
|
|
121
97
|
/** @typedef {{ readonly kind: 'initial'}} InitialState */
|
|
@@ -129,7 +105,7 @@ const letterZ = 0x7a
|
|
|
129
105
|
/** @typedef {{ readonly kind: 'unicodeChar', readonly value: string, readonly unicode: number, readonly hexIndex: number}} ParseUnicodeCharState */
|
|
130
106
|
|
|
131
107
|
/**
|
|
132
|
-
* @typedef {{
|
|
108
|
+
* @typedef {{
|
|
133
109
|
* readonly kind: 'number',
|
|
134
110
|
* readonly numberKind: '0' | '-' | 'int' | '.' | 'fractional' | 'e' | 'e+' | 'e-' | 'expDigits'
|
|
135
111
|
* readonly value: string
|
|
@@ -140,21 +116,17 @@ const letterZ = 0x7a
|
|
|
140
116
|
|
|
141
117
|
/** @typedef {{ readonly kind: 'eof'}} EofState */
|
|
142
118
|
|
|
143
|
-
/** @typedef {number|undefined}
|
|
119
|
+
/** @typedef {number|undefined} CharCodeOrEof */
|
|
144
120
|
|
|
145
|
-
/** @type {(old: string) => (input:
|
|
121
|
+
/** @type {(old: string) => (input: CharCodeOrEof) => string} */
|
|
146
122
|
const appendChar = old => input => input === undefined ? old : operator.concat(charToString(input))(old)
|
|
147
123
|
|
|
148
|
-
/** @type {(input:
|
|
124
|
+
/** @type {(input: CharCodeOrEof) => string} */
|
|
149
125
|
const charToString = input => input === undefined ? '' : String.fromCharCode(input)
|
|
150
126
|
|
|
151
|
-
/** @type {(state: InitialState) => (input:
|
|
152
|
-
const initialStateOp = initialState => input =>
|
|
127
|
+
/** @type {(state: InitialState) => (input: number) => readonly[list.List<JsonToken>, TokenizerState]} */
|
|
128
|
+
const initialStateOp = initialState => input =>
|
|
153
129
|
{
|
|
154
|
-
if (input === undefined)
|
|
155
|
-
{
|
|
156
|
-
return[undefined, {kind: 'eof'}]
|
|
157
|
-
}
|
|
158
130
|
if (input >= digit1 && input <= digit9)
|
|
159
131
|
{
|
|
160
132
|
return [undefined, { kind: 'number', value: charToString(input), numberKind: 'int'}]
|
|
@@ -182,21 +154,9 @@ const initialStateOp = initialState => input =>
|
|
|
182
154
|
}
|
|
183
155
|
}
|
|
184
156
|
|
|
185
|
-
/** @type {(state: ParseNumberState) => (input:
|
|
157
|
+
/** @type {(state: ParseNumberState) => (input: number) => readonly[list.List<JsonToken>, TokenizerState]} */
|
|
186
158
|
const parseNumberStateOp = state => input =>
|
|
187
159
|
{
|
|
188
|
-
if (input === undefined)
|
|
189
|
-
{
|
|
190
|
-
switch (state.numberKind)
|
|
191
|
-
{
|
|
192
|
-
case '-':
|
|
193
|
-
case '.':
|
|
194
|
-
case 'e':
|
|
195
|
-
case 'e+':
|
|
196
|
-
case 'e-': return [[{kind: 'error', message: 'invalid number'}], {kind: 'invalidNumber', }]
|
|
197
|
-
default: return [[{kind: 'number', value: state.value}], {kind: 'eof'}]
|
|
198
|
-
}
|
|
199
|
-
}
|
|
200
160
|
if (input === decimalPoint)
|
|
201
161
|
{
|
|
202
162
|
switch (state.numberKind)
|
|
@@ -266,12 +226,12 @@ const parseNumberStateOp = state => input =>
|
|
|
266
226
|
case '.':
|
|
267
227
|
case 'e':
|
|
268
228
|
case 'e+':
|
|
269
|
-
case 'e-':
|
|
229
|
+
case 'e-':
|
|
270
230
|
{
|
|
271
231
|
const next = tokenizeOp({kind: 'initial'})(input)
|
|
272
232
|
return [{first: {kind: 'error', message: 'invalid number'}, tail: next[0]}, next[1]]
|
|
273
233
|
}
|
|
274
|
-
default:
|
|
234
|
+
default:
|
|
275
235
|
{
|
|
276
236
|
const next = tokenizeOp({kind: 'initial'})(input)
|
|
277
237
|
return [{first: {kind: 'number', value: state.value}, tail: next[0]}, next[1]]
|
|
@@ -297,14 +257,10 @@ const isTerminalForNumber = char =>
|
|
|
297
257
|
}
|
|
298
258
|
}
|
|
299
259
|
|
|
300
|
-
/** @type {(state: InvalidNumberState) => (input:
|
|
260
|
+
/** @type {(state: InvalidNumberState) => (input: number) => readonly[list.List<JsonToken>, TokenizerState]} */
|
|
301
261
|
const invalidNumberStateOp = state => input =>
|
|
302
262
|
{
|
|
303
|
-
if (input
|
|
304
|
-
{
|
|
305
|
-
return [[{kind: 'error', message: 'invalid number'}], {kind: 'eof'}]
|
|
306
|
-
}
|
|
307
|
-
if (isTerminalForNumber(input))
|
|
263
|
+
if (isTerminalForNumber(input))
|
|
308
264
|
{
|
|
309
265
|
const next = tokenizeOp({kind: 'initial'})(input)
|
|
310
266
|
return [{first: {kind: 'error', message: 'invalid number'}, tail: next[0]}, next[1]]
|
|
@@ -312,25 +268,24 @@ const invalidNumberStateOp = state => input =>
|
|
|
312
268
|
return [undefined, {kind: 'invalidNumber'}]
|
|
313
269
|
}
|
|
314
270
|
|
|
315
|
-
/** @type {(state: ParseStringState) => (input:
|
|
271
|
+
/** @type {(state: ParseStringState) => (input: number) => readonly[list.List<JsonToken>, TokenizerState]} */
|
|
316
272
|
const parseStringStateOp = state => input =>
|
|
317
273
|
{
|
|
318
274
|
switch(input)
|
|
319
275
|
{
|
|
320
276
|
case quotationMark: return[[{kind: 'string', value: state.value}], {kind: 'initial'}]
|
|
321
|
-
case
|
|
322
|
-
case undefined: return [[{kind: 'error', message: '" are missing'}], {kind: 'eof'}]
|
|
277
|
+
case backslash: return [undefined, {kind:'escapeChar', value: state.value}]
|
|
323
278
|
default: return [undefined, {kind:'string', value: appendChar(state.value)(input)}]
|
|
324
279
|
}
|
|
325
280
|
}
|
|
326
281
|
|
|
327
|
-
/** @type {(state: ParseEscapeCharState) => (input:
|
|
282
|
+
/** @type {(state: ParseEscapeCharState) => (input: number) => readonly[list.List<JsonToken>, TokenizerState]} */
|
|
328
283
|
const parseEscapeCharStateOp = state => input =>
|
|
329
284
|
{
|
|
330
285
|
switch(input)
|
|
331
286
|
{
|
|
332
287
|
case quotationMark:
|
|
333
|
-
case
|
|
288
|
+
case backslash:
|
|
334
289
|
case slash: return [undefined, {kind: 'string', value: appendChar(state.value)(input)}]
|
|
335
290
|
case letterB: return [undefined, {kind: 'string', value: appendChar(state.value)(backspace)}]
|
|
336
291
|
case letterF: return [undefined, {kind: 'string', value: appendChar(state.value)(formfeed)}]
|
|
@@ -338,7 +293,6 @@ const parseEscapeCharStateOp = state => input =>
|
|
|
338
293
|
case letterR: return [undefined, {kind: 'string', value: appendChar(state.value)(carriageReturn)}]
|
|
339
294
|
case letterT: return [undefined, {kind: 'string', value: appendChar(state.value)(horizontalTab)}]
|
|
340
295
|
case letterU: return [undefined, {kind: 'unicodeChar', value: state.value, unicode: 0, hexIndex: 0}]
|
|
341
|
-
case undefined: return [[{kind: 'error', message: '" are missing'}], {kind: 'eof'}]
|
|
342
296
|
default: {
|
|
343
297
|
const next = tokenizeOp({kind: 'string', value: state.value})(input)
|
|
344
298
|
return [{first: {kind: 'error', message: 'unescaped character'}, tail: next[0]}, next[1]]
|
|
@@ -354,13 +308,9 @@ const hexDigitToNumber = hex =>
|
|
|
354
308
|
if (hex >= letterA && hex <= letterF) { return hex - letterA + 10 }
|
|
355
309
|
}
|
|
356
310
|
|
|
357
|
-
/** @type {(state: ParseUnicodeCharState) => (input:
|
|
311
|
+
/** @type {(state: ParseUnicodeCharState) => (input: number) => readonly[list.List<JsonToken>, TokenizerState]} */
|
|
358
312
|
const parseUnicodeCharStateOp = state => input =>
|
|
359
313
|
{
|
|
360
|
-
if (input === undefined)
|
|
361
|
-
{
|
|
362
|
-
return [[{kind: 'error', message: '" are missing'}], {kind: 'eof'}]
|
|
363
|
-
}
|
|
364
314
|
const hexValue = hexDigitToNumber(input)
|
|
365
315
|
if (hexValue === undefined)
|
|
366
316
|
{
|
|
@@ -385,14 +335,9 @@ const stringToKeywordToken = s =>
|
|
|
385
335
|
}
|
|
386
336
|
}
|
|
387
337
|
|
|
388
|
-
/** @type {(state: ParseKeywordState) => (input:
|
|
389
|
-
const parseKeyWordStateOp = state => input =>
|
|
338
|
+
/** @type {(state: ParseKeywordState) => (input: number) => readonly[list.List<JsonToken>, TokenizerState]} */
|
|
339
|
+
const parseKeyWordStateOp = state => input =>
|
|
390
340
|
{
|
|
391
|
-
if (input === undefined)
|
|
392
|
-
{
|
|
393
|
-
const keyWordToken = stringToKeywordToken(state.value)
|
|
394
|
-
return [[keyWordToken], {kind: 'eof'}]
|
|
395
|
-
}
|
|
396
341
|
if (input >= letterA && input <= letterZ)
|
|
397
342
|
{
|
|
398
343
|
return [undefined, {kind: 'keyword', value: appendChar(state.value)(input)}]
|
|
@@ -402,26 +347,52 @@ const parseKeyWordStateOp = state => input =>
|
|
|
402
347
|
return [{first: keyWordToken, tail: next[0]}, next[1]]
|
|
403
348
|
}
|
|
404
349
|
|
|
405
|
-
/** @type {(state: EofState) => (input:
|
|
350
|
+
/** @type {(state: EofState) => (input: number) => readonly[list.List<JsonToken>, TokenizerState]} */
|
|
406
351
|
const eofStateOp = state => input => [[{kind: 'error', message: 'eof'}], state]
|
|
407
352
|
|
|
408
|
-
/** @type {operator.StateScan<
|
|
409
|
-
const
|
|
410
|
-
|
|
353
|
+
/** @type {operator.StateScan<number, TokenizerState, list.List<JsonToken>>} */
|
|
354
|
+
const tokenizeCharCodeOp = state => {
|
|
355
|
+
switch(state.kind)
|
|
356
|
+
{
|
|
357
|
+
case 'initial': return initialStateOp(state)
|
|
358
|
+
case 'keyword': return parseKeyWordStateOp(state)
|
|
359
|
+
case 'string': return parseStringStateOp(state)
|
|
360
|
+
case 'escapeChar': return parseEscapeCharStateOp(state)
|
|
361
|
+
case 'unicodeChar': return parseUnicodeCharStateOp(state)
|
|
362
|
+
case 'invalidNumber': return invalidNumberStateOp(state)
|
|
363
|
+
case 'number': return parseNumberStateOp(state)
|
|
364
|
+
case 'eof': return eofStateOp(state)
|
|
365
|
+
}
|
|
366
|
+
}
|
|
367
|
+
|
|
368
|
+
/** @type {(state: TokenizerState) => readonly[list.List<JsonToken>, TokenizerState]} */
|
|
369
|
+
const tokenizeEofOp = state => {
|
|
411
370
|
switch(state.kind)
|
|
412
371
|
{
|
|
413
|
-
case 'initial': return
|
|
414
|
-
case 'keyword': return
|
|
415
|
-
case 'string':
|
|
416
|
-
case 'escapeChar':
|
|
417
|
-
case 'unicodeChar': return
|
|
418
|
-
case 'invalidNumber': return
|
|
419
|
-
case 'number'
|
|
420
|
-
|
|
372
|
+
case 'initial': return[undefined, {kind: 'eof'}]
|
|
373
|
+
case 'keyword': return [[stringToKeywordToken(state.value)], {kind: 'eof'}]
|
|
374
|
+
case 'string':
|
|
375
|
+
case 'escapeChar':
|
|
376
|
+
case 'unicodeChar': return [[{kind: 'error', message: '" are missing'}], {kind: 'eof'}]
|
|
377
|
+
case 'invalidNumber': return [[{kind: 'error', message: 'invalid number'}], {kind: 'eof'}]
|
|
378
|
+
case 'number':
|
|
379
|
+
switch (state.numberKind)
|
|
380
|
+
{
|
|
381
|
+
case '-':
|
|
382
|
+
case '.':
|
|
383
|
+
case 'e':
|
|
384
|
+
case 'e+':
|
|
385
|
+
case 'e-': return [[{kind: 'error', message: 'invalid number'}], {kind: 'invalidNumber', }]
|
|
386
|
+
default: return [[{kind: 'number', value: state.value}], {kind: 'eof'}]
|
|
387
|
+
}
|
|
388
|
+
case 'eof': return [[{kind: 'error', message: 'eof'}], state]
|
|
421
389
|
}
|
|
422
390
|
}
|
|
423
391
|
|
|
424
|
-
/** @type {
|
|
392
|
+
/** @type {operator.StateScan<CharCodeOrEof, TokenizerState, list.List<JsonToken>>} */
|
|
393
|
+
const tokenizeOp = state => input => input === undefined ? tokenizeEofOp(state) : tokenizeCharCodeOp(state)(input)
|
|
394
|
+
|
|
395
|
+
/** @type {(input: list.List<CharCodeOrEof>) => list.List<JsonToken>} */
|
|
425
396
|
const tokenize = input => list.flat(list.stateScan(tokenizeOp)({kind: 'initial'})(input))
|
|
426
397
|
|
|
427
398
|
module.exports = {
|
package/json/tokenizer/test.js
CHANGED
|
@@ -3,17 +3,17 @@ const list = require('../../types/list')
|
|
|
3
3
|
const json = require('..')
|
|
4
4
|
const { sort } = require('../../types/object')
|
|
5
5
|
|
|
6
|
-
/** @type {(s: string) => list.List<tokenizer.
|
|
6
|
+
/** @type {(s: string) => list.List<tokenizer.CharCodeOrEof>} */
|
|
7
7
|
const toCharacters = s =>
|
|
8
8
|
{
|
|
9
|
-
/** @type {list.List<tokenizer.
|
|
9
|
+
/** @type {list.List<tokenizer.CharCodeOrEof>} */
|
|
10
10
|
const charCodes = list.toCharCodes(s)
|
|
11
11
|
return list.concat(charCodes)([undefined])
|
|
12
|
-
}
|
|
12
|
+
}
|
|
13
13
|
|
|
14
14
|
/** @type {(s: string) => readonly tokenizer.JsonToken[]} */
|
|
15
15
|
const tokenizeString = s =>
|
|
16
|
-
{
|
|
16
|
+
{
|
|
17
17
|
const characters = toCharacters(s)
|
|
18
18
|
return list.toArray(tokenizer.tokenize(characters))
|
|
19
19
|
}
|