@atproto/lex-json 0.0.15 → 0.0.16
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +6 -0
- package/dist/json-bytes-decoder.d.ts +24 -0
- package/dist/json-bytes-decoder.d.ts.map +1 -0
- package/dist/json-bytes-decoder.js +600 -0
- package/dist/json-bytes-decoder.js.map +1 -0
- package/dist/lex-json.d.ts +7 -3
- package/dist/lex-json.d.ts.map +1 -1
- package/dist/lex-json.js +20 -33
- package/dist/lex-json.js.map +1 -1
- package/package.json +1 -1
- package/src/json-bytes-decoder.bench.ts +252 -0
- package/src/json-bytes-decoder.test.ts +889 -0
- package/src/json-bytes-decoder.ts +672 -0
- package/src/lex-json.bench.ts +125 -0
- package/src/lex-json.test.ts +368 -0
- package/src/lex-json.ts +19 -33
- package/tsconfig.build.json +1 -1
- package/tsconfig.tests.json +1 -1
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,11 @@
|
|
|
1
1
|
# @atproto/lex-json
|
|
2
2
|
|
|
3
|
+
## 0.0.16
|
|
4
|
+
|
|
5
|
+
### Patch Changes
|
|
6
|
+
|
|
7
|
+
- [#4836](https://github.com/bluesky-social/atproto/pull/4836) [`952354c`](https://github.com/bluesky-social/atproto/commit/952354c1dd458251f8b643d02f4b227d40c5df17) Thanks [@matthieusieben](https://github.com/matthieusieben)! - Improve performance of the `lexParseJsonBytes` function
|
|
8
|
+
|
|
3
9
|
## 0.0.15
|
|
4
10
|
|
|
5
11
|
### Patch Changes
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
import { LexValue } from '@atproto/lex-data';
|
|
2
|
+
export declare const BASE64_NATIVE_THRESHOLD = 256;
|
|
3
|
+
/**
 * Single-use decoder that parses JSON text held in a byte array into a
 * `LexValue`. Only the constructor and `decode()` are public; every other
 * member is a private parsing helper.
 */
export declare class JsonBytesDecoder {
|
|
4
|
+
private readonly data;
|
|
5
|
+
private readonly strict;
|
|
6
|
+
private pos;
|
|
7
|
+
/**
 * @param data - Bytes of the JSON document to parse.
 * @param strict - Optional; the compiled implementation defaults it to `true`.
 */
constructor(data: Uint8Array, strict?: boolean);
|
|
8
|
+
/**
 * Parses the whole input and returns the decoded value.
 *
 * @throws TypeError when the input is malformed or when non-whitespace
 * bytes remain after the first JSON value.
 */
decode(): LexValue;
|
|
9
|
+
private parseValue;
|
|
10
|
+
private parseObject;
|
|
11
|
+
private parseArray;
|
|
12
|
+
private parseString;
|
|
13
|
+
private parseEscapeSequence;
|
|
14
|
+
private parseUnicodeEscape;
|
|
15
|
+
private hexValue;
|
|
16
|
+
private base64Value;
|
|
17
|
+
private decodeUnescapedString;
|
|
18
|
+
private parseNumber;
|
|
19
|
+
private parseTrue;
|
|
20
|
+
private parseFalse;
|
|
21
|
+
private parseNull;
|
|
22
|
+
private skipWhitespace;
|
|
23
|
+
}
|
|
24
|
+
//# sourceMappingURL=json-bytes-decoder.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"json-bytes-decoder.d.ts","sourceRoot":"","sources":["../src/json-bytes-decoder.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAwB,MAAM,mBAAmB,CAAA;AAkElE,eAAO,MAAM,uBAAuB,MAAM,CAAA;AAE1C,qBAAa,gBAAgB;IAIzB,OAAO,CAAC,QAAQ,CAAC,IAAI;IACrB,OAAO,CAAC,QAAQ,CAAC,MAAM;IAJzB,OAAO,CAAC,GAAG,CAAI;gBAGI,IAAI,EAAE,UAAU,EAChB,MAAM,UAAO;IAGhC,MAAM,IAAI,QAAQ;IAYlB,OAAO,CAAC,UAAU;IAuBlB,OAAO,CAAC,WAAW;IA4LnB,OAAO,CAAC,UAAU;IA+BlB,OAAO,CAAC,WAAW;IA+DnB,OAAO,CAAC,mBAAmB;IA2B3B,OAAO,CAAC,kBAAkB;IAwC1B,OAAO,CAAC,QAAQ;IAMhB,OAAO,CAAC,WAAW;IAQnB,OAAO,CAAC,qBAAqB;IAkC7B,OAAO,CAAC,WAAW;IAsGnB,OAAO,CAAC,SAAS;IAcjB,OAAO,CAAC,UAAU;IAelB,OAAO,CAAC,SAAS;IAcjB,OAAO,CAAC,cAAc;CAkBvB"}
|
|
@@ -0,0 +1,600 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.JsonBytesDecoder = exports.BASE64_NATIVE_THRESHOLD = void 0;
|
|
4
|
+
const lex_data_1 = require("@atproto/lex-data");
|
|
5
|
+
const blob_js_1 = require("./blob.js");
|
|
6
|
+
// ASCII byte values used by the decoder, grouped by role.

// Whitespace
const CHAR_TAB = 0x09;
const CHAR_NEWLINE = 0x0a;
const CHAR_CARRIAGE_RETURN = 0x0d;
const CHAR_SPACE = 0x20;

// Punctuation and structural characters
const CHAR_DOUBLE_QUOTE = 0x22;
const CHAR_DOLLAR = 0x24;
const CHAR_PLUS = 0x2b;
const CHAR_COMMA = 0x2c;
const CHAR_MINUS = 0x2d;
const CHAR_PERIOD = 0x2e;
const CHAR_SLASH = 0x2f;
const CHAR_COLON = 0x3a;
const CHAR_EQUAL = 0x3d;
const CHAR_LEFT_BRACKET = 0x5b;
const CHAR_BACKSLASH = 0x5c;
const CHAR_RIGHT_BRACKET = 0x5d;
const CHAR_UNDERSCORE = 0x5f;
const CHAR_LEFT_BRACE = 0x7b;
const CHAR_RIGHT_BRACE = 0x7d;

// Digits
const CHAR_DIGIT_0 = 0x30;
const CHAR_DIGIT_1 = 0x31;
const CHAR_DIGIT_9 = 0x39;

// Upper-case letters
const CHAR_UPPER_A = 0x41;
const CHAR_UPPER_E = 0x45;
const CHAR_UPPER_F = 0x46;
const CHAR_UPPER_Z = 0x5a;

// Lower-case letters
const CHAR_LOWER_A = 0x61;
const CHAR_LOWER_B = 0x62;
const CHAR_LOWER_E = 0x65;
const CHAR_LOWER_F = 0x66;
const CHAR_LOWER_L = 0x6c;
const CHAR_LOWER_N = 0x6e;
const CHAR_LOWER_R = 0x72;
const CHAR_LOWER_S = 0x73;
const CHAR_LOWER_T = 0x74;
const CHAR_LOWER_U = 0x75;
const CHAR_LOWER_Z = 0x7a;

// Shared UTF-8 decoder; `fatal: true` makes malformed byte sequences throw.
const DECODER = new TextDecoder('utf-8', { fatal: true });

// Byte -> 6-bit value table (-1 = not a base64 character). Both the standard
// ('+', '/') and the URL-safe ('-', '_') alphabets map to 62 and 63.
const BASE64_LOOKUP = new Int8Array(256).fill(-1);
for (let code = CHAR_UPPER_A, sextet = 0; code <= CHAR_UPPER_Z; code++, sextet++) {
    BASE64_LOOKUP[code] = sextet;
}
for (let code = CHAR_LOWER_A, sextet = 26; code <= CHAR_LOWER_Z; code++, sextet++) {
    BASE64_LOOKUP[code] = sextet;
}
for (let code = CHAR_DIGIT_0, sextet = 52; code <= CHAR_DIGIT_9; code++, sextet++) {
    BASE64_LOOKUP[code] = sextet;
}
BASE64_LOOKUP[CHAR_PLUS] = 62;
BASE64_LOOKUP[CHAR_MINUS] = 62;
BASE64_LOOKUP[CHAR_SLASH] = 63;
BASE64_LOOKUP[CHAR_UNDERSCORE] = 63;

// Byte -> hexadecimal digit value table (-1 = not a hex digit); letters are
// accepted in either case.
const HEX_LOOKUP = new Int8Array(256).fill(-1);
for (let code = CHAR_DIGIT_0, nibble = 0; code <= CHAR_DIGIT_9; code++, nibble++) {
    HEX_LOOKUP[code] = nibble;
}
for (let code = CHAR_UPPER_A, nibble = 10; code <= CHAR_UPPER_F; code++, nibble++) {
    HEX_LOOKUP[code] = nibble;
}
for (let code = CHAR_LOWER_A, nibble = 10; code <= CHAR_LOWER_F; code++, nibble++) {
    HEX_LOOKUP[code] = nibble;
}
|
|
64
|
+
// Thresholds for optimization heuristics
|
|
65
|
+
exports.BASE64_NATIVE_THRESHOLD = 256; // Use native decoding for base64 strings > this length
|
|
66
|
+
class JsonBytesDecoder {
|
|
67
|
+
constructor(data, strict = true) {
|
|
68
|
+
Object.defineProperty(this, "data", {
|
|
69
|
+
enumerable: true,
|
|
70
|
+
configurable: true,
|
|
71
|
+
writable: true,
|
|
72
|
+
value: data
|
|
73
|
+
});
|
|
74
|
+
Object.defineProperty(this, "strict", {
|
|
75
|
+
enumerable: true,
|
|
76
|
+
configurable: true,
|
|
77
|
+
writable: true,
|
|
78
|
+
value: strict
|
|
79
|
+
});
|
|
80
|
+
Object.defineProperty(this, "pos", {
|
|
81
|
+
enumerable: true,
|
|
82
|
+
configurable: true,
|
|
83
|
+
writable: true,
|
|
84
|
+
value: 0
|
|
85
|
+
});
|
|
86
|
+
}
|
|
87
|
+
decode() {
|
|
88
|
+
this.skipWhitespace();
|
|
89
|
+
const value = this.parseValue();
|
|
90
|
+
this.skipWhitespace();
|
|
91
|
+
if (this.pos < this.data.length) {
|
|
92
|
+
throw new TypeError(`Unexpected data after JSON at position ${this.pos}`);
|
|
93
|
+
}
|
|
94
|
+
return value;
|
|
95
|
+
}
|
|
96
|
+
parseValue() {
|
|
97
|
+
const ch = this.data[this.pos];
|
|
98
|
+
// Optimize by checking most common value types first
|
|
99
|
+
// Strings and objects are very common in real JSON
|
|
100
|
+
if (ch === CHAR_DOUBLE_QUOTE) {
|
|
101
|
+
return this.parseString();
|
|
102
|
+
}
|
|
103
|
+
else if (ch === CHAR_LEFT_BRACE) {
|
|
104
|
+
return this.parseObject();
|
|
105
|
+
}
|
|
106
|
+
else if (ch === CHAR_LEFT_BRACKET) {
|
|
107
|
+
return this.parseArray();
|
|
108
|
+
}
|
|
109
|
+
else if (ch === CHAR_LOWER_T) {
|
|
110
|
+
return this.parseTrue();
|
|
111
|
+
}
|
|
112
|
+
else if (ch === CHAR_LOWER_F) {
|
|
113
|
+
return this.parseFalse();
|
|
114
|
+
}
|
|
115
|
+
else if (ch === CHAR_LOWER_N) {
|
|
116
|
+
return this.parseNull();
|
|
117
|
+
}
|
|
118
|
+
else {
|
|
119
|
+
// Fallback for unexpected input
|
|
120
|
+
return this.parseNumber();
|
|
121
|
+
}
|
|
122
|
+
}
|
|
123
|
+
parseObject() {
|
|
124
|
+
this.pos++; // skip '{'
|
|
125
|
+
this.skipWhitespace();
|
|
126
|
+
// Check for empty object
|
|
127
|
+
if (this.data[this.pos] === CHAR_RIGHT_BRACE) {
|
|
128
|
+
this.pos++;
|
|
129
|
+
return {};
|
|
130
|
+
}
|
|
131
|
+
let obj;
|
|
132
|
+
let hasDollarKey = false; // Track if we've seen any $ key for validation
|
|
133
|
+
for (let i = 0;; i++) {
|
|
134
|
+
this.skipWhitespace();
|
|
135
|
+
// Parse key
|
|
136
|
+
if (this.data[this.pos] !== CHAR_DOUBLE_QUOTE) {
|
|
137
|
+
throw new TypeError(`Expected string key at position ${this.pos}`);
|
|
138
|
+
}
|
|
139
|
+
// Track special keys for later validation
|
|
140
|
+
if (this.data[this.pos + 1] === CHAR_DOLLAR) {
|
|
141
|
+
hasDollarKey = true;
|
|
142
|
+
}
|
|
143
|
+
const key = this.parseString();
|
|
144
|
+
// Prevent prototype pollution
|
|
145
|
+
if (key === '__proto__') {
|
|
146
|
+
throw new TypeError('JSON object keys cannot be "__proto__"');
|
|
147
|
+
}
|
|
148
|
+
this.skipWhitespace();
|
|
149
|
+
// Parse colon
|
|
150
|
+
if (this.data[this.pos] !== CHAR_COLON) {
|
|
151
|
+
throw new TypeError(`Expected ':' at position ${this.pos}`);
|
|
152
|
+
}
|
|
153
|
+
this.pos++;
|
|
154
|
+
this.skipWhitespace();
|
|
155
|
+
// Parse $bytes or $link if it's the first and only key
|
|
156
|
+
if (i === 0) {
|
|
157
|
+
if (key === '$bytes' && this.data[this.pos] === CHAR_DOUBLE_QUOTE) {
|
|
158
|
+
const initialPos = this.pos;
|
|
159
|
+
const b64Start = initialPos + 1;
|
|
160
|
+
const b64End = this.data.indexOf(CHAR_DOUBLE_QUOTE, b64Start);
|
|
161
|
+
if (b64End !== -1) {
|
|
162
|
+
this.pos = b64End + 1;
|
|
163
|
+
this.skipWhitespace();
|
|
164
|
+
if (this.data[this.pos] === CHAR_RIGHT_BRACE) {
|
|
165
|
+
this.pos++;
|
|
166
|
+
const base64Len = b64End - b64Start;
|
|
167
|
+
try {
|
|
168
|
+
// Use native decoding for large base64 strings (much faster
|
|
169
|
+
// based on benchmarks)
|
|
170
|
+
if (base64Len > exports.BASE64_NATIVE_THRESHOLD) {
|
|
171
|
+
const b64Str = this.decodeUnescapedString(b64Start, b64End);
|
|
172
|
+
return (0, lex_data_1.fromBase64)(b64Str); // Validate and convert to LexValue bytes
|
|
173
|
+
}
|
|
174
|
+
// Manual decoding for smaller strings (optimized path)
|
|
175
|
+
// Skip padding characters
|
|
176
|
+
let b64EndNoPadding = b64End;
|
|
177
|
+
while (b64EndNoPadding > b64Start &&
|
|
178
|
+
this.data[b64EndNoPadding - 1] === CHAR_EQUAL) {
|
|
179
|
+
b64EndNoPadding--;
|
|
180
|
+
}
|
|
181
|
+
const base64LenNoPadding = b64EndNoPadding - b64Start;
|
|
182
|
+
const bytesLen = Math.floor((base64LenNoPadding * 3) / 4);
|
|
183
|
+
const result = new Uint8Array(bytesLen);
|
|
184
|
+
for (let i = b64Start, j = 0; i <= b64EndNoPadding - 4; i += 4) {
|
|
185
|
+
const chunk = (this.base64Value(this.data[i]) << 18) |
|
|
186
|
+
(this.base64Value(this.data[i + 1]) << 12) |
|
|
187
|
+
(this.base64Value(this.data[i + 2]) << 6) |
|
|
188
|
+
this.base64Value(this.data[i + 3]);
|
|
189
|
+
result[j++] = (chunk >> 16) & 0xff;
|
|
190
|
+
result[j++] = (chunk >> 8) & 0xff;
|
|
191
|
+
result[j++] = chunk & 0xff;
|
|
192
|
+
}
|
|
193
|
+
// Handle remaining characters (if any)
|
|
194
|
+
if (base64LenNoPadding % 4 === 2) {
|
|
195
|
+
const chunk = (this.base64Value(this.data[b64EndNoPadding - 2]) << 18) |
|
|
196
|
+
(this.base64Value(this.data[b64EndNoPadding - 1]) << 12);
|
|
197
|
+
result[bytesLen - 1] = (chunk >> 16) & 0xff;
|
|
198
|
+
}
|
|
199
|
+
else if (base64LenNoPadding % 4 === 3) {
|
|
200
|
+
const chunk = (this.base64Value(this.data[b64EndNoPadding - 3]) << 18) |
|
|
201
|
+
(this.base64Value(this.data[b64EndNoPadding - 2]) << 12) |
|
|
202
|
+
(this.base64Value(this.data[b64EndNoPadding - 1]) << 6);
|
|
203
|
+
result[bytesLen - 2] = (chunk >> 16) & 0xff;
|
|
204
|
+
result[bytesLen - 1] = (chunk >> 8) & 0xff;
|
|
205
|
+
}
|
|
206
|
+
return result;
|
|
207
|
+
}
|
|
208
|
+
catch (cause) {
|
|
209
|
+
if (this.strict) {
|
|
210
|
+
throw new TypeError('Invalid $bytes object', { cause });
|
|
211
|
+
}
|
|
212
|
+
// ignore and parse as regular object
|
|
213
|
+
}
|
|
214
|
+
}
|
|
215
|
+
}
|
|
216
|
+
this.pos = initialPos; // reset position to parse string properly
|
|
217
|
+
}
|
|
218
|
+
else if (key === '$link' &&
|
|
219
|
+
this.data[this.pos] === CHAR_DOUBLE_QUOTE) {
|
|
220
|
+
const initialPos = this.pos;
|
|
221
|
+
const cidStart = initialPos + 1;
|
|
222
|
+
const cidEnd = this.data.indexOf(CHAR_DOUBLE_QUOTE, cidStart);
|
|
223
|
+
if (cidEnd !== -1) {
|
|
224
|
+
this.pos = cidEnd + 1;
|
|
225
|
+
this.skipWhitespace();
|
|
226
|
+
if (this.data[this.pos] === CHAR_RIGHT_BRACE) {
|
|
227
|
+
this.pos++;
|
|
228
|
+
const cidStr = this.decodeUnescapedString(cidStart, cidEnd);
|
|
229
|
+
try {
|
|
230
|
+
return (0, lex_data_1.parseCid)(cidStr);
|
|
231
|
+
}
|
|
232
|
+
catch (cause) {
|
|
233
|
+
if (this.strict) {
|
|
234
|
+
throw new TypeError('Invalid $link object', { cause });
|
|
235
|
+
}
|
|
236
|
+
// ignore
|
|
237
|
+
}
|
|
238
|
+
}
|
|
239
|
+
}
|
|
240
|
+
this.pos = initialPos; // reset position to parse string properly
|
|
241
|
+
}
|
|
242
|
+
}
|
|
243
|
+
// Parse value
|
|
244
|
+
obj ?? (obj = {});
|
|
245
|
+
obj[key] = this.parseValue();
|
|
246
|
+
this.skipWhitespace();
|
|
247
|
+
const next = this.data[this.pos];
|
|
248
|
+
if (next === CHAR_RIGHT_BRACE) {
|
|
249
|
+
this.pos++;
|
|
250
|
+
break;
|
|
251
|
+
}
|
|
252
|
+
else if (next === CHAR_COMMA) {
|
|
253
|
+
this.pos++;
|
|
254
|
+
}
|
|
255
|
+
else {
|
|
256
|
+
throw new TypeError(`Expected ',' or '}' at position ${this.pos}`);
|
|
257
|
+
}
|
|
258
|
+
}
|
|
259
|
+
// In strict mode, validate special objects with extra keys
|
|
260
|
+
// Only check if we've seen a $ key (optimization)
|
|
261
|
+
if (hasDollarKey && this.strict) {
|
|
262
|
+
if (obj.$bytes !== undefined) {
|
|
263
|
+
throw new TypeError('Invalid $bytes object');
|
|
264
|
+
}
|
|
265
|
+
else if (obj.$link !== undefined) {
|
|
266
|
+
throw new TypeError('Invalid $link object');
|
|
267
|
+
}
|
|
268
|
+
else if (obj.$type === 'blob') {
|
|
269
|
+
const blob = (0, blob_js_1.parseTypedBlobRef)(obj, { strict: this.strict });
|
|
270
|
+
if (blob)
|
|
271
|
+
return blob;
|
|
272
|
+
throw new TypeError(`Invalid blob object`);
|
|
273
|
+
}
|
|
274
|
+
else if (obj.$type !== undefined) {
|
|
275
|
+
if (typeof obj.$type !== 'string') {
|
|
276
|
+
throw new TypeError(`Invalid $type property (${typeof obj.$type})`);
|
|
277
|
+
}
|
|
278
|
+
else if (obj.$type.length === 0) {
|
|
279
|
+
throw new TypeError(`Empty $type property`);
|
|
280
|
+
}
|
|
281
|
+
}
|
|
282
|
+
}
|
|
283
|
+
return obj;
|
|
284
|
+
}
|
|
285
|
+
parseArray() {
|
|
286
|
+
this.pos++; // skip '['
|
|
287
|
+
this.skipWhitespace();
|
|
288
|
+
const arr = [];
|
|
289
|
+
// Check for empty array
|
|
290
|
+
if (this.data[this.pos] === CHAR_RIGHT_BRACKET) {
|
|
291
|
+
this.pos++;
|
|
292
|
+
return arr;
|
|
293
|
+
}
|
|
294
|
+
for (;;) {
|
|
295
|
+
this.skipWhitespace();
|
|
296
|
+
arr.push(this.parseValue());
|
|
297
|
+
this.skipWhitespace();
|
|
298
|
+
const next = this.data[this.pos];
|
|
299
|
+
if (next === CHAR_RIGHT_BRACKET) {
|
|
300
|
+
this.pos++;
|
|
301
|
+
break;
|
|
302
|
+
}
|
|
303
|
+
else if (next === CHAR_COMMA) {
|
|
304
|
+
this.pos++;
|
|
305
|
+
}
|
|
306
|
+
else {
|
|
307
|
+
throw new TypeError(`Expected ',' or ']' at position ${this.pos}`);
|
|
308
|
+
}
|
|
309
|
+
}
|
|
310
|
+
return arr;
|
|
311
|
+
}
|
|
312
|
+
parseString() {
|
|
313
|
+
this.pos++; // skip opening quote
|
|
314
|
+
const start = this.pos;
|
|
315
|
+
// Fast path: scan for quote, checking for escapes and control chars inline
|
|
316
|
+
// Optimized for the common case of strings without escapes
|
|
317
|
+
let i = this.pos;
|
|
318
|
+
while (i < this.data.length) {
|
|
319
|
+
const ch = this.data[i];
|
|
320
|
+
if (ch === CHAR_DOUBLE_QUOTE) {
|
|
321
|
+
// Found end quote - fast path success
|
|
322
|
+
this.pos = i + 1;
|
|
323
|
+
return this.decodeUnescapedString(start, i);
|
|
324
|
+
}
|
|
325
|
+
else if (ch === CHAR_BACKSLASH) {
|
|
326
|
+
// Found escape or control character - need slow path
|
|
327
|
+
break;
|
|
328
|
+
}
|
|
329
|
+
else if (ch < 0x20) {
|
|
330
|
+
throw new TypeError(`Unescaped control character at position ${i}`);
|
|
331
|
+
}
|
|
332
|
+
i++;
|
|
333
|
+
}
|
|
334
|
+
// Slow path: handle escapes or control characters
|
|
335
|
+
if (i >= this.data.length) {
|
|
336
|
+
throw new TypeError('Unterminated string');
|
|
337
|
+
}
|
|
338
|
+
// We hit a backslash - need to process escape sequences
|
|
339
|
+
let result = '';
|
|
340
|
+
let segmentStart = start;
|
|
341
|
+
this.pos = i;
|
|
342
|
+
while (this.pos < this.data.length) {
|
|
343
|
+
const ch = this.data[this.pos];
|
|
344
|
+
if (ch === CHAR_DOUBLE_QUOTE) {
|
|
345
|
+
// Found end of string
|
|
346
|
+
if (segmentStart < this.pos) {
|
|
347
|
+
result += this.decodeUnescapedString(segmentStart, this.pos);
|
|
348
|
+
}
|
|
349
|
+
this.pos++;
|
|
350
|
+
return result;
|
|
351
|
+
}
|
|
352
|
+
else if (ch === CHAR_BACKSLASH) {
|
|
353
|
+
// Process escape sequence
|
|
354
|
+
if (segmentStart < this.pos) {
|
|
355
|
+
result += this.decodeUnescapedString(segmentStart, this.pos);
|
|
356
|
+
}
|
|
357
|
+
this.pos++; // skip backslash
|
|
358
|
+
result += this.parseEscapeSequence();
|
|
359
|
+
segmentStart = this.pos;
|
|
360
|
+
}
|
|
361
|
+
else if (ch < 0x20) {
|
|
362
|
+
throw new TypeError(`Unescaped control character at position ${this.pos}`);
|
|
363
|
+
}
|
|
364
|
+
else {
|
|
365
|
+
this.pos++;
|
|
366
|
+
}
|
|
367
|
+
}
|
|
368
|
+
throw new TypeError('Unterminated string');
|
|
369
|
+
}
|
|
370
|
+
parseEscapeSequence() {
|
|
371
|
+
const ch = this.data[this.pos++];
|
|
372
|
+
switch (ch) {
|
|
373
|
+
case CHAR_DOUBLE_QUOTE:
|
|
374
|
+
return '"';
|
|
375
|
+
case CHAR_BACKSLASH:
|
|
376
|
+
return '\\';
|
|
377
|
+
case CHAR_SLASH:
|
|
378
|
+
return '/';
|
|
379
|
+
case CHAR_LOWER_B:
|
|
380
|
+
return '\b';
|
|
381
|
+
case CHAR_LOWER_F:
|
|
382
|
+
return '\f';
|
|
383
|
+
case CHAR_LOWER_N:
|
|
384
|
+
return '\n';
|
|
385
|
+
case CHAR_LOWER_R:
|
|
386
|
+
return '\r';
|
|
387
|
+
case CHAR_LOWER_T:
|
|
388
|
+
return '\t';
|
|
389
|
+
case CHAR_LOWER_U:
|
|
390
|
+
return this.parseUnicodeEscape();
|
|
391
|
+
default:
|
|
392
|
+
throw new TypeError(`Invalid escape sequence at position ${this.pos}`);
|
|
393
|
+
}
|
|
394
|
+
}
|
|
395
|
+
parseUnicodeEscape() {
|
|
396
|
+
// Parse \uXXXX
|
|
397
|
+
let codePoint = 0;
|
|
398
|
+
for (let i = 0; i < 4; i++) {
|
|
399
|
+
const ch = this.data[this.pos++];
|
|
400
|
+
const hex = this.hexValue(ch);
|
|
401
|
+
codePoint = (codePoint << 4) | hex;
|
|
402
|
+
}
|
|
403
|
+
// Handle surrogate pairs
|
|
404
|
+
if (codePoint >= 0xd800 && codePoint <= 0xdbff) {
|
|
405
|
+
// High surrogate, check if followed by low surrogate
|
|
406
|
+
if (this.pos + 5 < this.data.length &&
|
|
407
|
+
this.data[this.pos] === CHAR_BACKSLASH &&
|
|
408
|
+
this.data[this.pos + 1] === CHAR_LOWER_U) {
|
|
409
|
+
// Save position in case we need to backtrack
|
|
410
|
+
const savedPos = this.pos;
|
|
411
|
+
this.pos += 2;
|
|
412
|
+
let low = 0;
|
|
413
|
+
for (let i = 0; i < 4; i++) {
|
|
414
|
+
const ch = this.data[this.pos++];
|
|
415
|
+
const hex = this.hexValue(ch);
|
|
416
|
+
low = (low << 4) | hex;
|
|
417
|
+
}
|
|
418
|
+
// Check if it's a valid low surrogate
|
|
419
|
+
if (low >= 0xdc00 && low <= 0xdfff) {
|
|
420
|
+
// Valid pair - combine into single codepoint
|
|
421
|
+
codePoint = 0x10000 + ((codePoint - 0xd800) << 10) + (low - 0xdc00);
|
|
422
|
+
}
|
|
423
|
+
else {
|
|
424
|
+
// Not a low surrogate - backtrack so it gets processed separately
|
|
425
|
+
this.pos = savedPos;
|
|
426
|
+
}
|
|
427
|
+
}
|
|
428
|
+
}
|
|
429
|
+
return String.fromCodePoint(codePoint);
|
|
430
|
+
}
|
|
431
|
+
hexValue(ch) {
|
|
432
|
+
const value = HEX_LOOKUP[ch];
|
|
433
|
+
if (value !== -1)
|
|
434
|
+
return value;
|
|
435
|
+
throw new TypeError(`Invalid unicode escape at position ${this.pos}`);
|
|
436
|
+
}
|
|
437
|
+
base64Value(ch) {
|
|
438
|
+
const value = BASE64_LOOKUP[ch];
|
|
439
|
+
if (value !== -1)
|
|
440
|
+
return value;
|
|
441
|
+
throw new TypeError(`Invalid base64 character: ${String.fromCharCode(ch)} at position ${this.pos}`);
|
|
442
|
+
}
|
|
443
|
+
decodeUnescapedString(start, end) {
|
|
444
|
+
const len = end - start;
|
|
445
|
+
if (len === 0)
|
|
446
|
+
return '';
|
|
447
|
+
// Fast path for very short ASCII strings (common for object keys like "id", "name", etc.)
|
|
448
|
+
// Heuristic: only worth it for strings <= 20 chars where String.fromCharCode is faster
|
|
449
|
+
// This is a hot path for object keys
|
|
450
|
+
if (len <= 20) {
|
|
451
|
+
let result = '';
|
|
452
|
+
for (let i = start; i < end; i++) {
|
|
453
|
+
const byte = this.data[i];
|
|
454
|
+
if (byte > 0x7f) {
|
|
455
|
+
// Hit non-ASCII, fall back to TextDecoder for full UTF-8 decoding
|
|
456
|
+
const subView = new Uint8Array(this.data.buffer, this.data.byteOffset + start, len);
|
|
457
|
+
return DECODER.decode(subView);
|
|
458
|
+
}
|
|
459
|
+
result += String.fromCharCode(byte);
|
|
460
|
+
}
|
|
461
|
+
return result;
|
|
462
|
+
}
|
|
463
|
+
// For longer strings, use utf8FromBytes directly (it's highly optimized)
|
|
464
|
+
const subView = new Uint8Array(this.data.buffer, this.data.byteOffset + start, len);
|
|
465
|
+
return DECODER.decode(subView);
|
|
466
|
+
}
|
|
467
|
+
parseNumber() {
|
|
468
|
+
const start = this.pos;
|
|
469
|
+
let sign = 1;
|
|
470
|
+
let int = 0;
|
|
471
|
+
let decimal = 0;
|
|
472
|
+
let expSign = 1;
|
|
473
|
+
let exp = 0;
|
|
474
|
+
// Parse sign
|
|
475
|
+
if (this.data[this.pos] === CHAR_MINUS) {
|
|
476
|
+
sign = -1;
|
|
477
|
+
this.pos++;
|
|
478
|
+
}
|
|
479
|
+
// Parse integer part
|
|
480
|
+
if (this.data[this.pos] === CHAR_DIGIT_0) {
|
|
481
|
+
this.pos++;
|
|
482
|
+
// Leading zero must be followed by decimal, exponent, or end
|
|
483
|
+
}
|
|
484
|
+
else if (
|
|
485
|
+
// Note: cannot start with "0"
|
|
486
|
+
this.data[this.pos] >= CHAR_DIGIT_1 &&
|
|
487
|
+
this.data[this.pos] <= CHAR_DIGIT_9) {
|
|
488
|
+
do {
|
|
489
|
+
int = int * 10 + (this.data[this.pos] - CHAR_DIGIT_0);
|
|
490
|
+
this.pos++;
|
|
491
|
+
} while (this.pos < this.data.length &&
|
|
492
|
+
this.data[this.pos] >= CHAR_DIGIT_0 &&
|
|
493
|
+
this.data[this.pos] <= CHAR_DIGIT_9);
|
|
494
|
+
}
|
|
495
|
+
else {
|
|
496
|
+
throw new TypeError(`Unexpected character at position ${this.pos}`);
|
|
497
|
+
}
|
|
498
|
+
// Strict mode validation is deferred until after decimal/exponent parsing
|
|
499
|
+
// so that we can include the complete number value in the error message.
|
|
500
|
+
// Parse decimal part
|
|
501
|
+
if (this.pos < this.data.length && this.data[this.pos] === CHAR_PERIOD) {
|
|
502
|
+
this.pos++;
|
|
503
|
+
if (this.pos >= this.data.length ||
|
|
504
|
+
this.data[this.pos] < CHAR_DIGIT_0 ||
|
|
505
|
+
this.data[this.pos] > CHAR_DIGIT_9) {
|
|
506
|
+
throw new TypeError(`Invalid number at position ${start}`);
|
|
507
|
+
}
|
|
508
|
+
let decimalPlace = 0.1;
|
|
509
|
+
do {
|
|
510
|
+
decimal += (this.data[this.pos] - CHAR_DIGIT_0) * decimalPlace;
|
|
511
|
+
decimalPlace *= 0.1;
|
|
512
|
+
this.pos++;
|
|
513
|
+
} while (this.pos < this.data.length &&
|
|
514
|
+
this.data[this.pos] >= CHAR_DIGIT_0 &&
|
|
515
|
+
this.data[this.pos] <= CHAR_DIGIT_9);
|
|
516
|
+
}
|
|
517
|
+
// Parse exponent part
|
|
518
|
+
if (this.pos < this.data.length &&
|
|
519
|
+
(this.data[this.pos] === CHAR_LOWER_E ||
|
|
520
|
+
this.data[this.pos] === CHAR_UPPER_E)) {
|
|
521
|
+
this.pos++;
|
|
522
|
+
if (this.pos < this.data.length &&
|
|
523
|
+
(this.data[this.pos] === CHAR_PLUS ||
|
|
524
|
+
this.data[this.pos] === CHAR_MINUS)) {
|
|
525
|
+
expSign = this.data[this.pos] === CHAR_MINUS ? -1 : 1;
|
|
526
|
+
this.pos++; // skip + or -
|
|
527
|
+
}
|
|
528
|
+
if (this.pos >= this.data.length ||
|
|
529
|
+
this.data[this.pos] < CHAR_DIGIT_0 ||
|
|
530
|
+
this.data[this.pos] > CHAR_DIGIT_9) {
|
|
531
|
+
throw new TypeError(`Invalid number at position ${start}`);
|
|
532
|
+
}
|
|
533
|
+
do {
|
|
534
|
+
exp = exp * 10 + (this.data[this.pos] - CHAR_DIGIT_0);
|
|
535
|
+
this.pos++;
|
|
536
|
+
} while (this.pos < this.data.length &&
|
|
537
|
+
this.data[this.pos] >= CHAR_DIGIT_0 &&
|
|
538
|
+
this.data[this.pos] <= CHAR_DIGIT_9);
|
|
539
|
+
}
|
|
540
|
+
const num = sign * (int + decimal) * Math.pow(10, expSign * exp);
|
|
541
|
+
if (this.strict && !Number.isSafeInteger(num)) {
|
|
542
|
+
throw new TypeError(`Invalid non-integer number: ${num}`);
|
|
543
|
+
}
|
|
544
|
+
return num;
|
|
545
|
+
}
|
|
546
|
+
parseTrue() {
|
|
547
|
+
if (this.pos + 4 <= this.data.length &&
|
|
548
|
+
this.data[this.pos] === CHAR_LOWER_T &&
|
|
549
|
+
this.data[this.pos + 1] === CHAR_LOWER_R &&
|
|
550
|
+
this.data[this.pos + 2] === CHAR_LOWER_U &&
|
|
551
|
+
this.data[this.pos + 3] === CHAR_LOWER_E) {
|
|
552
|
+
this.pos += 4;
|
|
553
|
+
return true;
|
|
554
|
+
}
|
|
555
|
+
throw new TypeError(`Unexpected token at position ${this.pos}`);
|
|
556
|
+
}
|
|
557
|
+
parseFalse() {
|
|
558
|
+
if (this.pos + 5 <= this.data.length &&
|
|
559
|
+
this.data[this.pos] === CHAR_LOWER_F &&
|
|
560
|
+
this.data[this.pos + 1] === CHAR_LOWER_A &&
|
|
561
|
+
this.data[this.pos + 2] === CHAR_LOWER_L &&
|
|
562
|
+
this.data[this.pos + 3] === CHAR_LOWER_S &&
|
|
563
|
+
this.data[this.pos + 4] === CHAR_LOWER_E) {
|
|
564
|
+
this.pos += 5;
|
|
565
|
+
return false;
|
|
566
|
+
}
|
|
567
|
+
throw new TypeError(`Unexpected token at position ${this.pos}`);
|
|
568
|
+
}
|
|
569
|
+
parseNull() {
|
|
570
|
+
if (this.pos + 4 <= this.data.length &&
|
|
571
|
+
this.data[this.pos] === CHAR_LOWER_N &&
|
|
572
|
+
this.data[this.pos + 1] === CHAR_LOWER_U &&
|
|
573
|
+
this.data[this.pos + 2] === CHAR_LOWER_L &&
|
|
574
|
+
this.data[this.pos + 3] === CHAR_LOWER_L) {
|
|
575
|
+
this.pos += 4;
|
|
576
|
+
return null;
|
|
577
|
+
}
|
|
578
|
+
throw new TypeError(`Unexpected token at position ${this.pos}`);
|
|
579
|
+
}
|
|
580
|
+
skipWhitespace() {
|
|
581
|
+
// Optimized: check most common case (space) first, and use <= for compact check
|
|
582
|
+
while (this.pos < this.data.length) {
|
|
583
|
+
const ch = this.data[this.pos];
|
|
584
|
+
// Optimize for the most common case: space (0x20)
|
|
585
|
+
if (ch === CHAR_SPACE) {
|
|
586
|
+
this.pos++;
|
|
587
|
+
}
|
|
588
|
+
else if (ch === CHAR_TAB ||
|
|
589
|
+
ch === CHAR_NEWLINE ||
|
|
590
|
+
ch === CHAR_CARRIAGE_RETURN) {
|
|
591
|
+
this.pos++;
|
|
592
|
+
}
|
|
593
|
+
else {
|
|
594
|
+
break;
|
|
595
|
+
}
|
|
596
|
+
}
|
|
597
|
+
}
|
|
598
|
+
}
|
|
599
|
+
exports.JsonBytesDecoder = JsonBytesDecoder;
|
|
600
|
+
//# sourceMappingURL=json-bytes-decoder.js.map
|