@typia/utils 12.0.0-dev.20260316 → 12.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,919 +1,919 @@
1
- import { DeepPartial, IJsonParseResult } from "@typia/interface";
2
-
3
- /**
4
- * Parse lenient JSON that may be incomplete or malformed.
5
- *
6
- * Handles:
7
- *
8
- * - Unclosed brackets `{`, `[` - parses as much as possible
9
- * - Trailing commas `[1, 2, ]` - ignores them
10
- * - Unclosed strings `"hello` - returns partial string
11
- * - Junk text before JSON (LLM often adds explanatory text)
12
- * - Markdown code blocks (extracts content from `json ... `)
13
- * - Incomplete keywords like `tru`, `fal`, `nul`
14
- * - Unicode escape sequences including surrogate pairs (emoji)
15
- * - JavaScript-style comments (single-line and multi-line)
16
- * - Unquoted object keys (JavaScript identifier style)
17
- *
18
- * @param input Raw JSON string (potentially incomplete)
19
- * @returns Parse result with data, original input, and any errors
20
- * @internal
21
- */
22
- export function parseLenientJson<T>(input: string): IJsonParseResult<T> {
23
- // For safe guard
24
- if (typeof input !== "string") input = String(input);
25
-
26
- // Try native JSON.parse first (faster for valid JSON)
27
- let error: Error | null = null;
28
- try {
29
- return {
30
- success: true,
31
- data: JSON.parse(input) as T,
32
- };
33
- } catch (e) {
34
- // Fall back to lenient parser
35
- error = e instanceof Error ? e : new Error(String(e));
36
- }
37
-
38
- try {
39
- return iterate(input);
40
- } catch {
41
- // actually unreachable, maybe?
42
- return {
43
- success: false,
44
- data: undefined as DeepPartial<T>,
45
- input,
46
- errors: [
47
- {
48
- path: "$input",
49
- expected: "valid JSON",
50
- description: error.message,
51
- },
52
- ],
53
- };
54
- }
55
- }
56
-
57
- function iterate<T>(input: string): IJsonParseResult<T> {
58
- // Extract markdown code block if present
59
- const codeBlockContent: string | null = extractMarkdownCodeBlock(input);
60
- const jsonSource: string =
61
- codeBlockContent !== null ? codeBlockContent : input;
62
-
63
- // Check if input is empty or whitespace-only
64
- const trimmed: string = jsonSource.trim();
65
- if (trimmed.length === 0) {
66
- return {
67
- success: false,
68
- data: undefined as DeepPartial<T>,
69
- input,
70
- errors: [
71
- {
72
- path: "$input",
73
- expected: "JSON value",
74
- description: "empty input",
75
- },
76
- ],
77
- };
78
- }
79
-
80
- // Check if input starts with a primitive value (no junk prefix skipping needed)
81
- if (startsWithPrimitive(trimmed)) {
82
- const errors: IJsonParseResult.IError[] = [];
83
- const parser: LenientJsonParser = new LenientJsonParser(jsonSource, errors);
84
- const data: unknown = parser.parse();
85
- if (errors.length > 0) {
86
- return { success: false, data: data as DeepPartial<T>, input, errors };
87
- }
88
- return { success: true, data: data as T };
89
- }
90
-
91
- // Find JSON start position (skip junk prefix from LLM)
92
- const jsonStart: number = findJsonStart(jsonSource);
93
- if (jsonStart === -1) {
94
- // No object/array found - check if there's a primitive after skipping comments
95
- const skipped: string = skipCommentsAndWhitespace(jsonSource);
96
- if (skipped.length > 0 && startsWithPrimitive(skipped)) {
97
- const errors: IJsonParseResult.IError[] = [];
98
- const parser: LenientJsonParser = new LenientJsonParser(
99
- jsonSource,
100
- errors,
101
- );
102
- const data: unknown = parser.parse();
103
- if (errors.length > 0) {
104
- return { success: false, data: data as DeepPartial<T>, input, errors };
105
- }
106
- return { success: true, data: data as T };
107
- }
108
- // No valid JSON found - return failure
109
- return {
110
- success: false,
111
- data: undefined as DeepPartial<T>,
112
- input,
113
- errors: [
114
- {
115
- path: "$input",
116
- expected: "JSON value",
117
- description: jsonSource,
118
- },
119
- ],
120
- };
121
- }
122
-
123
- // Extract JSON portion (skip junk prefix)
124
- const jsonInput: string =
125
- jsonStart > 0 ? jsonSource.slice(jsonStart) : jsonSource;
126
-
127
- const errors: IJsonParseResult.IError[] = [];
128
- const parser: LenientJsonParser = new LenientJsonParser(jsonInput, errors);
129
- const data: unknown = parser.parse();
130
-
131
- if (errors.length > 0) {
132
- return {
133
- success: false,
134
- data: data as DeepPartial<T>,
135
- input,
136
- errors,
137
- };
138
- }
139
- return {
140
- success: true,
141
- data: data as T,
142
- };
143
- }
144
-
145
- /**
146
- * Check if a string is a valid 4-character hexadecimal string.
147
- *
148
- * @internal
149
- */
150
- function isHexString(s: string): boolean {
151
- if (s.length !== 4) return false;
152
- for (let i = 0; i < 4; i++) {
153
- const c: number = s.charCodeAt(i);
154
- if (
155
- !((c >= 48 && c <= 57) || (c >= 65 && c <= 70) || (c >= 97 && c <= 102))
156
- ) {
157
- return false;
158
- }
159
- }
160
- return true;
161
- }
162
-
163
- /**
164
- * Extract JSON content from markdown code block if present.
165
- *
166
- * LLM outputs often wrap JSON in markdown code blocks like:
167
- *
168
- * Here is your result:
169
- *
170
- * ```json
171
- * { "name": "test" }
172
- * ```
173
- *
174
- * This function extracts the content between the backticks.
175
- *
176
- * IMPORTANT: Only extracts if the input doesn't already start with JSON. If
177
- * input (after trim) starts with `{`, `[`, or `"`, it's already JSON and any
178
- * markdown inside is part of a string value.
179
- *
180
- * @param input Text that may contain markdown code block
181
- * @returns Extracted content or null if no code block found
182
- * @internal
183
- */
184
- function extractMarkdownCodeBlock(input: string): string | null {
185
- // Must be ```json specifically, not just ```
186
- const codeBlockStart: number = input.indexOf("```json");
187
- if (codeBlockStart === -1) return null;
188
-
189
- // Check if input already starts with JSON (after trimming whitespace)
190
- // If so, don't extract - the markdown is inside a JSON string value
191
- const trimmed: string = input.trimStart();
192
- if (trimmed.length > 0) {
193
- const firstChar: string = trimmed[0]!;
194
- if (firstChar === "{" || firstChar === "[" || firstChar === '"') {
195
- return null;
196
- }
197
- }
198
-
199
- // Find the end of the opening line (after ```json)
200
- let contentStart: number = codeBlockStart + 7; // length of "```json"
201
- while (contentStart < input.length && input[contentStart] !== "\n") {
202
- contentStart++;
203
- }
204
- if (contentStart >= input.length) return null;
205
- contentStart++; // skip the newline
206
-
207
- // Find the closing ```
208
- const codeBlockEnd: number = input.indexOf("```", contentStart);
209
- if (codeBlockEnd === -1) {
210
- // No closing ``` - return everything after opening
211
- return input.slice(contentStart);
212
- }
213
-
214
- return input.slice(contentStart, codeBlockEnd);
215
- }
216
-
217
- /**
218
- * Find the start position of JSON object/array content in text that may have
219
- * junk prefix.
220
- *
221
- * LLM outputs often contain text before JSON like:
222
- *
223
- * - "Here is your JSON: {"name": "test"}"
224
- * - "Sure! [1, 2, 3]"
225
- *
226
- * This function skips over comments and strings to find the real JSON start.
227
- * Primitive values (strings, numbers, booleans) are handled directly by the
228
- * parser.
229
- *
230
- * @param input Text that may contain JSON with junk prefix
231
- * @returns Index of first `{` or `[` outside comments/strings, or -1 if not
232
- * found
233
- * @internal
234
- */
235
- function findJsonStart(input: string): number {
236
- let pos: number = 0;
237
- const len: number = input.length;
238
-
239
- while (pos < len) {
240
- const ch: string = input[pos]!;
241
-
242
- // Found JSON start
243
- if (ch === "{" || ch === "[") {
244
- return pos;
245
- }
246
-
247
- // Skip single-line comment
248
- if (ch === "/" && pos + 1 < len && input[pos + 1] === "/") {
249
- pos += 2;
250
- while (pos < len && input[pos] !== "\n" && input[pos] !== "\r") {
251
- pos++;
252
- }
253
- continue;
254
- }
255
-
256
- // Skip multi-line comment
257
- if (ch === "/" && pos + 1 < len && input[pos + 1] === "*") {
258
- pos += 2;
259
- while (pos + 1 < len) {
260
- if (input[pos] === "*" && input[pos + 1] === "/") {
261
- pos += 2;
262
- break;
263
- }
264
- pos++;
265
- }
266
- // If unclosed comment, move to end
267
- if (pos + 1 >= len) {
268
- pos = len;
269
- }
270
- continue;
271
- }
272
-
273
- // Skip string literal (to avoid matching { or [ inside strings)
274
- if (ch === '"') {
275
- pos++;
276
- while (pos < len) {
277
- if (input[pos] === "\\") {
278
- pos += 2; // skip escape sequence
279
- continue;
280
- }
281
- if (input[pos] === '"') {
282
- pos++;
283
- break;
284
- }
285
- pos++;
286
- }
287
- continue;
288
- }
289
-
290
- pos++;
291
- }
292
-
293
- return -1;
294
- }
295
-
296
- /**
297
- * Skip leading comments and whitespace from input.
298
- *
299
- * @param input Text that may start with comments or whitespace
300
- * @returns Input with leading comments and whitespace removed
301
- * @internal
302
- */
303
- function skipCommentsAndWhitespace(input: string): string {
304
- let pos: number = 0;
305
- const len: number = input.length;
306
-
307
- while (pos < len) {
308
- const ch: string = input[pos]!;
309
-
310
- // Skip whitespace
311
- if (ch === " " || ch === "\t" || ch === "\n" || ch === "\r") {
312
- pos++;
313
- continue;
314
- }
315
-
316
- // Skip single-line comment
317
- if (ch === "/" && pos + 1 < len && input[pos + 1] === "/") {
318
- pos += 2;
319
- while (pos < len && input[pos] !== "\n" && input[pos] !== "\r") {
320
- pos++;
321
- }
322
- continue;
323
- }
324
-
325
- // Skip multi-line comment
326
- if (ch === "/" && pos + 1 < len && input[pos + 1] === "*") {
327
- pos += 2;
328
- while (pos + 1 < len) {
329
- if (input[pos] === "*" && input[pos + 1] === "/") {
330
- pos += 2;
331
- break;
332
- }
333
- pos++;
334
- }
335
- if (pos + 1 >= len) {
336
- pos = len;
337
- }
338
- continue;
339
- }
340
-
341
- // Not whitespace or comment
342
- break;
343
- }
344
-
345
- return input.slice(pos);
346
- }
347
-
348
- /**
349
- * Check if input starts with a valid JSON primitive token.
350
- *
351
- * @param input Trimmed input string
352
- * @returns True if input starts with a primitive value
353
- * @internal
354
- */
355
- function startsWithPrimitive(input: string): boolean {
356
- if (input.length === 0) return false;
357
- const ch: string = input[0]!;
358
- // String
359
- if (ch === '"') return true;
360
- // Number (digit or minus)
361
- if ((ch >= "0" && ch <= "9") || ch === "-") return true;
362
- // Keywords
363
- if (
364
- input.startsWith("true") ||
365
- input.startsWith("false") ||
366
- input.startsWith("null")
367
- )
368
- return true;
369
- // Partial keywords (note: "null" requires at least 2 chars to match parseKeywordOrIdentifier logic)
370
- if (
371
- "true".startsWith(input) ||
372
- "false".startsWith(input) ||
373
- ("null".startsWith(input) && input.length >= 2)
374
- )
375
- return true;
376
- // Boolean string variants (note: "n" is intentionally excluded)
377
- const lower: string = input.toLowerCase();
378
- if (
379
- lower === "yes" ||
380
- lower === "y" ||
381
- lower === "on" ||
382
- lower === "no" ||
383
- lower === "off"
384
- )
385
- return true;
386
- return false;
387
- }
388
-
389
- /**
390
- * Lenient JSON parser that handles incomplete JSON.
391
- *
392
- * @internal
393
- */
394
- class LenientJsonParser {
395
- private pos: number = 0;
396
- private depth: number = 0;
397
- private readonly input: string;
398
- private readonly errors: IJsonParseResult.IError[];
399
-
400
- constructor(input: string, errors: IJsonParseResult.IError[]) {
401
- this.input = input;
402
- this.errors = errors;
403
- }
404
-
405
- parse(): unknown {
406
- this.skipWhitespace();
407
- if (this.pos >= this.input.length) {
408
- return undefined;
409
- }
410
- return this.parseValue("$input");
411
- }
412
-
413
- private parseValue(path: string): unknown {
414
- this.skipWhitespace();
415
-
416
- if (this.pos >= this.input.length) {
417
- return undefined;
418
- }
419
-
420
- // Check for maximum depth to prevent stack overflow
421
- if (this.depth >= MAX_DEPTH) {
422
- this.errors.push({
423
- path,
424
- expected: "value (max depth exceeded)",
425
- description: undefined,
426
- });
427
- return undefined;
428
- }
429
-
430
- const char: string = this.input[this.pos]!;
431
-
432
- if (char === "{") return this.parseObject(path);
433
- if (char === "[") return this.parseArray(path);
434
- if (char === '"') return this.parseString();
435
- if (char === "-" || (char >= "0" && char <= "9")) return this.parseNumber();
436
-
437
- // Handle keywords (true, false, null) or invalid identifiers
438
- if (this.isIdentifierStart(char)) {
439
- return this.parseKeywordOrIdentifier(path);
440
- }
441
-
442
- // Don't skip structural characters - let the caller handle them
443
- const ch: string = this.input[this.pos]!;
444
- if (ch === "}" || ch === "]" || ch === ",") {
445
- // Not an error - just no value here (e.g., {"a":} or [,])
446
- return undefined;
447
- }
448
-
449
- this.errors.push({
450
- path,
451
- expected: "JSON value",
452
- description: this.getErrorContext(),
453
- });
454
- // Skip the problematic character and try to continue
455
- this.pos++;
456
- return undefined;
457
- }
458
-
459
- private getErrorContext(): string {
460
- // Get surrounding context for better error messages
461
- const start: number = Math.max(0, this.pos - 10);
462
- const end: number = Math.min(this.input.length, this.pos + 20);
463
- const before: string = this.input.slice(start, this.pos);
464
- const after: string = this.input.slice(this.pos, end);
465
- return (
466
- (start > 0 ? "..." : "") +
467
- before +
468
- "→" +
469
- after +
470
- (end < this.input.length ? "..." : "")
471
- );
472
- }
473
-
474
- private parseKeywordOrIdentifier(path: string): unknown {
475
- // Extract the token (sequence of identifier characters)
476
- const start: number = this.pos;
477
- while (
478
- this.pos < this.input.length &&
479
- this.isIdentifierChar(this.input[this.pos]!)
480
- ) {
481
- this.pos++;
482
- }
483
- const token: string = this.input.slice(start, this.pos);
484
-
485
- // Check for complete or partial keyword matches
486
- if (token === "true") return true;
487
- if (token === "false") return false;
488
- if (token === "null") return null;
489
-
490
- // Boolean string coercion: "yes", "y", "on" -> true, "no", "off" -> false
491
- // Note: "n" is intentionally NOT handled (neither null nor false)
492
- const lower: string = token.toLowerCase();
493
- if (lower === "yes" || lower === "y" || lower === "on") return true;
494
- if (lower === "no" || lower === "off") return false;
495
-
496
- // Partial match for lenient parsing (e.g., "tru" -> true, "fal" -> false)
497
- if ("true".startsWith(token) && token.length > 0) return true;
498
- if ("false".startsWith(token) && token.length > 0) return false;
499
- if ("null".startsWith(token) && token.length >= 2) return null;
500
-
501
- // Check if this looks like a string with missing opening quote (e.g., abcdefg")
502
- if (this.pos < this.input.length && this.input[this.pos] === '"') {
503
- // Treat as unquoted string value - skip the errant closing quote and return as string
504
- this.pos++; // skip the closing quote
505
- this.errors.push({
506
- path,
507
- expected: "quoted string",
508
- description: "missing opening quote for '" + token + "'",
509
- });
510
- return token;
511
- }
512
-
513
- // Invalid identifier as value - provide helpful error message
514
- this.errors.push({
515
- path,
516
- expected: "JSON value (string, number, boolean, null, object, or array)",
517
- description: "unquoted string '" + token + "' - did you forget quotes?",
518
- });
519
- // Skip to next comma, closing brace/bracket for recovery
520
- this.skipToRecoveryPoint();
521
- return undefined;
522
- }
523
-
524
- private skipToRecoveryPoint(): void {
525
- while (this.pos < this.input.length) {
526
- const ch: string = this.input[this.pos]!;
527
- if (ch === "," || ch === "}" || ch === "]") {
528
- return;
529
- }
530
- this.pos++;
531
- }
532
- }
533
-
534
- private parseObject(path: string): Record<string, unknown> {
535
- const result: Record<string, unknown> = {};
536
- this.pos++; // skip '{'
537
- this.depth++;
538
- this.skipWhitespace();
539
-
540
- while (this.pos < this.input.length) {
541
- this.skipWhitespace();
542
-
543
- // Handle end of object or end of input
544
- if (this.pos >= this.input.length || this.input[this.pos] === "}") {
545
- if (this.pos < this.input.length) this.pos++; // skip '}'
546
- this.depth--;
547
- return result;
548
- }
549
-
550
- // Skip trailing comma
551
- if (this.input[this.pos] === ",") {
552
- this.pos++;
553
- this.skipWhitespace();
554
- continue;
555
- }
556
-
557
- // Parse key (quoted string or unquoted identifier)
558
- let key: string;
559
- if (this.input[this.pos] === '"') {
560
- key = this.parseString();
561
- } else if (this.isIdentifierStart(this.input[this.pos]!)) {
562
- key = this.parseIdentifier();
563
- } else {
564
- this.errors.push({
565
- path,
566
- expected: "string key",
567
- description: this.input[this.pos],
568
- });
569
- // Try to recover by skipping to next meaningful character
570
- this.depth--;
571
- return result;
572
- }
573
- if (typeof key !== "string") {
574
- this.depth--;
575
- return result;
576
- }
577
-
578
- this.skipWhitespace();
579
-
580
- // Expect colon - but if we're at end of input, it's just incomplete (not an error)
581
- if (this.pos >= this.input.length) {
582
- this.depth--;
583
- return result;
584
- }
585
- if (this.input[this.pos] !== ":") {
586
- this.errors.push({
587
- path: path + "." + key,
588
- expected: "':'",
589
- description: this.input[this.pos],
590
- });
591
- this.depth--;
592
- return result;
593
- }
594
- this.pos++; // skip ':'
595
-
596
- this.skipWhitespace();
597
-
598
- // Parse value
599
- if (this.pos >= this.input.length) {
600
- // No value - incomplete but not an error for lenient parsing
601
- this.depth--;
602
- return result;
603
- }
604
-
605
- const value: unknown = this.parseValue(path + "." + key);
606
- result[key] = value;
607
-
608
- this.skipWhitespace();
609
-
610
- // Handle comma or end
611
- if (this.pos < this.input.length && this.input[this.pos] === ",") {
612
- this.pos++;
613
- }
614
- }
615
-
616
- this.depth--;
617
- return result;
618
- }
619
-
620
- private parseArray(path: string): unknown[] {
621
- const result: unknown[] = [];
622
- this.pos++; // skip '['
623
- this.depth++;
624
- this.skipWhitespace();
625
-
626
- let index: number = 0;
627
- while (this.pos < this.input.length) {
628
- this.skipWhitespace();
629
-
630
- // Handle end of array or end of input
631
- if (this.pos >= this.input.length || this.input[this.pos] === "]") {
632
- if (this.pos < this.input.length) this.pos++; // skip ']'
633
- this.depth--;
634
- return result;
635
- }
636
-
637
- // Skip trailing comma
638
- if (this.input[this.pos] === ",") {
639
- this.pos++;
640
- this.skipWhitespace();
641
- continue;
642
- }
643
-
644
- // Parse value
645
- const prevPos: number = this.pos;
646
- const value: unknown = this.parseValue(path + "[" + index + "]");
647
-
648
- // Guard: if parseValue didn't advance, skip unexpected char to prevent infinite loop
649
- if (this.pos === prevPos && this.pos < this.input.length) {
650
- this.pos++;
651
- continue;
652
- }
653
-
654
- result.push(value);
655
- index++;
656
-
657
- this.skipWhitespace();
658
-
659
- // Handle comma or end
660
- if (this.pos < this.input.length && this.input[this.pos] === ",") {
661
- this.pos++;
662
- }
663
- }
664
-
665
- this.depth--;
666
- return result;
667
- }
668
-
669
- private parseString(): string {
670
- this.pos++; // skip opening '"'
671
- let result: string = "";
672
- let escaped: boolean = false;
673
-
674
- while (this.pos < this.input.length) {
675
- const char: string = this.input[this.pos]!;
676
-
677
- if (escaped) {
678
- switch (char) {
679
- case '"':
680
- result += '"';
681
- break;
682
- case "\\":
683
- result += "\\";
684
- break;
685
- case "/":
686
- result += "/";
687
- break;
688
- case "b":
689
- result += "\b";
690
- break;
691
- case "f":
692
- result += "\f";
693
- break;
694
- case "n":
695
- result += "\n";
696
- break;
697
- case "r":
698
- result += "\r";
699
- break;
700
- case "t":
701
- result += "\t";
702
- break;
703
- case "u":
704
- // Parse unicode escape
705
- if (this.pos + 4 <= this.input.length) {
706
- const hex: string = this.input.slice(this.pos + 1, this.pos + 5);
707
- if (isHexString(hex)) {
708
- const highCode: number = parseInt(hex, 16);
709
- this.pos += 4;
710
-
711
- // Check for surrogate pair (emoji and characters > U+FFFF)
712
- if (
713
- highCode >= 0xd800 &&
714
- highCode <= 0xdbff &&
715
- this.pos + 6 <= this.input.length &&
716
- this.input[this.pos + 1] === "\\" &&
717
- this.input[this.pos + 2] === "u"
718
- ) {
719
- const lowHex: string = this.input.slice(
720
- this.pos + 3,
721
- this.pos + 7,
722
- );
723
- if (isHexString(lowHex)) {
724
- const lowCode: number = parseInt(lowHex, 16);
725
- if (lowCode >= 0xdc00 && lowCode <= 0xdfff) {
726
- result += String.fromCharCode(highCode, lowCode);
727
- this.pos += 6;
728
- break;
729
- }
730
- }
731
- }
732
- result += String.fromCharCode(highCode);
733
- } else {
734
- // Invalid hex - preserve escape sequence literally
735
- result += "\\u" + hex;
736
- this.pos += 4;
737
- }
738
- } else {
739
- // Incomplete unicode escape - add partial sequence
740
- const partial: string = this.input.slice(this.pos + 1);
741
- result += "\\u" + partial;
742
- this.pos = this.input.length - 1;
743
- }
744
- break;
745
- default:
746
- result += char;
747
- }
748
- escaped = false;
749
- this.pos++;
750
- continue;
751
- }
752
-
753
- if (char === "\\") {
754
- escaped = true;
755
- this.pos++;
756
- continue;
757
- }
758
-
759
- if (char === '"') {
760
- this.pos++; // skip closing '"'
761
- return result;
762
- }
763
-
764
- result += char;
765
- this.pos++;
766
- }
767
-
768
- // Unclosed string - return what we have (lenient)
769
- return result;
770
- }
771
-
772
- private parseNumber(): number {
773
- const start: number = this.pos;
774
-
775
- // Handle negative sign
776
- if (this.input[this.pos] === "-") {
777
- this.pos++;
778
- }
779
-
780
- // Parse integer part
781
- while (
782
- this.pos < this.input.length &&
783
- this.input[this.pos]! >= "0" &&
784
- this.input[this.pos]! <= "9"
785
- ) {
786
- this.pos++;
787
- }
788
-
789
- // Parse decimal part
790
- if (this.pos < this.input.length && this.input[this.pos] === ".") {
791
- this.pos++;
792
- while (
793
- this.pos < this.input.length &&
794
- this.input[this.pos]! >= "0" &&
795
- this.input[this.pos]! <= "9"
796
- ) {
797
- this.pos++;
798
- }
799
- }
800
-
801
- // Parse exponent
802
- if (
803
- this.pos < this.input.length &&
804
- (this.input[this.pos] === "e" || this.input[this.pos] === "E")
805
- ) {
806
- this.pos++;
807
- if (
808
- this.pos < this.input.length &&
809
- (this.input[this.pos] === "+" || this.input[this.pos] === "-")
810
- ) {
811
- this.pos++;
812
- }
813
- while (
814
- this.pos < this.input.length &&
815
- this.input[this.pos]! >= "0" &&
816
- this.input[this.pos]! <= "9"
817
- ) {
818
- this.pos++;
819
- }
820
- }
821
-
822
- const numStr: string = this.input.slice(start, this.pos);
823
- const num: number = Number(numStr);
824
- return Number.isNaN(num) ? 0 : num;
825
- }
826
-
827
- private isIdentifierStart(ch: string): boolean {
828
- return (
829
- (ch >= "a" && ch <= "z") ||
830
- (ch >= "A" && ch <= "Z") ||
831
- ch === "_" ||
832
- ch === "$"
833
- );
834
- }
835
-
836
- private isIdentifierChar(ch: string): boolean {
837
- return (
838
- (ch >= "a" && ch <= "z") ||
839
- (ch >= "A" && ch <= "Z") ||
840
- (ch >= "0" && ch <= "9") ||
841
- ch === "_" ||
842
- ch === "$"
843
- );
844
- }
845
-
846
- private parseIdentifier(): string {
847
- const start: number = this.pos;
848
- while (
849
- this.pos < this.input.length &&
850
- this.isIdentifierChar(this.input[this.pos]!)
851
- ) {
852
- this.pos++;
853
- }
854
- return this.input.slice(start, this.pos);
855
- }
856
-
857
- private skipWhitespace(): void {
858
- while (this.pos < this.input.length) {
859
- const ch: string = this.input[this.pos]!;
860
-
861
- // Skip standard whitespace
862
- if (ch === " " || ch === "\t" || ch === "\n" || ch === "\r") {
863
- this.pos++;
864
- continue;
865
- }
866
-
867
- // Skip single-line comment: // ...
868
- if (
869
- ch === "/" &&
870
- this.pos + 1 < this.input.length &&
871
- this.input[this.pos + 1] === "/"
872
- ) {
873
- this.pos += 2;
874
- while (
875
- this.pos < this.input.length &&
876
- this.input[this.pos] !== "\n" &&
877
- this.input[this.pos] !== "\r"
878
- ) {
879
- this.pos++;
880
- }
881
- continue;
882
- }
883
-
884
- // Skip multi-line comment: /* ... */
885
- if (
886
- ch === "/" &&
887
- this.pos + 1 < this.input.length &&
888
- this.input[this.pos + 1] === "*"
889
- ) {
890
- this.pos += 2;
891
- while (this.pos + 1 < this.input.length) {
892
- if (
893
- this.input[this.pos] === "*" &&
894
- this.input[this.pos + 1] === "/"
895
- ) {
896
- this.pos += 2;
897
- break;
898
- }
899
- this.pos++;
900
- }
901
- // Handle unclosed comment - move to end
902
- if (this.pos + 1 >= this.input.length) {
903
- this.pos = this.input.length;
904
- }
905
- continue;
906
- }
907
-
908
- // Not whitespace or comment
909
- break;
910
- }
911
- }
912
- }
913
-
914
- /**
915
- * Maximum nesting depth to prevent stack overflow attacks.
916
- *
917
- * @internal
918
- */
919
- const MAX_DEPTH: number = 512;
1
+ import { DeepPartial, IJsonParseResult } from "@typia/interface";
2
+
3
+ /**
4
+ * Parse lenient JSON that may be incomplete or malformed.
5
+ *
6
+ * Handles:
7
+ *
8
+ * - Unclosed brackets `{`, `[` - parses as much as possible
9
+ * - Trailing commas `[1, 2, ]` - ignores them
10
+ * - Unclosed strings `"hello` - returns partial string
11
+ * - Junk text before JSON (LLM often adds explanatory text)
12
+ * - Markdown code blocks (extracts content from `json ... `)
13
+ * - Incomplete keywords like `tru`, `fal`, `nul`
14
+ * - Unicode escape sequences including surrogate pairs (emoji)
15
+ * - JavaScript-style comments (single-line and multi-line)
16
+ * - Unquoted object keys (JavaScript identifier style)
17
+ *
18
+ * @param input Raw JSON string (potentially incomplete)
19
+ * @returns Parse result with data, original input, and any errors
20
+ * @internal
21
+ */
22
+ export function parseLenientJson<T>(input: string): IJsonParseResult<T> {
23
+ // For safe guard
24
+ if (typeof input !== "string") input = String(input);
25
+
26
+ // Try native JSON.parse first (faster for valid JSON)
27
+ let error: Error | null = null;
28
+ try {
29
+ return {
30
+ success: true,
31
+ data: JSON.parse(input) as T,
32
+ };
33
+ } catch (e) {
34
+ // Fall back to lenient parser
35
+ error = e instanceof Error ? e : new Error(String(e));
36
+ }
37
+
38
+ try {
39
+ return iterate(input);
40
+ } catch {
41
+ // actually unreachable, maybe?
42
+ return {
43
+ success: false,
44
+ data: undefined as DeepPartial<T>,
45
+ input,
46
+ errors: [
47
+ {
48
+ path: "$input",
49
+ expected: "valid JSON",
50
+ description: error.message,
51
+ },
52
+ ],
53
+ };
54
+ }
55
+ }
56
+
57
+ function iterate<T>(input: string): IJsonParseResult<T> {
58
+ // Extract markdown code block if present
59
+ const codeBlockContent: string | null = extractMarkdownCodeBlock(input);
60
+ const jsonSource: string =
61
+ codeBlockContent !== null ? codeBlockContent : input;
62
+
63
+ // Check if input is empty or whitespace-only
64
+ const trimmed: string = jsonSource.trim();
65
+ if (trimmed.length === 0) {
66
+ return {
67
+ success: false,
68
+ data: undefined as DeepPartial<T>,
69
+ input,
70
+ errors: [
71
+ {
72
+ path: "$input",
73
+ expected: "JSON value",
74
+ description: "empty input",
75
+ },
76
+ ],
77
+ };
78
+ }
79
+
80
+ // Check if input starts with a primitive value (no junk prefix skipping needed)
81
+ if (startsWithPrimitive(trimmed)) {
82
+ const errors: IJsonParseResult.IError[] = [];
83
+ const parser: LenientJsonParser = new LenientJsonParser(jsonSource, errors);
84
+ const data: unknown = parser.parse();
85
+ if (errors.length > 0) {
86
+ return { success: false, data: data as DeepPartial<T>, input, errors };
87
+ }
88
+ return { success: true, data: data as T };
89
+ }
90
+
91
+ // Find JSON start position (skip junk prefix from LLM)
92
+ const jsonStart: number = findJsonStart(jsonSource);
93
+ if (jsonStart === -1) {
94
+ // No object/array found - check if there's a primitive after skipping comments
95
+ const skipped: string = skipCommentsAndWhitespace(jsonSource);
96
+ if (skipped.length > 0 && startsWithPrimitive(skipped)) {
97
+ const errors: IJsonParseResult.IError[] = [];
98
+ const parser: LenientJsonParser = new LenientJsonParser(
99
+ jsonSource,
100
+ errors,
101
+ );
102
+ const data: unknown = parser.parse();
103
+ if (errors.length > 0) {
104
+ return { success: false, data: data as DeepPartial<T>, input, errors };
105
+ }
106
+ return { success: true, data: data as T };
107
+ }
108
+ // No valid JSON found - return failure
109
+ return {
110
+ success: false,
111
+ data: undefined as DeepPartial<T>,
112
+ input,
113
+ errors: [
114
+ {
115
+ path: "$input",
116
+ expected: "JSON value",
117
+ description: jsonSource,
118
+ },
119
+ ],
120
+ };
121
+ }
122
+
123
+ // Extract JSON portion (skip junk prefix)
124
+ const jsonInput: string =
125
+ jsonStart > 0 ? jsonSource.slice(jsonStart) : jsonSource;
126
+
127
+ const errors: IJsonParseResult.IError[] = [];
128
+ const parser: LenientJsonParser = new LenientJsonParser(jsonInput, errors);
129
+ const data: unknown = parser.parse();
130
+
131
+ if (errors.length > 0) {
132
+ return {
133
+ success: false,
134
+ data: data as DeepPartial<T>,
135
+ input,
136
+ errors,
137
+ };
138
+ }
139
+ return {
140
+ success: true,
141
+ data: data as T,
142
+ };
143
+ }
144
+
145
+ /**
146
+ * Check if a string is a valid 4-character hexadecimal string.
147
+ *
148
+ * @internal
149
+ */
150
+ function isHexString(s: string): boolean {
151
+ if (s.length !== 4) return false;
152
+ for (let i = 0; i < 4; i++) {
153
+ const c: number = s.charCodeAt(i);
154
+ if (
155
+ !((c >= 48 && c <= 57) || (c >= 65 && c <= 70) || (c >= 97 && c <= 102))
156
+ ) {
157
+ return false;
158
+ }
159
+ }
160
+ return true;
161
+ }
162
+
163
/**
 * Pull the body out of a markdown code fence tagged `json`, if there is one.
 *
 * LLM responses frequently look like:
 *
 *     Here is your result:
 *
 *     ```json
 *     { "name": "test" }
 *     ```
 *
 * in which case everything between the end of the opening fence line and the
 * next triple backtick is returned. When the closing fence is missing, the
 * remainder of the input after the opening line is returned instead.
 *
 * Nothing is extracted when the input (ignoring leading whitespace) already
 * begins with `{`, `[`, or `"`: such input is treated as JSON itself, so any
 * fence inside it belongs to a string value.
 *
 * @param input Text that may contain a markdown code block
 * @returns The fenced content, or `null` when no usable json fence exists
 * @internal
 */
function extractMarkdownCodeBlock(input: string): string | null {
  const fence: string = "```";
  const opening: string = fence + "json";

  // Input that already looks like JSON is never unwrapped.
  const head: string | undefined = input.trimStart()[0];
  if (head === "{" || head === "[" || head === '"') return null;

  // Only a fence tagged "json" counts; a bare fence does not.
  const fenceAt: number = input.indexOf(opening);
  if (fenceAt === -1) return null;

  // The content begins on the line after the opening fence.
  const lineBreak: number = input.indexOf("\n", fenceAt + opening.length);
  if (lineBreak === -1) return null;
  const bodyStart: number = lineBreak + 1;

  const fenceEnd: number = input.indexOf(fence, bodyStart);
  return fenceEnd === -1
    ? input.slice(bodyStart)
    : input.slice(bodyStart, fenceEnd);
}
216
+
217
/**
 * Find the start position of JSON object/array content in text that may have
 * a junk prefix.
 *
 * LLM outputs often contain text before JSON like:
 *
 * - `Here is your JSON: {"name": "test"}`
 * - `Sure! [1, 2, 3]`
 *
 * The scan walks the input from the left and skips over `//` comments,
 * slash-star block comments and double-quoted strings, so that a `{` or `[`
 * appearing inside any of them is not mistaken for the JSON start. An
 * unclosed block comment or string swallows the rest of the input.
 *
 * @param input Text that may contain JSON with junk prefix
 * @returns Index of first `{` or `[` outside comments/strings, or -1 if not
 *   found
 * @internal
 */
function findJsonStart(input: string): number {
  const len: number = input.length;
  let pos: number = 0;

  while (pos < len) {
    const ch: string = input[pos]!;

    // Found JSON start
    if (ch === "{" || ch === "[") {
      return pos;
    }

    // Skip single-line comment (up to, but not including, the line break)
    if (ch === "/" && input[pos + 1] === "/") {
      pos += 2;
      while (pos < len && input[pos] !== "\n" && input[pos] !== "\r") {
        pos++;
      }
      continue;
    }

    // Skip multi-line comment. Resume right after the terminator when there
    // is one; only a genuinely unclosed comment consumes the rest of the
    // input. (Deciding "unclosed" from the position alone used to drop a
    // single character that directly followed a closed comment.)
    if (ch === "/" && input[pos + 1] === "*") {
      const close: number = input.indexOf("*/", pos + 2);
      pos = close === -1 ? len : close + 2;
      continue;
    }

    // Skip string literal (to avoid matching { or [ inside strings)
    if (ch === '"') {
      pos++;
      while (pos < len) {
        const inner: string = input[pos]!;
        if (inner === "\\") {
          pos += 2; // skip the backslash and the character it escapes
          continue;
        }
        pos++;
        if (inner === '"') break;
      }
      continue;
    }

    pos++;
  }

  return -1;
}
295
+
296
/**
 * Skip leading comments and whitespace from input.
 *
 * Whitespace means space, tab, line feed and carriage return. Both `//`
 * comments (up to the line break) and slash-star block comments are removed.
 * An unclosed block comment consumes the rest of the input, so the result is
 * then the empty string.
 *
 * @param input Text that may start with comments or whitespace
 * @returns Input with leading comments and whitespace removed
 * @internal
 */
function skipCommentsAndWhitespace(input: string): string {
  const len: number = input.length;
  let pos: number = 0;

  while (pos < len) {
    const ch: string = input[pos]!;

    // Skip whitespace
    if (ch === " " || ch === "\t" || ch === "\n" || ch === "\r") {
      pos++;
      continue;
    }

    // Skip single-line comment
    if (ch === "/" && input[pos + 1] === "/") {
      pos += 2;
      while (pos < len && input[pos] !== "\n" && input[pos] !== "\r") {
        pos++;
      }
      continue;
    }

    // Skip multi-line comment. Resume right after the terminator when there
    // is one; only a genuinely unclosed comment consumes the rest of the
    // input. (Deciding "unclosed" from the position alone used to drop a
    // single character that directly followed a closed comment.)
    if (ch === "/" && input[pos + 1] === "*") {
      const close: number = input.indexOf("*/", pos + 2);
      pos = close === -1 ? len : close + 2;
      continue;
    }

    // Not whitespace or comment
    break;
  }

  return input.slice(pos);
}
347
+
348
/**
 * Decide whether the input begins with something the parser treats as a
 * primitive value.
 *
 * Recognized starts are:
 *
 * - A double quote (string)
 * - A digit or a minus sign (number)
 * - The keywords `true`, `false`, `null` as a prefix of the input
 * - A truncated keyword, i.e. the whole input is a prefix of `true` or `false`,
 *   or a prefix of `null` that is at least two characters long
 * - The whole input, case-insensitively, being `yes`, `y`, `on`, `no` or `off`
 *   (a lone `n` is deliberately not accepted)
 *
 * @param input Trimmed input string
 * @returns True if input starts with a primitive value
 * @internal
 */
function startsWithPrimitive(input: string): boolean {
  if (input.length === 0) return false;

  // String or number, judged by the first character alone.
  const first: string = input[0]!;
  if (first === '"' || first === "-" || (first >= "0" && first <= "9")) {
    return true;
  }

  // Complete keyword at the front of the input.
  if (["true", "false", "null"].some((word) => input.startsWith(word))) {
    return true;
  }

  // Truncated keyword: the input itself is a prefix of a keyword.
  if ("true".startsWith(input) || "false".startsWith(input)) return true;
  if (input.length >= 2 && "null".startsWith(input)) return true;

  // Boolean-like words, matched against the entire input.
  return ["yes", "y", "on", "no", "off"].includes(input.toLowerCase());
}
388
+
389
/**
 * Lenient JSON parser that handles incomplete JSON.
 *
 * A recursive-descent parser over a single input string. Instead of throwing
 * on malformed input it returns as much data as it could read and appends a
 * description of each problem to the `errors` array handed to the
 * constructor. Running out of input in the middle of a value (unclosed
 * string, object or array) is not reported as an error.
 *
 * Beyond strict JSON it accepts `//` and slash-star comments wherever
 * whitespace is allowed, stray and trailing commas, unquoted identifier
 * keys, truncated keywords such as `tru`, and the words
 * `yes`/`y`/`on`/`no`/`off` as booleans.
 *
 * @internal
 */
class LenientJsonParser {
  /** Index of the next unread character in `input`. */
  private pos: number = 0;
  /** Number of objects/arrays currently being parsed (nesting level). */
  private depth: number = 0;
  private readonly input: string;
  private readonly errors: IJsonParseResult.IError[];

  /**
   * @param input Text to parse
   * @param errors Array that receives one entry per problem found; it is
   *   mutated in place
   */
  constructor(input: string, errors: IJsonParseResult.IError[]) {
    this.input = input;
    this.errors = errors;
  }

  /**
   * Parse the first value found in the input.
   *
   * @returns The parsed value, or `undefined` when the input holds nothing
   *   but whitespace and comments
   */
  parse(): unknown {
    this.skipWhitespace();
    if (this.pos >= this.input.length) {
      return undefined;
    }
    return this.parseValue("$input");
  }

  /**
   * Parse one value of any kind at the current position.
   *
   * @param path Accessor path of the value, used in error entries
   * @returns The value, or `undefined` when there is none to read
   */
  private parseValue(path: string): unknown {
    this.skipWhitespace();

    if (this.pos >= this.input.length) {
      return undefined;
    }

    // Check for maximum depth to prevent stack overflow
    if (this.depth >= MAX_DEPTH) {
      this.errors.push({
        path,
        expected: "value (max depth exceeded)",
        description: undefined,
      });
      return undefined;
    }

    const char: string = this.input[this.pos]!;

    if (char === "{") return this.parseObject(path);
    if (char === "[") return this.parseArray(path);
    if (char === '"') return this.parseString();
    if (char === "-" || (char >= "0" && char <= "9")) return this.parseNumber();

    // Handle keywords (true, false, null) or invalid identifiers
    if (this.isIdentifierStart(char)) {
      return this.parseKeywordOrIdentifier(path);
    }

    // Don't skip structural characters - let the caller handle them.
    // Not an error - just no value here (e.g., {"a":} or [,])
    if (char === "}" || char === "]" || char === ",") {
      return undefined;
    }

    this.errors.push({
      path,
      expected: "JSON value",
      description: this.getErrorContext(),
    });
    // Skip the problematic character and try to continue
    this.pos++;
    return undefined;
  }

  /**
   * Build a short excerpt of the input around the current position (up to 10
   * characters before and 20 after, with an arrow marking the position) for
   * use in error descriptions.
   */
  private getErrorContext(): string {
    const start: number = Math.max(0, this.pos - 10);
    const end: number = Math.min(this.input.length, this.pos + 20);
    const before: string = this.input.slice(start, this.pos);
    const after: string = this.input.slice(this.pos, end);
    return (
      (start > 0 ? "..." : "") +
      before +
      "→" +
      after +
      (end < this.input.length ? "..." : "")
    );
  }

  /**
   * Parse a bare word in value position.
   *
   * Complete and truncated keywords and the boolean-like words map to their
   * values. A word immediately followed by `"` is returned as a string with
   * an error noting the missing opening quote. Anything else is reported as
   * an unquoted string and skipped.
   *
   * @param path Accessor path of the value, used in error entries
   */
  private parseKeywordOrIdentifier(path: string): unknown {
    // Extract the token (sequence of identifier characters)
    const token: string = this.parseIdentifier();

    // Complete keywords
    if (token === "true") return true;
    if (token === "false") return false;
    if (token === "null") return null;

    // Boolean string coercion: "yes", "y", "on" -> true, "no", "off" -> false
    // Note: "n" is intentionally NOT handled (neither null nor false)
    const lower: string = token.toLowerCase();
    if (lower === "yes" || lower === "y" || lower === "on") return true;
    if (lower === "no" || lower === "off") return false;

    // Partial match for lenient parsing (e.g., "tru" -> true, "fal" -> false)
    if ("true".startsWith(token) && token.length > 0) return true;
    if ("false".startsWith(token) && token.length > 0) return false;
    if ("null".startsWith(token) && token.length >= 2) return null;

    // Check if this looks like a string with missing opening quote (e.g., abcdefg")
    if (this.pos < this.input.length && this.input[this.pos] === '"') {
      this.pos++; // skip the errant closing quote
      this.errors.push({
        path,
        expected: "quoted string",
        description: "missing opening quote for '" + token + "'",
      });
      return token;
    }

    // Invalid identifier as value - provide helpful error message
    this.errors.push({
      path,
      expected: "JSON value (string, number, boolean, null, object, or array)",
      description: "unquoted string '" + token + "' - did you forget quotes?",
    });
    // Skip to next comma, closing brace/bracket for recovery
    this.skipToRecoveryPoint();
    return undefined;
  }

  /** Advance to the next `,`, `}` or `]` (not consumed), or to end of input. */
  private skipToRecoveryPoint(): void {
    while (this.pos < this.input.length) {
      const ch: string = this.input[this.pos]!;
      if (ch === "," || ch === "}" || ch === "]") {
        return;
      }
      this.pos++;
    }
  }

  /**
   * Parse an object; the current character must be `{`.
   *
   * Stops early, returning the properties read so far, when the input ends or
   * when a key or its colon is malformed (the latter two also record an
   * error).
   *
   * @param path Accessor path of the object, used in error entries
   */
  private parseObject(path: string): Record<string, unknown> {
    const result: Record<string, unknown> = {};
    this.pos++; // skip '{'
    this.depth++;
    this.skipWhitespace();

    while (this.pos < this.input.length) {
      this.skipWhitespace();

      // Handle end of object or end of input
      if (this.pos >= this.input.length || this.input[this.pos] === "}") {
        if (this.pos < this.input.length) this.pos++; // skip '}'
        this.depth--;
        return result;
      }

      // Skip trailing comma
      if (this.input[this.pos] === ",") {
        this.pos++;
        this.skipWhitespace();
        continue;
      }

      // Parse key (quoted string or unquoted identifier)
      let key: string;
      if (this.input[this.pos] === '"') {
        key = this.parseString();
      } else if (this.isIdentifierStart(this.input[this.pos]!)) {
        key = this.parseIdentifier();
      } else {
        this.errors.push({
          path,
          expected: "string key",
          description: this.input[this.pos],
        });
        // No recovery is attempted: give back what was read so far
        this.depth--;
        return result;
      }

      this.skipWhitespace();

      // Expect colon - but if we're at end of input, it's just incomplete (not an error)
      if (this.pos >= this.input.length) {
        this.depth--;
        return result;
      }
      if (this.input[this.pos] !== ":") {
        this.errors.push({
          path: path + "." + key,
          expected: "':'",
          description: this.input[this.pos],
        });
        this.depth--;
        return result;
      }
      this.pos++; // skip ':'

      this.skipWhitespace();

      // Parse value
      if (this.pos >= this.input.length) {
        // No value - incomplete but not an error for lenient parsing
        this.depth--;
        return result;
      }

      const value: unknown = this.parseValue(path + "." + key);
      if (key === "__proto__") {
        // Plain assignment to "__proto__" would invoke the inherited setter
        // and replace the prototype of `result`. Define an own data property
        // instead, which is what JSON.parse does for this key.
        Object.defineProperty(result, key, {
          value,
          writable: true,
          enumerable: true,
          configurable: true,
        });
      } else {
        result[key] = value;
      }

      this.skipWhitespace();

      // Handle comma or end
      if (this.pos < this.input.length && this.input[this.pos] === ",") {
        this.pos++;
      }
    }

    this.depth--;
    return result;
  }

  /**
   * Parse an array; the current character must be `[`.
   *
   * @param path Accessor path of the array, used in error entries
   */
  private parseArray(path: string): unknown[] {
    const result: unknown[] = [];
    this.pos++; // skip '['
    this.depth++;
    this.skipWhitespace();

    let index: number = 0;
    while (this.pos < this.input.length) {
      this.skipWhitespace();

      // Handle end of array or end of input
      if (this.pos >= this.input.length || this.input[this.pos] === "]") {
        if (this.pos < this.input.length) this.pos++; // skip ']'
        this.depth--;
        return result;
      }

      // Skip trailing comma
      if (this.input[this.pos] === ",") {
        this.pos++;
        this.skipWhitespace();
        continue;
      }

      // Parse value
      const prevPos: number = this.pos;
      const value: unknown = this.parseValue(path + "[" + index + "]");

      // Guard: if parseValue didn't advance, skip unexpected char to prevent infinite loop
      if (this.pos === prevPos && this.pos < this.input.length) {
        this.pos++;
        continue;
      }

      result.push(value);
      index++;

      this.skipWhitespace();

      // Handle comma or end
      if (this.pos < this.input.length && this.input[this.pos] === ",") {
        this.pos++;
      }
    }

    this.depth--;
    return result;
  }

  /**
   * Parse a double-quoted string; the current character must be the opening
   * quote.
   *
   * Standard escapes are decoded, including `\uXXXX` and surrogate pairs. An
   * unknown escape yields the escaped character itself. A `\u` that is not
   * followed by four hex digits is kept as the two literal characters `\u`,
   * and the characters after it are read normally. An unclosed string
   * returns the text read so far.
   */
  private parseString(): string {
    this.pos++; // skip opening '"'
    let result: string = "";
    let escaped: boolean = false;

    while (this.pos < this.input.length) {
      const char: string = this.input[this.pos]!;

      if (escaped) {
        switch (char) {
          case '"':
            result += '"';
            break;
          case "\\":
            result += "\\";
            break;
          case "/":
            result += "/";
            break;
          case "b":
            result += "\b";
            break;
          case "f":
            result += "\f";
            break;
          case "n":
            result += "\n";
            break;
          case "r":
            result += "\r";
            break;
          case "t":
            result += "\t";
            break;
          case "u": {
            const hex: string = this.input.slice(this.pos + 1, this.pos + 5);
            if (!isHexString(hex)) {
              // Invalid or truncated escape: keep "\u" literally and let the
              // main loop read what follows. Consuming four characters
              // blindly here could swallow the closing quote.
              result += "\\u";
              break;
            }
            const highCode: number = parseInt(hex, 16);
            this.pos += 4; // now on the last hex digit

            // Check for surrogate pair (emoji and characters > U+FFFF)
            if (
              highCode >= 0xd800 &&
              highCode <= 0xdbff &&
              this.input[this.pos + 1] === "\\" &&
              this.input[this.pos + 2] === "u"
            ) {
              const lowHex: string = this.input.slice(
                this.pos + 3,
                this.pos + 7,
              );
              if (isHexString(lowHex)) {
                const lowCode: number = parseInt(lowHex, 16);
                if (lowCode >= 0xdc00 && lowCode <= 0xdfff) {
                  result += String.fromCharCode(highCode, lowCode);
                  this.pos += 6; // now on the last digit of the low half
                  break;
                }
              }
            }
            result += String.fromCharCode(highCode);
            break;
          }
          default:
            result += char;
        }
        escaped = false;
        this.pos++;
        continue;
      }

      if (char === "\\") {
        escaped = true;
        this.pos++;
        continue;
      }

      if (char === '"') {
        this.pos++; // skip closing '"'
        return result;
      }

      result += char;
      this.pos++;
    }

    // Unclosed string - return what we have (lenient)
    return result;
  }

  /**
   * Parse a number: optional `-`, integer digits, optional fraction,
   * optional exponent.
   *
   * @returns The numeric value, or 0 when the consumed text does not convert
   *   to a number (for example a lone `-`)
   */
  private parseNumber(): number {
    const start: number = this.pos;

    // Handle negative sign
    if (this.input[this.pos] === "-") {
      this.pos++;
    }

    // Parse integer part
    this.skipDigits();

    // Parse decimal part
    if (this.pos < this.input.length && this.input[this.pos] === ".") {
      this.pos++;
      this.skipDigits();
    }

    // Parse exponent
    if (
      this.pos < this.input.length &&
      (this.input[this.pos] === "e" || this.input[this.pos] === "E")
    ) {
      this.pos++;
      if (
        this.pos < this.input.length &&
        (this.input[this.pos] === "+" || this.input[this.pos] === "-")
      ) {
        this.pos++;
      }
      this.skipDigits();
    }

    const numStr: string = this.input.slice(start, this.pos);
    const num: number = Number(numStr);
    return Number.isNaN(num) ? 0 : num;
  }

  /** Advance past a (possibly empty) run of ASCII digits. */
  private skipDigits(): void {
    while (
      this.pos < this.input.length &&
      this.input[this.pos]! >= "0" &&
      this.input[this.pos]! <= "9"
    ) {
      this.pos++;
    }
  }

  /** ASCII letter, `_` or `$`. */
  private isIdentifierStart(ch: string): boolean {
    return (
      (ch >= "a" && ch <= "z") ||
      (ch >= "A" && ch <= "Z") ||
      ch === "_" ||
      ch === "$"
    );
  }

  /** ASCII letter, digit, `_` or `$`. */
  private isIdentifierChar(ch: string): boolean {
    return (
      (ch >= "a" && ch <= "z") ||
      (ch >= "A" && ch <= "Z") ||
      (ch >= "0" && ch <= "9") ||
      ch === "_" ||
      ch === "$"
    );
  }

  /** Consume and return the run of identifier characters at the position. */
  private parseIdentifier(): string {
    const start: number = this.pos;
    while (
      this.pos < this.input.length &&
      this.isIdentifierChar(this.input[this.pos]!)
    ) {
      this.pos++;
    }
    return this.input.slice(start, this.pos);
  }

  /**
   * Advance past whitespace (space, tab, LF, CR) and comments.
   *
   * An unclosed block comment consumes the rest of the input.
   */
  private skipWhitespace(): void {
    while (this.pos < this.input.length) {
      const ch: string = this.input[this.pos]!;

      // Skip standard whitespace
      if (ch === " " || ch === "\t" || ch === "\n" || ch === "\r") {
        this.pos++;
        continue;
      }

      // Skip single-line comment: // ...
      if (ch === "/" && this.input[this.pos + 1] === "/") {
        this.pos += 2;
        while (
          this.pos < this.input.length &&
          this.input[this.pos] !== "\n" &&
          this.input[this.pos] !== "\r"
        ) {
          this.pos++;
        }
        continue;
      }

      // Skip multi-line comment. Resume right after the terminator when
      // there is one; only a genuinely unclosed comment consumes the rest of
      // the input. (Deciding "unclosed" from the position alone used to drop
      // a single character that directly followed a closed comment.)
      if (ch === "/" && this.input[this.pos + 1] === "*") {
        const close: number = this.input.indexOf("*/", this.pos + 2);
        this.pos = close === -1 ? this.input.length : close + 2;
        continue;
      }

      // Not whitespace or comment
      break;
    }
  }
}
913
+
914
+ /**
915
+ * Maximum nesting depth to prevent stack overflow attacks.
916
+ *
+ * `LenientJsonParser.parseValue` compares its current nesting depth against
+ * this limit; once the limit is reached it records a
+ * "value (max depth exceeded)" error and returns `undefined` instead of
+ * descending into another object or array.
+ *
917
+ * @internal
918
+ */
919
+ const MAX_DEPTH: number = 512;