@typia/utils 12.0.0-dev.20260316 → 12.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -21
- package/README.md +85 -85
- package/package.json +2 -2
- package/src/converters/LlmSchemaConverter.ts +617 -617
- package/src/converters/OpenApiConverter.ts +375 -375
- package/src/converters/internal/OpenApiV3Downgrader.ts +381 -381
- package/src/converters/internal/OpenApiV3Upgrader.ts +494 -494
- package/src/converters/internal/OpenApiV3_1Downgrader.ts +318 -318
- package/src/converters/internal/OpenApiV3_1Upgrader.ts +710 -710
- package/src/converters/internal/OpenApiV3_2Upgrader.ts +342 -342
- package/src/converters/internal/SwaggerV2Downgrader.ts +450 -450
- package/src/converters/internal/SwaggerV2Upgrader.ts +547 -547
- package/src/http/HttpError.ts +114 -114
- package/src/http/HttpLlm.ts +169 -169
- package/src/http/HttpMigration.ts +94 -94
- package/src/http/internal/HttpLlmApplicationComposer.ts +360 -360
- package/src/http/internal/HttpMigrateApplicationComposer.ts +56 -56
- package/src/http/internal/HttpMigrateRouteComposer.ts +505 -505
- package/src/utils/LlmJson.ts +173 -173
- package/src/utils/internal/parseLenientJson.ts +919 -919
|
@@ -1,919 +1,919 @@
|
|
|
1
|
-
import { DeepPartial, IJsonParseResult } from "@typia/interface";
|
|
2
|
-
|
|
3
|
-
/**
 * Parse JSON text leniently, accepting input that is truncated or slightly
 * malformed.
 *
 * Strict `JSON.parse` is attempted first because it is the fast path for
 * well-formed input. Only when it throws is the text handed to the lenient
 * parser, which tolerates:
 *
 * - Unclosed brackets `{`, `[` (parses as much as possible)
 * - Trailing commas such as `[1, 2, ]`
 * - Unclosed strings such as `"hello` (returns the partial string)
 * - Junk text before the JSON (LLMs often add explanatory text)
 * - Markdown code blocks (content of a ```json fenced block is extracted)
 * - Incomplete keywords like `tru`, `fal`, `nul`
 * - Unicode escape sequences including surrogate pairs (emoji)
 * - JavaScript-style comments (single-line and multi-line)
 * - Unquoted object keys (JavaScript identifier style)
 *
 * @param input Raw JSON string (potentially incomplete); non-string values
 *   are coerced with `String()`
 * @returns Parse result with data, original input, and any errors
 * @internal
 */
export function parseLenientJson<T>(input: string): IJsonParseResult<T> {
  // Defensive coercion: callers outside the type system may pass non-strings.
  const text: string = typeof input === "string" ? input : String(input);

  try {
    // Fast path: strictly valid JSON.
    return {
      success: true,
      data: JSON.parse(text) as T,
    };
  } catch (cause) {
    // Remember why strict parsing failed; it is reported if the lenient
    // parser itself throws.
    const strictFailure: Error =
      cause instanceof Error ? cause : new Error(String(cause));

    try {
      return iterate<T>(text);
    } catch {
      // Not expected to happen, but never let an exception escape.
      return {
        success: false,
        data: undefined as DeepPartial<T>,
        input: text,
        errors: [
          {
            path: "$input",
            expected: "valid JSON",
            description: strictFailure.message,
          },
        ],
      };
    }
  }
}
|
|
56
|
-
|
|
57
|
-
/**
 * Lenient fallback used after strict `JSON.parse` has failed.
 *
 * Steps, in order:
 *
 * 1. If the text contains a ```json fenced block, work on its content.
 * 2. Reject empty / whitespace-only text.
 * 3. If the text begins with a primitive token, parse it as-is.
 * 4. Otherwise skip any junk prefix up to the first `{` or `[` and parse from
 *    there; if there is no object/array, accept a primitive that follows
 *    leading comments.
 *
 * @param input Original text; echoed back in failure results
 * @returns Success with data, or failure with partial data and errors
 * @internal
 */
function iterate<T>(input: string): IJsonParseResult<T> {
  // Extract markdown code block if present
  const codeBlockContent: string | null = extractMarkdownCodeBlock(input);
  const jsonSource: string =
    codeBlockContent !== null ? codeBlockContent : input;

  // Nothing to parse at all
  const trimmed: string = jsonSource.trim();
  if (trimmed.length === 0) {
    return failWithoutData<T>(input, "empty input");
  }

  // Starts with a primitive value: no junk-prefix skipping needed
  if (startsWithPrimitive(trimmed)) {
    return runLenientParser<T>(jsonSource, input);
  }

  // Find JSON start position (skip junk prefix from LLM)
  const jsonStart: number = findJsonStart(jsonSource);
  if (jsonStart === -1) {
    // No object/array - maybe a primitive sits behind leading comments.
    // The parser skips comments itself, so it receives the full source.
    const skipped: string = skipCommentsAndWhitespace(jsonSource);
    if (skipped.length > 0 && startsWithPrimitive(skipped)) {
      return runLenientParser<T>(jsonSource, input);
    }
    // No valid JSON found; report the text that was examined
    return failWithoutData<T>(input, jsonSource);
  }

  // Parse from the first bracket onwards
  return runLenientParser<T>(
    jsonStart > 0 ? jsonSource.slice(jsonStart) : jsonSource,
    input,
  );
}

/**
 * Run {@link LenientJsonParser} over `source` and wrap the outcome: any
 * recorded error turns the result into a failure carrying the partial data.
 *
 * @param source Text handed to the parser
 * @param input Original text, echoed back in failure results
 * @internal
 */
function runLenientParser<T>(
  source: string,
  input: string,
): IJsonParseResult<T> {
  const errors: IJsonParseResult.IError[] = [];
  const data: unknown = new LenientJsonParser(source, errors).parse();
  if (errors.length > 0) {
    return { success: false, data: data as DeepPartial<T>, input, errors };
  }
  return { success: true, data: data as T };
}

/**
 * Build the failure result used when no JSON value could be located at all.
 *
 * @param input Original text, echoed back in the result
 * @param description Detail attached to the single `$input` error
 * @internal
 */
function failWithoutData<T>(
  input: string,
  description: string,
): IJsonParseResult<T> {
  return {
    success: false,
    data: undefined as DeepPartial<T>,
    input,
    errors: [
      {
        path: "$input",
        expected: "JSON value",
        description,
      },
    ],
  };
}
|
|
144
|
-
|
|
145
|
-
/**
 * Tell whether `s` consists of exactly four hexadecimal digits
 * (`0-9`, `a-f`, `A-F`), i.e. the payload of a `\uXXXX` escape.
 *
 * @param s Candidate string
 * @returns True only for a 4-character, all-hex string
 * @internal
 */
function isHexString(s: string): boolean {
  return /^[0-9a-fA-F]{4}$/.test(s);
}
|
|
162
|
-
|
|
163
|
-
/**
 * Pull the body out of a markdown ```json fenced block, if the text has one.
 *
 * LLM responses frequently look like:
 *
 *     Here is your result:
 *
 *     ```json
 *     { "name": "test" }
 *     ```
 *
 * and only the lines between the fences are wanted.
 *
 * Nothing is extracted when the text (ignoring leading whitespace) already
 * begins with `{`, `[` or `"`: in that case the text is itself JSON and the
 * fence must be part of a string value.
 *
 * @param input Text that may contain markdown code block
 * @returns The fenced content (up to the closing fence, or to the end of the
 *   text when the fence is never closed), or null when there is no usable
 *   ```json block
 * @internal
 */
function extractMarkdownCodeBlock(input: string): string | null {
  const FENCE: string = "```";
  const OPENER: string = FENCE + "json";

  // Only a ```json fence counts, not a bare ```
  const openerAt: number = input.indexOf(OPENER);
  if (openerAt === -1) return null;

  // Already JSON? Then the fence lives inside a string value.
  const lead: string | undefined = input.trimStart()[0];
  if (lead === "{" || lead === "[" || lead === '"') return null;

  // Content begins on the line after the opening fence
  const openerLineEnd: number = input.indexOf("\n", openerAt + OPENER.length);
  if (openerLineEnd === -1) return null;
  const contentStart: number = openerLineEnd + 1;

  // Up to the closing fence, or everything that remains when it is missing
  const closerAt: number = input.indexOf(FENCE, contentStart);
  return closerAt === -1
    ? input.slice(contentStart)
    : input.slice(contentStart, closerAt);
}
|
|
216
|
-
|
|
217
|
-
/**
 * Find the start position of JSON object/array content in text that may have
 * a junk prefix.
 *
 * LLM outputs often contain text before JSON like:
 *
 * - `Here is your JSON: {"name": "test"}`
 * - `Sure! [1, 2, 3]`
 *
 * Comments (`// ...`, `/* ... *​/`) and double-quoted string literals are
 * stepped over so that a bracket inside them is not mistaken for the start.
 * Primitive values (strings, numbers, booleans) are not located here; the
 * parser handles them directly.
 *
 * @param input Text that may contain JSON with junk prefix
 * @returns Index of first `{` or `[` outside comments/strings, or -1 if not
 *   found
 * @internal
 */
function findJsonStart(input: string): number {
  const len: number = input.length;
  let pos: number = 0;

  while (pos < len) {
    const ch: string = input[pos]!;

    // Found JSON start
    if (ch === "{" || ch === "[") return pos;

    // Single-line comment: skip up to (not including) the line break
    if (ch === "/" && input[pos + 1] === "/") {
      pos += 2;
      while (pos < len && input[pos] !== "\n" && input[pos] !== "\r") pos++;
      continue;
    }

    // Multi-line comment: resume right after the terminator. Only a comment
    // that is never closed swallows the rest of the input; previously a
    // closed comment followed by exactly one character (e.g. `/* c */{`)
    // was also treated as unclosed and that character was lost.
    if (ch === "/" && input[pos + 1] === "*") {
      const close: number = input.indexOf("*/", pos + 2);
      pos = close === -1 ? len : close + 2;
      continue;
    }

    // String literal: skip it so `{` / `[` inside strings are ignored
    if (ch === '"') {
      pos++;
      while (pos < len) {
        const c: string | undefined = input[pos];
        if (c === "\\") {
          pos += 2; // skip escape sequence
          continue;
        }
        pos++;
        if (c === '"') break;
      }
      continue;
    }

    pos++;
  }

  return -1;
}
|
|
295
|
-
|
|
296
|
-
/**
 * Skip leading comments and whitespace from input.
 *
 * Whitespace here means space, tab, line feed and carriage return. Both
 * `// ...` and `/* ... *​/` comments are removed; an unterminated block
 * comment consumes the rest of the text.
 *
 * @param input Text that may start with comments or whitespace
 * @returns Input with leading comments and whitespace removed
 * @internal
 */
function skipCommentsAndWhitespace(input: string): string {
  const len: number = input.length;
  let pos: number = 0;

  while (pos < len) {
    const ch: string = input[pos]!;

    // Whitespace
    if (ch === " " || ch === "\t" || ch === "\n" || ch === "\r") {
      pos++;
      continue;
    }

    // Anything that is not the start of a comment ends the prefix
    if (ch !== "/") break;
    const next: string | undefined = input[pos + 1];

    // Single-line comment: skip up to (not including) the line break
    if (next === "/") {
      pos += 2;
      while (pos < len && input[pos] !== "\n" && input[pos] !== "\r") pos++;
      continue;
    }

    // Multi-line comment: resume right after the terminator. Only a comment
    // that is never closed swallows the rest of the input; previously a
    // closed comment followed by exactly one character (e.g. `/* c */5`)
    // was also treated as unclosed and that character was lost.
    if (next === "*") {
      const close: number = input.indexOf("*/", pos + 2);
      pos = close === -1 ? len : close + 2;
      continue;
    }

    // A lone "/" is not a comment
    break;
  }

  return input.slice(pos);
}
|
|
347
|
-
|
|
348
|
-
/**
 * Decide whether the text begins with something the lenient parser treats as
 * a primitive value.
 *
 * Recognised starts:
 *
 * - a double quote (string)
 * - a digit or `-` (number)
 * - the keywords `true`, `false`, `null` as a prefix of the text
 * - the whole text being a prefix of a keyword (`tru`, `f`, `nu`, ...); for
 *   `null` at least two characters are required, mirroring the parser
 * - the whole text being `yes`, `y`, `on`, `no` or `off` in any letter case
 *   (`n` alone is deliberately not accepted)
 *
 * @param input Trimmed input string
 * @returns True if input starts with a primitive value
 * @internal
 */
function startsWithPrimitive(input: string): boolean {
  if (input === "") return false;

  // String or number by first character
  const head: string = input.charAt(0);
  if (head === '"' || head === "-" || (head >= "0" && head <= "9")) {
    return true;
  }

  // Complete keyword at the front of the text
  const keywords: string[] = ["true", "false", "null"];
  if (keywords.some((keyword) => input.startsWith(keyword))) return true;

  // Whole text is a truncated keyword
  const isPartialKeyword: boolean =
    "true".startsWith(input) ||
    "false".startsWith(input) ||
    (input.length >= 2 && "null".startsWith(input));
  if (isPartialKeyword) return true;

  // Whole text is a boolean word
  const booleanWords: string[] = ["yes", "y", "on", "no", "off"];
  return booleanWords.includes(input.toLowerCase());
}
|
|
388
|
-
|
|
389
|
-
/**
 * Lenient JSON parser that handles incomplete JSON.
 *
 * A single-pass recursive-descent parser over `input`. Instead of throwing on
 * malformed text it returns whatever could be read and appends a description
 * of each problem to the `errors` array supplied by the caller. Input that
 * merely ends early (unclosed bracket, unclosed string, missing value) is not
 * reported as an error.
 *
 * @internal
 */
class LenientJsonParser {
  /** Index of the next unread character in `input`. */
  private pos: number = 0;
  /** Number of objects/arrays currently open; compared with `MAX_DEPTH`. */
  private depth: number = 0;
  private readonly input: string;
  /** Caller-owned sink for recoverable problems. */
  private readonly errors: IJsonParseResult.IError[];

  /**
   * @param input Text to parse
   * @param errors Array that receives every problem found while parsing
   */
  constructor(input: string, errors: IJsonParseResult.IError[]) {
    this.input = input;
    this.errors = errors;
  }

  /**
   * Parse one value from the start of the input.
   *
   * @returns The parsed value, or undefined when the input holds nothing but
   *   whitespace and comments
   */
  parse(): unknown {
    this.skipWhitespace();
    return this.atEnd() ? undefined : this.parseValue("$input");
  }

  /** True once every character has been consumed. */
  private atEnd(): boolean {
    return this.pos >= this.input.length;
  }

  /**
   * Parse whatever value starts at the current position.
   *
   * @param path Location of this value (e.g. `$input.a[0]`) for error reports
   * @returns The value, or undefined when there is none to read
   */
  private parseValue(path: string): unknown {
    this.skipWhitespace();
    if (this.atEnd()) return undefined;

    // Check for maximum depth to prevent stack overflow
    if (this.depth >= MAX_DEPTH) {
      this.errors.push({
        path,
        expected: "value (max depth exceeded)",
        description: undefined,
      });
      return undefined;
    }

    const char: string = this.input[this.pos]!;
    if (char === "{") return this.parseObject(path);
    if (char === "[") return this.parseArray(path);
    if (char === '"') return this.parseString();
    if (char === "-" || (char >= "0" && char <= "9")) return this.parseNumber();

    // Keywords (true, false, null) or invalid identifiers
    if (this.isIdentifierStart(char)) {
      return this.parseKeywordOrIdentifier(path);
    }

    // Structural characters are left for the caller: there is simply no
    // value here (e.g. {"a":} or [,]), which is not an error.
    if (char === "}" || char === "]" || char === ",") return undefined;

    this.errors.push({
      path,
      expected: "JSON value",
      description: this.getErrorContext(),
    });
    // Skip the problematic character and try to continue
    this.pos++;
    return undefined;
  }

  /**
   * Excerpt of the input around the current position (10 characters before,
   * 20 after) with `→` marking the position and `...` marking truncation.
   */
  private getErrorContext(): string {
    const start: number = Math.max(0, this.pos - 10);
    const end: number = Math.min(this.input.length, this.pos + 20);
    const head: string = start > 0 ? "..." : "";
    const tail: string = end < this.input.length ? "..." : "";
    return (
      head +
      this.input.slice(start, this.pos) +
      "→" +
      this.input.slice(this.pos, end) +
      tail
    );
  }

  /**
   * Read an identifier-like token and interpret it as a value.
   *
   * Accepted: `true` / `false` / `null`, the boolean words `yes`, `y`, `on`,
   * `no`, `off` (any letter case), and truncated keywords such as `tru`.
   * A token directly followed by `"` is taken to be a string whose opening
   * quote is missing (reported, but returned as the string). Anything else
   * is reported and skipped up to the next `,`, `}` or `]`.
   *
   * @param path Location of this value for error reports
   */
  private parseKeywordOrIdentifier(path: string): unknown {
    const token: string = this.parseIdentifier();

    // Complete keywords
    if (token === "true") return true;
    if (token === "false") return false;
    if (token === "null") return null;

    // Boolean string coercion: "yes", "y", "on" -> true, "no", "off" -> false
    // Note: "n" is intentionally NOT handled (neither null nor false)
    const lower: string = token.toLowerCase();
    if (lower === "yes" || lower === "y" || lower === "on") return true;
    if (lower === "no" || lower === "off") return false;

    // Partial match for lenient parsing (e.g., "tru" -> true, "fal" -> false)
    if ("true".startsWith(token) && token.length > 0) return true;
    if ("false".startsWith(token) && token.length > 0) return false;
    if ("null".startsWith(token) && token.length >= 2) return null;

    // Looks like a string with a missing opening quote (e.g., abcdefg")
    if (!this.atEnd() && this.input[this.pos] === '"') {
      this.pos++; // skip the errant closing quote
      this.errors.push({
        path,
        expected: "quoted string",
        description: "missing opening quote for '" + token + "'",
      });
      return token;
    }

    // Invalid identifier as value - provide helpful error message
    this.errors.push({
      path,
      expected: "JSON value (string, number, boolean, null, object, or array)",
      description: "unquoted string '" + token + "' - did you forget quotes?",
    });
    this.skipToRecoveryPoint();
    return undefined;
  }

  /** Advance to the next `,`, `}` or `]` (or to the end) without consuming it. */
  private skipToRecoveryPoint(): void {
    while (!this.atEnd()) {
      const ch: string = this.input[this.pos]!;
      if (ch === "," || ch === "}" || ch === "]") return;
      this.pos++;
    }
  }

  /**
   * Parse an object whose `{` is at the current position.
   *
   * Keys may be quoted strings or bare identifiers. Stray and trailing commas
   * are ignored. On a bad key or a missing `:` the error is recorded and the
   * members read so far are returned. Running out of input is not an error.
   *
   * @param path Location of this object for error reports
   */
  private parseObject(path: string): Record<string, unknown> {
    const result: Record<string, unknown> = {};
    this.pos++; // skip '{'
    this.depth++;
    try {
      this.skipWhitespace();

      while (!this.atEnd()) {
        this.skipWhitespace();

        // End of object or end of input
        if (this.atEnd()) return result;
        const lead: string = this.input[this.pos]!;
        if (lead === "}") {
          this.pos++; // skip '}'
          return result;
        }

        // Stray / trailing comma
        if (lead === ",") {
          this.pos++;
          this.skipWhitespace();
          continue;
        }

        // Key: quoted string or unquoted identifier
        let key: string;
        if (lead === '"') {
          key = this.parseString();
        } else if (this.isIdentifierStart(lead)) {
          key = this.parseIdentifier();
        } else {
          this.errors.push({
            path,
            expected: "string key",
            description: lead,
          });
          return result;
        }

        this.skipWhitespace();

        // Input ended after the key: incomplete, not an error
        if (this.atEnd()) return result;
        if (this.input[this.pos] !== ":") {
          this.errors.push({
            path: path + "." + key,
            expected: "':'",
            description: this.input[this.pos],
          });
          return result;
        }
        this.pos++; // skip ':'

        this.skipWhitespace();

        // Input ended after the colon: incomplete, not an error
        if (this.atEnd()) return result;

        const value: unknown = this.parseValue(path + "." + key);
        LenientJsonParser.setOwn(result, key, value);

        this.skipWhitespace();
        if (!this.atEnd() && this.input[this.pos] === ",") {
          this.pos++;
        }
      }
      return result;
    } finally {
      // Single exit point for depth bookkeeping
      this.depth--;
    }
  }

  /**
   * Store `value` under `key` as an own property.
   *
   * Plain assignment with the key `__proto__` would invoke the prototype
   * setter (replacing the object's prototype, or silently dropping a
   * primitive value) instead of creating a property. `JSON.parse` creates an
   * ordinary own property for that key, so the same is done here.
   */
  private static setOwn(
    target: Record<string, unknown>,
    key: string,
    value: unknown,
  ): void {
    if (key === "__proto__") {
      Object.defineProperty(target, key, {
        value,
        writable: true,
        enumerable: true,
        configurable: true,
      });
    } else {
      target[key] = value;
    }
  }

  /**
   * Parse an array whose `[` is at the current position.
   *
   * Stray and trailing commas are ignored, and running out of input simply
   * ends the array.
   *
   * @param path Location of this array for error reports
   */
  private parseArray(path: string): unknown[] {
    const result: unknown[] = [];
    this.pos++; // skip '['
    this.depth++;
    try {
      this.skipWhitespace();

      let index: number = 0;
      while (!this.atEnd()) {
        this.skipWhitespace();

        // End of array or end of input
        if (this.atEnd()) return result;
        const lead: string = this.input[this.pos]!;
        if (lead === "]") {
          this.pos++; // skip ']'
          return result;
        }

        // Stray / trailing comma
        if (lead === ",") {
          this.pos++;
          this.skipWhitespace();
          continue;
        }

        const before: number = this.pos;
        const value: unknown = this.parseValue(path + "[" + index + "]");

        // Guard: if parseValue consumed nothing, drop one character so the
        // loop cannot spin forever; no element is recorded for it.
        if (this.pos === before && !this.atEnd()) {
          this.pos++;
          continue;
        }

        result.push(value);
        index++;

        this.skipWhitespace();
        if (!this.atEnd() && this.input[this.pos] === ",") {
          this.pos++;
        }
      }
      return result;
    } finally {
      // Single exit point for depth bookkeeping
      this.depth--;
    }
  }

  /**
   * Parse a string whose opening `"` is at the current position.
   *
   * Standard JSON escapes are decoded; an unknown escape yields the escaped
   * character itself. If the input ends before the closing quote, the text
   * read so far is returned.
   */
  private parseString(): string {
    this.pos++; // skip opening '"'
    let result: string = "";

    while (!this.atEnd()) {
      const char: string = this.input[this.pos]!;

      if (char === '"') {
        this.pos++; // skip closing '"'
        return result;
      }

      if (char === "\\") {
        this.pos++; // move onto the escaped character
        if (this.atEnd()) break; // dangling backslash at end of input
        result += this.readEscape();
        this.pos++;
        continue;
      }

      result += char;
      this.pos++;
    }

    // Unclosed string - return what we have (lenient)
    return result;
  }

  /**
   * Decode the escape whose identifying character (the one after the
   * backslash) is at the current position. On return the position is on the
   * last character that belongs to the escape.
   */
  private readEscape(): string {
    const char: string = this.input[this.pos]!;
    switch (char) {
      case "b":
        return "\b";
      case "f":
        return "\f";
      case "n":
        return "\n";
      case "r":
        return "\r";
      case "t":
        return "\t";
      case "u":
        return this.readUnicodeEscape();
      default:
        // Covers '"', '\\', '/' and any unknown escape: keep the character
        return char;
    }
  }

  /**
   * Decode a `\uXXXX` escape; the current position is on the `u`.
   *
   * A high surrogate immediately followed by a `\uXXXX` low surrogate is
   * combined into one pair. Malformed or truncated escapes are kept in the
   * output literally (`\u` followed by the characters that were read).
   */
  private readUnicodeEscape(): string {
    const total: number = this.input.length;

    // Fewer than three characters remain after the 'u': keep the tail as is
    if (this.pos + 4 > total) {
      const partial: string = this.input.slice(this.pos + 1);
      this.pos = total - 1;
      return "\\u" + partial;
    }

    const hex: string = this.input.slice(this.pos + 1, this.pos + 5);
    this.pos += 4; // now on the last character of the (up to) four read
    if (!isHexString(hex)) {
      // Invalid hex - preserve escape sequence literally
      return "\\u" + hex;
    }

    const highCode: number = parseInt(hex, 16);
    const mayStartPair: boolean =
      highCode >= 0xd800 &&
      highCode <= 0xdbff &&
      this.pos + 6 <= total &&
      this.input[this.pos + 1] === "\\" &&
      this.input[this.pos + 2] === "u";
    if (mayStartPair) {
      // Surrogate pair (emoji and characters > U+FFFF)
      const lowHex: string = this.input.slice(this.pos + 3, this.pos + 7);
      if (isHexString(lowHex)) {
        const lowCode: number = parseInt(lowHex, 16);
        if (lowCode >= 0xdc00 && lowCode <= 0xdfff) {
          this.pos += 6;
          return String.fromCharCode(highCode, lowCode);
        }
      }
    }
    return String.fromCharCode(highCode);
  }

  /**
   * Parse a number starting at the current position: optional `-`, integer
   * digits, optional fraction, optional exponent.
   *
   * @returns The numeric value, or 0 when the consumed text is not a valid
   *   number (for example a lone `-`)
   */
  private parseNumber(): number {
    const start: number = this.pos;

    // Sign
    if (this.input[this.pos] === "-") this.pos++;

    // Integer part
    this.skipDigits();

    // Fraction
    if (this.input[this.pos] === ".") {
      this.pos++;
      this.skipDigits();
    }

    // Exponent
    const marker: string | undefined = this.input[this.pos];
    if (marker === "e" || marker === "E") {
      this.pos++;
      const sign: string | undefined = this.input[this.pos];
      if (sign === "+" || sign === "-") this.pos++;
      this.skipDigits();
    }

    const num: number = Number(this.input.slice(start, this.pos));
    return Number.isNaN(num) ? 0 : num;
  }

  /** Advance past a run of ASCII digits. */
  private skipDigits(): void {
    while (!this.atEnd()) {
      const ch: string = this.input[this.pos]!;
      if (ch < "0" || ch > "9") return;
      this.pos++;
    }
  }

  /** True for characters that may begin an identifier: letters, `_`, `$`. */
  private isIdentifierStart(ch: string): boolean {
    return (
      (ch >= "a" && ch <= "z") ||
      (ch >= "A" && ch <= "Z") ||
      ch === "_" ||
      ch === "$"
    );
  }

  /** True for characters that may continue an identifier (adds digits). */
  private isIdentifierChar(ch: string): boolean {
    return this.isIdentifierStart(ch) || (ch >= "0" && ch <= "9");
  }

  /** Consume and return the run of identifier characters at the position. */
  private parseIdentifier(): string {
    const start: number = this.pos;
    while (!this.atEnd() && this.isIdentifierChar(this.input[this.pos]!)) {
      this.pos++;
    }
    return this.input.slice(start, this.pos);
  }

  /**
   * Advance past whitespace (space, tab, LF, CR) and JavaScript-style
   * comments. An unterminated block comment consumes the rest of the input.
   */
  private skipWhitespace(): void {
    const text: string = this.input;
    const total: number = text.length;

    while (this.pos < total) {
      const ch: string = text[this.pos]!;

      // Standard whitespace
      if (ch === " " || ch === "\t" || ch === "\n" || ch === "\r") {
        this.pos++;
        continue;
      }

      if (ch !== "/") return;
      const next: string | undefined = text[this.pos + 1];

      // Single-line comment: // ...
      if (next === "/") {
        this.pos += 2;
        while (
          this.pos < total &&
          text[this.pos] !== "\n" &&
          text[this.pos] !== "\r"
        ) {
          this.pos++;
        }
        continue;
      }

      // Multi-line comment: /* ... */
      // Resume right after the terminator. Previously a closed comment that
      // left exactly one character (e.g. `[1, /* c */2`) was treated as
      // unclosed and that last character was skipped.
      if (next === "*") {
        const close: number = text.indexOf("*/", this.pos + 2);
        this.pos = close === -1 ? total : close + 2;
        continue;
      }

      // A lone "/" is not a comment
      return;
    }
  }
}
|
|
913
|
-
|
|
914
|
-
/**
 * Upper bound on how many objects/arrays may be nested inside one another
 * before the parser reports an error instead of descending further, which
 * keeps hostile input from exhausting the call stack.
 *
 * @internal
 */
const MAX_DEPTH: number = 512;
|
|
1
|
+
import { DeepPartial, IJsonParseResult } from "@typia/interface";
|
|
2
|
+
|
|
3
|
+
/**
 * Parse JSON leniently, accepting text that is incomplete or malformed.
 *
 * Strict `JSON.parse` is attempted first; only when it throws is the input
 * handed to the lenient parser, which tolerates:
 *
 * - Unclosed brackets `{`, `[` - parses as much as possible
 * - Trailing commas `[1, 2, ]` - ignores them
 * - Unclosed strings `"hello` - returns the partial string
 * - Junk text before the JSON (LLMs often add explanatory text)
 * - Markdown code blocks (content of a "```json" fence is extracted)
 * - Incomplete keywords like `tru`, `fal`, `nul`
 * - Unicode escape sequences including surrogate pairs (emoji)
 * - JavaScript-style comments (single-line and multi-line)
 * - Unquoted object keys (JavaScript identifier style)
 *
 * @param input Raw JSON string (potentially incomplete); a non-string value
 *   is converted with `String()` first
 * @returns Parse result with data, original input, and any errors
 * @internal
 */
export function parseLenientJson<T>(input: string): IJsonParseResult<T> {
  // Defensive coercion for callers that bypass the type system.
  const text: string = typeof input === "string" ? input : String(input);

  // Fast path: well-formed JSON goes straight through the native parser.
  let nativeMessage: string;
  try {
    const data = JSON.parse(text) as T;
    return { success: true, data };
  } catch (cause) {
    // Keep the native message; it is reported if the lenient pass throws too.
    nativeMessage = cause instanceof Error ? cause.message : String(cause);
  }

  // Slow path: lenient recovery.
  try {
    return iterate<T>(text);
  } catch {
    // Not expected to happen; report the strict parser's complaint.
    return {
      success: false,
      data: undefined as DeepPartial<T>,
      input: text,
      errors: [
        {
          path: "$input",
          expected: "valid JSON",
          description: nativeMessage,
        },
      ],
    };
  }
}
|
|
56
|
+
|
|
57
|
+
/**
 * Lenient parsing pipeline used once strict `JSON.parse` has failed.
 *
 * Order of attempts:
 *
 * 1. Unwrap a "```json" markdown fence when one is present.
 * 2. Reject empty / whitespace-only text.
 * 3. If the text starts with a primitive token, parse it as is.
 * 4. Otherwise skip any junk prefix up to the first `{` or `[` and parse from
 *    there.
 * 5. With no `{` / `[` at all, accept a primitive that follows leading
 *    comments; anything else is a failure.
 *
 * @param input Text that strict parsing rejected
 * @returns Success when the lenient parser recorded no error; otherwise a
 *   failure carrying whatever partial data could be recovered
 * @internal
 */
function iterate<T>(input: string): IJsonParseResult<T> {
  // Builds the failure returned when nothing parseable was found.
  const reject = (description: string): IJsonParseResult<T> => ({
    success: false,
    data: undefined as DeepPartial<T>,
    input,
    errors: [
      {
        path: "$input",
        expected: "JSON value",
        description,
      },
    ],
  });

  // Runs the lenient parser over `source` and wraps its outcome.
  const parseFrom = (source: string): IJsonParseResult<T> => {
    const errors: IJsonParseResult.IError[] = [];
    const data: unknown = new LenientJsonParser(source, errors).parse();
    return errors.length > 0
      ? { success: false, data: data as DeepPartial<T>, input, errors }
      : { success: true, data: data as T };
  };

  // Prefer the content of a markdown code block when one is present.
  const fenced: string | null = extractMarkdownCodeBlock(input);
  const jsonSource: string = fenced === null ? input : fenced;

  const trimmed: string = jsonSource.trim();
  if (trimmed.length === 0) return reject("empty input");

  // A leading primitive means there is no junk prefix to skip.
  if (startsWithPrimitive(trimmed)) return parseFrom(jsonSource);

  // Object/array somewhere in the text: drop whatever precedes it.
  const jsonStart: number = findJsonStart(jsonSource);
  if (jsonStart !== -1) {
    return parseFrom(jsonStart > 0 ? jsonSource.slice(jsonStart) : jsonSource);
  }

  // No object/array: a primitive may still follow leading comments.
  const skipped: string = skipCommentsAndWhitespace(jsonSource);
  if (skipped.length > 0 && startsWithPrimitive(skipped)) {
    return parseFrom(jsonSource);
  }

  // Nothing that looks like JSON.
  return reject(jsonSource);
}
|
|
144
|
+
|
|
145
|
+
/**
 * Tell whether `s` consists of exactly four hexadecimal digits (`0-9`, `a-f`,
 * `A-F`), i.e. the payload of a `\uXXXX` escape.
 *
 * @internal
 */
function isHexString(s: string): boolean {
  return /^[0-9A-Fa-f]{4}$/.test(s);
}
|
|
162
|
+
|
|
163
|
+
/**
 * Pull the body out of a "```json" markdown fence, if the text has one.
 *
 * LLM replies frequently look like:
 *
 *     Here is your result:
 *
 *     ```json
 *     { "name": "test" }
 *     ```
 *
 * and this function returns what sits between the opening line and the
 * closing backticks.
 *
 * IMPORTANT: nothing is extracted when the text (ignoring leading whitespace)
 * already begins with `{`, `[` or `"`: it is then JSON itself, and any fence
 * inside it belongs to a string value.
 *
 * @param input Text that may contain a markdown code block
 * @returns The fenced content (up to the end of input when the fence is never
 *   closed), or null when there is no usable "```json" fence
 * @internal
 */
function extractMarkdownCodeBlock(input: string): string | null {
  // Only a fence tagged json counts; a bare ``` is ignored.
  const OPENING: string = "```json";
  const fenceAt: number = input.indexOf(OPENING);
  if (fenceAt < 0) return null;

  // Text that already starts as JSON is left untouched.
  const lead: string = input.trimStart().charAt(0);
  if (lead === "{" || lead === "[" || lead === '"') return null;

  // The content begins on the line after the opening fence.
  const lineBreak: number = input.indexOf("\n", fenceAt + OPENING.length);
  if (lineBreak === -1) return null;
  const bodyStart: number = lineBreak + 1;

  // A missing closing fence means "everything that is left".
  const closing: number = input.indexOf("```", bodyStart);
  return closing === -1
    ? input.slice(bodyStart)
    : input.slice(bodyStart, closing);
}
|
|
216
|
+
|
|
217
|
+
/**
 * Find where JSON object/array content starts in text that may carry a junk
 * prefix.
 *
 * LLM outputs often contain text before the JSON, like:
 *
 * - "Here is your JSON: {"name": "test"}"
 * - "Sure! [1, 2, 3]"
 *
 * The scan steps over `// ...` comments, block comments and double-quoted
 * strings, so a bracket inside any of them is not mistaken for the start.
 * Primitive values (strings, numbers, booleans) are not located here; the
 * parser handles them directly.
 *
 * @param input Text that may contain JSON with a junk prefix
 * @returns Index of the first `{` or `[` outside comments/strings, or -1 if
 *   there is none
 * @internal
 */
function findJsonStart(input: string): number {
  const len: number = input.length;
  let pos: number = 0;

  while (pos < len) {
    const ch: string = input[pos]!;

    // Found the JSON start.
    if (ch === "{" || ch === "[") return pos;

    // Line comment: skip up to (not including) the line break.
    if (ch === "/" && input[pos + 1] === "/") {
      pos += 2;
      while (pos < len && input[pos] !== "\n" && input[pos] !== "\r") pos++;
      continue;
    }

    // Block comment: jump right behind its terminator. Searching for the
    // terminator explicitly keeps a comment that closes one character before
    // the end of the input from being mistaken for an unclosed one (which
    // would skip that final character).
    if (ch === "/" && input[pos + 1] === "*") {
      const close: number = input.indexOf("*/", pos + 2);
      pos = close === -1 ? len : close + 2;
      continue;
    }

    // String literal: skip it so brackets inside are not matched.
    if (ch === '"') {
      pos++;
      while (pos < len) {
        const inner: string = input[pos]!;
        if (inner === "\\") {
          pos += 2; // backslash plus the escaped character
          continue;
        }
        pos++;
        if (inner === '"') break; // closing quote consumed
      }
      continue;
    }

    pos++;
  }

  return -1;
}
|
|
295
|
+
|
|
296
|
+
/**
 * Strip leading whitespace (space, tab, LF, CR) and comments from the input.
 *
 * Both `// ...` comments and block comments are removed. A block comment that
 * is never closed swallows the rest of the text; a closed one ends exactly
 * behind its terminator, even when only a single character follows it.
 *
 * @param input Text that may start with comments or whitespace
 * @returns The input with leading comments and whitespace removed
 * @internal
 */
function skipCommentsAndWhitespace(input: string): string {
  const len: number = input.length;
  let pos: number = 0;

  while (pos < len) {
    const ch: string = input[pos]!;

    if (ch === " " || ch === "\t" || ch === "\n" || ch === "\r") {
      pos++;
      continue;
    }

    // Anything that cannot start a comment is real content.
    if (ch !== "/") break;

    const next: string | undefined = input[pos + 1];
    if (next === "/") {
      // Line comment: stop on the line break; the next iteration consumes it
      // as whitespace.
      pos += 2;
      while (pos < len && input[pos] !== "\n" && input[pos] !== "\r") pos++;
      continue;
    }
    if (next === "*") {
      // Decide closed vs. unclosed from the terminator search itself, so a
      // comment that closes one character before the end keeps that
      // character.
      const close: number = input.indexOf("*/", pos + 2);
      pos = close === -1 ? len : close + 2;
      continue;
    }

    // A lone '/' is content, not a comment.
    break;
  }

  return input.slice(pos);
}
|
|
347
|
+
|
|
348
|
+
/**
 * Decide whether the text begins with something the parser treats as a
 * primitive value.
 *
 * Accepted: a double quote (string), a digit or `-` (number), the keywords
 * `true` / `false` / `null` as a prefix of the text, the whole text being a
 * prefix of a keyword (`tru`, `f`, `nu`, ...), or the whole text being one of
 * the boolean words `yes`, `y`, `on`, `no`, `off` in any letter case.
 *
 * @param input Trimmed input string
 * @returns True if input starts with a primitive value
 * @internal
 */
function startsWithPrimitive(input: string): boolean {
  if (input === "") return false;

  // String or number, recognised from the first character alone.
  const head: string = input.charAt(0);
  if (head === '"' || head === "-" || ("0" <= head && head <= "9")) return true;

  // Complete keyword at the front of the text.
  if (["true", "false", "null"].some((keyword) => input.startsWith(keyword))) {
    return true;
  }

  // Truncated keyword. "null" needs two characters or more, mirroring
  // parseKeywordOrIdentifier, so a lone "n" is not read as null.
  if ("true".startsWith(input) || "false".startsWith(input)) return true;
  if (input.length >= 2 && "null".startsWith(input)) return true;

  // Boolean words ("n" is intentionally not among them).
  switch (input.toLowerCase()) {
    case "yes":
    case "y":
    case "on":
    case "no":
    case "off":
      return true;
    default:
      return false;
  }
}
|
|
388
|
+
|
|
389
|
+
/**
 * Lenient recursive-descent JSON parser that tolerates incomplete input.
 *
 * The parser never throws on malformed text. It returns whatever it managed
 * to build and appends a description of each problem it could not silently
 * tolerate to the `errors` array handed to the constructor. Input that merely
 * stops early (unclosed string, object or array) is not reported as an error.
 *
 * @internal
 */
class LenientJsonParser {
  /** Cursor: index of the next unread character of `input`. */
  private pos: number = 0;
  /** Number of objects/arrays currently open around the cursor. */
  private depth: number = 0;
  private readonly input: string;
  /** Caller-owned sink that problems are appended to. */
  private readonly errors: IJsonParseResult.IError[];

  /**
   * @param input Text to parse
   * @param errors Array that receives one entry per reported problem
   */
  constructor(input: string, errors: IJsonParseResult.IError[]) {
    this.input = input;
    this.errors = errors;
  }

  /**
   * Parse the single value at the start of the input.
   *
   * @returns The parsed value, or `undefined` when the input holds nothing
   *   but whitespace and comments
   */
  parse(): unknown {
    this.skipWhitespace();
    if (this.pos >= this.input.length) {
      return undefined;
    }
    return this.parseValue("$input");
  }

  /**
   * Parse whatever value starts at the cursor.
   *
   * @param path Location of the value, used in error entries
   * @returns The value, or `undefined` when there is none (end of input, a
   *   structural character, nesting too deep, or an unusable token)
   */
  private parseValue(path: string): unknown {
    this.skipWhitespace();

    if (this.pos >= this.input.length) {
      return undefined;
    }

    // Refuse to descend further once the nesting limit is reached, so hostile
    // input cannot overflow the call stack.
    if (this.depth >= MAX_DEPTH) {
      this.errors.push({
        path,
        expected: "value (max depth exceeded)",
        description: undefined,
      });
      return undefined;
    }

    const char: string = this.input[this.pos]!;

    if (char === "{") return this.parseObject(path);
    if (char === "[") return this.parseArray(path);
    if (char === '"') return this.parseString();
    if (char === "-" || (char >= "0" && char <= "9")) return this.parseNumber();

    // Keywords (true, false, null) or stray identifiers.
    if (this.isIdentifierStart(char)) {
      return this.parseKeywordOrIdentifier(path);
    }

    // Structural characters are left for the caller: `{"a":}` and `[,]`
    // simply have no value here, which is not an error.
    if (char === "}" || char === "]" || char === ",") {
      return undefined;
    }

    this.errors.push({
      path,
      expected: "JSON value",
      description: this.getErrorContext(),
    });
    // Step over the offending character so parsing can continue.
    this.pos++;
    return undefined;
  }

  /**
   * Build a short excerpt of the input around the cursor (up to 10 characters
   * before and 20 after, cursor marked with an arrow) for error messages.
   */
  private getErrorContext(): string {
    const start: number = Math.max(0, this.pos - 10);
    const end: number = Math.min(this.input.length, this.pos + 20);
    const before: string = this.input.slice(start, this.pos);
    const after: string = this.input.slice(this.pos, end);
    return (
      (start > 0 ? "..." : "") +
      before +
      "→" +
      after +
      (end < this.input.length ? "..." : "")
    );
  }

  /**
   * Parse a bare word in value position.
   *
   * Recognised, in this order: the exact keywords `true` / `false` / `null`;
   * the boolean words `yes`, `y`, `on` (true) and `no`, `off` (false) in any
   * letter case; truncated keywords such as `tru`, `fal`, `nu`; and a word
   * directly followed by `"`, which is taken as a string whose opening quote
   * is missing (reported as an error, but the word is still returned).
   * Anything else is reported and skipped up to the next `,`, `}` or `]`.
   *
   * @param path Location of the value, used in error entries
   */
  private parseKeywordOrIdentifier(path: string): unknown {
    const token: string = this.parseIdentifier();

    if (token === "true") return true;
    if (token === "false") return false;
    if (token === "null") return null;

    // Boolean word coercion. "n" is intentionally NOT handled: it is neither
    // null nor false.
    const lower: string = token.toLowerCase();
    if (lower === "yes" || lower === "y" || lower === "on") return true;
    if (lower === "no" || lower === "off") return false;

    // Truncated keywords (e.g. "tru" -> true, "fal" -> false).
    if ("true".startsWith(token) && token.length > 0) return true;
    if ("false".startsWith(token) && token.length > 0) return false;
    if ("null".startsWith(token) && token.length >= 2) return null;

    // A word immediately followed by a quote looks like a string that lost
    // its opening quote (e.g. abcdefg").
    if (this.pos < this.input.length && this.input[this.pos] === '"') {
      this.pos++; // skip the stray closing quote
      this.errors.push({
        path,
        expected: "quoted string",
        description: "missing opening quote for '" + token + "'",
      });
      return token;
    }

    this.errors.push({
      path,
      expected: "JSON value (string, number, boolean, null, object, or array)",
      description: "unquoted string '" + token + "' - did you forget quotes?",
    });
    this.skipToRecoveryPoint();
    return undefined;
  }

  /** Advance to the next `,`, `}` or `]` (or the end of input) without consuming it. */
  private skipToRecoveryPoint(): void {
    while (this.pos < this.input.length) {
      const ch: string = this.input[this.pos]!;
      if (ch === "," || ch === "}" || ch === "]") {
        return;
      }
      this.pos++;
    }
  }

  /**
   * Parse an object; the cursor must be on its `{`.
   *
   * Keys may be quoted strings or bare identifiers. Redundant commas are
   * ignored and a missing `}` is accepted. An unusable key or a missing `:`
   * is reported and ends the object with the entries gathered so far.
   *
   * @param path Location of the object, used to build member paths
   */
  private parseObject(path: string): Record<string, unknown> {
    const result: Record<string, unknown> = {};
    this.pos++; // skip '{'
    this.depth++;
    try {
      while (this.pos < this.input.length) {
        this.skipWhitespace();
        if (this.pos >= this.input.length) break; // input ended: incomplete object

        const ch: string = this.input[this.pos]!;
        if (ch === "}") {
          this.pos++; // skip '}'
          break;
        }
        if (ch === ",") {
          this.pos++; // leading, doubled or trailing comma
          continue;
        }

        // Key: quoted string or unquoted identifier.
        let key: string;
        if (ch === '"') {
          key = this.parseString();
        } else if (this.isIdentifierStart(ch)) {
          key = this.parseIdentifier();
        } else {
          this.errors.push({
            path,
            expected: "string key",
            description: ch,
          });
          break;
        }

        this.skipWhitespace();

        // Input ending right after a key is merely incomplete, not an error.
        if (this.pos >= this.input.length) break;
        if (this.input[this.pos] !== ":") {
          this.errors.push({
            path: path + "." + key,
            expected: "':'",
            description: this.input[this.pos],
          });
          break;
        }
        this.pos++; // skip ':'

        this.skipWhitespace();

        // Input ending right after the colon: incomplete, the key is dropped.
        if (this.pos >= this.input.length) break;

        const value: unknown = this.parseValue(path + "." + key);
        if (key === "__proto__") {
          // Plain assignment to "__proto__" would invoke the inherited setter
          // and replace the prototype of `result`. Define an own data
          // property instead, which is also what JSON.parse produces.
          Object.defineProperty(result, key, {
            value,
            writable: true,
            enumerable: true,
            configurable: true,
          });
        } else {
          result[key] = value;
        }

        this.skipWhitespace();

        if (this.pos < this.input.length && this.input[this.pos] === ",") {
          this.pos++;
        }
      }
      return result;
    } finally {
      // Single exit point for the nesting bookkeeping.
      this.depth--;
    }
  }

  /**
   * Parse an array; the cursor must be on its `[`.
   *
   * Redundant commas are ignored and a missing `]` is accepted. A character
   * that cannot start a value is skipped so that parsing always progresses.
   *
   * @param path Location of the array, used to build element paths
   */
  private parseArray(path: string): unknown[] {
    const result: unknown[] = [];
    this.pos++; // skip '['
    this.depth++;
    try {
      while (this.pos < this.input.length) {
        this.skipWhitespace();
        if (this.pos >= this.input.length) break; // input ended: incomplete array

        const ch: string = this.input[this.pos]!;
        if (ch === "]") {
          this.pos++; // skip ']'
          break;
        }
        if (ch === ",") {
          this.pos++; // leading, doubled or trailing comma
          continue;
        }

        // The index of the next element equals the number parsed so far.
        const prevPos: number = this.pos;
        const value: unknown = this.parseValue(
          path + "[" + result.length + "]",
        );

        // Guard: if parseValue consumed nothing, step over the character to
        // avoid looping forever, and record no element for it.
        if (this.pos === prevPos) {
          this.pos++;
          continue;
        }

        result.push(value);

        this.skipWhitespace();

        if (this.pos < this.input.length && this.input[this.pos] === ",") {
          this.pos++;
        }
      }
      return result;
    } finally {
      this.depth--;
    }
  }

  /**
   * Parse a string; the cursor must be on its opening `"`.
   *
   * An unclosed string yields the text read so far. Unknown escapes keep the
   * escaped character; a backslash that ends the input is dropped.
   */
  private parseString(): string {
    this.pos++; // skip opening '"'
    let result: string = "";

    while (this.pos < this.input.length) {
      const char: string = this.input[this.pos]!;

      if (char === '"') {
        this.pos++; // skip closing '"'
        return result;
      }
      if (char !== "\\") {
        result += char;
        this.pos++;
        continue;
      }

      // Escape sequence: look at the character after the backslash.
      this.pos++;
      if (this.pos >= this.input.length) break;
      const escape: string = this.input[this.pos]!;
      result +=
        escape === "u"
          ? this.parseUnicodeEscape()
          : this.decodeSimpleEscape(escape);
      this.pos++;
    }

    // Unclosed string - return what we have (lenient).
    return result;
  }

  /** Translate the character following a backslash; unknown ones are kept as is. */
  private decodeSimpleEscape(escape: string): string {
    switch (escape) {
      case "b":
        return "\b";
      case "f":
        return "\f";
      case "n":
        return "\n";
      case "r":
        return "\r";
      case "t":
        return "\t";
      default:
        // Covers '"', '\\', '/' and any unrecognised escape.
        return escape;
    }
  }

  /**
   * Decode a `\uXXXX` escape; the cursor must be on the `u`.
   *
   * On return the cursor rests on the last character that was consumed. A
   * high surrogate directly followed by a `\uXXXX` low surrogate is decoded
   * as one pair. When fewer than four hex digits follow, the sequence is kept
   * literally (`\u` plus the digits found) and only those digits are
   * consumed, so a closing quote or other text right behind a broken escape
   * is never swallowed.
   */
  private parseUnicodeEscape(): string {
    let digits: number = 0;
    while (
      digits < 4 &&
      this.isHexDigitCode(this.input.charCodeAt(this.pos + 1 + digits))
    ) {
      digits++;
    }
    const hex: string = this.input.slice(this.pos + 1, this.pos + 1 + digits);
    this.pos += digits;

    // Invalid or truncated escape: preserve it literally.
    if (digits < 4) return "\\u" + hex;

    const highCode: number = parseInt(hex, 16);

    // Surrogate pair (emoji and other characters above U+FFFF).
    if (
      highCode >= 0xd800 &&
      highCode <= 0xdbff &&
      this.input[this.pos + 1] === "\\" &&
      this.input[this.pos + 2] === "u"
    ) {
      const lowHex: string = this.input.slice(this.pos + 3, this.pos + 7);
      if (isHexString(lowHex)) {
        const lowCode: number = parseInt(lowHex, 16);
        if (lowCode >= 0xdc00 && lowCode <= 0xdfff) {
          this.pos += 6;
          return String.fromCharCode(highCode, lowCode);
        }
      }
    }
    return String.fromCharCode(highCode);
  }

  /** True for the char codes of `0-9`, `A-F` and `a-f`; false for NaN (out of range). */
  private isHexDigitCode(code: number): boolean {
    return (
      (code >= 48 && code <= 57) ||
      (code >= 65 && code <= 70) ||
      (code >= 97 && code <= 102)
    );
  }

  /**
   * Parse a number; the cursor must be on `-` or a digit.
   *
   * Consumes an optional sign, integer digits, an optional fraction and an
   * optional exponent. Text that `Number()` cannot convert (for example a
   * lone `-` or a dangling exponent) yields 0.
   */
  private parseNumber(): number {
    const start: number = this.pos;

    if (this.input[this.pos] === "-") {
      this.pos++;
    }
    this.skipDigits();

    if (this.input[this.pos] === ".") {
      this.pos++;
      this.skipDigits();
    }

    const marker: string | undefined = this.input[this.pos];
    if (marker === "e" || marker === "E") {
      this.pos++;
      const sign: string | undefined = this.input[this.pos];
      if (sign === "+" || sign === "-") {
        this.pos++;
      }
      this.skipDigits();
    }

    const num: number = Number(this.input.slice(start, this.pos));
    return Number.isNaN(num) ? 0 : num;
  }

  /** Advance the cursor over a run of ASCII digits. */
  private skipDigits(): void {
    while (this.pos < this.input.length) {
      const ch: string = this.input[this.pos]!;
      if (ch < "0" || ch > "9") return;
      this.pos++;
    }
  }

  /** Tells whether `ch` may begin an unquoted identifier: an ASCII letter, `_` or `$`. */
  private isIdentifierStart(ch: string): boolean {
    return (
      (ch >= "a" && ch <= "z") ||
      (ch >= "A" && ch <= "Z") ||
      ch === "_" ||
      ch === "$"
    );
  }

  /** Tells whether `ch` may continue an unquoted identifier: a letter, a digit, `_` or `$`. */
  private isIdentifierChar(ch: string): boolean {
    return (
      (ch >= "a" && ch <= "z") ||
      (ch >= "A" && ch <= "Z") ||
      (ch >= "0" && ch <= "9") ||
      ch === "_" ||
      ch === "$"
    );
  }

  /** Consume and return the run of identifier characters at the cursor. */
  private parseIdentifier(): string {
    const start: number = this.pos;
    while (
      this.pos < this.input.length &&
      this.isIdentifierChar(this.input[this.pos]!)
    ) {
      this.pos++;
    }
    return this.input.slice(start, this.pos);
  }

  /**
   * Advance the cursor past whitespace (space, tab, LF, CR), `// ...` line
   * comments and block comments.
   *
   * An unterminated block comment consumes the rest of the input. A
   * terminated one leaves the cursor right behind its closing marker, even
   * when only one character follows.
   */
  private skipWhitespace(): void {
    while (this.pos < this.input.length) {
      const ch: string = this.input[this.pos]!;

      if (ch === " " || ch === "\t" || ch === "\n" || ch === "\r") {
        this.pos++;
        continue;
      }

      // Anything that cannot start a comment ends the skip.
      if (ch !== "/") return;

      const next: string | undefined = this.input[this.pos + 1];
      if (next === "/") {
        // Line comment: stop on the line break, consumed as whitespace next.
        this.pos += 2;
        while (
          this.pos < this.input.length &&
          this.input[this.pos] !== "\n" &&
          this.input[this.pos] !== "\r"
        ) {
          this.pos++;
        }
        continue;
      }
      if (next === "*") {
        // Tell closed from unclosed by the terminator search itself, so a
        // comment closing one character before the end keeps that character.
        const close: number = this.input.indexOf("*/", this.pos + 2);
        this.pos = close === -1 ? this.input.length : close + 2;
        continue;
      }

      // A lone '/' is not a comment.
      return;
    }
  }
}
|
|
913
|
+
|
|
914
|
+
/**
 * Upper bound on how many objects/arrays may be nested inside one another
 * before the parser reports an error instead of descending further, which
 * keeps hostile input from exhausting the call stack.
 *
 * @internal
 */
const MAX_DEPTH: number = 512;
|