@typia/utils 12.0.0-dev.20260307-2 → 12.0.0-dev.20260309

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,122 @@
1
1
  import { DeepPartial, IJsonParseResult } from "@typia/interface";
2
2
 
3
/**
 * Leniently parse JSON text that may be truncated or slightly malformed.
 *
 * Strategy, in order:
 *
 * 1. Try the native `JSON.parse` on the raw input (fast path for valid JSON).
 * 2. Otherwise take the content of a markdown code block when
 *    `extractMarkdownCodeBlock` finds one, else the raw input.
 * 3. Reject empty / whitespace-only text with a single `$input` error.
 * 4. If the trimmed text starts with a primitive, hand the text to
 *    `LenientJsonParser` as-is.
 * 5. Otherwise locate the first `{` / `[` via `findJsonStart` and parse from
 *    there, dropping any junk prefix (e.g. explanatory text from an LLM).
 * 6. If no object/array start exists, retry as a primitive after skipping
 *    leading comments and whitespace; if that fails too, reject.
 *
 * The tolerated syntax itself (unclosed brackets and strings, trailing commas,
 * partial keywords such as `tru`, comments, unquoted keys, unicode escapes)
 * is implemented by `LenientJsonParser`.
 *
 * @param input Raw JSON string (potentially incomplete)
 * @returns Parse result with data, plus the original input and the collected
 *   errors when parsing was not clean
 * @internal
 */
export function parseLenientJson<T>(input: string): IJsonParseResult<T> {
  // Fast path: strictly valid JSON needs no lenient handling.
  try {
    return { success: true, data: JSON.parse(input) as T };
  } catch {
    // Not strict JSON - continue with the lenient pipeline below.
  }

  // Run the lenient parser over `text` and wrap its outcome; any recorded
  // error downgrades the result to a failure that still carries partial data.
  const parseWith = (text: string): IJsonParseResult<T> => {
    const errors: IJsonParseResult.IError[] = [];
    const data: unknown = new LenientJsonParser(text, errors).parse();
    return errors.length > 0
      ? { success: false, data: data as DeepPartial<T>, input, errors }
      : { success: true, data: data as T };
  };

  // Build the "nothing parseable" failure, reporting `value` as what was seen.
  const reject = (value: string): IJsonParseResult<T> => ({
    success: false,
    data: undefined as DeepPartial<T>,
    input,
    errors: [
      {
        path: "$input",
        expected: "JSON value",
        value,
      },
    ],
  });

  // Prefer the content of a markdown code block when one is present.
  const extracted: string | null = extractMarkdownCodeBlock(input);
  const jsonSource: string = extracted === null ? input : extracted;

  // Empty or whitespace-only text can never yield a value.
  const trimmed: string = jsonSource.trim();
  if (trimmed.length === 0) {
    return reject("empty input");
  }

  // A leading primitive means there is no junk prefix to skip.
  if (startsWithPrimitive(trimmed)) {
    return parseWith(jsonSource);
  }

  // Object/array found: parse from its opening bracket onwards.
  const jsonStart: number = findJsonStart(jsonSource);
  if (jsonStart !== -1) {
    return parseWith(jsonStart > 0 ? jsonSource.slice(jsonStart) : jsonSource);
  }

  // No object/array: a primitive may still follow leading comments. The check
  // uses the stripped text, but the parser receives the full source.
  const skipped: string = skipCommentsAndWhitespace(jsonSource);
  if (skipped.length > 0 && startsWithPrimitive(skipped)) {
    return parseWith(jsonSource);
  }

  // Nothing that looks like JSON was found.
  return reject(jsonSource);
}
119
+
3
120
  /**
4
121
  * Maximum nesting depth to prevent stack overflow attacks.
5
122
  *
@@ -88,21 +205,126 @@ function extractMarkdownCodeBlock(input: string): string | null {
88
205
  * - "Here is your JSON: {"name": "test"}"
89
206
  * - "Sure! [1, 2, 3]"
90
207
  *
91
- * This function only looks for `{` or `[` to skip junk prefix. Primitive values
92
- * (strings, numbers, booleans) are handled directly by the parser.
208
+ * This function skips over comments and strings to find the real JSON start.
209
+ * Primitive values (strings, numbers, booleans) are handled directly by the
210
+ * parser.
93
211
  *
94
212
  * @param input Text that may contain JSON with junk prefix
95
- * @returns Index of first `{` or `[`, or -1 if not found
213
+ * @returns Index of first `{` or `[` outside comments/strings, or -1 if not
214
+ * found
96
215
  * @internal
97
216
  */
98
217
  function findJsonStart(input: string): number {
99
- const objStart: number = input.indexOf("{");
100
- const arrStart: number = input.indexOf("[");
218
+ let pos: number = 0;
219
+ const len: number = input.length;
220
+
221
+ while (pos < len) {
222
+ const ch: string = input[pos]!;
223
+
224
+ // Found JSON start
225
+ if (ch === "{" || ch === "[") {
226
+ return pos;
227
+ }
228
+
229
+ // Skip single-line comment
230
+ if (ch === "/" && pos + 1 < len && input[pos + 1] === "/") {
231
+ pos += 2;
232
+ while (pos < len && input[pos] !== "\n" && input[pos] !== "\r") {
233
+ pos++;
234
+ }
235
+ continue;
236
+ }
237
+
238
+ // Skip multi-line comment
239
+ if (ch === "/" && pos + 1 < len && input[pos + 1] === "*") {
240
+ pos += 2;
241
+ while (pos + 1 < len) {
242
+ if (input[pos] === "*" && input[pos + 1] === "/") {
243
+ pos += 2;
244
+ break;
245
+ }
246
+ pos++;
247
+ }
248
+ // If unclosed comment, move to end
249
+ if (pos + 1 >= len) {
250
+ pos = len;
251
+ }
252
+ continue;
253
+ }
254
+
255
+ // Skip string literal (to avoid matching { or [ inside strings)
256
+ if (ch === '"') {
257
+ pos++;
258
+ while (pos < len) {
259
+ if (input[pos] === "\\") {
260
+ pos += 2; // skip escape sequence
261
+ continue;
262
+ }
263
+ if (input[pos] === '"') {
264
+ pos++;
265
+ break;
266
+ }
267
+ pos++;
268
+ }
269
+ continue;
270
+ }
271
+
272
+ pos++;
273
+ }
274
+
275
+ return -1;
276
+ }
277
+
278
/**
 * Skip leading comments and whitespace from input.
 *
 * Whitespace here means space, tab, line feed and carriage return. Both
 * `// ...` and `/* ... *\/` comments are skipped; an unclosed multi-line
 * comment consumes the remainder of the input.
 *
 * @param input Text that may start with comments or whitespace
 * @returns Input with leading comments and whitespace removed (empty string
 *   when nothing else remains)
 * @internal
 */
function skipCommentsAndWhitespace(input: string): string {
  const len: number = input.length;
  let pos: number = 0;

  while (pos < len) {
    const ch: string = input[pos]!;

    // Skip whitespace
    if (ch === " " || ch === "\t" || ch === "\n" || ch === "\r") {
      pos++;
      continue;
    }

    // Skip single-line comment (the line break itself is consumed as
    // whitespace on the next iteration)
    if (ch === "/" && input[pos + 1] === "/") {
      pos += 2;
      while (pos < len && input[pos] !== "\n" && input[pos] !== "\r") {
        pos++;
      }
      continue;
    }

    // Skip multi-line comment. A closed comment resumes right after its
    // terminator, even when a single character follows it (e.g. `/* c */1`);
    // only a genuinely unclosed comment consumes the rest of the input.
    if (ch === "/" && input[pos + 1] === "*") {
      const close: number = input.indexOf("*/", pos + 2);
      pos = close === -1 ? len : close + 2;
      continue;
    }

    // Not whitespace or comment
    break;
  }

  return input.slice(pos);
}
107
329
 
108
330
  /**
@@ -126,106 +348,26 @@ function startsWithPrimitive(input: string): boolean {
126
348
  input.startsWith("null")
127
349
  )
128
350
  return true;
129
- // Partial keywords
351
+ // Partial keywords (note: "null" requires at least 2 chars to match parseKeywordOrIdentifier logic)
130
352
  if (
131
353
  "true".startsWith(input) ||
132
354
  "false".startsWith(input) ||
133
- "null".startsWith(input)
355
+ ("null".startsWith(input) && input.length >= 2)
356
+ )
357
+ return true;
358
+ // Boolean string variants (note: "n" is intentionally excluded)
359
+ const lower: string = input.toLowerCase();
360
+ if (
361
+ lower === "yes" ||
362
+ lower === "y" ||
363
+ lower === "on" ||
364
+ lower === "no" ||
365
+ lower === "off"
134
366
  )
135
367
  return true;
136
368
  return false;
137
369
  }
138
370
 
139
- /**
140
- * Parse lenient JSON that may be incomplete or malformed.
141
- *
142
- * Handles:
143
- *
144
- * - Unclosed brackets `{`, `[` - parses as much as possible
145
- * - Trailing commas `[1, 2, ]` - ignores them
146
- * - Unclosed strings `"hello` - returns partial string
147
- * - Junk text before JSON (LLM often adds explanatory text)
148
- * - Markdown code blocks (extracts content from `json ... `)
149
- * - Incomplete keywords like `tru`, `fal`, `nul`
150
- * - Unicode escape sequences including surrogate pairs (emoji)
151
- * - JavaScript-style comments (single-line and multi-line)
152
- * - Unquoted object keys (JavaScript identifier style)
153
- *
154
- * @param input Raw JSON string (potentially incomplete)
155
- * @returns Parse result with data, original input, and any errors
156
- * @internal
157
- */
158
- export function parseLenientJson<T>(input: string): IJsonParseResult<T> {
159
- // Try native JSON.parse first (faster for valid JSON)
160
- try {
161
- return {
162
- success: true,
163
- data: JSON.parse(input) as T,
164
- };
165
- } catch {
166
- // Fall back to lenient parser
167
- }
168
-
169
- // Extract markdown code block if present
170
- const codeBlockContent: string | null = extractMarkdownCodeBlock(input);
171
- const jsonSource: string =
172
- codeBlockContent !== null ? codeBlockContent : input;
173
-
174
- // Check if input is empty or whitespace-only
175
- const trimmed: string = jsonSource.trim();
176
- if (trimmed.length === 0) {
177
- const errors: IJsonParseResult.IError[] = [];
178
- const parser: LenientJsonParser = new LenientJsonParser(jsonSource, errors);
179
- const data: unknown = parser.parse();
180
- if (errors.length > 0) {
181
- return { success: false, data: data as DeepPartial<T>, input, errors };
182
- }
183
- return { success: true, data: data as T };
184
- }
185
-
186
- // Check if input starts with a primitive value (no junk prefix skipping needed)
187
- if (startsWithPrimitive(trimmed)) {
188
- const errors: IJsonParseResult.IError[] = [];
189
- const parser: LenientJsonParser = new LenientJsonParser(jsonSource, errors);
190
- const data: unknown = parser.parse();
191
- if (errors.length > 0) {
192
- return { success: false, data: data as DeepPartial<T>, input, errors };
193
- }
194
- return { success: true, data: data as T };
195
- }
196
-
197
- // Find JSON start position (skip junk prefix from LLM)
198
- const jsonStart: number = findJsonStart(jsonSource);
199
- if (jsonStart === -1) {
200
- // No JSON found - return empty object for lenient behavior
201
- return {
202
- success: true,
203
- data: {} as T,
204
- };
205
- }
206
-
207
- // Extract JSON portion (skip junk prefix)
208
- const jsonInput: string =
209
- jsonStart > 0 ? jsonSource.slice(jsonStart) : jsonSource;
210
-
211
- const errors: IJsonParseResult.IError[] = [];
212
- const parser: LenientJsonParser = new LenientJsonParser(jsonInput, errors);
213
- const data: unknown = parser.parse();
214
-
215
- if (errors.length > 0) {
216
- return {
217
- success: false,
218
- data: data as DeepPartial<T>,
219
- input,
220
- errors,
221
- };
222
- }
223
- return {
224
- success: true,
225
- data: data as T,
226
- };
227
- }
228
-
229
371
  /**
230
372
  * Lenient JSON parser that handles incomplete JSON.
231
373
  *
@@ -279,6 +421,13 @@ class LenientJsonParser {
279
421
  return this.parseKeywordOrIdentifier(path);
280
422
  }
281
423
 
424
+ // Don't skip structural characters - let the caller handle them
425
+ const ch: string = this.input[this.pos]!;
426
+ if (ch === "}" || ch === "]" || ch === ",") {
427
+ // Not an error - just no value here (e.g., {"a":} or [,])
428
+ return undefined;
429
+ }
430
+
282
431
  this.errors.push({
283
432
  path,
284
433
  expected: "JSON value",
@@ -320,10 +469,16 @@ class LenientJsonParser {
320
469
  if (token === "false") return false;
321
470
  if (token === "null") return null;
322
471
 
472
+ // Boolean string coercion: "yes", "y", "on" -> true, "no", "off" -> false
473
+ // Note: "n" is intentionally NOT handled (neither null nor false)
474
+ const lower: string = token.toLowerCase();
475
+ if (lower === "yes" || lower === "y" || lower === "on") return true;
476
+ if (lower === "no" || lower === "off") return false;
477
+
323
478
  // Partial match for lenient parsing (e.g., "tru" -> true, "fal" -> false)
324
479
  if ("true".startsWith(token) && token.length > 0) return true;
325
480
  if ("false".startsWith(token) && token.length > 0) return false;
326
- if ("null".startsWith(token) && token.length > 0) return null;
481
+ if ("null".startsWith(token) && token.length >= 2) return null;
327
482
 
328
483
  // Check if this looks like a string with missing opening quote (e.g., abcdefg")
329
484
  if (this.pos < this.input.length && this.input[this.pos] === '"') {
@@ -469,7 +624,15 @@ class LenientJsonParser {
469
624
  }
470
625
 
471
626
  // Parse value
627
+ const prevPos: number = this.pos;
472
628
  const value: unknown = this.parseValue(path + "[" + index + "]");
629
+
630
+ // Guard: if parseValue didn't advance, skip unexpected char to prevent infinite loop
631
+ if (this.pos === prevPos && this.pos < this.input.length) {
632
+ this.pos++;
633
+ continue;
634
+ }
635
+
473
636
  result.push(value);
474
637
  index++;
475
638