@typia/utils 12.0.0-dev.20260307-2 → 12.0.0-dev.20260309
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -2
- package/lib/http/internal/HttpLlmApplicationComposer.js +1 -0
- package/lib/http/internal/HttpLlmApplicationComposer.js.map +1 -1
- package/lib/http/internal/HttpLlmApplicationComposer.mjs +2 -5
- package/lib/http/internal/HttpLlmApplicationComposer.mjs.map +1 -1
- package/lib/index.mjs +9 -9
- package/lib/utils/LlmJson.d.ts +3 -3
- package/lib/utils/LlmJson.js +2 -2
- package/lib/utils/LlmJson.js.map +1 -1
- package/lib/utils/LlmJson.mjs +4 -11
- package/lib/utils/LlmJson.mjs.map +1 -1
- package/lib/utils/internal/coerceLlmArguments.js +17 -1
- package/lib/utils/internal/coerceLlmArguments.js.map +1 -1
- package/lib/utils/internal/coerceLlmArguments.mjs +17 -1
- package/lib/utils/internal/coerceLlmArguments.mjs.map +1 -1
- package/lib/utils/internal/parseLenientJson.js +236 -96
- package/lib/utils/internal/parseLenientJson.js.map +1 -1
- package/lib/utils/internal/parseLenientJson.mjs +236 -96
- package/lib/utils/internal/parseLenientJson.mjs.map +1 -1
- package/lib/validators/internal/OpenApiOneOfValidator.mjs +1 -5
- package/lib/validators/internal/OpenApiOneOfValidator.mjs.map +1 -1
- package/package.json +2 -2
- package/src/http/internal/HttpLlmApplicationComposer.ts +1 -0
- package/src/utils/LlmJson.ts +3 -3
- package/src/utils/internal/coerceLlmArguments.ts +19 -2
- package/src/utils/internal/parseLenientJson.ts +265 -102
|
@@ -1,5 +1,122 @@
|
|
|
1
1
|
import { DeepPartial, IJsonParseResult } from "@typia/interface";
|
|
2
2
|
|
|
3
|
+
/**
 * Parse lenient JSON that may be incomplete or malformed.
 *
 * Handles:
 *
 * - Unclosed brackets `{`, `[` - parses as much as possible
 * - Trailing commas `[1, 2, ]` - ignores them
 * - Unclosed strings `"hello` - returns partial string
 * - Junk text before JSON (LLM often adds explanatory text)
 * - Markdown code blocks (extracts content from ```json ... ```)
 * - Incomplete keywords like `tru`, `fal`, `nul`
 * - Unicode escape sequences including surrogate pairs (emoji)
 * - JavaScript-style comments (single-line and multi-line)
 * - Unquoted object keys (JavaScript identifier style)
 *
 * Resolution order:
 *
 * 1. Native `JSON.parse` on the raw input (fast path for valid JSON).
 * 2. Markdown code block extraction, when a block is present.
 * 3. Empty / whitespace-only source is reported as a failure.
 * 4. A source that starts with a primitive is parsed as-is.
 * 5. Otherwise everything before the first `{` / `[` is dropped as junk; when
 *    no such bracket exists, a primitive following leading comments is still
 *    accepted, and anything else is reported as a failure.
 *
 * @param input Raw JSON string (potentially incomplete)
 * @returns Parse result with data, original input, and any errors
 * @internal
 */
export function parseLenientJson<T>(input: string): IJsonParseResult<T> {
  // Try native JSON.parse first (faster for valid JSON)
  try {
    return {
      success: true,
      data: JSON.parse(input) as T,
    };
  } catch {
    // Fall back to lenient parser
  }

  // Prefer the content of a markdown code block when one is present
  const codeBlockContent: string | null = extractMarkdownCodeBlock(input);
  const jsonSource: string =
    codeBlockContent !== null ? codeBlockContent : input;

  // Empty or whitespace-only source is a failure, not an empty value
  const trimmed: string = jsonSource.trim();
  if (trimmed.length === 0) {
    return createMissingJsonFailure<T>(input, "empty input");
  }

  // Source starts with a primitive value: no junk prefix skipping needed.
  // The untrimmed source is handed to the parser, exactly as before.
  if (startsWithPrimitive(trimmed)) {
    return runLenientJsonParser<T>(jsonSource, input);
  }

  // Find JSON start position (skip junk prefix from LLM)
  const jsonStart: number = findJsonStart(jsonSource);
  if (jsonStart === -1) {
    // No object/array found - a primitive may still follow leading comments
    const skipped: string = skipCommentsAndWhitespace(jsonSource);
    if (skipped.length > 0 && startsWithPrimitive(skipped)) {
      // NOTE(review): the parser receives the full source here, so it is
      // presumably able to skip the leading comments itself - confirm.
      return runLenientJsonParser<T>(jsonSource, input);
    }
    // No valid JSON found - return failure carrying the rejected source
    return createMissingJsonFailure<T>(input, jsonSource);
  }

  // Parse only the JSON portion (junk prefix removed)
  const jsonInput: string =
    jsonStart > 0 ? jsonSource.slice(jsonStart) : jsonSource;
  return runLenientJsonParser<T>(jsonInput, input);
}

/**
 * Run {@link LenientJsonParser} over `source` and wrap the outcome.
 *
 * The result is a success only when the parser recorded no errors; otherwise
 * the (possibly partial) data is returned together with the collected errors.
 *
 * @param source Text handed to the lenient parser
 * @param input Original caller input, echoed back on failure
 * @returns Success result, or failure result with partial data and errors
 * @internal
 */
function runLenientJsonParser<T>(
  source: string,
  input: string,
): IJsonParseResult<T> {
  const errors: IJsonParseResult.IError[] = [];
  const parser: LenientJsonParser = new LenientJsonParser(source, errors);
  const data: unknown = parser.parse();

  if (errors.length > 0) {
    return { success: false, data: data as DeepPartial<T>, input, errors };
  }
  return { success: true, data: data as T };
}

/**
 * Build the failure result used when no JSON value could be located at all.
 *
 * @param input Original caller input, echoed back in the result
 * @param value What was found instead of a JSON value
 * @returns Failure result with `undefined` data and a single `$input` error
 * @internal
 */
function createMissingJsonFailure<T>(
  input: string,
  value: unknown,
): IJsonParseResult<T> {
  return {
    success: false,
    data: undefined as DeepPartial<T>,
    input,
    errors: [
      {
        path: "$input",
        expected: "JSON value",
        value,
      },
    ],
  };
}
|
|
119
|
+
|
|
3
120
|
/**
|
|
4
121
|
* Maximum nesting depth to prevent stack overflow attacks.
|
|
5
122
|
*
|
|
@@ -88,21 +205,126 @@ function extractMarkdownCodeBlock(input: string): string | null {
|
|
|
88
205
|
* - "Here is your JSON: {"name": "test"}"
|
|
89
206
|
* - "Sure! [1, 2, 3]"
|
|
90
207
|
*
|
|
91
|
-
* This function
|
|
92
|
-
* (strings, numbers, booleans) are handled directly by the
|
|
208
|
+
* This function skips over comments and strings to find the real JSON start.
|
|
209
|
+
* Primitive values (strings, numbers, booleans) are handled directly by the
|
|
210
|
+
* parser.
|
|
93
211
|
*
|
|
94
212
|
* @param input Text that may contain JSON with junk prefix
|
|
95
|
-
* @returns Index of first `{` or `[
|
|
213
|
+
* @returns Index of first `{` or `[` outside comments/strings, or -1 if not
|
|
214
|
+
* found
|
|
96
215
|
* @internal
|
|
97
216
|
*/
|
|
98
217
|
function findJsonStart(input: string): number {
  // Scans left to right for the first `{` or `[` that is not inside a
  // comment or a double-quoted string. Returns its index, or -1 when the
  // input contains no such bracket.
  const len: number = input.length;
  let pos: number = 0;

  while (pos < len) {
    const ch: string = input[pos]!;

    // Found JSON start
    if (ch === "{" || ch === "[") {
      return pos;
    }

    // Single-line comment: skip up to (not including) the line terminator
    if (ch === "/" && input[pos + 1] === "/") {
      pos += 2;
      while (pos < len && input[pos] !== "\n" && input[pos] !== "\r") {
        pos++;
      }
      continue;
    }

    // Multi-line comment: resume right after the terminator. Only a comment
    // whose terminator is missing swallows the rest of the input; a closed
    // comment must not hide the character that follows it, even when that
    // character is the last one of the input (e.g. "/* c */{").
    if (ch === "/" && input[pos + 1] === "*") {
      const close: number = input.indexOf("*/", pos + 2);
      pos = close === -1 ? len : close + 2;
      continue;
    }

    // String literal: skip it so that `{` / `[` inside a string never match
    if (ch === '"') {
      pos++;
      while (pos < len) {
        if (input[pos] === "\\") {
          pos += 2; // skip the backslash and the escaped character
          continue;
        }
        if (input[pos] === '"') {
          pos++;
          break;
        }
        pos++;
      }
      continue;
    }

    pos++;
  }

  return -1;
}
|
|
277
|
+
|
|
278
|
+
/**
 * Skip leading comments and whitespace from input.
 *
 * Whitespace is space, tab, line feed and carriage return. A `//` comment
 * runs up to the next line terminator, and a block comment runs up to and
 * including its closing delimiter. A block comment whose closing delimiter
 * is missing consumes the rest of the input.
 *
 * @param input Text that may start with comments or whitespace
 * @returns Input with leading comments and whitespace removed
 * @internal
 */
function skipCommentsAndWhitespace(input: string): string {
  const len: number = input.length;
  let pos: number = 0;

  while (pos < len) {
    const ch: string = input[pos]!;

    // Skip whitespace
    if (ch === " " || ch === "\t" || ch === "\n" || ch === "\r") {
      pos++;
      continue;
    }

    // Skip single-line comment (the terminator itself is whitespace and is
    // consumed by the next iteration)
    if (ch === "/" && input[pos + 1] === "/") {
      pos += 2;
      while (pos < len && input[pos] !== "\n" && input[pos] !== "\r") {
        pos++;
      }
      continue;
    }

    // Skip multi-line comment. Only an unterminated comment consumes the
    // remaining input; after a closed comment scanning resumes right behind
    // the terminator so a trailing one-character value (e.g. "/* c */5")
    // is not lost.
    if (ch === "/" && input[pos + 1] === "*") {
      const close: number = input.indexOf("*/", pos + 2);
      pos = close === -1 ? len : close + 2;
      continue;
    }

    // Not whitespace or comment
    break;
  }

  return input.slice(pos);
}
|
|
107
329
|
|
|
108
330
|
/**
|
|
@@ -126,106 +348,26 @@ function startsWithPrimitive(input: string): boolean {
|
|
|
126
348
|
input.startsWith("null")
|
|
127
349
|
)
|
|
128
350
|
return true;
|
|
129
|
-
// Partial keywords
|
|
351
|
+
// Partial keywords (note: "null" requires at least 2 chars to match parseKeywordOrIdentifier logic)
|
|
130
352
|
if (
|
|
131
353
|
"true".startsWith(input) ||
|
|
132
354
|
"false".startsWith(input) ||
|
|
133
|
-
"null".startsWith(input)
|
|
355
|
+
("null".startsWith(input) && input.length >= 2)
|
|
356
|
+
)
|
|
357
|
+
return true;
|
|
358
|
+
// Boolean string variants (note: "n" is intentionally excluded)
|
|
359
|
+
const lower: string = input.toLowerCase();
|
|
360
|
+
if (
|
|
361
|
+
lower === "yes" ||
|
|
362
|
+
lower === "y" ||
|
|
363
|
+
lower === "on" ||
|
|
364
|
+
lower === "no" ||
|
|
365
|
+
lower === "off"
|
|
134
366
|
)
|
|
135
367
|
return true;
|
|
136
368
|
return false;
|
|
137
369
|
}
|
|
138
370
|
|
|
139
|
-
/**
|
|
140
|
-
* Parse lenient JSON that may be incomplete or malformed.
|
|
141
|
-
*
|
|
142
|
-
* Handles:
|
|
143
|
-
*
|
|
144
|
-
* - Unclosed brackets `{`, `[` - parses as much as possible
|
|
145
|
-
* - Trailing commas `[1, 2, ]` - ignores them
|
|
146
|
-
* - Unclosed strings `"hello` - returns partial string
|
|
147
|
-
* - Junk text before JSON (LLM often adds explanatory text)
|
|
148
|
-
* - Markdown code blocks (extracts content from ```json ... ```)
|
|
149
|
-
* - Incomplete keywords like `tru`, `fal`, `nul`
|
|
150
|
-
* - Unicode escape sequences including surrogate pairs (emoji)
|
|
151
|
-
* - JavaScript-style comments (single-line and multi-line)
|
|
152
|
-
* - Unquoted object keys (JavaScript identifier style)
|
|
153
|
-
*
|
|
154
|
-
* @param input Raw JSON string (potentially incomplete)
|
|
155
|
-
* @returns Parse result with data, original input, and any errors
|
|
156
|
-
* @internal
|
|
157
|
-
*/
|
|
158
|
-
export function parseLenientJson<T>(input: string): IJsonParseResult<T> {
|
|
159
|
-
// Try native JSON.parse first (faster for valid JSON)
|
|
160
|
-
try {
|
|
161
|
-
return {
|
|
162
|
-
success: true,
|
|
163
|
-
data: JSON.parse(input) as T,
|
|
164
|
-
};
|
|
165
|
-
} catch {
|
|
166
|
-
// Fall back to lenient parser
|
|
167
|
-
}
|
|
168
|
-
|
|
169
|
-
// Extract markdown code block if present
|
|
170
|
-
const codeBlockContent: string | null = extractMarkdownCodeBlock(input);
|
|
171
|
-
const jsonSource: string =
|
|
172
|
-
codeBlockContent !== null ? codeBlockContent : input;
|
|
173
|
-
|
|
174
|
-
// Check if input is empty or whitespace-only
|
|
175
|
-
const trimmed: string = jsonSource.trim();
|
|
176
|
-
if (trimmed.length === 0) {
|
|
177
|
-
const errors: IJsonParseResult.IError[] = [];
|
|
178
|
-
const parser: LenientJsonParser = new LenientJsonParser(jsonSource, errors);
|
|
179
|
-
const data: unknown = parser.parse();
|
|
180
|
-
if (errors.length > 0) {
|
|
181
|
-
return { success: false, data: data as DeepPartial<T>, input, errors };
|
|
182
|
-
}
|
|
183
|
-
return { success: true, data: data as T };
|
|
184
|
-
}
|
|
185
|
-
|
|
186
|
-
// Check if input starts with a primitive value (no junk prefix skipping needed)
|
|
187
|
-
if (startsWithPrimitive(trimmed)) {
|
|
188
|
-
const errors: IJsonParseResult.IError[] = [];
|
|
189
|
-
const parser: LenientJsonParser = new LenientJsonParser(jsonSource, errors);
|
|
190
|
-
const data: unknown = parser.parse();
|
|
191
|
-
if (errors.length > 0) {
|
|
192
|
-
return { success: false, data: data as DeepPartial<T>, input, errors };
|
|
193
|
-
}
|
|
194
|
-
return { success: true, data: data as T };
|
|
195
|
-
}
|
|
196
|
-
|
|
197
|
-
// Find JSON start position (skip junk prefix from LLM)
|
|
198
|
-
const jsonStart: number = findJsonStart(jsonSource);
|
|
199
|
-
if (jsonStart === -1) {
|
|
200
|
-
// No JSON found - return empty object for lenient behavior
|
|
201
|
-
return {
|
|
202
|
-
success: true,
|
|
203
|
-
data: {} as T,
|
|
204
|
-
};
|
|
205
|
-
}
|
|
206
|
-
|
|
207
|
-
// Extract JSON portion (skip junk prefix)
|
|
208
|
-
const jsonInput: string =
|
|
209
|
-
jsonStart > 0 ? jsonSource.slice(jsonStart) : jsonSource;
|
|
210
|
-
|
|
211
|
-
const errors: IJsonParseResult.IError[] = [];
|
|
212
|
-
const parser: LenientJsonParser = new LenientJsonParser(jsonInput, errors);
|
|
213
|
-
const data: unknown = parser.parse();
|
|
214
|
-
|
|
215
|
-
if (errors.length > 0) {
|
|
216
|
-
return {
|
|
217
|
-
success: false,
|
|
218
|
-
data: data as DeepPartial<T>,
|
|
219
|
-
input,
|
|
220
|
-
errors,
|
|
221
|
-
};
|
|
222
|
-
}
|
|
223
|
-
return {
|
|
224
|
-
success: true,
|
|
225
|
-
data: data as T,
|
|
226
|
-
};
|
|
227
|
-
}
|
|
228
|
-
|
|
229
371
|
/**
|
|
230
372
|
* Lenient JSON parser that handles incomplete JSON.
|
|
231
373
|
*
|
|
@@ -279,6 +421,13 @@ class LenientJsonParser {
|
|
|
279
421
|
return this.parseKeywordOrIdentifier(path);
|
|
280
422
|
}
|
|
281
423
|
|
|
424
|
+
// Don't skip structural characters - let the caller handle them
|
|
425
|
+
const ch: string = this.input[this.pos]!;
|
|
426
|
+
if (ch === "}" || ch === "]" || ch === ",") {
|
|
427
|
+
// Not an error - just no value here (e.g., {"a":} or [,])
|
|
428
|
+
return undefined;
|
|
429
|
+
}
|
|
430
|
+
|
|
282
431
|
this.errors.push({
|
|
283
432
|
path,
|
|
284
433
|
expected: "JSON value",
|
|
@@ -320,10 +469,16 @@ class LenientJsonParser {
|
|
|
320
469
|
if (token === "false") return false;
|
|
321
470
|
if (token === "null") return null;
|
|
322
471
|
|
|
472
|
+
// Boolean string coercion: "yes", "y", "on" -> true, "no", "off" -> false
|
|
473
|
+
// Note: "n" is intentionally NOT handled (neither null nor false)
|
|
474
|
+
const lower: string = token.toLowerCase();
|
|
475
|
+
if (lower === "yes" || lower === "y" || lower === "on") return true;
|
|
476
|
+
if (lower === "no" || lower === "off") return false;
|
|
477
|
+
|
|
323
478
|
// Partial match for lenient parsing (e.g., "tru" -> true, "fal" -> false)
|
|
324
479
|
if ("true".startsWith(token) && token.length > 0) return true;
|
|
325
480
|
if ("false".startsWith(token) && token.length > 0) return false;
|
|
326
|
-
if ("null".startsWith(token) && token.length
|
|
481
|
+
if ("null".startsWith(token) && token.length >= 2) return null;
|
|
327
482
|
|
|
328
483
|
// Check if this looks like a string with missing opening quote (e.g., abcdefg")
|
|
329
484
|
if (this.pos < this.input.length && this.input[this.pos] === '"') {
|
|
@@ -469,7 +624,15 @@ class LenientJsonParser {
|
|
|
469
624
|
}
|
|
470
625
|
|
|
471
626
|
// Parse value
|
|
627
|
+
const prevPos: number = this.pos;
|
|
472
628
|
const value: unknown = this.parseValue(path + "[" + index + "]");
|
|
629
|
+
|
|
630
|
+
// Guard: if parseValue didn't advance, skip unexpected char to prevent infinite loop
|
|
631
|
+
if (this.pos === prevPos && this.pos < this.input.length) {
|
|
632
|
+
this.pos++;
|
|
633
|
+
continue;
|
|
634
|
+
}
|
|
635
|
+
|
|
473
636
|
result.push(value);
|
|
474
637
|
index++;
|
|
475
638
|
|