stringent 0.0.2 → 0.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +61 -73
- package/dist/context.d.ts +20 -2
- package/dist/context.d.ts.map +1 -0
- package/dist/context.js +1 -0
- package/dist/context.js.map +1 -0
- package/dist/createParser.d.ts +109 -26
- package/dist/createParser.d.ts.map +1 -0
- package/dist/createParser.js +80 -19
- package/dist/createParser.js.map +1 -0
- package/dist/errors.d.ts +121 -0
- package/dist/errors.d.ts.map +1 -0
- package/dist/errors.js +186 -0
- package/dist/errors.js.map +1 -0
- package/dist/grammar/index.d.ts +19 -14
- package/dist/grammar/index.d.ts.map +1 -0
- package/dist/grammar/index.js +4 -3
- package/dist/grammar/index.js.map +1 -0
- package/dist/index.d.ts +19 -11
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +16 -7
- package/dist/index.js.map +1 -0
- package/dist/parse/index.d.ts +101 -27
- package/dist/parse/index.d.ts.map +1 -0
- package/dist/parse/index.js +1 -0
- package/dist/parse/index.js.map +1 -0
- package/dist/performance.bench.d.ts +10 -0
- package/dist/performance.bench.d.ts.map +1 -0
- package/dist/performance.bench.js +379 -0
- package/dist/performance.bench.js.map +1 -0
- package/dist/primitive/index.d.ts +27 -35
- package/dist/primitive/index.d.ts.map +1 -0
- package/dist/primitive/index.js +22 -17
- package/dist/primitive/index.js.map +1 -0
- package/dist/runtime/eval.d.ts +157 -0
- package/dist/runtime/eval.d.ts.map +1 -0
- package/dist/runtime/eval.js +206 -0
- package/dist/runtime/eval.js.map +1 -0
- package/dist/runtime/infer.d.ts +2 -1
- package/dist/runtime/infer.d.ts.map +1 -0
- package/dist/runtime/infer.js +3 -2
- package/dist/runtime/infer.js.map +1 -0
- package/dist/runtime/parser.d.ts +92 -11
- package/dist/runtime/parser.d.ts.map +1 -0
- package/dist/runtime/parser.js +522 -47
- package/dist/runtime/parser.js.map +1 -0
- package/dist/schema/index.d.ts +230 -27
- package/dist/schema/index.d.ts.map +1 -0
- package/dist/schema/index.js +54 -28
- package/dist/schema/index.js.map +1 -0
- package/dist/static/infer.d.ts +4 -3
- package/dist/static/infer.d.ts.map +1 -0
- package/dist/static/infer.js +1 -0
- package/dist/static/infer.js.map +1 -0
- package/package.json +35 -4
- package/dist/combinators/index.d.ts +0 -57
- package/dist/combinators/index.js +0 -104
- package/dist/static/parser.d.ts +0 -7
- package/dist/static/parser.js +0 -6
package/dist/runtime/parser.js
CHANGED
|
@@ -7,7 +7,83 @@
|
|
|
7
7
|
* 2. Fall back to next level (higher precedence)
|
|
8
8
|
* 3. Base case: try atoms (last level)
|
|
9
9
|
*/
|
|
10
|
-
import { Token } from
|
|
10
|
+
import { Token } from '@sinclair/parsebox';
|
|
11
|
+
import { defineNode, number, string, ident, constVal, expr, nullLiteral, booleanLiteral, undefinedLiteral, } from '../schema/index.js';
|
|
12
|
+
// =============================================================================
|
|
13
|
+
// Built-in Atoms
|
|
14
|
+
// =============================================================================
|
|
15
|
+
/**
|
|
16
|
+
* Built-in atom schemas.
|
|
17
|
+
* These are always appended as the last level of the grammar.
|
|
18
|
+
* Users don't need to define these - they're provided automatically.
|
|
19
|
+
*/
|
|
20
|
+
/**
|
|
21
|
+
* Precedence for built-in atoms.
|
|
22
|
+
* Atoms are precedence 0 (base case), operators have precedence 1, 2, 3, etc.
|
|
23
|
+
* Note: Atoms are appended separately, so this value isn't used in sorting.
|
|
24
|
+
*/
|
|
25
|
+
const ATOM_PRECEDENCE = 0;

/**
 * Declare one built-in atom schema.
 * Every atom shares ATOM_PRECEDENCE; only the name, pattern and result type vary.
 */
function defineAtom(name, pattern, resultType) {
    return defineNode({
        name,
        pattern,
        precedence: ATOM_PRECEDENCE,
        resultType,
    });
}

/** Numeric literals. */
const numberLiteral = defineAtom('numberLiteral', [number()], 'number');

/** String literals delimited by " or '. */
const stringLiteral = defineAtom('stringLiteral', [string(['"', "'"])], 'string');

/** Bare identifiers. */
const identifierAtom = defineAtom('identifier', [ident()], 'unknown');

/** Grouping: ( expr ), with the wrapped expression bound as `inner`. */
const parentheses = defineAtom('parentheses', [constVal('('), expr().as('inner'), constVal(')')], 'unknown');

/** The keyword `null`. */
const nullAtom = defineAtom('nullLiteral', [nullLiteral()], 'null');

/** The keywords `true` and `false`. */
const booleanAtom = defineAtom('booleanLiteral', [booleanLiteral()], 'boolean');

/** The keyword `undefined`. */
const undefinedAtom = defineAtom('undefinedLiteral', [undefinedLiteral()], 'undefined');

/**
 * Every built-in atom, in the order the parser tries them.
 * The keyword literals (null, true/false, undefined) are listed ahead of
 * identifierAtom so that they are not consumed as plain identifiers.
 */
export const BUILT_IN_ATOMS = [
    numberLiteral,
    stringLiteral,
    nullAtom,
    booleanAtom,
    undefinedAtom,
    identifierAtom,
    parentheses,
];
|
|
11
87
|
// =============================================================================
|
|
12
88
|
// Primitive Parsers
|
|
13
89
|
// =============================================================================
|
|
@@ -17,24 +93,174 @@ function parseNumber(input) {
|
|
|
17
93
|
return [];
|
|
18
94
|
return [
|
|
19
95
|
{
|
|
20
|
-
node:
|
|
96
|
+
node: 'literal',
|
|
21
97
|
raw: result[0],
|
|
22
98
|
value: +result[0],
|
|
23
|
-
outputSchema:
|
|
99
|
+
outputSchema: 'number',
|
|
24
100
|
},
|
|
25
101
|
result[1],
|
|
26
102
|
];
|
|
27
103
|
}
|
|
104
|
+
/**
 * Convert backslash escape sequences in a raw string into the characters they denote.
 *
 * Recognised escapes: \n, \t, \r, \\, \", \', \0, \b, \f, \v, \xHH and \uHHHH.
 * Anything else is left untouched: an unknown escape keeps its backslash and
 * character, a malformed \x or \u keeps the two-character prefix (the following
 * characters are then processed normally), and a trailing lone backslash is kept.
 *
 * @param str - The raw string with escape sequences
 * @returns The processed string with escape sequences converted
 */
export function processEscapeSequences(str) {
    // Escapes that map one character to one character.
    const singleCharEscapes = new Map([
        ['n', '\n'],
        ['t', '\t'],
        ['r', '\r'],
        ['\\', '\\'],
        ['"', '"'],
        ["'", "'"],
        ['0', '\0'],
        ['b', '\b'],
        ['f', '\f'],
        ['v', '\v'],
    ]);
    // Escapes followed by a fixed number of hex digits.
    const hexEscapes = new Map([
        ['x', { digits: 2, shape: /^[0-9a-fA-F]{2}$/ }],
        ['u', { digits: 4, shape: /^[0-9a-fA-F]{4}$/ }],
    ]);
    const pieces = [];
    let pos = 0;
    while (pos < str.length) {
        const current = str[pos];
        if (current !== '\\') {
            pieces.push(current);
            pos += 1;
            continue;
        }
        if (pos + 1 >= str.length) {
            // Lone backslash at the very end of the input.
            pieces.push('\\');
            pos += 1;
            continue;
        }
        const code = str[pos + 1];
        const hexSpec = hexEscapes.get(code);
        if (hexSpec !== undefined) {
            // A short slice (input ran out) cannot satisfy the fixed-width pattern.
            const hex = str.slice(pos + 2, pos + 2 + hexSpec.digits);
            if (hexSpec.shape.test(hex)) {
                pieces.push(String.fromCharCode(parseInt(hex, 16)));
                pos += 2 + hexSpec.digits;
            }
            else {
                pieces.push('\\' + code);
                pos += 2;
            }
            continue;
        }
        const replacement = singleCharEscapes.get(code);
        pieces.push(replacement !== undefined ? replacement : '\\' + code);
        pos += 2;
    }
    return pieces.join('');
}
|
|
208
|
+
/**
 * Parse a quoted string literal from the start of `input`, honouring backslash escapes.
 * Unlike Token.String, an escaped quote inside the string does not end it.
 *
 * Leading whitespace is skipped. The first entry of `quotes` that the input
 * starts with is used as the delimiter, and the same delimiter must close
 * the string. Delimiters may be longer than one character.
 *
 * @param quotes - Accepted quote delimiters, tried in order
 * @param input - The text to parse
 * @returns `[rawContent, rest]` on success, where `rawContent` is the text
 *   between the delimiters with escape sequences left unprocessed and `rest`
 *   is everything after the closing delimiter; `[]` when the input is empty,
 *   does not start with a quote, or the string is unterminated.
 */
function parseStringLiteral(quotes, input) {
    const trimmed = input.replace(/^[\s]*/, '');
    if (trimmed.length === 0)
        return [];
    const openQuote = quotes.find((q) => trimmed.startsWith(q));
    if (!openQuote)
        return [];
    const contentStart = openQuote.length;
    let i = contentStart;
    while (i < trimmed.length) {
        if (trimmed[i] === '\\') {
            // Skip the backslash together with the character it escapes so an
            // escaped quote is never mistaken for the closing delimiter. A
            // trailing backslash simply runs past the end (unterminated).
            i += 2;
            continue;
        }
        // Compare the whole delimiter, not a single character, so that
        // multi-character quotes can close the string they opened.
        if (trimmed.startsWith(openQuote, i)) {
            return [trimmed.slice(contentStart, i), trimmed.slice(i + openQuote.length)];
        }
        i++;
    }
    // Ran out of input before finding the closing delimiter.
    return [];
}
|
|
28
252
|
function parseString(quotes, input) {
|
|
29
|
-
const result =
|
|
253
|
+
const result = parseStringLiteral(quotes, input);
|
|
30
254
|
if (result.length === 0)
|
|
31
255
|
return [];
|
|
256
|
+
const rawValue = result[0];
|
|
257
|
+
const processedValue = processEscapeSequences(rawValue);
|
|
32
258
|
return [
|
|
33
259
|
{
|
|
34
|
-
node:
|
|
35
|
-
raw:
|
|
36
|
-
value:
|
|
37
|
-
outputSchema:
|
|
260
|
+
node: 'literal',
|
|
261
|
+
raw: rawValue,
|
|
262
|
+
value: processedValue,
|
|
263
|
+
outputSchema: 'string',
|
|
38
264
|
},
|
|
39
265
|
result[1],
|
|
40
266
|
];
|
|
@@ -44,11 +270,9 @@ function parseIdent(input, context) {
|
|
|
44
270
|
if (result.length === 0)
|
|
45
271
|
return [];
|
|
46
272
|
const name = result[0];
|
|
47
|
-
const valueType = name in context.data
|
|
48
|
-
? context.data[name]
|
|
49
|
-
: "unknown";
|
|
273
|
+
const valueType = name in context.data ? context.data[name] : 'unknown';
|
|
50
274
|
return [
|
|
51
|
-
{ node:
|
|
275
|
+
{ node: 'identifier', name, outputSchema: valueType },
|
|
52
276
|
result[1],
|
|
53
277
|
];
|
|
54
278
|
}
|
|
@@ -56,43 +280,113 @@ function parseConst(value, input) {
|
|
|
56
280
|
const result = Token.Const(value, input);
|
|
57
281
|
if (result.length === 0)
|
|
58
282
|
return [];
|
|
59
|
-
return [{ node:
|
|
283
|
+
return [{ node: 'const', outputSchema: JSON.stringify(value) }, result[1]];
|
|
284
|
+
}
|
|
285
|
+
/**
 * Shared implementation for the keyword literals (null, true, false, undefined).
 *
 * Matches `keyword` at the start of `input` via Token.Const, then rejects the
 * match when the next character could continue an identifier, so that names
 * such as "nullable" or "trueName" are not split into keyword + remainder.
 *
 * @param keyword - The keyword text to match; also used as the node's `raw`
 * @param value - The runtime value the literal stands for
 * @param outputSchema - The schema string recorded on the literal node
 * @param input - The text to parse
 * @returns `[literalNode, remaining]` on success, `[]` otherwise
 */
function parseKeywordLiteral(keyword, value, outputSchema, input) {
    const result = Token.Const(keyword, input);
    if (result.length !== 2)
        return [];
    const remaining = result[1];
    if (remaining.length > 0 && /^[a-zA-Z0-9_$]/.test(remaining)) {
        return [];
    }
    return [
        {
            node: 'literal',
            raw: keyword,
            value,
            outputSchema,
        },
        remaining,
    ];
}
/** Parse the keyword `null` into a literal node with value null. */
function parseNull(input) {
    return parseKeywordLiteral('null', null, 'null', input);
}
/** Parse `true` or `false` into a boolean literal node; "true" is tried first. */
function parseBoolean(input) {
    const asTrue = parseKeywordLiteral('true', true, 'boolean', input);
    if (asTrue.length === 2)
        return asTrue;
    return parseKeywordLiteral('false', false, 'boolean', input);
}
/** Parse the keyword `undefined` into a literal node with value undefined. */
function parseUndefined(input) {
    return parseKeywordLiteral('undefined', undefined, 'undefined', input);
}
|
|
61
360
|
// =============================================================================
|
|
62
361
|
// Build Runtime Grammar from Node Schemas
|
|
63
362
|
// =============================================================================
|
|
64
363
|
/**
|
|
65
|
-
* Build runtime grammar from
|
|
364
|
+
* Build runtime grammar from operator schemas.
|
|
66
365
|
*
|
|
67
366
|
* Returns a flat tuple of levels:
|
|
68
|
-
* [[ops@prec1], [ops@prec2], ..., [
|
|
367
|
+
* [[ops@prec1], [ops@prec2], ..., [builtInAtoms]]
|
|
69
368
|
*
|
|
70
|
-
*
|
|
71
|
-
*
|
|
369
|
+
* Operators are sorted by precedence ascending (lowest first).
|
|
370
|
+
* Built-in atoms are always appended as the last level.
|
|
72
371
|
*/
|
|
73
|
-
export function buildGrammar(
|
|
74
|
-
const
|
|
75
|
-
const
|
|
76
|
-
for (const
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
const prec = node.precedence;
|
|
82
|
-
if (!operators.has(prec)) {
|
|
83
|
-
operators.set(prec, []);
|
|
84
|
-
}
|
|
85
|
-
operators.get(prec).push(node);
|
|
86
|
-
}
|
|
372
|
+
export function buildGrammar(operators) {
    // Bucket the operator schemas by their precedence value.
    const levelsByPrecedence = new Map();
    for (const operator of operators) {
        const level = levelsByPrecedence.get(operator.precedence);
        if (level === undefined) {
            levelsByPrecedence.set(operator.precedence, [operator]);
        }
        else {
            level.push(operator);
        }
    }
    // One grammar level per precedence, lowest precedence first.
    const grammar = [...levelsByPrecedence.entries()]
        .sort(([precA], [precB]) => precA - precB)
        .map(([, level]) => level);
    // The built-in atoms always form the final level.
    grammar.push(BUILT_IN_ATOMS);
    return grammar;
}
|
|
98
392
|
// =============================================================================
|
|
@@ -103,14 +397,20 @@ export function buildGrammar(nodes) {
|
|
|
103
397
|
*/
|
|
104
398
|
function parseElement(element, input, context) {
|
|
105
399
|
switch (element.kind) {
|
|
106
|
-
case
|
|
400
|
+
case 'number':
|
|
107
401
|
return parseNumber(input);
|
|
108
|
-
case
|
|
402
|
+
case 'string':
|
|
109
403
|
return parseString(element.quotes, input);
|
|
110
|
-
case
|
|
404
|
+
case 'ident':
|
|
111
405
|
return parseIdent(input, context);
|
|
112
|
-
case
|
|
406
|
+
case 'const':
|
|
113
407
|
return parseConst(element.value, input);
|
|
408
|
+
case 'null':
|
|
409
|
+
return parseNull(input);
|
|
410
|
+
case 'boolean':
|
|
411
|
+
return parseBoolean(input);
|
|
412
|
+
case 'undefined':
|
|
413
|
+
return parseUndefined(input);
|
|
114
414
|
default:
|
|
115
415
|
return [];
|
|
116
416
|
}
|
|
@@ -124,14 +424,14 @@ function parseElement(element, input, context) {
|
|
|
124
424
|
* - "expr": fullGrammar (full reset for delimited contexts)
|
|
125
425
|
*/
|
|
126
426
|
function parseElementWithLevel(element, input, context, currentLevels, nextLevels, fullGrammar) {
|
|
127
|
-
if (element.kind ===
|
|
427
|
+
if (element.kind === 'expr') {
|
|
128
428
|
const exprElement = element;
|
|
129
429
|
const constraint = exprElement.constraint;
|
|
130
430
|
const role = exprElement.role;
|
|
131
|
-
if (role ===
|
|
431
|
+
if (role === 'lhs') {
|
|
132
432
|
return parseExprWithConstraint(nextLevels, input, context, constraint, fullGrammar);
|
|
133
433
|
}
|
|
134
|
-
else if (role ===
|
|
434
|
+
else if (role === 'rhs') {
|
|
135
435
|
return parseExprWithConstraint(currentLevels, input, context, constraint, fullGrammar);
|
|
136
436
|
}
|
|
137
437
|
else {
|
|
@@ -165,7 +465,7 @@ function extractBindings(pattern, children) {
|
|
|
165
465
|
const element = pattern[i];
|
|
166
466
|
const child = children[i];
|
|
167
467
|
// Check if element is a NamedSchema (has __named and name properties)
|
|
168
|
-
if (
|
|
468
|
+
if ('__named' in element && element.__named === true) {
|
|
169
469
|
bindings[element.name] = child;
|
|
170
470
|
}
|
|
171
471
|
}
|
|
@@ -178,7 +478,46 @@ function extractBindings(pattern, children) {
|
|
|
178
478
|
* - Single child without names: passthrough (atom behavior)
|
|
179
479
|
* - If configure() provided: transform bindings to fields
|
|
180
480
|
* - Otherwise: bindings become node fields directly
|
|
481
|
+
*
|
|
482
|
+
* Special case: If resultType is "unknown" and there's a single expr binding,
|
|
483
|
+
* we propagate that binding's outputSchema (for generic parentheses, etc.).
|
|
484
|
+
*/
|
|
485
|
+
/**
|
|
486
|
+
* Helper: Check if resultType is a UnionResultType (computed union).
|
|
181
487
|
*/
|
|
488
|
+
function isUnionResultType(resultType) {
    // Only non-null objects can carry a `union` key; strings and other
    // primitives are rejected up front (the `in` operator would throw on them).
    if (resultType === null || typeof resultType !== 'object') {
        return false;
    }
    return 'union' in resultType;
}
|
|
491
|
+
/**
 * Helper: Build a union outputSchema string from a set of named bindings.
 *
 * Looks up each name in `bindings`, collects the distinct outputSchema values
 * (skipping missing bindings, empty schemas and 'unknown'), and joins them in
 * sorted order with ' | '.
 *
 * @param bindings - Map of binding name to a node carrying `outputSchema`
 * @param names - The binding names that take part in the union
 * @returns 'unknown' when nothing usable was found, the schema itself when
 *   there is exactly one, otherwise the sorted union string
 *
 * @example
 * // bindings = { then: { outputSchema: 'boolean' }, else: { outputSchema: 'number' } }
 * // names = ['then', 'else']
 * // result = 'boolean | number'
 */
function computeUnionOutputSchema(bindings, names) {
    // A Set keeps first-seen order and drops duplicates.
    const distinct = new Set();
    for (const name of names) {
        const schema = bindings[name]?.outputSchema;
        if (schema && schema !== 'unknown') {
            distinct.add(schema);
        }
    }
    if (distinct.size === 0) {
        return 'unknown';
    }
    const members = [...distinct];
    // Sorting makes the union string independent of binding order.
    return members.length === 1 ? members[0] : members.sort().join(' | ');
}
|
|
182
521
|
function buildNodeResult(nodeSchema, children, context) {
|
|
183
522
|
const bindings = extractBindings(nodeSchema.pattern, children);
|
|
184
523
|
// Single unnamed child → passthrough (atom behavior)
|
|
@@ -186,13 +525,33 @@ function buildNodeResult(nodeSchema, children, context) {
|
|
|
186
525
|
return children[0];
|
|
187
526
|
}
|
|
188
527
|
// Apply configure() if provided, otherwise use bindings directly
|
|
189
|
-
const fields = nodeSchema.configure
|
|
190
|
-
|
|
191
|
-
|
|
528
|
+
const fields = nodeSchema.configure ? nodeSchema.configure(bindings, context) : bindings;
|
|
529
|
+
// Determine output schema:
|
|
530
|
+
// - If resultType is a UnionResultType, compute the union from the specified bindings
|
|
531
|
+
// - If resultType is "unknown" and there's a single expr binding, use its outputSchema
|
|
532
|
+
// - Otherwise use the node's static resultType
|
|
533
|
+
let outputSchema;
|
|
534
|
+
if (isUnionResultType(nodeSchema.resultType)) {
|
|
535
|
+
// Computed union: extract schemas from named bindings and join with ' | '
|
|
536
|
+
outputSchema = computeUnionOutputSchema(bindings, nodeSchema.resultType.union);
|
|
537
|
+
}
|
|
538
|
+
else {
|
|
539
|
+
outputSchema = nodeSchema.resultType;
|
|
540
|
+
// TODO (see type ComputeOutputSchema<>): Remove hacky logic and use HKT potentially
|
|
541
|
+
if (outputSchema === 'unknown') {
|
|
542
|
+
const bindingKeys = Object.keys(bindings);
|
|
543
|
+
if (bindingKeys.length === 1) {
|
|
544
|
+
const singleBinding = bindings[bindingKeys[0]];
|
|
545
|
+
if (singleBinding.outputSchema) {
|
|
546
|
+
outputSchema = singleBinding.outputSchema;
|
|
547
|
+
}
|
|
548
|
+
}
|
|
549
|
+
}
|
|
550
|
+
}
|
|
192
551
|
// Build node with fields
|
|
193
552
|
return {
|
|
194
553
|
node: nodeSchema.name,
|
|
195
|
-
outputSchema
|
|
554
|
+
outputSchema,
|
|
196
555
|
...fields,
|
|
197
556
|
};
|
|
198
557
|
}
|
|
@@ -269,3 +628,119 @@ export function parse(nodes, input, context) {
|
|
|
269
628
|
const grammar = buildGrammar(nodes);
|
|
270
629
|
return parseLevels(grammar, input, context, grammar);
|
|
271
630
|
}
|
|
631
|
+
// =============================================================================
|
|
632
|
+
// Enhanced Parse API with Error Information
|
|
633
|
+
// =============================================================================
|
|
634
|
+
import { noMatchError, emptyInputError, } from '../errors.js';
|
|
635
|
+
/**
 * Parse input and report the outcome as a result object instead of a bare tuple.
 *
 * Where `parse()` signals failure with an empty array, this function returns
 * one of:
 * - `{ success: false, error, input }` with an empty-input error when `input`
 *   is empty or whitespace-only;
 * - `{ success: false, error, input }` with a no-match error, positioned at
 *   the offset computed by `findFailureOffset`, when no grammar rule matched;
 * - `{ success: true, ast, remaining, input }` when parsing produced a node.
 *
 * @example
 * ```ts
 * const result = parseWithErrors([add], source, context);
 * if (!result.success) {
 *   console.log(result.error.message);
 *   console.log(result.error.snippet);
 * }
 * ```
 */
export function parseWithErrors(nodes, input, context) {
    const isBlank = input.trim().length === 0;
    if (isBlank) {
        return { success: false, error: emptyInputError(input), input };
    }
    const grammar = buildGrammar(nodes);
    const parsed = parseLevels(grammar, input, context, grammar);
    if (parsed.length !== 0) {
        // Successful parse: [ast, remaining]
        return {
            success: true,
            ast: parsed[0],
            remaining: parsed[1],
            input,
        };
    }
    // Nothing matched: work out an offset to attach to the error.
    const failOffset = findFailureOffset(grammar, input, context);
    return { success: false, error: noMatchError(input, failOffset), input };
}
|
|
684
|
+
/**
 * Estimate the offset in `input` at which parsing stopped.
 *
 * The input is re-parsed with its leading whitespace removed:
 * - whitespace-only input yields 0;
 * - if that parse produces nothing, the offset is the first non-whitespace character;
 * - if it produces a node with non-whitespace text left over, the offset is
 *   the start of that leftover text;
 * - otherwise the offset is the end of what was parsed.
 *
 * This is a heuristic; positions are not tracked through the parse functions.
 */
function findFailureOffset(grammar, input, context) {
    const body = input.replace(/^[\s]*/, '');
    if (body.length === 0) {
        return 0;
    }
    const skipped = input.length - body.length;
    const attempt = parseLevels(grammar, body, context, grammar);
    if (attempt.length !== 2) {
        // Nothing parsed at all: point at the first real character.
        return skipped;
    }
    const rest = attempt[1];
    const unparsed = rest.trimStart();
    if (unparsed.length > 0) {
        // Leftover content: the error sits where that content begins.
        return input.length - unparsed.length;
    }
    // Only whitespace (or nothing) is left: point just past the parsed text.
    return skipped + (body.length - rest.length);
}
|
|
714
|
+
/**
 * Render a parse error as multi-line, human-readable text.
 *
 * The output is a header with the line and column, the error message, a blank
 * line and the source snippet. When the error carries a `context` with both
 * `expected` and `actual`, a blank line and those two values are appended.
 *
 * @example
 * ```ts
 * const result = parseWithErrors([add], source, context);
 * if (!result.success) {
 *   console.log(formatParseError(result.error));
 * }
 * ```
 */
export function formatParseError(error) {
    const { line, column } = error.position;
    const report = [
        `Error at line ${line}, column ${column}:`,
        ` ${error.message}`,
        '',
        ` ${error.snippet}`,
    ];
    const details = error.context;
    if (details && details.expected && details.actual) {
        report.push('', ` Expected: ${details.expected}`, ` Actual: ${details.actual}`);
    }
    return report.join('\n');
}
|
|
746
|
+
//# sourceMappingURL=parser.js.map
|