stringent 0.0.1 → 0.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +96 -0
- package/dist/context.d.ts +45 -0
- package/dist/context.d.ts.map +1 -0
- package/dist/context.js +14 -0
- package/dist/context.js.map +1 -0
- package/dist/createParser.d.ts +159 -0
- package/dist/createParser.d.ts.map +1 -0
- package/dist/createParser.js +118 -0
- package/dist/createParser.js.map +1 -0
- package/dist/errors.d.ts +121 -0
- package/dist/errors.d.ts.map +1 -0
- package/dist/errors.js +186 -0
- package/dist/errors.js.map +1 -0
- package/dist/grammar/index.d.ts +48 -0
- package/dist/grammar/index.d.ts.map +1 -0
- package/dist/grammar/index.js +13 -0
- package/dist/grammar/index.js.map +1 -0
- package/dist/index.d.ts +27 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +31 -0
- package/dist/index.js.map +1 -0
- package/dist/parse/index.d.ts +211 -0
- package/dist/parse/index.d.ts.map +1 -0
- package/dist/parse/index.js +16 -0
- package/dist/parse/index.js.map +1 -0
- package/dist/performance.bench.d.ts +10 -0
- package/dist/performance.bench.d.ts.map +1 -0
- package/dist/performance.bench.js +379 -0
- package/dist/performance.bench.js.map +1 -0
- package/dist/primitive/index.d.ts +96 -0
- package/dist/primitive/index.d.ts.map +1 -0
- package/dist/primitive/index.js +102 -0
- package/dist/primitive/index.js.map +1 -0
- package/dist/runtime/eval.d.ts +157 -0
- package/dist/runtime/eval.d.ts.map +1 -0
- package/dist/runtime/eval.js +206 -0
- package/dist/runtime/eval.js.map +1 -0
- package/dist/runtime/infer.d.ts +27 -0
- package/dist/runtime/infer.d.ts.map +1 -0
- package/dist/runtime/infer.js +35 -0
- package/dist/runtime/infer.js.map +1 -0
- package/dist/runtime/parser.d.ts +115 -0
- package/dist/runtime/parser.d.ts.map +1 -0
- package/dist/runtime/parser.js +746 -0
- package/dist/runtime/parser.js.map +1 -0
- package/dist/schema/index.d.ts +476 -0
- package/dist/schema/index.d.ts.map +1 -0
- package/dist/schema/index.js +137 -0
- package/dist/schema/index.js.map +1 -0
- package/dist/static/infer.d.ts +27 -0
- package/dist/static/infer.d.ts.map +1 -0
- package/dist/static/infer.js +10 -0
- package/dist/static/infer.js.map +1 -0
- package/package.json +62 -8
|
@@ -0,0 +1,746 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Runtime Parser
|
|
3
|
+
*
|
|
4
|
+
* Mirrors the type-level Parse<Grammar, Input, Context> at runtime.
|
|
5
|
+
* Uses the same precedence-based parsing strategy:
|
|
6
|
+
* 1. Try operators at current level (lowest precedence first)
|
|
7
|
+
* 2. Fall back to next level (higher precedence)
|
|
8
|
+
* 3. Base case: try atoms (last level)
|
|
9
|
+
*/
|
|
10
|
+
import { Token } from '@sinclair/parsebox';
|
|
11
|
+
import { defineNode, number, string, ident, constVal, expr, nullLiteral, booleanLiteral, undefinedLiteral, } from '../schema/index.js';
|
|
12
|
+
// =============================================================================
|
|
13
|
+
// Built-in Atoms
|
|
14
|
+
// =============================================================================
|
|
15
|
+
/**
|
|
16
|
+
* Built-in atom schemas.
|
|
17
|
+
* These are always appended as the last level of the grammar.
|
|
18
|
+
* Users don't need to define these - they're provided automatically.
|
|
19
|
+
*/
|
|
20
|
+
/**
 * Precedence assigned to every built-in atom.
 * Atoms are the base case (0); operators use 1, 2, 3, ...
 * The atoms are appended to the grammar as their own final level, so this
 * value does not take part in the precedence sort.
 */
const ATOM_PRECEDENCE = 0;
/**
 * Define a built-in atom node: same as defineNode, with the precedence fixed
 * to ATOM_PRECEDENCE.
 */
const defineAtom = (name, pattern, resultType) => defineNode({
    name,
    pattern,
    precedence: ATOM_PRECEDENCE,
    resultType,
});
/** Numeric literal. */
const numberLiteral = defineAtom('numberLiteral', [number()], 'number');
/** String literal delimited by " or '. */
const stringLiteral = defineAtom('stringLiteral', [string(['"', "'"])], 'string');
/** Bare identifier; its result type is resolved while parsing. */
const identifierAtom = defineAtom('identifier', [ident()], 'unknown');
/** Grouping: ( expr ), with the inner expression bound as "inner". */
const parentheses = defineAtom('parentheses', [constVal('('), expr().as('inner'), constVal(')')], 'unknown');
/** The keyword null. */
const nullAtom = defineAtom('nullLiteral', [nullLiteral()], 'null');
/** The keywords true and false. */
const booleanAtom = defineAtom('booleanLiteral', [booleanLiteral()], 'boolean');
/** The keyword undefined. */
const undefinedAtom = defineAtom('undefinedLiteral', [undefinedLiteral()], 'undefined');
/**
 * All built-in atoms, used as the last level of the grammar.
 *
 * Order matters: the keyword literals (null, true/false, undefined) are listed
 * BEFORE identifierAtom so they are tried first instead of being read as
 * identifiers.
 */
export const BUILT_IN_ATOMS = [
    numberLiteral,
    stringLiteral,
    nullAtom,
    booleanAtom,
    undefinedAtom,
    identifierAtom,
    parentheses,
];
|
|
87
|
+
// =============================================================================
|
|
88
|
+
// Primitive Parsers
|
|
89
|
+
// =============================================================================
|
|
90
|
+
/**
 * Parse a numeric literal from the start of `input`.
 *
 * @param input - Remaining source text.
 * @returns `[]` when Token.Number finds no match, otherwise
 *   `[literalNode, rest]` where the node carries the raw text and its numeric value.
 */
function parseNumber(input) {
    const matched = Token.Number(input);
    if (matched.length === 0) {
        return [];
    }
    const [raw, rest] = matched;
    const literal = {
        node: 'literal',
        raw,
        value: Number(raw),
        outputSchema: 'number',
    };
    return [literal, rest];
}
|
|
104
|
+
/** Single-character escapes and the character each one produces. */
const SIMPLE_ESCAPES = new Map([
    ['n', '\n'],
    ['t', '\t'],
    ['r', '\r'],
    ['\\', '\\'],
    ['"', '"'],
    ["'", "'"],
    ['0', '\0'],
    ['b', '\b'],
    ['f', '\f'],
    ['v', '\v'],
]);
/** Hex escapes: number of hex digits expected and the pattern validating them. */
const HEX_ESCAPES = new Map([
    ['x', { width: 2, pattern: /^[0-9a-fA-F]{2}$/ }],
    ['u', { width: 4, pattern: /^[0-9a-fA-F]{4}$/ }],
]);
/**
 * Convert backslash escape sequences in a string to the characters they denote.
 * Supports: \n, \t, \r, \\, \", \', \0, \b, \f, \v, \xHH, \uHHHH
 *
 * Anything that is not a complete, valid escape is kept verbatim: a trailing
 * backslash, an unknown escape character, or a \x / \u without enough hex digits.
 *
 * @param str - The raw string with escape sequences
 * @returns The processed string with escape sequences converted
 */
export function processEscapeSequences(str) {
    let output = '';
    let pos = 0;
    while (pos < str.length) {
        const ch = str[pos];
        if (ch !== '\\') {
            output += ch;
            pos += 1;
            continue;
        }
        if (pos + 1 >= str.length) {
            // Trailing backslash - keep as-is
            output += '\\';
            pos += 1;
            continue;
        }
        const code = str[pos + 1];
        if (SIMPLE_ESCAPES.has(code)) {
            output += SIMPLE_ESCAPES.get(code);
            pos += 2;
            continue;
        }
        const hexSpec = HEX_ESCAPES.get(code);
        if (hexSpec !== undefined) {
            const digits = str.slice(pos + 2, pos + 2 + hexSpec.width);
            if (digits.length === hexSpec.width && hexSpec.pattern.test(digits)) {
                output += String.fromCharCode(parseInt(digits, 16));
                pos += 2 + hexSpec.width;
                continue;
            }
        }
        // Unknown or malformed escape - keep the backslash and the character;
        // whatever follows is then processed as ordinary text.
        output += '\\' + code;
        pos += 2;
    }
    return output;
}
|
|
208
|
+
/**
 * Parse a string literal with proper escape sequence handling.
 * Unlike Token.String, this parser correctly handles escaped quotes within strings.
 *
 * Leading whitespace is skipped. The first entry of `quotes` that the input
 * starts with is used as both the opening and the closing delimiter.
 * Delimiters may be longer than one character; empty entries are ignored.
 *
 * @param quotes - Accepted quote delimiters, tried in order.
 * @param input - Remaining source text.
 * @returns `[]` when the input does not start with a quote or the string is
 *   unterminated, otherwise `[rawContent, rest]` where `rawContent` is the
 *   text between the delimiters with escape sequences left unprocessed.
 */
function parseStringLiteral(quotes, input) {
    // Trim leading whitespace
    const trimmed = input.replace(/^[\s]*/, '');
    if (trimmed.length === 0) {
        return [];
    }
    // An empty delimiter would match everywhere and can never delimit anything, so skip it.
    const openQuote = quotes.find((q) => q.length > 0 && trimmed.startsWith(q));
    if (!openQuote) {
        return [];
    }
    let i = openQuote.length;
    let rawContent = '';
    while (i < trimmed.length) {
        const char = trimmed[i];
        if (char === '\\') {
            // Keep the backslash together with the character it escapes (if any),
            // so an escaped quote is never mistaken for the closing delimiter.
            const escaped = trimmed.slice(i, i + 2);
            rawContent += escaped;
            i += escaped.length;
            continue;
        }
        // Compare the whole delimiter, not just one character, so that
        // multi-character quotes can close the string.
        if (trimmed.startsWith(openQuote, i)) {
            return [rawContent, trimmed.slice(i + openQuote.length)];
        }
        rawContent += char;
        i++;
    }
    // Unterminated string
    return [];
}
|
|
252
|
+
/**
 * Parse a quoted string into a literal node.
 *
 * @param quotes - Accepted quote delimiters.
 * @param input - Remaining source text.
 * @returns `[]` when no string literal is found, otherwise `[literalNode, rest]`.
 *   `raw` is the text between the quotes as written; `value` is that text with
 *   escape sequences processed.
 */
function parseString(quotes, input) {
    const scanned = parseStringLiteral(quotes, input);
    if (scanned.length === 0) {
        return [];
    }
    const [raw, rest] = scanned;
    const literal = {
        node: 'literal',
        raw,
        value: processEscapeSequences(raw),
        outputSchema: 'string',
    };
    return [literal, rest];
}
|
|
268
|
+
/**
 * Parse an identifier and resolve its type from the parse context.
 *
 * @param input - Remaining source text.
 * @param context - Parse context; `context.data` maps identifier names to their type.
 * @returns `[]` when Token.Ident finds no identifier, otherwise
 *   `[identifierNode, rest]`. `outputSchema` is the entry from `context.data`,
 *   or 'unknown' when the name is not declared there.
 */
function parseIdent(input, context) {
    const result = Token.Ident(input);
    if (result.length === 0) {
        return [];
    }
    const [name, remaining] = result;
    // Own-property check: the `in` operator also sees inherited members, so
    // names like "toString" or "constructor" would resolve to Object.prototype
    // functions instead of 'unknown'.
    const isDeclared = Object.prototype.hasOwnProperty.call(context.data, name);
    const valueType = isDeclared ? context.data[name] : 'unknown';
    return [
        { node: 'identifier', name, outputSchema: valueType },
        remaining,
    ];
}
|
|
279
|
+
/**
 * Match a fixed token (operator, punctuation, keyword) at the start of `input`.
 *
 * @param value - The exact text to match.
 * @param input - Remaining source text.
 * @returns `[]` when Token.Const does not match, otherwise `[constNode, rest]`;
 *   the node's `outputSchema` is the JSON-quoted form of `value`.
 */
function parseConst(value, input) {
    const matched = Token.Const(value, input);
    if (matched.length === 0) {
        return [];
    }
    const constNode = { node: 'const', outputSchema: JSON.stringify(value) };
    return [constNode, matched[1]];
}
|
|
285
|
+
/**
 * Parse the keyword `null`.
 *
 * @param input - Remaining source text.
 * @returns `[]` when the input does not start with `null` or the keyword runs
 *   straight into an identifier character (e.g. "nullable"); otherwise
 *   `[literalNode, rest]`.
 */
function parseNull(input) {
    const matched = Token.Const('null', input);
    if (matched.length === 0) {
        return [];
    }
    const rest = matched[1];
    // A following identifier character means this is a longer name, not the keyword.
    const continuesIdentifier = rest.length > 0 && /^[a-zA-Z0-9_$]/.test(rest);
    if (continuesIdentifier) {
        return [];
    }
    const literal = {
        node: 'literal',
        raw: 'null',
        value: null,
        outputSchema: 'null',
    };
    return [literal, rest];
}
|
|
304
|
+
/**
 * Parse the keyword `true` or `false`.
 *
 * `true` is tried first, then `false`. A keyword that runs straight into an
 * identifier character (e.g. "trueName", "falsePositive") is rejected.
 *
 * @param input - Remaining source text.
 * @returns `[]` when neither keyword matches, otherwise `[literalNode, rest]`.
 */
function parseBoolean(input) {
    const keywords = [
        ['true', true],
        ['false', false],
    ];
    for (const [keyword, value] of keywords) {
        const matched = Token.Const(keyword, input);
        if (matched.length !== 2) {
            continue;
        }
        const rest = matched[1];
        // A following identifier character means this is a longer name, not the keyword.
        if (rest.length > 0 && /^[a-zA-Z0-9_$]/.test(rest)) {
            continue;
        }
        const literal = {
            node: 'literal',
            raw: keyword,
            value,
            outputSchema: 'boolean',
        };
        return [literal, rest];
    }
    return [];
}
|
|
341
|
+
/**
 * Parse the keyword `undefined`.
 *
 * @param input - Remaining source text.
 * @returns `[]` when the input does not start with `undefined` or the keyword
 *   runs straight into an identifier character (e.g. "undefinedVar");
 *   otherwise `[literalNode, rest]`.
 */
function parseUndefined(input) {
    const matched = Token.Const('undefined', input);
    if (matched.length === 0) {
        return [];
    }
    const rest = matched[1];
    // A following identifier character means this is a longer name, not the keyword.
    const continuesIdentifier = rest.length > 0 && /^[a-zA-Z0-9_$]/.test(rest);
    if (continuesIdentifier) {
        return [];
    }
    const literal = {
        node: 'literal',
        raw: 'undefined',
        value: undefined,
        outputSchema: 'undefined',
    };
    return [literal, rest];
}
|
|
360
|
+
// =============================================================================
|
|
361
|
+
// Build Runtime Grammar from Node Schemas
|
|
362
|
+
// =============================================================================
|
|
363
|
+
/**
 * Build the runtime grammar from operator schemas.
 *
 * Operators are grouped by their `precedence`; groups are ordered by
 * precedence ascending (lowest first) and, within a group, operators keep
 * the order in which they were given. BUILT_IN_ATOMS is always appended as
 * the last level:
 *   [[ops@prec1], [ops@prec2], ..., [builtInAtoms]]
 *
 * @param operators - Operator node schemas.
 * @returns The list of grammar levels.
 */
export function buildGrammar(operators) {
    const byPrecedence = new Map();
    for (const operator of operators) {
        const bucket = byPrecedence.get(operator.precedence);
        if (bucket === undefined) {
            byPrecedence.set(operator.precedence, [operator]);
        }
        else {
            bucket.push(operator);
        }
    }
    const ascending = [...byPrecedence.keys()].sort((a, b) => a - b);
    const levels = ascending.map((precedence) => byPrecedence.get(precedence));
    levels.push(BUILT_IN_ATOMS);
    return levels;
}
|
|
392
|
+
// =============================================================================
|
|
393
|
+
// Pattern Element Parsing
|
|
394
|
+
// =============================================================================
|
|
395
|
+
/**
 * Parse a single non-expression pattern element by dispatching on its `kind`.
 *
 * @param element - Pattern element schema.
 * @param input - Remaining source text.
 * @param context - Parse context (only used for identifiers).
 * @returns The primitive parser's result, or `[]` for an unrecognised kind.
 */
function parseElement(element, input, context) {
    const { kind } = element;
    if (kind === 'number') {
        return parseNumber(input);
    }
    if (kind === 'string') {
        return parseString(element.quotes, input);
    }
    if (kind === 'ident') {
        return parseIdent(input, context);
    }
    if (kind === 'const') {
        return parseConst(element.value, input);
    }
    if (kind === 'null') {
        return parseNull(input);
    }
    if (kind === 'boolean') {
        return parseBoolean(input);
    }
    if (kind === 'undefined') {
        return parseUndefined(input);
    }
    return [];
}
|
|
418
|
+
/**
 * Parse one pattern element, choosing the grammar slice for expression elements.
 *
 * For an element of kind "expr", its role selects where parsing starts:
 * - "lhs": nextLevels (avoids left-recursion)
 * - "rhs": currentLevels (maintains precedence, enables right-associativity)
 * - anything else: fullGrammar (full reset for delimited contexts)
 * The element's constraint is passed along to the expression parser.
 * Every other kind is handed to parseElement.
 */
function parseElementWithLevel(element, input, context, currentLevels, nextLevels, fullGrammar) {
    if (element.kind !== 'expr') {
        return parseElement(element, input, context);
    }
    const { constraint, role } = element;
    let startLevels = fullGrammar;
    if (role === 'lhs') {
        startLevels = nextLevels;
    }
    else if (role === 'rhs') {
        startLevels = currentLevels;
    }
    return parseExprWithConstraint(startLevels, input, context, constraint, fullGrammar);
}
|
|
443
|
+
/**
 * Parse a pattern tuple: every element must match, in order, each one
 * starting where the previous one stopped.
 *
 * @returns `[]` as soon as any element fails, otherwise `[children, rest]`
 *   with one parsed child per pattern element.
 */
function parsePatternTuple(pattern, input, context, currentLevels, nextLevels, fullGrammar) {
    const parsed = [];
    let rest = input;
    for (const element of pattern) {
        const step = parseElementWithLevel(element, rest, context, currentLevels, nextLevels, fullGrammar);
        if (step.length === 0) {
            return [];
        }
        const [child, after] = step;
        parsed.push(child);
        rest = after;
    }
    return [parsed, rest];
}
|
|
458
|
+
/**
 * Collect the named children of a parsed pattern.
 *
 * A pattern element contributes a binding only when it carries
 * `__named === true` (set by `.as(name)`); the binding key is the element's
 * `name` and the value is the child parsed at the same position.
 *
 * @param pattern - Pattern elements of the node schema.
 * @param children - Parsed children, index-aligned with `pattern`.
 * @returns Object mapping binding names to parsed children.
 */
function extractBindings(pattern, children) {
    const named = {};
    for (const [index, element] of pattern.entries()) {
        const isNamed = '__named' in element && element.__named === true;
        if (isNamed) {
            named[element.name] = children[index];
        }
    }
    return named;
}
|
|
474
|
+
/**
|
|
475
|
+
* Build AST node from parsed children.
|
|
476
|
+
*
|
|
477
|
+
* Uses named bindings from .as() to determine node fields.
|
|
478
|
+
* - Single child without names: passthrough (atom behavior)
|
|
479
|
+
* - If configure() provided: transform bindings to fields
|
|
480
|
+
* - Otherwise: bindings become node fields directly
|
|
481
|
+
*
|
|
482
|
+
* Special case: If resultType is "unknown" and there's a single expr binding,
|
|
483
|
+
* we propagate that binding's outputSchema (for generic parentheses, etc.).
|
|
484
|
+
*/
|
|
485
|
+
/**
 * Helper: tell whether a resultType is a computed union, i.e. a non-null
 * object that has a `union` property.
 */
function isUnionResultType(resultType) {
    if (resultType === null || typeof resultType !== 'object') {
        return false;
    }
    return 'union' in resultType;
}
|
|
491
|
+
/**
 * Helper: build the union outputSchema string for a set of bindings.
 *
 * Looks up each listed binding, takes its outputSchema, drops missing and
 * 'unknown' entries as well as duplicates, and combines what is left.
 *
 * @example
 * // bindings = { then: { outputSchema: 'boolean' }, else: { outputSchema: 'number' } }
 * // names = ['then', 'else']
 * // result = 'boolean | number'
 *
 * @returns 'unknown' when nothing usable is found, the schema itself when
 *   there is exactly one, otherwise the schemas sorted and joined with ' | '.
 */
function computeUnionOutputSchema(bindings, names) {
    const distinct = new Set();
    for (const name of names) {
        const schema = bindings[name]?.outputSchema;
        if (schema && schema !== 'unknown') {
            distinct.add(schema);
        }
    }
    if (distinct.size === 0) {
        return 'unknown';
    }
    const members = [...distinct];
    // Sorting keeps the result independent of binding order.
    return members.length === 1 ? members[0] : members.sort().join(' | ');
}
|
|
521
|
+
/**
 * Helper: decide the outputSchema of a node from its resultType and bindings.
 * - Union resultType: computed from the bindings it names.
 * - "unknown" with exactly one binding: that binding's outputSchema, if it has one.
 * - Otherwise: the static resultType.
 */
function resolveOutputSchema(resultType, bindings) {
    if (isUnionResultType(resultType)) {
        return computeUnionOutputSchema(bindings, resultType.union);
    }
    if (resultType !== 'unknown') {
        return resultType;
    }
    // TODO (see type ComputeOutputSchema<>): Remove hacky logic and use HKT potentially
    const names = Object.keys(bindings);
    if (names.length !== 1) {
        return resultType;
    }
    return bindings[names[0]].outputSchema || resultType;
}
/**
 * Build the AST node for a matched node schema.
 *
 * - No named bindings and exactly one child: the child is returned as-is
 *   (atom passthrough).
 * - Otherwise the node gets `node` (schema name), `outputSchema`, and the
 *   fields produced by `configure(bindings, context)` when the schema
 *   provides it, or the bindings themselves when it does not. Fields are
 *   spread last, so a field named `node` or `outputSchema` overrides the
 *   computed value.
 *
 * @param nodeSchema - Schema of the node that matched.
 * @param children - Parsed children, index-aligned with `nodeSchema.pattern`.
 * @param context - Parse context, forwarded to `configure`.
 */
function buildNodeResult(nodeSchema, children, context) {
    const bindings = extractBindings(nodeSchema.pattern, children);
    const hasNoBindings = Object.keys(bindings).length === 0;
    if (hasNoBindings && children.length === 1) {
        return children[0];
    }
    // configure() runs before the output schema is resolved, as it may touch the bindings.
    const fields = nodeSchema.configure ? nodeSchema.configure(bindings, context) : bindings;
    const outputSchema = resolveOutputSchema(nodeSchema.resultType, bindings);
    return {
        node: nodeSchema.name,
        outputSchema,
        ...fields,
    };
}
|
|
558
|
+
/**
 * Try to match one node schema against the input.
 *
 * @returns `[]` when the node's pattern does not match, otherwise
 *   `[astNode, rest]` with the node built from the parsed children.
 */
function parseNodePattern(node, input, context, currentLevels, nextLevels, fullGrammar) {
    const matched = parsePatternTuple(node.pattern, input, context, currentLevels, nextLevels, fullGrammar);
    if (matched.length === 0) {
        return [];
    }
    const [children, rest] = matched;
    return [buildNodeResult(node, children, context), rest];
}
|
|
567
|
+
/**
 * Parse an expression starting at `startLevels` and enforce an optional
 * type constraint on the result.
 *
 * @param constraint - When not `undefined`, the parsed node's outputSchema
 *   must be strictly equal to it.
 * @returns `[]` when nothing parses or the constraint is not met, otherwise
 *   `[node, rest]`.
 */
function parseExprWithConstraint(startLevels, input, context, constraint, fullGrammar) {
    const parsed = parseLevels(startLevels, input, context, fullGrammar);
    if (parsed.length === 0) {
        return [];
    }
    const [node, remaining] = parsed;
    const violatesConstraint = constraint !== undefined && node.outputSchema !== constraint;
    return violatesConstraint ? [] : [node, remaining];
}
|
|
583
|
+
/**
 * Try the nodes of one level in order and return the first match.
 *
 * @returns The first node result of length 2, or `[]` when no node matches.
 */
function parseNodes(nodes, input, context, currentLevels, nextLevels, fullGrammar) {
    for (const candidate of nodes) {
        const attempt = parseNodePattern(candidate, input, context, currentLevels, nextLevels, fullGrammar);
        if (attempt.length !== 2) {
            continue;
        }
        return attempt;
    }
    return [];
}
|
|
594
|
+
/**
 * Parse using grammar levels (flat tuple).
 *
 * Levels are tried from first to last. While a level is being tried, it and
 * everything after it are the "current levels" and everything after it alone
 * are the "next levels". The first level whose nodes produce a match wins.
 *
 * @returns The first successful `[node, rest]`, or `[]` when `levels` is
 *   empty or no level matches.
 */
function parseLevels(levels, input, context, fullGrammar) {
    let current = levels;
    while (current.length > 0) {
        const next = current.slice(1);
        const outcome = parseNodes(current[0], input, context, current, next, fullGrammar);
        if (outcome.length === 2) {
            return outcome;
        }
        // Nothing matched here - fall through to the remaining levels
        current = next;
    }
    return [];
}
|
|
617
|
+
// =============================================================================
|
|
618
|
+
// Public API
|
|
619
|
+
// =============================================================================
|
|
620
|
+
/**
 * Parse an input string using node schemas.
 *
 * Builds the grammar from `nodes` and parses `input` starting at its first
 * level. The declared return type is computed by the type-level
 * Parse<Grammar, Input, Context>, which this runtime function mirrors.
 *
 * @returns `[]` on failure, otherwise `[ast, rest]`.
 */
export function parse(nodes, input, context) {
    const fullGrammar = buildGrammar(nodes);
    const startLevels = fullGrammar;
    return parseLevels(startLevels, input, context, fullGrammar);
}
|
|
631
|
+
// =============================================================================
|
|
632
|
+
// Enhanced Parse API with Error Information
|
|
633
|
+
// =============================================================================
|
|
634
|
+
import { noMatchError, emptyInputError, } from '../errors.js';
|
|
635
|
+
/**
 * Parse input and report failures as structured error information.
 *
 * Unlike `parse()`, which returns an empty array on failure, this function
 * always returns a result object:
 * - empty or whitespace-only input: `{ success: false, error: emptyInputError(input), input }`
 * - no grammar rule matched: `{ success: false, error: noMatchError(input, offset), input }`,
 *   where `offset` comes from findFailureOffset
 * - otherwise: `{ success: true, ast, remaining, input }`
 *
 * NOTE(review): a parse that consumes only a prefix of the input is still
 * reported as `success: true`, with the unconsumed text in `remaining` -
 * callers presumably need to check `remaining` themselves; confirm this is
 * intended.
 *
 * @example
 * ```ts
 * const result = parseWithErrors([add], input, context);
 * if (!result.success) {
 *   console.log(result.error.message);
 *   console.log(result.error.snippet);
 * }
 * ```
 */
export function parseWithErrors(nodes, input, context) {
    const failure = (error) => ({ success: false, error, input });
    // Empty / whitespace-only input gets its own error
    if (input.trim().length === 0) {
        return failure(emptyInputError(input));
    }
    const grammar = buildGrammar(nodes);
    const outcome = parseLevels(grammar, input, context, grammar);
    if (outcome.length === 0) {
        // Nothing matched - estimate where the parse stopped
        const failOffset = findFailureOffset(grammar, input, context);
        return failure(noMatchError(input, failOffset));
    }
    const [ast, remaining] = outcome;
    return { success: true, ast, remaining, input };
}
|
|
684
|
+
/**
 * Estimate the offset in `input` at which parsing failed.
 *
 * This is a simplified heuristic, not real position tracking: it re-parses
 * the input with leading whitespace removed and looks at how much was consumed.
 *
 * @returns
 * - 0 when the input is empty or whitespace-only;
 * - the number of leading whitespace characters when nothing parses;
 * - the offset of the first non-whitespace unparsed character when a prefix
 *   parses and non-whitespace text is left over;
 * - otherwise the offset just past the parsed prefix.
 */
function findFailureOffset(grammar, input, context) {
    // Drop leading whitespace first, since the parser does the same
    const body = input.replace(/^[\s]*/, '');
    if (body.length === 0) {
        return 0;
    }
    const leadingWs = input.length - body.length;
    const attempt = parseLevels(grammar, body, context, grammar);
    if (attempt.length !== 2) {
        return leadingWs;
    }
    const leftover = attempt[1];
    if (leftover.trim().length > 0) {
        // Unparsed content remains - point at its first non-whitespace character
        return input.length - leftover.trimStart().length;
    }
    const consumed = body.length - leftover.length;
    return leadingWs + consumed;
}
|
|
714
|
+
/**
 * Format a parse error as multi-line text for display.
 *
 * The output has a header line with the line and column, the error message,
 * a blank line, and the source snippet. When the error's context carries
 * both `expected` and `actual`, a blank line and two more lines follow.
 *
 * @example
 * ```ts
 * const result = parseWithErrors(nodes, input, context);
 * if (!result.success) {
 *   console.log(formatParseError(result.error));
 * }
 * ```
 *
 * @param error - Error object with `position` ({ line, column }), `message`,
 *   `snippet` and an optional `context`.
 * @returns The formatted text, lines joined with "\n".
 */
export function formatParseError(error) {
    const { position, message, snippet, context } = error;
    const report = [
        `Error at line ${position.line}, column ${position.column}:`,
        ` ${message}`,
        '',
        ` ${snippet}`,
    ];
    if (context?.expected && context.actual) {
        report.push('', ` Expected: ${context.expected}`, ` Actual: ${context.actual}`);
    }
    return report.join('\n');
}
|
|
746
|
+
//# sourceMappingURL=parser.js.map
|