npm - stringent - Versions diffs - 0.0.2 → 0.0.4 - Mend

stringent 0.0.2 → 0.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (58)

package/README.md +61 -73
package/dist/context.d.ts +20 -2
package/dist/context.d.ts.map +1 -0
package/dist/context.js +1 -0
package/dist/context.js.map +1 -0
package/dist/createParser.d.ts +109 -26
package/dist/createParser.d.ts.map +1 -0
package/dist/createParser.js +80 -19
package/dist/createParser.js.map +1 -0
package/dist/errors.d.ts +121 -0
package/dist/errors.d.ts.map +1 -0
package/dist/errors.js +186 -0
package/dist/errors.js.map +1 -0
package/dist/grammar/index.d.ts +19 -14
package/dist/grammar/index.d.ts.map +1 -0
package/dist/grammar/index.js +4 -3
package/dist/grammar/index.js.map +1 -0
package/dist/index.d.ts +19 -11
package/dist/index.d.ts.map +1 -0
package/dist/index.js +16 -7
package/dist/index.js.map +1 -0
package/dist/parse/index.d.ts +101 -27
package/dist/parse/index.d.ts.map +1 -0
package/dist/parse/index.js +1 -0
package/dist/parse/index.js.map +1 -0
package/dist/performance.bench.d.ts +10 -0
package/dist/performance.bench.d.ts.map +1 -0
package/dist/performance.bench.js +379 -0
package/dist/performance.bench.js.map +1 -0
package/dist/primitive/index.d.ts +27 -35
package/dist/primitive/index.d.ts.map +1 -0
package/dist/primitive/index.js +22 -17
package/dist/primitive/index.js.map +1 -0
package/dist/runtime/eval.d.ts +157 -0
package/dist/runtime/eval.d.ts.map +1 -0
package/dist/runtime/eval.js +206 -0
package/dist/runtime/eval.js.map +1 -0
package/dist/runtime/infer.d.ts +2 -1
package/dist/runtime/infer.d.ts.map +1 -0
package/dist/runtime/infer.js +3 -2
package/dist/runtime/infer.js.map +1 -0
package/dist/runtime/parser.d.ts +92 -11
package/dist/runtime/parser.d.ts.map +1 -0
package/dist/runtime/parser.js +522 -47
package/dist/runtime/parser.js.map +1 -0
package/dist/schema/index.d.ts +230 -27
package/dist/schema/index.d.ts.map +1 -0
package/dist/schema/index.js +54 -28
package/dist/schema/index.js.map +1 -0
package/dist/static/infer.d.ts +4 -3
package/dist/static/infer.d.ts.map +1 -0
package/dist/static/infer.js +1 -0
package/dist/static/infer.js.map +1 -0
package/package.json +35 -4
package/dist/combinators/index.d.ts +0 -57
package/dist/combinators/index.js +0 -104
package/dist/static/parser.d.ts +0 -7
package/dist/static/parser.js +0 -6

package/dist/runtime/parser.js (changed)

@@ -7,7 +7,83 @@
  *   2. Fall back to next level (higher precedence)
  *   3. Base case: try atoms (last level)
  */
-import { Token } from "@sinclair/parsebox";
+import { Token } from '@sinclair/parsebox';
+import { defineNode, number, string, ident, constVal, expr, nullLiteral, booleanLiteral, undefinedLiteral, } from '../schema/index.js';
+// =============================================================================
+// Built-in Atoms
+// =============================================================================
+/**
+ * Built-in atom schemas.
+ * These are always appended as the last level of the grammar.
+ * Users don't need to define these - they're provided automatically.
+ */
+/**
+ * Precedence for built-in atoms.
+ * Atoms are precedence 0 (base case), operators have precedence 1, 2, 3, etc.
+ * Note: Atoms are appended separately, so this value isn't used in sorting.
+ */
+const ATOM_PRECEDENCE = 0;
+/** Number literal atom - matches numeric literals */
+const numberLiteral = defineNode({
+    name: 'numberLiteral',
+    pattern: [number()],
+    precedence: ATOM_PRECEDENCE,
+    resultType: 'number',
+});
+/** String literal atom - matches strings with " or ' quotes */
+const stringLiteral = defineNode({
+    name: 'stringLiteral',
+    pattern: [string(['"', "'"])],
+    precedence: ATOM_PRECEDENCE,
+    resultType: 'string',
+});
+/** Identifier atom - matches identifiers */
+const identifierAtom = defineNode({
+    name: 'identifier',
+    pattern: [ident()],
+    precedence: ATOM_PRECEDENCE,
+    resultType: 'unknown',
+});
+/** Parentheses atom - matches ( expr ) for grouping */
+const parentheses = defineNode({
+    name: 'parentheses',
+    pattern: [constVal('('), expr().as('inner'), constVal(')')],
+    precedence: ATOM_PRECEDENCE,
+    resultType: 'unknown',
+});
+/** Null literal atom - matches the keyword null */
+const nullAtom = defineNode({
+    name: 'nullLiteral',
+    pattern: [nullLiteral()],
+    precedence: ATOM_PRECEDENCE,
+    resultType: 'null',
+});
+/** Boolean literal atom - matches true or false */
+const booleanAtom = defineNode({
+    name: 'booleanLiteral',
+    pattern: [booleanLiteral()],
+    precedence: ATOM_PRECEDENCE,
+    resultType: 'boolean',
+});
+/** Undefined literal atom - matches the keyword undefined */
+const undefinedAtom = defineNode({
+    name: 'undefinedLiteral',
+    pattern: [undefinedLiteral()],
+    precedence: ATOM_PRECEDENCE,
+    resultType: 'undefined',
+});
+/** All built-in atoms, used as the last level of the grammar */
+// Note: Keyword literals (null, true, false, undefined) must come BEFORE
+// identifierAtom to ensure they're matched correctly rather than as identifiers
+export const BUILT_IN_ATOMS = [
+    numberLiteral,
+    stringLiteral,
+    nullAtom,
+    booleanAtom,
+    undefinedAtom,
+    identifierAtom,
+    parentheses,
+];
 // =============================================================================
 // Primitive Parsers
 // =============================================================================
@@ -17,24 +93,174 @@ function parseNumber(input) {
         return [];
     return [
         {
-            node: "literal",
+            node: 'literal',
             raw: result[0],
             value: +result[0],
-            outputSchema: "number",
+            outputSchema: 'number',
         },
         result[1],
     ];
 }
+/**
+ * Process escape sequences in a string.
+ * Supports: \n, \t, \r, \\, \", \', \0, \b, \f, \v, \xHH, \uHHHH
+ *
+ * @param str - The raw string with escape sequences
+ * @returns The processed string with escape sequences converted
+ */
+export function processEscapeSequences(str) {
+    let result = '';
+    let i = 0;
+    while (i < str.length) {
+        if (str[i] === '\\') {
+            if (i + 1 >= str.length) {
+                // Trailing backslash - keep as-is
+                result += '\\';
+                i++;
+                continue;
+            }
+            const next = str[i + 1];
+            switch (next) {
+                case 'n':
+                    result += '\n';
+                    i += 2;
+                    break;
+                case 't':
+                    result += '\t';
+                    i += 2;
+                    break;
+                case 'r':
+                    result += '\r';
+                    i += 2;
+                    break;
+                case '\\':
+                    result += '\\';
+                    i += 2;
+                    break;
+                case '"':
+                    result += '"';
+                    i += 2;
+                    break;
+                case "'":
+                    result += "'";
+                    i += 2;
+                    break;
+                case '0':
+                    result += '\0';
+                    i += 2;
+                    break;
+                case 'b':
+                    result += '\b';
+                    i += 2;
+                    break;
+                case 'f':
+                    result += '\f';
+                    i += 2;
+                    break;
+                case 'v':
+                    result += '\v';
+                    i += 2;
+                    break;
+                case 'x': {
+                    // \xHH - two hex digits
+                    if (i + 3 < str.length) {
+                        const hex = str.slice(i + 2, i + 4);
+                        if (/^[0-9a-fA-F]{2}$/.test(hex)) {
+                            result += String.fromCharCode(parseInt(hex, 16));
+                            i += 4;
+                            break;
+                        }
+                    }
+                    // Invalid \x escape - keep as-is
+                    result += '\\x';
+                    i += 2;
+                    break;
+                }
+                case 'u': {
+                    // \uHHHH - four hex digits
+                    if (i + 5 < str.length) {
+                        const hex = str.slice(i + 2, i + 6);
+                        if (/^[0-9a-fA-F]{4}$/.test(hex)) {
+                            result += String.fromCharCode(parseInt(hex, 16));
+                            i += 6;
+                            break;
+                        }
+                    }
+                    // Invalid \u escape - keep as-is
+                    result += '\\u';
+                    i += 2;
+                    break;
+                }
+                default:
+                    // Unknown escape - keep backslash and character
+                    result += '\\' + next;
+                    i += 2;
+                    break;
+            }
+        }
+        else {
+            result += str[i];
+            i++;
+        }
+    }
+    return result;
+}
+/**
+ * Parse a string literal with proper escape sequence handling.
+ * Unlike Token.String, this parser correctly handles escaped quotes within strings.
+ */
+function parseStringLiteral(quotes, input) {
+    // Trim leading whitespace
+    const trimmed = input.replace(/^[\s]*/, '');
+    if (trimmed.length === 0)
+        return [];
+    // Check for opening quote
+    const openQuote = quotes.find((q) => trimmed.startsWith(q));
+    if (!openQuote)
+        return [];
+    // Find closing quote, respecting escape sequences
+    let i = openQuote.length;
+    let rawContent = '';
+    while (i < trimmed.length) {
+        const char = trimmed[i];
+        // Check for escape sequence
+        if (char === '\\') {
+            if (i + 1 < trimmed.length) {
+                // Include both the backslash and the escaped character in raw content
+                rawContent += char + trimmed[i + 1];
+                i += 2;
+                continue;
+            }
+            else {
+                // Trailing backslash - include it
+                rawContent += char;
+                i++;
+                continue;
+            }
+        }
+        // Check for closing quote
+        if (char === openQuote) {
+            return [rawContent, trimmed.slice(i + openQuote.length)];
+        }
+        // Regular character
+        rawContent += char;
+        i++;
+    }
+    // Unterminated string
+    return [];
+}
 function parseString(quotes, input) {
-    const result = Token.String([...quotes], input);
+    const result = parseStringLiteral(quotes, input);
     if (result.length === 0)
         return [];
+    const rawValue = result[0];
+    const processedValue = processEscapeSequences(rawValue);
     return [
         {
-            node: "literal",
-            raw: result[0],
-            value: result[0],
-            outputSchema: "string",
+            node: 'literal',
+            raw: rawValue,
+            value: processedValue,
+            outputSchema: 'string',
         },
         result[1],
     ];
@@ -44,11 +270,9 @@ function parseIdent(input, context) {
     if (result.length === 0)
         return [];
     const name = result[0];
-    const valueType = name in context.data
-        ? context.data[name]
-        : "unknown";
+    const valueType = name in context.data ? context.data[name] : 'unknown';
     return [
-        { node: "identifier", name, outputSchema: valueType },
+        { node: 'identifier', name, outputSchema: valueType },
         result[1],
     ];
 }
@@ -56,43 +280,113 @@ function parseConst(value, input) {
     const result = Token.Const(value, input);
     if (result.length === 0)
         return [];
-    return [{ node: "const", outputSchema: JSON.stringify(value) }, result[1]];
+    return [{ node: 'const', outputSchema: JSON.stringify(value) }, result[1]];
+}
+function parseNull(input) {
+    const result = Token.Const('null', input);
+    if (result.length === 0)
+        return [];
+    // Ensure it's not part of a longer identifier (e.g., "nullable")
+    const remaining = result[1];
+    if (remaining.length > 0 && /^[a-zA-Z0-9_$]/.test(remaining)) {
+        return [];
+    }
+    return [
+        {
+            node: 'literal',
+            raw: 'null',
+            value: null,
+            outputSchema: 'null',
+        },
+        remaining,
+    ];
+}
+function parseBoolean(input) {
+    // Try "true" first
+    let result = Token.Const('true', input);
+    if (result.length === 2) {
+        const remaining = result[1];
+        // Ensure it's not part of a longer identifier (e.g., "trueName")
+        if (remaining.length === 0 || !/^[a-zA-Z0-9_$]/.test(remaining)) {
+            return [
+                {
+                    node: 'literal',
+                    raw: 'true',
+                    value: true,
+                    outputSchema: 'boolean',
+                },
+                remaining,
+            ];
+        }
+    }
+    // Try "false"
+    result = Token.Const('false', input);
+    if (result.length === 2) {
+        const remaining = result[1];
+        // Ensure it's not part of a longer identifier (e.g., "falsePositive")
+        if (remaining.length === 0 || !/^[a-zA-Z0-9_$]/.test(remaining)) {
+            return [
+                {
+                    node: 'literal',
+                    raw: 'false',
+                    value: false,
+                    outputSchema: 'boolean',
+                },
+                remaining,
+            ];
+        }
+    }
+    return [];
+}
+function parseUndefined(input) {
+    const result = Token.Const('undefined', input);
+    if (result.length === 0)
+        return [];
+    // Ensure it's not part of a longer identifier (e.g., "undefinedVar")
+    const remaining = result[1];
+    if (remaining.length > 0 && /^[a-zA-Z0-9_$]/.test(remaining)) {
+        return [];
+    }
+    return [
+        {
+            node: 'literal',
+            raw: 'undefined',
+            value: undefined,
+            outputSchema: 'undefined',
+        },
+        remaining,
+    ];
 }
 // =============================================================================
 // Build Runtime Grammar from Node Schemas
 // =============================================================================
 /**
- * Build runtime grammar from node schemas.
+ * Build runtime grammar from operator schemas.
  *
  * Returns a flat tuple of levels:
- *   [[ops@prec1], [ops@prec2], ..., [atoms]]
+ *   [[ops@prec1], [ops@prec2], ..., [builtInAtoms]]
  *
- * Levels are sorted by precedence ascending (lowest first).
- * Atoms are always the last level.
+ * Operators are sorted by precedence ascending (lowest first).
+ * Built-in atoms are always appended as the last level.
  */
-export function buildGrammar(nodes) {
-    const atoms = [];
-    const operators = new Map();
-    for (const node of nodes) {
-        if (node.precedence === "atom") {
-            atoms.push(node);
-        }
-        else {
-            const prec = node.precedence;
-            if (!operators.has(prec)) {
-                operators.set(prec, []);
-            }
-            operators.get(prec).push(node);
-        }
+export function buildGrammar(operators) {
+    const operatorsByPrec = new Map();
+    const operatorsAndPrimitives = [...operators];
+    for (const op of operatorsAndPrimitives) {
+        const prec = op.precedence;
+        const ops = operatorsByPrec.get(prec) ?? [];
+        operatorsByPrec.set(prec, ops);
+        ops.push(op);
     }
     // Sort precedences ascending
-    const precedences = [...operators.keys()].sort((a, b) => a - b);
-    // Build flat grammar: [[ops@prec1], [ops@prec2], ..., [atoms]]
+    const precedences = [...operatorsByPrec.keys()].sort((a, b) => a - b);
+    // Build flat grammar: [[ops@prec1], [ops@prec2], ..., [builtInAtoms]]
     const grammar = [];
     for (const prec of precedences) {
-        grammar.push(operators.get(prec));
+        grammar.push(operatorsByPrec.get(prec) ?? []);
     }
-    grammar.push(atoms);
+    // Append built-in atoms as the last level
+    grammar.push(BUILT_IN_ATOMS);
     return grammar;
 }
 // =============================================================================
@@ -103,14 +397,20 @@ export function buildGrammar(nodes) {
  */
 function parseElement(element, input, context) {
     switch (element.kind) {
-        case "number":
+        case 'number':
             return parseNumber(input);
-        case "string":
+        case 'string':
             return parseString(element.quotes, input);
-        case "ident":
+        case 'ident':
             return parseIdent(input, context);
-        case "const":
+        case 'const':
             return parseConst(element.value, input);
+        case 'null':
+            return parseNull(input);
+        case 'boolean':
+            return parseBoolean(input);
+        case 'undefined':
+            return parseUndefined(input);
         default:
             return [];
     }
@@ -124,14 +424,14 @@ function parseElement(element, input, context) {
  * - "expr": fullGrammar (full reset for delimited contexts)
  */
 function parseElementWithLevel(element, input, context, currentLevels, nextLevels, fullGrammar) {
-    if (element.kind === "expr") {
+    if (element.kind === 'expr') {
         const exprElement = element;
         const constraint = exprElement.constraint;
         const role = exprElement.role;
-        if (role === "lhs") {
+        if (role === 'lhs') {
             return parseExprWithConstraint(nextLevels, input, context, constraint, fullGrammar);
         }
-        else if (role === "rhs") {
+        else if (role === 'rhs') {
             return parseExprWithConstraint(currentLevels, input, context, constraint, fullGrammar);
         }
         else {
@@ -165,7 +465,7 @@ function extractBindings(pattern, children) {
         const element = pattern[i];
         const child = children[i];
         // Check if element is a NamedSchema (has __named and name properties)
-        if ("__named" in element && element.__named === true) {
+        if ('__named' in element && element.__named === true) {
             bindings[element.name] = child;
         }
     }
@@ -178,7 +478,46 @@ function extractBindings(pattern, children) {
  * - Single child without names: passthrough (atom behavior)
  * - If configure() provided: transform bindings to fields
  * - Otherwise: bindings become node fields directly
+ *
+ * Special case: If resultType is "unknown" and there's exactly one named binding,
+ * we propagate that binding's outputSchema (for generic parentheses, etc.).
+ */
+/**
+ * Helper: Check if resultType is a UnionResultType (computed union).
  */
+function isUnionResultType(resultType) {
+    return typeof resultType === 'object' && resultType !== null && 'union' in resultType;
+}
+/**
+ * Helper: Compute the union outputSchema string from multiple bindings.
+ * Given a list of binding names, extracts each binding's outputSchema and
+ * constructs a union string like "boolean | number".
+ *
+ * @example
+ * // bindings = { then: { outputSchema: 'boolean' }, else: { outputSchema: 'number' } }
+ * // names = ['then', 'else']
+ * // result = 'boolean | number'
+ */
+function computeUnionOutputSchema(bindings, names) {
+    const schemas = [];
+    for (const name of names) {
+        const binding = bindings[name];
+        if (binding?.outputSchema && binding.outputSchema !== 'unknown') {
+            // Only add unique schemas
+            if (!schemas.includes(binding.outputSchema)) {
+                schemas.push(binding.outputSchema);
+            }
+        }
+    }
+    if (schemas.length === 0) {
+        return 'unknown';
+    }
+    if (schemas.length === 1) {
+        return schemas[0];
+    }
+    // Sort for consistency and join with ' | '
+    return schemas.sort().join(' | ');
+}
 function buildNodeResult(nodeSchema, children, context) {
     const bindings = extractBindings(nodeSchema.pattern, children);
     // Single unnamed child → passthrough (atom behavior)
@@ -186,13 +525,33 @@ function buildNodeResult(nodeSchema, children, context) {
         return children[0];
     }
     // Apply configure() if provided, otherwise use bindings directly
-    const fields = nodeSchema.configure
-        ? nodeSchema.configure(bindings, context)
-        : bindings;
+    const fields = nodeSchema.configure ? nodeSchema.configure(bindings, context) : bindings;
+    // Determine output schema:
+    // - If resultType is a UnionResultType, compute the union from the specified bindings
+    // - If resultType is "unknown" and there's exactly one named binding, use its outputSchema
+    // - Otherwise use the node's static resultType
+    let outputSchema;
+    if (isUnionResultType(nodeSchema.resultType)) {
+        // Computed union: extract schemas from named bindings and join with ' | '
+        outputSchema = computeUnionOutputSchema(bindings, nodeSchema.resultType.union);
+    }
+    else {
+        outputSchema = nodeSchema.resultType;
+        // TODO (see type ComputeOutputSchema<>): Remove hacky logic and use HKT potentially
+        if (outputSchema === 'unknown') {
+            const bindingKeys = Object.keys(bindings);
+            if (bindingKeys.length === 1) {
+                const singleBinding = bindings[bindingKeys[0]];
+                if (singleBinding.outputSchema) {
+                    outputSchema = singleBinding.outputSchema;
+                }
+            }
+        }
+    }
     // Build node with fields
     return {
         node: nodeSchema.name,
-        outputSchema: nodeSchema.resultType,
+        outputSchema,
         ...fields,
     };
 }
@@ -269,3 +628,119 @@ export function parse(nodes, input, context) {
     const grammar = buildGrammar(nodes);
     return parseLevels(grammar, input, context, grammar);
 }
+// =============================================================================
+// Enhanced Parse API with Error Information
+// =============================================================================
+import { noMatchError, emptyInputError, } from '../errors.js';
+/**
+ * Parse input with rich error information.
+ *
+ * Unlike `parse()` which returns an empty array on failure, this function
+ * returns detailed error information including:
+ * - Position (line, column, offset)
+ * - Error message
+ * - Source snippet showing where the error occurred
+ *
+ * @example
+ * ```ts
+ * const result = parseWithErrors([add], "1 + ", context);
+ * if (!result.success) {
+ *   console.log(result.error.message);
+ *   // "No grammar rule matched at position 1:5: """
+ *   console.log(result.error.snippet);
+ *   // "1 + →"
+ * }
+ * ```
+ */
+export function parseWithErrors(nodes, input, context) {
+    // Handle empty/whitespace-only input
+    if (input.trim().length === 0) {
+        return {
+            success: false,
+            error: emptyInputError(input),
+            input,
+        };
+    }
+    const grammar = buildGrammar(nodes);
+    const result = parseLevels(grammar, input, context, grammar);
+    if (result.length === 0) {
+        // Parse failed - determine where it failed
+        // Try to find how far we got before failing
+        const failOffset = findFailureOffset(grammar, input, context);
+        return {
+            success: false,
+            error: noMatchError(input, failOffset),
+            input,
+        };
+    }
+    // Parse succeeded
+    return {
+        success: true,
+        ast: result[0],
+        remaining: result[1],
+        input,
+    };
+}
+/**
+ * Estimate the offset where parsing failed by re-parsing the trimmed input once and
+ * measuring how much was consumed; this heuristic gives more accurate error positions.
+ */
+function findFailureOffset(grammar, input, context) {
+    // Start by trimming leading whitespace since the parser does this
+    const trimmed = input.replace(/^[\s]*/, '');
+    const leadingWs = input.length - trimmed.length;
+    if (trimmed.length === 0) {
+        return 0;
+    }
+    // Try to parse and track how far we get
+    // This is a simplified heuristic - in a more complex implementation,
+    // we would thread position tracking through all parse functions
+    let furthestOffset = leadingWs;
+    // Try to parse the first atom/expression
+    const result = parseLevels(grammar, trimmed, context, grammar);
+    if (result.length === 2) {
+        // We parsed something - the failure is after what we parsed
+        const parsedLength = trimmed.length - result[1].length;
+        furthestOffset = leadingWs + parsedLength;
+        // Check if there's unparsed content
+        const remaining = result[1].trim();
+        if (remaining.length > 0) {
+            // There's remaining unparsed content - that's where the error is
+            furthestOffset = input.length - result[1].trimStart().length;
+        }
+    }
+    return furthestOffset;
+}
+/**
+ * Format a parse error for display.
+ *
+ * @example
+ * ```ts
+ * const result = parseWithErrors([add], "1 + ", context);
+ * if (!result.success) {
+ *   console.log(formatParseError(result.error));
+ *   // Error at line 1, column 5:
+ *   //   No grammar rule matched at position 1:5: ""
+ *   //
+ *   //   1 + →
+ * }
+ * ```
+ */
+export function formatParseError(error) {
+    const { position, message, snippet } = error;
+    const lines = [];
+    lines.push(`Error at line ${position.line}, column ${position.column}:`);
+    lines.push(`  ${message}`);
+    lines.push('');
+    lines.push(`  ${snippet}`);
+    if (error.context) {
+        const ctx = error.context;
+        if (ctx.expected && ctx.actual) {
+            lines.push('');
+            lines.push(`  Expected: ${ctx.expected}`);
+            lines.push(`  Actual:   ${ctx.actual}`);
+        }
+    }
+    return lines.join('\n');
+}
+//# sourceMappingURL=parser.js.map