npm - tarsec - Versions diffs - 0.1.8 → 0.1.9 - Mend

tarsec 0.1.8 → 0.1.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11)

package/README.md CHANGED Viewed

@@ -45,6 +45,7 @@ parser("hello there"); // failure
 - Tools to debug your parser's [performance](/tutorials/performance.md)
 - Partial [backtracking](/tutorials/backtracking.md) support
 - A way to make your parser more [secure](/tutorials/security.md).
+- [Pretty error messages](/tutorials/pretty-errors.md)
 ## Examples
 - [A markdown parser](/tests/examples/markdown.ts)

package/dist/combinators.d.ts CHANGED Viewed

@@ -438,3 +438,55 @@ export declare function and<const T extends readonly GeneralParser<any, any>[]>(
  * @returns
  */
 export declare function parseError<const T extends readonly GeneralParser<any, any>[]>(_message: string, ...parsers: T): Parser<MergedCaptures<T>>;
+/**
+ * Defers evaluation of a parser, allowing recursive parser definitions.
+ * Without `lazy`, you can't reference a parser before it's defined:
+ *
+ * ```ts
+ * // ReferenceError: expr is not defined
+ * const expr = or(number, seqR(char("("), expr, char(")")));
+ *
+ * // Works: the reference to expr is deferred
+ * const expr = or(number, seqR(char("("), lazy(() => expr), char(")")));
+ * ```
+ *
+ * @param thunk - a function that returns the parser
+ * @returns - a parser that evaluates the thunk on each parse attempt
+ */
+export declare function lazy<T>(thunk: () => Parser<T>): Parser<T>;
+export type Associativity = "left" | "right";
+export type OperatorInfo<T> = {
+    /** A parser that matches the operator (e.g. `char("+")` or `str("**")`) */
+    op: Parser<any>;
+    /** Whether the operator is left- or right-associative */
+    assoc: Associativity;
+    /** A function that combines the left and right operands.
+     * For example: `(a, b) => a + b` */
+    apply: (left: T, right: T) => T;
+};
+/**
+ * Builds an expression parser that handles operator precedence and associativity.
+ * Operators are given as an array of precedence levels, from **highest** to **lowest**.
+ * Each level is an array of operators at that precedence.
+ *
+ * ```ts
+ * const expr = buildExpressionParser(
+ *   numberParser,
+ *   [
+ *     [{ op: char("*"), assoc: "left", apply: (a, b) => a * b },
+ *      { op: char("/"), assoc: "left", apply: (a, b) => a / b }],
+ *     [{ op: char("+"), assoc: "left", apply: (a, b) => a + b },
+ *      { op: char("-"), assoc: "left", apply: (a, b) => a - b }],
+ *   ],
+ *   // optional: override how parenthesized sub-expressions are parsed
+ *   // defaults to: seqR(char("("), lazy(() => expr), char(")")) mapped to the middle result
+ * );
+ * ```
+ *
+ * @param atom - parser for the smallest unit (numbers, identifiers, parenthesized sub-expressions)
+ * @param operatorTable - array of precedence levels, highest first. Each level is an array of OperatorInfo.
+ * @param parenParser - optional parser for parenthesized sub-expressions. If not provided,
+ *   one is built automatically using `(` and `)` with `lazy` to handle recursion.
+ * @returns - a parser that handles the full expression grammar
+ */
+export declare function buildExpressionParser<T>(atom: Parser<T>, operatorTable: OperatorInfo<T>[][], parenParser?: Parser<T>): Parser<T>;

package/dist/combinators.js CHANGED Viewed

@@ -1,6 +1,6 @@
 import { within } from "./parsers/within.js";
 import { TarsecError } from "./tarsecError.js";
-import { getInputStr, trace } from "./trace.js";
+import { getDiagnostics, trace } from "./trace.js";
 import { captureSuccess, createTree, failure, isCaptureResult, isSuccess, success, } from "./types.js";
 import { escape, findAncestorWithNextParser, popMany } from "./utils.js";
 /**
@@ -871,47 +871,158 @@ export function parseError(_message, ...parsers) {
             return result;
         }
         else {
-            const inputStr = getInputStr();
-            const messages = [];
-            const prefix = "Near: ";
-            if (inputStr.length > 0) {
-                const index = inputStr.length - input.length;
-                const start = Math.max(0, index - 20);
-                const end = Math.min(inputStr.length, index + 20);
-                const previewStr = inputStr.substring(start, end).split("\n")[0];
-                messages.push(`${prefix}${previewStr}`);
-                messages.push(`${" ".repeat(index + prefix.length)}^`);
-                messages.push(_message);
-                const message = messages.join("\n");
-                const lines = inputStr.split("\n");
-                let acc = 0;
-                let i = 0;
-                while (index >= acc) {
-                    acc += lines[i].length;
-                    i++;
-                }
-                const linesIndex = Math.max(0, i - 1);
-                const column = lines[linesIndex].length - (acc - index);
-                throw new TarsecError({
-                    line: i - 1,
-                    column,
-                    length: 1,
-                    prettyMessage: message,
-                    message: _message,
-                });
-            }
-            else {
-                messages.push(`${prefix}${input.substring(1, 100)}`);
-                messages.push(_message);
-                const message = messages.join("\n");
-                throw new TarsecError({
-                    line: 0,
-                    column: 0,
-                    length: 0,
-                    prettyMessage: message,
-                    message: _message,
-                });
-            }
+            const error = getDiagnostics(result, input, _message);
+            throw new TarsecError(error);
+        }
+    };
+}
+/**
+ * Defers evaluation of a parser, allowing recursive parser definitions.
+ * Without `lazy`, you can't reference a parser before it's defined:
+ *
+ * ```ts
+ * // ReferenceError: expr is not defined
+ * const expr = or(number, seqR(char("("), expr, char(")")));
+ *
+ * // Works: the reference to expr is deferred
+ * const expr = or(number, seqR(char("("), lazy(() => expr), char(")")));
+ * ```
+ *
+ * @param thunk - a function that returns the parser
+ * @returns - a parser that evaluates the thunk on each parse attempt
+ */
+export function lazy(thunk) {
+    return trace("lazy", (input) => thunk()(input));
+}
+/**
+ * Builds an expression parser that handles operator precedence and associativity.
+ * Operators are given as an array of precedence levels, from **highest** to **lowest**.
+ * Each level is an array of operators at that precedence.
+ *
+ * ```ts
+ * const expr = buildExpressionParser(
+ *   numberParser,
+ *   [
+ *     [{ op: char("*"), assoc: "left", apply: (a, b) => a * b },
+ *      { op: char("/"), assoc: "left", apply: (a, b) => a / b }],
+ *     [{ op: char("+"), assoc: "left", apply: (a, b) => a + b },
+ *      { op: char("-"), assoc: "left", apply: (a, b) => a - b }],
+ *   ],
+ *   // optional: override how parenthesized sub-expressions are parsed
+ *   // defaults to: seqR(char("("), lazy(() => expr), char(")")) mapped to the middle result
+ * );
+ * ```
+ *
+ * @param atom - parser for the smallest unit (numbers, identifiers, parenthesized sub-expressions)
+ * @param operatorTable - array of precedence levels, highest first. Each level is an array of OperatorInfo.
+ * @param parenParser - optional parser for parenthesized sub-expressions. If not provided,
+ *   one is built automatically using `(` and `)` with `lazy` to handle recursion.
+ * @returns - a parser that handles the full expression grammar
+ */
+export function buildExpressionParser(atom, operatorTable, parenParser) {
+    // Build the parser from highest precedence to lowest.
+    // Each level wraps the previous one, so higher-precedence operators bind tighter.
+    // The "base" is the atom or a parenthesized expression.
+    // We use lazy for the paren parser since it references the final `expr` which doesn't exist yet.
+    let expr;
+    const base = (input) => {
+        const paren = parenParser !== null && parenParser !== void 0 ? parenParser : buildDefaultParenParser();
+        const result = paren(input);
+        if (result.success)
+            return result;
+        return atom(input);
+    };
+    function buildDefaultParenParser() {
+        return (input) => {
+            const openResult = input[0] === "(" ? success("(", input.slice(1)) : failure("expected (", input);
+            if (!openResult.success)
+                return openResult;
+            const exprResult = expr(openResult.rest);
+            if (!exprResult.success)
+                return failure(exprResult.message, input);
+            const closeResult = exprResult.rest[0] === ")" ? success(")", exprResult.rest.slice(1)) : failure("expected )", input);
+            if (!closeResult.success)
+                return closeResult;
+            return success(exprResult.result, closeResult.rest);
+        };
+    }
+    // Start with the base (atoms and parens), then wrap with each precedence level
+    let currentLevel = base;
+    for (let i = 0; i < operatorTable.length; i++) {
+        const ops = operatorTable[i];
+        currentLevel = buildLevel(currentLevel, ops);
+    }
+    expr = currentLevel;
+    return expr;
+}
+function buildLevel(nextLevel, ops) {
+    return (input) => {
+        const leftResult = nextLevel(input);
+        if (!leftResult.success)
+            return leftResult;
+        // Try to parse a chain of operators at this level
+        let left = leftResult.result;
+        let rest = leftResult.rest;
+        // For right-associative, we collect all operands and operators, then fold right
+        const rightOps = ops.filter((o) => o.assoc === "right");
+        const leftOps = ops.filter((o) => o.assoc === "left");
+        if (rightOps.length > 0 && leftOps.length > 0) {
+            // Mixed associativity at same level: handle left-assoc first in the loop,
+            // then right-assoc. This is an unusual case but we handle it.
+            return parseChain(left, rest, nextLevel, ops);
+        }
+        else if (rightOps.length > 0) {
+            return parseRight(left, rest, nextLevel, rightOps);
+        }
+        else {
+            return parseLeft(left, rest, nextLevel, leftOps);
         }
     };
 }
+function parseLeft(left, rest, nextLevel, ops) {
+    while (true) {
+        const opMatch = tryOps(ops, rest);
+        if (!opMatch)
+            break;
+        const rightResult = nextLevel(opMatch.rest);
+        if (!rightResult.success)
+            break;
+        left = opMatch.apply(left, rightResult.result);
+        rest = rightResult.rest;
+    }
+    return success(left, rest);
+}
+function parseRight(left, rest, nextLevel, ops) {
+    const opMatch = tryOps(ops, rest);
+    if (!opMatch)
+        return success(left, rest);
+    const rightResult = nextLevel(opMatch.rest);
+    if (!rightResult.success)
+        return success(left, rest);
+    // Recursively parse the right side to get right-associativity
+    const rightFolded = parseRight(rightResult.result, rightResult.rest, nextLevel, ops);
+    return success(opMatch.apply(left, rightFolded.result), rightFolded.rest);
+}
+function parseChain(left, rest, nextLevel, ops) {
+    // Fallback: treat everything as left-associative
+    while (true) {
+        const opMatch = tryOps(ops, rest);
+        if (!opMatch)
+            break;
+        const rightResult = nextLevel(opMatch.rest);
+        if (!rightResult.success)
+            break;
+        left = opMatch.apply(left, rightResult.result);
+        rest = rightResult.rest;
+    }
+    return success(left, rest);
+}
+function tryOps(ops, input) {
+    for (const op of ops) {
+        const result = op.op(input);
+        if (result.success) {
+            return { rest: result.rest, apply: op.apply };
+        }
+    }
+    return null;
+}

package/dist/index.d.ts CHANGED Viewed

@@ -3,3 +3,4 @@ export * from "./combinators.js";
 export * from "./trace.js";
 export * from "./types.js";
 export * from "./tarsecError.js";
+export * from "./position.js";

package/dist/index.js CHANGED Viewed

@@ -3,3 +3,4 @@ export * from "./combinators.js";
 export * from "./trace.js";
 export * from "./types.js";
 export * from "./tarsecError.js";
+export * from "./position.js";

package/dist/position.d.ts ADDED Viewed

@@ -0,0 +1,45 @@
+import { Parser } from "./types.js";
+export type Position = {
+    offset: number;
+    line: number;
+    column: number;
+};
+export type Span = {
+    start: Position;
+    end: Position;
+};
+/**
+ * Build a lookup table of line-start offsets for a given source string.
+ * This allows O(log n) offset-to-position conversion via binary search.
+ */
+export declare function buildLineTable(source: string): number[];
+/**
+ * Convert an absolute offset into a line and column using a precomputed line table.
+ * Both line and column are 0-based.
+ */
+export declare function offsetToPosition(lineTable: number[], offset: number): Position;
+/**
+ * A zero-width parser that returns the current offset into the input string.
+ * Requires `setInputStr` to have been called with the full input.
+ */
+export declare const getOffset: Parser<number>;
+/**
+ * A zero-width parser that returns the current position (offset, line, column).
+ * Requires `setInputStr` to have been called with the full input.
+ */
+export declare const getPosition: Parser<Position>;
+/**
+ * Wraps a parser so that its result includes span information (start and end positions).
+ * Useful for building ASTs with location data for language servers / editors.
+ * Requires `setInputStr` to have been called with the full input.
+ *
+ * @example
+ * ```ts
+ * const locatedWord = withSpan(word);
+ * // Result: { value: "hello", span: { start: { offset: 0, line: 0, column: 0 }, end: { offset: 5, line: 0, column: 5 } } }
+ * ```
+ */
+export declare function withSpan<T>(parser: Parser<T>): Parser<{
+    value: T;
+    span: Span;
+}>;

package/dist/position.js ADDED Viewed

@@ -0,0 +1,85 @@
+import { getInputStr } from "./trace.js";
+import { success } from "./types.js";
+/**
+ * Build a lookup table of line-start offsets for a given source string.
+ * This allows O(log n) offset-to-position conversion via binary search.
+ */
+export function buildLineTable(source) {
+    const lineStarts = [0];
+    for (let i = 0; i < source.length; i++) {
+        if (source[i] === "\n") {
+            lineStarts.push(i + 1);
+        }
+    }
+    return lineStarts;
+}
+/**
+ * Convert an absolute offset into a line and column using a precomputed line table.
+ * Both line and column are 0-based.
+ */
+export function offsetToPosition(lineTable, offset) {
+    // binary search for the line
+    let lo = 0;
+    let hi = lineTable.length - 1;
+    while (lo < hi) {
+        const mid = (lo + hi + 1) >> 1;
+        if (lineTable[mid] <= offset) {
+            lo = mid;
+        }
+        else {
+            hi = mid - 1;
+        }
+    }
+    return {
+        offset,
+        line: lo,
+        column: offset - lineTable[lo],
+    };
+}
+/**
+ * A zero-width parser that returns the current offset into the input string.
+ * Requires `setInputStr` to have been called with the full input.
+ */
+export const getOffset = (input) => {
+    const source = getInputStr();
+    return success(source.length - input.length, input);
+};
+/**
+ * A zero-width parser that returns the current position (offset, line, column).
+ * Requires `setInputStr` to have been called with the full input.
+ */
+export const getPosition = (input) => {
+    const source = getInputStr();
+    const offset = source.length - input.length;
+    const lineTable = buildLineTable(source);
+    return success(offsetToPosition(lineTable, offset), input);
+};
+/**
+ * Wraps a parser so that its result includes span information (start and end positions).
+ * Useful for building ASTs with location data for language servers / editors.
+ * Requires `setInputStr` to have been called with the full input.
+ *
+ * @example
+ * ```ts
+ * const locatedWord = withSpan(word);
+ * // Result: { value: "hello", span: { start: { offset: 0, line: 0, column: 0 }, end: { offset: 5, line: 0, column: 5 } } }
+ * ```
+ */
+export function withSpan(parser) {
+    return (input) => {
+        const source = getInputStr();
+        const lineTable = buildLineTable(source);
+        const startOffset = source.length - input.length;
+        const result = parser(input);
+        if (!result.success)
+            return result;
+        const endOffset = source.length - result.rest.length;
+        return success({
+            value: result.result,
+            span: {
+                start: offsetToPosition(lineTable, startOffset),
+                end: offsetToPosition(lineTable, endOffset),
+            },
+        }, result.rest);
+    };
+}

package/dist/tarsecError.d.ts CHANGED Viewed

@@ -1,4 +1,4 @@
-type TarsecErrorData = {
+export type TarsecErrorData = {
     line: number;
     column: number;
     length: number;
@@ -9,4 +9,3 @@ export declare class TarsecError extends Error {
     data: TarsecErrorData;
     constructor(error: TarsecErrorData);
 }
-export {};

package/dist/trace.d.ts CHANGED Viewed

@@ -1,4 +1,5 @@
-import { ParserResult, Parser, PlainObject, CaptureParser } from "./types.js";
+import { ParserResult, Parser, PlainObject, CaptureParser, ParserFailure } from "./types.js";
+import { TarsecErrorData } from "./tarsecError.js";
 export declare function setTraceHost(host: string): void;
 export declare function getTraceHost(): string;
 export declare function setTraceId(id: string): void;
@@ -122,3 +123,4 @@ export declare function limitSteps(limit: number, callback: Function): void;
  */
 export declare function setInputStr(s: string): void;
 export declare function getInputStr(): string;
+export declare function getDiagnostics(result: ParserFailure, input: string, _message?: string): TarsecErrorData;

package/dist/trace.js CHANGED Viewed

@@ -198,3 +198,45 @@ export function setInputStr(s) {
 export function getInputStr() {
     return inputStr;
 }
+export function getDiagnostics(result, input, _message) {
+    const inputStr = getInputStr();
+    const messages = [];
+    const prefix = "Near: ";
+    const message = _message || result.message || "Parsing failed";
+    if (inputStr.length > 0) {
+        const index = inputStr.length - input.length;
+        const start = Math.max(0, index - 20);
+        const end = Math.min(inputStr.length, index + 20);
+        const previewStr = inputStr.substring(start, end).split("\n")[0];
+        messages.push(`${prefix}${previewStr}`);
+        messages.push(`${" ".repeat(index + prefix.length)}^`);
+        messages.push(message);
+        const lines = inputStr.split("\n");
+        let acc = 0;
+        let i = 0;
+        while (index >= acc && i < lines.length) {
+            acc += lines[i].length;
+            i++;
+        }
+        const linesIndex = Math.max(0, i - 1);
+        const column = lines[linesIndex].length - (acc - index);
+        return {
+            line: i - 1,
+            column,
+            length: 1,
+            prettyMessage: messages.join("\n"),
+            message: message,
+        };
+    }
+    else {
+        messages.push(`${prefix}${input.substring(1, 100)}`);
+        messages.push(message);
+        return {
+            line: 0,
+            column: 0,
+            length: 0,
+            prettyMessage: messages.join("\n"),
+            message: message,
+        };
+    }
+}

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "tarsec",
-  "version": "0.1.8",
+  "version": "0.1.9",
   "description": "A parser combinator library for TypeScript, inspired by Parsec.",
   "homepage": "https://github.com/egonSchiele/tarsec",
   "scripts": {
@@ -38,4 +38,4 @@
     "typescript": "^5.4.2",
     "vitest": "^1.4.0"
   }
-}
+}