tarsec 0.1.8 → 0.1.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -45,6 +45,7 @@ parser("hello there"); // failure
45
45
  - Tools to debug your parser's [performance](/tutorials/performance.md)
46
46
  - Partial [backtracking](/tutorials/backtracking.md) support
47
47
  - A way to make your parser more [secure](/tutorials/security.md).
48
+ - [Pretty error messages](/tutorials/pretty-errors.md)
48
49
 
49
50
  ## Examples
50
51
  - [A markdown parser](/tests/examples/markdown.ts)
@@ -438,3 +438,55 @@ export declare function and<const T extends readonly GeneralParser<any, any>[]>(
438
438
  * @returns
439
439
  */
440
440
  export declare function parseError<const T extends readonly GeneralParser<any, any>[]>(_message: string, ...parsers: T): Parser<MergedCaptures<T>>;
441
+ /**
442
+ * Defers evaluation of a parser, allowing recursive parser definitions.
443
+ * Without `lazy`, you can't reference a parser before it's defined:
444
+ *
445
+ * ```ts
446
+ * // ReferenceError: expr is not defined
447
+ * const expr = or(number, seqR(char("("), expr, char(")")));
448
+ *
449
+ * // Works: the reference to expr is deferred
450
+ * const expr = or(number, seqR(char("("), lazy(() => expr), char(")")));
451
+ * ```
452
+ *
453
+ * @param thunk - a function that returns the parser
454
+ * @returns - a parser that evaluates the thunk on each parse attempt
455
+ */
456
+ export declare function lazy<T>(thunk: () => Parser<T>): Parser<T>;
457
+ export type Associativity = "left" | "right";
458
+ export type OperatorInfo<T> = {
459
+ /** A parser that matches the operator (e.g. `char("+")` or `str("**")`) */
460
+ op: Parser<any>;
461
+ /** Whether the operator is left- or right-associative */
462
+ assoc: Associativity;
463
+ /** A function that combines the left and right operands.
464
+ * For example: `(a, b) => a + b` */
465
+ apply: (left: T, right: T) => T;
466
+ };
467
+ /**
468
+ * Builds an expression parser that handles operator precedence and associativity.
469
+ * Operators are given as an array of precedence levels, from **highest** to **lowest**.
470
+ * Each level is an array of operators at that precedence.
471
+ *
472
+ * ```ts
473
+ * const expr = buildExpressionParser(
474
+ * numberParser,
475
+ * [
476
+ * [{ op: char("*"), assoc: "left", apply: (a, b) => a * b },
477
+ * { op: char("/"), assoc: "left", apply: (a, b) => a / b }],
478
+ * [{ op: char("+"), assoc: "left", apply: (a, b) => a + b },
479
+ * { op: char("-"), assoc: "left", apply: (a, b) => a - b }],
480
+ * ],
481
+ * // optional: override how parenthesized sub-expressions are parsed
482
+ * // defaults to: seqR(char("("), lazy(() => expr), char(")")) mapped to the middle result
483
+ * );
484
+ * ```
485
+ *
486
+ * @param atom - parser for the smallest unit (numbers, identifiers). Parenthesized sub-expressions are handled separately by `parenParser`.
487
+ * @param operatorTable - array of precedence levels, highest first. Each level is an array of OperatorInfo.
488
+ * @param parenParser - optional parser for parenthesized sub-expressions. If not provided,
489
+ * one is built automatically using `(` and `)` with `lazy` to handle recursion.
490
+ * @returns - a parser that handles the full expression grammar
491
+ */
492
+ export declare function buildExpressionParser<T>(atom: Parser<T>, operatorTable: OperatorInfo<T>[][], parenParser?: Parser<T>): Parser<T>;
@@ -1,6 +1,6 @@
1
1
  import { within } from "./parsers/within.js";
2
2
  import { TarsecError } from "./tarsecError.js";
3
- import { getInputStr, trace } from "./trace.js";
3
+ import { getDiagnostics, trace } from "./trace.js";
4
4
  import { captureSuccess, createTree, failure, isCaptureResult, isSuccess, success, } from "./types.js";
5
5
  import { escape, findAncestorWithNextParser, popMany } from "./utils.js";
6
6
  /**
@@ -871,47 +871,158 @@ export function parseError(_message, ...parsers) {
871
871
  return result;
872
872
  }
873
873
  else {
874
- const inputStr = getInputStr();
875
- const messages = [];
876
- const prefix = "Near: ";
877
- if (inputStr.length > 0) {
878
- const index = inputStr.length - input.length;
879
- const start = Math.max(0, index - 20);
880
- const end = Math.min(inputStr.length, index + 20);
881
- const previewStr = inputStr.substring(start, end).split("\n")[0];
882
- messages.push(`${prefix}${previewStr}`);
883
- messages.push(`${" ".repeat(index + prefix.length)}^`);
884
- messages.push(_message);
885
- const message = messages.join("\n");
886
- const lines = inputStr.split("\n");
887
- let acc = 0;
888
- let i = 0;
889
- while (index >= acc) {
890
- acc += lines[i].length;
891
- i++;
892
- }
893
- const linesIndex = Math.max(0, i - 1);
894
- const column = lines[linesIndex].length - (acc - index);
895
- throw new TarsecError({
896
- line: i - 1,
897
- column,
898
- length: 1,
899
- prettyMessage: message,
900
- message: _message,
901
- });
902
- }
903
- else {
904
- messages.push(`${prefix}${input.substring(1, 100)}`);
905
- messages.push(_message);
906
- const message = messages.join("\n");
907
- throw new TarsecError({
908
- line: 0,
909
- column: 0,
910
- length: 0,
911
- prettyMessage: message,
912
- message: _message,
913
- });
914
- }
874
+ const error = getDiagnostics(result, input, _message);
875
+ throw new TarsecError(error);
876
+ }
877
+ };
878
+ }
/**
 * Postpones looking up a parser until parse time, which makes recursive
 * grammars expressible:
 *
 * ```ts
 * // Fails: `expr` is referenced while it is still being defined
 * const expr = or(number, seqR(char("("), expr, char(")")));
 *
 * // Works: `expr` is only looked up once the parser actually runs
 * const expr = or(number, seqR(char("("), lazy(() => expr), char(")")));
 * ```
 *
 * @param thunk - a zero-argument function that returns the real parser
 * @returns a parser (registered with `trace` under the name "lazy") that calls
 * `thunk` on every parse attempt and runs the parser it returns
 */
export function lazy(thunk) {
    const deferred = (input) => {
        // Resolve the parser anew on each attempt; nothing is cached.
        const parser = thunk();
        return parser(input);
    };
    return trace("lazy", deferred);
}
/**
 * Builds an expression parser that handles operator precedence and associativity.
 * Operators are given as an array of precedence levels, from **highest** to **lowest**.
 * Each level is an array of operators at that precedence.
 *
 * ```ts
 * const expr = buildExpressionParser(
 *   numberParser,
 *   [
 *     [{ op: char("*"), assoc: "left", apply: (a, b) => a * b },
 *      { op: char("/"), assoc: "left", apply: (a, b) => a / b }],
 *     [{ op: char("+"), assoc: "left", apply: (a, b) => a + b },
 *      { op: char("-"), assoc: "left", apply: (a, b) => a - b }],
 *   ],
 *   // optional third argument: a custom parser for parenthesized sub-expressions
 * );
 * ```
 *
 * @param atom - parser for the smallest unit (numbers, identifiers). It is only
 * tried after the paren parser has failed on the same input.
 * @param operatorTable - array of precedence levels, highest first. Each level is an array of OperatorInfo.
 * @param parenParser - optional parser for parenthesized sub-expressions. If not provided,
 * a default is used that expects `(`, a full expression, then `)`, and yields the
 * inner expression's result.
 * @returns a parser that handles the full expression grammar
 */
export function buildExpressionParser(atom, operatorTable, parenParser) {
    // `expr` is assigned at the very end; the default paren parser only reads it
    // at parse time, so the forward reference is safe.
    let expr;
    // Default "(" expr ")" parser. Every failure is reported against the original
    // input so the caller can fall back to `atom` on the same text.
    const defaultParen = (input) => {
        if (input[0] !== "(")
            return failure("expected (", input);
        const inner = expr(input.slice(1));
        if (!inner.success)
            return failure(inner.message, input);
        if (inner.rest[0] !== ")")
            return failure("expected )", input);
        return success(inner.result, inner.rest.slice(1));
    };
    // Chosen once, instead of constructing a new default parser on every parse.
    const paren = parenParser !== null && parenParser !== undefined ? parenParser : defaultParen;
    // Base level: a parenthesized expression if one matches, otherwise an atom.
    const base = (input) => {
        const parenResult = paren(input);
        return parenResult.success ? parenResult : atom(input);
    };
    // Wrap the base with each precedence level in table order, so operators
    // listed earlier (higher precedence) bind tighter.
    let currentLevel = base;
    for (const ops of operatorTable) {
        currentLevel = buildLevel(currentLevel, ops);
    }
    expr = currentLevel;
    return expr;
}
/**
 * Builds the parser for one precedence level.
 *
 * The returned parser first parses an operand with `nextLevel` and then hands
 * that operand to a chain parser chosen by the associativity of `ops`:
 * - only right-associative operators: `parseRight` with those operators
 * - both left- and right-associative operators: `parseChain` with all of
 *   `ops` (that fallback treats every operator as left-associative)
 * - otherwise: `parseLeft` with the left-associative operators
 *
 * The operator table is inspected once, when the level is built, rather than
 * on every parse.
 *
 * @param nextLevel - parser for operands of this level (the next-tighter level)
 * @param ops - the operators that share this precedence level
 * @returns a parser for one operand followed by any operator chain at this level
 */
function buildLevel(nextLevel, ops) {
    const rightOps = ops.filter((o) => o.assoc === "right");
    const leftOps = ops.filter((o) => o.assoc === "left");
    let parseTail;
    let levelOps;
    if (rightOps.length > 0 && leftOps.length > 0) {
        // Mixed associativity at one level is unusual; use the fallback chain parser.
        parseTail = parseChain;
        levelOps = ops;
    }
    else if (rightOps.length > 0) {
        parseTail = parseRight;
        levelOps = rightOps;
    }
    else {
        parseTail = parseLeft;
        levelOps = leftOps;
    }
    return (input) => {
        const first = nextLevel(input);
        // Without a first operand there is nothing to chain; pass the failure on.
        if (!first.success)
            return first;
        return parseTail(first.result, first.rest, nextLevel, levelOps);
    };
}
/**
 * Folds a left-associative operator chain onto an already-parsed operand.
 *
 * Repeatedly matches one of `ops` followed by an operand from `nextLevel`,
 * combining as it goes, so `a - b - c` becomes `(a - b) - c`. The loop ends
 * when no operator matches or when the operand after an operator fails; in the
 * latter case that operator is left unconsumed.
 *
 * @param left - the operand parsed so far
 * @param rest - the input remaining after `left`
 * @param nextLevel - parser for the operands of this level
 * @param ops - the operators to try at this level
 * @returns a success carrying the folded value and the unconsumed input
 */
function parseLeft(left, rest, nextLevel, ops) {
    let folded = left;
    let remaining = rest;
    for (let opMatch = tryOps(ops, remaining); opMatch; opMatch = tryOps(ops, remaining)) {
        const operand = nextLevel(opMatch.rest);
        if (!operand.success)
            break;
        folded = opMatch.apply(folded, operand.result);
        remaining = operand.rest;
    }
    return success(folded, remaining);
}
/**
 * Folds a right-associative operator chain onto an already-parsed operand.
 *
 * All `operator operand` pairs are read first, then combined from the right,
 * so `a ^ b ^ c` becomes `a ^ (b ^ c)`. Reading stops when no operator matches
 * or when the operand after an operator fails; in the latter case that
 * operator is left unconsumed.
 *
 * @param left - the operand parsed so far
 * @param rest - the input remaining after `left`
 * @param nextLevel - parser for the operands of this level
 * @param ops - the operators to try at this level
 * @returns a success carrying the folded value and the unconsumed input
 */
function parseRight(left, rest, nextLevel, ops) {
    // Each entry remembers a left-hand operand and the operator that follows it.
    const pending = [];
    let current = left;
    let remaining = rest;
    for (;;) {
        const opMatch = tryOps(ops, remaining);
        if (!opMatch)
            break;
        const operand = nextLevel(opMatch.rest);
        if (!operand.success)
            break;
        pending.push({ lhs: current, opMatch });
        current = operand.result;
        remaining = operand.rest;
    }
    // Combine innermost (rightmost) pair first.
    for (let i = pending.length - 1; i >= 0; i--) {
        current = pending[i].opMatch.apply(pending[i].lhs, current);
    }
    return success(current, remaining);
}
/**
 * Chain parser used when one precedence level mixes left- and
 * right-associative operators.
 *
 * Fallback behaviour: every operator is treated as left-associative. The logic
 * is exactly that of `parseLeft`, so this delegates to it instead of keeping a
 * second copy of the loop.
 *
 * @param left - the operand parsed so far
 * @param rest - the input remaining after `left`
 * @param nextLevel - parser for the operands of this level
 * @param ops - all operators at this level, regardless of declared associativity
 * @returns a success carrying the folded value and the unconsumed input
 */
function parseChain(left, rest, nextLevel, ops) {
    return parseLeft(left, rest, nextLevel, ops);
}
/**
 * Tries each operator's `op` parser against `input`, in table order.
 *
 * @param ops - candidate operators; each has an `op` parser and an `apply` function
 * @param input - the text at which an operator is expected
 * @returns `{ rest, apply }` for the first operator whose parser succeeds
 * (`rest` is the input after the operator), or `null` if none match
 */
function tryOps(ops, input) {
    for (let i = 0; i < ops.length; i++) {
        const candidate = ops[i];
        const attempt = candidate.op(input);
        if (!attempt.success)
            continue;
        return { rest: attempt.rest, apply: candidate.apply };
    }
    return null;
}
package/dist/index.d.ts CHANGED
@@ -3,3 +3,4 @@ export * from "./combinators.js";
3
3
  export * from "./trace.js";
4
4
  export * from "./types.js";
5
5
  export * from "./tarsecError.js";
6
+ export * from "./position.js";
package/dist/index.js CHANGED
@@ -3,3 +3,4 @@ export * from "./combinators.js";
3
3
  export * from "./trace.js";
4
4
  export * from "./types.js";
5
5
  export * from "./tarsecError.js";
6
+ export * from "./position.js";
@@ -0,0 +1,45 @@
1
+ import { Parser } from "./types.js";
2
+ export type Position = {
3
+ offset: number;
4
+ line: number;
5
+ column: number;
6
+ };
7
+ export type Span = {
8
+ start: Position;
9
+ end: Position;
10
+ };
11
+ /**
12
+ * Build a lookup table of line-start offsets for a given source string.
13
+ * This allows O(log n) offset-to-position conversion via binary search.
14
+ */
15
+ export declare function buildLineTable(source: string): number[];
16
+ /**
17
+ * Convert an absolute offset into a line and column using a precomputed line table.
18
+ * Both line and column are 0-based.
19
+ */
20
+ export declare function offsetToPosition(lineTable: number[], offset: number): Position;
21
+ /**
22
+ * A zero-width parser that returns the current offset into the input string.
23
+ * Requires `setInputStr` to have been called with the full input.
24
+ */
25
+ export declare const getOffset: Parser<number>;
26
+ /**
27
+ * A zero-width parser that returns the current position (offset, line, column).
28
+ * Requires `setInputStr` to have been called with the full input.
29
+ */
30
+ export declare const getPosition: Parser<Position>;
31
+ /**
32
+ * Wraps a parser so that its result includes span information (start and end positions).
33
+ * Useful for building ASTs with location data for language servers / editors.
34
+ * Requires `setInputStr` to have been called with the full input.
35
+ *
36
+ * @example
37
+ * ```ts
38
+ * const locatedWord = withSpan(word);
39
+ * // Result: { value: "hello", span: { start: { offset: 0, line: 0, column: 0 }, end: { offset: 5, line: 0, column: 5 } } }
40
+ * ```
41
+ */
42
+ export declare function withSpan<T>(parser: Parser<T>): Parser<{
43
+ value: T;
44
+ span: Span;
45
+ }>;
@@ -0,0 +1,85 @@
1
+ import { getInputStr } from "./trace.js";
2
+ import { success } from "./types.js";
/**
 * Computes the offset at which every line of `source` begins.
 *
 * The first entry is always 0; each "\n" contributes the offset of the
 * character that follows it. The table is ascending, so it can be
 * binary-searched to map an offset to a line (see `offsetToPosition`).
 *
 * @param source - the full text to index
 * @returns ascending array of line-start offsets, one per line
 */
export function buildLineTable(source) {
    const lineStarts = [0];
    let newlineAt = source.indexOf("\n");
    while (newlineAt !== -1) {
        lineStarts.push(newlineAt + 1);
        newlineAt = source.indexOf("\n", newlineAt + 1);
    }
    return lineStarts;
}
/**
 * Maps an absolute offset to a 0-based line and column using a line table
 * such as the one produced by `buildLineTable`.
 *
 * @param lineTable - ascending line-start offsets
 * @param offset - absolute offset into the source text
 * @returns `{ offset, line, column }`, where `line` is the last table entry
 * that starts at or before `offset` and `column` is the distance from that start
 */
export function offsetToPosition(lineTable, offset) {
    // Binary search, biased upward so the range always shrinks when `low` is kept.
    let low = 0;
    let high = lineTable.length - 1;
    while (high > low) {
        const probe = low + Math.ceil((high - low) / 2);
        if (lineTable[probe] <= offset)
            low = probe;
        else
            high = probe - 1;
    }
    const position = { offset, line: low, column: offset - lineTable[low] };
    return position;
}
// Most recent source string and the line table built for it. Position parsers
// run many times against the same input, so the table is built once per
// distinct source instead of once per call.
let cachedLineTableSource = null;
let cachedLineTable = null;
/**
 * Returns the line table for `source`, reusing the previous table when the
 * source is unchanged since the last call.
 */
function getLineTable(source) {
    if (cachedLineTable === null || cachedLineTableSource !== source) {
        cachedLineTable = buildLineTable(source);
        cachedLineTableSource = source;
    }
    return cachedLineTable;
}
/**
 * A zero-width parser that returns the current offset into the input string.
 * The offset is the length of the string from `getInputStr()` minus the length
 * of the remaining input, so it requires `setInputStr` to have been called
 * with the full input.
 */
export const getOffset = (input) => {
    const source = getInputStr();
    return success(source.length - input.length, input);
};
/**
 * A zero-width parser that returns the current position (offset, line, column;
 * line and column are 0-based).
 * Requires `setInputStr` to have been called with the full input.
 */
export const getPosition = (input) => {
    const source = getInputStr();
    const offset = source.length - input.length;
    return success(offsetToPosition(getLineTable(source), offset), input);
};
/**
 * Wraps a parser so that its result includes span information (start and end positions).
 * Useful for building ASTs with location data for language servers / editors.
 * Requires `setInputStr` to have been called with the full input.
 *
 * @example
 * ```ts
 * const locatedWord = withSpan(word);
 * // Result: { value: "hello", span: { start: { offset: 0, line: 0, column: 0 }, end: { offset: 5, line: 0, column: 5 } } }
 * ```
 *
 * @param parser - the parser to wrap
 * @returns a parser that yields `{ value, span }` on success and passes the
 * wrapped parser's failure through unchanged
 */
export function withSpan(parser) {
    return (input) => {
        const source = getInputStr();
        const lineTable = getLineTable(source);
        const startOffset = source.length - input.length;
        const result = parser(input);
        if (!result.success)
            return result;
        // Whatever the wrapped parser left unconsumed marks the end of the span.
        const endOffset = source.length - result.rest.length;
        return success({
            value: result.result,
            span: {
                start: offsetToPosition(lineTable, startOffset),
                end: offsetToPosition(lineTable, endOffset),
            },
        }, result.rest);
    };
}
@@ -1,4 +1,4 @@
1
- type TarsecErrorData = {
1
+ export type TarsecErrorData = {
2
2
  line: number;
3
3
  column: number;
4
4
  length: number;
@@ -9,4 +9,3 @@ export declare class TarsecError extends Error {
9
9
  data: TarsecErrorData;
10
10
  constructor(error: TarsecErrorData);
11
11
  }
12
- export {};
package/dist/trace.d.ts CHANGED
@@ -1,4 +1,5 @@
1
- import { ParserResult, Parser, PlainObject, CaptureParser } from "./types.js";
1
+ import { ParserResult, Parser, PlainObject, CaptureParser, ParserFailure } from "./types.js";
2
+ import { TarsecErrorData } from "./tarsecError.js";
2
3
  export declare function setTraceHost(host: string): void;
3
4
  export declare function getTraceHost(): string;
4
5
  export declare function setTraceId(id: string): void;
@@ -122,3 +123,4 @@ export declare function limitSteps(limit: number, callback: Function): void;
122
123
  */
123
124
  export declare function setInputStr(s: string): void;
124
125
  export declare function getInputStr(): string;
126
+ export declare function getDiagnostics(result: ParserFailure, input: string, _message?: string): TarsecErrorData;
package/dist/trace.js CHANGED
@@ -198,3 +198,45 @@ export function setInputStr(s) {
198
198
  export function getInputStr() {
199
199
  return inputStr;
200
200
  }
/**
 * Builds the error data (as passed to `new TarsecError(...)`) for a parse
 * failure that occurred with `input` still unconsumed.
 *
 * The failure offset is the length of the full input from `getInputStr()`
 * minus the length of `input`. From it this computes a 0-based `line` and
 * `column`, and a `prettyMessage` of three lines: a "Near: " preview of the
 * failing line (at most 20 characters either side of the failure), a caret
 * under the failing character, and the message.
 *
 * If no full input has been registered (`getInputStr()` is empty) the position
 * is reported as 0:0 and the preview shows the first 100 characters of `input`.
 *
 * @param result - the failed parser result; its `message` is used when `_message` is not given
 * @param input - the input remaining at the point of failure
 * @param _message - optional message that takes precedence over `result.message`
 * @returns `{ line, column, length, prettyMessage, message }`
 */
export function getDiagnostics(result, input, _message) {
    const source = getInputStr();
    const prefix = "Near: ";
    const message = _message || result.message || "Parsing failed";
    if (source.length === 0) {
        return {
            line: 0,
            column: 0,
            length: 0,
            prettyMessage: `${prefix}${input.substring(0, 100)}\n${message}`,
            message,
        };
    }
    // Clamp so an `input` that does not belong to `source` cannot yield an
    // out-of-range offset.
    const index = Math.min(source.length, Math.max(0, source.length - input.length));
    // Count the newlines before the failure, remembering where its line starts.
    let line = 0;
    let lineStart = 0;
    for (let nl = source.indexOf("\n"); nl !== -1 && nl < index; nl = source.indexOf("\n", nl + 1)) {
        line++;
        lineStart = nl + 1;
    }
    const nextNewline = source.indexOf("\n", index);
    const lineEnd = nextNewline === -1 ? source.length : nextNewline;
    // Keep the preview window inside the failing line so the caret below it
    // always points at the failing character.
    const start = Math.max(lineStart, index - 20);
    const end = Math.min(lineEnd, index + 20);
    const caretPad = " ".repeat(prefix.length + (index - start));
    return {
        line,
        column: index - lineStart,
        length: 1,
        prettyMessage: [
            `${prefix}${source.substring(start, end)}`,
            `${caretPad}^`,
            message,
        ].join("\n"),
        message,
    };
}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "tarsec",
3
- "version": "0.1.8",
3
+ "version": "0.1.9",
4
4
  "description": "A parser combinator library for TypeScript, inspired by Parsec.",
5
5
  "homepage": "https://github.com/egonSchiele/tarsec",
6
6
  "scripts": {
@@ -38,4 +38,4 @@
38
38
  "typescript": "^5.4.2",
39
39
  "vitest": "^1.4.0"
40
40
  }
41
- }
41
+ }