tarsec 0.1.8 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -45,6 +45,7 @@ parser("hello there"); // failure
45
45
  - Tools to debug your parser's [performance](/tutorials/performance.md)
46
46
  - Partial [backtracking](/tutorials/backtracking.md) support
47
47
  - A way to make your parser more [secure](/tutorials/security.md).
48
+ - [Pretty error messages](/tutorials/pretty-errors.md)
48
49
 
49
50
  ## Examples
50
51
  - [A markdown parser](/tests/examples/markdown.ts)
@@ -438,3 +438,55 @@ export declare function and<const T extends readonly GeneralParser<any, any>[]>(
438
438
  * @returns
439
439
  */
440
440
  export declare function parseError<const T extends readonly GeneralParser<any, any>[]>(_message: string, ...parsers: T): Parser<MergedCaptures<T>>;
441
+ /**
442
+ * Defers evaluation of a parser, allowing recursive parser definitions.
443
+ * Without `lazy`, you can't reference a parser before it's defined:
444
+ *
445
+ * ```ts
446
+ * // ReferenceError: expr is not defined
447
+ * const expr = or(number, seqR(char("("), expr, char(")")));
448
+ *
449
+ * // Works: the reference to expr is deferred
450
+ * const expr = or(number, seqR(char("("), lazy(() => expr), char(")")));
451
+ * ```
452
+ *
453
+ * @param thunk - a function that returns the parser
454
+ * @returns - a parser that evaluates the thunk on each parse attempt
455
+ */
456
+ export declare function lazy<T>(thunk: () => Parser<T>): Parser<T>;
457
+ export type Associativity = "left" | "right";
458
+ export type OperatorInfo<T> = {
459
+ /** A parser that matches the operator (e.g. `char("+")` or `str("**")`) */
460
+ op: Parser<any>;
461
+ /** Whether the operator is left- or right-associative */
462
+ assoc: Associativity;
463
+ /** A function that combines the left and right operands.
464
+ * For example: `(a, b) => a + b` */
465
+ apply: (left: T, right: T) => T;
466
+ };
467
+ /**
468
+ * Builds an expression parser that handles operator precedence and associativity.
469
+ * Operators are given as an array of precedence levels, from **highest** to **lowest**.
470
+ * Each level is an array of operators at that precedence.
471
+ *
472
+ * ```ts
473
+ * const expr = buildExpressionParser(
474
+ * numberParser,
475
+ * [
476
+ * [{ op: char("*"), assoc: "left", apply: (a, b) => a * b },
477
+ * { op: char("/"), assoc: "left", apply: (a, b) => a / b }],
478
+ * [{ op: char("+"), assoc: "left", apply: (a, b) => a + b },
479
+ * { op: char("-"), assoc: "left", apply: (a, b) => a - b }],
480
+ * ],
481
+ * // optional: override how parenthesized sub-expressions are parsed
482
+ * // defaults to: seqR(char("("), lazy(() => expr), char(")")) mapped to the middle result
483
+ * );
484
+ * ```
485
+ *
486
+ * @param atom - parser for the smallest unit (e.g. numbers, identifiers). Parenthesized sub-expressions are tried first by the paren parser, so `atom` does not need to handle them.
487
+ * @param operatorTable - array of precedence levels, highest first. Each level is an array of OperatorInfo.
488
+ * @param parenParser - optional parser for parenthesized sub-expressions. If not provided,
489
+ * one is built automatically that matches `(`, then the full expression, then `)`.
490
+ * @returns - a parser that handles the full expression grammar
491
+ */
492
+ export declare function buildExpressionParser<T>(atom: Parser<T>, operatorTable: OperatorInfo<T>[][], parenParser?: Parser<T>): Parser<T>;
@@ -1,6 +1,6 @@
1
1
  import { within } from "./parsers/within.js";
2
2
  import { TarsecError } from "./tarsecError.js";
3
- import { getInputStr, trace } from "./trace.js";
3
+ import { getDiagnostics, trace } from "./trace.js";
4
4
  import { captureSuccess, createTree, failure, isCaptureResult, isSuccess, success, } from "./types.js";
5
5
  import { escape, findAncestorWithNextParser, popMany } from "./utils.js";
6
6
  /**
@@ -871,47 +871,158 @@ export function parseError(_message, ...parsers) {
871
871
  return result;
872
872
  }
873
873
  else {
874
- const inputStr = getInputStr();
875
- const messages = [];
876
- const prefix = "Near: ";
877
- if (inputStr.length > 0) {
878
- const index = inputStr.length - input.length;
879
- const start = Math.max(0, index - 20);
880
- const end = Math.min(inputStr.length, index + 20);
881
- const previewStr = inputStr.substring(start, end).split("\n")[0];
882
- messages.push(`${prefix}${previewStr}`);
883
- messages.push(`${" ".repeat(index + prefix.length)}^`);
884
- messages.push(_message);
885
- const message = messages.join("\n");
886
- const lines = inputStr.split("\n");
887
- let acc = 0;
888
- let i = 0;
889
- while (index >= acc) {
890
- acc += lines[i].length;
891
- i++;
892
- }
893
- const linesIndex = Math.max(0, i - 1);
894
- const column = lines[linesIndex].length - (acc - index);
895
- throw new TarsecError({
896
- line: i - 1,
897
- column,
898
- length: 1,
899
- prettyMessage: message,
900
- message: _message,
901
- });
902
- }
903
- else {
904
- messages.push(`${prefix}${input.substring(1, 100)}`);
905
- messages.push(_message);
906
- const message = messages.join("\n");
907
- throw new TarsecError({
908
- line: 0,
909
- column: 0,
910
- length: 0,
911
- prettyMessage: message,
912
- message: _message,
913
- });
914
- }
874
+ const error = getDiagnostics(result, input, _message);
875
+ throw new TarsecError(error);
876
+ }
877
+ };
878
+ }
879
+ /**
880
+ * Defers evaluation of a parser, allowing recursive parser definitions.
881
+ * Without `lazy`, you can't reference a parser before it's defined:
882
+ *
883
+ * ```ts
884
+ * // ReferenceError: expr is not defined
885
+ * const expr = or(number, seqR(char("("), expr, char(")")));
886
+ *
887
+ * // Works: the reference to expr is deferred
888
+ * const expr = or(number, seqR(char("("), lazy(() => expr), char(")")));
889
+ * ```
890
+ *
891
+ * @param thunk - a function that returns the parser
892
+ * @returns - a parser that evaluates the thunk on each parse attempt
893
+ */
894
+ export function lazy(thunk) {
895
+ return trace("lazy", (input) => thunk()(input));
896
+ }
897
+ /**
898
+ * Builds an expression parser that handles operator precedence and associativity.
899
+ * Operators are given as an array of precedence levels, from **highest** to **lowest**.
900
+ * Each level is an array of operators at that precedence.
901
+ *
902
+ * ```ts
903
+ * const expr = buildExpressionParser(
904
+ * numberParser,
905
+ * [
906
+ * [{ op: char("*"), assoc: "left", apply: (a, b) => a * b },
907
+ * { op: char("/"), assoc: "left", apply: (a, b) => a / b }],
908
+ * [{ op: char("+"), assoc: "left", apply: (a, b) => a + b },
909
+ * { op: char("-"), assoc: "left", apply: (a, b) => a - b }],
910
+ * ],
911
+ * // optional: override how parenthesized sub-expressions are parsed
912
+ * // defaults to: seqR(char("("), lazy(() => expr), char(")")) mapped to the middle result
913
+ * );
914
+ * ```
915
+ *
916
+ * @param atom - parser for the smallest unit (e.g. numbers, identifiers). Parenthesized sub-expressions are tried first by the paren parser, so `atom` does not need to handle them.
917
+ * @param operatorTable - array of precedence levels, highest first. Each level is an array of OperatorInfo.
918
+ * @param parenParser - optional parser for parenthesized sub-expressions. If not provided,
919
+ * one is built automatically that matches `(`, then the full expression, then `)`.
920
+ * @returns - a parser that handles the full expression grammar
921
+ */
922
+ export function buildExpressionParser(atom, operatorTable, parenParser) {
923
+ // Build the parser from highest precedence to lowest.
924
+ // Each level wraps the previous one, so higher-precedence operators bind tighter.
925
+ // The "base" is the atom or a parenthesized expression.
926
+ // The default paren parser closes over `expr`, which is only assigned after every level has been built.
927
+ let expr;
928
+ const base = (input) => {
929
+ const paren = parenParser !== null && parenParser !== void 0 ? parenParser : buildDefaultParenParser();
930
+ const result = paren(input);
931
+ if (result.success)
932
+ return result;
933
+ return atom(input);
934
+ };
935
+ function buildDefaultParenParser() {
936
+ return (input) => {
937
+ const openResult = input[0] === "(" ? success("(", input.slice(1)) : failure("expected (", input);
938
+ if (!openResult.success)
939
+ return openResult;
940
+ const exprResult = expr(openResult.rest);
941
+ if (!exprResult.success)
942
+ return failure(exprResult.message, input);
943
+ const closeResult = exprResult.rest[0] === ")" ? success(")", exprResult.rest.slice(1)) : failure("expected )", input);
944
+ if (!closeResult.success)
945
+ return closeResult;
946
+ return success(exprResult.result, closeResult.rest);
947
+ };
948
+ }
949
+ // Start with the base (atoms and parens), then wrap with each precedence level
950
+ let currentLevel = base;
951
+ for (let i = 0; i < operatorTable.length; i++) {
952
+ const ops = operatorTable[i];
953
+ currentLevel = buildLevel(currentLevel, ops);
954
+ }
955
+ expr = currentLevel;
956
+ return expr;
957
+ }
958
+ function buildLevel(nextLevel, ops) {
959
+ return (input) => {
960
+ const leftResult = nextLevel(input);
961
+ if (!leftResult.success)
962
+ return leftResult;
963
+ // Try to parse a chain of operators at this level
964
+ let left = leftResult.result;
965
+ let rest = leftResult.rest;
966
+ // Split the operators by associativity; right-associative levels are parsed by parseRight, which recurses on the right operand.
967
+ const rightOps = ops.filter((o) => o.assoc === "right");
968
+ const leftOps = ops.filter((o) => o.assoc === "left");
969
+ if (rightOps.length > 0 && leftOps.length > 0) {
970
+ // Mixed associativity at same level: parseChain applies every operator at this level
971
+ // left-associatively, so "right" is not honored here. This is an unusual case.
972
+ return parseChain(left, rest, nextLevel, ops);
973
+ }
974
+ else if (rightOps.length > 0) {
975
+ return parseRight(left, rest, nextLevel, rightOps);
976
+ }
977
+ else {
978
+ return parseLeft(left, rest, nextLevel, leftOps);
915
979
  }
916
980
  };
917
981
  }
982
+ function parseLeft(left, rest, nextLevel, ops) {
983
+ while (true) {
984
+ const opMatch = tryOps(ops, rest);
985
+ if (!opMatch)
986
+ break;
987
+ const rightResult = nextLevel(opMatch.rest);
988
+ if (!rightResult.success)
989
+ break;
990
+ left = opMatch.apply(left, rightResult.result);
991
+ rest = rightResult.rest;
992
+ }
993
+ return success(left, rest);
994
+ }
995
+ function parseRight(left, rest, nextLevel, ops) {
996
+ const opMatch = tryOps(ops, rest);
997
+ if (!opMatch)
998
+ return success(left, rest);
999
+ const rightResult = nextLevel(opMatch.rest);
1000
+ if (!rightResult.success)
1001
+ return success(left, rest);
1002
+ // Recursively parse the right side to get right-associativity
1003
+ const rightFolded = parseRight(rightResult.result, rightResult.rest, nextLevel, ops);
1004
+ return success(opMatch.apply(left, rightFolded.result), rightFolded.rest);
1005
+ }
1006
+ function parseChain(left, rest, nextLevel, ops) {
1007
+ // Fallback: treat everything as left-associative
1008
+ while (true) {
1009
+ const opMatch = tryOps(ops, rest);
1010
+ if (!opMatch)
1011
+ break;
1012
+ const rightResult = nextLevel(opMatch.rest);
1013
+ if (!rightResult.success)
1014
+ break;
1015
+ left = opMatch.apply(left, rightResult.result);
1016
+ rest = rightResult.rest;
1017
+ }
1018
+ return success(left, rest);
1019
+ }
1020
+ function tryOps(ops, input) {
1021
+ for (const op of ops) {
1022
+ const result = op.op(input);
1023
+ if (result.success) {
1024
+ return { rest: result.rest, apply: op.apply };
1025
+ }
1026
+ }
1027
+ return null;
1028
+ }
package/dist/index.d.ts CHANGED
@@ -3,3 +3,5 @@ export * from "./combinators.js";
3
3
  export * from "./trace.js";
4
4
  export * from "./types.js";
5
5
  export * from "./tarsecError.js";
6
+ export * from "./position.js";
7
+ export * from "./rightmostFailure.js";
package/dist/index.js CHANGED
@@ -3,3 +3,5 @@ export * from "./combinators.js";
3
3
  export * from "./trace.js";
4
4
  export * from "./types.js";
5
5
  export * from "./tarsecError.js";
6
+ export * from "./position.js";
7
+ export * from "./rightmostFailure.js";
package/dist/parsers.d.ts CHANGED
@@ -44,6 +44,16 @@ export declare function noneOf(chars: string): Parser<string>;
44
44
  * @returns - ParserResult
45
45
  */
46
46
  export declare const anyChar: Parser<string>;
47
+ /**
48
+ * Wraps a parser with a human-readable label for error reporting.
49
+ * Failure recordings made by the inner parser are always discarded; if the parser fails, only the label is recorded.
50
+ * This produces clean error messages like `expected a digit` instead of `expected one of "0123456789"`.
51
+ *
52
+ * @param name - human-readable description of what the parser expects
53
+ * @param parser - the parser to wrap
54
+ * @returns - a parser that records the label on failure
55
+ */
56
+ export declare function label<T>(name: string, parser: Parser<T>): Parser<T>;
47
57
  /** A parser that matches one of " \t\n\r". */
48
58
  export declare const space: Parser<string>;
49
59
  /** A parser that matches one or more spaces. */
package/dist/parsers.js CHANGED
@@ -1,5 +1,6 @@
1
1
  import { many1WithJoin, manyWithJoin, seq } from "./combinators.js";
2
2
  import { trace } from "./trace.js";
3
+ import { recordFailure, saveRightmostFailure, restoreRightmostFailure } from "./rightmostFailure.js";
3
4
  import { captureSuccess, failure, success, } from "./types.js";
4
5
  import { escape } from "./utils.js";
5
6
  export { within } from "./parsers/within.js";
@@ -12,15 +13,13 @@ export { within } from "./parsers/within.js";
12
13
  export function char(c) {
13
14
  return trace(`char(${escape(c)})`, (input) => {
14
15
  if (input.length === 0) {
15
- return {
16
- success: false,
17
- rest: input,
18
- message: "unexpected end of input",
19
- };
16
+ recordFailure(input, `"${c}"`);
17
+ return failure("unexpected end of input", input);
20
18
  }
21
19
  if (input[0] === c) {
22
20
  return success(c, input.slice(1));
23
21
  }
22
+ recordFailure(input, `"${c}"`);
24
23
  return failure(`expected ${escape(c)}, got ${escape(input[0])}`, input);
25
24
  });
26
25
  }
@@ -35,6 +34,7 @@ export function str(s) {
35
34
  if (input.substring(0, s.length) === s) {
36
35
  return success(s, input.slice(s.length));
37
36
  }
37
+ recordFailure(input, `"${s}"`);
38
38
  return failure(`expected ${s}, got ${input.substring(0, s.length)}`, input);
39
39
  });
40
40
  }
@@ -48,6 +48,7 @@ export function istr(s) {
48
48
  if (input.substring(0, s.length).toLocaleLowerCase() === s.toLocaleLowerCase()) {
49
49
  return success(input.substring(0, s.length), input.slice(s.length));
50
50
  }
51
+ recordFailure(input, `"${s}"`);
51
52
  return failure(`expected ${s}, got ${input.substring(0, s.length)}`, input);
52
53
  });
53
54
  }
@@ -61,12 +62,14 @@ export function istr(s) {
61
62
  export function oneOf(chars) {
62
63
  return trace(`oneOf(${escape(chars)})`, (input) => {
63
64
  if (input.length === 0) {
65
+ recordFailure(input, `one of "${chars}"`);
64
66
  return failure("unexpected end of input", input);
65
67
  }
66
68
  const c = input[0];
67
69
  if (chars.includes(c)) {
68
70
  return char(c)(input);
69
71
  }
72
+ recordFailure(input, `one of "${chars}"`);
70
73
  return failure(`expected one of ${escape(chars)}, got ${c}`, input);
71
74
  });
72
75
  }
@@ -80,9 +83,11 @@ export function oneOf(chars) {
80
83
  export function noneOf(chars) {
81
84
  return trace(`noneOf(${escape(chars)})`, (input) => {
82
85
  if (input.length === 0) {
86
+ recordFailure(input, `none of "${chars}"`);
83
87
  return failure("unexpected end of input", input);
84
88
  }
85
89
  if (chars.includes(input[0])) {
90
+ recordFailure(input, `none of "${chars}"`);
86
91
  return failure(`expected none of ${escape(chars)}, got ${input[0]}`, input);
87
92
  }
88
93
  return char(input[0])(input);
@@ -97,26 +102,46 @@ export function noneOf(chars) {
97
102
  */
98
103
  export const anyChar = trace("anyChar", (input) => {
99
104
  if (input.length === 0) {
105
+ recordFailure(input, "any character");
100
106
  return failure("unexpected end of input", input);
101
107
  }
102
108
  return success(input[0], input.slice(1));
103
109
  });
110
+ /**
111
+ * Wraps a parser with a human-readable label for error reporting.
112
+ * Failure recordings made by the inner parser are always discarded; if the parser fails, only the label is recorded.
113
+ * This produces clean error messages like `expected a digit` instead of `expected one of "0123456789"`.
114
+ *
115
+ * @param name - human-readable description of what the parser expects
116
+ * @param parser - the parser to wrap
117
+ * @returns - a parser that records the label on failure
118
+ */
119
+ export function label(name, parser) {
120
+ return (input) => {
121
+ const saved = saveRightmostFailure();
122
+ const result = parser(input);
123
+ restoreRightmostFailure(saved);
124
+ if (!result.success)
125
+ recordFailure(input, name);
126
+ return result;
127
+ };
128
+ }
104
129
  /** A parser that matches one of " \t\n\r". */
105
- export const space = oneOf(" \t\n\r");
130
+ export const space = label("whitespace", oneOf(" \t\n\r"));
106
131
  /** A parser that matches one or more spaces. */
107
132
  export const spaces = many1WithJoin(space);
108
133
  /** A parser that matches one digit. */
109
- export const digit = oneOf("0123456789");
134
+ export const digit = label("a digit", oneOf("0123456789"));
110
135
  /** A parser that matches one letter, case insensitive. */
111
- export const letter = oneOf("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ");
136
+ export const letter = label("a letter", oneOf("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"));
112
137
  /** A parser that matches one digit or letter, case insensitive. */
113
- export const alphanum = oneOf("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789");
138
+ export const alphanum = label("a letter or digit", oneOf("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"));
114
139
  /** A parser that matches one word, case insensitive. */
115
- export const word = regexParser("^[a-z]+", "ui");
140
+ export const word = label("a word", regexParser("^[a-z]+", "ui"));
116
141
  /** A parser that matches one or more digits. */
117
- export const num = regexParser("^[0-9]+");
142
+ export const num = label("a number", regexParser("^[0-9]+"));
118
143
  /** A parser that matches one single or double quote. */
119
- export const quote = oneOf(`'"`);
144
+ export const quote = label("a quote", oneOf(`'"`));
120
145
  /** A parser that matches one tab character. */
121
146
  export const tab = char("\t");
122
147
  /** A parser that matches one newline ("\n" only) character. */
@@ -126,6 +151,7 @@ export const eof = (input) => {
126
151
  if (input === "") {
127
152
  return success(null, input);
128
153
  }
154
+ recordFailure(input, "end of input");
129
155
  return failure("expected end of input", input);
130
156
  };
131
157
  /** A parser that matches a quoted string, in single or double quotes.
@@ -154,6 +180,7 @@ export function regexParser(str, options = "") {
154
180
  if (match) {
155
181
  return success(match[0], input.slice(match[0].length));
156
182
  }
183
+ recordFailure(input, `${str}`);
157
184
  return failure(`expected ${str}, got ${input.slice(0, 10)}`, input);
158
185
  });
159
186
  }
@@ -187,6 +214,7 @@ export function captureRegex(str, options = "", ...captureNames) {
187
214
  const captures = Object.assign({}, Object.fromEntries(match.slice(1).map((value, index) => [captureNames[index], value])));
188
215
  return success(captures, input.slice(match[0].length));
189
216
  }
217
+ recordFailure(input, `${str}`);
190
218
  return failure(`expected ${str}, got ${input.slice(0, 10)}`, input);
191
219
  };
192
220
  return trace(`captureRegex(${str})`, _parser);
@@ -0,0 +1,45 @@
1
+ import { Parser } from "./types.js";
2
+ export type Position = {
3
+ offset: number;
4
+ line: number;
5
+ column: number;
6
+ };
7
+ export type Span = {
8
+ start: Position;
9
+ end: Position;
10
+ };
11
+ /**
12
+ * Build a lookup table of line-start offsets for a given source string.
13
+ * This allows O(log n) offset-to-position conversion via binary search.
14
+ */
15
+ export declare function buildLineTable(source: string): number[];
16
+ /**
17
+ * Convert an absolute offset into a line and column using a precomputed line table.
18
+ * Both line and column are 0-based.
19
+ */
20
+ export declare function offsetToPosition(lineTable: number[], offset: number): Position;
21
+ /**
22
+ * A zero-width parser that returns the current offset into the input string.
23
+ * Requires `setInputStr` to have been called with the full input.
24
+ */
25
+ export declare const getOffset: Parser<number>;
26
+ /**
27
+ * A zero-width parser that returns the current position (offset, line, column).
28
+ * Requires `setInputStr` to have been called with the full input.
29
+ */
30
+ export declare const getPosition: Parser<Position>;
31
+ /**
32
+ * Wraps a parser so that its result includes span information (start and end positions).
33
+ * Useful for building ASTs with location data for language servers / editors.
34
+ * Requires `setInputStr` to have been called with the full input.
35
+ *
36
+ * @example
37
+ * ```ts
38
+ * const locatedWord = withSpan(word);
39
+ * // Result: { value: "hello", span: { start: { offset: 0, line: 0, column: 0 }, end: { offset: 5, line: 0, column: 5 } } }
40
+ * ```
41
+ */
42
+ export declare function withSpan<T>(parser: Parser<T>): Parser<{
43
+ value: T;
44
+ span: Span;
45
+ }>;
@@ -0,0 +1,85 @@
1
+ import { getInputStr } from "./trace.js";
2
+ import { success } from "./types.js";
3
+ /**
4
+ * Build a lookup table of line-start offsets for a given source string.
5
+ * This allows O(log n) offset-to-position conversion via binary search.
6
+ */
7
+ export function buildLineTable(source) {
8
+ const lineStarts = [0];
9
+ for (let i = 0; i < source.length; i++) {
10
+ if (source[i] === "\n") {
11
+ lineStarts.push(i + 1);
12
+ }
13
+ }
14
+ return lineStarts;
15
+ }
16
+ /**
17
+ * Convert an absolute offset into a line and column using a precomputed line table.
18
+ * Both line and column are 0-based.
19
+ */
20
+ export function offsetToPosition(lineTable, offset) {
21
+ // binary search for the line
22
+ let lo = 0;
23
+ let hi = lineTable.length - 1;
24
+ while (lo < hi) {
25
+ const mid = (lo + hi + 1) >> 1;
26
+ if (lineTable[mid] <= offset) {
27
+ lo = mid;
28
+ }
29
+ else {
30
+ hi = mid - 1;
31
+ }
32
+ }
33
+ return {
34
+ offset,
35
+ line: lo,
36
+ column: offset - lineTable[lo],
37
+ };
38
+ }
39
+ /**
40
+ * A zero-width parser that returns the current offset into the input string.
41
+ * Requires `setInputStr` to have been called with the full input.
42
+ */
43
+ export const getOffset = (input) => {
44
+ const source = getInputStr();
45
+ return success(source.length - input.length, input);
46
+ };
47
+ /**
48
+ * A zero-width parser that returns the current position (offset, line, column).
49
+ * Requires `setInputStr` to have been called with the full input.
50
+ */
51
+ export const getPosition = (input) => {
52
+ const source = getInputStr();
53
+ const offset = source.length - input.length;
54
+ const lineTable = buildLineTable(source);
55
+ return success(offsetToPosition(lineTable, offset), input);
56
+ };
57
+ /**
58
+ * Wraps a parser so that its result includes span information (start and end positions).
59
+ * Useful for building ASTs with location data for language servers / editors.
60
+ * Requires `setInputStr` to have been called with the full input.
61
+ *
62
+ * @example
63
+ * ```ts
64
+ * const locatedWord = withSpan(word);
65
+ * // Result: { value: "hello", span: { start: { offset: 0, line: 0, column: 0 }, end: { offset: 5, line: 0, column: 5 } } }
66
+ * ```
67
+ */
68
+ export function withSpan(parser) {
69
+ return (input) => {
70
+ const source = getInputStr();
71
+ const lineTable = buildLineTable(source);
72
+ const startOffset = source.length - input.length;
73
+ const result = parser(input);
74
+ if (!result.success)
75
+ return result;
76
+ const endOffset = source.length - result.rest.length;
77
+ return success({
78
+ value: result.result,
79
+ span: {
80
+ start: offsetToPosition(lineTable, startOffset),
81
+ end: offsetToPosition(lineTable, endOffset),
82
+ },
83
+ }, result.rest);
84
+ };
85
+ }
@@ -0,0 +1,32 @@
1
+ export declare function resetRightmostFailure(): void;
2
+ /**
3
+ * Record an expected alternative at the current failure position.
4
+ * If this position is further right than any previous failure, it replaces the previous.
5
+ * If it's at the same position, it adds to the list of expectations (with dedup).
6
+ * No-op if `setInputStr` has not been called.
7
+ *
8
+ * @param input - the remaining input at the point of failure
9
+ * @param expected - a human-readable description of what was expected
10
+ */
11
+ export declare function recordFailure(input: string, expected: string): void;
12
+ /**
13
+ * Returns the current rightmost failure position and expected alternatives,
14
+ * or `null` if no failures have been recorded.
15
+ */
16
+ export declare function getRightmostFailure(): {
17
+ pos: number;
18
+ expected: string[];
19
+ } | null;
20
+ type SavedRightmostFailure = {
21
+ pos: number;
22
+ expected: string[];
23
+ };
24
+ export declare function saveRightmostFailure(): SavedRightmostFailure;
25
+ export declare function restoreRightmostFailure(saved: SavedRightmostFailure): void;
26
+ /**
27
+ * Formats the rightmost failure into a human-readable error message with line and column info.
28
+ * Returns `null` if no failures have been recorded.
29
+ * Requires `setInputStr` to have been called.
30
+ */
31
+ export declare function getErrorMessage(): string | null;
32
+ export {};
@@ -0,0 +1,70 @@
1
+ import { getInputStr } from "./trace.js";
2
+ import { buildLineTable, offsetToPosition } from "./position.js";
3
+ let rightmostFailurePos = -1;
4
+ let rightmostFailureExpected = [];
5
+ export function resetRightmostFailure() {
6
+ rightmostFailurePos = -1;
7
+ rightmostFailureExpected = [];
8
+ }
9
+ /**
10
+ * Record an expected alternative at the current failure position.
11
+ * If this position is further right than any previous failure, it replaces the previous.
12
+ * If it's at the same position, it adds to the list of expectations (with dedup).
13
+ * No-op if `setInputStr` has not been called.
14
+ *
15
+ * @param input - the remaining input at the point of failure
16
+ * @param expected - a human-readable description of what was expected
17
+ */
18
+ export function recordFailure(input, expected) {
19
+ const source = getInputStr();
20
+ if (source.length === 0)
21
+ return;
22
+ const pos = source.length - input.length;
23
+ if (pos > rightmostFailurePos) {
24
+ rightmostFailurePos = pos;
25
+ rightmostFailureExpected = [expected];
26
+ }
27
+ else if (pos === rightmostFailurePos) {
28
+ if (!rightmostFailureExpected.includes(expected)) {
29
+ rightmostFailureExpected.push(expected);
30
+ }
31
+ }
32
+ }
33
+ /**
34
+ * Returns the current rightmost failure position and expected alternatives,
35
+ * or `null` if no failures have been recorded.
36
+ */
37
+ export function getRightmostFailure() {
38
+ if (rightmostFailurePos < 0)
39
+ return null;
40
+ return { pos: rightmostFailurePos, expected: [...rightmostFailureExpected] };
41
+ }
42
+ export function saveRightmostFailure() {
43
+ return { pos: rightmostFailurePos, expected: [...rightmostFailureExpected] };
44
+ }
45
+ export function restoreRightmostFailure(saved) {
46
+ rightmostFailurePos = saved.pos;
47
+ rightmostFailureExpected = [...saved.expected];
48
+ }
49
+ function formatExpected(expected) {
50
+ if (expected.length === 1)
51
+ return expected[0];
52
+ if (expected.length === 2)
53
+ return `${expected[0]} or ${expected[1]}`;
54
+ return expected.slice(0, -1).join(", ") + ", or " + expected[expected.length - 1];
55
+ }
56
+ /**
57
+ * Formats the rightmost failure into a human-readable error message with line and column info.
58
+ * Returns `null` if no failures have been recorded.
59
+ * Requires `setInputStr` to have been called.
60
+ */
61
+ export function getErrorMessage() {
62
+ if (rightmostFailurePos < 0)
63
+ return null;
64
+ const source = getInputStr();
65
+ const lineTable = buildLineTable(source);
66
+ const pos = offsetToPosition(lineTable, rightmostFailurePos);
67
+ const line = pos.line + 1;
68
+ const column = pos.column + 1;
69
+ return `Line ${line}, col ${column}: expected ${formatExpected(rightmostFailureExpected)}`;
70
+ }
@@ -1,4 +1,4 @@
1
- type TarsecErrorData = {
1
+ export type TarsecErrorData = {
2
2
  line: number;
3
3
  column: number;
4
4
  length: number;
@@ -9,4 +9,3 @@ export declare class TarsecError extends Error {
9
9
  data: TarsecErrorData;
10
10
  constructor(error: TarsecErrorData);
11
11
  }
12
- export {};
package/dist/trace.d.ts CHANGED
@@ -1,4 +1,5 @@
1
- import { ParserResult, Parser, PlainObject, CaptureParser } from "./types.js";
1
+ import { ParserResult, Parser, PlainObject, CaptureParser, ParserFailure } from "./types.js";
2
+ import { TarsecErrorData } from "./tarsecError.js";
2
3
  export declare function setTraceHost(host: string): void;
3
4
  export declare function getTraceHost(): string;
4
5
  export declare function setTraceId(id: string): void;
@@ -122,3 +123,4 @@ export declare function limitSteps(limit: number, callback: Function): void;
122
123
  */
123
124
  export declare function setInputStr(s: string): void;
124
125
  export declare function getInputStr(): string;
126
+ export declare function getDiagnostics(result: ParserFailure, input: string, _message?: string): TarsecErrorData;
package/dist/trace.js CHANGED
@@ -1,6 +1,7 @@
1
1
  import { escape, round, shorten } from "./utils.js";
2
2
  import process from "process";
3
3
  import { execSync } from "child_process";
4
+ import { resetRightmostFailure } from "./rightmostFailure.js";
4
5
  const isNode = typeof process !== "undefined" &&
5
6
  process.versions != null &&
6
7
  process.versions.node != null;
@@ -194,7 +195,50 @@ let inputStr = "";
194
195
  */
195
196
  export function setInputStr(s) {
196
197
  inputStr = s;
198
+ resetRightmostFailure();
197
199
  }
198
200
  export function getInputStr() {
199
201
  return inputStr;
200
202
  }
203
+ export function getDiagnostics(result, input, _message) {
204
+ const inputStr = getInputStr();
205
+ const messages = [];
206
+ const prefix = "Near: ";
207
+ const message = _message || result.message || "Parsing failed";
208
+ if (inputStr.length > 0) {
209
+ const index = inputStr.length - input.length;
210
+ const start = Math.max(0, index - 20);
211
+ const end = Math.min(inputStr.length, index + 20);
212
+ const previewStr = inputStr.substring(start, end).split("\n")[0];
213
+ messages.push(`${prefix}${previewStr}`);
214
+ messages.push(`${" ".repeat(index + prefix.length)}^`);
215
+ messages.push(message);
216
+ const lines = inputStr.split("\n");
217
+ let acc = 0;
218
+ let i = 0;
219
+ while (index >= acc && i < lines.length) {
220
+ acc += lines[i].length;
221
+ i++;
222
+ }
223
+ const linesIndex = Math.max(0, i - 1);
224
+ const column = lines[linesIndex].length - (acc - index);
225
+ return {
226
+ line: i - 1,
227
+ column,
228
+ length: 1,
229
+ prettyMessage: messages.join("\n"),
230
+ message: message,
231
+ };
232
+ }
233
+ else {
234
+ messages.push(`${prefix}${input.substring(1, 100)}`);
235
+ messages.push(message);
236
+ return {
237
+ line: 0,
238
+ column: 0,
239
+ length: 0,
240
+ prettyMessage: messages.join("\n"),
241
+ message: message,
242
+ };
243
+ }
244
+ }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "tarsec",
3
- "version": "0.1.8",
3
+ "version": "0.2.0",
4
4
  "description": "A parser combinator library for TypeScript, inspired by Parsec.",
5
5
  "homepage": "https://github.com/egonSchiele/tarsec",
6
6
  "scripts": {
@@ -38,4 +38,4 @@
38
38
  "typescript": "^5.4.2",
39
39
  "vitest": "^1.4.0"
40
40
  }
41
- }
41
+ }