tarsec 0.1.8 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -0
- package/dist/combinators.d.ts +52 -0
- package/dist/combinators.js +153 -42
- package/dist/index.d.ts +2 -0
- package/dist/index.js +2 -0
- package/dist/parsers.d.ts +10 -0
- package/dist/parsers.js +40 -12
- package/dist/position.d.ts +45 -0
- package/dist/position.js +85 -0
- package/dist/rightmostFailure.d.ts +32 -0
- package/dist/rightmostFailure.js +70 -0
- package/dist/tarsecError.d.ts +1 -2
- package/dist/trace.d.ts +3 -1
- package/dist/trace.js +44 -0
- package/package.json +2 -2
package/README.md
CHANGED
|
@@ -45,6 +45,7 @@ parser("hello there"); // failure
|
|
|
45
45
|
- Tools to debug your parser's [performance](/tutorials/performance.md)
|
|
46
46
|
- Partial [backtracking](/tutorials/backtracking.md) support
|
|
47
47
|
- A way to make your parser more [secure](/tutorials/security.md).
|
|
48
|
+
- [Pretty error messages](/tutorials/pretty-errors.md)
|
|
48
49
|
|
|
49
50
|
## Examples
|
|
50
51
|
- [A markdown parser](/tests/examples/markdown.ts)
|
package/dist/combinators.d.ts
CHANGED
|
@@ -438,3 +438,55 @@ export declare function and<const T extends readonly GeneralParser<any, any>[]>(
|
|
|
438
438
|
* @returns
|
|
439
439
|
*/
|
|
440
440
|
export declare function parseError<const T extends readonly GeneralParser<any, any>[]>(_message: string, ...parsers: T): Parser<MergedCaptures<T>>;
|
|
441
|
+
/**
|
|
442
|
+
* Defers evaluation of a parser, allowing recursive parser definitions.
|
|
443
|
+
* Without `lazy`, you can't reference a parser before it's defined:
|
|
444
|
+
*
|
|
445
|
+
* ```ts
|
|
446
|
+
* // ReferenceError: expr is not defined
|
|
447
|
+
* const expr = or(number, seqR(char("("), expr, char(")")));
|
|
448
|
+
*
|
|
449
|
+
* // Works: the reference to expr is deferred
|
|
450
|
+
* const expr = or(number, seqR(char("("), lazy(() => expr), char(")")));
|
|
451
|
+
* ```
|
|
452
|
+
*
|
|
453
|
+
* @param thunk - a function that returns the parser
|
|
454
|
+
* @returns - a parser that evaluates the thunk on each parse attempt
|
|
455
|
+
*/
|
|
456
|
+
export declare function lazy<T>(thunk: () => Parser<T>): Parser<T>;
|
|
457
|
+
export type Associativity = "left" | "right";
|
|
458
|
+
export type OperatorInfo<T> = {
|
|
459
|
+
/** A parser that matches the operator (e.g. `char("+")` or `str("**")`) */
|
|
460
|
+
op: Parser<any>;
|
|
461
|
+
/** Whether the operator is left- or right-associative */
|
|
462
|
+
assoc: Associativity;
|
|
463
|
+
/** A function that combines the left and right operands.
|
|
464
|
+
* For example: `(a, b) => a + b` */
|
|
465
|
+
apply: (left: T, right: T) => T;
|
|
466
|
+
};
|
|
467
|
+
/**
|
|
468
|
+
* Builds an expression parser that handles operator precedence and associativity.
|
|
469
|
+
* Operators are given as an array of precedence levels, from **highest** to **lowest**.
|
|
470
|
+
* Each level is an array of operators at that precedence.
|
|
471
|
+
*
|
|
472
|
+
* ```ts
|
|
473
|
+
* const expr = buildExpressionParser(
|
|
474
|
+
* numberParser,
|
|
475
|
+
* [
|
|
476
|
+
* [{ op: char("*"), assoc: "left", apply: (a, b) => a * b },
|
|
477
|
+
* { op: char("/"), assoc: "left", apply: (a, b) => a / b }],
|
|
478
|
+
* [{ op: char("+"), assoc: "left", apply: (a, b) => a + b },
|
|
479
|
+
* { op: char("-"), assoc: "left", apply: (a, b) => a - b }],
|
|
480
|
+
* ],
|
|
481
|
+
* // optional: override how parenthesized sub-expressions are parsed
|
|
482
|
+
* // defaults to: seqR(char("("), lazy(() => expr), char(")")) mapped to the middle result
|
|
483
|
+
* );
|
|
484
|
+
* ```
|
|
485
|
+
*
|
|
486
|
+
* @param atom - parser for the smallest unit (numbers, identifiers, parenthesized sub-expressions)
|
|
487
|
+
* @param operatorTable - array of precedence levels, highest first. Each level is an array of OperatorInfo.
|
|
488
|
+
* @param parenParser - optional parser for parenthesized sub-expressions. If not provided,
|
|
489
|
+
* one is built automatically using `(` and `)` with `lazy` to handle recursion.
|
|
490
|
+
* @returns - a parser that handles the full expression grammar
|
|
491
|
+
*/
|
|
492
|
+
export declare function buildExpressionParser<T>(atom: Parser<T>, operatorTable: OperatorInfo<T>[][], parenParser?: Parser<T>): Parser<T>;
|
package/dist/combinators.js
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { within } from "./parsers/within.js";
|
|
2
2
|
import { TarsecError } from "./tarsecError.js";
|
|
3
|
-
import {
|
|
3
|
+
import { getDiagnostics, trace } from "./trace.js";
|
|
4
4
|
import { captureSuccess, createTree, failure, isCaptureResult, isSuccess, success, } from "./types.js";
|
|
5
5
|
import { escape, findAncestorWithNextParser, popMany } from "./utils.js";
|
|
6
6
|
/**
|
|
@@ -871,47 +871,158 @@ export function parseError(_message, ...parsers) {
|
|
|
871
871
|
return result;
|
|
872
872
|
}
|
|
873
873
|
else {
|
|
874
|
-
const
|
|
875
|
-
|
|
876
|
-
|
|
877
|
-
|
|
878
|
-
|
|
879
|
-
|
|
880
|
-
|
|
881
|
-
|
|
882
|
-
|
|
883
|
-
|
|
884
|
-
|
|
885
|
-
|
|
886
|
-
|
|
887
|
-
|
|
888
|
-
|
|
889
|
-
|
|
890
|
-
|
|
891
|
-
|
|
892
|
-
|
|
893
|
-
|
|
894
|
-
|
|
895
|
-
|
|
896
|
-
|
|
897
|
-
|
|
898
|
-
|
|
899
|
-
|
|
900
|
-
|
|
901
|
-
|
|
902
|
-
|
|
903
|
-
|
|
904
|
-
|
|
905
|
-
|
|
906
|
-
|
|
907
|
-
|
|
908
|
-
|
|
909
|
-
|
|
910
|
-
|
|
911
|
-
|
|
912
|
-
|
|
913
|
-
|
|
914
|
-
|
|
874
|
+
const error = getDiagnostics(result, input, _message);
|
|
875
|
+
throw new TarsecError(error);
|
|
876
|
+
}
|
|
877
|
+
};
|
|
878
|
+
}
|
|
879
|
+
/**
 * Postpones looking up a parser until parse time so that grammars can refer
 * to themselves.
 *
 * ```ts
 * // ReferenceError: expr is not defined
 * const expr = or(number, seqR(char("("), expr, char(")")));
 *
 * // Works: the reference to expr is deferred
 * const expr = or(number, seqR(char("("), lazy(() => expr), char(")")));
 * ```
 *
 * @param thunk - zero-argument function returning the parser to run
 * @returns - a traced parser (trace name "lazy") that calls `thunk` again on
 * every invocation and feeds the input to whatever parser it returns
 */
export function lazy(thunk) {
    const deferred = (input) => {
        // Resolve the target parser only now, when input is actually being parsed.
        const resolved = thunk();
        return resolved(input);
    };
    return trace("lazy", deferred);
}
|
|
897
|
+
/**
 * Builds an expression parser that handles operator precedence and associativity.
 * Operators are given as an array of precedence levels, from **highest** to **lowest**.
 * Each level is an array of operators at that precedence.
 *
 * ```ts
 * const expr = buildExpressionParser(
 *   numberParser,
 *   [
 *     [{ op: char("*"), assoc: "left", apply: (a, b) => a * b },
 *      { op: char("/"), assoc: "left", apply: (a, b) => a / b }],
 *     [{ op: char("+"), assoc: "left", apply: (a, b) => a + b },
 *      { op: char("-"), assoc: "left", apply: (a, b) => a - b }],
 *   ],
 *   // optional: override how parenthesized sub-expressions are parsed
 * );
 * ```
 *
 * At the innermost level the paren parser is tried first; `atom` is only run
 * on the same input when the paren parser fails.
 *
 * @param atom - parser for the smallest unit (numbers, identifiers, ...)
 * @param operatorTable - array of precedence levels, highest first. Each level is an array of OperatorInfo.
 * @param parenParser - optional parser for parenthesized sub-expressions. If omitted
 * (null or undefined), a default one is used that expects a literal `(`, the full
 * expression, and a literal `)`; its failures are reported at the position of the
 * opening parenthesis.
 * @returns - a parser that handles the full expression grammar
 */
export function buildExpressionParser(atom, operatorTable, parenParser) {
    // Filled in after all precedence levels are stacked. The default paren
    // parser only reads it at parse time, so the forward reference is safe.
    let expr;
    function buildDefaultParenParser() {
        return (input) => {
            if (input[0] !== "(") {
                return failure("expected (", input);
            }
            const inner = expr(input.slice(1));
            if (!inner.success) {
                // Re-anchor the inner failure at the opening parenthesis.
                return failure(inner.message, input);
            }
            if (inner.rest[0] !== ")") {
                return failure("expected )", input);
            }
            return success(inner.result, inner.rest.slice(1));
        };
    }
    // Pick the paren parser once here instead of rebuilding the default
    // closure every time the base parser runs.
    const paren = parenParser !== null && parenParser !== undefined
        ? parenParser
        : buildDefaultParenParser();
    // Innermost level: a parenthesized expression, otherwise an atom.
    const base = (input) => {
        const grouped = paren(input);
        return grouped.success ? grouped : atom(input);
    };
    // Wrap the base with each precedence level in table order, so earlier
    // (higher-precedence) levels end up innermost and bind tighter.
    let currentLevel = base;
    for (const ops of operatorTable) {
        currentLevel = buildLevel(currentLevel, ops);
    }
    expr = currentLevel;
    return expr;
}
|
|
958
|
+
/**
 * Builds the parser for one precedence level.
 *
 * The returned parser first runs `nextLevel` (the tighter-binding level) to
 * get the left operand, then hands off to a chain parser chosen by the
 * associativity of the operators in `ops`:
 *   - both "left" and "right" present: `parseChain` over all of `ops`
 *   - only "right" present: `parseRight` over the right-associative operators
 *   - otherwise: `parseLeft` over the left-associative operators (possibly none)
 *
 * The partition of `ops` and the choice of strategy are made once here, not on
 * every parse, so `ops` must not be mutated after the level is built.
 *
 * @param nextLevel - parser for operands of this level
 * @param ops - operators (OperatorInfo objects) that share this precedence
 * @returns - a parser for an operand followed by any operator chain at this level
 */
function buildLevel(nextLevel, ops) {
    const rightOps = ops.filter((o) => o.assoc === "right");
    const leftOps = ops.filter((o) => o.assoc === "left");
    let parseTail;
    if (rightOps.length > 0 && leftOps.length > 0) {
        // Mixed associativity at the same level is unusual; parseChain handles it.
        parseTail = (left, rest) => parseChain(left, rest, nextLevel, ops);
    }
    else if (rightOps.length > 0) {
        parseTail = (left, rest) => parseRight(left, rest, nextLevel, rightOps);
    }
    else {
        parseTail = (left, rest) => parseLeft(left, rest, nextLevel, leftOps);
    }
    return (input) => {
        const leftResult = nextLevel(input);
        if (!leftResult.success) {
            return leftResult;
        }
        return parseTail(leftResult.result, leftResult.rest);
    };
}
|
|
982
|
+
/**
 * Folds a chain of operators to the left: `a op b op c` becomes
 * `apply(apply(a, b), c)`.
 *
 * The loop stops, without consuming the operator, as soon as no operator
 * matches or the operand after a matched operator fails to parse.
 *
 * @param left - the already-parsed first operand
 * @param rest - the input remaining after `left`
 * @param nextLevel - parser for each following operand
 * @param ops - operators to try at this level
 * @returns - a success holding the folded value and the unconsumed input
 */
function parseLeft(left, rest, nextLevel, ops) {
    let folded = left;
    let remaining = rest;
    for (let match = tryOps(ops, remaining); match; match = tryOps(ops, remaining)) {
        const operand = nextLevel(match.rest);
        if (!operand.success) {
            break;
        }
        folded = match.apply(folded, operand.result);
        remaining = operand.rest;
    }
    return success(folded, remaining);
}
|
|
995
|
+
/**
 * Folds a chain of operators to the right: `a op b op c` becomes
 * `apply(a, apply(b, c))`.
 *
 * Operands and operator matches are collected left to right, then combined
 * starting from the rightmost pair. Collection stops, without consuming the
 * operator, when no operator matches or the operand after it fails to parse.
 *
 * @param left - the already-parsed first operand
 * @param rest - the input remaining after `left`
 * @param nextLevel - parser for each following operand
 * @param ops - operators to try at this level
 * @returns - a success holding the folded value and the unconsumed input
 */
function parseRight(left, rest, nextLevel, ops) {
    const operands = [left];
    const matches = [];
    let remaining = rest;
    for (;;) {
        const match = tryOps(ops, remaining);
        if (!match) {
            break;
        }
        const operand = nextLevel(match.rest);
        if (!operand.success) {
            break;
        }
        matches.push(match);
        operands.push(operand.result);
        remaining = operand.rest;
    }
    // Combine from the right so the innermost application happens first.
    let folded = operands[operands.length - 1];
    for (let i = matches.length - 1; i >= 0; i--) {
        folded = matches[i].apply(operands[i], folded);
    }
    return success(folded, remaining);
}
|
|
1006
|
+
/**
 * Parses an operator chain for a level that mixes left- and right-associative
 * operators.
 *
 * The `assoc` field of each operator is not consulted: every operator in
 * `ops` is folded to the left, exactly as `parseLeft` does. This delegates to
 * `parseLeft` rather than repeating its loop, so the two cannot drift apart.
 *
 * @param left - the already-parsed first operand
 * @param rest - the input remaining after `left`
 * @param nextLevel - parser for each following operand
 * @param ops - all operators at this level, regardless of associativity
 * @returns - a success holding the left-folded value and the unconsumed input
 */
function parseChain(left, rest, nextLevel, ops) {
    return parseLeft(left, rest, nextLevel, ops);
}
|
|
1020
|
+
function tryOps(ops, input) {
|
|
1021
|
+
for (const op of ops) {
|
|
1022
|
+
const result = op.op(input);
|
|
1023
|
+
if (result.success) {
|
|
1024
|
+
return { rest: result.rest, apply: op.apply };
|
|
1025
|
+
}
|
|
1026
|
+
}
|
|
1027
|
+
return null;
|
|
1028
|
+
}
|
package/dist/index.d.ts
CHANGED
package/dist/index.js
CHANGED
package/dist/parsers.d.ts
CHANGED
|
@@ -44,6 +44,16 @@ export declare function noneOf(chars: string): Parser<string>;
|
|
|
44
44
|
* @returns - ParserResult
|
|
45
45
|
*/
|
|
46
46
|
export declare const anyChar: Parser<string>;
|
|
47
|
+
/**
|
|
48
|
+
* Wraps a parser with a human-readable label for error reporting.
|
|
49
|
+
* On failure, suppresses any inner failure recordings and records only the label.
|
|
50
|
+
* This produces clean error messages like `expected a digit` instead of `expected one of "0123456789"`.
|
|
51
|
+
*
|
|
52
|
+
* @param name - human-readable description of what the parser expects
|
|
53
|
+
* @param parser - the parser to wrap
|
|
54
|
+
* @returns - a parser that records the label on failure
|
|
55
|
+
*/
|
|
56
|
+
export declare function label<T>(name: string, parser: Parser<T>): Parser<T>;
|
|
47
57
|
/** A parser that matches one of " \t\n\r". */
|
|
48
58
|
export declare const space: Parser<string>;
|
|
49
59
|
/** A parser that matches one or more spaces. */
|
package/dist/parsers.js
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import { many1WithJoin, manyWithJoin, seq } from "./combinators.js";
|
|
2
2
|
import { trace } from "./trace.js";
|
|
3
|
+
import { recordFailure, saveRightmostFailure, restoreRightmostFailure } from "./rightmostFailure.js";
|
|
3
4
|
import { captureSuccess, failure, success, } from "./types.js";
|
|
4
5
|
import { escape } from "./utils.js";
|
|
5
6
|
export { within } from "./parsers/within.js";
|
|
@@ -12,15 +13,13 @@ export { within } from "./parsers/within.js";
|
|
|
12
13
|
export function char(c) {
|
|
13
14
|
return trace(`char(${escape(c)})`, (input) => {
|
|
14
15
|
if (input.length === 0) {
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
rest: input,
|
|
18
|
-
message: "unexpected end of input",
|
|
19
|
-
};
|
|
16
|
+
recordFailure(input, `"${c}"`);
|
|
17
|
+
return failure("unexpected end of input", input);
|
|
20
18
|
}
|
|
21
19
|
if (input[0] === c) {
|
|
22
20
|
return success(c, input.slice(1));
|
|
23
21
|
}
|
|
22
|
+
recordFailure(input, `"${c}"`);
|
|
24
23
|
return failure(`expected ${escape(c)}, got ${escape(input[0])}`, input);
|
|
25
24
|
});
|
|
26
25
|
}
|
|
@@ -35,6 +34,7 @@ export function str(s) {
|
|
|
35
34
|
if (input.substring(0, s.length) === s) {
|
|
36
35
|
return success(s, input.slice(s.length));
|
|
37
36
|
}
|
|
37
|
+
recordFailure(input, `"${s}"`);
|
|
38
38
|
return failure(`expected ${s}, got ${input.substring(0, s.length)}`, input);
|
|
39
39
|
});
|
|
40
40
|
}
|
|
@@ -48,6 +48,7 @@ export function istr(s) {
|
|
|
48
48
|
if (input.substring(0, s.length).toLocaleLowerCase() === s.toLocaleLowerCase()) {
|
|
49
49
|
return success(input.substring(0, s.length), input.slice(s.length));
|
|
50
50
|
}
|
|
51
|
+
recordFailure(input, `"${s}"`);
|
|
51
52
|
return failure(`expected ${s}, got ${input.substring(0, s.length)}`, input);
|
|
52
53
|
});
|
|
53
54
|
}
|
|
@@ -61,12 +62,14 @@ export function istr(s) {
|
|
|
61
62
|
export function oneOf(chars) {
    // Expectation text recorded for the rightmost-failure report.
    const expectation = `one of "${chars}"`;
    return trace(`oneOf(${escape(chars)})`, (input) => {
        const exhausted = input.length === 0;
        if (!exhausted && chars.includes(input[0])) {
            // The head character is allowed: let char() consume it.
            return char(input[0])(input);
        }
        recordFailure(input, expectation);
        const reason = exhausted
            ? "unexpected end of input"
            : `expected one of ${escape(chars)}, got ${input[0]}`;
        return failure(reason, input);
    });
}
|
|
@@ -80,9 +83,11 @@ export function oneOf(chars) {
|
|
|
80
83
|
export function noneOf(chars) {
|
|
81
84
|
return trace(`noneOf(${escape(chars)})`, (input) => {
|
|
82
85
|
if (input.length === 0) {
|
|
86
|
+
recordFailure(input, `none of "${chars}"`);
|
|
83
87
|
return failure("unexpected end of input", input);
|
|
84
88
|
}
|
|
85
89
|
if (chars.includes(input[0])) {
|
|
90
|
+
recordFailure(input, `none of "${chars}"`);
|
|
86
91
|
return failure(`expected none of ${escape(chars)}, got ${input[0]}`, input);
|
|
87
92
|
}
|
|
88
93
|
return char(input[0])(input);
|
|
@@ -97,26 +102,46 @@ export function noneOf(chars) {
|
|
|
97
102
|
*/
|
|
98
103
|
export const anyChar = trace("anyChar", (input) => {
    if (input.length > 0) {
        // Any single character is accepted; consume exactly one.
        return success(input[0], input.slice(1));
    }
    // Nothing left to consume: record the expectation and fail.
    recordFailure(input, "any character");
    return failure("unexpected end of input", input);
});
|
|
110
|
+
/**
 * Gives a parser a human-readable name for error reporting.
 *
 * The rightmost-failure state is saved before the wrapped parser runs and
 * restored afterwards, whether or not the parser succeeded, so nothing the
 * wrapped parser recorded is kept. If the parser failed, `name` is then
 * recorded as the expectation at the position where the wrapped parser
 * started. This gives messages like `expected a digit` instead of
 * `expected one of "0123456789"`.
 *
 * @param name - human-readable description of what the parser expects
 * @param parser - the parser to wrap
 * @returns - a parser with the same result as `parser` that records `name` on failure
 */
export function label(name, parser) {
    return (input) => {
        const snapshot = saveRightmostFailure();
        const outcome = parser(input);
        // Discard whatever the inner parser recorded.
        restoreRightmostFailure(snapshot);
        if (outcome.success) {
            return outcome;
        }
        recordFailure(input, name);
        return outcome;
    };
}
|
|
104
129
|
/** A parser that matches one of " \t\n\r". */
|
|
105
|
-
export const space = oneOf(" \t\n\r");
|
|
130
|
+
export const space = label("whitespace", oneOf(" \t\n\r"));
|
|
106
131
|
/** A parser that matches one or more spaces. */
|
|
107
132
|
export const spaces = many1WithJoin(space);
|
|
108
133
|
/** A parser that matches one digit. */
|
|
109
|
-
export const digit = oneOf("0123456789");
|
|
134
|
+
export const digit = label("a digit", oneOf("0123456789"));
|
|
110
135
|
/** A parser that matches one letter, case insensitive. */
|
|
111
|
-
export const letter = oneOf("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ");
|
|
136
|
+
export const letter = label("a letter", oneOf("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"));
|
|
112
137
|
/** A parser that matches one digit or letter, case insensitive. */
|
|
113
|
-
export const alphanum = oneOf("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789");
|
|
138
|
+
export const alphanum = label("a letter or digit", oneOf("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"));
|
|
114
139
|
/** A parser that matches one word, case insensitive. */
|
|
115
|
-
export const word = regexParser("^[a-z]+", "ui");
|
|
140
|
+
export const word = label("a word", regexParser("^[a-z]+", "ui"));
|
|
116
141
|
/** A parser that matches one or more digits. */
|
|
117
|
-
export const num = regexParser("^[0-9]+");
|
|
142
|
+
export const num = label("a number", regexParser("^[0-9]+"));
|
|
118
143
|
/** A parser that matches one single or double quote. */
|
|
119
|
-
export const quote = oneOf(`'"`);
|
|
144
|
+
export const quote = label("a quote", oneOf(`'"`));
|
|
120
145
|
/** A parser that matches one tab character. */
|
|
121
146
|
export const tab = char("\t");
|
|
122
147
|
/** A parser that matches one newline ("\n" only) character. */
|
|
@@ -126,6 +151,7 @@ export const eof = (input) => {
|
|
|
126
151
|
if (input === "") {
|
|
127
152
|
return success(null, input);
|
|
128
153
|
}
|
|
154
|
+
recordFailure(input, "end of input");
|
|
129
155
|
return failure("expected end of input", input);
|
|
130
156
|
};
|
|
131
157
|
/** A parser that matches a quoted string, in single or double quotes.
|
|
@@ -154,6 +180,7 @@ export function regexParser(str, options = "") {
|
|
|
154
180
|
if (match) {
|
|
155
181
|
return success(match[0], input.slice(match[0].length));
|
|
156
182
|
}
|
|
183
|
+
recordFailure(input, `${str}`);
|
|
157
184
|
return failure(`expected ${str}, got ${input.slice(0, 10)}`, input);
|
|
158
185
|
});
|
|
159
186
|
}
|
|
@@ -187,6 +214,7 @@ export function captureRegex(str, options = "", ...captureNames) {
|
|
|
187
214
|
const captures = Object.assign({}, Object.fromEntries(match.slice(1).map((value, index) => [captureNames[index], value])));
|
|
188
215
|
return success(captures, input.slice(match[0].length));
|
|
189
216
|
}
|
|
217
|
+
recordFailure(input, `${str}`);
|
|
190
218
|
return failure(`expected ${str}, got ${input.slice(0, 10)}`, input);
|
|
191
219
|
};
|
|
192
220
|
return trace(`captureRegex(${str})`, _parser);
|
|
package/dist/position.d.ts
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
import { Parser } from "./types.js";
|
|
2
|
+
export type Position = {
|
|
3
|
+
offset: number;
|
|
4
|
+
line: number;
|
|
5
|
+
column: number;
|
|
6
|
+
};
|
|
7
|
+
export type Span = {
|
|
8
|
+
start: Position;
|
|
9
|
+
end: Position;
|
|
10
|
+
};
|
|
11
|
+
/**
|
|
12
|
+
* Build a lookup table of line-start offsets for a given source string.
|
|
13
|
+
* This allows O(log n) offset-to-position conversion via binary search.
|
|
14
|
+
*/
|
|
15
|
+
export declare function buildLineTable(source: string): number[];
|
|
16
|
+
/**
|
|
17
|
+
* Convert an absolute offset into a line and column using a precomputed line table.
|
|
18
|
+
* Both line and column are 0-based.
|
|
19
|
+
*/
|
|
20
|
+
export declare function offsetToPosition(lineTable: number[], offset: number): Position;
|
|
21
|
+
/**
|
|
22
|
+
* A zero-width parser that returns the current offset into the input string.
|
|
23
|
+
* Requires `setInputStr` to have been called with the full input.
|
|
24
|
+
*/
|
|
25
|
+
export declare const getOffset: Parser<number>;
|
|
26
|
+
/**
|
|
27
|
+
* A zero-width parser that returns the current position (offset, line, column).
|
|
28
|
+
* Requires `setInputStr` to have been called with the full input.
|
|
29
|
+
*/
|
|
30
|
+
export declare const getPosition: Parser<Position>;
|
|
31
|
+
/**
|
|
32
|
+
* Wraps a parser so that its result includes span information (start and end positions).
|
|
33
|
+
* Useful for building ASTs with location data for language servers / editors.
|
|
34
|
+
* Requires `setInputStr` to have been called with the full input.
|
|
35
|
+
*
|
|
36
|
+
* @example
|
|
37
|
+
* ```ts
|
|
38
|
+
* const locatedWord = withSpan(word);
|
|
39
|
+
* // Result: { value: "hello", span: { start: { offset: 0, line: 0, column: 0 }, end: { offset: 5, line: 0, column: 5 } } }
|
|
40
|
+
* ```
|
|
41
|
+
*/
|
|
42
|
+
export declare function withSpan<T>(parser: Parser<T>): Parser<{
|
|
43
|
+
value: T;
|
|
44
|
+
span: Span;
|
|
45
|
+
}>;
|
package/dist/position.js
ADDED
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
import { getInputStr } from "./trace.js";
|
|
2
|
+
import { success } from "./types.js";
|
|
3
|
+
/**
 * Computes the offset at which every line of `source` begins.
 *
 * The first entry is always 0; each "\n" at index i adds an entry i + 1.
 * The table is sorted ascending, which lets `offsetToPosition` binary-search it.
 *
 * @param source - the full source text
 * @returns - ascending array of line-start offsets
 */
export function buildLineTable(source) {
    const lineStarts = [0];
    let newlineAt = source.indexOf("\n");
    while (newlineAt !== -1) {
        lineStarts.push(newlineAt + 1);
        newlineAt = source.indexOf("\n", newlineAt + 1);
    }
    return lineStarts;
}
|
|
16
|
+
/**
 * Translates an absolute offset into a 0-based line and column.
 *
 * Binary-searches `lineTable` for the last line whose start offset is not
 * greater than `offset`; the column is the distance from that line start.
 *
 * @param lineTable - ascending line-start offsets, as produced by `buildLineTable`
 * @param offset - absolute offset into the source
 * @returns - `{ offset, line, column }` with 0-based line and column
 */
export function offsetToPosition(lineTable, offset) {
    let first = 0;
    let last = lineTable.length - 1;
    while (first < last) {
        // Round the midpoint up so that `first = probe` always makes progress.
        const probe = (first + last + 1) >> 1;
        if (lineTable[probe] <= offset) {
            first = probe;
        }
        else {
            last = probe - 1;
        }
    }
    const line = first;
    const column = offset - lineTable[line];
    return { offset, line, column };
}
|
|
39
|
+
/**
 * Zero-width parser yielding the current offset into the full input.
 *
 * The offset is the length of the string returned by `getInputStr()` minus
 * the length of the remaining input, so the full input must have been
 * registered with `setInputStr` for the value to be meaningful.
 * No input is consumed.
 */
export const getOffset = (input) => success(getInputStr().length - input.length, input);
|
|
47
|
+
/**
 * Zero-width parser yielding the current position (offset, line, column).
 *
 * The position is derived from the string returned by `getInputStr()`, so the
 * full input must have been registered with `setInputStr`. A line table for
 * that string is built on each call. No input is consumed.
 */
export const getPosition = (input) => {
    const fullInput = getInputStr();
    const consumed = fullInput.length - input.length;
    const position = offsetToPosition(buildLineTable(fullInput), consumed);
    return success(position, input);
};
|
|
57
|
+
/**
 * Decorates a parser so a successful result carries its source span.
 * Useful for building ASTs with location data for language servers / editors.
 *
 * Positions are computed against the string returned by `getInputStr()`, so
 * the full input must have been registered with `setInputStr`. A failure of
 * the wrapped parser is returned unchanged.
 *
 * @example
 * ```ts
 * const locatedWord = withSpan(word);
 * // Result: { value: "hello", span: { start: { offset: 0, line: 0, column: 0 }, end: { offset: 5, line: 0, column: 5 } } }
 * ```
 *
 * @param parser - the parser whose result should be located
 * @returns - a parser producing `{ value, span: { start, end } }`
 */
export function withSpan(parser) {
    return (input) => {
        const fullInput = getInputStr();
        const lineStarts = buildLineTable(fullInput);
        // Position of the point where `remaining` begins within the full input.
        const locate = (remaining) => offsetToPosition(lineStarts, fullInput.length - remaining.length);
        const inner = parser(input);
        if (inner.success) {
            const span = { start: locate(input), end: locate(inner.rest) };
            return success({ value: inner.result, span }, inner.rest);
        }
        return inner;
    };
}
|
|
package/dist/rightmostFailure.d.ts
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
export declare function resetRightmostFailure(): void;
|
|
2
|
+
/**
|
|
3
|
+
* Record an expected alternative at the current failure position.
|
|
4
|
+
* If this position is further right than any previous failure, it replaces the previous.
|
|
5
|
+
* If it's at the same position, it adds to the list of expectations (with dedup).
|
|
6
|
+
* No-op if `setInputStr` has not been called.
|
|
7
|
+
*
|
|
8
|
+
* @param input - the remaining input at the point of failure
|
|
9
|
+
* @param expected - a human-readable description of what was expected
|
|
10
|
+
*/
|
|
11
|
+
export declare function recordFailure(input: string, expected: string): void;
|
|
12
|
+
/**
|
|
13
|
+
* Returns the current rightmost failure position and expected alternatives,
|
|
14
|
+
* or `null` if no failures have been recorded.
|
|
15
|
+
*/
|
|
16
|
+
export declare function getRightmostFailure(): {
|
|
17
|
+
pos: number;
|
|
18
|
+
expected: string[];
|
|
19
|
+
} | null;
|
|
20
|
+
type SavedRightmostFailure = {
|
|
21
|
+
pos: number;
|
|
22
|
+
expected: string[];
|
|
23
|
+
};
|
|
24
|
+
export declare function saveRightmostFailure(): SavedRightmostFailure;
|
|
25
|
+
export declare function restoreRightmostFailure(saved: SavedRightmostFailure): void;
|
|
26
|
+
/**
|
|
27
|
+
* Formats the rightmost failure into a human-readable error message with line and column info.
|
|
28
|
+
* Returns `null` if no failures have been recorded.
|
|
29
|
+
* Requires `setInputStr` to have been called.
|
|
30
|
+
*/
|
|
31
|
+
export declare function getErrorMessage(): string | null;
|
|
32
|
+
export {};
|
|
package/dist/rightmostFailure.js
ADDED
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
import { getInputStr } from "./trace.js";
|
|
2
|
+
import { buildLineTable, offsetToPosition } from "./position.js";
|
|
3
|
+
let rightmostFailurePos = -1;
|
|
4
|
+
let rightmostFailureExpected = [];
|
|
5
|
+
/**
 * Clears the rightmost-failure state: empties the expectation list and sets
 * the position to -1, the value the getters treat as "nothing recorded".
 */
export function resetRightmostFailure() {
    rightmostFailureExpected = [];
    rightmostFailurePos = -1;
}
|
|
9
|
+
/**
 * Notes that `expected` was wanted at the position where `input` begins.
 *
 * The position is the length of the string returned by `getInputStr()` minus
 * `input.length`. A position further right than the stored one replaces the
 * stored failure; the same position appends `expected` unless it is already
 * listed; a position further left is ignored. Nothing is recorded while the
 * stored input string is empty.
 *
 * @param input - the remaining input at the point of failure
 * @param expected - a human-readable description of what was expected
 */
export function recordFailure(input, expected) {
    const fullInput = getInputStr();
    if (fullInput.length === 0) {
        return;
    }
    const failedAt = fullInput.length - input.length;
    if (failedAt > rightmostFailurePos) {
        // New rightmost failure: start a fresh expectation list.
        rightmostFailurePos = failedAt;
        rightmostFailureExpected = [expected];
        return;
    }
    if (failedAt !== rightmostFailurePos) {
        return;
    }
    if (!rightmostFailureExpected.includes(expected)) {
        rightmostFailureExpected.push(expected);
    }
}
|
|
33
|
+
/**
 * Reports the rightmost failure recorded so far.
 *
 * @returns - `{ pos, expected }`, where `expected` is a copy of the stored
 * list, or `null` while the stored position is negative
 */
export function getRightmostFailure() {
    return rightmostFailurePos < 0
        ? null
        : { pos: rightmostFailurePos, expected: rightmostFailureExpected.slice() };
}
|
|
42
|
+
/**
 * Takes a snapshot of the rightmost-failure state for a later
 * `restoreRightmostFailure`. The expectation list is copied, so later
 * recordings do not change the snapshot.
 */
export function saveRightmostFailure() {
    const expected = rightmostFailureExpected.slice();
    return { pos: rightmostFailurePos, expected };
}
|
|
45
|
+
/**
 * Replaces the rightmost-failure state with a snapshot from
 * `saveRightmostFailure`. The expectation list is copied, so the snapshot can
 * be restored again later.
 *
 * @param saved - snapshot holding `pos` and `expected`
 */
export function restoreRightmostFailure(saved) {
    const { pos, expected } = saved;
    rightmostFailurePos = pos;
    rightmostFailureExpected = Array.from(expected);
}
|
|
49
|
+
/**
 * Joins expectation descriptions into an English list:
 *   []              -> ""
 *   ["a"]           -> "a"
 *   ["a", "b"]      -> "a or b"
 *   ["a", "b", "c"] -> "a, b, or c"
 *
 * An empty list yields an empty string, not the literal text "undefined".
 *
 * @param expected - descriptions of what was expected
 * @returns - the descriptions joined with commas and a final "or"
 */
function formatExpected(expected) {
    switch (expected.length) {
        case 0:
            return "";
        case 1:
            return expected[0];
        case 2:
            return `${expected[0]} or ${expected[1]}`;
        default: {
            const leading = expected.slice(0, -1).join(", ");
            return `${leading}, or ${expected[expected.length - 1]}`;
        }
    }
}
|
|
56
|
+
/**
 * Renders the rightmost failure as `Line L, col C: expected ...`.
 *
 * Line and column are 1-based in the message. They are computed from the
 * string returned by `getInputStr()`, so the full input must have been
 * registered with `setInputStr`.
 *
 * @returns - the formatted message, or `null` while the stored position is negative
 */
export function getErrorMessage() {
    if (rightmostFailurePos < 0) {
        return null;
    }
    const lineStarts = buildLineTable(getInputStr());
    const where = offsetToPosition(lineStarts, rightmostFailurePos);
    // offsetToPosition is 0-based; the message counts from 1.
    const humanLine = where.line + 1;
    const humanColumn = where.column + 1;
    return `Line ${humanLine}, col ${humanColumn}: expected ${formatExpected(rightmostFailureExpected)}`;
}
|
package/dist/tarsecError.d.ts
CHANGED
package/dist/trace.d.ts
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
|
-
import { ParserResult, Parser, PlainObject, CaptureParser } from "./types.js";
|
|
1
|
+
import { ParserResult, Parser, PlainObject, CaptureParser, ParserFailure } from "./types.js";
|
|
2
|
+
import { TarsecErrorData } from "./tarsecError.js";
|
|
2
3
|
export declare function setTraceHost(host: string): void;
|
|
3
4
|
export declare function getTraceHost(): string;
|
|
4
5
|
export declare function setTraceId(id: string): void;
|
|
@@ -122,3 +123,4 @@ export declare function limitSteps(limit: number, callback: Function): void;
|
|
|
122
123
|
*/
|
|
123
124
|
export declare function setInputStr(s: string): void;
|
|
124
125
|
export declare function getInputStr(): string;
|
|
126
|
+
export declare function getDiagnostics(result: ParserFailure, input: string, _message?: string): TarsecErrorData;
|
package/dist/trace.js
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import { escape, round, shorten } from "./utils.js";
|
|
2
2
|
import process from "process";
|
|
3
3
|
import { execSync } from "child_process";
|
|
4
|
+
import { resetRightmostFailure } from "./rightmostFailure.js";
|
|
4
5
|
const isNode = typeof process !== "undefined" &&
|
|
5
6
|
process.versions != null &&
|
|
6
7
|
process.versions.node != null;
|
|
@@ -194,7 +195,50 @@ let inputStr = "";
|
|
|
194
195
|
*/
|
|
195
196
|
export function setInputStr(s) {
|
|
196
197
|
inputStr = s;
|
|
198
|
+
resetRightmostFailure();
|
|
197
199
|
}
|
|
198
200
|
export function getInputStr() {
|
|
199
201
|
return inputStr;
|
|
200
202
|
}
|
|
203
|
+
/**
 * Builds the error data for a failed parse.
 *
 * When a full input string is available from `getInputStr()`, the failure
 * offset is taken as full length minus `input.length`, clamped to the bounds
 * of the full input. From it this derives:
 *   - `line` / `column`: 0-based, counting each "\n" as the end of a line;
 *   - `prettyMessage`: a `Near: ` preview of up to 20 characters either side
 *     of the failure, limited to the line it is on, followed by a caret line
 *     pointing at the failing character and then the message.
 *
 * When no full input string is stored, the preview is the first 100
 * characters of `input`, and line, column and length are 0.
 *
 * @param result - the failed parser result; its `message` is the fallback text
 * @param input - the input remaining at the point of failure
 * @param _message - optional message that takes precedence over `result.message`
 * @returns - `{ line, column, length, prettyMessage, message }`
 */
export function getDiagnostics(result, input, _message) {
    const source = getInputStr();
    const prefix = "Near: ";
    const message = _message || result.message || "Parsing failed";
    if (source.length === 0) {
        // Failure cannot be located without the full input; show the head of what is left.
        const preview = `${prefix}${input.substring(0, 100)}`;
        return {
            line: 0,
            column: 0,
            length: 0,
            prettyMessage: [preview, message].join("\n"),
            message: message,
        };
    }
    // Clamp so an `input` that is not a suffix of the stored source cannot
    // produce a negative offset (and a negative repeat count below).
    const index = Math.min(Math.max(source.length - input.length, 0), source.length);
    // Bounds of the line containing the failure: [lineStart, lineEnd).
    const lineStart = index === 0 ? 0 : source.lastIndexOf("\n", index - 1) + 1;
    const nextNewline = source.indexOf("\n", index);
    const lineEnd = nextNewline === -1 ? source.length : nextNewline;
    // 0-based line number = number of newlines strictly before the failure.
    let line = 0;
    for (let nl = source.indexOf("\n"); nl !== -1 && nl < index; nl = source.indexOf("\n", nl + 1)) {
        line++;
    }
    const column = index - lineStart;
    // Preview window: at most 20 characters either side, never leaving the line.
    const start = Math.max(lineStart, index - 20);
    const end = Math.min(lineEnd, index + 20);
    const previewStr = source.substring(start, end);
    const messages = [
        `${prefix}${previewStr}`,
        // The caret is positioned relative to the start of the preview window.
        `${" ".repeat(prefix.length + (index - start))}^`,
        message,
    ];
    return {
        line,
        column,
        length: 1,
        prettyMessage: messages.join("\n"),
        message: message,
    };
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "tarsec",
|
|
3
|
-
"version": "0.1.8",
|
|
3
|
+
"version": "0.2.0",
|
|
4
4
|
"description": "A parser combinator library for TypeScript, inspired by Parsec.",
|
|
5
5
|
"homepage": "https://github.com/egonSchiele/tarsec",
|
|
6
6
|
"scripts": {
|
|
@@ -38,4 +38,4 @@
|
|
|
38
38
|
"typescript": "^5.4.2",
|
|
39
39
|
"vitest": "^1.4.0"
|
|
40
40
|
}
|
|
41
|
-
}
|
|
41
|
+
}
|