tarsec 0.3.2 → 0.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/dist/combinators.d.ts +40 -15
- package/dist/combinators.js +32 -109
- package/dist/types.d.ts +1 -20
- package/dist/types.js +0 -25
- package/dist/utils.d.ts +0 -3
- package/dist/utils.js +0 -16
- package/package.json +1 -1
- package/dist/combinators/seq.d.ts +0 -1
- package/dist/combinators/seq.js +0 -1
package/README.md
CHANGED
|
@@ -43,7 +43,7 @@ parser("hello there"); // failure
|
|
|
43
43
|
- Derived types: tarsec will generate TypeScript types for your parser
|
|
44
44
|
- [Debug mode](/tutorials/debugging.md) that prints what's happening step-by-step
|
|
45
45
|
- Tools to debug your parser's [performance](/tutorials/performance.md)
|
|
46
|
-
-
|
|
46
|
+
- `peek` / `not` lookahead for [disambiguating grammars](/tutorials/backtracking.md) without backtracking
|
|
47
47
|
- A way to make your parser more [secure](/tutorials/security.md).
|
|
48
48
|
- [Pretty error messages](/tutorials/pretty-errors.md)
|
|
49
49
|
|
package/dist/combinators.d.ts
CHANGED
|
@@ -58,20 +58,6 @@ export declare function many1WithJoin(parser: Parser<string>): Parser<string>;
|
|
|
58
58
|
* const parser = or(capture(digit, "num"), capture(word, "name"));
|
|
59
59
|
* ```
|
|
60
60
|
*
|
|
61
|
-
* `or` supports backtracking by returning a `nextParser`:
|
|
62
|
-
*
|
|
63
|
-
* ```ts
|
|
64
|
-
* const parser = or(str("hello"), str("hello!"));
|
|
65
|
-
*
|
|
66
|
-
* // this will match the first parser
|
|
67
|
-
* const result = parser("hello");
|
|
68
|
-
*
|
|
69
|
-
* // but or returns the untried parsers as a new parser
|
|
70
|
-
* result.nextParser("hello!"); // works
|
|
71
|
-
*
|
|
72
|
-
* // result.nextParser is the same as or(str("hello!"))
|
|
73
|
-
* ```
|
|
74
|
-
*
|
|
75
61
|
* @param parsers - parsers to try
|
|
76
62
|
* @returns - a parser that tries each parser in order. Returns the result of the first parser that succeeds.
|
|
77
63
|
*/
|
|
@@ -96,6 +82,28 @@ export declare function optional<T>(parser: Parser<T>): Parser<T | null>;
|
|
|
96
82
|
* If it succeeds, returns a failure.
|
|
97
83
|
*/
|
|
98
84
|
export declare function not(parser: Parser<any>): Parser<null>;
|
|
85
|
+
/**
|
|
86
|
+
* Positive lookahead. Runs the given parser without consuming any input.
|
|
87
|
+
* On success, returns the parser's result with `rest` set to the original input.
|
|
88
|
+
* On failure, returns the underlying failure (also with `rest` reset to the original input).
|
|
89
|
+
*
|
|
90
|
+
* Useful for disambiguating alternatives without backtracking:
|
|
91
|
+
*
|
|
92
|
+
* ```ts
|
|
93
|
+
* const parser = or(
|
|
94
|
+
* seqR(peek(str("hello!")), str("hello!")),
|
|
95
|
+
* str("hello"),
|
|
96
|
+
* );
|
|
97
|
+
* ```
|
|
98
|
+
*
|
|
99
|
+
* The `peek` decides which branch to commit to; the real parser then consumes.
|
|
100
|
+
* Captures are preserved when the inner parser is a `CaptureParser`.
|
|
101
|
+
*
|
|
102
|
+
* @param parser - parser to look ahead with
|
|
103
|
+
* @returns - a parser that runs the given parser without consuming input
|
|
104
|
+
*/
|
|
105
|
+
export declare function peek<T>(parser: Parser<T>): Parser<T>;
|
|
106
|
+
export declare function peek<T, C extends PlainObject>(parser: CaptureParser<T, C>): CaptureParser<T, C>;
|
|
99
107
|
/**
|
|
100
108
|
* Takes three parsers, `open`, `close`, and `parser`.
|
|
101
109
|
* `between` matches multiple instances of `parser`,
|
|
@@ -357,7 +365,6 @@ export declare function search<T>(parser: Parser<T>): Parser<T[]>;
|
|
|
357
365
|
* Finally, you don't need to use seq at all. You can just hand write the logic.
|
|
358
366
|
* But you'll need to do the error handling
|
|
359
367
|
* and pass the remaining input to the next parser yourself.
|
|
360
|
-
* seq also does some backtracking for you that you will need to do yourself.
|
|
361
368
|
*
|
|
362
369
|
* Also see `seqR` and `seqC` for convenience functions that return the results or captures respectively.
|
|
363
370
|
*
|
|
@@ -490,3 +497,21 @@ export type OperatorInfo<T> = {
|
|
|
490
497
|
* @returns - a parser that handles the full expression grammar
|
|
491
498
|
*/
|
|
492
499
|
export declare function buildExpressionParser<T>(atom: Parser<T>, operatorTable: OperatorInfo<T>[][], parenParser?: Parser<T>): Parser<T>;
|
|
500
|
+
/**
|
|
501
|
+
* Wraps a parser with a per-input cache. Useful for parsers that may be invoked
|
|
502
|
+
* many times at the same position (e.g. recursive grammars where the same
|
|
503
|
+
* sub-parser is consulted from multiple alternatives).
|
|
504
|
+
*
|
|
505
|
+
* Both successes and failures are cached. The cache is keyed by the input
|
|
506
|
+
* string the parser is called with — because the rest passed between parsers
|
|
507
|
+
* is value-equal across paths, identical sub-parses share a cache entry.
|
|
508
|
+
*
|
|
509
|
+
* `memo` assumes its wrapped parser is a pure function of its input. Don't
|
|
510
|
+
* memoize parsers that consult mutable external state.
|
|
511
|
+
*
|
|
512
|
+
* @param parser - parser to memoize
|
|
513
|
+
* @param name - optional debug name (shown in `parserDebug` counts/times as `memo(name)`)
|
|
514
|
+
* @returns - memoized parser
|
|
515
|
+
*/
|
|
516
|
+
export declare function memo<T>(parser: Parser<T>, name?: string): Parser<T>;
|
|
517
|
+
export declare function memo<T, C extends PlainObject>(parser: CaptureParser<T, C>, name?: string): CaptureParser<T, C>;
|
package/dist/combinators.js
CHANGED
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
import { within } from "./parsers/within.js";
|
|
2
2
|
import { TarsecError } from "./tarsecError.js";
|
|
3
3
|
import { getDiagnostics, trace } from "./trace.js";
|
|
4
|
-
import { captureSuccess,
|
|
5
|
-
import { escape
|
|
4
|
+
import { captureSuccess, failure, isCaptureResult, isSuccess, success, } from "./types.js";
|
|
5
|
+
import { escape } from "./utils.js";
|
|
6
6
|
/**
|
|
7
7
|
* Takes a parser and runs it zero or more times, returning the results as an array.
|
|
8
8
|
* If the parser is a capture parser, it returns the captures as an array in this form:
|
|
@@ -148,20 +148,6 @@ export function many1WithJoin(parser) {
|
|
|
148
148
|
* const parser = or(capture(digit, "num"), capture(word, "name"));
|
|
149
149
|
* ```
|
|
150
150
|
*
|
|
151
|
-
* `or` supports backtracking by returning a `nextParser`:
|
|
152
|
-
*
|
|
153
|
-
* ```ts
|
|
154
|
-
* const parser = or(str("hello"), str("hello!"));
|
|
155
|
-
*
|
|
156
|
-
* // this will match the first parser
|
|
157
|
-
* const result = parser("hello");
|
|
158
|
-
*
|
|
159
|
-
* // but or returns the untried parsers as a new parser
|
|
160
|
-
* result.nextParser("hello!"); // works
|
|
161
|
-
*
|
|
162
|
-
* // result.nextParser is the same as or(str("hello!"))
|
|
163
|
-
* ```
|
|
164
|
-
*
|
|
165
151
|
* @param parsers - parsers to try
|
|
166
152
|
* @returns - a parser that tries each parser in order. Returns the result of the first parser that succeeds.
|
|
167
153
|
*/
|
|
@@ -170,11 +156,7 @@ export function or(...parsers) {
|
|
|
170
156
|
for (let i = 0; i < parsers.length; i++) {
|
|
171
157
|
let result = parsers[i](input);
|
|
172
158
|
if (result.success) {
|
|
173
|
-
|
|
174
|
-
return result;
|
|
175
|
-
const nextParser = or(...parsers.slice(i + 1));
|
|
176
|
-
/* console.log({ nextParser }, parsers.slice(i + 1)); */
|
|
177
|
-
return Object.assign(Object.assign({}, result), { nextParser });
|
|
159
|
+
return result;
|
|
178
160
|
}
|
|
179
161
|
}
|
|
180
162
|
return failure(`all parsers failed`, input);
|
|
@@ -220,6 +202,15 @@ export function not(parser) {
|
|
|
220
202
|
return success(null, input);
|
|
221
203
|
});
|
|
222
204
|
}
|
|
205
|
+
export function peek(parser) {
|
|
206
|
+
return trace("peek", (input) => {
|
|
207
|
+
const result = parser(input);
|
|
208
|
+
if (!result.success) {
|
|
209
|
+
return Object.assign(Object.assign({}, result), { rest: input });
|
|
210
|
+
}
|
|
211
|
+
return Object.assign(Object.assign({}, result), { rest: input });
|
|
212
|
+
});
|
|
213
|
+
}
|
|
223
214
|
/**
|
|
224
215
|
* Takes three parsers, `open`, `close`, and `parser`.
|
|
225
216
|
* `between` matches multiple instances of `parser`,
|
|
@@ -624,64 +615,6 @@ export function search(parser) {
|
|
|
624
615
|
return success([], input);
|
|
625
616
|
});
|
|
626
617
|
}
|
|
627
|
-
/*
|
|
628
|
-
To add backtracking support requires a fairly big change. Here's an example that needs backtracking.
|
|
629
|
-
|
|
630
|
-
```ts
|
|
631
|
-
const parser = seq([
|
|
632
|
-
str("hello "),
|
|
633
|
-
or(str("world"), str("world!")),
|
|
634
|
-
optional("?")
|
|
635
|
-
], getResults);
|
|
636
|
-
```
|
|
637
|
-
|
|
638
|
-
If we try to parse `"hello world!"`, the first parser in the OR will succeed, but then we'll get stuck at the `optional`. Instead, we need to go back up the tree and try the second parser in the OR. A few things need to happen.
|
|
639
|
-
|
|
640
|
-
1. instead of just processing these parsers sequentially in a for loop, we need to model them as a tree
|
|
641
|
-
2. the OR parser needs to let us know that there are other branches to try.
|
|
642
|
-
|
|
643
|
-
For #2, there's an optional `nextParser` key on a parser success. The or parser can use this to say "a parser succeeded and here's the result, but there are other parsers that could be tried". `nextParser` is a parser that runs the remaining branches. So in this example, the OR would return a success with `nextParser = or(str("world"))`.
|
|
644
|
-
|
|
645
|
-
Next, we need to model this as a tree. Each node in the tree has a parent and child and the parser for that node.
|
|
646
|
-
|
|
647
|
-
```ts
|
|
648
|
-
parent: Node;
|
|
649
|
-
parser: GeneralParser<any, any> | null;
|
|
650
|
-
child: Node;
|
|
651
|
-
```
|
|
652
|
-
|
|
653
|
-
Hopefully that is self-explanatory. We start at the root of the tree, try the parser there, then use `.child` to go to the next node and so on. We don't model multiple paths as multiple children. To keep the code simple, we do something else.
|
|
654
|
-
|
|
655
|
-
Each node also has a `closed` key. Once we've run the parser for a node, we mark it `closed`. Closed means there are no more branches here. UNLESS, the parser returns a `nextParser`. In that case, we *don't* mark it closed because there are still other options to try. In that case, we also *replace* the parser on that node with nextParser.
|
|
656
|
-
|
|
657
|
-
So, going back to the hello world example, let's say we're stuck at the `optional`:
|
|
658
|
-
|
|
659
|
-
```ts
|
|
660
|
-
const parser = seq([
|
|
661
|
-
str("hello "),
|
|
662
|
-
or(str("world"), str("world!")),
|
|
663
|
-
optional("?")
|
|
664
|
-
], getResults);
|
|
665
|
-
```
|
|
666
|
-
|
|
667
|
-
We use `.parent` to go back up the tree. We're looking for a node that isn't closed. If we find one, we start again from there. In this case, we'd find an open node at the or with parser `or(str("world"))`. We can restart from there, but there's a bunch of state to reset.
|
|
668
|
-
|
|
669
|
-
1. From the new `or` parser, we need to go to the optional parser. We're doing it all again in the same order. This is one reason why it's easier to model this without multiple children. Otherwise, all the children would have to point to the next level, the next level would have to point to all the children in the previous level, and you'd have multiple parents, which is awful to deal with.
|
|
670
|
-
|
|
671
|
-
2. We have consumed input and added to the results. We need to undo that. At this point, the input is `!`, because we've consumed `hello world`. And the results array is `["hello ", "world"]`. We need to rewind both of those.
|
|
672
|
-
|
|
673
|
-
To do that, I count how many levels up we've gone to find another branch, and just pop that many elements off the results array. So results is now `["hello "]`. The input is trickier. How would I keep track of what the input was when we were at the OR the last time?
|
|
674
|
-
|
|
675
|
-
This is where the final key on a tree node comes in. Nodes also have an optional `input` key.
|
|
676
|
-
|
|
677
|
-
IF a parser succeeds, and
|
|
678
|
-
IF there's a nextParser,
|
|
679
|
-
We know we may come back to this node. So we save the current input as `.input` on the node.
|
|
680
|
-
|
|
681
|
-
This approach has some issues. Notably, it doesn't work if you need to backtrack at multiple points in the tree. The test `backtracking-deep.test.ts` shows this.
|
|
682
|
-
|
|
683
|
-
The code is also complex and it would be easy to have bugs in this logic. I wish there was a cleaner solution for rewinding state.
|
|
684
|
-
*/
|
|
685
618
|
/**
|
|
686
619
|
* seq takes an array of parsers and runs them sequentially.
|
|
687
620
|
* If any of the parsers fail, seq fails without consuming any input.
|
|
@@ -699,7 +632,6 @@ The code is also complex and it would be easy to have bugs in this logic. I wish
|
|
|
699
632
|
* Finally, you don't need to use seq at all. You can just hand write the logic.
|
|
700
633
|
* But you'll need to do the error handling
|
|
701
634
|
* and pass the remaining input to the next parser yourself.
|
|
702
|
-
* seq also does some backtracking for you that you will need to do yourself.
|
|
703
635
|
*
|
|
704
636
|
* Also see `seqR` and `seqC` for convenience functions that return the results or captures respectively.
|
|
705
637
|
*
|
|
@@ -713,44 +645,18 @@ export function seq(parsers, transform, debugName = "") {
|
|
|
713
645
|
const results = [];
|
|
714
646
|
let rest = input;
|
|
715
647
|
const captures = {};
|
|
716
|
-
|
|
717
|
-
|
|
718
|
-
while (current) {
|
|
719
|
-
const parser = current.parser;
|
|
720
|
-
if (!parser) {
|
|
721
|
-
console.log({ current, parser, results, captures });
|
|
722
|
-
throw new Error("parser is null");
|
|
723
|
-
}
|
|
724
|
-
const parsed = parser(rest);
|
|
725
|
-
current.closed = true;
|
|
726
|
-
/* console.log({ parsed }); */
|
|
648
|
+
for (let i = 0; i < parsers.length; i++) {
|
|
649
|
+
const parsed = parsers[i](rest);
|
|
727
650
|
if (!parsed.success) {
|
|
728
|
-
|
|
729
|
-
if (ancestor) {
|
|
730
|
-
current = ancestor;
|
|
731
|
-
rest = ancestor.input;
|
|
732
|
-
popMany(results, count);
|
|
733
|
-
continue;
|
|
734
|
-
}
|
|
735
|
-
else {
|
|
736
|
-
// don't consume input if we're failing
|
|
737
|
-
return Object.assign(Object.assign({}, parsed), { rest: input });
|
|
738
|
-
}
|
|
651
|
+
return Object.assign(Object.assign({}, parsed), { rest: input });
|
|
739
652
|
}
|
|
740
653
|
results.push(parsed.result);
|
|
741
|
-
if (parsed.nextParser) {
|
|
742
|
-
/* console.log("setting next parser", parsed.nextParser); */
|
|
743
|
-
current.parser = parsed.nextParser;
|
|
744
|
-
current.input = rest;
|
|
745
|
-
current.closed = false;
|
|
746
|
-
}
|
|
747
654
|
rest = parsed.rest;
|
|
748
655
|
if (isCaptureResult(parsed)) {
|
|
749
656
|
for (const key in parsed.captures) {
|
|
750
657
|
captures[key] = parsed.captures[key];
|
|
751
658
|
}
|
|
752
659
|
}
|
|
753
|
-
current = current.child;
|
|
754
660
|
}
|
|
755
661
|
const result = transform(results, captures);
|
|
756
662
|
return success(result, rest);
|
|
@@ -1026,3 +932,20 @@ function tryOps(ops, input) {
|
|
|
1026
932
|
}
|
|
1027
933
|
return null;
|
|
1028
934
|
}
|
|
935
|
+
const DEFAULT_MEMO_LIMIT = 10000;
|
|
936
|
+
export function memo(parser, name) {
|
|
937
|
+
const cache = new Map();
|
|
938
|
+
return trace(name ? `memo(${name})` : "memo", (input) => {
|
|
939
|
+
const hit = cache.get(input);
|
|
940
|
+
if (hit !== undefined)
|
|
941
|
+
return hit;
|
|
942
|
+
const result = parser(input);
|
|
943
|
+
if (cache.size >= DEFAULT_MEMO_LIMIT) {
|
|
944
|
+
const oldest = cache.keys().next().value;
|
|
945
|
+
if (oldest !== undefined)
|
|
946
|
+
cache.delete(oldest);
|
|
947
|
+
}
|
|
948
|
+
cache.set(input, result);
|
|
949
|
+
return result;
|
|
950
|
+
});
|
|
951
|
+
}
|
package/dist/types.d.ts
CHANGED
|
@@ -5,15 +5,13 @@ export type ParserSuccess<T> = {
|
|
|
5
5
|
success: true;
|
|
6
6
|
result: T;
|
|
7
7
|
rest: string;
|
|
8
|
-
nextParser?: Parser<any>;
|
|
9
8
|
};
|
|
10
|
-
/** Represents a parse success with captures.
|
|
9
|
+
/** Represents a parse success with captures. */
|
|
11
10
|
export type CaptureParserSuccess<T, C extends PlainObject> = {
|
|
12
11
|
success: true;
|
|
13
12
|
result: T;
|
|
14
13
|
rest: string;
|
|
15
14
|
captures: C;
|
|
16
|
-
nextParser?: CaptureParser<any, any>;
|
|
17
15
|
};
|
|
18
16
|
/** Represents a parse failure. */
|
|
19
17
|
export type ParserFailure = {
|
|
@@ -98,23 +96,6 @@ export type InferManyReturnType<T extends GeneralParser<any, any>> = T extends C
|
|
|
98
96
|
captures: C[];
|
|
99
97
|
}> : T extends Parser<infer R> ? Parser<R[]> : never;
|
|
100
98
|
export type MergedResults<T extends readonly GeneralParser<any, any>[]> = ExtractResults<T[number]>;
|
|
101
|
-
/** Used to create a parser tree for backtracking. */
|
|
102
|
-
export type Node = ParserNode | EmptyNode;
|
|
103
|
-
export type ParserNode = {
|
|
104
|
-
parent: Node;
|
|
105
|
-
parser: GeneralParser<any, any> | null;
|
|
106
|
-
input?: string;
|
|
107
|
-
child: Node;
|
|
108
|
-
closed: boolean;
|
|
109
|
-
};
|
|
110
|
-
export type EmptyNode = null;
|
|
111
|
-
/** Convenience function to create a ParserNode. */
|
|
112
|
-
export declare function createNode(parent: Node | null, parser: GeneralParser<any, any>): ParserNode;
|
|
113
|
-
/** Convenience function where, given an array of parsers, it creates a tree we can use for backtracking.
|
|
114
|
-
* This tree is what `seq` use. It's used to keep track of the parsers we've tried so far,
|
|
115
|
-
* so we can backtrack if we need to.
|
|
116
|
-
*/
|
|
117
|
-
export declare function createTree(parsers: readonly GeneralParser<any, any>[]): Node;
|
|
118
99
|
/** Used by `within`. */
|
|
119
100
|
export type Matched<T> = {
|
|
120
101
|
type: "matched";
|
package/dist/types.js
CHANGED
|
@@ -30,28 +30,3 @@ export function captureSuccess(result, rest, captures) {
|
|
|
30
30
|
export function failure(message, rest) {
|
|
31
31
|
return { success: false, message, rest };
|
|
32
32
|
}
|
|
33
|
-
/** Convenience function to create a ParserNode. */
|
|
34
|
-
export function createNode(parent, parser) {
|
|
35
|
-
return {
|
|
36
|
-
parent,
|
|
37
|
-
parser,
|
|
38
|
-
child: null,
|
|
39
|
-
closed: false,
|
|
40
|
-
};
|
|
41
|
-
}
|
|
42
|
-
/** Convenience function where, given an array of parsers, it creates a tree we can use for backtracking.
|
|
43
|
-
* This tree is what `seq` use. It's used to keep track of the parsers we've tried so far,
|
|
44
|
-
* so we can backtrack if we need to.
|
|
45
|
-
*/
|
|
46
|
-
export function createTree(parsers) {
|
|
47
|
-
if (parsers.length === 0) {
|
|
48
|
-
return null;
|
|
49
|
-
}
|
|
50
|
-
const rootNode = createNode(null, parsers[0]);
|
|
51
|
-
let currentNode = rootNode;
|
|
52
|
-
for (let i = 1; i < parsers.length; i++) {
|
|
53
|
-
currentNode.child = createNode(currentNode, parsers[i]);
|
|
54
|
-
currentNode = currentNode.child;
|
|
55
|
-
}
|
|
56
|
-
return rootNode;
|
|
57
|
-
}
|
package/dist/utils.d.ts
CHANGED
|
@@ -1,8 +1,5 @@
|
|
|
1
|
-
import { Node } from "./types.js";
|
|
2
1
|
export declare function escape(str: any): string;
|
|
3
2
|
export declare function merge(a: any | any[], b: any | any[]): any[];
|
|
4
3
|
export declare function mergeCaptures(a: Record<string, any>, b: Record<string, any>): Record<string, any>;
|
|
5
|
-
export declare function findAncestorWithNextParser(node: Node, count?: number): [Node, number];
|
|
6
|
-
export declare function popMany(arr: any[], count: number): void;
|
|
7
4
|
export declare function round(num: number, places?: number): number;
|
|
8
5
|
export declare function shorten(str: string, length?: number): string;
|
package/dist/utils.js
CHANGED
|
@@ -30,22 +30,6 @@ export function mergeCaptures(a, b) {
|
|
|
30
30
|
});
|
|
31
31
|
return result;
|
|
32
32
|
}
|
|
33
|
-
export function findAncestorWithNextParser(node, count = 0) {
|
|
34
|
-
if (node === null)
|
|
35
|
-
return [null, count];
|
|
36
|
-
if (!node.closed) {
|
|
37
|
-
return [node, count];
|
|
38
|
-
}
|
|
39
|
-
if (node.parent) {
|
|
40
|
-
return findAncestorWithNextParser(node.parent, count + 1);
|
|
41
|
-
}
|
|
42
|
-
return [null, count];
|
|
43
|
-
}
|
|
44
|
-
export function popMany(arr, count) {
|
|
45
|
-
for (let i = 0; i < count; i++) {
|
|
46
|
-
arr.pop();
|
|
47
|
-
}
|
|
48
|
-
}
|
|
49
33
|
export function round(num, places = 2) {
|
|
50
34
|
return Math.round(num * 10 ** places) / 10 ** places;
|
|
51
35
|
}
|
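package/package.json
CHANGED
|
[hunk not captured in this extract; the file list above records +1 -1 for this file, presumably the "version" field changing from 0.3.2 to 0.4.1]
|
package/dist/combinators/seq.d.ts
DELETED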
|
@@ -1 +0,0 @@
|
|
|
1
|
-
export {};
|
package/dist/combinators/seq.js
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
export {};
|