tarsec 0.3.1 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/dist/combinators.d.ts +0 -15
- package/dist/combinators.js +6 -109
- package/dist/parsers/markdown/inline.js +12 -6
- package/dist/types.d.ts +1 -20
- package/dist/types.js +0 -25
- package/dist/utils.d.ts +0 -3
- package/dist/utils.js +0 -16
- package/package.json +1 -1
- package/dist/combinators/seq.d.ts +0 -1
- package/dist/combinators/seq.js +0 -1
package/README.md
CHANGED
|
@@ -43,7 +43,7 @@ parser("hello there"); // failure
|
|
|
43
43
|
- Derived types: tarsec will generate TypeScript types for your parser
|
|
44
44
|
- [Debug mode](/tutorials/debugging.md) that prints what's happening step-by-step
|
|
45
45
|
- Tools to debug your parser's [performance](/tutorials/performance.md)
|
|
46
|
-
-
|
|
46
|
+
- `peek` / `not` lookahead for [disambiguating grammars](/tutorials/backtracking.md) without backtracking
|
|
47
47
|
- A way to make your parser more [secure](/tutorials/security.md).
|
|
48
48
|
- [Pretty error messages](/tutorials/pretty-errors.md)
|
|
49
49
|
|
package/dist/combinators.d.ts
CHANGED
|
@@ -58,20 +58,6 @@ export declare function many1WithJoin(parser: Parser<string>): Parser<string>;
|
|
|
58
58
|
* const parser = or(capture(digit, "num"), capture(word, "name"));
|
|
59
59
|
* ```
|
|
60
60
|
*
|
|
61
|
-
* `or` supports backtracking by returning a `nextParser`:
|
|
62
|
-
*
|
|
63
|
-
* ```ts
|
|
64
|
-
* const parser = or(str("hello"), str("hello!"));
|
|
65
|
-
*
|
|
66
|
-
* // this will match the first parser
|
|
67
|
-
* const result = parser("hello");
|
|
68
|
-
*
|
|
69
|
-
* // but or returns the untried parsers as a new parser
|
|
70
|
-
* result.nextParser("hello!"); // works
|
|
71
|
-
*
|
|
72
|
-
* // result.nextParser is the same as or(str("hello!"))
|
|
73
|
-
* ```
|
|
74
|
-
*
|
|
75
61
|
* @param parsers - parsers to try
|
|
76
62
|
* @returns - a parser that tries each parser in order. Returns the result of the first parser that succeeds.
|
|
77
63
|
*/
|
|
@@ -357,7 +343,6 @@ export declare function search<T>(parser: Parser<T>): Parser<T[]>;
|
|
|
357
343
|
* Finally, you don't need to use seq at all. You can just hand write the logic.
|
|
358
344
|
* But you'll need to do the error handling
|
|
359
345
|
* and pass the remaining input to the next parser yourself.
|
|
360
|
-
* seq also does some backtracking for you that you will need to do yourself.
|
|
361
346
|
*
|
|
362
347
|
* Also see `seqR` and `seqC` for convenience functions that return the results or captures respectively.
|
|
363
348
|
*
|
package/dist/combinators.js
CHANGED
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
import { within } from "./parsers/within.js";
|
|
2
2
|
import { TarsecError } from "./tarsecError.js";
|
|
3
3
|
import { getDiagnostics, trace } from "./trace.js";
|
|
4
|
-
import { captureSuccess,
|
|
5
|
-
import { escape
|
|
4
|
+
import { captureSuccess, failure, isCaptureResult, isSuccess, success, } from "./types.js";
|
|
5
|
+
import { escape } from "./utils.js";
|
|
6
6
|
/**
|
|
7
7
|
* Takes a parser and runs it zero or more times, returning the results as an array.
|
|
8
8
|
* If the parser is a capture parser, it returns the captures as an array in this form:
|
|
@@ -148,20 +148,6 @@ export function many1WithJoin(parser) {
|
|
|
148
148
|
* const parser = or(capture(digit, "num"), capture(word, "name"));
|
|
149
149
|
* ```
|
|
150
150
|
*
|
|
151
|
-
* `or` supports backtracking by returning a `nextParser`:
|
|
152
|
-
*
|
|
153
|
-
* ```ts
|
|
154
|
-
* const parser = or(str("hello"), str("hello!"));
|
|
155
|
-
*
|
|
156
|
-
* // this will match the first parser
|
|
157
|
-
* const result = parser("hello");
|
|
158
|
-
*
|
|
159
|
-
* // but or returns the untried parsers as a new parser
|
|
160
|
-
* result.nextParser("hello!"); // works
|
|
161
|
-
*
|
|
162
|
-
* // result.nextParser is the same as or(str("hello!"))
|
|
163
|
-
* ```
|
|
164
|
-
*
|
|
165
151
|
* @param parsers - parsers to try
|
|
166
152
|
* @returns - a parser that tries each parser in order. Returns the result of the first parser that succeeds.
|
|
167
153
|
*/
|
|
@@ -170,11 +156,7 @@ export function or(...parsers) {
|
|
|
170
156
|
for (let i = 0; i < parsers.length; i++) {
|
|
171
157
|
let result = parsers[i](input);
|
|
172
158
|
if (result.success) {
|
|
173
|
-
|
|
174
|
-
return result;
|
|
175
|
-
const nextParser = or(...parsers.slice(i + 1));
|
|
176
|
-
/* console.log({ nextParser }, parsers.slice(i + 1)); */
|
|
177
|
-
return Object.assign(Object.assign({}, result), { nextParser });
|
|
159
|
+
return result;
|
|
178
160
|
}
|
|
179
161
|
}
|
|
180
162
|
return failure(`all parsers failed`, input);
|
|
@@ -624,64 +606,6 @@ export function search(parser) {
|
|
|
624
606
|
return success([], input);
|
|
625
607
|
});
|
|
626
608
|
}
|
|
627
|
-
/*
|
|
628
|
-
To add backtracking support requires a fairly big change. Here's an example that needs backtracking.
|
|
629
|
-
|
|
630
|
-
```ts
|
|
631
|
-
const parser = seq([
|
|
632
|
-
str("hello "),
|
|
633
|
-
or(str("world"), str("world!")),
|
|
634
|
-
optional("?")
|
|
635
|
-
], getResults);
|
|
636
|
-
```
|
|
637
|
-
|
|
638
|
-
If we try to parse `"hello world!"`, the first parser in the OR will succeed, but then we'll get stuck at the `optional`. Instead, we need to go back up the tree and try the second parser in the OR. A few things need to happen.
|
|
639
|
-
|
|
640
|
-
1. instead of just processing these parsers sequentially in a for loop, we need to model them as a tree
|
|
641
|
-
2. the OR parser needs to let us know that there are other branches to try.
|
|
642
|
-
|
|
643
|
-
For #2, there's an optional `nextParser` key on a parser success. The or parser can use this to say "a parser succeeded and here's the result, but there are other parsers that could be tried". `nextParser` is a parser that runs the remaining branches. So in this example, the OR would return a success with `nextParser = or(str("world"))`.
|
|
644
|
-
|
|
645
|
-
Next, we need to model this as a tree. Each node in the tree has a parent and child and the parser for that node.
|
|
646
|
-
|
|
647
|
-
```ts
|
|
648
|
-
parent: Node;
|
|
649
|
-
parser: GeneralParser<any, any> | null;
|
|
650
|
-
child: Node;
|
|
651
|
-
```
|
|
652
|
-
|
|
653
|
-
Hopefully that is self-explanatory. We start at the root of the tree, try the parser there, then use `.child` to go to the next node and so on. We don't model multiple paths as multiple children. To keep the code simple, we do something else.
|
|
654
|
-
|
|
655
|
-
Each node also has a `closed` key. Once we've run the parser for a node, we mark it `closed`. Closed means there are no more branches here. UNLESS, the parser returns a `nextParser`. In that case, we *don't* mark it closed because there are still other options to try. In that case, we also *replace* the parser on that node with nextParser.
|
|
656
|
-
|
|
657
|
-
So, going back to the hello world example, let's say we're stuck at the `optional`:
|
|
658
|
-
|
|
659
|
-
```ts
|
|
660
|
-
const parser = seq([
|
|
661
|
-
str("hello "),
|
|
662
|
-
or(str("world"), str("world!")),
|
|
663
|
-
optional("?")
|
|
664
|
-
], getResults);
|
|
665
|
-
```
|
|
666
|
-
|
|
667
|
-
We use `.parent` to go back up the tree. We're looking for a node that isn't closed. If we find one, we start again from there. In this case, we'd find an open node at the or with parser `or(str("world"))`. We can restart from there, but there's a bunch of state to reset.
|
|
668
|
-
|
|
669
|
-
1. From the new `or` parser, we need to go to the optional parser. We're doing it all again in the same order. This is one reason why it's easier to model this without multiple children. Otherwise, all the children would have to point to the next level, the next level would have to point to all the children in the previous level, and you'd have multiple parents, which is awful to deal with.
|
|
670
|
-
|
|
671
|
-
2. We have consumed input and added to the results. We need to undo that. At this point, the input is `!`, because we've consumed `hello world`. And the results array is `["hello ", "world"]`. We need to rewind both of those.
|
|
672
|
-
|
|
673
|
-
To do that, I count how many levels up we've gone to find another branch, and just pop that many elements off the results array. So results is now `["hello "]`. The input is trickier. How would I keep track of what the input was when we were at the OR the last time?
|
|
674
|
-
|
|
675
|
-
This is where the final key on a tree node comes in. Nodes also have an optional `input` key.
|
|
676
|
-
|
|
677
|
-
IF a parser succeeds, and
|
|
678
|
-
IF there's a nextParser,
|
|
679
|
-
We know we may come back to this node. So we save the current input as `.input` on the node.
|
|
680
|
-
|
|
681
|
-
This approach has some issues. Notably, it doesn't work if you need to backtrack at multiple points in the tree. The test `backtracking-deep.test.ts` shows this.
|
|
682
|
-
|
|
683
|
-
The code is also complex and it would be easy to have bugs in this logic. I wish there was a cleaner solution for rewinding state.
|
|
684
|
-
*/
|
|
685
609
|
/**
|
|
686
610
|
* seq takes an array of parsers and runs them sequentially.
|
|
687
611
|
* If any of the parsers fail, seq fails without consuming any input.
|
|
@@ -699,7 +623,6 @@ The code is also complex and it would be easy to have bugs in this logic. I wish
|
|
|
699
623
|
* Finally, you don't need to use seq at all. You can just hand write the logic.
|
|
700
624
|
* But you'll need to do the error handling
|
|
701
625
|
* and pass the remaining input to the next parser yourself.
|
|
702
|
-
* seq also does some backtracking for you that you will need to do yourself.
|
|
703
626
|
*
|
|
704
627
|
* Also see `seqR` and `seqC` for convenience functions that return the results or captures respectively.
|
|
705
628
|
*
|
|
@@ -713,44 +636,18 @@ export function seq(parsers, transform, debugName = "") {
|
|
|
713
636
|
const results = [];
|
|
714
637
|
let rest = input;
|
|
715
638
|
const captures = {};
|
|
716
|
-
|
|
717
|
-
|
|
718
|
-
while (current) {
|
|
719
|
-
const parser = current.parser;
|
|
720
|
-
if (!parser) {
|
|
721
|
-
console.log({ current, parser, results, captures });
|
|
722
|
-
throw new Error("parser is null");
|
|
723
|
-
}
|
|
724
|
-
const parsed = parser(rest);
|
|
725
|
-
current.closed = true;
|
|
726
|
-
/* console.log({ parsed }); */
|
|
639
|
+
for (let i = 0; i < parsers.length; i++) {
|
|
640
|
+
const parsed = parsers[i](rest);
|
|
727
641
|
if (!parsed.success) {
|
|
728
|
-
|
|
729
|
-
if (ancestor) {
|
|
730
|
-
current = ancestor;
|
|
731
|
-
rest = ancestor.input;
|
|
732
|
-
popMany(results, count);
|
|
733
|
-
continue;
|
|
734
|
-
}
|
|
735
|
-
else {
|
|
736
|
-
// don't consume input if we're failing
|
|
737
|
-
return Object.assign(Object.assign({}, parsed), { rest: input });
|
|
738
|
-
}
|
|
642
|
+
return Object.assign(Object.assign({}, parsed), { rest: input });
|
|
739
643
|
}
|
|
740
644
|
results.push(parsed.result);
|
|
741
|
-
if (parsed.nextParser) {
|
|
742
|
-
/* console.log("setting next parser", parsed.nextParser); */
|
|
743
|
-
current.parser = parsed.nextParser;
|
|
744
|
-
current.input = rest;
|
|
745
|
-
current.closed = false;
|
|
746
|
-
}
|
|
747
645
|
rest = parsed.rest;
|
|
748
646
|
if (isCaptureResult(parsed)) {
|
|
749
647
|
for (const key in parsed.captures) {
|
|
750
648
|
captures[key] = parsed.captures[key];
|
|
751
649
|
}
|
|
752
650
|
}
|
|
753
|
-
current = current.child;
|
|
754
651
|
}
|
|
755
652
|
const result = transform(results, captures);
|
|
756
653
|
return success(result, rest);
|
|
package/dist/parsers/markdown/inline.js
CHANGED
|
@@ -2,11 +2,17 @@ import { seqC, seqR, capture, captureCaptures, or, not, map, many, many1, many1T
|
|
|
2
2
|
import { str, char, eof, set, oneOf, alphanum, noneOf, digit, letter, anyChar } from "../../parsers.js";
|
|
3
3
|
import { success, failure } from "../../types.js";
|
|
4
4
|
import { optional, between } from "../../combinators.js";
|
|
5
|
-
//
|
|
6
|
-
//
|
|
7
|
-
//
|
|
8
|
-
//
|
|
9
|
-
|
|
5
|
+
// The "two-or-more trailing spaces then `\n`" half of a hard break. Shared
|
|
6
|
+
// between `hardBreakParser` (which emits the hard-break node) and
|
|
7
|
+
// `inlineTextStop` (which uses it to know where to stop). Keeping these in
|
|
8
|
+
// sync matters: if the run pattern here disagreed with what `hardBreakParser`
|
|
9
|
+
// accepts, inline-text could either swallow a real hard break or — as the
|
|
10
|
+
previous `str("  ")` did — stop on any incidental double-space (e.g. a
|
|
11
|
+
// 2-space line indent inside a list-item continuation) and freeze the
|
|
12
|
+
// surrounding paragraph at zero progress.
|
|
13
|
+
const hardBreakSpaces = seqR(str("  "), many(char(" ")), char("\n"));
|
|
14
|
+
// `]` is included so inline-text inside a link-text (`[...]`) ends at the `]`.
|
|
15
|
+
const inlineTextStop = or(oneOf("*_`[]!<~\\&\n"), hardBreakSpaces);
|
|
10
16
|
export const inlineTextParser = map(many1Till(inlineTextStop), (content) => ({ type: "inline-text", content }));
|
|
11
17
|
/**
|
|
12
18
|
* Run `inlineMarkdownParser` repeatedly until `stop` would match at the
|
|
@@ -219,7 +225,7 @@ export const imageParser = map(seqC(str(", "alt"
|
|
|
219
225
|
});
|
|
220
226
|
export const hardBreakParser = map(or(
|
|
221
227
|
// two-or-more trailing spaces then newline
|
|
222
|
-
|
|
228
|
+
hardBreakSpaces,
|
|
223
229
|
// backslash then newline
|
|
224
230
|
seqR(char("\\"), char("\n"))), () => ({ type: "inline-hard-break" }));
|
|
225
231
|
/** A single `\n` that is *not* part of a blank line (which would terminate the
|
package/dist/types.d.ts
CHANGED
|
@@ -5,15 +5,13 @@ export type ParserSuccess<T> = {
|
|
|
5
5
|
success: true;
|
|
6
6
|
result: T;
|
|
7
7
|
rest: string;
|
|
8
|
-
nextParser?: Parser<any>;
|
|
9
8
|
};
|
|
10
|
-
/** Represents a parse success with captures.
|
|
9
|
+
/** Represents a parse success with captures. */
|
|
11
10
|
export type CaptureParserSuccess<T, C extends PlainObject> = {
|
|
12
11
|
success: true;
|
|
13
12
|
result: T;
|
|
14
13
|
rest: string;
|
|
15
14
|
captures: C;
|
|
16
|
-
nextParser?: CaptureParser<any, any>;
|
|
17
15
|
};
|
|
18
16
|
/** Represents a parse failure. */
|
|
19
17
|
export type ParserFailure = {
|
|
@@ -98,23 +96,6 @@ export type InferManyReturnType<T extends GeneralParser<any, any>> = T extends C
|
|
|
98
96
|
captures: C[];
|
|
99
97
|
}> : T extends Parser<infer R> ? Parser<R[]> : never;
|
|
100
98
|
export type MergedResults<T extends readonly GeneralParser<any, any>[]> = ExtractResults<T[number]>;
|
|
101
|
-
/** Used to create a parser tree for backtracking. */
|
|
102
|
-
export type Node = ParserNode | EmptyNode;
|
|
103
|
-
export type ParserNode = {
|
|
104
|
-
parent: Node;
|
|
105
|
-
parser: GeneralParser<any, any> | null;
|
|
106
|
-
input?: string;
|
|
107
|
-
child: Node;
|
|
108
|
-
closed: boolean;
|
|
109
|
-
};
|
|
110
|
-
export type EmptyNode = null;
|
|
111
|
-
/** Convenience function to create a ParserNode. */
|
|
112
|
-
export declare function createNode(parent: Node | null, parser: GeneralParser<any, any>): ParserNode;
|
|
113
|
-
/** Convenience function where, given an array of parsers, it creates a tree we can use for backtracking.
|
|
114
|
-
* This tree is what `seq` use. It's used to keep track of the parsers we've tried so far,
|
|
115
|
-
* so we can backtrack if we need to.
|
|
116
|
-
*/
|
|
117
|
-
export declare function createTree(parsers: readonly GeneralParser<any, any>[]): Node;
|
|
118
99
|
/** Used by `within`. */
|
|
119
100
|
export type Matched<T> = {
|
|
120
101
|
type: "matched";
|
package/dist/types.js
CHANGED
|
@@ -30,28 +30,3 @@ export function captureSuccess(result, rest, captures) {
|
|
|
30
30
|
export function failure(message, rest) {
|
|
31
31
|
return { success: false, message, rest };
|
|
32
32
|
}
|
|
33
|
-
/** Convenience function to create a ParserNode. */
|
|
34
|
-
export function createNode(parent, parser) {
|
|
35
|
-
return {
|
|
36
|
-
parent,
|
|
37
|
-
parser,
|
|
38
|
-
child: null,
|
|
39
|
-
closed: false,
|
|
40
|
-
};
|
|
41
|
-
}
|
|
42
|
-
/** Convenience function where, given an array of parsers, it creates a tree we can use for backtracking.
|
|
43
|
-
* This tree is what `seq` use. It's used to keep track of the parsers we've tried so far,
|
|
44
|
-
* so we can backtrack if we need to.
|
|
45
|
-
*/
|
|
46
|
-
export function createTree(parsers) {
|
|
47
|
-
if (parsers.length === 0) {
|
|
48
|
-
return null;
|
|
49
|
-
}
|
|
50
|
-
const rootNode = createNode(null, parsers[0]);
|
|
51
|
-
let currentNode = rootNode;
|
|
52
|
-
for (let i = 1; i < parsers.length; i++) {
|
|
53
|
-
currentNode.child = createNode(currentNode, parsers[i]);
|
|
54
|
-
currentNode = currentNode.child;
|
|
55
|
-
}
|
|
56
|
-
return rootNode;
|
|
57
|
-
}
|
package/dist/utils.d.ts
CHANGED
|
@@ -1,8 +1,5 @@
|
|
|
1
|
-
import { Node } from "./types.js";
|
|
2
1
|
export declare function escape(str: any): string;
|
|
3
2
|
export declare function merge(a: any | any[], b: any | any[]): any[];
|
|
4
3
|
export declare function mergeCaptures(a: Record<string, any>, b: Record<string, any>): Record<string, any>;
|
|
5
|
-
export declare function findAncestorWithNextParser(node: Node, count?: number): [Node, number];
|
|
6
|
-
export declare function popMany(arr: any[], count: number): void;
|
|
7
4
|
export declare function round(num: number, places?: number): number;
|
|
8
5
|
export declare function shorten(str: string, length?: number): string;
|
package/dist/utils.js
CHANGED
|
@@ -30,22 +30,6 @@ export function mergeCaptures(a, b) {
|
|
|
30
30
|
});
|
|
31
31
|
return result;
|
|
32
32
|
}
|
|
33
|
-
export function findAncestorWithNextParser(node, count = 0) {
|
|
34
|
-
if (node === null)
|
|
35
|
-
return [null, count];
|
|
36
|
-
if (!node.closed) {
|
|
37
|
-
return [node, count];
|
|
38
|
-
}
|
|
39
|
-
if (node.parent) {
|
|
40
|
-
return findAncestorWithNextParser(node.parent, count + 1);
|
|
41
|
-
}
|
|
42
|
-
return [null, count];
|
|
43
|
-
}
|
|
44
|
-
export function popMany(arr, count) {
|
|
45
|
-
for (let i = 0; i < count; i++) {
|
|
46
|
-
arr.pop();
|
|
47
|
-
}
|
|
48
|
-
}
|
|
49
33
|
export function round(num, places = 2) {
|
|
50
34
|
return Math.round(num * 10 ** places) / 10 ** places;
|
|
51
35
|
}
|
package/package.json
CHANGED
|
package/dist/combinators/seq.d.ts
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
export {};
|
package/dist/combinators/seq.js
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
export {};
|