tarsec 0.3.2 → 0.4.1

This diff shows the changes between publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between those versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -43,7 +43,7 @@ parser("hello there"); // failure
43
43
  - Derived types: tarsec will generate TypeScript types for your parser
44
44
  - [Debug mode](/tutorials/debugging.md) that prints what's happening step-by-step
45
45
  - Tools to debug your parser's [performance](/tutorials/performance.md)
46
- - Partial [backtracking](/tutorials/backtracking.md) support
46
+ - `peek` / `not` lookahead for [disambiguating grammars](/tutorials/backtracking.md) without backtracking
47
47
  - A way to make your parser more [secure](/tutorials/security.md).
48
48
  - [Pretty error messages](/tutorials/pretty-errors.md)
49
49
 
@@ -58,20 +58,6 @@ export declare function many1WithJoin(parser: Parser<string>): Parser<string>;
58
58
  * const parser = or(capture(digit, "num"), capture(word, "name"));
59
59
  * ```
60
60
  *
61
- * `or` supports backtracking by returning a `nextParser`:
62
- *
63
- * ```ts
64
- * const parser = or(str("hello"), str("hello!"));
65
- *
66
- * // this will match the first parser
67
- * const result = parser("hello");
68
- *
69
- * // but or returns the untried parsers as a new parser
70
- * result.nextParser("hello!"); // works
71
- *
72
- * // result.nextParser is the same as or(str("hello!"))
73
- * ```
74
- *
75
61
  * @param parsers - parsers to try
76
62
  * @returns - a parser that tries each parser in order. Returns the result of the first parser that succeeds.
77
63
  */
@@ -96,6 +82,28 @@ export declare function optional<T>(parser: Parser<T>): Parser<T | null>;
96
82
  * If it succeeds, returns a failure.
97
83
  */
98
84
  export declare function not(parser: Parser<any>): Parser<null>;
85
+ /**
86
+ * Positive lookahead. Runs the given parser without consuming any input.
87
+ * On success, returns the parser's result with `rest` set to the original input.
88
+ * On failure, returns the underlying failure (also with `rest` reset to the original input).
89
+ *
90
+ * Useful for disambiguating alternatives without backtracking:
91
+ *
92
+ * ```ts
93
+ * const parser = or(
94
+ * seqR(peek(str("hello!")), str("hello!")),
95
+ * str("hello"),
96
+ * );
97
+ * ```
98
+ *
99
+ * The `peek` decides which branch to commit to; the real parser then consumes.
100
+ * Captures are preserved when the inner parser is a `CaptureParser`.
101
+ *
102
+ * @param parser - parser to look ahead with
103
+ * @returns - a parser that runs the given parser without consuming input
104
+ */
105
+ export declare function peek<T>(parser: Parser<T>): Parser<T>;
106
+ export declare function peek<T, C extends PlainObject>(parser: CaptureParser<T, C>): CaptureParser<T, C>;
99
107
  /**
100
108
  * Takes three parsers, `open`, `close`, and `parser`.
101
109
  * `between` matches multiple instances of `parser`,
@@ -357,7 +365,6 @@ export declare function search<T>(parser: Parser<T>): Parser<T[]>;
357
365
  * Finally, you don't need to use seq at all. You can just hand write the logic.
358
366
  * But you'll need to do the error handling
359
367
  * and pass the remaining input to the next parser yourself.
360
- * seq also does some backtracking for you that you will need to do yourself.
361
368
  *
362
369
  * Also see `seqR` and `seqC` for convenience functions that return the results or captures respectively.
363
370
  *
@@ -490,3 +497,21 @@ export type OperatorInfo<T> = {
490
497
  * @returns - a parser that handles the full expression grammar
491
498
  */
492
499
  export declare function buildExpressionParser<T>(atom: Parser<T>, operatorTable: OperatorInfo<T>[][], parenParser?: Parser<T>): Parser<T>;
500
+ /**
501
+ * Wraps a parser with a per-input cache. Useful for parsers that may be invoked
502
+ * many times at the same position (e.g. recursive grammars where the same
503
+ * sub-parser is consulted from multiple alternatives).
504
+ *
505
+ * Both successes and failures are cached. The cache is keyed by the input
506
+ * string the parser is called with — because the rest passed between parsers
507
+ * is value-equal across paths, identical sub-parses share a cache entry.
508
+ *
509
+ * `memo` assumes its wrapped parser is a pure function of its input. Don't
510
+ * memoize parsers that consult mutable external state.
511
+ *
512
+ * @param parser - parser to memoize
513
+ * @param name - optional debug name (shown in `parserDebug` counts/times as `memo(name)`)
514
+ * @returns - memoized parser
515
+ */
516
+ export declare function memo<T>(parser: Parser<T>, name?: string): Parser<T>;
517
+ export declare function memo<T, C extends PlainObject>(parser: CaptureParser<T, C>, name?: string): CaptureParser<T, C>;
@@ -1,8 +1,8 @@
1
1
  import { within } from "./parsers/within.js";
2
2
  import { TarsecError } from "./tarsecError.js";
3
3
  import { getDiagnostics, trace } from "./trace.js";
4
- import { captureSuccess, createTree, failure, isCaptureResult, isSuccess, success, } from "./types.js";
5
- import { escape, findAncestorWithNextParser, popMany } from "./utils.js";
4
+ import { captureSuccess, failure, isCaptureResult, isSuccess, success, } from "./types.js";
5
+ import { escape } from "./utils.js";
6
6
  /**
7
7
  * Takes a parser and runs it zero or more times, returning the results as an array.
8
8
  * If the parser is a capture parser, it returns the captures as an array in this form:
@@ -148,20 +148,6 @@ export function many1WithJoin(parser) {
148
148
  * const parser = or(capture(digit, "num"), capture(word, "name"));
149
149
  * ```
150
150
  *
151
- * `or` supports backtracking by returning a `nextParser`:
152
- *
153
- * ```ts
154
- * const parser = or(str("hello"), str("hello!"));
155
- *
156
- * // this will match the first parser
157
- * const result = parser("hello");
158
- *
159
- * // but or returns the untried parsers as a new parser
160
- * result.nextParser("hello!"); // works
161
- *
162
- * // result.nextParser is the same as or(str("hello!"))
163
- * ```
164
- *
165
151
  * @param parsers - parsers to try
166
152
  * @returns - a parser that tries each parser in order. Returns the result of the first parser that succeeds.
167
153
  */
@@ -170,11 +156,7 @@ export function or(...parsers) {
170
156
  for (let i = 0; i < parsers.length; i++) {
171
157
  let result = parsers[i](input);
172
158
  if (result.success) {
173
- if (i === parsers.length - 1)
174
- return result;
175
- const nextParser = or(...parsers.slice(i + 1));
176
- /* console.log({ nextParser }, parsers.slice(i + 1)); */
177
- return Object.assign(Object.assign({}, result), { nextParser });
159
+ return result;
178
160
  }
179
161
  }
180
162
  return failure(`all parsers failed`, input);
@@ -220,6 +202,15 @@ export function not(parser) {
220
202
  return success(null, input);
221
203
  });
222
204
  }
205
+ export function peek(parser) {
206
+ return trace("peek", (input) => {
207
+ const result = parser(input);
208
+ if (!result.success) {
209
+ return Object.assign(Object.assign({}, result), { rest: input });
210
+ }
211
+ return Object.assign(Object.assign({}, result), { rest: input });
212
+ });
213
+ }
223
214
  /**
224
215
  * Takes three parsers, `open`, `close`, and `parser`.
225
216
  * `between` matches multiple instances of `parser`,
@@ -624,64 +615,6 @@ export function search(parser) {
624
615
  return success([], input);
625
616
  });
626
617
  }
627
- /*
628
- To add backtracking support requires a fairly big change. Here's an example that needs backtracking.
629
-
630
- ```ts
631
- const parser = seq([
632
- str("hello "),
633
- or(str("world"), str("world!")),
634
- optional("?")
635
- ], getResults);
636
- ```
637
-
638
- If we try to parse `"hello world!"`, the first parser in the OR will succeed, but then we'll get stuck at the `optional`. Instead, we need to go back up the tree and try the second parser in the OR. A few things need to happen.
639
-
640
- 1. instead of just processing these parsers sequentially in a for loop, we need to model them as a tree
641
- 2. the OR parser needs to let us know that there are other branches to try.
642
-
643
- For #2, there's an optional `nextParser` key on a parser success. The or parser can use this to say "a parser succeeded and here's the result, but there are other parsers that could be tried". `nextParser` is a parser that runs the remaining branches. So in this example, the OR would return a success with `nextParser = or(str("world"))`.
644
-
645
- Next, we need to model this as a tree. Each node in the tree has a parent and child and the parser for that node.
646
-
647
- ```ts
648
- parent: Node;
649
- parser: GeneralParser<any, any> | null;
650
- child: Node;
651
- ```
652
-
653
- Hopefully that is self-explanatory. We start at the root of the tree, try the parser there, then use `.child` to go to the next node and so on. We don't model multiple paths as multiple children. To keep the code simple, we do something else.
654
-
655
- Each node also has a `closed` key. Once we've run the parser for a node, we mark it `closed`. Closed means there are no more branches here. UNLESS, the parser returns a `nextParser`. In that case, we *don't* mark it closed because there are still other options to try. In that case, we also *replace* the parser on that node with nextParser.
656
-
657
- So, going back to the hello world example, let's say we're stuck at the `optional`:
658
-
659
- ```ts
660
- const parser = seq([
661
- str("hello "),
662
- or(str("world"), str("world!")),
663
- optional("?")
664
- ], getResults);
665
- ```
666
-
667
- We use `.parent` to go back up the tree. We're looking for a node that isn't closed. If we find one, we start again from there. In this case, we'd find an open node at the or with parser `or(str("world"))`. We can restart from there, but there's a bunch of state to reset.
668
-
669
- 1. From the new `or` parser, we need to go to the optional parser. We're doing it all again in the same order. This is one reason why it's easier to model this without multiple children. Otherwise, all the children would have to point to the next level, the next level would have to point to all the children in the previous level, and you'd have multiple parents, which is awful to deal with.
670
-
671
- 2. We have consumed input and added to the results. We need to undo that. At this point, the input is `!`, because we've consumed `hello world`. And the results array is `["hello ", "world"]`. We need to rewind both of those.
672
-
673
- To do that, I count how many levels up we've gone to find another branch, and just pop that many elements off the results array. So results is now `["hello "]`. The input is trickier. How would I keep track of what the input was when we were at the OR the last time?
674
-
675
- This is where the final key on a tree node comes in. Nodes also have an optional `input` key.
676
-
677
- IF a parser succeeds, and
678
- IF there's a nextParser,
679
- We know we may come back to this node. So we save the current input as `.input` on the node.
680
-
681
- This approach has some issues. Notably, it doesn't work if you need to backtrack at multiple points in the tree. The test `backtracking-deep.test.ts` shows this.
682
-
683
- The code is also complex and it would be easy to have bugs in this logic. I wish there was a cleaner solution for rewinding state.
684
- */
685
618
  /**
686
619
  * seq takes an array of parsers and runs them sequentially.
687
620
  * If any of the parsers fail, seq fails without consuming any input.
@@ -699,7 +632,6 @@ The code is also complex and it would be easy to have bugs in this logic. I wish
699
632
  * Finally, you don't need to use seq at all. You can just hand write the logic.
700
633
  * But you'll need to do the error handling
701
634
  * and pass the remaining input to the next parser yourself.
702
- * seq also does some backtracking for you that you will need to do yourself.
703
635
  *
704
636
  * Also see `seqR` and `seqC` for convenience functions that return the results or captures respectively.
705
637
  *
@@ -713,44 +645,18 @@ export function seq(parsers, transform, debugName = "") {
713
645
  const results = [];
714
646
  let rest = input;
715
647
  const captures = {};
716
- const rootNode = createTree(parsers);
717
- let current = rootNode;
718
- while (current) {
719
- const parser = current.parser;
720
- if (!parser) {
721
- console.log({ current, parser, results, captures });
722
- throw new Error("parser is null");
723
- }
724
- const parsed = parser(rest);
725
- current.closed = true;
726
- /* console.log({ parsed }); */
648
+ for (let i = 0; i < parsers.length; i++) {
649
+ const parsed = parsers[i](rest);
727
650
  if (!parsed.success) {
728
- const [ancestor, count] = findAncestorWithNextParser(current);
729
- if (ancestor) {
730
- current = ancestor;
731
- rest = ancestor.input;
732
- popMany(results, count);
733
- continue;
734
- }
735
- else {
736
- // don't consume input if we're failing
737
- return Object.assign(Object.assign({}, parsed), { rest: input });
738
- }
651
+ return Object.assign(Object.assign({}, parsed), { rest: input });
739
652
  }
740
653
  results.push(parsed.result);
741
- if (parsed.nextParser) {
742
- /* console.log("setting next parser", parsed.nextParser); */
743
- current.parser = parsed.nextParser;
744
- current.input = rest;
745
- current.closed = false;
746
- }
747
654
  rest = parsed.rest;
748
655
  if (isCaptureResult(parsed)) {
749
656
  for (const key in parsed.captures) {
750
657
  captures[key] = parsed.captures[key];
751
658
  }
752
659
  }
753
- current = current.child;
754
660
  }
755
661
  const result = transform(results, captures);
756
662
  return success(result, rest);
@@ -1026,3 +932,20 @@ function tryOps(ops, input) {
1026
932
  }
1027
933
  return null;
1028
934
  }
935
+ const DEFAULT_MEMO_LIMIT = 10000;
936
+ export function memo(parser, name) {
937
+ const cache = new Map();
938
+ return trace(name ? `memo(${name})` : "memo", (input) => {
939
+ const hit = cache.get(input);
940
+ if (hit !== undefined)
941
+ return hit;
942
+ const result = parser(input);
943
+ if (cache.size >= DEFAULT_MEMO_LIMIT) {
944
+ const oldest = cache.keys().next().value;
945
+ if (oldest !== undefined)
946
+ cache.delete(oldest);
947
+ }
948
+ cache.set(input, result);
949
+ return result;
950
+ });
951
+ }
package/dist/types.d.ts CHANGED
@@ -5,15 +5,13 @@ export type ParserSuccess<T> = {
5
5
  success: true;
6
6
  result: T;
7
7
  rest: string;
8
- nextParser?: Parser<any>;
9
8
  };
10
- /** Represents a parse success with captures. Notice nextParser is also a CaptureParser. */
9
+ /** Represents a parse success with captures. */
11
10
  export type CaptureParserSuccess<T, C extends PlainObject> = {
12
11
  success: true;
13
12
  result: T;
14
13
  rest: string;
15
14
  captures: C;
16
- nextParser?: CaptureParser<any, any>;
17
15
  };
18
16
  /** Represents a parse failure. */
19
17
  export type ParserFailure = {
@@ -98,23 +96,6 @@ export type InferManyReturnType<T extends GeneralParser<any, any>> = T extends C
98
96
  captures: C[];
99
97
  }> : T extends Parser<infer R> ? Parser<R[]> : never;
100
98
  export type MergedResults<T extends readonly GeneralParser<any, any>[]> = ExtractResults<T[number]>;
101
- /** Used to create a parser tree for backtracking. */
102
- export type Node = ParserNode | EmptyNode;
103
- export type ParserNode = {
104
- parent: Node;
105
- parser: GeneralParser<any, any> | null;
106
- input?: string;
107
- child: Node;
108
- closed: boolean;
109
- };
110
- export type EmptyNode = null;
111
- /** Convenience function to create a ParserNode. */
112
- export declare function createNode(parent: Node | null, parser: GeneralParser<any, any>): ParserNode;
113
- /** Convenience function where, given an array of parsers, it creates a tree we can use for backtracking.
114
- * This tree is what `seq` use. It's used to keep track of the parsers we've tried so far,
115
- * so we can backtrack if we need to.
116
- */
117
- export declare function createTree(parsers: readonly GeneralParser<any, any>[]): Node;
118
99
  /** Used by `within`. */
119
100
  export type Matched<T> = {
120
101
  type: "matched";
package/dist/types.js CHANGED
@@ -30,28 +30,3 @@ export function captureSuccess(result, rest, captures) {
30
30
  export function failure(message, rest) {
31
31
  return { success: false, message, rest };
32
32
  }
33
- /** Convenience function to create a ParserNode. */
34
- export function createNode(parent, parser) {
35
- return {
36
- parent,
37
- parser,
38
- child: null,
39
- closed: false,
40
- };
41
- }
42
- /** Convenience function where, given an array of parsers, it creates a tree we can use for backtracking.
43
- * This tree is what `seq` use. It's used to keep track of the parsers we've tried so far,
44
- * so we can backtrack if we need to.
45
- */
46
- export function createTree(parsers) {
47
- if (parsers.length === 0) {
48
- return null;
49
- }
50
- const rootNode = createNode(null, parsers[0]);
51
- let currentNode = rootNode;
52
- for (let i = 1; i < parsers.length; i++) {
53
- currentNode.child = createNode(currentNode, parsers[i]);
54
- currentNode = currentNode.child;
55
- }
56
- return rootNode;
57
- }
package/dist/utils.d.ts CHANGED
@@ -1,8 +1,5 @@
1
- import { Node } from "./types.js";
2
1
  export declare function escape(str: any): string;
3
2
  export declare function merge(a: any | any[], b: any | any[]): any[];
4
3
  export declare function mergeCaptures(a: Record<string, any>, b: Record<string, any>): Record<string, any>;
5
- export declare function findAncestorWithNextParser(node: Node, count?: number): [Node, number];
6
- export declare function popMany(arr: any[], count: number): void;
7
4
  export declare function round(num: number, places?: number): number;
8
5
  export declare function shorten(str: string, length?: number): string;
package/dist/utils.js CHANGED
@@ -30,22 +30,6 @@ export function mergeCaptures(a, b) {
30
30
  });
31
31
  return result;
32
32
  }
33
- export function findAncestorWithNextParser(node, count = 0) {
34
- if (node === null)
35
- return [null, count];
36
- if (!node.closed) {
37
- return [node, count];
38
- }
39
- if (node.parent) {
40
- return findAncestorWithNextParser(node.parent, count + 1);
41
- }
42
- return [null, count];
43
- }
44
- export function popMany(arr, count) {
45
- for (let i = 0; i < count; i++) {
46
- arr.pop();
47
- }
48
- }
49
33
  export function round(num, places = 2) {
50
34
  return Math.round(num * 10 ** places) / 10 ** places;
51
35
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "tarsec",
3
- "version": "0.3.2",
3
+ "version": "0.4.1",
4
4
  "description": "A parser combinator library for TypeScript, inspired by Parsec.",
5
5
  "homepage": "https://github.com/egonSchiele/tarsec",
6
6
  "scripts": {
@@ -1 +0,0 @@
1
- export {};
@@ -1 +0,0 @@
1
- export {};