tarsec 0.3.1 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -43,7 +43,7 @@ parser("hello there"); // failure
43
43
  - Derived types: tarsec will generate TypeScript types for your parser
44
44
  - [Debug mode](/tutorials/debugging.md) that prints what's happening step-by-step
45
45
  - Tools to debug your parser's [performance](/tutorials/performance.md)
46
- - Partial [backtracking](/tutorials/backtracking.md) support
46
+ - `peek` / `not` lookahead for [disambiguating grammars](/tutorials/backtracking.md) without backtracking
47
47
  - A way to make your parser more [secure](/tutorials/security.md).
48
48
  - [Pretty error messages](/tutorials/pretty-errors.md)
49
49
 
@@ -58,20 +58,6 @@ export declare function many1WithJoin(parser: Parser<string>): Parser<string>;
58
58
  * const parser = or(capture(digit, "num"), capture(word, "name"));
59
59
  * ```
60
60
  *
61
- * `or` supports backtracking by returning a `nextParser`:
62
- *
63
- * ```ts
64
- * const parser = or(str("hello"), str("hello!"));
65
- *
66
- * // this will match the first parser
67
- * const result = parser("hello");
68
- *
69
- * // but or returns the untried parsers as a new parser
70
- * result.nextParser("hello!"); // works
71
- *
72
- * // result.nextParser is the same as or(str("hello!"))
73
- * ```
74
- *
75
61
  * @param parsers - parsers to try
76
62
  * @returns - a parser that tries each parser in order. Returns the result of the first parser that succeeds.
77
63
  */
@@ -357,7 +343,6 @@ export declare function search<T>(parser: Parser<T>): Parser<T[]>;
357
343
  * Finally, you don't need to use seq at all. You can just hand write the logic.
358
344
  * But you'll need to do the error handling
359
345
  * and pass the remaining input to the next parser yourself.
360
- * seq also does some backtracking for you that you will need to do yourself.
361
346
  *
362
347
  * Also see `seqR` and `seqC` for convenience functions that return the results or captures respectively.
363
348
  *
@@ -1,8 +1,8 @@
1
1
  import { within } from "./parsers/within.js";
2
2
  import { TarsecError } from "./tarsecError.js";
3
3
  import { getDiagnostics, trace } from "./trace.js";
4
- import { captureSuccess, createTree, failure, isCaptureResult, isSuccess, success, } from "./types.js";
5
- import { escape, findAncestorWithNextParser, popMany } from "./utils.js";
4
+ import { captureSuccess, failure, isCaptureResult, isSuccess, success, } from "./types.js";
5
+ import { escape } from "./utils.js";
6
6
  /**
7
7
  * Takes a parser and runs it zero or more times, returning the results as an array.
8
8
  * If the parser is a capture parser, it returns the captures as an array in this form:
@@ -148,20 +148,6 @@ export function many1WithJoin(parser) {
148
148
  * const parser = or(capture(digit, "num"), capture(word, "name"));
149
149
  * ```
150
150
  *
151
- * `or` supports backtracking by returning a `nextParser`:
152
- *
153
- * ```ts
154
- * const parser = or(str("hello"), str("hello!"));
155
- *
156
- * // this will match the first parser
157
- * const result = parser("hello");
158
- *
159
- * // but or returns the untried parsers as a new parser
160
- * result.nextParser("hello!"); // works
161
- *
162
- * // result.nextParser is the same as or(str("hello!"))
163
- * ```
164
- *
165
151
  * @param parsers - parsers to try
166
152
  * @returns - a parser that tries each parser in order. Returns the result of the first parser that succeeds.
167
153
  */
@@ -170,11 +156,7 @@ export function or(...parsers) {
170
156
  for (let i = 0; i < parsers.length; i++) {
171
157
  let result = parsers[i](input);
172
158
  if (result.success) {
173
- if (i === parsers.length - 1)
174
- return result;
175
- const nextParser = or(...parsers.slice(i + 1));
176
- /* console.log({ nextParser }, parsers.slice(i + 1)); */
177
- return Object.assign(Object.assign({}, result), { nextParser });
159
+ return result;
178
160
  }
179
161
  }
180
162
  return failure(`all parsers failed`, input);
@@ -624,64 +606,6 @@ export function search(parser) {
624
606
  return success([], input);
625
607
  });
626
608
  }
627
- /*
628
- To add backtracking support requires a fairly big change. Here's an example that needs backtracking.
629
-
630
- ```ts
631
- const parser = seq([
632
- str("hello "),
633
- or(str("world"), str("world!")),
634
- optional("?")
635
- ], getResults);
636
- ```
637
-
638
- If we try to parse `"hello world!"`, the first parser in the OR will succeed, but then we'll get stuck at the `optional`. Instead, we need to go back up the tree and try the second parser in the OR. A few things need to happen.
639
-
640
- 1. instead of just processing these parsers sequentially in a for loop, we need to model them as a tree
641
- 2. the OR parser needs to let us know that there are other branches to try.
642
-
643
- For #2, there's an optional `nextParser` key on a parser success. The or parser can use this to say "a parser succeeded and here's the result, but there are other parsers that could be tried". `nextParser` is a parser that runs the remaining branches. So in this example, the OR would return a success with `nextParser = or(str("world"))`.
644
-
645
- Next, we need to model this as a tree. Each node in the tree has a parent and child and the parser for that node.
646
-
647
- ```ts
648
- parent: Node;
649
- parser: GeneralParser<any, any> | null;
650
- child: Node;
651
- ```
652
-
653
- Hopefully that is self-explanatory. We start at the root of the tree, try the parser there, then use `.child` to go to the next node and so on. We don't model multiple paths as multiple children. To keep the code simple, we do something else.
654
-
655
- Each node also has a `closed` key. Once we've run the parser for a node, we mark it `closed`. Closed means there are no more branches here. UNLESS, the parser returns a `nextParser`. In that case, we *don't* mark it closed because there are still other options to try. In that case, we also *replace* the parser on that node with nextParser.
656
-
657
- So, going back to the hello world example, let's say we're stuck at the `optional`:
658
-
659
- ```ts
660
- const parser = seq([
661
- str("hello "),
662
- or(str("world"), str("world!")),
663
- optional("?")
664
- ], getResults);
665
- ```
666
-
667
- We use `.parent` to go back up the tree. We're looking for a node that isn't closed. If we find one, we start again from there. In this case, we'd find an open node at the or with parser `or(str("world"))`. We can restart from there, but there's a bunch of state to reset.
668
-
669
- 1. From the new `or` parser, we need to go to the optional parser. We're doing it all again in the same order. This is one reason why it's easier to model this without multiple children. Otherwise, all the children would have to point to the next level, the next level would have to point to all the children in the previous level, and you'd have multiple parents, which is awful to deal with.
670
-
671
- 2. We have consumed input and added to the results. We need to undo that. At this point, the input is `!`, because we've consumed `hello world`. And the results array is `["hello ", "world"]`. We need to rewind both of those.
672
-
673
- To do that, I count how many levels up we've gone to find another branch, and just pop that many elements off the results array. So results is now `["hello "]`. The input is trickier. How would I keep track of what the input was when we were at the OR the last time?
674
-
675
- This is where the final key on a tree node comes in. Nodes also have an optional `input` key.
676
-
677
- IF a parser succeeds, and
678
- IF there's a nextParser,
679
- We know we may come back to this node. So we save the current input as `.input` on the node.
680
-
681
- This approach has some issues. Notably, it doesn't work if you need to backtrack at multiple points in the tree. The test `backtracking-deep.test.ts` shows this.
682
-
683
- The code is also complex and it would be easy to have bugs in this logic. I wish there was a cleaner solution for rewinding state.
684
- */
685
609
  /**
686
610
  * seq takes an array of parsers and runs them sequentially.
687
611
  * If any of the parsers fail, seq fails without consuming any input.
@@ -699,7 +623,6 @@ The code is also complex and it would be easy to have bugs in this logic. I wish
699
623
  * Finally, you don't need to use seq at all. You can just hand write the logic.
700
624
  * But you'll need to do the error handling
701
625
  * and pass the remaining input to the next parser yourself.
702
- * seq also does some backtracking for you that you will need to do yourself.
703
626
  *
704
627
  * Also see `seqR` and `seqC` for convenience functions that return the results or captures respectively.
705
628
  *
@@ -713,44 +636,18 @@ export function seq(parsers, transform, debugName = "") {
713
636
  const results = [];
714
637
  let rest = input;
715
638
  const captures = {};
716
- const rootNode = createTree(parsers);
717
- let current = rootNode;
718
- while (current) {
719
- const parser = current.parser;
720
- if (!parser) {
721
- console.log({ current, parser, results, captures });
722
- throw new Error("parser is null");
723
- }
724
- const parsed = parser(rest);
725
- current.closed = true;
726
- /* console.log({ parsed }); */
639
+ for (let i = 0; i < parsers.length; i++) {
640
+ const parsed = parsers[i](rest);
727
641
  if (!parsed.success) {
728
- const [ancestor, count] = findAncestorWithNextParser(current);
729
- if (ancestor) {
730
- current = ancestor;
731
- rest = ancestor.input;
732
- popMany(results, count);
733
- continue;
734
- }
735
- else {
736
- // don't consume input if we're failing
737
- return Object.assign(Object.assign({}, parsed), { rest: input });
738
- }
642
+ return Object.assign(Object.assign({}, parsed), { rest: input });
739
643
  }
740
644
  results.push(parsed.result);
741
- if (parsed.nextParser) {
742
- /* console.log("setting next parser", parsed.nextParser); */
743
- current.parser = parsed.nextParser;
744
- current.input = rest;
745
- current.closed = false;
746
- }
747
645
  rest = parsed.rest;
748
646
  if (isCaptureResult(parsed)) {
749
647
  for (const key in parsed.captures) {
750
648
  captures[key] = parsed.captures[key];
751
649
  }
752
650
  }
753
- current = current.child;
754
651
  }
755
652
  const result = transform(results, captures);
756
653
  return success(result, rest);
@@ -2,11 +2,17 @@ import { seqC, seqR, capture, captureCaptures, or, not, map, many, many1, many1T
2
2
  import { str, char, eof, set, oneOf, alphanum, noneOf, digit, letter, anyChar } from "../../parsers.js";
3
3
  import { success, failure } from "../../types.js";
4
4
  import { optional, between } from "../../combinators.js";
5
- // Stop inline-text at any single delimiter char OR at a hard-break sequence
6
- // (" \n"+). Using many1Till with an `or` of delimiters makes the stop set
7
- // composable rather than embedded inside a regex. `]` is included so that
8
- // inline-text inside a link-text (`[...]`) terminates at the closing `]`.
9
- const inlineTextStop = or(oneOf("*_`[]!<~\\&\n"), str(" "));
5
+ // The "two-or-more trailing spaces then `\n`" half of a hard break. Shared
6
+ // between `hardBreakParser` (which emits the hard-break node) and
7
+ // `inlineTextStop` (which uses it to know where to stop). Keeping these in
8
+ // sync matters: if the run pattern here disagreed with what `hardBreakParser`
9
+ // accepts, inline-text could either swallow a real hard break or — as the
10
+ // previous `str(" ")` did — stop on any incidental double-space (e.g. a
11
+ // 2-space line indent inside a list-item continuation) and freeze the
12
+ // surrounding paragraph at zero progress.
13
+ const hardBreakSpaces = seqR(str(" "), many(char(" ")), char("\n"));
14
+ // `]` is included so inline-text inside a link-text (`[...]`) ends at the `]`.
15
+ const inlineTextStop = or(oneOf("*_`[]!<~\\&\n"), hardBreakSpaces);
10
16
  export const inlineTextParser = map(many1Till(inlineTextStop), (content) => ({ type: "inline-text", content }));
11
17
  /**
12
18
  * Run `inlineMarkdownParser` repeatedly until `stop` would match at the
@@ -219,7 +225,7 @@ export const imageParser = map(seqC(str("!["), capture(iManyTillStr("]("), "alt"
219
225
  });
220
226
  export const hardBreakParser = map(or(
221
227
  // two-or-more trailing spaces then newline
222
- seqR(str(" "), many(char(" ")), char("\n")),
228
+ hardBreakSpaces,
223
229
  // backslash then newline
224
230
  seqR(char("\\"), char("\n"))), () => ({ type: "inline-hard-break" }));
225
231
  /** A single `\n` that is *not* part of a blank line (which would terminate the
package/dist/types.d.ts CHANGED
@@ -5,15 +5,13 @@ export type ParserSuccess<T> = {
5
5
  success: true;
6
6
  result: T;
7
7
  rest: string;
8
- nextParser?: Parser<any>;
9
8
  };
10
- /** Represents a parse success with captures. Notice nextParser is also a CaptureParser. */
9
+ /** Represents a parse success with captures. */
11
10
  export type CaptureParserSuccess<T, C extends PlainObject> = {
12
11
  success: true;
13
12
  result: T;
14
13
  rest: string;
15
14
  captures: C;
16
- nextParser?: CaptureParser<any, any>;
17
15
  };
18
16
  /** Represents a parse failure. */
19
17
  export type ParserFailure = {
@@ -98,23 +96,6 @@ export type InferManyReturnType<T extends GeneralParser<any, any>> = T extends C
98
96
  captures: C[];
99
97
  }> : T extends Parser<infer R> ? Parser<R[]> : never;
100
98
  export type MergedResults<T extends readonly GeneralParser<any, any>[]> = ExtractResults<T[number]>;
101
- /** Used to create a parser tree for backtracking. */
102
- export type Node = ParserNode | EmptyNode;
103
- export type ParserNode = {
104
- parent: Node;
105
- parser: GeneralParser<any, any> | null;
106
- input?: string;
107
- child: Node;
108
- closed: boolean;
109
- };
110
- export type EmptyNode = null;
111
- /** Convenience function to create a ParserNode. */
112
- export declare function createNode(parent: Node | null, parser: GeneralParser<any, any>): ParserNode;
113
- /** Convenience function where, given an array of parsers, it creates a tree we can use for backtracking.
114
- * This tree is what `seq` use. It's used to keep track of the parsers we've tried so far,
115
- * so we can backtrack if we need to.
116
- */
117
- export declare function createTree(parsers: readonly GeneralParser<any, any>[]): Node;
118
99
  /** Used by `within`. */
119
100
  export type Matched<T> = {
120
101
  type: "matched";
package/dist/types.js CHANGED
@@ -30,28 +30,3 @@ export function captureSuccess(result, rest, captures) {
30
30
  export function failure(message, rest) {
31
31
  return { success: false, message, rest };
32
32
  }
33
- /** Convenience function to create a ParserNode. */
34
- export function createNode(parent, parser) {
35
- return {
36
- parent,
37
- parser,
38
- child: null,
39
- closed: false,
40
- };
41
- }
42
- /** Convenience function where, given an array of parsers, it creates a tree we can use for backtracking.
43
- * This tree is what `seq` use. It's used to keep track of the parsers we've tried so far,
44
- * so we can backtrack if we need to.
45
- */
46
- export function createTree(parsers) {
47
- if (parsers.length === 0) {
48
- return null;
49
- }
50
- const rootNode = createNode(null, parsers[0]);
51
- let currentNode = rootNode;
52
- for (let i = 1; i < parsers.length; i++) {
53
- currentNode.child = createNode(currentNode, parsers[i]);
54
- currentNode = currentNode.child;
55
- }
56
- return rootNode;
57
- }
package/dist/utils.d.ts CHANGED
@@ -1,8 +1,5 @@
1
- import { Node } from "./types.js";
2
1
  export declare function escape(str: any): string;
3
2
  export declare function merge(a: any | any[], b: any | any[]): any[];
4
3
  export declare function mergeCaptures(a: Record<string, any>, b: Record<string, any>): Record<string, any>;
5
- export declare function findAncestorWithNextParser(node: Node, count?: number): [Node, number];
6
- export declare function popMany(arr: any[], count: number): void;
7
4
  export declare function round(num: number, places?: number): number;
8
5
  export declare function shorten(str: string, length?: number): string;
package/dist/utils.js CHANGED
@@ -30,22 +30,6 @@ export function mergeCaptures(a, b) {
30
30
  });
31
31
  return result;
32
32
  }
33
- export function findAncestorWithNextParser(node, count = 0) {
34
- if (node === null)
35
- return [null, count];
36
- if (!node.closed) {
37
- return [node, count];
38
- }
39
- if (node.parent) {
40
- return findAncestorWithNextParser(node.parent, count + 1);
41
- }
42
- return [null, count];
43
- }
44
- export function popMany(arr, count) {
45
- for (let i = 0; i < count; i++) {
46
- arr.pop();
47
- }
48
- }
49
33
  export function round(num, places = 2) {
50
34
  return Math.round(num * 10 ** places) / 10 ** places;
51
35
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "tarsec",
3
- "version": "0.3.1",
3
+ "version": "0.4.0",
4
4
  "description": "A parser combinator library for TypeScript, inspired by Parsec.",
5
5
  "homepage": "https://github.com/egonSchiele/tarsec",
6
6
  "scripts": {
@@ -1 +0,0 @@
1
- export {};
@@ -1 +0,0 @@
1
- export {};