tarsec 0.0.16 → 0.0.17

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,664 +0,0 @@
1
- import { within } from "./parsers/within.js";
2
- import { trace } from "./trace.js";
3
- import { captureSuccess, createTree, failure, isCaptureResult, isSuccess, success, } from "./types.js";
4
- import { escape, findAncestorWithNextParser, popMany } from "./utils.js";
5
/**
 * Takes a parser and runs it zero or more times, returning the results as an array.
 * If the parser is a capture parser, it returns the captures as an array in this form:
 *
 * ```ts
 * { captures: <array of captures> }
 * ```
 *
 * Repetition stops when the parser fails, when the input is exhausted, or when
 * the parser succeeds without consuming anything (a zero-width match, such as
 * the ones produced by `optional` or `not`). A zero-width match is recorded once
 * and then the loop ends, because retrying at the same position can never
 * make progress and would loop forever.
 *
 * @param parser - parser to run
 * @returns - parser that runs the given parser zero to many times,
 * and returns the result as an array
 */
export function many(parser) {
    return trace("many", (input) => {
        const results = [];
        const captures = [];
        // Builds the final value; only capture-aware when some iteration produced captures.
        const finish = (rest) => captures.length > 0
            ? captureSuccess(results, rest, { captures })
            : success(results, rest);
        let rest = input;
        while (true) {
            const parsed = parser(rest);
            if (!parsed.success) {
                return finish(rest);
            }
            results.push(parsed.result);
            if (isCaptureResult(parsed)) {
                captures.push(parsed.captures);
            }
            const madeProgress = parsed.rest !== rest;
            rest = parsed.rest;
            // Stop on exhausted input, and also on a zero-width success:
            // running the parser again on the same input would never terminate.
            if (rest === "" || !madeProgress) {
                return finish(rest);
            }
        }
    });
}
49
/**
 * Same as `many`, but fails if the parser doesn't match at least once.
 *
 * "At least once" is decided by counting the matches that `many` collected,
 * not by checking whether input was consumed. A parser that matches without
 * consuming input (for example one built with `optional` or `not`) therefore
 * still counts as a match.
 *
 * @param parser - parser to run
 * @returns a parser that runs the given parser one to many times.
 * On failure no input is consumed (`rest` is the original input).
 */
export function many1(parser) {
    return trace(`many1`, (input) => {
        const result = many(parser)(input);
        // Count matches rather than comparing `rest` with `input`: the comparison
        // wrongly rejects zero-width matches, where nothing is consumed.
        if (result.success && result.result.length > 0) {
            return result;
        }
        return {
            success: false,
            rest: input,
            message: "expected at least one match",
        };
    });
}
69
/**
 * Runs the given parser repeatedly (via `many`) and reports how many times it matched.
 * Fails with "expected at least one match" when the parser does not match at all,
 * in which case no input is consumed.
 *
 * @param parser - parser to run
 * @returns - a parser whose result is the number of times the given parser succeeded.
 */
export function count(parser) {
    return trace("count", (input) => {
        const outcome = many(parser)(input);
        if (!outcome.success) {
            return outcome;
        }
        const matches = outcome.result.length;
        return matches === 0
            ? failure("expected at least one match", input)
            : success(matches, outcome.rest);
    });
}
86
/**
 * Runs a parser a fixed number of times and collects the results in an array.
 * If any of the `num` runs fails, the whole parser fails and reports the
 * original input as `rest`, i.e. nothing is consumed.
 *
 * @param num - number of times to run the parser
 * @param parser - parser to run
 * @returns - parser that runs the given parser `num` times and returns an array of the results
 */
export function exactly(num, parser) {
    return trace("exactly", (input) => {
        const collected = [];
        let remaining = input;
        let matched = 0;
        while (matched < num) {
            const step = parser(remaining);
            if (!step.success) {
                return failure(`expected ${num} matches, got ${matched}`, input);
            }
            collected.push(step.result);
            remaining = step.rest;
            matched += 1;
        }
        return success(collected, remaining);
    });
}
108
/**
 * Like `many`, except that the collected results are concatenated into one string.
 *
 * @param parser - parser to run. The parser must return a string as its result.
 * @returns - parser that runs the given parser zero to many times,
 * and returns the result as a single string
 */
export function manyWithJoin(parser) {
    return trace("manyWithJoin", (input) => {
        const outcome = many(parser)(input);
        if (!outcome.success) {
            return outcome;
        }
        const joined = outcome.result.join("");
        // Copy every other field of the outcome and swap the array for the joined string.
        return Object.assign({}, outcome, { result: joined });
    });
}
124
/**
 * Like `many1`, except that the collected results are concatenated into one string.
 *
 * @param parser - parser to run. The parser must return a string as its result.
 * @returns - parser that runs the given parser one to many times,
 * and returns the result as a single string
 */
export function many1WithJoin(parser) {
    return trace("many1WithJoin", (input) => {
        const outcome = many1(parser)(input);
        if (!outcome.success) {
            return outcome;
        }
        const joined = outcome.result.join("");
        // Copy every other field of the outcome and swap the array for the joined string.
        return Object.assign({}, outcome, { result: joined });
    });
}
140
/**
 * `or` takes any number of parsers and tries them in order on the same input.
 * The result of the first parser that succeeds is returned; if none succeeds,
 * `or` fails with "all parsers failed".
 * You can use `capture` in an `or`:
 *
 * ```ts
 * const parser = or(capture(digit, "num"), capture(word, "name"));
 * ```
 *
 * `or` supports backtracking by returning a `nextParser`:
 *
 * ```ts
 * const parser = or(str("hello"), str("hello!"));
 *
 * // this will match the first parser
 * const result = parser("hello");
 *
 * // but or returns the untried parsers as a new parser
 * result.nextParser("hello!"); // works
 *
 * // result.nextParser is the same as or(str("hello!"))
 * ```
 *
 * When the last parser in the list is the one that succeeds, there is nothing
 * left to try, so its result is returned unchanged.
 *
 * @param parsers - parsers to try
 * @returns - a parser that tries each parser in order. Returns the result of the first parser that succeeds.
 */
export function or(...parsers) {
    return trace(`or()`, (input) => {
        const lastIndex = parsers.length - 1;
        for (const [index, candidate] of parsers.entries()) {
            const attempt = candidate(input);
            if (!attempt.success) {
                continue;
            }
            if (index === lastIndex) {
                return attempt;
            }
            // Hand the untried alternatives back so a caller can backtrack into them.
            const nextParser = or(...parsers.slice(index + 1));
            return Object.assign({}, attempt, { nextParser });
        }
        return failure(`all parsers failed`, input);
    });
}
181
/**
 * Makes a parser optional. The wrapped parser is run once; when it succeeds its
 * result is passed through untouched, and when it fails `optional` succeeds
 * anyway with a `null` result and the input left unconsumed.
 *
 * @param parser - parser to run
 * @returns - a parser that runs the given parser.
 * If it fails, returns a success with a null result.
 */
export function optional(parser) {
    return trace("optional", (input) => {
        const attempt = parser(input);
        return attempt.success ? attempt : success(null, input);
    });
}
198
/**
 * Negates a parser. The wrapped parser is run once on the input:
 * if it fails, `not` succeeds with a `null` result;
 * if it succeeds, `not` fails. Either way no input is consumed.
 *
 * @param parser - parser to run
 * @returns - a parser that runs the given parser.
 * If it fails, returns a success with a `null` result.
 * If it succeeds, returns a failure.
 */
export function not(parser) {
    return trace("not", (input) => {
        const attempt = parser(input);
        if (!attempt.success) {
            return success(null, input);
        }
        return {
            success: false,
            rest: input,
            message: "expected parser not to succeed",
        };
    });
}
221
/**
 * Takes three parsers, `open`, `close`, and `parser`.
 * `between` matches something that matches `parser`,
 * surrounded by `open` and `close`. It returns the result of `parser`.
 * If any of the parsers fail, `between` fails without consuming input:
 * the failure of the inner parser is returned with `rest` reset to the
 * input `between` was called with.
 *
 * @param open - parser for the opening delimiter
 * @param close - parser for the closing delimiter
 * @param parser - parser for the content
 * @returns a parser that returns the result of `parser`.
 */
export function between(open, close, parser) {
    return (input) => {
        // A failure part-way through must not leak the partially consumed
        // position, so every failure is re-anchored at the original input.
        const failAtStart = (failed) => Object.assign({}, failed, { rest: input });
        const opened = open(input);
        if (!opened.success) {
            return failAtStart(opened);
        }
        const content = parser(opened.rest);
        if (!content.success) {
            return failAtStart(content);
        }
        const closed = close(content.rest);
        if (!closed.success) {
            return failAtStart(closed);
        }
        return success(content.result, closed.rest);
    };
}
249
/**
 * Parses many instances of the parser separated by separator.
 *
 * Only separators that are followed by another item are consumed. If a
 * separator matches but the item parser then fails, the returned `rest`
 * starts at that dangling separator, so the caller can still see it.
 * The parser always succeeds; with no items the result is an empty array
 * and no input is consumed.
 *
 * @param separator - parser for the text between two items; its result is discarded
 * @param parser - parser for a single item
 * @returns a parser that runs the given parser zero to many times, separated by the separator parser.
 */
export function sepBy(separator, parser) {
    return (input) => {
        const results = [];
        // Input remaining right after the last successfully parsed item.
        let committed = input;
        // Position at which the next item is attempted.
        let cursor = input;
        while (true) {
            const item = parser(cursor);
            if (!item.success) {
                // Either no item at all, or a trailing separator with no item
                // after it: fall back to just after the last item.
                return success(results, committed);
            }
            results.push(item.result);
            committed = item.rest;
            const sep = separator(committed);
            if (!sep.success) {
                return success(results, committed);
            }
            // Item and separator together consumed nothing; looping again
            // from the same position would never terminate.
            if (sep.rest === cursor) {
                return success(results, committed);
            }
            cursor = sep.rest;
        }
    };
}
274
/**
 * Transformer for `seq` that keeps the results array and ignores the captures.
 * Pass it as the second argument to `seq` when you want every parser's result.
 *
 * @param results - array of results collected by `seq`
 * @param captures - captures object collected by `seq` (unused)
 * @returns `results`
 */
export function getResults(results, captures) {
    const allResults = results;
    return allResults;
}
283
/**
 * Transformer for `seq` that keeps the captures object and ignores the results.
 * Pass it as the second argument to `seq` when you only care about captures.
 *
 * @param results - array of results collected by `seq` (unused)
 * @param captures - captures object collected by `seq`
 * @returns `captures`
 */
export function getCaptures(results, captures) {
    const allCaptures = captures;
    return allCaptures;
}
292
/**
 * `capture` is the only way to create a capture. It runs the given parser and,
 * on success, returns that parser's outcome with a `captures` object attached.
 * The `captures` object has a single key, `name`, whose value is the parser's result.
 * A failure from the parser is returned unchanged.
 *
 * @param parser - parser to run
 * @param name - name of the capture
 * @returns - the results of the parser, with the captures object attached.
 */
export function capture(parser, name) {
    return trace(`capture(${escape(name)})`, (input) => {
        const outcome = parser(input);
        if (!outcome.success) {
            return outcome;
        }
        const captures = { [name]: outcome.result };
        // Any `captures` already present on the outcome is replaced by the new object.
        return Object.assign({}, outcome, { captures });
    });
}
314
/**
 * Returns a parser that consumes input till the given parser succeeds.
 * The stop parser is tried at every position of the input, starting at the
 * beginning. If it never succeeds, the whole input is consumed.
 *
 * @param parser - the stop parser
 * @returns a parser that consumes the input string until the stop parser succeeds.
 * Then it returns the consumed input as a string.
 * The stop parser's match is not included in the result; it stays in `rest`.
 */
export function manyTill(parser) {
    return (input) => {
        for (let offset = 0; offset < input.length; offset++) {
            const tail = input.slice(offset);
            if (parser(tail).success) {
                return success(input.slice(0, offset), tail);
            }
        }
        return success(input, "");
    };
}
334
/**
 * Just like `manyTill`, but fails unless at least one character of input is consumed.
 * That happens when the stop parser already succeeds at the very start of the
 * input, or when the input is empty.
 *
 * @param parser - the stop parser
 * @returns a parser that consumes the input string until the stop parser succeeds.
 */
export function many1Till(parser) {
    return (input) => {
        const nothingConsumed = () => failure("expected to consume at least one character of input", input);
        for (let offset = 0; offset < input.length; offset++) {
            const tail = input.slice(offset);
            if (!parser(tail).success) {
                continue;
            }
            return offset === 0
                ? nothingConsumed()
                : success(input.slice(0, offset), tail);
        }
        // The stop parser never matched. Only an empty input leaves nothing consumed here.
        return input.length === 0 ? nothingConsumed() : success(input, "");
    };
}
358
/**
 * `manyTillOneOf` is an optimized version of `manyTill`.
 * The `manyTill` combinator is slow because it runs the given parser
 * on every character of the string until it succeeds. However, if you
 * just want to consume input until you get to a substring,
 * use `manyTillOneOf`. It uses `indexOf`, which is significantly faster
 * than running a parser over every character.
 *
 * Given an array of strings, this parser consumes input until it hits one of those strings.
 * When several of them occur, it stops at the earliest occurrence.
 * If none of the strings is found, the parser will consume all input and return success.
 *
 * @param stops - the strings to stop at
 * @param options - object of optional parameters. { insensitive: boolean }.
 * With `insensitive: true`, the input and the stop strings are compared after
 * `toLocaleLowerCase()`.
 * @returns a parser that consumes the input string until one of the given strings is found.
 * The stop string itself is not consumed; it stays at the start of `rest`.
 */
export function manyTillOneOf(stops, { insensitive = false } = {}) {
    return trace(`manyTillOneOf(${escape(stops.join(","))})`, (input) => {
        // Lower-case the input once, rather than once per stop string.
        // NOTE(review): lower-casing can change the length of a few characters,
        // in which case indexes found here may not line up with `input`.
        const haystack = insensitive ? input.toLocaleLowerCase() : input;
        const indexes = stops
            .map((stop) => haystack.indexOf(insensitive ? stop.toLocaleLowerCase() : stop))
            .filter((index) => index !== -1);
        if (indexes.length === 0) {
            return success(input, "");
        }
        const cut = Math.min(...indexes);
        return success(input.slice(0, cut), input.slice(cut));
    });
}
391
/**
 * `manyTillStr` is an optimized version of `manyTill` for the case where the
 * stop condition is a plain substring. Instead of running a parser at every
 * character, it delegates to `manyTillOneOf` with a single stop string,
 * which locates the substring with `indexOf`.
 *
 * @param str - the string to stop at
 * @param options - object of optional parameters. { insensitive: boolean }
 * @returns a parser that consumes the input string until the given string is found.
 */
export function manyTillStr(str, { insensitive = false } = {}) {
    return trace(`manyTillStr(${escape(str)})`, (input) => {
        const untilStop = manyTillOneOf([str], { insensitive });
        return untilStop(input);
    });
}
408
/**
 * Case-insensitive variant of `manyTillStr`: calls it with `{ insensitive: true }`.
 *
 * @param str - the string to stop at
 * @returns a parser that consumes the input string until the given string is found.
 */
export function iManyTillStr(str) {
    const options = { insensitive: true };
    return manyTillStr(str, options);
}
416
/**
 * `map` is a parser combinator that takes a parser and a mapper function.
 * When the parser succeeds, its result is replaced by `mapperFunc(result)`;
 * every other field of the success is kept. When the parser fails, the failure
 * is returned unchanged and the mapper is not called.
 * You can think of map as a general `map`, like for functors, applied to a parser.
 * Since `map` itself is a parser, you can use it in `seq` or other combinators.
 *
 * @param parser - parser to run
 * @param mapperFunc - function to map the result of the parser
 * @returns a parser whose result is the mapped result of `parser`
 */
export function map(parser, mapperFunc) {
    return trace(`map(${mapperFunc})`, (input) => {
        const outcome = parser(input);
        if (!outcome.success) {
            return outcome;
        }
        const mapped = mapperFunc(outcome.result);
        return Object.assign({}, outcome, { result: mapped });
    });
}
435
/**
 * Given a parser that returns a string, `search` looks for all substrings in a string that match that parser.
 * For example, given a parser that matches quoted strings, `search` will return an array of all the quoted strings
 * it finds in the input.
 *
 * The rest of the input that isn't part of the result is joined with a single space and returned as a string.
 * If you need a more structured result + rest, you can use `within` instead.
 *
 * `search` always succeeds. If the underlying `within` parser fails, the result
 * is an empty array and the whole input is returned as `rest`.
 *
 * @param parser - a parser that returns a string
 * @returns - a parser that returns an array of strings
 */
export function search(parser) {
    return trace("search", (input) => {
        const parsed = within(parser)(input);
        if (!parsed.success) {
            // Keep the result type stable: "nothing found" is an empty array,
            // not an empty string.
            return success([], input);
        }
        const matched = [];
        const unmatched = [];
        for (const segment of parsed.result) {
            if (segment.type === "matched") {
                matched.push(segment.value);
            }
            else if (segment.type === "unmatched") {
                unmatched.push(segment.value);
            }
        }
        return success(matched, unmatched.join(" "));
    });
}
462
- /*
463
- To add backtracking support requires a fairly big change. Here's an example that needs backtracking.
464
-
465
- ```ts
466
- const parser = seq([
467
- str("hello "),
468
- or(str("world"), str("world!")),
469
- optional(str("?"))
470
- ], getResults);
471
- ```
472
-
473
- If we try to parse `"hello world!"`, the first parser in the OR will succeed, but then we'll get stuck at the `optional`. Instead, we need to go back up the tree and try the second parser in the OR. A few things need to happen.
474
-
475
- 1. instead of just processing these parsers sequentially in a for loop, we need to model them as a tree
476
- 2. the OR parser needs to let us know that there are other branches to try.
477
-
478
- For #2, there's an optional `nextParser` key on a parser success. The or parser can use this to say "a parser succeeded and here's the result, but there are other parsers that could be tried". `nextParser` is a parser that runs the remaining branches. So in this example, the OR would return a success with `nextParser = or(str("world!"))`.
479
-
480
- Next, we need to model this as a tree. Each node in the tree has a parent and child and the parser for that node.
481
-
482
- ```ts
483
- parent: Node;
484
- parser: GeneralParser<any, any> | null;
485
- child: Node;
486
- ```
487
-
488
- Hopefully that is self-explanatory. We start at the root of the tree, try the parser there, then use `.child` to go to the next node and so on. We don't model multiple paths as multiple children. To keep the code simple, we do something else.
489
-
490
- Each node also has a `closed` key. Once we've run the parser for a node, we mark it `closed`. Closed means there are no more branches here. UNLESS, the parser returns a `nextParser`. In that case, we *don't* mark it closed because there are still other options to try. In that case, we also *replace* the parser on that node with nextParser.
491
-
492
- So, going back to the hello world example, let's say we're stuck at the `optional`:
493
-
494
- ```ts
495
- const parser = seq([
496
- str("hello "),
497
- or(str("world"), str("world!")),
498
- optional(str("?"))
499
- ], getResults);
500
- ```
501
-
502
- We use `.parent` to go back up the tree. We're looking for a node that isn't closed. If we find one, we start again from there. In this case, we'd find an open node at the or with parser `or(str("world!"))`. We can restart from there, but there's a bunch of state to reset.
503
-
504
- 1. From the new `or` parser, we need to go to the optional parser. We're doing it all again in the same order. This is one reason why it's easier to model this without multiple children. Otherwise, all the children would have to point to the next level, the next level would have to point to all the children in the previous level, and you'd have multiple parents, which is awful to deal with.
505
-
506
- 2. We have consumed input and added to the results. We need to undo that. At this point, the input is `!`, because we've consumed `hello world`. And the results array is `["hello ", "world"]`. We need to rewind both of those.
507
-
508
- To do that, I count how many levels up we've gone to find another branch, and just pop that many elements off the results array. So results is now `["hello "]`. The input is trickier. How would I keep track of what the input was when we were at the OR the last time?
509
-
510
- This is where the final key on a tree node comes in. Nodes also have an optional `input` key.
511
-
512
- IF a parser succeeds, and
513
- IF there's a nextParser,
514
- We know we may come back to this node. So we save the current input as `.input` on the node.
515
-
516
- This approach has some issues. Notably, it doesn't work if you need to backtrack at multiple points in the tree. The test `backtracking-deep.test.ts` shows this.
517
-
518
- The code is also complex and it would be easy to have bugs in this logic. I wish there was a cleaner solution for rewinding state.
519
- */
520
/**
 * seq takes an array of parsers and runs them sequentially.
 * If any of the parsers fail, seq fails without consuming any input.
 *
 * The second argument to seq is a function.
 * The first argument of that function is an array of results:
 * one result from each of the parsers you gave to seq.
 * The second is an object containing any captures.
 * You can use this second argument, the transformer function,
 * to transform these however you want and return a result
 *
 * Tarsec includes the utility functions `getResults` and `getCaptures`
 * to just return the results array or captures object respectively for you.
 *
 * Finally, you don't need to use seq at all. You can just hand write the logic.
 * But you'll need to do the error handling
 * and pass the remaining input to the next parser yourself.
 * seq also does some backtracking for you that you will need to do yourself.
 *
 * Backtracking: when a parser succeeds and its result carries a `nextParser`
 * (as `or` does when it has untried alternatives), seq remembers the input at
 * that point. If a later parser fails, seq goes back to the nearest such point,
 * drops the results collected since then, and retries with `nextParser`.
 *
 * NOTE(review): captures recorded by an abandoned branch are not removed when
 * backtracking; only the results array is rewound.
 *
 * Also see `seqR` and `seqC` for convenience functions that return the results or captures respectively.
 *
 * @param parsers - parsers to run sequentially
 * @param transform - function to transform the results and captures. The params are the results and captures
 * @param debugName - optional name for trace debugging
 * @returns a parser whose result is whatever `transform` returns
 * @throws Error("parser is null") if a node of the parser tree has no parser
 */
export function seq(parsers, transform, debugName = "") {
    return trace(`seq(${debugName})`, (input) => {
        const results = [];
        const captures = {};
        let rest = input;
        let current = createTree(parsers);
        while (current) {
            const parser = current.parser;
            if (!parser) {
                throw new Error("parser is null");
            }
            const parsed = parser(rest);
            // A node is closed once tried; it is reopened below if it has alternatives left.
            current.closed = true;
            if (!parsed.success) {
                const [ancestor, count] = findAncestorWithNextParser(current);
                if (!ancestor) {
                    // don't consume input if we're failing
                    return Object.assign({}, parsed, { rest: input });
                }
                // Rewind to the ancestor: restore its input and drop the
                // results that were pushed since it last ran.
                current = ancestor;
                rest = ancestor.input;
                popMany(results, count);
                continue;
            }
            results.push(parsed.result);
            if (parsed.nextParser) {
                // Untried alternatives remain: keep the node open, swap in the
                // remaining alternatives, and remember the input it started from.
                current.parser = parsed.nextParser;
                current.input = rest;
                current.closed = false;
            }
            rest = parsed.rest;
            if (isCaptureResult(parsed)) {
                for (const key in parsed.captures) {
                    captures[key] = parsed.captures[key];
                }
            }
            current = current.child;
        }
        return success(transform(results, captures), rest);
    });
}
594
/**
 * Shorthand for `seq(parsers, getResults)`: runs the parsers in sequence
 * and returns the array of their results.
 * Takes the parsers as separate arguments instead of an array.
 */
export function seqR(...parsers) {
    const sequence = seq(parsers, getResults);
    return sequence;
}
600
/**
 * Shorthand for `seq(parsers, getCaptures)`: runs the parsers in sequence
 * and returns the captures object.
 * Takes the parsers as separate arguments instead of an array.
 */
export function seqC(...parsers) {
    const sequence = seq(parsers, getCaptures);
    return sequence;
}
606
/**
 * Checks whether a parser matches an entire input string.
 * The parser is run once; `match` is `true` only when it succeeds
 * and leaves no input behind.
 *
 * @param input - input string
 * @param parser - parser to match input against
 * @returns - true if the parser matches the input and consumes all input, false otherwise
 */
export function match(input, parser) {
    const { success: matched, rest } = parser(input);
    return matched && rest === "";
}
618
/**
 * Chooses between two parsers based on a condition.
 * The condition is a plain value fixed when `ifElse` is called, not a parser:
 * when it is truthy the input is parsed with `ifParser`, otherwise with `elseParser`.
 *
 * @param condition - value that selects which parser to run
 * @param ifParser - parser used when `condition` is truthy
 * @param elseParser - parser used when `condition` is falsy
 * @returns a parser that returns the result of the selected parser
 */
export function ifElse(condition, ifParser, elseParser) {
    return trace(`ifElse(${escape(condition)})`, (input) => {
        const chosen = condition ? ifParser : elseParser;
        return chosen(input);
    });
}
626
/**
 * Runs every given parser on the same input and collects all the outcomes,
 * successes and failures alike, in the order the parsers were given.
 * Succeeds if at least one parser succeeded; otherwise fails with
 * "no parsers succeeded". Consumes no input.
 *
 * @param parsers - parsers to try
 * @returns a parser whose result is the array of each parser's outcome
 */
export function manyParsers(...parsers) {
    return trace(`manyParsers()`, (input) => {
        const outcomes = parsers.map((parser) => parser(input));
        return outcomes.some(isSuccess)
            ? success(outcomes, input)
            : failure("no parsers succeeded", input);
    });
}
646
/**
 * Runs all the given parsers on the same input (via `manyParsers`).
 * If every one of them succeeds, returns their results as an array.
 * If only some succeed, fails with "not all parsers succeeded"; if none
 * succeeds, the failure from `manyParsers` is returned. Consumes no input.
 *
 * @param parsers - parsers to try
 * @returns - An array of results, or a failure.
 */
export function and(...parsers) {
    return trace(`and()`, (input) => {
        const outcome = manyParsers(...parsers)(input);
        if (!outcome.success) {
            return outcome;
        }
        if (!outcome.result.every(isSuccess)) {
            return failure("not all parsers succeeded", input);
        }
        const values = outcome.result.map((r) => r.result);
        return success(values, input);
    });
}
package/dist/index.d.ts DELETED
@@ -1,4 +0,0 @@
1
- export * from "./parsers.js";
2
- export * from "./combinators.js";
3
- export * from "./trace.js";
4
- export * from "./types.js";
package/dist/index.js DELETED
@@ -1,4 +0,0 @@
1
- export * from "./parsers.js";
2
- export * from "./combinators.js";
3
- export * from "./trace.js";
4
- export * from "./types.js";
@@ -1,2 +0,0 @@
1
- import { WithinResult, Parser } from "../types.js";
2
- export declare function within(parser: Parser<string>): Parser<WithinResult[]>;