@claudiu-ceia/combine 0.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +389 -0
- package/esm/_dnt.shims.d.ts +2 -0
- package/esm/_dnt.shims.d.ts.map +1 -0
- package/esm/_dnt.shims.js +57 -0
- package/esm/mod.d.ts +6 -0
- package/esm/mod.d.ts.map +1 -0
- package/esm/mod.js +5 -0
- package/esm/package.json +3 -0
- package/esm/src/Parser.d.ts +85 -0
- package/esm/src/Parser.d.ts.map +1 -0
- package/esm/src/Parser.js +93 -0
- package/esm/src/Trie.d.ts +17 -0
- package/esm/src/Trie.d.ts.map +1 -0
- package/esm/src/Trie.js +75 -0
- package/esm/src/combinators.d.ts +199 -0
- package/esm/src/combinators.d.ts.map +1 -0
- package/esm/src/combinators.js +531 -0
- package/esm/src/internal_assert.d.ts +2 -0
- package/esm/src/internal_assert.d.ts.map +1 -0
- package/esm/src/internal_assert.js +5 -0
- package/esm/src/language.d.ts +12 -0
- package/esm/src/language.d.ts.map +1 -0
- package/esm/src/language.js +13 -0
- package/esm/src/parsers.d.ts +94 -0
- package/esm/src/parsers.d.ts.map +1 -0
- package/esm/src/parsers.js +256 -0
- package/esm/src/utility.d.ts +91 -0
- package/esm/src/utility.d.ts.map +1 -0
- package/esm/src/utility.js +178 -0
- package/package.json +21 -0
- package/script/_dnt.shims.d.ts +2 -0
- package/script/_dnt.shims.d.ts.map +1 -0
- package/script/_dnt.shims.js +60 -0
- package/script/mod.d.ts +6 -0
- package/script/mod.d.ts.map +1 -0
- package/script/mod.js +21 -0
- package/script/package.json +3 -0
- package/script/src/Parser.d.ts +85 -0
- package/script/src/Parser.d.ts.map +1 -0
- package/script/src/Parser.js +104 -0
- package/script/src/Trie.d.ts +17 -0
- package/script/src/Trie.d.ts.map +1 -0
- package/script/src/Trie.js +80 -0
- package/script/src/combinators.d.ts +199 -0
- package/script/src/combinators.d.ts.map +1 -0
- package/script/src/combinators.js +557 -0
- package/script/src/internal_assert.d.ts +2 -0
- package/script/src/internal_assert.d.ts.map +1 -0
- package/script/src/internal_assert.js +9 -0
- package/script/src/language.d.ts +12 -0
- package/script/src/language.d.ts.map +1 -0
- package/script/src/language.js +17 -0
- package/script/src/parsers.d.ts +94 -0
- package/script/src/parsers.d.ts.map +1 -0
- package/script/src/parsers.js +281 -0
- package/script/src/utility.d.ts +91 -0
- package/script/src/utility.d.ts.map +1 -0
- package/script/src/utility.js +214 -0
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2022 Claudiu Ceia
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/README.md
ADDED
|
@@ -0,0 +1,389 @@
|
|
|
1
|
+
# combine
|
|
2
|
+
|
|
3
|
+
An implementation of
|
|
4
|
+
[parser combinators](https://en.wikipedia.org/wiki/Parser_combinator) for
|
|
5
|
+
TypeScript.
|
|
6
|
+
|
|
7
|
+
## Example
|
|
8
|
+
|
|
9
|
+
```ts
|
|
10
|
+
import {
|
|
11
|
+
anyChar,
|
|
12
|
+
manyTill,
|
|
13
|
+
map,
|
|
14
|
+
mapJoin,
|
|
15
|
+
optional,
|
|
16
|
+
seq,
|
|
17
|
+
space,
|
|
18
|
+
str,
|
|
19
|
+
} from "@claudiu-ceia/combine";
|
|
20
|
+
|
|
21
|
+
const helloWorldParser = seq(
|
|
22
|
+
str("Hello,"),
|
|
23
|
+
optional(space()),
|
|
24
|
+
mapJoin(manyTill(anyChar(), str("!"))),
|
|
25
|
+
);
|
|
26
|
+
|
|
27
|
+
const worldRes = helloWorldParser({
|
|
28
|
+
text: "Hello, World!",
|
|
29
|
+
index: 0,
|
|
30
|
+
});
|
|
31
|
+
|
|
32
|
+
/**
|
|
33
|
+
{
|
|
34
|
+
success: true,
|
|
35
|
+
ctx: {
|
|
36
|
+
text: "Hello, World!",
|
|
37
|
+
index: 13
|
|
38
|
+
},
|
|
39
|
+
}
|
|
40
|
+
*/
|
|
41
|
+
|
|
42
|
+
const nameParser = map(helloWorldParser, ([, , name]) => name);
|
|
43
|
+
const nameRes = nameParser({
|
|
44
|
+
text: "Hello, Joe Doe!",
|
|
45
|
+
index: 0,
|
|
46
|
+
});
|
|
47
|
+
|
|
48
|
+
/**
|
|
49
|
+
{
|
|
50
|
+
success: true,
|
|
51
|
+
value: "Joe Doe!",
|
|
52
|
+
ctx: {
|
|
53
|
+
text: "Hello, Joe Doe!",
|
|
54
|
+
index: 15
|
|
55
|
+
},
|
|
56
|
+
}
|
|
57
|
+
*/
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
For more examples,
|
|
61
|
+
[take a look at tests](https://github.com/ClaudiuCeia/combine/tree/main/tests).
|
|
62
|
+
|
|
63
|
+
## About
|
|
64
|
+
|
|
65
|
+
A parser combinator is a function that takes several parsers as input, and
|
|
66
|
+
returns a new parser. [combine](https://github.com/ClaudiuCeia/combine/) defines
|
|
67
|
+
a few such combinators depending on how the parsers should be combined,
|
|
68
|
+
[seq](https://github.com/ClaudiuCeia/combine/blob/main/src/combinators.ts#L42)
|
|
69
|
+
which takes a list of parsers that are applied sequentially,
|
|
70
|
+
[oneOf](https://github.com/ClaudiuCeia/combine/blob/main/src/combinators.ts#L109)
|
|
71
|
+
which tries all parsers sequentially and applies the first one that's successful,
|
|
72
|
+
[furthest](https://github.com/ClaudiuCeia/combine/blob/main/src/combinators.ts#L150)
|
|
73
|
+
which tries all parsers and applies the one that consumes the most input,
|
|
74
|
+
[and more](https://github.com/ClaudiuCeia/combine/blob/main/src/combinators.ts).
|
|
75
|
+
|
|
76
|
+
Most included parsers are [LL(1)](https://en.wikipedia.org/wiki/LL_parser), with
|
|
77
|
+
some notable exceptions such as
|
|
78
|
+
[str](https://github.com/ClaudiuCeia/combine/blob/main/src/parsers.ts#L8) and
|
|
79
|
+
[regex](https://github.com/ClaudiuCeia/combine/blob/main/src/parsers.ts#L274).
|
|
80
|
+
Other LL(k) parsers in the library are the result of using combinators and are included
|
|
81
|
+
for convenience, like
|
|
82
|
+
[signed](https://github.com/ClaudiuCeia/combine/blob/main/src/parsers.ts#L259),
|
|
83
|
+
[horizontalSpace](https://github.com/ClaudiuCeia/combine/blob/main/src/parsers.ts#L189)
|
|
84
|
+
and [others](https://github.com/ClaudiuCeia/combine/blob/main/src/parsers.ts).
|
|
85
|
+
|
|
86
|
+
A couple of
|
|
87
|
+
[common utility functions](https://github.com/ClaudiuCeia/combine/blob/main/src/utility.ts)
|
|
88
|
+
are also included.
|
|
89
|
+
|
|
90
|
+
## Order and recursion
|
|
91
|
+
|
|
92
|
+
While you can use parsers as shown in the above example, that quickly becomes a
|
|
93
|
+
problem for some parsing tasks, like DSLs.
|
|
94
|
+
|
|
95
|
+
Take a simple calculator grammar defined as:
|
|
96
|
+
|
|
97
|
+
```
|
|
98
|
+
expr=term, expr1;
|
|
99
|
+
expr1="+",term,expr1|"-",term,expr1|;
|
|
100
|
+
term=factor, term1;
|
|
101
|
+
term1="*", factor, term1 | "/", factor, term1|;
|
|
102
|
+
factor="(", expr , ")" | number;
|
|
103
|
+
number=digit , {digit};
|
|
104
|
+
digit = "1"|"2"|"3"|"4"|"5"|"6"|"7"|"8"|"9"|"0";
|
|
105
|
+
syntax=expr;
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
`expr` needs to be defined using `term` and `expr1`, so these two parsers need
|
|
109
|
+
to be defined first. But then `expr1` refers to itself which triggers an
|
|
110
|
+
infinite loop unless we use
|
|
111
|
+
[lazy](https://github.com/ClaudiuCeia/combine/blob/main/src/utility.ts#L29-L31).
|
|
112
|
+
|
|
113
|
+
An implementation of the above can be seen in the
|
|
114
|
+
[calculator test](https://github.com/ClaudiuCeia/combine/blob/main/tests/calculator.test.ts).
|
|
115
|
+
|
|
116
|
+
We can see that the parsers which depend on each other need to be declared using
|
|
117
|
+
a named function as opposed to `addop` and `mulop`. Also, in the `factor` parser
|
|
118
|
+
we need to use `lazy`, otherwise we'd trigger an infinite mutual recursion
|
|
119
|
+
where:
|
|
120
|
+
|
|
121
|
+
`factor` calls `expression`, `expression` calls `factor`, ...
|
|
122
|
+
|
|
123
|
+
### createLanguage
|
|
124
|
+
|
|
125
|
+
Borrowing a trick from [Parsimmon](https://github.com/jneen/parsimmon), we can
|
|
126
|
+
use the `createLanguage` function to define our grammar. This allows us to not
|
|
127
|
+
worry about the order in which we define parsers, and we get each parser defined
|
|
128
|
+
as lazy for free (well, with some minor computational cost). You can see a
|
|
129
|
+
comparison of directly using the parser vs `createLanguage` in
|
|
130
|
+
[this benchmark](https://github.com/ClaudiuCeia/combine/blob/main/bench/createLanguage_bench.ts),
|
|
131
|
+
and you can see another example in
|
|
132
|
+
[this other benchmark](https://github.com/ClaudiuCeia/combine/blob/main/bench/lisp_bench.ts).
|
|
133
|
+
|
|
134
|
+
Typing support for `createLanguage` is not great at the moment. There are two
|
|
135
|
+
ways to use it:
|
|
136
|
+
|
|
137
|
+
```ts
|
|
138
|
+
import {
|
|
139
|
+
createLanguage,
|
|
140
|
+
either,
|
|
141
|
+
str,
|
|
142
|
+
Parser,
|
|
143
|
+
UntypedLanguage,
|
|
144
|
+
number
|
|
145
|
+
} from "@claudiu-ceia/combine";
|
|
146
|
+
|
|
147
|
+
/**
|
|
148
|
+
* Untyped, provide `UntypedLanguage` as a type parameter.
|
|
149
|
+
* This will make all of the grammar consist of Parser<unknown>,
|
|
150
|
+
* but you at least get a mapping for the `self` parameter.
|
|
151
|
+
*/
|
|
152
|
+
const lang = createLanguage<UntypedLanguage>({
|
|
153
|
+
Foo: (s) => either(s.Bar /* this is checked to exist */, number()),
|
|
154
|
+
Bar: () => str("Bar"),
|
|
155
|
+
});
|
|
156
|
+
|
|
157
|
+
// Typed
|
|
158
|
+
type TypedLanguage = {
|
|
159
|
+
Foo: Parser<string | number>,
|
|
160
|
+
Bar: Parser<string>,
|
|
161
|
+
// ...
|
|
162
|
+
}
|
|
163
|
+
const typedLang = createLanguage<TypedLanguage>({
|
|
164
|
+
Foo: (s) => either(
|
|
165
|
+
s.Bar, // this is checked to exist with the expected type
|
|
166
|
+
number(),
|
|
167
|
+
),
|
|
168
|
+
Bar: () => str("Bar"),
|
|
169
|
+
});
|
|
170
|
+
```
|
|
171
|
+
|
|
172
|
+
Note that for more complex grammar you generally need some sort of recursion.
|
|
173
|
+
For those cases, it can be tricky to define the `TypedLanguage`; have a look at
|
|
174
|
+
[this example](https://github.com/ClaudiuCeia/combine/blob/main/tests/language.test.ts)
|
|
175
|
+
for inspiration.
|
|
176
|
+
|
|
177
|
+
Note that since this wraps all of the functions in a `lazy()` closure, this also
|
|
178
|
+
brings a small performance hit. In the future we should be able to apply `lazy()`
|
|
179
|
+
only where it's needed.
|
|
180
|
+
|
|
181
|
+
## Error Handling
|
|
182
|
+
|
|
183
|
+
combine provides TypeScript-style error stack traces for better debugging. When
|
|
184
|
+
a parse fails, you get a detailed trace showing the context at each level of
|
|
185
|
+
your grammar.
|
|
186
|
+
|
|
187
|
+
### Error Stack
|
|
188
|
+
|
|
189
|
+
The `Failure` type includes a `stack` field containing error frames:
|
|
190
|
+
|
|
191
|
+
```ts
|
|
192
|
+
type ErrorFrame = {
|
|
193
|
+
label: string; // Context description (e.g., "in match expression")
|
|
194
|
+
location: { line: number; column: number };
|
|
195
|
+
};
|
|
196
|
+
|
|
197
|
+
type Failure = {
|
|
198
|
+
success: false;
|
|
199
|
+
expected: string; // What was expected (from innermost parser or cut)
|
|
200
|
+
ctx: Context;
|
|
201
|
+
location: { line: number; column: number };
|
|
202
|
+
variants: Failure[]; // Alternative failures from `any`/`either`
|
|
203
|
+
stack: ErrorFrame[]; // Error causation chain (innermost first)
|
|
204
|
+
fatal: boolean; // If true, won't backtrack in any/either
|
|
205
|
+
};
|
|
206
|
+
```
|
|
207
|
+
|
|
208
|
+
### The `context` combinator
|
|
209
|
+
|
|
210
|
+
Add a stack frame to parser errors using `context`. This tells the user **where
|
|
211
|
+
in the grammar** the error occurred:
|
|
212
|
+
|
|
213
|
+
```ts
|
|
214
|
+
import { context, letter, many1, seq, str } from "@claudiu-ceia/combine";
|
|
215
|
+
|
|
216
|
+
const identifier = context("in identifier", many1(letter()));
|
|
217
|
+
const declaration = context(
|
|
218
|
+
"in declaration",
|
|
219
|
+
seq(str("let"), str(" "), identifier),
|
|
220
|
+
);
|
|
221
|
+
|
|
222
|
+
const result = declaration({ text: "let 123", index: 0 });
|
|
223
|
+
// Error: expected letter at 1:5
|
|
224
|
+
// in identifier at 1:5
|
|
225
|
+
// in declaration at 1:1
|
|
226
|
+
```
|
|
227
|
+
|
|
228
|
+
Key points:
|
|
229
|
+
|
|
230
|
+
- Each `context` wrapping a **failing** parser adds one frame to the stack
|
|
231
|
+
- Frames are added as the failure bubbles up: innermost first, outermost last
|
|
232
|
+
- On **success**, `context` is a no-op (no frame added)
|
|
233
|
+
|
|
234
|
+
### The `cut` combinator
|
|
235
|
+
|
|
236
|
+
Mark a point of no return with `cut`. After a cut, failures become "fatal" and
|
|
237
|
+
won't be caught by alternative parsers like `any` or `either`.
|
|
238
|
+
|
|
239
|
+
`cut` does two things:
|
|
240
|
+
|
|
241
|
+
1. **Always:** Makes the failure fatal (prevents backtracking)
|
|
242
|
+
2. **Optionally:** Overrides the `expected` message if you provide a second
|
|
243
|
+
argument
|
|
244
|
+
|
|
245
|
+
```ts
|
|
246
|
+
// cut(parser) — fatal failure, keeps original expected message
|
|
247
|
+
cut(str("then"));
|
|
248
|
+
// Failure: { expected: "then", fatal: true }
|
|
249
|
+
|
|
250
|
+
// cut(parser, "message") — fatal failure, overrides expected message
|
|
251
|
+
cut(str("then"), "'then' keyword after condition");
|
|
252
|
+
// Failure: { expected: "'then' keyword after condition", fatal: true }
|
|
253
|
+
```
|
|
254
|
+
|
|
255
|
+
### `context` vs `cut` — when to use which
|
|
256
|
+
|
|
257
|
+
| | `context("label", parser)` | `cut(parser, "label")` |
|
|
258
|
+
| --------------------- | -------------------------- | ---------------------- |
|
|
259
|
+
| **Purpose** | WHERE in grammar | WHAT was expected |
|
|
260
|
+
| **Adds to stack?** | Yes | No |
|
|
261
|
+
| **Changes expected?** | No | Yes |
|
|
262
|
+
| **Makes fatal?** | No | Yes |
|
|
263
|
+
|
|
264
|
+
You can combine both for rich error messages:
|
|
265
|
+
|
|
266
|
+
```ts
|
|
267
|
+
// Stack frame AND custom expected message
|
|
268
|
+
context("in then keyword", cut(str("then"), "'then' after condition"));
|
|
269
|
+
// Failure: { expected: "'then' after condition", stack: ["in then keyword"], fatal: true }
|
|
270
|
+
```
|
|
271
|
+
|
|
272
|
+
### Real-world example with `cut`
|
|
273
|
+
|
|
274
|
+
```ts
|
|
275
|
+
import {
|
|
276
|
+
any,
|
|
277
|
+
context,
|
|
278
|
+
cut,
|
|
279
|
+
letter,
|
|
280
|
+
many1,
|
|
281
|
+
map,
|
|
282
|
+
seq,
|
|
283
|
+
str,
|
|
284
|
+
} from "@claudiu-ceia/combine";
|
|
285
|
+
|
|
286
|
+
const identifier = map(many1(letter()), (ls) => ls.join(""));
|
|
287
|
+
|
|
288
|
+
// After seeing "if", we're committed - don't backtrack
|
|
289
|
+
const ifExpr = context(
|
|
290
|
+
"in if expression",
|
|
291
|
+
seq(
|
|
292
|
+
str("if "),
|
|
293
|
+
cut(seq( // everything after "if" is committed
|
|
294
|
+
context("in condition", identifier),
|
|
295
|
+
str(" "),
|
|
296
|
+
context("in then keyword", str("then ")),
|
|
297
|
+
context("in then branch", identifier),
|
|
298
|
+
str(" "),
|
|
299
|
+
context("in else keyword", str("else ")),
|
|
300
|
+
context("in else branch", identifier),
|
|
301
|
+
)),
|
|
302
|
+
),
|
|
303
|
+
);
|
|
304
|
+
|
|
305
|
+
// Without cut: if "then" is misspelled, `any` would backtrack and try
|
|
306
|
+
// whileExpr, forExpr, then identifier - giving a confusing error
|
|
307
|
+
const expr = any(ifExpr, whileExpr, forExpr, identifier);
|
|
308
|
+
|
|
309
|
+
// With cut: after matching "if ", we're committed to parsing an if-expression
|
|
310
|
+
const result = expr({ text: "if x thn y else z", index: 0 });
|
|
311
|
+
// Error: expected then at line 1, column 5
|
|
312
|
+
// in then keyword at line 1, column 5
|
|
313
|
+
// in if expression at line 1, column 1
|
|
314
|
+
```
|
|
315
|
+
|
|
316
|
+
### The `attempt` combinator
|
|
317
|
+
|
|
318
|
+
Convert a fatal error back to non-fatal, restoring backtracking. Use sparingly -
|
|
319
|
+
it defeats the purpose of `cut`:
|
|
320
|
+
|
|
321
|
+
```ts
|
|
322
|
+
import { any, attempt } from "@claudiu-ceia/combine";
|
|
323
|
+
|
|
324
|
+
// Without attempt: fatal error propagates, otherExpr is NEVER tried
|
|
325
|
+
const parser1 = any(ifExpr, otherExpr);
|
|
326
|
+
|
|
327
|
+
// With attempt: fatal converted to non-fatal, any() tries otherExpr
|
|
328
|
+
const parser2 = any(attempt(ifExpr), otherExpr);
|
|
329
|
+
```
|
|
330
|
+
|
|
331
|
+
### `any` vs `furthest` for error quality
|
|
332
|
+
|
|
333
|
+
`any` short-circuits on the first success. If a "greedy" parser like
|
|
334
|
+
`identifier` succeeds early, you may get wrong results:
|
|
335
|
+
|
|
336
|
+
```ts
|
|
337
|
+
const expr = any(ifExpr, whileExpr, identifier);
|
|
338
|
+
expr({ text: "if x thn y", index: 0 });
|
|
339
|
+
// SUCCESS: "if" — wrong! identifier matched the keyword
|
|
340
|
+
```
|
|
341
|
+
|
|
342
|
+
`furthest` tries all alternatives and picks the one that consumed the most
|
|
343
|
+
input:
|
|
344
|
+
|
|
345
|
+
```ts
|
|
346
|
+
const expr = furthest(ifExpr, whileExpr, identifier);
|
|
347
|
+
expr({ text: "if x thn y", index: 0 });
|
|
348
|
+
// FAILURE: expected "then" at position 5 — correct! ifExpr got furthest
|
|
349
|
+
```
|
|
350
|
+
|
|
351
|
+
Use `furthest` for better error messages, or use `cut` to prevent backtracking
|
|
352
|
+
to greedy alternatives.
|
|
353
|
+
|
|
354
|
+
### Formatting errors
|
|
355
|
+
|
|
356
|
+
Use the built-in formatters for error messages:
|
|
357
|
+
|
|
358
|
+
```ts
|
|
359
|
+
import { formatErrorCompact, formatErrorStack } from "@claudiu-ceia/combine";
|
|
360
|
+
|
|
361
|
+
if (!result.success) {
|
|
362
|
+
// Multi-line trace
|
|
363
|
+
console.log(formatErrorStack(result));
|
|
364
|
+
// expected '}' at line 5, column 3
|
|
365
|
+
// in block at line 3, column 1
|
|
366
|
+
// in function declaration at line 2, column 1
|
|
367
|
+
|
|
368
|
+
// Single-line summary
|
|
369
|
+
console.log(formatErrorCompact(result));
|
|
370
|
+
// expected '}' (in block) at 5:3
|
|
371
|
+
}
|
|
372
|
+
```
|
|
373
|
+
|
|
374
|
+
## Going forward
|
|
375
|
+
|
|
376
|
+
This started out as a learning exercise and it most likely will stay that way
|
|
377
|
+
for some time, or until it sees some real use. I'm not sure how much time I'll
|
|
378
|
+
be able to dedicate to this project, but I'll try to keep it up to date with
|
|
379
|
+
Deno releases.
|
|
380
|
+
|
|
381
|
+
### Major improvement opportunities:
|
|
382
|
+
|
|
383
|
+
- Tooling: tracing, profiling, etc.
|
|
384
|
+
- Nicer composition of parsers (avoid the
|
|
385
|
+
[pyramid of doom](https://en.wikipedia.org/wiki/Pyramid_of_doom_(programming)))
|
|
386
|
+
|
|
387
|
+
## License
|
|
388
|
+
|
|
389
|
+
MIT © [Claudiu Ceia](https://github.com/ClaudiuCeia)
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"_dnt.shims.d.ts","sourceRoot":"","sources":["../src/_dnt.shims.ts"],"names":[],"mappings":"AAEA,eAAO,MAAM,aAAa,gCAA2C,CAAC"}
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
/** Extra globals layered over the real `globalThis`; this build defines none. */
const dntGlobals = {};

/** `globalThis` seen through a proxy in which entries of `dntGlobals` shadow the real globals. */
export const dntGlobalThis = createMergeProxy(globalThis, dntGlobals);

/**
 * Create a proxy over `baseObj` in which properties of `extObj` take precedence.
 *
 * Reads consult `extObj` first and fall back to `baseObj`. Writes and property
 * definitions always land on `baseObj` and drop any shadowing entry from
 * `extObj`, so the newly written value becomes visible through the proxy.
 *
 * @param baseObj - Object that is proxied and that receives all writes.
 * @param extObj - Overlay whose properties shadow those of `baseObj` on reads.
 * @returns A `Proxy` around `baseObj` presenting the merged view.
 */
function createMergeProxy(baseObj, extObj) {
  return new Proxy(baseObj, {
    get(_target, prop, _receiver) {
      if (prop in extObj) {
        return extObj[prop];
      }
      return baseObj[prop];
    },
    set(_target, prop, value) {
      // A write replaces the overlay entry so the new value is the one read back.
      if (prop in extObj) {
        delete extObj[prop];
      }
      baseObj[prop] = value;
      return true;
    },
    deleteProperty(_target, prop) {
      // Reflect.deleteProperty reports `true` for absent properties, matching
      // ordinary `delete` semantics. Returning `false` for a missing key (as the
      // previous implementation did) makes `delete proxy.key` throw in strict
      // mode. A non-configurable property yields `false` instead of throwing
      // from inside the trap, leaving the reaction to the caller.
      const removedFromExt = Reflect.deleteProperty(extObj, prop);
      const removedFromBase = Reflect.deleteProperty(baseObj, prop);
      return removedFromExt && removedFromBase;
    },
    ownKeys(_target) {
      // Base keys first (minus shadowed ones), then every overlay key.
      const baseKeys = Reflect.ownKeys(baseObj);
      const extKeys = Reflect.ownKeys(extObj);
      const extKeysSet = new Set(extKeys);
      return [...baseKeys.filter((k) => !extKeysSet.has(k)), ...extKeys];
    },
    defineProperty(_target, prop, desc) {
      if (prop in extObj) {
        delete extObj[prop];
      }
      // Propagate the real outcome; claiming success after a rejected
      // definition would violate the Proxy invariants and throw.
      return Reflect.defineProperty(baseObj, prop, desc);
    },
    getOwnPropertyDescriptor(_target, prop) {
      if (prop in extObj) {
        return Reflect.getOwnPropertyDescriptor(extObj, prop);
      }
      return Reflect.getOwnPropertyDescriptor(baseObj, prop);
    },
    has(_target, prop) {
      return prop in extObj || prop in baseObj;
    },
  });
}
|
package/esm/mod.d.ts
ADDED
package/esm/mod.d.ts.map
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"mod.d.ts","sourceRoot":"","sources":["../src/mod.ts"],"names":[],"mappings":"AAAA,cAAc,iBAAiB,CAAC;AAChC,cAAc,sBAAsB,CAAC;AACrC,cAAc,kBAAkB,CAAC;AACjC,cAAc,kBAAkB,CAAC;AACjC,cAAc,mBAAmB,CAAC"}
|
package/esm/mod.js
ADDED
package/esm/package.json
ADDED
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
export type Parser<T> = (ctx: Context) => Result<T>; // A parser is a function from an input Context to a Result<T>.
|
|
2
|
+
export type Context = Readonly<{ // Input text plus the position (`index`) within it.
|
|
3
|
+
text: string;
|
|
4
|
+
index: number;
|
|
5
|
+
}>;
|
|
6
|
+
export type Result<T> = Success<T> | Failure; // Discriminated by the `success` flag.
|
|
7
|
+
export type Success<T> = Readonly<{
|
|
8
|
+
success: true;
|
|
9
|
+
value: T;
|
|
10
|
+
ctx: Context;
|
|
11
|
+
}>;
|
|
12
|
+
/**
|
|
13
|
+
* A single frame in an error stack trace.
|
|
14
|
+
* Similar to how TypeScript traces type errors through nested structures.
|
|
15
|
+
*/
|
|
16
|
+
export type ErrorFrame = Readonly<{
|
|
17
|
+
/** Human-readable context label (e.g., "in match expression", "in function body") */
|
|
18
|
+
label: string;
|
|
19
|
+
/** Location where this context started */
|
|
20
|
+
location: {
|
|
21
|
+
line: number;
|
|
22
|
+
column: number;
|
|
23
|
+
};
|
|
24
|
+
}>;
|
|
25
|
+
export type Failure = Readonly<{
|
|
26
|
+
success: false;
|
|
27
|
+
/** The immediate expected value/token */
|
|
28
|
+
expected: string;
|
|
29
|
+
ctx: Context;
|
|
30
|
+
location: {
|
|
31
|
+
line: number;
|
|
32
|
+
column: number;
|
|
33
|
+
};
|
|
34
|
+
/** Alternative parse attempts at the same position */
|
|
35
|
+
variants: Failure[];
|
|
36
|
+
/**
|
|
37
|
+
* Error causation stack - traces back through parser hierarchy.
|
|
38
|
+
* First element is the most immediate cause, last is the root context.
|
|
39
|
+
* Example frame labels, in order: "in field declaration", "in viewer block" (the "expected ..." text lives in `expected`, not in the stack)
|
|
40
|
+
*/
|
|
41
|
+
stack: ErrorFrame[];
|
|
42
|
+
/** If true, this error should not be caught by alternative parsers (committed parse) */
|
|
43
|
+
fatal: boolean;
|
|
44
|
+
}>;
|
|
45
|
+
export declare const success: <T>(ctx: Context, value: T) => Success<T>;
|
|
46
|
+
/**
|
|
47
|
+
* Compute the 1-based line and column of `ctx.index` within `ctx.text`
|
|
48
|
+
*/
|
|
49
|
+
export declare const getLocation: (ctx: Context) => {
|
|
50
|
+
line: number;
|
|
51
|
+
column: number;
|
|
52
|
+
};
|
|
53
|
+
export declare const failure: (ctx: Context, expected: string, variants?: Failure[], stack?: ErrorFrame[], fatal?: boolean) => Failure;
|
|
54
|
+
/**
|
|
55
|
+
* Create a fatal failure that will not be caught by alternative parsers.
|
|
56
|
+
* Use after a "point of no return" - when enough has been parsed to commit
|
|
57
|
+
* to this branch of the grammar.
|
|
58
|
+
*/
|
|
59
|
+
export declare const fatalFailure: (ctx: Context, expected: string, stack?: ErrorFrame[]) => Failure;
|
|
60
|
+
/**
|
|
61
|
+
* Add a context frame to an existing failure's stack.
|
|
62
|
+
* This creates the "in X" chain in error messages.
|
|
63
|
+
*/
|
|
64
|
+
export declare const pushFrame: (f: Failure, label: string, ctx?: Context) => Failure;
|
|
65
|
+
/**
|
|
66
|
+
* Check if a failure is fatal (committed parse that should propagate)
|
|
67
|
+
*/
|
|
68
|
+
export declare const isFatal: (f: Failure) => boolean;
|
|
69
|
+
/**
|
|
70
|
+
* Format an error stack into a human-readable string.
|
|
71
|
+
* Produces output similar to TypeScript's type error traces.
|
|
72
|
+
*
|
|
73
|
+
* Example output:
|
|
74
|
+
* ```
|
|
75
|
+
* expected '}' at line 5, column 3
|
|
76
|
+
* in viewer declaration at line 2, column 1
|
|
77
|
+
* in program at line 1, column 1
|
|
78
|
+
* ```
|
|
79
|
+
*/
|
|
80
|
+
export declare const formatErrorStack: (f: Failure) => string;
|
|
81
|
+
/**
|
|
82
|
+
* Get a compact single-line error message
|
|
83
|
+
*/
|
|
84
|
+
export declare const formatErrorCompact: (f: Failure) => string;
|
|
85
|
+
//# sourceMappingURL=Parser.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"Parser.d.ts","sourceRoot":"","sources":["../../src/src/Parser.ts"],"names":[],"mappings":"AAAA,MAAM,MAAM,MAAM,CAAC,CAAC,IAAI,CAAC,GAAG,EAAE,OAAO,KAAK,MAAM,CAAC,CAAC,CAAC,CAAC;AAEpD,MAAM,MAAM,OAAO,GAAG,QAAQ,CAAC;IAC7B,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,EAAE,MAAM,CAAC;CACf,CAAC,CAAC;AAEH,MAAM,MAAM,MAAM,CAAC,CAAC,IAAI,OAAO,CAAC,CAAC,CAAC,GAAG,OAAO,CAAC;AAE7C,MAAM,MAAM,OAAO,CAAC,CAAC,IAAI,QAAQ,CAAC;IAChC,OAAO,EAAE,IAAI,CAAC;IACd,KAAK,EAAE,CAAC,CAAC;IACT,GAAG,EAAE,OAAO,CAAC;CACd,CAAC,CAAC;AAEH;;;GAGG;AACH,MAAM,MAAM,UAAU,GAAG,QAAQ,CAAC;IAChC,qFAAqF;IACrF,KAAK,EAAE,MAAM,CAAC;IACd,0CAA0C;IAC1C,QAAQ,EAAE;QACR,IAAI,EAAE,MAAM,CAAC;QACb,MAAM,EAAE,MAAM,CAAC;KAChB,CAAC;CACH,CAAC,CAAC;AAEH,MAAM,MAAM,OAAO,GAAG,QAAQ,CAAC;IAC7B,OAAO,EAAE,KAAK,CAAC;IACf,yCAAyC;IACzC,QAAQ,EAAE,MAAM,CAAC;IACjB,GAAG,EAAE,OAAO,CAAC;IACb,QAAQ,EAAE;QACR,IAAI,EAAE,MAAM,CAAC;QACb,MAAM,EAAE,MAAM,CAAC;KAChB,CAAC;IACF,sDAAsD;IACtD,QAAQ,EAAE,OAAO,EAAE,CAAC;IACpB;;;;OAIG;IACH,KAAK,EAAE,UAAU,EAAE,CAAC;IACpB,wFAAwF;IACxF,KAAK,EAAE,OAAO,CAAC;CAChB,CAAC,CAAC;AAEH,eAAO,MAAM,OAAO,GAAI,CAAC,OAAO,OAAO,SAAS,CAAC,KAAG,OAAO,CAAC,CAAC,CAM5D,CAAC;AAEF;;GAEG;AACH,eAAO,MAAM,WAAW,QAAS,OAAO,KAAG;IAAE,IAAI,EAAE,MAAM,CAAC;IAAC,MAAM,EAAE,MAAM,CAAA;CAiBxE,CAAC;AAEF,eAAO,MAAM,OAAO,QACb,OAAO,YACF,MAAM,aACN,OAAO,EAAE,UACZ,UAAU,EAAE,sBAElB,OAYF,CAAC;AAEF;;;;GAIG;AACH,eAAO,MAAM,YAAY,QAClB,OAAO,YACF,MAAM,UACT,UAAU,EAAE,KAClB,OAEF,CAAC;AAEF;;;GAGG;AACH,eAAO,MAAM,SAAS,MACjB,OAAO,SACH,MAAM,QACP,OAAO,KACZ,OAUF,CAAC;AAEF;;GAEG;AACH,eAAO,MAAM,OAAO,MAAO,OAAO,KAAG,OAAkB,CAAC;AAExD;;;;;;;;;;GAUG;AACH,eAAO,MAAM,gBAAgB,MAAO,OAAO,KAAG,MAgB7C,CAAC;AAEF;;GAEG;AACH,eAAO,MAAM,kBAAkB,MAAO,OAAO,KAAG,MAG/C,CAAC"}
|
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
/**
 * Build a successful parse result.
 *
 * @param ctx - Context to attach to the result.
 * @param value - Value produced by the parser.
 * @returns An object with `success: true`, the given `value` and the given `ctx`.
 */
export const success = (ctx, value) => ({ success: true, value, ctx });
|
|
8
|
+
/**
 * Translate `ctx.index` into a 1-based line/column pair within `ctx.text`.
 *
 * The index is truncated to an integer and clamped to `[0, text.length]`;
 * a non-finite index is treated as 0. Only `"\n"` counts as a line break.
 *
 * @param ctx - Context holding the text and the index to locate.
 * @returns `{ line, column }`, both starting at 1.
 */
export const getLocation = (ctx) => {
  const text = ctx.text;
  const limit = text.length;
  const rawIndex = Number.isFinite(ctx.index) ? Math.trunc(ctx.index) : 0;
  const index = Math.min(Math.max(rawIndex, 0), limit);

  // Walk the newlines that lie strictly before `index`, remembering where
  // the current line begins.
  let line = 1;
  let lineStart = 0;
  let newlineAt = text.indexOf("\n");
  while (newlineAt !== -1 && newlineAt < index) {
    line += 1;
    lineStart = newlineAt + 1;
    newlineAt = text.indexOf("\n", lineStart);
  }

  const column = index - lineStart + 1;
  return { line, column };
};
|
|
27
|
+
/**
 * Build a failed parse result for `ctx`.
 *
 * The `location` field is derived from `ctx` via `getLocation`.
 *
 * @param ctx - Context at which the failure occurred.
 * @param expected - Description of what was expected.
 * @param variants - Alternative failures to attach (defaults to an empty list).
 * @param stack - Error frames to attach (defaults to an empty list).
 * @param fatal - Value for the result's `fatal` flag (defaults to `false`).
 * @returns An object with `success: false` and the fields above.
 */
export const failure = (ctx, expected, variants = [], stack = [], fatal = false) => ({
  success: false,
  expected,
  ctx,
  location: getLocation(ctx),
  variants,
  stack,
  fatal,
});
|
|
39
|
+
/**
 * Build a failure whose `fatal` flag is set and whose `variants` list is empty.
 *
 * Intended for use past a "point of no return", once enough input has been
 * parsed to commit to the current branch of the grammar.
 *
 * @param ctx - Context at which the failure occurred.
 * @param expected - Description of what was expected.
 * @param stack - Error frames to attach (defaults to an empty list).
 * @returns The failure produced by `failure(ctx, expected, [], stack, true)`.
 */
export const fatalFailure = (ctx, expected, stack = []) =>
  failure(ctx, expected, [], stack, true);
|
|
47
|
+
/**
 * Return a copy of failure `f` with one more frame appended to its stack.
 *
 * The input failure is not modified. This is what builds the "in X" chain
 * shown in formatted error messages.
 *
 * @param f - Failure to extend.
 * @param label - Label for the new frame.
 * @param ctx - Optional context; when given, the frame's location is computed
 *   from it, otherwise the failure's own `location` is reused.
 * @returns A new failure object whose `stack` ends with the new frame.
 */
export const pushFrame = (f, label, ctx) => {
  let location;
  if (ctx) {
    location = getLocation(ctx);
  } else {
    location = f.location;
  }
  return {
    ...f,
    stack: [...f.stack, { label, location }],
  };
};
|
|
62
|
+
/**
 * Report whether a failure is marked fatal (a committed parse that should propagate).
 *
 * @param f - Failure to inspect.
 * @returns The failure's `fatal` field.
 */
export const isFatal = ({ fatal }) => fatal;
|
|
66
|
+
/**
 * Render a failure as a multi-line, human-readable trace.
 *
 * The first line states what was expected and where; each stack frame then
 * follows on its own line, indented by one space, in stack order.
 *
 * Example output:
 * ```
 * expected '}' at line 5, column 3
 *  in viewer declaration at line 2, column 1
 *  in program at line 1, column 1
 * ```
 *
 * @param f - Failure to format.
 * @returns The trace, with lines joined by "\n".
 */
export const formatErrorStack = (f) => {
  const at = ({ line, column }) => `at line ${line}, column ${column}`;
  const headline = `expected ${f.expected} ${at(f.location)}`;
  const frameLines = Array.from(
    f.stack,
    (frame) => ` ${frame.label} ${at(frame.location)}`,
  );
  return [headline, ...frameLines].join("\n");
};
|
|
87
|
+
/**
 * Render a failure as a single-line summary.
 *
 * When the stack is non-empty, the label of its first frame is included in
 * parentheses. The location is printed as `line:column`.
 *
 * @param f - Failure to format.
 * @returns e.g. `expected '}' (in block) at 5:3`.
 */
export const formatErrorCompact = (f) => {
  let frameNote = "";
  if (f.stack.length > 0) {
    frameNote = ` (${f.stack[0].label})`;
  }
  const { line, column } = f.location;
  return `expected ${f.expected}${frameNote} at ${line}:${column}`;
};
|