@thi.ng/parse 2.5.0 → 2.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +20 -1
- package/README.md +41 -33
- package/grammar.js +47 -14
- package/package.json +3 -3
package/CHANGELOG.md
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# Change Log
|
|
2
2
|
|
|
3
|
-
- **Last updated**: 2025-01-
|
|
3
|
+
- **Last updated**: 2025-01-21T11:16:50Z
|
|
4
4
|
- **Generator**: [thi.ng/monopub](https://thi.ng/monopub)
|
|
5
5
|
|
|
6
6
|
All notable changes to this project will be documented in this file.
|
|
@@ -9,6 +9,25 @@ See [Conventional Commits](https://conventionalcommits.org/) for commit guidelin
|
|
|
9
9
|
**Note:** Unlisted _patch_ versions only involve non-code or otherwise excluded changes
|
|
10
10
|
and/or version bumps of transitive dependencies.
|
|
11
11
|
|
|
12
|
+
## [2.6.0](https://github.com/thi-ng/umbrella/tree/@thi.ng/parse@2.6.0) (2025-01-21)
|
|
13
|
+
|
|
14
|
+
#### 🚀 Features
|
|
15
|
+
|
|
16
|
+
- expose more built-in grammar presets ([6e5a057](https://github.com/thi-ng/umbrella/commit/6e5a057))
|
|
17
|
+
- expose as new builtins:
|
|
18
|
+
- `BINARY_UINT`
|
|
19
|
+
- `HEX_UINT`
|
|
20
|
+
- `SPACE`
|
|
21
|
+
- `UINT`
|
|
22
|
+
|
|
23
|
+
#### ⏱ Performance improvements
|
|
24
|
+
|
|
25
|
+
- optimize char selection grammar compilation ([0476baa](https://github.com/thi-ng/umbrella/commit/0476baa))
|
|
26
|
+
- check if char selection only contains characters (no ranges)
|
|
27
|
+
- if so, compile using `oneOf()` instead of `alt()` (avoiding extra level of iteration)
|
|
28
|
+
- update grammar rule compilation ([8341af6](https://github.com/thi-ng/umbrella/commit/8341af6))
|
|
29
|
+
- avoid `dynamic()` wrapper for grammar rules which don't require it (to avoid extraneous indirection)
|
|
30
|
+
|
|
12
31
|
## [2.5.0](https://github.com/thi-ng/umbrella/tree/@thi.ng/parse@2.5.0) (2025-01-17)
|
|
13
32
|
|
|
14
33
|
#### 🚀 Features
|
package/README.md
CHANGED
|
@@ -103,7 +103,7 @@ For Node.js REPL:
|
|
|
103
103
|
const parse = await import("@thi.ng/parse");
|
|
104
104
|
```
|
|
105
105
|
|
|
106
|
-
Package sizes (brotli'd, pre-treeshake): ESM: 5.
|
|
106
|
+
Package sizes (brotli'd, pre-treeshake): ESM: 5.40 KB
|
|
107
107
|
|
|
108
108
|
## Dependencies
|
|
109
109
|
|
|
@@ -387,42 +387,50 @@ Custom transforms functions can be supplied via an additional arg to
|
|
|
387
387
|
be overwritten) and correspond to the [above mentioned
|
|
388
388
|
transforms](#transformers):
|
|
389
389
|
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
390
|
+
| **Transform** | **Description** |
|
|
391
|
+
|---------------|---------------------------------------|
|
|
392
|
+
| `binary` | parse as binary number |
|
|
393
|
+
| `collect` | collect sub terms into array |
|
|
394
|
+
| `discard` | discard result |
|
|
395
|
+
| `float` | join & parse as floating point number |
|
|
396
|
+
| `hex` | join & parse as hex integer |
|
|
397
|
+
| `hoist` | replace AST node with its 1st child |
|
|
398
|
+
| `hoistR` | use result of 1st child term only |
|
|
399
|
+
| `int` | join & parse as integer |
|
|
400
|
+
| `join` | join sub terms into single string |
|
|
401
|
+
| `json` | join & parse as JSON |
|
|
402
|
+
| `print` | print out node's subtree (AST) |
|
|
403
|
+
| `trim` | trim result |
|
|
402
404
|
|
|
403
405
|
For convenience, the following built-in parser presets are available as
|
|
404
406
|
rule references in the grammar definition as well:
|
|
405
407
|
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
|
|
408
|
+
| **Preset** | **Description** |
|
|
409
|
+
|---------------|------------------------------------------------------------|
|
|
410
|
+
| `ALPHA` | single alphabetical char `[A-Za-z]` |
|
|
411
|
+
| `ALPHA_NUM` | single alphanumeric char `[A-Za-z0-9]` |
|
|
412
|
+
| `BIT` | single 0 or 1 digit |
|
|
413
|
+
| `BINARY_UINT` | unprefixed & unsigned binary integer (parsed as JS number) |
|
|
414
|
+
| `DIGIT` | single decimal digit |
|
|
415
|
+
| `DNL` | discarded single newline char |
|
|
416
|
+
| `END` | input end |
|
|
417
|
+
| `ESC` | single escape sequence |
|
|
418
|
+
| `FLOAT` | floating point number (parsed as JS number) |
|
|
419
|
+
| `HEX_DIGIT` | single hex digit `[0-9a-fA-F]` |
|
|
420
|
+
| `HEX_UINT` | unprefixed & unsigned hex integer (parsed as JS number) |
|
|
421
|
+
| `INT` | signed/unsigned decimal integer (parsed as JS number) |
|
|
422
|
+
| `LEND` | line end |
|
|
423
|
+
| `LSTART` | line start |
|
|
424
|
+
| `NL` | single newline char `[\n\r]` |
|
|
425
|
+
| `SPACE` | single space or tab `[ \t]` |
|
|
426
|
+
| `START` | input start |
|
|
427
|
+
| `STRING` | quoted string, incl. escapes, `"foo\"bar\u2587"` |
|
|
428
|
+
| `UNICODE` | unicode escape sequence `\uxxxx` |
|
|
429
|
+
| `UINT` | unsigned decimal integer (parsed as JS number) |
|
|
430
|
+
| `WB` | word boundary |
|
|
431
|
+
| `WS` | single whitespace char |
|
|
432
|
+
| `WS0` | zero or more whitespace chars |
|
|
433
|
+
| `WS1` | one or more whitespace chars |
|
|
426
434
|
|
|
427
435
|
## Examples
|
|
428
436
|
|
package/grammar.js
CHANGED
|
@@ -11,13 +11,13 @@ import { seq, seqD } from "./combinators/seq.js";
|
|
|
11
11
|
import { xform } from "./combinators/xform.js";
|
|
12
12
|
import { defContext } from "./context.js";
|
|
13
13
|
import { ALPHA, ALPHA_NUM } from "./presets/alpha.js";
|
|
14
|
-
import { BIT } from "./presets/bits.js";
|
|
14
|
+
import { BINARY_UINT, BIT } from "./presets/bits.js";
|
|
15
15
|
import { DIGIT } from "./presets/digits.js";
|
|
16
16
|
import { ESC, UNICODE } from "./presets/escape.js";
|
|
17
|
-
import { HEX_DIGIT } from "./presets/hex.js";
|
|
17
|
+
import { HEX_DIGIT, HEX_UINT } from "./presets/hex.js";
|
|
18
18
|
import { FLOAT, INT, UINT } from "./presets/numbers.js";
|
|
19
19
|
import { STRING } from "./presets/string.js";
|
|
20
|
-
import { DNL, NL, WS, WS0, WS1 } from "./presets/whitespace.js";
|
|
20
|
+
import { DNL, NL, SPACE, WS, WS0, WS1 } from "./presets/whitespace.js";
|
|
21
21
|
import { always, alwaysD } from "./prims/always.js";
|
|
22
22
|
import {
|
|
23
23
|
inputEnd,
|
|
@@ -121,6 +121,15 @@ const COMMENT = seqD([WS0, litD("#"), lookahead(always(), DNL)]);
|
|
|
121
121
|
const GRAMMAR = zeroOrMore(alt([RULE, COMMENT]), "rules");
|
|
122
122
|
const __first = ($) => $.children[0];
|
|
123
123
|
const __nth = ($, n) => $.children[n];
|
|
124
|
+
const __hasDynRuleRefs = (term, builtins) => {
|
|
125
|
+
let res = term.id === "ref" && !builtins.has(__first(term).result);
|
|
126
|
+
if (term.children) {
|
|
127
|
+
for (let x of term.children) {
|
|
128
|
+
res ||= __hasDynRuleRefs(x, builtins);
|
|
129
|
+
}
|
|
130
|
+
}
|
|
131
|
+
return res;
|
|
132
|
+
};
|
|
124
133
|
const __compile = defmulti(
|
|
125
134
|
(scope) => scope.id,
|
|
126
135
|
{
|
|
@@ -130,15 +139,25 @@ const __compile = defmulti(
|
|
|
130
139
|
[DEFAULT]: ($) => unsupported(`unknown op: ${$.id}`),
|
|
131
140
|
root: ($, lang, opts, flags) => {
|
|
132
141
|
const rules = __first($).children;
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
);
|
|
142
|
+
const builtins = new Set(Object.keys(lang.rules));
|
|
143
|
+
const staticRules = /* @__PURE__ */ new Set();
|
|
144
|
+
const dynamicRules = /* @__PURE__ */ new Set();
|
|
137
145
|
for (let r of rules) {
|
|
146
|
+
if (__hasDynRuleRefs(r, builtins)) {
|
|
147
|
+
lang.rules[__first(r).result] = dynamic();
|
|
148
|
+
dynamicRules.add(r);
|
|
149
|
+
} else {
|
|
150
|
+
staticRules.add(r);
|
|
151
|
+
}
|
|
152
|
+
}
|
|
153
|
+
for (let r of [...staticRules, ...dynamicRules]) {
|
|
138
154
|
const id = __first(r).result;
|
|
139
|
-
lang
|
|
140
|
-
|
|
141
|
-
|
|
155
|
+
const parser = __compile(r, lang, opts, flags);
|
|
156
|
+
if (dynamicRules.has(r)) {
|
|
157
|
+
lang.rules[id].set(parser);
|
|
158
|
+
} else {
|
|
159
|
+
lang.rules[id] = parser;
|
|
160
|
+
}
|
|
142
161
|
}
|
|
143
162
|
return lang;
|
|
144
163
|
},
|
|
@@ -233,11 +252,21 @@ const __compile = defmulti(
|
|
|
233
252
|
},
|
|
234
253
|
charSel: ($, lang, opts, flags) => {
|
|
235
254
|
opts.debug && console.log("charSel", flags);
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
)
|
|
255
|
+
let parser;
|
|
256
|
+
const children = __nth($, 1).children;
|
|
257
|
+
if (children.length === 1) {
|
|
258
|
+
parser = __compile(children[0], lang, opts, flags);
|
|
259
|
+
} else {
|
|
260
|
+
const onlyChars = children.every((x) => x.id === "char");
|
|
261
|
+
if (onlyChars) {
|
|
262
|
+
parser = oneOf(children.map((x) => x.result).join(""));
|
|
263
|
+
} else {
|
|
264
|
+
parser = alt(
|
|
265
|
+
children.map((c) => __compile(c, lang, opts, flags))
|
|
266
|
+
);
|
|
267
|
+
}
|
|
268
|
+
}
|
|
239
269
|
const invert = __first($).result;
|
|
240
|
-
const parser = choices.length > 1 ? alt(choices) : choices[0];
|
|
241
270
|
opts.debug && console.log(`invert: ${invert}`);
|
|
242
271
|
return invert ? not(parser, flags.discard ? alwaysD() : always()) : parser;
|
|
243
272
|
}
|
|
@@ -315,19 +344,23 @@ const defGrammar = (rules, env, opts) => {
|
|
|
315
344
|
ALPHA_NUM,
|
|
316
345
|
ALPHA,
|
|
317
346
|
BIT,
|
|
347
|
+
BINARY_UINT,
|
|
318
348
|
DIGIT,
|
|
319
349
|
DNL,
|
|
320
350
|
END: inputEnd,
|
|
321
351
|
ESC,
|
|
322
352
|
FLOAT,
|
|
323
353
|
HEX_DIGIT,
|
|
354
|
+
HEX_UINT,
|
|
324
355
|
INT,
|
|
325
356
|
LEND: lineEnd,
|
|
326
357
|
LSTART: lineStart,
|
|
327
358
|
NL,
|
|
359
|
+
SPACE,
|
|
328
360
|
START: inputStart,
|
|
329
361
|
STRING,
|
|
330
362
|
UNICODE,
|
|
363
|
+
UINT,
|
|
331
364
|
WB: wordBoundary,
|
|
332
365
|
WS,
|
|
333
366
|
WS0,
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@thi.ng/parse",
|
|
3
|
-
"version": "2.5.0",
|
|
3
|
+
"version": "2.6.0",
|
|
4
4
|
"description": "Purely functional parser combinators & AST generation for generic inputs",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"module": "./index.js",
|
|
@@ -44,7 +44,7 @@
|
|
|
44
44
|
"@thi.ng/checks": "^3.6.19",
|
|
45
45
|
"@thi.ng/defmulti": "^3.0.55",
|
|
46
46
|
"@thi.ng/errors": "^2.5.22",
|
|
47
|
-
"@thi.ng/strings": "^3.9.
|
|
47
|
+
"@thi.ng/strings": "^3.9.1"
|
|
48
48
|
},
|
|
49
49
|
"devDependencies": {
|
|
50
50
|
"@microsoft/api-extractor": "^7.48.1",
|
|
@@ -246,5 +246,5 @@
|
|
|
246
246
|
],
|
|
247
247
|
"year": 2020
|
|
248
248
|
},
|
|
249
|
-
"gitHead": "
|
|
249
|
+
"gitHead": "56e7a1724e7b0cb5c41119f60320b6ff0e8a3c1c\n"
|
|
250
250
|
}
|