@thi.ng/parse 2.5.0 → 2.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,6 +1,6 @@
1
1
  # Change Log
2
2
 
3
- - **Last updated**: 2025-01-17T14:10:58Z
3
+ - **Last updated**: 2025-01-21T15:46:53Z
4
4
  - **Generator**: [thi.ng/monopub](https://thi.ng/monopub)
5
5
 
6
6
  All notable changes to this project will be documented in this file.
@@ -9,6 +9,25 @@ See [Conventional Commits](https://conventionalcommits.org/) for commit guidelin
9
9
  **Note:** Unlisted _patch_ versions only involve non-code or otherwise excluded changes
10
10
  and/or version bumps of transitive dependencies.
11
11
 
12
+ ## [2.6.0](https://github.com/thi-ng/umbrella/tree/@thi.ng/parse@2.6.0) (2025-01-21)
13
+
14
+ #### 🚀 Features
15
+
16
+ - expose more built-in grammar presets ([6e5a057](https://github.com/thi-ng/umbrella/commit/6e5a057))
17
+ - expose as new builtins:
18
+ - `BINARY_UINT`
19
+ - `HEX_UINT`
20
+ - `SPACE`
21
+ - `UINT`
22
+
23
+ #### ⏱ Performance improvements
24
+
25
+ - optimize char selection grammar compilation ([0476baa](https://github.com/thi-ng/umbrella/commit/0476baa))
26
+ - check if char selection only contains characters (no ranges)
27
+ - if so, compile using `oneOf()` instead of `alt()` (avoiding extra level of iteration)
28
+ - update grammar rule compilation ([8341af6](https://github.com/thi-ng/umbrella/commit/8341af6))
29
+ - avoid `dynamic()` wrapper for grammar rules which don't require it (to avoid extraneous indirection)
30
+
12
31
  ## [2.5.0](https://github.com/thi-ng/umbrella/tree/@thi.ng/parse@2.5.0) (2025-01-17)
13
32
 
14
33
  #### 🚀 Features
package/README.md CHANGED
@@ -103,7 +103,7 @@ For Node.js REPL:
103
103
  const parse = await import("@thi.ng/parse");
104
104
  ```
105
105
 
106
- Package sizes (brotli'd, pre-treeshake): ESM: 5.27 KB
106
+ Package sizes (brotli'd, pre-treeshake): ESM: 5.40 KB
107
107
 
108
108
  ## Dependencies
109
109
 
@@ -387,42 +387,50 @@ Custom transforms functions can be supplied via an additional arg to
387
387
  be overwritten) and correspond to the [above mentioned
388
388
  transforms](#transformers):
389
389
 
390
- - `binary` - parse as binary number
391
- - `collect` - collect sub terms into array
392
- - `discard` - discard result
393
- - `float` - join & parse as floating point number
394
- - `hex` - join & parse as hex integer
395
- - `hoist` - replace AST node with its 1st child
396
- - `hoistR` - use result of 1st child term only
397
- - `int` - join & parse as integer
398
- - `join` - join sub terms into single string
399
- - `json` - join & parse as JSON
400
- - `print` - print out node's subtree (AST)
401
- - `trim` - trim result
390
+ | **Transform** | **Description** |
391
+ |---------------|---------------------------------------|
392
+ | `binary` | parse as binary number |
393
+ | `collect` | collect sub terms into array |
394
+ | `discard` | discard result |
395
+ | `float` | join & parse as floating point number |
396
+ | `hex` | join & parse as hex integer |
397
+ | `hoist` | replace AST node with its 1st child |
398
+ | `hoistR` | use result of 1st child term only |
399
+ | `int` | join & parse as integer |
400
+ | `join` | join sub terms into single string |
401
+ | `json` | join & parse as JSON |
402
+ | `print` | print out node's subtree (AST) |
403
+ | `trim` | trim result |
402
404
 
403
405
  For convenience, the following built-in parser presets are available as
404
406
  rule references in the grammar definition as well:
405
407
 
406
- - `ALPHA`
407
- - `ALPHA_NUM`
408
- - `BIT`
409
- - `DIGIT`
410
- - `DNL` - discarded newline
411
- - `END` - input end
412
- - `ESC` - escape sequences
413
- - `FLOAT`
414
- - `HEX_DIGIT`
415
- - `INT`
416
- - `LEND` - line end
417
- - `LSTART` - line start
418
- - `NL` - newline chars
419
- - `START` - input start
420
- - `STRING`
421
- - `UNICODE`
422
- - `WB` - word boundary
423
- - `WS`
424
- - `WS0`
425
- - `WS1`
408
+ | **Preset** | **Description** |
409
+ |---------------|------------------------------------------------------------|
410
+ | `ALPHA` | single alphabetical char `[A-Za-z]` |
411
+ | `ALPHA_NUM` | single alphanumeric char `[A-Za-z0-9]` |
412
+ | `BIT` | single 0 or 1 digit |
413
+ | `BINARY_UINT` | unprefixed & unsigned binary integer (parsed as JS number) |
414
+ | `DIGIT` | single decimal digit |
415
+ | `DNL` | discarded single newline char |
416
+ | `END` | input end |
417
+ | `ESC` | single escape sequence |
418
+ | `FLOAT` | floating point number (parsed as JS number) |
419
+ | `HEX_DIGIT` | single hex digit `[0-9a-fA-F]` |
420
+ | `HEX_UINT` | unprefixed & unsigned hex integer (parsed as JS number) |
421
+ | `INT` | signed/unsigned decimal integer (parsed as JS number) |
422
+ | `LEND` | line end |
423
+ | `LSTART` | line start |
424
+ | `NL` | single newline char `[\n\r]` |
425
+ | `SPACE` | single space or tab `[ \t]` |
426
+ | `START` | input start |
427
+ | `STRING` | quoted string, incl. escapes, `"foo\"bar\u2587"` |
428
+ | `UNICODE` | unicode escape sequence `\uxxxx` |
429
+ | `UINT` | unsigned decimal integer (parsed as JS number) |
430
+ | `WB` | word boundary |
431
+ | `WS` | single whitespace char |
432
+ | `WS0` | zero or more whitespace chars |
433
+ | `WS1` | one or more whitespace chars |
426
434
 
427
435
  ## Examples
428
436
 
package/grammar.js CHANGED
@@ -11,13 +11,13 @@ import { seq, seqD } from "./combinators/seq.js";
11
11
  import { xform } from "./combinators/xform.js";
12
12
  import { defContext } from "./context.js";
13
13
  import { ALPHA, ALPHA_NUM } from "./presets/alpha.js";
14
- import { BIT } from "./presets/bits.js";
14
+ import { BINARY_UINT, BIT } from "./presets/bits.js";
15
15
  import { DIGIT } from "./presets/digits.js";
16
16
  import { ESC, UNICODE } from "./presets/escape.js";
17
- import { HEX_DIGIT } from "./presets/hex.js";
17
+ import { HEX_DIGIT, HEX_UINT } from "./presets/hex.js";
18
18
  import { FLOAT, INT, UINT } from "./presets/numbers.js";
19
19
  import { STRING } from "./presets/string.js";
20
- import { DNL, NL, WS, WS0, WS1 } from "./presets/whitespace.js";
20
+ import { DNL, NL, SPACE, WS, WS0, WS1 } from "./presets/whitespace.js";
21
21
  import { always, alwaysD } from "./prims/always.js";
22
22
  import {
23
23
  inputEnd,
@@ -121,6 +121,15 @@ const COMMENT = seqD([WS0, litD("#"), lookahead(always(), DNL)]);
121
121
  const GRAMMAR = zeroOrMore(alt([RULE, COMMENT]), "rules");
122
122
  const __first = ($) => $.children[0];
123
123
  const __nth = ($, n) => $.children[n];
124
+ const __hasDynRuleRefs = (term, builtins) => {
125
+ let res = term.id === "ref" && !builtins.has(__first(term).result);
126
+ if (term.children) {
127
+ for (let x of term.children) {
128
+ res ||= __hasDynRuleRefs(x, builtins);
129
+ }
130
+ }
131
+ return res;
132
+ };
124
133
  const __compile = defmulti(
125
134
  (scope) => scope.id,
126
135
  {
@@ -130,15 +139,25 @@ const __compile = defmulti(
130
139
  [DEFAULT]: ($) => unsupported(`unknown op: ${$.id}`),
131
140
  root: ($, lang, opts, flags) => {
132
141
  const rules = __first($).children;
133
- rules.reduce(
134
- (acc, r) => (acc[__first(r).result] = dynamic(), acc),
135
- lang.rules
136
- );
142
+ const builtins = new Set(Object.keys(lang.rules));
143
+ const staticRules = /* @__PURE__ */ new Set();
144
+ const dynamicRules = /* @__PURE__ */ new Set();
137
145
  for (let r of rules) {
146
+ if (__hasDynRuleRefs(r, builtins)) {
147
+ lang.rules[__first(r).result] = dynamic();
148
+ dynamicRules.add(r);
149
+ } else {
150
+ staticRules.add(r);
151
+ }
152
+ }
153
+ for (let r of [...staticRules, ...dynamicRules]) {
138
154
  const id = __first(r).result;
139
- lang.rules[id].set(
140
- __compile(r, lang, opts, flags)
141
- );
155
+ const parser = __compile(r, lang, opts, flags);
156
+ if (dynamicRules.has(r)) {
157
+ lang.rules[id].set(parser);
158
+ } else {
159
+ lang.rules[id] = parser;
160
+ }
142
161
  }
143
162
  return lang;
144
163
  },
@@ -233,11 +252,21 @@ const __compile = defmulti(
233
252
  },
234
253
  charSel: ($, lang, opts, flags) => {
235
254
  opts.debug && console.log("charSel", flags);
236
- const choices = __nth($, 1).children.map(
237
- (c) => __compile(c, lang, opts, flags)
238
- );
255
+ let parser;
256
+ const children = __nth($, 1).children;
257
+ if (children.length === 1) {
258
+ parser = __compile(children[0], lang, opts, flags);
259
+ } else {
260
+ const onlyChars = children.every((x) => x.id === "char");
261
+ if (onlyChars) {
262
+ parser = oneOf(children.map((x) => x.result).join(""));
263
+ } else {
264
+ parser = alt(
265
+ children.map((c) => __compile(c, lang, opts, flags))
266
+ );
267
+ }
268
+ }
239
269
  const invert = __first($).result;
240
- const parser = choices.length > 1 ? alt(choices) : choices[0];
241
270
  opts.debug && console.log(`invert: ${invert}`);
242
271
  return invert ? not(parser, flags.discard ? alwaysD() : always()) : parser;
243
272
  }
@@ -315,19 +344,23 @@ const defGrammar = (rules, env, opts) => {
315
344
  ALPHA_NUM,
316
345
  ALPHA,
317
346
  BIT,
347
+ BINARY_UINT,
318
348
  DIGIT,
319
349
  DNL,
320
350
  END: inputEnd,
321
351
  ESC,
322
352
  FLOAT,
323
353
  HEX_DIGIT,
354
+ HEX_UINT,
324
355
  INT,
325
356
  LEND: lineEnd,
326
357
  LSTART: lineStart,
327
358
  NL,
359
+ SPACE,
328
360
  START: inputStart,
329
361
  STRING,
330
362
  UNICODE,
363
+ UINT,
331
364
  WB: wordBoundary,
332
365
  WS,
333
366
  WS0,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@thi.ng/parse",
3
- "version": "2.5.0",
3
+ "version": "2.6.1",
4
4
  "description": "Purely functional parser combinators & AST generation for generic inputs",
5
5
  "type": "module",
6
6
  "module": "./index.js",
@@ -40,11 +40,11 @@
40
40
  "tool:tangle": "../../node_modules/.bin/tangle src/**/*.ts"
41
41
  },
42
42
  "dependencies": {
43
- "@thi.ng/api": "^8.11.16",
44
- "@thi.ng/checks": "^3.6.19",
45
- "@thi.ng/defmulti": "^3.0.55",
46
- "@thi.ng/errors": "^2.5.22",
47
- "@thi.ng/strings": "^3.9.0"
43
+ "@thi.ng/api": "^8.11.17",
44
+ "@thi.ng/checks": "^3.6.20",
45
+ "@thi.ng/defmulti": "^3.0.56",
46
+ "@thi.ng/errors": "^2.5.23",
47
+ "@thi.ng/strings": "^3.9.2"
48
48
  },
49
49
  "devDependencies": {
50
50
  "@microsoft/api-extractor": "^7.48.1",
@@ -246,5 +246,5 @@
246
246
  ],
247
247
  "year": 2020
248
248
  },
249
- "gitHead": "d888087b36b086fd8c3e7dc98d35857266f78942\n"
249
+ "gitHead": "22f6d518aed5951bb37b406c8ae85a6c3e6be517\n"
250
250
  }