@thi.ng/parse 2.4.64 → 2.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,6 +1,6 @@
1
1
  # Change Log
2
2
 
3
- - **Last updated**: 2025-01-14T12:23:33Z
3
+ - **Last updated**: 2025-01-21T11:16:50Z
4
4
  - **Generator**: [thi.ng/monopub](https://thi.ng/monopub)
5
5
 
6
6
  All notable changes to this project will be documented in this file.
@@ -9,6 +9,43 @@ See [Conventional Commits](https://conventionalcommits.org/) for commit guidelin
9
9
  **Note:** Unlisted _patch_ versions only involve non-code or otherwise excluded changes
10
10
  and/or version bumps of transitive dependencies.
11
11
 
12
+ ## [2.6.0](https://github.com/thi-ng/umbrella/tree/@thi.ng/parse@2.6.0) (2025-01-21)
13
+
14
+ #### 🚀 Features
15
+
16
+ - expose more built-in grammar presets ([6e5a057](https://github.com/thi-ng/umbrella/commit/6e5a057))
17
+ - expose as new builtins:
18
+ - `BINARY_UINT`
19
+ - `HEX_UINT`
20
+ - `SPACE`
21
+ - `UINT`
22
+
23
+ #### ⏱ Performance improvements
24
+
25
+ - optimize char selection grammar compilation ([0476baa](https://github.com/thi-ng/umbrella/commit/0476baa))
26
+ - check if char selection only contains characters (no ranges)
27
+ - if so, compile using `oneOf()` instead of `alt()` (avoiding extra level of iteration)
28
+ - update grammar rule compilation ([8341af6](https://github.com/thi-ng/umbrella/commit/8341af6))
29
+ - avoid `dynamic()` wrapper for grammar rules which don't require it (to avoid extraneous indirection)
30
+
31
+ ## [2.5.0](https://github.com/thi-ng/umbrella/tree/@thi.ng/parse@2.5.0) (2025-01-17)
32
+
33
+ #### 🚀 Features
34
+
35
+ - update `DynamicParser`, add `IDeref` support ([cf0d51c](https://github.com/thi-ng/umbrella/commit/cf0d51c))
36
+
37
+ #### ♻️ Refactoring
38
+
39
+ - remove `ParseState.last`, update `IReader` & impls ([20fc5cf](https://github.com/thi-ng/umbrella/commit/20fc5cf))
40
+ - remove `ParseState.last` to lower RAM usage
41
+ - add `IReader.prev()` to obtain previous char, add docs
42
+ - update reader impls
43
+ - update anchor parsers
44
+ - update tests
45
+ - minor internal updates ([ef97aee](https://github.com/thi-ng/umbrella/commit/ef97aee))
46
+ - update `ParseContext.start()`
47
+ - update `check()` combinator impl
48
+
12
49
  ### [2.4.64](https://github.com/thi-ng/umbrella/tree/@thi.ng/parse@2.4.64) (2025-01-14)
13
50
 
14
51
  #### ♻️ Refactoring
package/README.md CHANGED
@@ -103,7 +103,7 @@ For Node.js REPL:
103
103
  const parse = await import("@thi.ng/parse");
104
104
  ```
105
105
 
106
- Package sizes (brotli'd, pre-treeshake): ESM: 5.27 KB
106
+ Package sizes (brotli'd, pre-treeshake): ESM: 5.40 KB
107
107
 
108
108
  ## Dependencies
109
109
 
@@ -387,42 +387,50 @@ Custom transform functions can be supplied via an additional arg to
387
387
  be overwritten) and correspond to the [above mentioned
388
388
  transforms](#transformers):
389
389
 
390
- - `binary` - parse as binary number
391
- - `collect` - collect sub terms into array
392
- - `discard` - discard result
393
- - `float` - join & parse as floating point number
394
- - `hex` - join & parse as hex integer
395
- - `hoist` - replace AST node with its 1st child
396
- - `hoistR` - use result of 1st child term only
397
- - `int` - join & parse as integer
398
- - `join` - join sub terms into single string
399
- - `json` - join & parse as JSON
400
- - `print` - print out node's subtree (AST)
401
- - `trim` - trim result
390
+ | **Transform** | **Description** |
391
+ |---------------|---------------------------------------|
392
+ | `binary` | parse as binary number |
393
+ | `collect` | collect sub terms into array |
394
+ | `discard` | discard result |
395
+ | `float` | join & parse as floating point number |
396
+ | `hex` | join & parse as hex integer |
397
+ | `hoist` | replace AST node with its 1st child |
398
+ | `hoistR` | use result of 1st child term only |
399
+ | `int` | join & parse as integer |
400
+ | `join` | join sub terms into single string |
401
+ | `json` | join & parse as JSON |
402
+ | `print` | print out node's subtree (AST) |
403
+ | `trim` | trim result |
402
404
 
403
405
  For convenience, the following built-in parser presets are available as
404
406
  rule references in the grammar definition as well:
405
407
 
406
- - `ALPHA`
407
- - `ALPHA_NUM`
408
- - `BIT`
409
- - `DIGIT`
410
- - `DNL` - discarded newline
411
- - `END` - input end
412
- - `ESC` - escape sequences
413
- - `FLOAT`
414
- - `HEX_DIGIT`
415
- - `INT`
416
- - `LEND` - line end
417
- - `LSTART` - line start
418
- - `NL` - newline chars
419
- - `START` - input start
420
- - `STRING`
421
- - `UNICODE`
422
- - `WB` - word boundary
423
- - `WS`
424
- - `WS0`
425
- - `WS1`
408
+ | **Preset** | **Description** |
409
+ |---------------|------------------------------------------------------------|
410
+ | `ALPHA` | single alphabetical char `[A-Za-z]` |
411
+ | `ALPHA_NUM` | single alphanumeric char `[A-Za-z0-9]` |
412
+ | `BIT` | single 0 or 1 digit |
413
+ | `BINARY_UINT` | unprefixed & unsigned binary integer (parsed as JS number) |
414
+ | `DIGIT` | single decimal digit |
415
+ | `DNL` | discarded single newline char |
416
+ | `END` | input end |
417
+ | `ESC` | single escape sequence |
418
+ | `FLOAT` | floating point number (parsed as JS number) |
419
+ | `HEX_DIGIT` | single hex digit `[0-9a-fA-F]` |
420
+ | `HEX_UINT` | unprefixed & unsigned hex integer (parsed as JS number) |
421
+ | `INT` | signed/unsigned decimal integer (parsed as JS number) |
422
+ | `LEND` | line end |
423
+ | `LSTART` | line start |
424
+ | `NL` | single newline char `[\n\r]` |
425
+ | `SPACE` | single space or tab `[ \t]` |
426
+ | `START` | input start |
427
+ | `STRING` | quoted string, incl. escapes, `"foo\"bar\u2587"` |
428
+ | `UNICODE` | unicode escape sequence `\uxxxx` |
429
+ | `UINT` | unsigned decimal integer (parsed as JS number) |
430
+ | `WB` | word boundary |
431
+ | `WS` | single whitespace char |
432
+ | `WS0` | zero or more whitespace chars |
433
+ | `WS1` | one or more whitespace chars |
426
434
 
427
435
  ## Examples
428
436
 
package/api.d.ts CHANGED
@@ -1,16 +1,48 @@
1
- import type { Fn, Fn0, IObjectOf, Nullable } from "@thi.ng/api";
1
+ import type { Fn, Fn0, IDeref, IObjectOf, Maybe, Nullable } from "@thi.ng/api";
2
2
  import type { ParseContext, ParseScope, ParseState } from "./context.js";
3
3
  export interface IReader<T> {
4
+ /**
5
+ * Returns the char/value at the current read position. No bounds checking
6
+ * done, assumes reader is not yet {@link IReader.isDone}.
7
+ *
8
+ * @param state
9
+ */
4
10
  read(state: ParseState<T>): T;
11
+ /**
12
+ * Returns the char/value at the previous read position (if any).
13
+ *
14
+ * @param state
15
+ */
16
+ prev(state: ParseState<T>): Maybe<T>;
17
+ /**
18
+ * Updates the reader's read position.
19
+ *
20
+ * @param state
21
+ */
5
22
  next(state: ParseState<T>): void;
23
+ /**
24
+ * Returns true if the reader already consumed all chars/values.
25
+ *
26
+ * @param state
27
+ */
6
28
  isDone(state: ParseState<T>): boolean;
29
+ /**
30
+ * Returns a string formatted version of the reader's position.
31
+ *
32
+ * @param state
33
+ */
7
34
  format(state: ParseState<T>): string;
8
35
  }
9
36
  export type Parser<T> = Fn<ParseContext<T>, boolean>;
10
37
  export type LitParser<T> = Parser<T> & {
11
38
  __lit: true;
12
39
  };
13
- export type DynamicParser<T> = Parser<T> & {
40
+ /**
41
+ * A {@link Parser} wrapper, whose actual implementation can (and must!) be
42
+ * defined dynamically via the exposed `.set()` function and which can be
43
+ * retrieved via `.deref()`.
44
+ */
45
+ export type DynamicParser<T> = Parser<T> & IDeref<Maybe<Parser<T>>> & {
14
46
  set: Fn<Parser<T>, void>;
15
47
  };
16
48
  export type PassValue<T> = T | Fn0<T>;
@@ -43,7 +75,7 @@ export interface ContextOpts {
43
75
  * Max recursion depth failsafe. Parsing will terminate once this limit is
44
76
  * reached.
45
77
  *
46
- * @defaultVal 64
78
+ * @defaultValue 64
47
79
  */
48
80
  maxDepth: number;
49
81
  /**
package/combinators/check.js CHANGED
@@ -1,9 +1,9 @@
1
1
  import { parseError } from "../error.js";
2
2
  import { xform } from "./xform.js";
3
- const check = (parser, pred, msg = "check failed") => xform(parser, (scope, ctx) => {
4
- if (!pred(scope)) parseError(ctx, msg);
5
- return scope;
6
- });
3
+ const check = (parser, pred, msg = "check failed") => xform(
4
+ parser,
5
+ (scope, ctx) => pred(scope) ? scope : parseError(ctx, msg)
6
+ );
7
7
  export {
8
8
  check
9
9
  };
package/combinators/dynamic.d.ts CHANGED
@@ -4,7 +4,7 @@ import type { DynamicParser } from "../api.js";
4
4
  * later stage via calling `.set()`. The parser always fails until set, after
5
5
  * which it then delegates to the chosen impl.
6
6
  *
7
- * @examples
7
+ * @example
8
8
  * ```ts tangle:../../export/dynamic.ts
9
9
  * import { defContext, dynamic,lit } from "@thi.ng/parse";
10
10
  *
package/combinators/dynamic.js CHANGED
@@ -1,6 +1,7 @@
1
1
  const dynamic = () => {
2
2
  let impl;
3
- const wrapper = (ctx) => impl ? impl(ctx) : false;
3
+ const wrapper = (ctx) => impl?.(ctx) ?? false;
4
+ wrapper.deref = () => impl;
4
5
  wrapper.set = (p) => impl = p;
5
6
  return wrapper;
6
7
  };
package/context.d.ts CHANGED
@@ -5,8 +5,7 @@ export declare class ParseState<T> implements ICopy<ParseState<T>> {
5
5
  l: number;
6
6
  c: number;
7
7
  done?: boolean | undefined;
8
- last?: T | undefined;
9
- constructor(p: number, l: number, c: number, done?: boolean | undefined, last?: T | undefined);
8
+ constructor(p: number, l: number, c: number, done?: boolean | undefined);
10
9
  copy(): ParseState<T>;
11
10
  }
12
11
  export declare class ParseScope<T> implements ICopy<ParseScope<T>> {
package/context.js CHANGED
@@ -5,15 +5,14 @@ import { defArrayReader } from "./readers/array-reader.js";
5
5
  import { defStringReader } from "./readers/string-reader.js";
6
6
  import { __indent } from "./utils.js";
7
7
  class ParseState {
8
- constructor(p, l, c, done, last) {
8
+ constructor(p, l, c, done) {
9
9
  this.p = p;
10
10
  this.l = l;
11
11
  this.c = c;
12
12
  this.done = done;
13
- this.last = last;
14
13
  }
15
14
  copy() {
16
- return new ParseState(this.p, this.l, this.c, this.done, this.last);
15
+ return new ParseState(this.p, this.l, this.c, this.done);
17
16
  }
18
17
  }
19
18
  class ParseScope {
@@ -57,15 +56,12 @@ class ParseContext {
57
56
  }
58
57
  start(id) {
59
58
  const { _scopes: scopes, _maxDepth } = this;
60
- if (scopes.length >= _maxDepth) {
59
+ const num = scopes.length;
60
+ if (num >= _maxDepth) {
61
61
  parseError(this, `recursion limit reached ${_maxDepth}`);
62
62
  }
63
- const scope = new ParseScope(
64
- id,
65
- scopes[scopes.length - 1].state.copy()
66
- );
67
- scopes.push(scope);
68
- this._peakDepth = Math.max(this._peakDepth, scopes.length);
63
+ const scope = new ParseScope(id, scopes[num - 1].state.copy());
64
+ this._peakDepth = Math.max(this._peakDepth, scopes.push(scope));
69
65
  this._debug && console.log(
70
66
  `${__indent(scopes.length)}start: ${id} (${scope.state.p})`
71
67
  );
package/grammar.js CHANGED
@@ -11,13 +11,13 @@ import { seq, seqD } from "./combinators/seq.js";
11
11
  import { xform } from "./combinators/xform.js";
12
12
  import { defContext } from "./context.js";
13
13
  import { ALPHA, ALPHA_NUM } from "./presets/alpha.js";
14
- import { BIT } from "./presets/bits.js";
14
+ import { BINARY_UINT, BIT } from "./presets/bits.js";
15
15
  import { DIGIT } from "./presets/digits.js";
16
16
  import { ESC, UNICODE } from "./presets/escape.js";
17
- import { HEX_DIGIT } from "./presets/hex.js";
17
+ import { HEX_DIGIT, HEX_UINT } from "./presets/hex.js";
18
18
  import { FLOAT, INT, UINT } from "./presets/numbers.js";
19
19
  import { STRING } from "./presets/string.js";
20
- import { DNL, NL, WS, WS0, WS1 } from "./presets/whitespace.js";
20
+ import { DNL, NL, SPACE, WS, WS0, WS1 } from "./presets/whitespace.js";
21
21
  import { always, alwaysD } from "./prims/always.js";
22
22
  import {
23
23
  inputEnd,
@@ -121,6 +121,15 @@ const COMMENT = seqD([WS0, litD("#"), lookahead(always(), DNL)]);
121
121
  const GRAMMAR = zeroOrMore(alt([RULE, COMMENT]), "rules");
122
122
  const __first = ($) => $.children[0];
123
123
  const __nth = ($, n) => $.children[n];
124
+ const __hasDynRuleRefs = (term, builtins) => {
125
+ let res = term.id === "ref" && !builtins.has(__first(term).result);
126
+ if (term.children) {
127
+ for (let x of term.children) {
128
+ res ||= __hasDynRuleRefs(x, builtins);
129
+ }
130
+ }
131
+ return res;
132
+ };
124
133
  const __compile = defmulti(
125
134
  (scope) => scope.id,
126
135
  {
@@ -130,15 +139,25 @@ const __compile = defmulti(
130
139
  [DEFAULT]: ($) => unsupported(`unknown op: ${$.id}`),
131
140
  root: ($, lang, opts, flags) => {
132
141
  const rules = __first($).children;
133
- rules.reduce(
134
- (acc, r) => (acc[__first(r).result] = dynamic(), acc),
135
- lang.rules
136
- );
142
+ const builtins = new Set(Object.keys(lang.rules));
143
+ const staticRules = /* @__PURE__ */ new Set();
144
+ const dynamicRules = /* @__PURE__ */ new Set();
137
145
  for (let r of rules) {
146
+ if (__hasDynRuleRefs(r, builtins)) {
147
+ lang.rules[__first(r).result] = dynamic();
148
+ dynamicRules.add(r);
149
+ } else {
150
+ staticRules.add(r);
151
+ }
152
+ }
153
+ for (let r of [...staticRules, ...dynamicRules]) {
138
154
  const id = __first(r).result;
139
- lang.rules[id].set(
140
- __compile(r, lang, opts, flags)
141
- );
155
+ const parser = __compile(r, lang, opts, flags);
156
+ if (dynamicRules.has(r)) {
157
+ lang.rules[id].set(parser);
158
+ } else {
159
+ lang.rules[id] = parser;
160
+ }
142
161
  }
143
162
  return lang;
144
163
  },
@@ -233,11 +252,21 @@ const __compile = defmulti(
233
252
  },
234
253
  charSel: ($, lang, opts, flags) => {
235
254
  opts.debug && console.log("charSel", flags);
236
- const choices = __nth($, 1).children.map(
237
- (c) => __compile(c, lang, opts, flags)
238
- );
255
+ let parser;
256
+ const children = __nth($, 1).children;
257
+ if (children.length === 1) {
258
+ parser = __compile(children[0], lang, opts, flags);
259
+ } else {
260
+ const onlyChars = children.every((x) => x.id === "char");
261
+ if (onlyChars) {
262
+ parser = oneOf(children.map((x) => x.result).join(""));
263
+ } else {
264
+ parser = alt(
265
+ children.map((c) => __compile(c, lang, opts, flags))
266
+ );
267
+ }
268
+ }
239
269
  const invert = __first($).result;
240
- const parser = choices.length > 1 ? alt(choices) : choices[0];
241
270
  opts.debug && console.log(`invert: ${invert}`);
242
271
  return invert ? not(parser, flags.discard ? alwaysD() : always()) : parser;
243
272
  }
@@ -315,19 +344,23 @@ const defGrammar = (rules, env, opts) => {
315
344
  ALPHA_NUM,
316
345
  ALPHA,
317
346
  BIT,
347
+ BINARY_UINT,
318
348
  DIGIT,
319
349
  DNL,
320
350
  END: inputEnd,
321
351
  ESC,
322
352
  FLOAT,
323
353
  HEX_DIGIT,
354
+ HEX_UINT,
324
355
  INT,
325
356
  LEND: lineEnd,
326
357
  LSTART: lineStart,
327
358
  NL,
359
+ SPACE,
328
360
  START: inputStart,
329
361
  STRING,
330
362
  UNICODE,
363
+ UINT,
331
364
  WB: wordBoundary,
332
365
  WS,
333
366
  WS0,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@thi.ng/parse",
3
- "version": "2.4.64",
3
+ "version": "2.6.0",
4
4
  "description": "Purely functional parser combinators & AST generation for generic inputs",
5
5
  "type": "module",
6
6
  "module": "./index.js",
@@ -44,7 +44,7 @@
44
44
  "@thi.ng/checks": "^3.6.19",
45
45
  "@thi.ng/defmulti": "^3.0.55",
46
46
  "@thi.ng/errors": "^2.5.22",
47
- "@thi.ng/strings": "^3.9.0"
47
+ "@thi.ng/strings": "^3.9.1"
48
48
  },
49
49
  "devDependencies": {
50
50
  "@microsoft/api-extractor": "^7.48.1",
@@ -246,5 +246,5 @@
246
246
  ],
247
247
  "year": 2020
248
248
  },
249
- "gitHead": "6542b842120bef47cc18d45a1b1db68307a7f04b\n"
249
+ "gitHead": "56e7a1724e7b0cb5c41119f60320b6ff0e8a3c1c\n"
250
250
  }
package/prims/anchor.js CHANGED
@@ -1,9 +1,9 @@
1
1
  import { ALPHA_NUM } from "@thi.ng/strings/groups";
2
- const anchor = (fn) => ({ reader, state }) => fn(state.last, state.done ? null : reader.read(state));
3
- const inputStart = (ctx) => ctx.state.last == null;
2
+ const anchor = (fn) => ({ reader, state }) => fn(reader.prev(state), state.done ? null : reader.read(state));
3
+ const inputStart = (ctx) => ctx.reader.prev(ctx.state) == null;
4
4
  const inputEnd = ({ reader, state }) => state.done || reader.read(state) === void 0;
5
5
  const lineStart = (ctx) => {
6
- const l = ctx.state.last;
6
+ const l = ctx.reader.prev(ctx.state);
7
7
  return l == null || l === "\n" || l === "\r";
8
8
  };
9
9
  const lineEnd = ({ reader, state }) => {
package/readers/array-reader.d.ts CHANGED
@@ -1,9 +1,11 @@
1
+ import type { Maybe } from "@thi.ng/api";
1
2
  import type { IReader } from "../api.js";
2
3
  import type { ParseState } from "../context.js";
3
4
  export declare class ArrayReader<T> implements IReader<T> {
4
5
  protected _src: ArrayLike<T>;
5
6
  constructor(_src: ArrayLike<T>);
6
7
  read(state: ParseState<T>): T;
8
+ prev(state: ParseState<T>): Maybe<T>;
7
9
  next(state: ParseState<T>): void;
8
10
  isDone(state: ParseState<T>): boolean;
9
11
  format(state: ParseState<T>): string;
package/readers/array-reader.js CHANGED
@@ -5,9 +5,11 @@ class ArrayReader {
5
5
  read(state) {
6
6
  return this._src[state.p];
7
7
  }
8
+ prev(state) {
9
+ return this._src[state.p - 1];
10
+ }
8
11
  next(state) {
9
12
  if (state.done) return;
10
- state.last = this._src[state.p];
11
13
  state.done = ++state.p >= this._src.length;
12
14
  }
13
15
  isDone(state) {
package/readers/string-reader.d.ts CHANGED
@@ -1,9 +1,11 @@
1
+ import type { Maybe } from "@thi.ng/api";
1
2
  import type { IReader } from "../api.js";
2
3
  import type { ParseState } from "../context.js";
3
4
  export declare class StringReader implements IReader<string> {
4
5
  protected _src: string;
5
6
  constructor(_src: string);
6
7
  read(state: ParseState<string>): string;
8
+ prev(state: ParseState<string>): Maybe<string>;
7
9
  next(state: ParseState<string>): void;
8
10
  isDone(state: ParseState<string>): boolean;
9
11
  format(state: ParseState<string>): string;
package/readers/string-reader.js CHANGED
@@ -5,10 +5,12 @@ class StringReader {
5
5
  read(state) {
6
6
  return this._src[state.p];
7
7
  }
8
+ prev(state) {
9
+ return this._src[state.p - 1];
10
+ }
8
11
  next(state) {
9
12
  if (state.done) return;
10
- state.last = this._src[state.p];
11
- if (state.last === "\n") {
13
+ if (this._src[state.p] === "\n") {
12
14
  state.l++;
13
15
  state.c = 1;
14
16
  } else {