bireactive 0.3.0 → 0.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (117) hide show
  1. package/README.md +14 -7
  2. package/dist/automerge/doc-cell.d.ts +20 -0
  3. package/dist/automerge/doc-cell.js +80 -0
  4. package/dist/automerge/index.d.ts +3 -0
  5. package/dist/automerge/index.js +12 -0
  6. package/dist/automerge/reconcile.d.ts +5 -0
  7. package/dist/automerge/reconcile.js +63 -0
  8. package/dist/core/_counts.d.ts +48 -0
  9. package/dist/core/_counts.js +51 -0
  10. package/dist/core/cell.d.ts +148 -112
  11. package/dist/core/cell.js +945 -768
  12. package/dist/core/debug.d.ts +25 -0
  13. package/dist/core/debug.js +121 -0
  14. package/dist/core/derived-geometry.js +4 -7
  15. package/dist/core/index.d.ts +9 -2
  16. package/dist/core/index.js +8 -1
  17. package/dist/core/lenses/aggregates.d.ts +42 -52
  18. package/dist/core/lenses/aggregates.js +225 -116
  19. package/dist/core/lenses/geometry.d.ts +22 -4
  20. package/dist/core/lenses/geometry.js +59 -27
  21. package/dist/core/lenses/index.d.ts +6 -6
  22. package/dist/core/lenses/index.js +6 -6
  23. package/dist/core/lenses/memory.js +4 -17
  24. package/dist/core/lenses/numerical.d.ts +100 -0
  25. package/dist/core/lenses/{typed-factor.js → numerical.js} +136 -34
  26. package/dist/core/lenses/point-cloud.d.ts +67 -0
  27. package/dist/core/lenses/{closed-form-policies.js → point-cloud.js} +226 -84
  28. package/dist/core/lenses/snap.d.ts +18 -0
  29. package/dist/core/lenses/snap.js +138 -0
  30. package/dist/core/lenses/text.d.ts +40 -0
  31. package/dist/core/lenses/text.js +202 -0
  32. package/dist/core/lifecycle.js +3 -6
  33. package/dist/core/linalg.js +5 -11
  34. package/dist/core/optic.d.ts +13 -0
  35. package/dist/core/optic.js +39 -0
  36. package/dist/core/optics.d.ts +10 -0
  37. package/dist/core/optics.js +26 -0
  38. package/dist/core/store.d.ts +9 -0
  39. package/dist/core/store.js +77 -0
  40. package/dist/core/traits.d.ts +4 -7
  41. package/dist/core/traits.js +8 -12
  42. package/dist/core/values/anchor.js +0 -4
  43. package/dist/core/values/arr.d.ts +110 -0
  44. package/dist/core/values/arr.js +336 -0
  45. package/dist/core/values/audio.d.ts +8 -9
  46. package/dist/core/values/audio.js +11 -28
  47. package/dist/core/values/bool.d.ts +11 -11
  48. package/dist/core/values/bool.js +12 -22
  49. package/dist/core/values/box.d.ts +15 -20
  50. package/dist/core/values/box.js +20 -33
  51. package/dist/core/values/canvas.d.ts +18 -25
  52. package/dist/core/values/canvas.js +32 -66
  53. package/dist/core/values/color.d.ts +5 -7
  54. package/dist/core/values/color.js +5 -11
  55. package/dist/core/values/field.d.ts +6 -7
  56. package/dist/core/values/field.js +10 -35
  57. package/dist/core/values/flags.d.ts +1 -2
  58. package/dist/core/values/flags.js +1 -17
  59. package/dist/core/values/gpu.d.ts +6 -10
  60. package/dist/core/values/gpu.js +8 -22
  61. package/dist/core/values/matrix.d.ts +2 -4
  62. package/dist/core/values/matrix.js +2 -12
  63. package/dist/core/values/num.d.ts +19 -28
  64. package/dist/core/values/num.js +23 -41
  65. package/dist/core/values/pose.d.ts +2 -4
  66. package/dist/core/values/pose.js +3 -12
  67. package/dist/core/values/range.d.ts +18 -26
  68. package/dist/core/values/range.js +22 -39
  69. package/dist/core/values/reg/ambiguity.d.ts +8 -0
  70. package/dist/core/values/reg/ambiguity.js +131 -0
  71. package/dist/core/values/reg/engine.d.ts +91 -0
  72. package/dist/core/values/reg/engine.js +373 -0
  73. package/dist/core/values/reg/nfa.d.ts +42 -0
  74. package/dist/core/values/reg/nfa.js +391 -0
  75. package/dist/core/values/reg/regex.d.ts +7 -0
  76. package/dist/core/values/reg/regex.js +318 -0
  77. package/dist/core/values/reg/types.d.ts +60 -0
  78. package/dist/core/values/reg/types.js +3 -0
  79. package/dist/core/values/reg.d.ts +250 -0
  80. package/dist/core/values/reg.js +649 -0
  81. package/dist/core/values/str.d.ts +16 -60
  82. package/dist/core/values/str.js +133 -315
  83. package/dist/core/values/template.js +1 -24
  84. package/dist/core/values/transform.d.ts +3 -5
  85. package/dist/core/values/transform.js +3 -12
  86. package/dist/core/values/tri.d.ts +9 -10
  87. package/dist/core/values/tri.js +9 -15
  88. package/dist/core/values/vec.d.ts +9 -24
  89. package/dist/core/values/vec.js +9 -64
  90. package/dist/formats/lens.js +6 -9
  91. package/dist/index.d.ts +0 -11
  92. package/dist/index.js +1 -11
  93. package/dist/jsx-dev-runtime.d.ts +2 -0
  94. package/dist/jsx-dev-runtime.js +5 -0
  95. package/dist/jsx-runtime.d.ts +54 -0
  96. package/dist/jsx-runtime.js +219 -0
  97. package/dist/schema/lens.js +5 -5
  98. package/dist/shapes/drag-behaviors.d.ts +56 -0
  99. package/dist/shapes/drag-behaviors.js +102 -0
  100. package/dist/shapes/drag-spec.d.ts +52 -0
  101. package/dist/shapes/drag-spec.js +112 -0
  102. package/dist/shapes/index.d.ts +3 -1
  103. package/dist/shapes/index.js +3 -1
  104. package/dist/shapes/interaction.d.ts +2 -3
  105. package/dist/shapes/interaction.js +77 -56
  106. package/dist/shapes/label.js +6 -0
  107. package/dist/shapes/layout.d.ts +47 -1
  108. package/dist/shapes/layout.js +59 -1
  109. package/package.json +22 -1
  110. package/dist/coll.d.ts +0 -74
  111. package/dist/coll.js +0 -210
  112. package/dist/core/lenses/closed-form-policies.d.ts +0 -57
  113. package/dist/core/lenses/decompositions.d.ts +0 -14
  114. package/dist/core/lenses/decompositions.js +0 -224
  115. package/dist/core/lenses/domain-aggregates.d.ts +0 -42
  116. package/dist/core/lenses/domain-aggregates.js +0 -245
  117. package/dist/core/lenses/typed-factor.d.ts +0 -40
@@ -0,0 +1,8 @@
1
+ import { type Re } from "./engine.js";
2
+ /** A witness in `L(a) ∩ L(b)` (shortest), or `null` if the languages are
3
+ * disjoint. Product BFS over derivative pairs. */
4
+ export declare function intersects(a: Re, b: Re): string | null;
5
+ /** A witness string that `a · b` factors two distinct ways, or `null` if the
6
+ * concatenation is unambiguous. Ambiguous iff some nonempty bridge `t` can
7
+ * both extend a word of `L(a)` and be absorbed into `L(b)`. */
8
+ export declare function concatAmbiguity(a: Re, b: Re): string | null;
@@ -0,0 +1,131 @@
1
+ import { alphabetOf, der, nullable, reKey } from "./engine.js";
2
/** Turn one UTF-16 code unit into the single-character string it encodes. */
function cp(unit) {
    return String.fromCharCode(unit);
}
3
/** Gather the representative code units that `alphabetOf` reports for `a` and
 * for `b` into one array. Both calls fill the same set, so a unit shared by the
 * two expressions appears once. */
function alphaUnion(a, b) {
    const units = new Set();
    for (const re of [a, b])
        alphabetOf(re, units);
    return Array.from(units);
}
11
/** Derive `r` by each code unit of `s` in turn and return the resulting
 * expression. Stops early once the state is `emp`, which is returned as-is. */
function applyStr(r, s) {
    let state = r;
    let at = 0;
    while (at < s.length) {
        if (state.k === "emp")
            break; // dead state: no further unit can change it
        state = der(state, s.charCodeAt(at));
        at += 1;
    }
    return state;
}
18
/** Find a word accepted by both `a` and `b`.
 *
 * Breadth-first search over pairs of derivatives, stepping both expressions by
 * the same representative code unit; the first pair in which both sides are
 * nullable yields the (shortest found) witness.
 *
 * @returns the witness string, or `null` when the search finds no common word.
 */
export function intersects(a, b) {
    const pairKey = (x, y) => `${reKey(x)}|${reKey(y)}`;
    const units = alphaUnion(a, b);
    const visited = new Set([pairKey(a, b)]);
    const frontier = [{ a, b, w: "" }];
    let cursor = 0;
    while (cursor < frontier.length) {
        const current = frontier[cursor];
        cursor += 1;
        if (nullable(current.a) && nullable(current.b))
            return current.w;
        for (const unit of units) {
            const stepA = der(current.a, unit);
            if (stepA.k === "emp")
                continue;
            const stepB = der(current.b, unit);
            if (stepB.k === "emp")
                continue;
            const key = pairKey(stepA, stepB);
            if (!visited.has(key)) {
                visited.add(key);
                frontier.push({ a: stepA, b: stepB, w: current.w + cp(unit) });
            }
        }
    }
    return null;
}
44
/** Explore the derivative automaton of `r` breadth-first.
 *
 * @returns a map from `reKey(state)` to `{ re, word }`, where `word` is the
 *   first (hence shortest, in BFS order) word found that derives `r` to `re`.
 *   The start state is included with the empty word; `emp` is never recorded.
 */
function reachableStates(r) {
    const units = Array.from(alphabetOf(r));
    const start = { re: r, word: "" };
    const found = new Map([[reKey(r), start]]);
    const frontier = [start];
    let cursor = 0;
    while (cursor < frontier.length) {
        const { re, word } = frontier[cursor++];
        for (const unit of units) {
            const next = der(re, unit);
            if (next.k === "emp")
                continue;
            const key = reKey(next);
            if (found.has(key))
                continue;
            const entry = { re: next, word: word + cp(unit) };
            found.set(key, entry);
            frontier.push(entry);
        }
    }
    return found;
}
66
/** Order-independent key for a collection of states: their `reKey`s, sorted
 * and joined with commas. */
const SET_KEY = (states) => {
    const keys = [];
    for (const st of states)
        keys.push(reKey(st));
    keys.sort();
    return keys.join(",");
};
67
/** Drop states whose `reKey` has already been seen, keeping the first
 * occurrence of each and preserving the original order. */
const dedup = (states) => {
    const firstByKey = new Map();
    for (const st of states) {
        const key = reKey(st);
        if (!firstByKey.has(key))
            firstByKey.set(key, st);
    }
    return [...firstByKey.values()];
};
79
// Upper limit on the number of distinct product states the bridge search may
// record. Going past it aborts with an error rather than returning `null`, so
// an oversized search is never reported as "unambiguous".
const MAX_STATES = 200000;
/** Look for a string that the concatenation `a · b` can split in two
 * different places.
 *
 * The search looks for a nonempty bridge `t` such that, for some `u ∈ L(a)`,
 * `u·t` is also in `L(a)`, and for some `v ∈ L(b)`, `t·v` is also in `L(b)`.
 * Each search state pairs the set of derivatives of A's accepting states by
 * `t` with `der(b, t)`.
 *
 * @returns a witness string built by `witnessFor`, or `null` if no bridge is
 *   found (including when `a` has no accepting reachable state).
 * @throws {Error} "reg: grammar too complex to verify" once more than
 *   `MAX_STATES` product states have been recorded.
 */
export function concatAmbiguity(a, b) {
    const accepting = [...reachableStates(a).values()].filter(st => nullable(st.re));
    if (accepting.length === 0)
        return null; // L(a) = ∅: nothing to split
    const units = alphaUnion(a, b);
    const productKey = (left, right) => `${SET_KEY(left)}|${reKey(right)}`;
    const initial = dedup(accepting.map(st => st.re));
    const visited = new Set([productKey(initial, b)]);
    const frontier = [{ sa: initial, db: b, t: "" }];
    let cursor = 0;
    while (cursor < frontier.length) {
        if (visited.size > MAX_STATES)
            throw new Error("reg: grammar too complex to verify");
        const { sa, db, t } = frontier[cursor];
        cursor += 1;
        // A nonempty bridge that lands the left side on an accepting state:
        // now check whether the right side can absorb it.
        if (t.length > 0 && sa.some(st => nullable(st))) {
            const tail = intersects(db, b); // tail ∈ L(b) with t·tail ∈ L(b)
            if (tail !== null)
                return witnessFor(accepting, t, tail);
        }
        for (const unit of units) {
            const rightNext = der(db, unit);
            if (rightNext.k === "emp")
                continue; // t·unit is no longer a prefix of any word of L(b)
            const leftNext = dedup(sa.map(st => der(st, unit)).filter(st => st.k !== "emp"));
            if (leftNext.length === 0)
                continue; // no left-hand state survives this unit
            const key = productKey(leftNext, rightNext);
            if (visited.has(key))
                continue;
            visited.add(key);
            frontier.push({ sa: leftNext, db: rightNext, t: t + cp(unit) });
        }
    }
    return null;
}
124
/** Build the reported witness `u·t·v`.
 *
 * Picks the first entry of `accepting` whose state, derived by `t`, is still
 * nullable, and prefixes its recorded word. If no entry qualifies the result
 * is just `t·v`.
 */
function witnessFor(accepting, t, v) {
    const anchor = accepting.find(st => nullable(applyStr(st.re, t)));
    return anchor === undefined ? t + v : anchor.word + t + v;
}
@@ -0,0 +1,91 @@
1
+ /** A set of UTF-16 code units as sorted, merged, inclusive ranges. Negation is
2
+ * resolved at construction, so a set is always a positive union of ranges. */
3
+ export declare class CharSet {
4
+ readonly ranges: ReadonlyArray<readonly [number, number]>;
5
+ private constructor();
6
+ /** Build from arbitrary (possibly overlapping/unsorted) ranges. */
7
+ static of(ranges: ReadonlyArray<readonly [number, number]>): CharSet;
8
+ static char(cp: number): CharSet;
9
+ static range(lo: number, hi: number): CharSet;
10
+ /** The full code-unit alphabet. */
11
+ static full(): CharSet;
12
+ static empty(): CharSet;
13
+ has(cp: number): boolean;
14
+ isEmpty(): boolean;
15
+ union(other: CharSet): CharSet;
16
+ /** Do the two sets share any code unit? (Both are sorted/normalized.) */
17
+ overlaps(other: CharSet): boolean;
18
+ /** Complement against the full code-unit alphabet. */
19
+ complement(): CharSet;
20
+ /** Case-fold (ASCII + via `toUpperCase`/`toLowerCase`) for the `i` flag.
21
+ * Conservative: adds the upper/lower variant of every unit in range. */
22
+ ignoreCase(): CharSet;
23
+ }
24
+ /** A regular expression over code units. `emp` = ∅ (matches nothing), `eps` =
25
+ * ε (matches the empty string). Built only through the smart constructors
26
+ * below so that derivatives stay simplified. */
27
+ export type Re = {
28
+ readonly k: "emp";
29
+ } | {
30
+ readonly k: "eps";
31
+ } | {
32
+ readonly k: "chr";
33
+ readonly set: CharSet;
34
+ } | {
35
+ readonly k: "seq";
36
+ readonly a: Re;
37
+ readonly b: Re;
38
+ } | {
39
+ readonly k: "alt";
40
+ readonly a: Re;
41
+ readonly b: Re;
42
+ } | {
43
+ readonly k: "star";
44
+ readonly r: Re;
45
+ };
46
+ export declare const EMP: Re;
47
+ export declare const EPS: Re;
48
+ export declare function chr(set: CharSet): Re;
49
+ /** Concatenation, simplified: `∅·_ = _·∅ = ∅`, `ε·b = b`, `a·ε = a`. */
50
+ export declare function seq(a: Re, b: Re): Re;
51
+ /** Union, normalized modulo ACI: flatten nested alts and drop duplicate
52
+ * branches, preserving first-occurrence order. The derivative-state set is
53
+ * finite only modulo ACI, so this keeps `der` bounded. Order is preserved (not
54
+ * sorted) to keep greedy/backtracking semantics. */
55
+ export declare function alt(a: Re, b: Re): Re;
56
+ /** Kleene star, simplified: `∅* = ε* = ε`, `(r*)* = r*`. */
57
+ export declare function star(r: Re): Re;
58
+ /** N-ary concatenation (right-nested). */
59
+ export declare function seqAll(parts: readonly Re[]): Re;
60
+ /** N-ary union. */
61
+ export declare function altAll(branches: readonly Re[]): Re;
62
+ /** Bounded repetition `r{lo,hi}` (hi `undefined` = unbounded). */
63
+ export declare function repeat(r: Re, lo: number, hi: number | undefined): Re;
64
+ /** A canonical structural key, used both for ACI dedup in `alt` and for
65
+ * derivative-state dedup during language enumeration. */
66
+ export declare function reKey(r: Re): string;
67
+ /** Does `r` match the empty string? */
68
+ export declare function nullable(r: Re): boolean;
69
+ /** Brzozowski derivative of `r` with respect to code unit `cp`. */
70
+ export declare function der(r: Re, cp: number): Re;
71
+ /** Does `r` match exactly `s[from..to)`? */
72
+ export declare function accepts(r: Re, s: string, from?: number, to?: number): boolean;
73
+ /** Every prefix length `k ≥ 0` such that `r` matches `s[pos..pos+k)`, ascending.
74
+ * This is the backtracking lexer primitive: a leaf can accept several lengths
75
+ * (`\d+` over "123" accepts 1, 2, 3) and the value parser tries them
76
+ * greedily (longest first) with proper fallback. */
77
+ export declare function matchLengths(r: Re, s: string, pos: number): number[];
78
+ /** Characters that can begin a word in `L(r)`. */
79
+ export declare function firstSet(r: Re): CharSet;
80
+ /** Characters that can extend an already-complete match of `r` (the union of
81
+ * the first-sets of every reachable accepting derivative state). Finite and
82
+ * terminating because the derivative-state set is finite modulo ACI. */
83
+ export declare function followLast(r: Re): CharSet;
84
+ /** Representative code units that exercise every char-set boundary in `r`
85
+ * (each range's low/high endpoint). Enough to drive structural exploration
86
+ * without iterating the whole alphabet. */
87
+ export declare function alphabetOf(r: Re, out?: Set<number>): Set<number>;
88
+ /** Enumerate strings in `L(r)` over `alphabet`, shortest-first, up to `maxLen`
89
+ * and `cap` results. Used by the ambiguity oracle to find minimal
90
+ * counterexamples; bounded so it always terminates. */
91
+ export declare function language(r: Re, alphabet: readonly number[], maxLen: number, cap: number): Generator<string>;
@@ -0,0 +1,373 @@
1
// Matching is over UTF-16 code units (JS regex without the `/u` flag): `.` is
// one code unit, an astral char is two. Keeps the whole stack in one index space.
const UNIT_MAX = 0xffff;
/**
 * A set of UTF-16 code units stored as sorted, merged, inclusive `[lo, hi]`
 * ranges. Negation is resolved at construction, so a set is always a positive
 * union of ranges.
 *
 * All static factories go through `normalize`, so every instance they return
 * holds ranges that lie inside `[0, UNIT_MAX]`, are sorted, and neither
 * overlap nor touch.
 */
export class CharSet {
    /** Sorted, non-overlapping inclusive `[lo, hi]` pairs. */
    ranges;
    /** Stores `ranges` untouched; callers must pass already-normalized data. */
    constructor(ranges) {
        this.ranges = ranges;
    }
    /** Build from arbitrary (possibly overlapping/unsorted) ranges. */
    static of(ranges) {
        return new CharSet(normalize(ranges));
    }
    /** The singleton set `{cp}`; empty if `cp` is not a code unit. */
    static char(cp) {
        return CharSet.of([[cp, cp]]);
    }
    /** The inclusive range between `lo` and `hi` (given in either order),
     * clipped to the code-unit alphabet. */
    static range(lo, hi) {
        return CharSet.of([lo <= hi ? [lo, hi] : [hi, lo]]);
    }
    /** The full code-unit alphabet. */
    static full() {
        return new CharSet([[0, UNIT_MAX]]);
    }
    /** The set with no members. */
    static empty() {
        return new CharSet([]);
    }
    /** Is `cp` a member? Relies on the ranges being sorted to stop early. */
    has(cp) {
        for (const [lo, hi] of this.ranges) {
            if (cp < lo)
                return false; // every later range starts even higher
            if (cp <= hi)
                return true;
        }
        return false;
    }
    /** True when the set has no ranges at all. */
    isEmpty() {
        return this.ranges.length === 0;
    }
    /** Set union, re-normalized. */
    union(other) {
        return CharSet.of([...this.ranges, ...other.ranges]);
    }
    /** Do the two sets share any code unit? (Both are sorted/normalized.) */
    overlaps(other) {
        let i = 0;
        let j = 0;
        // Two-pointer sweep: advance whichever range ends first.
        while (i < this.ranges.length && j < other.ranges.length) {
            const a = this.ranges[i];
            const b = other.ranges[j];
            if (a[1] < b[0])
                i++;
            else if (b[1] < a[0])
                j++;
            else
                return true;
        }
        return false;
    }
    /** Complement against the full code-unit alphabet. */
    complement() {
        const out = [];
        let at = 0; // first unit not yet accounted for
        for (const [lo, hi] of this.ranges) {
            if (lo > at)
                out.push([at, lo - 1]);
            at = hi + 1;
        }
        if (at <= UNIT_MAX)
            out.push([at, UNIT_MAX]);
        return new CharSet(out);
    }
    /** Case-fold (via `toUpperCase`/`toLowerCase`) for the `i` flag.
     *
     * For each range of at most 4097 units, adds the single-unit upper- and
     * lower-case variant of every member. Wider ranges are not walked unit by
     * unit: only their first unit is folded. Returns `this` when nothing was
     * added. */
    ignoreCase() {
        const extra = [];
        for (const [lo, hi] of this.ranges) {
            // Gigantic ranges are not folded char-by-char; only `lo` is visited.
            const last = hi - lo > 4096 ? lo : hi;
            for (let cp = lo; cp <= last; cp++) {
                const ch = String.fromCharCode(cp);
                const u = ch.toUpperCase();
                const l = ch.toLowerCase();
                // Multi-unit case mappings cannot be expressed as one code unit.
                if (u.length === 1 && u !== ch)
                    extra.push([u.charCodeAt(0), u.charCodeAt(0)]);
                if (l.length === 1 && l !== ch)
                    extra.push([l.charCodeAt(0), l.charCodeAt(0)]);
            }
        }
        return extra.length === 0 ? this : this.union(CharSet.of(extra));
    }
}
/** Clip every range to `[0, UNIT_MAX]`, drop the ones that are empty (inverted,
 * NaN, or entirely outside the alphabet), then sort by start and merge ranges
 * that overlap or are adjacent. Clipping happens before the emptiness check so
 * an out-of-alphabet range can never survive as an inverted pair. */
function normalize(ranges) {
    const clipped = [];
    for (const [lo, hi] of ranges) {
        const from = Math.max(0, lo);
        const to = Math.min(UNIT_MAX, hi);
        if (from <= to)
            clipped.push([from, to]);
    }
    clipped.sort((a, b) => a[0] - b[0]);
    const out = [];
    for (const [lo, hi] of clipped) {
        const last = out[out.length - 1];
        if (last !== undefined && lo <= last[1] + 1) {
            // Overlapping or touching: extend the previous range if needed.
            if (hi > last[1])
                last[1] = hi;
        }
        else {
            out.push([lo, hi]);
        }
    }
    return out;
}
110
+ export const EMP = { k: "emp" };
111
+ export const EPS = { k: "eps" };
112
+ export function chr(set) {
113
+ return set.isEmpty() ? EMP : { k: "chr", set };
114
+ }
115
/** Smart constructor for concatenation. Applies `∅·_ = _·∅ = ∅`, `ε·b = b` and
 * `a·ε = a` before falling back to a fresh `seq` node. */
export function seq(a, b) {
    const dead = a.k === "emp" || b.k === "emp";
    if (dead)
        return EMP;
    if (a.k === "eps")
        return b;
    return b.k === "eps" ? a : { k: "seq", a, b };
}
125
/** Smart constructor for union, normalized modulo ACI.
 *
 * Nested `alt` nodes are flattened left to right, `emp` branches are dropped,
 * and a branch whose `reKey` was already seen is skipped, so the first
 * occurrence wins and branch order is kept (not sorted), which preserves
 * greedy/backtracking semantics. The surviving branches are rebuilt as a
 * right-nested chain; a single survivor is returned as-is.
 */
export function alt(a, b) {
    if (a.k === "emp")
        return b;
    if (b.k === "emp")
        return a;
    const branches = [];
    const seenKeys = new Set();
    // Explicit stack, pushed right-to-left so branches pop in left-to-right order.
    const pending = [b, a];
    while (pending.length > 0) {
        const node = pending.pop();
        if (node.k === "alt") {
            pending.push(node.b, node.a);
            continue;
        }
        if (node.k === "emp")
            continue;
        const key = reKey(node);
        if (seenKeys.has(key))
            continue;
        seenKeys.add(key);
        branches.push(node);
    }
    // Fold from the right, seeded with the last branch.
    return branches
        .slice(0, -1)
        .reduceRight((tail, branch) => ({ k: "alt", a: branch, b: tail }), branches[branches.length - 1]);
}
159
/** Smart constructor for Kleene star: `∅*` and `ε*` collapse to `ε`, and a
 * star of a star is returned unchanged. */
export function star(r) {
    switch (r.k) {
        case "emp":
        case "eps":
            return EPS;
        case "star":
            return r;
        default:
            return { k: "star", r };
    }
}
167
/** Concatenate `parts` in order, folding from the right through `seq` so the
 * result is right-nested. An empty list yields `EPS`. */
export function seqAll(parts) {
    return parts.reduceRight((tail, part) => seq(part, tail), EPS);
}
174
/** Union of all `branches`, folded from the right through `alt`. An empty
 * list yields `EMP`. */
export function altAll(branches) {
    return branches.reduceRight((rest, branch) => alt(branch, rest), EMP);
}
181
/** Bounded repetition `r{lo,hi}`: `lo` mandatory copies of `r` followed by
 * either `r*` (when `hi` is `undefined`, i.e. unbounded) or a nest of
 * `hi - lo` optional copies of the form `(ε | r(ε | r…))`. */
export function repeat(r, lo, hi) {
    const mandatory = [];
    let copies = 0;
    while (copies < lo) {
        mandatory.push(r);
        copies += 1;
    }
    const head = seqAll(mandatory);
    if (hi === undefined)
        return seq(head, star(r));
    let optional = EPS;
    let remaining = hi - lo;
    while (remaining > 0) {
        optional = alt(EPS, seq(r, optional));
        remaining -= 1;
    }
    return seq(head, optional);
}
193
/** Serialize `r` into a structural string key: `0` for ∅, `1` for ε,
 * `c<lo>-<hi>,…` for a char set, and a prefix tag (`.`, `|`, `*`) followed by
 * the parenthesized keys of the children for seq, alt and star. Used for ACI
 * dedup in `alt` and for derivative-state dedup during exploration. */
export function reKey(r) {
    if (r.k === "emp")
        return "0";
    if (r.k === "eps")
        return "1";
    if (r.k === "chr") {
        const spans = [];
        for (const [lo, hi] of r.set.ranges)
            spans.push(`${lo}-${hi}`);
        return `c${spans.join(",")}`;
    }
    if (r.k === "star")
        return `*(${reKey(r.r)})`;
    if (r.k === "seq" || r.k === "alt") {
        const tag = r.k === "seq" ? "." : "|";
        return `${tag}(${reKey(r.a)})(${reKey(r.b)})`;
    }
}
211
+ // ── derivative ─────────────────────────────────────────────────────────
212
/** Is the empty string in `L(r)`? ε and any star are nullable, ∅ and char
 * sets are not; a seq needs both sides nullable, an alt needs either. */
export function nullable(r) {
    if (r.k === "eps" || r.k === "star")
        return true;
    if (r.k === "emp" || r.k === "chr")
        return false;
    if (r.k === "seq")
        return nullable(r.a) && nullable(r.b);
    if (r.k === "alt")
        return nullable(r.a) || nullable(r.b);
}
227
/** Brzozowski derivative: the expression matching every word `w` such that
 * `cp·w` is matched by `r`. Results are built through the smart constructors
 * `seq`/`alt`, so they come out simplified. */
export function der(r, cp) {
    if (r.k === "emp" || r.k === "eps")
        return EMP;
    if (r.k === "chr")
        return r.set.has(cp) ? EPS : EMP;
    if (r.k === "alt")
        return alt(der(r.a, cp), der(r.b, cp));
    if (r.k === "star")
        return seq(der(r.r, cp), r);
    if (r.k === "seq") {
        const viaHead = seq(der(r.a, cp), r.b);
        if (!nullable(r.a))
            return viaHead;
        // The head may match nothing, so `cp` can also start the tail.
        return alt(viaHead, der(r.b, cp));
    }
}
245
+ // ── recognition services ───────────────────────────────────────────────
246
/** Whole-slice recognition: does `r` match exactly `s[from..to)`? Derives by
 * each code unit of the slice and bails out as soon as the state is `emp`. */
export function accepts(r, s, from = 0, to = s.length) {
    let state = r;
    let at = from;
    while (at < to) {
        state = der(state, s.charCodeAt(at));
        at += 1;
        if (state.k === "emp")
            return false;
    }
    return nullable(state);
}
256
/** List, in ascending order, every length `k ≥ 0` for which `r` matches
 * `s[pos..pos+k)`. Scanning stops at the end of `s` or as soon as the
 * derivative becomes `emp`. A leaf can accept several lengths (`\d+` over
 * "123" accepts 1, 2, 3); the list is meant for a backtracking caller that
 * tries them longest-first. */
export function matchLengths(r, s, pos) {
    const lengths = nullable(r) ? [0] : [];
    let state = r;
    for (let end = pos + 1; end <= s.length; end++) {
        state = der(state, s.charCodeAt(end - 1));
        if (state.k === "emp")
            break;
        if (nullable(state))
            lengths.push(end - pos);
    }
    return lengths;
}
274
+ // ── determinism analysis (first / followLast) ───────────────────────────
275
+ // `firstSet` is the begin-set, `followLast` the continue-after-a-complete-match
276
+ // set. A grammar is deterministic when, at every split, the left's continue-set
277
+ // is disjoint from the right's begin-set.
278
/** The code units that can start a word of `L(r)`. For a seq the tail's first
 * set is included only when the head is nullable. */
export function firstSet(r) {
    if (r.k === "chr")
        return r.set;
    if (r.k === "star")
        return firstSet(r.r);
    if (r.k === "alt")
        return firstSet(r.a).union(firstSet(r.b));
    if (r.k === "seq") {
        const lead = firstSet(r.a);
        return nullable(r.a) ? lead.union(firstSet(r.b)) : lead;
    }
    if (r.k === "emp" || r.k === "eps")
        return CharSet.empty();
}
294
/** The code units that can extend an already-complete match of `r`: the union
 * of `firstSet` over every accepting derivative state reachable from `r`.
 * States are explored depth-first over the representative units of
 * `alphabetOf(r)` and deduplicated by `reKey`. */
export function followLast(r) {
    const units = Array.from(alphabetOf(r));
    const visited = new Set();
    const pending = [r];
    let follow = CharSet.empty();
    for (let st = pending.pop(); st !== undefined; st = pending.pop()) {
        const key = reKey(st);
        if (visited.has(key))
            continue;
        visited.add(key);
        if (nullable(st))
            follow = follow.union(firstSet(st));
        for (const unit of units) {
            const next = der(st, unit);
            if (next.k !== "emp")
                pending.push(next);
        }
    }
    return follow;
}
318
+ // ── language enumeration (for the ambiguity oracle) ─────────────────────
319
+ /** Representative code units that exercise every char-set boundary in `r`
320
+ * (each range's low/high endpoint). Enough to drive structural exploration
321
+ * without iterating the whole alphabet. */
322
+ export function alphabetOf(r, out = new Set()) {
323
+ switch (r.k) {
324
+ case "emp":
325
+ case "eps":
326
+ return out;
327
+ case "chr":
328
+ for (const [lo, hi] of r.set.ranges) {
329
+ out.add(lo);
330
+ if (hi !== lo)
331
+ out.add(hi);
332
+ }
333
+ return out;
334
+ case "seq":
335
+ case "alt":
336
+ alphabetOf(r.a, out);
337
+ alphabetOf(r.b, out);
338
+ return out;
339
+ case "star":
340
+ return alphabetOf(r.r, out);
341
+ }
342
+ }
343
/** Lazily yield words of `L(r)` built from `alphabet`, shortest first.
 *
 * Breadth-first over derivative states. Each (word length, state key) pair is
 * expanded only once, so a second word of the same length that reaches an
 * already-seen state is not produced. The generator stops after `cap` words,
 * never extends a word beyond `maxLen` units, and gives up once its queue has
 * grown past 50000 entries, so it always terminates.
 */
export function* language(r, alphabet, maxLen, cap) {
    const stamp = (len, re) => `${len}:${reKey(re)}`;
    const visited = new Set([stamp(0, r)]);
    const frontier = [["", r]];
    let emitted = 0;
    for (let cursor = 0; cursor < frontier.length; cursor++) {
        const [word, state] = frontier[cursor];
        if (nullable(state)) {
            yield word;
            emitted += 1;
            if (emitted >= cap)
                return;
        }
        if (word.length >= maxLen)
            continue;
        for (const unit of alphabet) {
            const next = der(state, unit);
            if (next.k === "emp")
                continue;
            const key = stamp(word.length + 1, next);
            if (!visited.has(key)) {
                visited.add(key);
                frontier.push([word + String.fromCharCode(unit), next]);
            }
        }
        if (frontier.length > 50000)
            return; // hard cap on exploration
    }
}
@@ -0,0 +1,42 @@
1
+ import type { Node, RegVal, Span } from "../reg.js";
2
+ import { CharSet } from "./engine.js";
3
+ interface EvT {
4
+ readonly k: number;
5
+ readonly id: number;
6
+ readonly i: number;
7
+ }
8
+ type Char = {
9
+ op: 0;
10
+ set: CharSet;
11
+ };
12
+ type Split = {
13
+ op: 1;
14
+ x: number;
15
+ y: number;
16
+ };
17
+ type Jmp = {
18
+ op: 2;
19
+ x: number;
20
+ };
21
+ type Mark = {
22
+ op: 3;
23
+ ev: EvT;
24
+ };
25
+ type MatchI = {
26
+ op: 4;
27
+ };
28
+ type Instr = Char | Split | Jmp | Mark | MatchI;
29
+ export interface Program {
30
+ readonly code: readonly Instr[];
31
+ readonly idOf: WeakMap<Node, number>;
32
+ }
33
+ /** Compile a grammar AST to a tagged Thompson program. */
34
+ export declare function compileProgram(root: Node): Program;
35
+ /** Parse `s` fully; `null` if it doesn't match. Builds the value tree (and, if
36
+ * given, the named-capture span map) from the winning marker log. */
37
+ export declare function parseValue(root: Node, prog: Program, s: string, spans?: Map<string, Span>): {
38
+ val: RegVal;
39
+ } | null;
40
+ /** Does the program match the whole of `s`? Recognition only (no allocation). */
41
+ export declare function recognize(prog: Program, s: string): boolean;
42
+ export {};