@turing-machine-js/machine 7.0.0-alpha.3 → 7.0.0-alpha.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.mjs CHANGED
@@ -173,6 +173,153 @@ class Reference {
173
173
  }
174
174
  _Reference_referenceBinding = new WeakMap();
175
175
 
176
+ const movementDescriptionToLabel = {
177
+ 'move caret left command': 'L',
178
+ 'move caret right command': 'R',
179
+ 'do not move carer': 'S',
180
+ };
181
+ const symbolCommandDescriptionToLabel = {
182
+ 'keep symbol command': 'K',
183
+ 'erase symbol command': 'E',
184
+ };
185
+ // Reserved characters in the encoded pattern string:
186
+ // '*' ASCII asterisk (U+002A) — per-cell ifOtherSymbol, matches any symbol
187
+ // on that tape. ASCII (not a fancier glyph like U+1F7B0) so it renders
188
+ // in every Mermaid environment and every monospace font. A literal `*`
189
+ // in the alphabet is unambiguous from the marker because it's quoted
190
+ // (`'*'`).
191
+ // 'B' the tape's blank symbol shorthand (in read patterns). A literal `B`
192
+ // in the alphabet is unambiguous from the marker because it's quoted
193
+ // (`'B'`).
194
+ // ',' separates per-tape cells inside one pattern
195
+ // '|' separates alternative patterns
196
+ // "'" surrounds a literal alphabet symbol — e.g. `'0'` for literal `0`,
197
+ // `'X'` for literal `X`. The quoting is what visually separates literal
198
+ // symbols from the convention markers `*` / `B` and from the write
199
+ // commands `K` / `E`.
200
+ // '\\' escape prefix — to represent any of '*', 'B', ',', '|', "'", or '\\'
201
+ // as a *literal* alphabet symbol *inside* the quotes (e.g. `'\''` for
202
+ // a literal apostrophe).
203
+ const IF_OTHER_MARKER = '*';
204
+ const BLANK_MARKER = 'B';
205
+ function escapeAlphabetSymbol(s) {
206
+ return s
207
+ .replace(/\\/g, '\\\\')
208
+ .replace(/'/g, "\\'");
209
+ }
210
+ function decodePatternDescription(description, alphabets) {
211
+ if (!description) {
212
+ return '?';
213
+ }
214
+ if (description === 'other symbol') {
215
+ return IF_OTHER_MARKER;
216
+ }
217
+ try {
218
+ const patternList = JSON.parse(description);
219
+ return patternList
220
+ .map((pattern) => pattern
221
+ .map((s, tapeIx) => {
222
+ if (s === null) {
223
+ return IF_OTHER_MARKER;
224
+ }
225
+ if (s === alphabets[tapeIx]?.[0]) {
226
+ return BLANK_MARKER;
227
+ }
228
+ return `'${escapeAlphabetSymbol(s)}'`;
229
+ })
230
+ .join(','))
231
+ .join('|');
232
+ }
233
+ catch {
234
+ return description;
235
+ }
236
+ }
237
+ function decodeMovement(description) {
238
+ if (!description) {
239
+ return '?';
240
+ }
241
+ return movementDescriptionToLabel[description] ?? description;
242
+ }
243
+ function splitUnescaped(s, sep) {
244
+ const parts = [];
245
+ let current = '';
246
+ let i = 0;
247
+ while (i < s.length) {
248
+ if (s[i] === '\\' && i + 1 < s.length) {
249
+ current += s[i + 1];
250
+ i += 2;
251
+ }
252
+ else if (s[i] === sep) {
253
+ parts.push(current);
254
+ current = '';
255
+ i += 1;
256
+ }
257
+ else {
258
+ current += s[i];
259
+ i += 1;
260
+ }
261
+ }
262
+ parts.push(current);
263
+ return parts;
264
+ }
265
/**
 * Parses an encoded pattern string (the format emitted by
 * `decodePatternDescription`) back into a list of alternatives, each of which
 * is a list of per-tape cells.
 *
 * Grammar handled here:
 *   - `|` separates alternatives, `,` separates per-tape cells;
 *   - a cell wrapped in single quotes is a literal alphabet symbol; inside the
 *     quotes `\x` stands for the literal character `x`, and `,` / `|` are
 *     ordinary characters (the encoder does not escape them);
 *   - an unquoted `*` is the per-cell "any symbol" marker (returned as `null`);
 *   - an unquoted `B` is the blank shorthand, resolved to `alphabets[tapeIx][0]`
 *     (or left as `'B'` when that alphabet is not available);
 *   - any other unquoted cell is returned as written (escapes removed once).
 *
 * The previous implementation split on `|` and then on `,` with an
 * escape-consuming splitter, so escapes were removed twice (a literal
 * backslash `'\\'` decoded to `''`) and quoted separators (`','`, `'|'`) were
 * split in the middle. This version tokenizes in one pass, honours quotes, and
 * unescapes exactly once.
 *
 * @param {string} s encoded pattern string.
 * @param {Array<ArrayLike<string>>} alphabets per-tape symbol lists; index 0 of
 *   each list is used as that tape's blank symbol.
 * @returns {Array<Array<string|null>>|null} `null` when the whole pattern is
 *   the `*` marker; otherwise one array of cells per alternative, where a
 *   `null` cell means "any symbol on that tape".
 */
function parsePatternString(s, alphabets) {
  if (s === IF_OTHER_MARKER) {
    return null;
  }

  const isSeparator = (ch) => ch === ',' || ch === '|';

  // Removes one level of backslash escaping: `\x` -> `x`.
  const unescapeOnce = (text) => text.replace(/\\([\s\S])/g, (_match, ch) => ch);

  // Returns the index of the unescaped quote that closes the literal opened
  // at `openIx`, or -1 when there is none. A quote only counts as closing when
  // it is followed by a separator or the end of the string, which is what lets
  // `','` and `'|'` be read as single literals.
  const findClosingQuote = (openIx) => {
    let ix = openIx + 1;
    while (ix < s.length) {
      if (s[ix] === '\\' && ix + 1 < s.length) {
        ix += 2;
      } else if (s[ix] === "'" && (ix + 1 === s.length || isSeparator(s[ix + 1]))) {
        return ix;
      } else {
        ix += 1;
      }
    }
    return -1;
  };

  // Interprets an unquoted cell: markers first, then the legacy
  // "strip surrounding quotes" rule, otherwise the text itself.
  const resolveBareCell = (text, tapeIx) => {
    if (text === IF_OTHER_MARKER) {
      return null;
    }
    if (text === BLANK_MARKER) {
      return alphabets[tapeIx]?.[0] ?? text;
    }
    if (text.length >= 2 && text.startsWith("'") && text.endsWith("'")) {
      return text.slice(1, -1);
    }
    return text;
  };

  const alternatives = [];
  let cells = [];
  let i = 0;
  for (;;) {
    const closeIx = s[i] === "'" ? findClosingQuote(i) : -1;
    if (closeIx !== -1) {
      // Quoted literal: never a marker, unescape its content exactly once.
      cells.push(unescapeOnce(s.slice(i + 1, closeIx)));
      i = closeIx + 1;
    } else {
      let text = '';
      while (i < s.length && !isSeparator(s[i])) {
        if (s[i] === '\\' && i + 1 < s.length) {
          text += s[i + 1];
          i += 2;
        } else {
          text += s[i];
          i += 1;
        }
      }
      cells.push(resolveBareCell(text, cells.length));
    }
    if (i >= s.length) {
      alternatives.push(cells);
      break;
    }
    // s[i] is a separator here: `|` closes the current alternative, `,` only
    // moves on to the next cell.
    if (s[i] === '|') {
      alternatives.push(cells);
      cells = [];
    }
    i += 1;
  }
  return alternatives;
}
288
+ const movementLabelToSymbol = {
289
+ L: movements.left,
290
+ R: movements.right,
291
+ S: movements.stay,
292
+ };
293
+ function parseMovementLabel(label) {
294
+ const m = movementLabelToSymbol[label];
295
+ if (!m) {
296
+ throw new Error(`unknown movement label: ${label}`);
297
+ }
298
+ return m;
299
+ }
300
+ function parseWriteSymbolLabel(label) {
301
+ if (label === 'K') {
302
+ return symbolCommands.keep;
303
+ }
304
+ if (label === 'E') {
305
+ return symbolCommands.erase;
306
+ }
307
+ // Literal alphabet symbols are wrapped in single quotes by
308
+ // `decodeWriteSymbol` — strip them on the way back.
309
+ if (label.length >= 2 && label.startsWith("'") && label.endsWith("'")) {
310
+ return label.slice(1, -1);
311
+ }
312
+ return label;
313
+ }
314
+ function decodeWriteSymbol(symbol) {
315
+ if (typeof symbol === 'symbol') {
316
+ const description = symbol.description ?? '?';
317
+ return symbolCommandDescriptionToLabel[description] ?? description;
318
+ }
319
+ return `'${symbol}'`;
320
+ }
321
+ // Format converters (toMermaid / fromMermaid) live in ./graphFormats.
322
+
176
323
  var __classPrivateFieldSet$4 = (undefined && undefined.__classPrivateFieldSet) || function (receiver, state, value, kind, f) {
177
324
  if (kind === "m") throw new TypeError("Private method is not writable");
178
325
  if (kind === "a" && !f) throw new TypeError("Private accessor was defined without a setter");
@@ -529,152 +676,512 @@ _TapeBlock_generateSymbolHint = { value: (patternList) => JSON.stringify(pattern
529
676
  .map((pattern) => pattern
530
677
  .map((symbol) => (symbol === ifOtherSymbol ? null : symbol)))) };
531
678
 
532
- const movementDescriptionToLabel = {
533
- 'move caret left command': 'L',
534
- 'move caret right command': 'R',
535
- 'do not move carer': 'S',
536
- };
537
- const symbolCommandDescriptionToLabel = {
538
- 'keep symbol command': 'K',
539
- 'erase symbol command': 'E',
540
- };
541
- // Reserved characters in the encoded pattern string:
542
- // '*' ASCII asterisk (U+002A) — per-cell ifOtherSymbol, matches any symbol
543
- // on that tape. ASCII (not a fancier glyph like U+1F7B0) so it renders
544
- // in every Mermaid environment and every monospace font. A literal `*`
545
- // in the alphabet is unambiguous from the marker because it's quoted
546
- // (`'*'`).
547
- // 'B' the tape's blank symbol shorthand (in read patterns). A literal `B`
548
- // in the alphabet is unambiguous from the marker because it's quoted
549
- // (`'B'`).
550
- // ',' separates per-tape cells inside one pattern
551
- // '|' separates alternative patterns
552
- // "'" surrounds a literal alphabet symbol — e.g. `'0'` for literal `0`,
553
- // `'X'` for literal `X`. The quoting is what visually separates literal
554
- // symbols from the convention markers `*` / `B` and from the write
555
- // commands `K` / `E`.
556
- // '\\' escape prefix — to represent any of '*', 'B', ',', '|', "'", or '\\'
557
- // as a *literal* alphabet symbol *inside* the quotes (e.g. `'\''` for
558
- // a literal apostrophe).
559
- const IF_OTHER_MARKER = '*';
560
- const BLANK_MARKER = 'B';
561
- function escapeAlphabetSymbol(s) {
562
- return s
563
- .replace(/\\/g, '\\\\')
564
- .replace(/'/g, "\\'");
565
- }
566
- function decodePatternDescription(description, alphabets) {
567
- if (!description) {
568
- return '?';
569
- }
570
- if (description === 'other symbol') {
571
- return IF_OTHER_MARKER;
572
- }
573
- try {
574
- const patternList = JSON.parse(description);
575
- return patternList
576
- .map((pattern) => pattern
577
- .map((s, tapeIx) => {
578
- if (s === null) {
579
- return IF_OTHER_MARKER;
679
+ // Graph serialization/reconstruction for State graphs. Extracted from
680
+ // `classes/State.ts` (#180) so the State class stays focused on the runtime
681
+ // machinery (transitions, debug, halt-stack composition). Sibling-module
682
+ // private access to State's internals goes through the `STATE_INTERNAL`
683
+ // Symbol re-exported from State.ts — see the @internal JSDoc there.
684
+ //
685
+ // Public surface is preserved: `State.toGraph` and `State.fromGraph` static
686
+ // methods continue to exist as thin delegates to the functions in this
687
+ // module. New consumers (e.g. #195's planned `collectStates`) will live
688
+ // here too and share the BFS-walk shape with `toGraph`.
689
+ /**
690
+ * Walks the reachable graph from `initialState` and returns a serializable
691
+ * `Graph`. The walk is a BFS that visits each State exactly once (keyed by
692
+ * the State's internal id) and emits one `GraphNode` per State plus
693
+ * synthetic halt-marker nodes per callable-subtree frame.
694
+ *
695
+ * Round-trips losslessly with `fromGraph` in the sense that running the
696
+ * rebuilt machine on the same input produces the same output — but State
697
+ * instance identities are NOT preserved across the cycle.
698
+ *
699
+ * See `classes/State.ts` for the runtime model these graph nodes describe;
700
+ * see `utilities/graphFormats.ts` for the Mermaid-flavored serialization
701
+ * built on top of `Graph`.
702
+ */
703
+ function toGraph(initialState, tapeBlock) {
704
+ const nodes = {};
705
+ const alphabets = tapeBlock.alphabets.map((alphabet) => alphabet.symbols);
706
+ // Pass 1: BFS-discover all reachable States; emit one GraphNode per State
707
+ // (wrapper or bare/regular). Wrappers and bares are separate nodes.
708
+ const visited = new Set();
709
+ const queue = [initialState];
710
+ const bareIds = new Set(); // ids referenced as a wrapper's bareStateId
711
+ while (queue.length > 0) {
712
+ const state = queue.shift();
713
+ const stateInternal = state[STATE_INTERNAL]();
714
+ if (visited.has(stateInternal.id)) {
715
+ continue;
716
+ }
717
+ visited.add(stateInternal.id);
718
+ if (state.isHalt) {
719
+ if (!(0 in nodes)) {
720
+ nodes[0] = {
721
+ id: 0,
722
+ name: stateInternal.name,
723
+ isHalt: true,
724
+ isHaltMarker: false,
725
+ isWrapper: false,
726
+ bareStateId: null,
727
+ frameId: null,
728
+ transitions: [],
729
+ overriddenHaltStateId: null,
730
+ tags: [...stateInternal.tags],
731
+ };
580
732
  }
581
- if (s === alphabets[tapeIx]?.[0]) {
582
- return BLANK_MARKER;
733
+ continue;
734
+ }
735
+ // Wrapper? Emit wrapper node + queue bare and override target.
736
+ if (stateInternal.overriddenHaltState !== null && stateInternal.bareState !== null) {
737
+ const bareState = stateInternal.bareState;
738
+ const overrideTarget = stateInternal.overriddenHaltState;
739
+ const bareInternal = bareState[STATE_INTERNAL]();
740
+ const overrideInternal = overrideTarget[STATE_INTERNAL]();
741
+ nodes[stateInternal.id] = {
742
+ id: stateInternal.id,
743
+ name: stateInternal.name, // composite name like "A(target)"
744
+ isHalt: false,
745
+ isHaltMarker: false,
746
+ isWrapper: true,
747
+ bareStateId: bareInternal.id,
748
+ frameId: null,
749
+ transitions: [],
750
+ overriddenHaltStateId: overrideInternal.id,
751
+ tags: [...stateInternal.tags],
752
+ };
753
+ bareIds.add(bareInternal.id);
754
+ queue.push(bareState);
755
+ queue.push(overrideTarget);
756
+ continue;
757
+ }
758
+ // Regular (or bare) state — build node with transitions.
759
+ const node = {
760
+ id: stateInternal.id,
761
+ name: stateInternal.name,
762
+ isHalt: false,
763
+ isHaltMarker: false,
764
+ isWrapper: false,
765
+ bareStateId: null,
766
+ frameId: null,
767
+ transitions: [],
768
+ overriddenHaltStateId: null,
769
+ tags: [...stateInternal.tags],
770
+ };
771
+ nodes[stateInternal.id] = node;
772
+ let patternIx = 0;
773
+ for (const [sym, { command, nextState }] of stateInternal.symbolToDataMap) {
774
+ let target;
775
+ try {
776
+ target = nextState instanceof State ? nextState : nextState.ref;
583
777
  }
584
- return `'${escapeAlphabetSymbol(s)}'`;
585
- })
586
- .join(','))
587
- .join('|');
778
+ catch {
779
+ patternIx += 1;
780
+ continue;
781
+ }
782
+ const targetInternal = target[STATE_INTERNAL]();
783
+ node.transitions.push({
784
+ pattern: decodePatternDescription(sym.description, alphabets),
785
+ command: command.tapesCommands.map((tc) => ({
786
+ symbol: decodeWriteSymbol(tc.symbol),
787
+ movement: decodeMovement(tc.movement.description),
788
+ })),
789
+ nextStateId: targetInternal.id,
790
+ id: `${stateInternal.id}-${patternIx}`,
791
+ });
792
+ queue.push(target);
793
+ patternIx += 1;
794
+ }
795
+ }
796
+ // Always emit real halt as a sentinel, even if no transition targets it.
797
+ // It anchors the `subtree -. halt .-> s0` frame-level arrow whenever a
798
+ // frame demand-emits one, and it's the canonical machine-halt singleton.
799
+ if (!(0 in nodes)) {
800
+ nodes[0] = {
801
+ id: 0,
802
+ name: 'halt',
803
+ isHalt: true,
804
+ isHaltMarker: false,
805
+ isWrapper: false,
806
+ bareStateId: null,
807
+ frameId: null,
808
+ transitions: [],
809
+ overriddenHaltStateId: null,
810
+ tags: [...haltState[STATE_INTERNAL]().tags],
811
+ };
588
812
  }
589
- catch {
590
- return description;
813
+ // Pass 2: For each bare, compute its forward-reachable set (following
814
+ // transitions; stopping at halt and at wrappers — both are frame
815
+ // boundaries).
816
+ const computeReach = (startId) => {
817
+ const reach = new Set();
818
+ const stack = [startId];
819
+ while (stack.length > 0) {
820
+ const id = stack.pop();
821
+ if (reach.has(id)) {
822
+ continue;
823
+ }
824
+ const node = nodes[id];
825
+ // `nodes[id]` is always populated for `id` that the BFS reached, so
826
+ // a defensive `!node` check would be dead. `isHalt` / `isWrapper`
827
+ // are real boundaries — both stop reach-set expansion.
828
+ if (node.isHalt || node.isWrapper) {
829
+ continue;
830
+ }
831
+ reach.add(id);
832
+ for (const t of node.transitions) {
833
+ const target = nodes[t.nextStateId];
834
+ if (!target || target.isHalt || target.isWrapper) {
835
+ continue;
836
+ }
837
+ stack.push(t.nextStateId);
838
+ }
839
+ }
840
+ return reach;
841
+ };
842
+ const reachByBare = new Map();
843
+ for (const bareId of bareIds) {
844
+ reachByBare.set(bareId, computeReach(bareId));
845
+ }
846
+ // Pass 3: Union-find on bare overlaps. Two bares merge if their reach
847
+ // sets share any state. Canonical representative = smallest bare-id in
848
+ // the component.
849
+ const ufParent = new Map();
850
+ // Note: no path compression. The union policy below ("smaller id always
851
+ // becomes root") keeps the tree flat — every union targets bares[0] as
852
+ // the root, so any node's parent IS the root. Walking up never exceeds
853
+ // one step. Path compression would be dead code under this invariant.
854
+ const ufFind = (id) => {
855
+ if (!ufParent.has(id)) {
856
+ ufParent.set(id, id);
857
+ }
858
+ let root = id;
859
+ while (ufParent.get(root) !== root) {
860
+ root = ufParent.get(root);
861
+ }
862
+ return root;
863
+ };
864
+ const ufUnion = (a, b) => {
865
+ const ra = ufFind(a);
866
+ const rb = ufFind(b);
867
+ if (ra === rb)
868
+ return;
869
+ if (ra < rb) {
870
+ ufParent.set(rb, ra);
871
+ }
872
+ else {
873
+ ufParent.set(ra, rb);
874
+ }
875
+ };
876
+ for (const bareId of bareIds) {
877
+ ufFind(bareId);
878
+ }
879
+ // For each state, collect the bares that reach it; union all bares that
880
+ // share a state.
881
+ const stateToReachingBares = new Map();
882
+ for (const [bareId, reachSet] of reachByBare) {
883
+ for (const stateId of reachSet) {
884
+ let bares = stateToReachingBares.get(stateId);
885
+ if (!bares) {
886
+ bares = [];
887
+ stateToReachingBares.set(stateId, bares);
888
+ }
889
+ bares.push(bareId);
890
+ }
891
+ }
892
+ for (const bares of stateToReachingBares.values()) {
893
+ for (let i = 1; i < bares.length; i += 1) {
894
+ ufUnion(bares[0], bares[i]);
895
+ }
896
+ }
897
+ // Assign frameId to each in-reach state.
898
+ const frameIds = new Set();
899
+ for (const [stateId, bares] of stateToReachingBares) {
900
+ const frameId = ufFind(bares[0]);
901
+ nodes[stateId].frameId = frameId;
902
+ frameIds.add(frameId);
903
+ }
904
+ // Pass 4: Retarget halt-bound transitions for in-frame states to the
905
+ // frame's halt marker. Out-of-frame states (top-level dispatcher, override
906
+ // targets, etc.) keep their halt-bound transitions pointing at real halt.
907
+ for (const node of Object.values(nodes)) {
908
+ if (node.frameId === null) {
909
+ continue;
910
+ }
911
+ const haltMarkerId = -node.frameId;
912
+ for (const t of node.transitions) {
913
+ const target = nodes[t.nextStateId];
914
+ if (target && target.isHalt && !target.isHaltMarker) {
915
+ t.nextStateId = haltMarkerId;
916
+ }
917
+ }
591
918
  }
592
- }
593
- function decodeMovement(description) {
594
- if (!description) {
595
- return '?';
919
+ // Pass 5: Emit one halt marker per frame.
920
+ for (const frameId of frameIds) {
921
+ const haltMarkerId = -frameId;
922
+ nodes[haltMarkerId] = {
923
+ id: haltMarkerId,
924
+ name: 'halt',
925
+ isHalt: true,
926
+ isHaltMarker: true,
927
+ isWrapper: false,
928
+ bareStateId: null,
929
+ frameId,
930
+ transitions: [],
931
+ overriddenHaltStateId: null,
932
+ tags: [],
933
+ };
596
934
  }
597
- return movementDescriptionToLabel[description] ?? description;
935
+ return { initialId: initialState[STATE_INTERNAL]().id, alphabets, nodes };
598
936
  }
599
- function splitUnescaped(s, sep) {
600
- const parts = [];
601
- let current = '';
602
- let i = 0;
603
- while (i < s.length) {
604
- if (s[i] === '\\' && i + 1 < s.length) {
605
- current += s[i + 1];
606
- i += 2;
937
+ /**
938
+ * Inverse of `toGraph`: rebuilds a State graph (and a fresh TapeBlock with
939
+ * the graph's alphabets) from a serialized Graph. Round-trips with `toGraph`
940
+ * in the sense that running the rebuilt machine on the same input gives the
941
+ * same output, but the rebuilt State instances have *new* internal IDs.
942
+ *
943
+ * Under the v7 callable-subtree model (#174), graph nodes split into:
944
+ * - Wrapper nodes (`isWrapper: true`, no transitions) — reconstructed via
945
+ * `bareStates[bareStateId].withOverriddenHaltState(finalStates[overriddenHaltStateId])`.
946
+ * - Bare/regular nodes — constructed as normal States with transitions.
947
+ * - Halt + halt-marker nodes — collapse to the singleton `haltState`.
948
+ */
949
+ function fromGraph(graph) {
950
+ const alphabetObjs = graph.alphabets.map((syms) => new Alphabet(syms));
951
+ const tapeBlock = TapeBlock.fromAlphabets(alphabetObjs);
952
+ const ids = Object.keys(graph.nodes).map(Number);
953
+ // Pass 1: pre-create a Reference for each non-halt non-halt-marker node
954
+ // (both wrappers and regulars). Halt and halt-marker nodes collapse to the
955
+ // singleton `haltState` and need no ref.
956
+ const refs = {};
957
+ for (const nodeId of ids) {
958
+ const node = graph.nodes[nodeId];
959
+ if (!node.isHalt) {
960
+ refs[nodeId] = new Reference();
961
+ }
962
+ }
963
+ // Convert a parsed pattern back to the symbol key the State expects.
964
+ const patternToKey = (parsed) => {
965
+ if (parsed === null) {
966
+ return ifOtherSymbol;
967
+ }
968
+ const flat = [];
969
+ for (const row of parsed) {
970
+ for (const cell of row) {
971
+ flat.push(cell === null ? ifOtherSymbol : cell);
972
+ }
973
+ }
974
+ return tapeBlock.symbol(flat);
975
+ };
976
+ // Pass 2: build a State for each non-wrapper non-halt non-halt-marker
977
+ // node. Transitions point at refs so cycles work; haltState (and halt
978
+ // markers, which collapse to haltState) are used directly.
979
+ const bareStates = {};
980
+ for (const nodeId of ids) {
981
+ const node = graph.nodes[nodeId];
982
+ if (node.isHalt || node.isWrapper) {
983
+ continue;
607
984
  }
608
- else if (s[i] === sep) {
609
- parts.push(current);
610
- current = '';
611
- i += 1;
985
+ const stateDefinition = {};
986
+ for (const t of node.transitions) {
987
+ const key = patternToKey(parsePatternString(t.pattern, graph.alphabets));
988
+ const target = graph.nodes[t.nextStateId];
989
+ const nextState = !target || target.isHalt
990
+ ? haltState
991
+ : refs[t.nextStateId];
992
+ stateDefinition[key] = {
993
+ command: t.command.map((c) => ({
994
+ symbol: parseWriteSymbolLabel(c.symbol),
995
+ movement: parseMovementLabel(c.movement),
996
+ })),
997
+ nextState,
998
+ };
999
+ }
1000
+ // Graph-sourced names may contain `(` and `)` (composite wrapper names —
1001
+ // although wrappers go through a separate path below, defensive
1002
+ // construction here keeps the bypass uniform). Construct without a name
1003
+ // and assign `name` directly through the internal accessor's setter to
1004
+ // skip the constructor's user-facing name validation.
1005
+ const bare = new State(stateDefinition);
1006
+ bare[STATE_INTERNAL]().name = node.name;
1007
+ if (node.tags.length > 0) {
1008
+ bare.tag(...node.tags);
1009
+ }
1010
+ bareStates[nodeId] = bare;
1011
+ }
1012
+ // Pass 3: resolve every node to its final State (memoized + cycle-safe).
1013
+ // Wrappers compose lazily via `withOverriddenHaltState` once their bare
1014
+ // and override are resolved.
1015
+ const finalStates = {};
1016
+ const inProgress = new Set();
1017
+ const getFinal = (nodeId) => {
1018
+ if (finalStates[nodeId]) {
1019
+ return finalStates[nodeId];
1020
+ }
1021
+ const node = graph.nodes[nodeId];
1022
+ if (!node || node.isHalt) {
1023
+ finalStates[nodeId] = haltState;
1024
+ return haltState;
1025
+ }
1026
+ if (inProgress.has(nodeId)) {
1027
+ throw new Error(`override-halt cycle at state #${nodeId}`);
1028
+ }
1029
+ inProgress.add(nodeId);
1030
+ let state;
1031
+ if (node.isWrapper) {
1032
+ const bare = getFinal(node.bareStateId);
1033
+ const override = getFinal(node.overriddenHaltStateId);
1034
+ state = bare.withOverriddenHaltState(override);
1035
+ // Apply wrapper-scoped tags (#186). Tags don't leak across wrappers
1036
+ // sharing a bare — the wrapper instance owns its own tag set, and
1037
+ // engine #175 memoization returns the same instance for the same
1038
+ // (bare, override) pair, so this is idempotent across rebuilds.
1039
+ if (node.tags.length > 0) {
1040
+ state.tag(...node.tags);
1041
+ }
612
1042
  }
613
1043
  else {
614
- current += s[i];
615
- i += 1;
1044
+ state = bareStates[nodeId];
616
1045
  }
1046
+ inProgress.delete(nodeId);
1047
+ finalStates[nodeId] = state;
1048
+ return state;
1049
+ };
1050
+ for (const nodeId of ids) {
1051
+ getFinal(nodeId);
617
1052
  }
618
- parts.push(current);
619
- return parts;
620
- }
621
- function parsePatternString(s, alphabets) {
622
- if (s === IF_OTHER_MARKER) {
623
- return null;
1053
+ // Pass 4: bind each ref to the resolved final State so cross-node
1054
+ // transitions land on the right instance.
1055
+ for (const nodeId of ids) {
1056
+ if (!graph.nodes[nodeId].isHalt) {
1057
+ refs[nodeId].bind(finalStates[nodeId]);
1058
+ }
624
1059
  }
625
- const alternatives = splitUnescaped(s, '|');
626
- return alternatives.map((alt) => {
627
- const cells = splitUnescaped(alt, ',');
628
- return cells.map((cell, tapeIx) => {
629
- if (cell === IF_OTHER_MARKER) {
630
- return null;
631
- }
632
- if (cell === BLANK_MARKER) {
633
- return alphabets[tapeIx]?.[0] ?? cell;
1060
+ return {
1061
+ start: finalStates[graph.initialId],
1062
+ tapeBlock,
1063
+ states: finalStates,
1064
+ };
1065
+ }
1066
+ /**
1067
+ * Returns a `Map<number, {state, transitionSymbols}>` keyed by engine
1068
+ * `GraphNode.id`, giving downstream tooling direct access to the `State`
1069
+ * instance + per-pattern Symbol references for breakpoint setup (#195).
1070
+ *
1071
+ * **Positional alignment contract.** For any `GraphTransition` whose id
1072
+ * is `${N}-${K}`, `result.get(N)!.transitionSymbols[K]` is the Symbol
1073
+ * the transition fires on (reference equality, not structural). The K-th
1074
+ * entry is the K-th key from the source State's `#symbolToDataMap` in
1075
+ * insertion order, including `ifOtherSymbol` when the user wrote one.
1076
+ * Consumers filtering the catch-all path identity-compare against the
1077
+ * engine-exported `ifOtherSymbol`.
1078
+ *
1079
+ * **Unbound-`Reference` slots.** `toGraph` increments `patternIx` even
1080
+ * when a transition's `nextState` is an unresolved `Reference` (it
1081
+ * `continue`s without pushing the GraphTransition). In that case
1082
+ * `transitionSymbols[K]` is still set to the K-th Map key, but no
1083
+ * `Graph.nodes[N].transitions` entry exists with id `${N}-${K}`. Sparse
1084
+ * on the Graph side, dense on the `transitionSymbols` side — same
1085
+ * indexing.
1086
+ *
1087
+ * **Coverage.** Map keys are the State-backed subset of `graph.nodes`:
1088
+ * regulars + bares + wrappers + the halt singleton (id `0`). Synthetic
1089
+ * halt markers (id `-frameId`) are excluded — they all reach the same
1090
+ * `haltState` object at runtime, and the named consumer
1091
+ * ([machines-demo#37](https://github.com/mellonis/machines-demo/issues/37))
1092
+ * surfaces halt-pause via a separate UI control, not via clicks on
1093
+ * halt glyphs. If a future consumer needs uniform-by-id lookup, the
1094
+ * helper can be extended additively.
1095
+ *
1096
+ * **Halt-singleton warning.** `result.get(0)!.state === haltState` — the
1097
+ * process-wide halt. Toggling `.debug` on that entry affects every
1098
+ * machine in the runtime, not just the one this map was built from.
1099
+ */
1100
+ function collectStates(initialState, tapeBlock) {
1101
+ // Anchor on toGraph's authoritative id set — it knows the canonical
1102
+ // ordering of wrapper/bare/regular emission and which nodes are
1103
+ // synthetic halt markers we have to skip. Building our own BFS would
1104
+ // duplicate that logic; reusing the Graph guarantees collectStates'
1105
+ // id keys never drift from toGraph's GraphTransition ids.
1106
+ const graph = toGraph(initialState, tapeBlock);
1107
+ // Walk the State graph to associate each State instance with its
1108
+ // engine id. The shape mirrors toGraph's Pass 1 — visit by id, branch
1109
+ // on halt / wrapper / regular — but only collects the (id → State)
1110
+ // mapping. Lighter than re-running the union-find passes; no
1111
+ // GraphNode construction.
1112
+ const stateById = new Map();
1113
+ const visited = new Set();
1114
+ const queue = [initialState];
1115
+ while (queue.length > 0) {
1116
+ const state = queue.shift();
1117
+ const internal = state[STATE_INTERNAL]();
1118
+ if (visited.has(internal.id))
1119
+ continue;
1120
+ visited.add(internal.id);
1121
+ stateById.set(internal.id, state);
1122
+ if (state.isHalt)
1123
+ continue;
1124
+ if (internal.bareState !== null && internal.overriddenHaltState !== null) {
1125
+ queue.push(internal.bareState);
1126
+ queue.push(internal.overriddenHaltState);
1127
+ continue;
1128
+ }
1129
+ for (const { nextState } of internal.symbolToDataMap.values()) {
1130
+ let target;
1131
+ try {
1132
+ target = nextState instanceof State ? nextState : nextState.ref;
634
1133
  }
635
- // Literal alphabet symbols are wrapped in single quotes by
636
- // `decodePatternDescription` — strip them on the way back.
637
- if (cell.length >= 2 && cell.startsWith("'") && cell.endsWith("'")) {
638
- return cell.slice(1, -1);
1134
+ catch {
1135
+ continue; // unbound Reference — skip silently, matches toGraph
639
1136
  }
640
- return cell;
641
- });
642
- });
643
- }
644
- const movementLabelToSymbol = {
645
- L: movements.left,
646
- R: movements.right,
647
- S: movements.stay,
648
- };
649
- function parseMovementLabel(label) {
650
- const m = movementLabelToSymbol[label];
651
- if (!m) {
652
- throw new Error(`unknown movement label: ${label}`);
653
- }
654
- return m;
655
- }
656
- function parseWriteSymbolLabel(label) {
657
- if (label === 'K') {
658
- return symbolCommands.keep;
659
- }
660
- if (label === 'E') {
661
- return symbolCommands.erase;
662
- }
663
- // Literal alphabet symbols are wrapped in single quotes by
664
- // `decodeWriteSymbol` — strip them on the way back.
665
- if (label.length >= 2 && label.startsWith("'") && label.endsWith("'")) {
666
- return label.slice(1, -1);
1137
+ queue.push(target);
1138
+ }
667
1139
  }
668
- return label;
669
- }
670
- function decodeWriteSymbol(symbol) {
671
- if (typeof symbol === 'symbol') {
672
- const description = symbol.description ?? '?';
673
- return symbolCommandDescriptionToLabel[description] ?? description;
1140
+ // Build the result by iterating graph.nodes — the authoritative id set
1141
+ // minus halt markers — and dispatching on node kind. The halt singleton
1142
+ // entry's `state` reads from `stateById` (the BFS visited haltState if
1143
+ // any path reached it) but falls back to the module-level singleton
1144
+ // for graphs whose only halt presence is the always-emitted sentinel.
1145
+ const result = new Map();
1146
+ for (const idStr of Object.keys(graph.nodes)) {
1147
+ const id = Number(idStr);
1148
+ const node = graph.nodes[id];
1149
+ if (node.isHaltMarker)
1150
+ continue; // synthetic; collapses to haltState at id 0
1151
+ if (node.isHalt) {
1152
+ // The real halt — always the engine-wide singleton. Prefer the
1153
+ // BFS-visited instance for identity-equality with whatever the
1154
+ // caller has; fall back to the module singleton when the BFS
1155
+ // didn't reach haltState (toGraph emits id 0 unconditionally).
1156
+ result.set(id, {
1157
+ state: stateById.get(0) ?? haltState,
1158
+ transitionSymbols: [],
1159
+ });
1160
+ continue;
1161
+ }
1162
+ if (node.isWrapper) {
1163
+ result.set(id, {
1164
+ state: stateById.get(id),
1165
+ transitionSymbols: [],
1166
+ });
1167
+ continue;
1168
+ }
1169
+ // Regular or bare State — enumerate `#symbolToDataMap.keys()` for
1170
+ // the patternIx alignment. The K-th key is the Symbol that
1171
+ // `${id}-${K}` GraphTransition fires on (positional contract).
1172
+ const state = stateById.get(id);
1173
+ const transitionSymbols = [...state[STATE_INTERNAL]().symbolToDataMap.keys()];
1174
+ result.set(id, { state, transitionSymbols });
674
1175
  }
675
- return `'${symbol}'`;
1176
+ return result;
676
1177
  }
677
- // Format converters (toMermaid / fromMermaid) live in ./graphFormats.
1178
+ // Note on the import cycle with `State.ts`: stateGraph.ts value-imports
1179
+ // `State`, `STATE_INTERNAL`, `haltState`, and `ifOtherSymbol`; State.ts
1180
+ // value-imports `toGraph` and `fromGraph` for its static-method delegates.
1181
+ // ESM resolves cycles via live bindings — both modules see each other's
1182
+ // exports as long as nothing at module-load reads a binding before its
1183
+ // source module finishes evaluating. All references here live inside
1184
+ // function bodies, so the cycle is safe.
678
1185
 
679
1186
  var __classPrivateFieldSet$1 = (undefined && undefined.__classPrivateFieldSet) || function (receiver, state, value, kind, f) {
680
1187
  if (kind === "m") throw new TypeError("Private method is not writable");
@@ -692,6 +1199,28 @@ const ifOtherSymbol = Symbol('other symbol');
692
1199
  // Module-private symbol used by DebugConfig setters to call State's validator
693
1200
  // without exposing the validator on the public surface.
694
1201
  const validateDebugFilter = Symbol('validateDebugFilter');
1202
+ /**
1203
+ * @internal
1204
+ *
1205
+ * Package-private accessor key for sibling modules in
1206
+ * `packages/machine/src` (e.g. `utilities/stateGraph.ts`, and the planned
1207
+ * `utilities/stateCollect.ts` for #195). Re-exported from this module so
1208
+ * sibling files can import it; intentionally NOT re-exported from the
1209
+ * package's public `index.ts`, so downstream consumers don't see it on
1210
+ * the supported surface.
1211
+ *
1212
+ * Calling `state[STATE_INTERNAL]()` returns a getter/setter view onto the
1213
+ * State's private fields. Reads are live (they close over `this`), so the
1214
+ * view stays in sync with subsequent mutations on the State. There's one
1215
+ * mutating setter on the view — `name` — used exclusively by
1216
+ * `fromGraph` to assign graph-sourced composite names (e.g. `A(target)`)
1217
+ * that the public name validator would reject; see the JSDoc on the
1218
+ * accessor itself.
1219
+ *
1220
+ * Designed in #180 with #195 in mind so its surface doesn't need to grow
1221
+ * when `collectStates` lands.
1222
+ */
1223
+ const STATE_INTERNAL = Symbol('State.internal');
695
1224
  class DebugConfig {
696
1225
  constructor(ownerState, initial) {
697
1226
  _DebugConfig_ownerState.set(this, void 0);
@@ -954,6 +1483,47 @@ class State {
954
1483
  innerCache.set(overriddenHaltState, new WeakRef(state));
955
1484
  return state;
956
1485
  }
1486
+ /**
1487
+ * @internal
1488
+ *
1489
+ * Package-private getter/setter view onto this State's private fields,
1490
+ * for sibling modules in `packages/machine/src` (currently `stateGraph.ts`
1491
+ * for `toGraph` / `fromGraph` and for #195's `collectStates`, which
1492
+ * landed in the same module).
1493
+ *
1494
+ * Read access is live — the getters close over `this`, so the view
1495
+ * stays in sync with subsequent mutations on this State. There's a
1496
+ * single mutating setter on the view, `name`, which exists to let
1497
+ * `fromGraph` assign graph-sourced composite names (e.g. `A(target)`)
1498
+ * to freshly-constructed bare States. The constructor's name validator
1499
+ * rejects parens (reserved as wrapper-composition delimiters in
1500
+ * `withOverriddenHaltState`); the setter intentionally bypasses that
1501
+ * check because the same delimiters appear in legitimate wrapper-bare
1502
+ * names round-tripped through the graph.
1503
+ *
1504
+ * Returns a fresh view object on every call — cheap enough for the
1505
+ * BFS-once-per-build callers, and avoids holding a reference object on
1506
+ * every State instance. Keep this surface tight: callers should only
1507
+ * read what they need. Adding fields here is a deliberate decision —
1508
+ * each adds to the implicit contract sibling modules can rely on.
1509
+ */
1510
+ [STATE_INTERNAL]() {
1511
+ // Aliasing `this` so the nested object-literal getters/setters below
1512
+ // can read/write the enclosing State's private fields — getters in an
1513
+ // object literal can't be arrow functions, so the standard arrow-
1514
+ // captures-`this` trick doesn't apply here.
1515
+ // eslint-disable-next-line @typescript-eslint/no-this-alias
1516
+ const self = this;
1517
+ return {
1518
+ get id() { return __classPrivateFieldGet$1(self, _State_id, "f"); },
1519
+ get name() { return __classPrivateFieldGet$1(self, _State_name, "f"); },
1520
+ set name(v) { __classPrivateFieldSet$1(self, _State_name, v, "f"); },
1521
+ get bareState() { return __classPrivateFieldGet$1(self, _State_bareState, "f"); },
1522
+ get overriddenHaltState() { return __classPrivateFieldGet$1(self, _State_overriddenHaltState, "f"); },
1523
+ get symbolToDataMap() { return __classPrivateFieldGet$1(self, _State_symbolToDataMap, "f"); },
1524
+ get tags() { return __classPrivateFieldGet$1(self, _State_tags, "f"); },
1525
+ };
1526
+ }
957
1527
  // Single-state introspection — no traversal, no tapeBlock required.
958
1528
  // Returns id, name, halt-status, override-halt target, and the list of
959
1529
  // transitions out of this state with decoded write/movement labels.
@@ -988,382 +1558,36 @@ class State {
988
1558
  transitions,
989
1559
  };
990
1560
  }
991
- // Walks the State graph and emits a `Graph` data structure. v7 callable-
992
- // subtree emit shape (#174):
993
- //
994
- // Each `withOverriddenHaltState` wrapper produces TWO graph nodes:
995
- // - A wrapper node (`isWrapper: true`, `[[composite-name]]` shape) — the
996
- // call site. No transitions of its own. `bareStateId` points to the
997
- // bare's GraphNode; `overriddenHaltStateId` points to the override
998
- // target's GraphNode.
999
- // - A bare node (`isWrapper: false`, regular shape) — the callable body.
1000
- // Has the bare's transitions. Shared across all wrappers that wrap
1001
- // this bare (no per-context duplication).
1002
- //
1003
- // Frames are computed via union-find on bare reachability: two bares whose
1004
- // forward-reachable sets overlap merge into one frame. Each frame contains
1005
- // its bares + body states + a single halt marker (id = `-frameId`). The
1006
- // canonical `frameId` is the smallest bare-id in the component.
1007
- //
1008
- // Halt-bound transitions of any in-frame state are retargeted to the
1009
- // frame's halt marker. The frame's `subtree -. return .-> wrapper` and
1010
- // `subtree -. halt .-> s0` arrows are demand-emitted by `toMermaid` from
1011
- // the frame structure; they're not stored as graph edges.
1561
+ /**
1562
+ * Walks the reachable State graph from `initialState` and returns a
1563
+ * serializable `Graph`. Thin delegate to `utilities/stateGraph.ts`'s
1564
+ * `toGraph` (extracted in #180); see that module for the BFS shape and
1565
+ * v7 callable-subtree emit semantics.
1566
+ */
1012
1567
  static toGraph(initialState, tapeBlock) {
1013
- const nodes = {};
1014
- const alphabets = tapeBlock.alphabets.map((alphabet) => alphabet.symbols);
1015
- // Pass 1: BFS-discover all reachable States; emit one GraphNode per State
1016
- // (wrapper or bare/regular). Wrappers and bares are separate nodes.
1017
- const visited = new Set();
1018
- const queue = [initialState];
1019
- const bareIds = new Set(); // ids referenced as a wrapper's bareStateId
1020
- while (queue.length > 0) {
1021
- const state = queue.shift();
1022
- if (visited.has(__classPrivateFieldGet$1(state, _State_id, "f"))) {
1023
- continue;
1024
- }
1025
- visited.add(__classPrivateFieldGet$1(state, _State_id, "f"));
1026
- if (state.isHalt) {
1027
- if (!(0 in nodes)) {
1028
- nodes[0] = {
1029
- id: 0,
1030
- name: __classPrivateFieldGet$1(state, _State_name, "f"),
1031
- isHalt: true,
1032
- isHaltMarker: false,
1033
- isWrapper: false,
1034
- bareStateId: null,
1035
- frameId: null,
1036
- transitions: [],
1037
- overriddenHaltStateId: null,
1038
- tags: [...__classPrivateFieldGet$1(state, _State_tags, "f")],
1039
- };
1040
- }
1041
- continue;
1042
- }
1043
- // Wrapper? Emit wrapper node + queue bare and override target.
1044
- if (__classPrivateFieldGet$1(state, _State_overriddenHaltState, "f") !== null && __classPrivateFieldGet$1(state, _State_bareState, "f") !== null) {
1045
- const bareState = __classPrivateFieldGet$1(state, _State_bareState, "f");
1046
- const overrideTarget = __classPrivateFieldGet$1(state, _State_overriddenHaltState, "f");
1047
- nodes[__classPrivateFieldGet$1(state, _State_id, "f")] = {
1048
- id: __classPrivateFieldGet$1(state, _State_id, "f"),
1049
- name: __classPrivateFieldGet$1(state, _State_name, "f"), // composite name like "A(target)"
1050
- isHalt: false,
1051
- isHaltMarker: false,
1052
- isWrapper: true,
1053
- bareStateId: __classPrivateFieldGet$1(bareState, _State_id, "f"),
1054
- frameId: null,
1055
- transitions: [],
1056
- overriddenHaltStateId: __classPrivateFieldGet$1(overrideTarget, _State_id, "f"),
1057
- tags: [...__classPrivateFieldGet$1(state, _State_tags, "f")],
1058
- };
1059
- bareIds.add(__classPrivateFieldGet$1(bareState, _State_id, "f"));
1060
- queue.push(bareState);
1061
- queue.push(overrideTarget);
1062
- continue;
1063
- }
1064
- // Regular (or bare) state — build node with transitions.
1065
- const node = {
1066
- id: __classPrivateFieldGet$1(state, _State_id, "f"),
1067
- name: __classPrivateFieldGet$1(state, _State_name, "f"),
1068
- isHalt: false,
1069
- isHaltMarker: false,
1070
- isWrapper: false,
1071
- bareStateId: null,
1072
- frameId: null,
1073
- transitions: [],
1074
- overriddenHaltStateId: null,
1075
- tags: [...__classPrivateFieldGet$1(state, _State_tags, "f")],
1076
- };
1077
- nodes[__classPrivateFieldGet$1(state, _State_id, "f")] = node;
1078
- let patternIx = 0;
1079
- for (const [sym, { command, nextState }] of __classPrivateFieldGet$1(state, _State_symbolToDataMap, "f")) {
1080
- let target;
1081
- try {
1082
- target = nextState instanceof _a ? nextState : nextState.ref;
1083
- }
1084
- catch {
1085
- patternIx += 1;
1086
- continue;
1087
- }
1088
- node.transitions.push({
1089
- pattern: decodePatternDescription(sym.description, alphabets),
1090
- command: command.tapesCommands.map((tc) => ({
1091
- symbol: decodeWriteSymbol(tc.symbol),
1092
- movement: decodeMovement(tc.movement.description),
1093
- })),
1094
- nextStateId: __classPrivateFieldGet$1(target, _State_id, "f"),
1095
- id: `${__classPrivateFieldGet$1(state, _State_id, "f")}-${patternIx}`,
1096
- });
1097
- queue.push(target);
1098
- patternIx += 1;
1099
- }
1100
- }
1101
- // Always emit real halt as a sentinel, even if no transition targets it.
1102
- // It anchors the `subtree -. halt .-> s0` frame-level arrow whenever a
1103
- // frame demand-emits one, and it's the canonical machine-halt singleton.
1104
- if (!(0 in nodes)) {
1105
- nodes[0] = {
1106
- id: 0,
1107
- name: 'halt',
1108
- isHalt: true,
1109
- isHaltMarker: false,
1110
- isWrapper: false,
1111
- bareStateId: null,
1112
- frameId: null,
1113
- transitions: [],
1114
- overriddenHaltStateId: null,
1115
- tags: [...__classPrivateFieldGet$1(haltState, _State_tags, "f")],
1116
- };
1117
- }
1118
- // Pass 2: For each bare, compute its forward-reachable set (following
1119
- // transitions; stopping at halt and at wrappers — both are frame
1120
- // boundaries).
1121
- const computeReach = (startId) => {
1122
- const reach = new Set();
1123
- const stack = [startId];
1124
- while (stack.length > 0) {
1125
- const id = stack.pop();
1126
- if (reach.has(id)) {
1127
- continue;
1128
- }
1129
- const node = nodes[id];
1130
- // `nodes[id]` is always populated for `id` that the BFS reached, so
1131
- // a defensive `!node` check would be dead. `isHalt` / `isWrapper`
1132
- // are real boundaries — both stop reach-set expansion.
1133
- if (node.isHalt || node.isWrapper) {
1134
- continue;
1135
- }
1136
- reach.add(id);
1137
- for (const t of node.transitions) {
1138
- const target = nodes[t.nextStateId];
1139
- if (!target || target.isHalt || target.isWrapper) {
1140
- continue;
1141
- }
1142
- stack.push(t.nextStateId);
1143
- }
1144
- }
1145
- return reach;
1146
- };
1147
- const reachByBare = new Map();
1148
- for (const bareId of bareIds) {
1149
- reachByBare.set(bareId, computeReach(bareId));
1150
- }
1151
- // Pass 3: Union-find on bare overlaps. Two bares merge if their reach
1152
- // sets share any state. Canonical representative = smallest bare-id in
1153
- // the component.
1154
- const ufParent = new Map();
1155
- // Note: no path compression. The union policy below ("smaller id always
1156
- // becomes root") keeps the tree flat — every union targets bares[0] as
1157
- // the root, so any node's parent IS the root. Walking up never exceeds
1158
- // one step. Path compression would be dead code under this invariant.
1159
- const ufFind = (id) => {
1160
- if (!ufParent.has(id)) {
1161
- ufParent.set(id, id);
1162
- }
1163
- let root = id;
1164
- while (ufParent.get(root) !== root) {
1165
- root = ufParent.get(root);
1166
- }
1167
- return root;
1168
- };
1169
- const ufUnion = (a, b) => {
1170
- const ra = ufFind(a);
1171
- const rb = ufFind(b);
1172
- if (ra === rb)
1173
- return;
1174
- if (ra < rb) {
1175
- ufParent.set(rb, ra);
1176
- }
1177
- else {
1178
- ufParent.set(ra, rb);
1179
- }
1180
- };
1181
- for (const bareId of bareIds) {
1182
- ufFind(bareId);
1183
- }
1184
- // For each state, collect the bares that reach it; union all bares that
1185
- // share a state.
1186
- const stateToReachingBares = new Map();
1187
- for (const [bareId, reachSet] of reachByBare) {
1188
- for (const stateId of reachSet) {
1189
- let bares = stateToReachingBares.get(stateId);
1190
- if (!bares) {
1191
- bares = [];
1192
- stateToReachingBares.set(stateId, bares);
1193
- }
1194
- bares.push(bareId);
1195
- }
1196
- }
1197
- for (const bares of stateToReachingBares.values()) {
1198
- for (let i = 1; i < bares.length; i += 1) {
1199
- ufUnion(bares[0], bares[i]);
1200
- }
1201
- }
1202
- // Assign frameId to each in-reach state.
1203
- const frameIds = new Set();
1204
- for (const [stateId, bares] of stateToReachingBares) {
1205
- const frameId = ufFind(bares[0]);
1206
- nodes[stateId].frameId = frameId;
1207
- frameIds.add(frameId);
1208
- }
1209
- // Pass 4: Retarget halt-bound transitions for in-frame states to the
1210
- // frame's halt marker. Out-of-frame states (top-level dispatcher, override
1211
- // targets, etc.) keep their halt-bound transitions pointing at real halt.
1212
- for (const node of Object.values(nodes)) {
1213
- if (node.frameId === null) {
1214
- continue;
1215
- }
1216
- const haltMarkerId = -node.frameId;
1217
- for (const t of node.transitions) {
1218
- const target = nodes[t.nextStateId];
1219
- if (target && target.isHalt && !target.isHaltMarker) {
1220
- t.nextStateId = haltMarkerId;
1221
- }
1222
- }
1223
- }
1224
- // Pass 5: Emit one halt marker per frame.
1225
- for (const frameId of frameIds) {
1226
- const haltMarkerId = -frameId;
1227
- nodes[haltMarkerId] = {
1228
- id: haltMarkerId,
1229
- name: 'halt',
1230
- isHalt: true,
1231
- isHaltMarker: true,
1232
- isWrapper: false,
1233
- bareStateId: null,
1234
- frameId,
1235
- transitions: [],
1236
- overriddenHaltStateId: null,
1237
- tags: [],
1238
- };
1239
- }
1240
- return { initialId: __classPrivateFieldGet$1(initialState, _State_id, "f"), alphabets, nodes };
1568
+ return toGraph(initialState, tapeBlock);
1241
1569
  }
1242
- // Inverse of toGraph: rebuilds a State graph (and a fresh TapeBlock with the
1243
- // graph's alphabets) from a serialized Graph. Round-trips with toGraph in
1244
- // the sense that running the rebuilt machine on the same input gives the
1245
- // same output, but the rebuilt State instances have *new* internal IDs.
1246
- //
1247
- // Under the v7 callable-subtree model (#174), graph nodes split into:
1248
- // - Wrapper nodes (`isWrapper: true`, no transitions) — reconstructed via
1249
- // `bareStates[bareStateId].withOverriddenHaltState(finalStates[overriddenHaltStateId])`.
1250
- // - Bare/regular nodes — constructed as normal States with transitions.
1251
- // - Halt + halt-marker nodes — collapse to the singleton `haltState`.
1570
+ /**
1571
+ * Inverse of `toGraph`: rebuilds a State graph and a fresh TapeBlock
1572
+ * from a serialized `Graph`. Thin delegate to `utilities/stateGraph.ts`'s
1573
+ * `fromGraph` (extracted in #180); see that module for the
1574
+ * reconstruction pass shape (Reference pre-create, bare build, wrapper
1575
+ * resolution via `withOverriddenHaltState`, ref binding).
1576
+ */
1252
1577
  static fromGraph(graph) {
1253
- const alphabetObjs = graph.alphabets.map((syms) => new Alphabet(syms));
1254
- const tapeBlock = TapeBlock.fromAlphabets(alphabetObjs);
1255
- const ids = Object.keys(graph.nodes).map(Number);
1256
- // Pass 1: pre-create a Reference for each non-halt non-halt-marker node
1257
- // (both wrappers and regulars). Halt and halt-marker nodes collapse to the
1258
- // singleton `haltState` and need no ref.
1259
- const refs = {};
1260
- for (const nodeId of ids) {
1261
- const node = graph.nodes[nodeId];
1262
- if (!node.isHalt) {
1263
- refs[nodeId] = new Reference();
1264
- }
1265
- }
1266
- // Convert a parsed pattern back to the symbol key the State expects.
1267
- const patternToKey = (parsed) => {
1268
- if (parsed === null) {
1269
- return ifOtherSymbol;
1270
- }
1271
- const flat = [];
1272
- for (const row of parsed) {
1273
- for (const cell of row) {
1274
- flat.push(cell === null ? ifOtherSymbol : cell);
1275
- }
1276
- }
1277
- return tapeBlock.symbol(flat);
1278
- };
1279
- // Pass 2: build a State for each non-wrapper non-halt non-halt-marker
1280
- // node. Transitions point at refs so cycles work; haltState (and halt
1281
- // markers, which collapse to haltState) are used directly.
1282
- const bareStates = {};
1283
- for (const nodeId of ids) {
1284
- const node = graph.nodes[nodeId];
1285
- if (node.isHalt || node.isWrapper) {
1286
- continue;
1287
- }
1288
- const stateDefinition = {};
1289
- for (const t of node.transitions) {
1290
- const key = patternToKey(parsePatternString(t.pattern, graph.alphabets));
1291
- const target = graph.nodes[t.nextStateId];
1292
- const nextState = !target || target.isHalt
1293
- ? haltState
1294
- : refs[t.nextStateId];
1295
- stateDefinition[key] = {
1296
- command: t.command.map((c) => ({
1297
- symbol: parseWriteSymbolLabel(c.symbol),
1298
- movement: parseMovementLabel(c.movement),
1299
- })),
1300
- nextState,
1301
- };
1302
- }
1303
- // Graph-sourced names may contain `(` and `)` (composite wrapper names —
1304
- // although wrappers go through a separate path below, defensive
1305
- // construction here keeps the bypass uniform). Construct without a name
1306
- // and assign `#name` directly to skip user-facing name validation.
1307
- const bare = new _a(stateDefinition);
1308
- __classPrivateFieldSet$1(bare, _State_name, node.name, "f");
1309
- if (node.tags.length > 0) {
1310
- bare.tag(...node.tags);
1311
- }
1312
- bareStates[nodeId] = bare;
1313
- }
1314
- // Pass 3: resolve every node to its final State (memoized + cycle-safe).
1315
- // Wrappers compose lazily via `withOverriddenHaltState` once their bare
1316
- // and override are resolved.
1317
- const finalStates = {};
1318
- const inProgress = new Set();
1319
- const getFinal = (nodeId) => {
1320
- if (finalStates[nodeId]) {
1321
- return finalStates[nodeId];
1322
- }
1323
- const node = graph.nodes[nodeId];
1324
- if (!node || node.isHalt) {
1325
- finalStates[nodeId] = haltState;
1326
- return haltState;
1327
- }
1328
- if (inProgress.has(nodeId)) {
1329
- throw new Error(`override-halt cycle at state #${nodeId}`);
1330
- }
1331
- inProgress.add(nodeId);
1332
- let state;
1333
- if (node.isWrapper) {
1334
- const bare = getFinal(node.bareStateId);
1335
- const override = getFinal(node.overriddenHaltStateId);
1336
- state = bare.withOverriddenHaltState(override);
1337
- // Apply wrapper-scoped tags (#186). Tags don't leak across wrappers
1338
- // sharing a bare — the wrapper instance owns its own tag set, and
1339
- // engine #175 memoization returns the same instance for the same
1340
- // (bare, override) pair, so this is idempotent across rebuilds.
1341
- if (node.tags.length > 0) {
1342
- state.tag(...node.tags);
1343
- }
1344
- }
1345
- else {
1346
- state = bareStates[nodeId];
1347
- }
1348
- inProgress.delete(nodeId);
1349
- finalStates[nodeId] = state;
1350
- return state;
1351
- };
1352
- for (const nodeId of ids) {
1353
- getFinal(nodeId);
1354
- }
1355
- // Pass 4: bind each ref to the resolved final State so cross-node
1356
- // transitions land on the right instance.
1357
- for (const nodeId of ids) {
1358
- if (!graph.nodes[nodeId].isHalt) {
1359
- refs[nodeId].bind(finalStates[nodeId]);
1360
- }
1361
- }
1362
- return {
1363
- start: finalStates[graph.initialId],
1364
- tapeBlock,
1365
- states: finalStates,
1366
- };
1578
+ return fromGraph(graph);
1579
+ }
1580
+ /**
1581
+ * Returns a `Map<number, {state, transitionSymbols}>` keyed by engine
1582
+ * `GraphNode.id`, exposing the live `State` instance + per-pattern
1583
+ * Symbol references for each node so downstream tooling can mutate
1584
+ * `state.debug` by numeric id and set per-pattern breakpoints by
1585
+ * `GraphTransition.id` (#195). Thin delegate to
1586
+ * `utilities/stateGraph.ts`'s `collectStates`; see that module for
1587
+ * the alignment contract, coverage rules, and halt-singleton warning.
1588
+ */
1589
+ static collectStates(initialState, tapeBlock) {
1590
+ return collectStates(initialState, tapeBlock);
1367
1591
  }
1368
1592
  }
1369
1593
  _a = State;
@@ -1386,7 +1610,7 @@ var __classPrivateFieldGet = (undefined && undefined.__classPrivateFieldGet) ||
1386
1610
  if (typeof state === "function" ? receiver !== state || !f : !state.has(receiver)) throw new TypeError("Cannot read private member from an object whose class did not declare it");
1387
1611
  return kind === "m" ? f : kind === "a" ? f.call(receiver) : f ? f.value : state.get(receiver);
1388
1612
  };
1389
- var _TuringMachine_tapeBlock, _TuringMachine_stack;
1613
+ var _TuringMachine_tapeBlock;
1390
1614
  // True iff `filter` matches `symbol` per the DebugConfig semantics.
1391
1615
  // undefined / [] -> never; true -> always; symbol[] -> exact membership.
1392
1616
  function matchFilter(filter, symbol) {
@@ -1399,7 +1623,6 @@ function matchFilter(filter, symbol) {
1399
1623
  class TuringMachine {
1400
1624
  constructor({ tapeBlock, } = {}) {
1401
1625
  _TuringMachine_tapeBlock.set(this, void 0);
1402
- _TuringMachine_stack.set(this, []);
1403
1626
  if (!tapeBlock) {
1404
1627
  throw new Error('invalid tapeBlock');
1405
1628
  }
@@ -1433,7 +1656,14 @@ class TuringMachine {
1433
1656
  try {
1434
1657
  __classPrivateFieldGet(this, _TuringMachine_tapeBlock, "f")[lockSymbol].check(executionSymbol);
1435
1658
  __classPrivateFieldGet(this, _TuringMachine_tapeBlock, "f")[lockSymbol].lock(executionSymbol);
1436
- const stack = __classPrivateFieldGet(this, _TuringMachine_stack, "f");
1659
+ // Halt-stack is run-scoped, not machine-scoped (#196). Declaring it
1660
+ // local makes that lifetime explicit and prevents leftover entries
1661
+ // from a previous `runStepByStep` call (e.g. a build-time peek that
1662
+ // never drained the generator) from being popped during a subsequent
1663
+ // halt-bound transition. Before this change `#stack` was an instance
1664
+ // field and accumulated one extra push per call when the same machine
1665
+ // was reused.
1666
+ const stack = [];
1437
1667
  let state = initialState;
1438
1668
  if (state.overriddenHaltState) {
1439
1669
  stack.push(state.overriddenHaltState);
@@ -1508,7 +1738,7 @@ class TuringMachine {
1508
1738
  }
1509
1739
  }
1510
1740
  }
1511
- _TuringMachine_tapeBlock = new WeakMap(), _TuringMachine_stack = new WeakMap();
1741
+ _TuringMachine_tapeBlock = new WeakMap();
1512
1742
 
1513
1743
  // Format converters between a Graph (the data model produced by State.toGraph
1514
1744
  // and consumed by State.fromGraph) and external string representations.
@@ -1551,6 +1781,79 @@ function parseMermaidId(s) {
1551
1781
  function frameSubgraphId(frameId) {
1552
1782
  return `w_${frameId}`;
1553
1783
  }
1784
+ // User-controlled content (state names, tag names, alphabet symbols inside
1785
+ // edge labels) is interpolated into Mermaid label strings (`"..."` wrappers
1786
+ // on nodes, wrappers, subgraphs, and edges). Mermaid's grammar terminates
1787
+ // the string on a literal `"`, and labels render via HTML/foreignObject so
1788
+ // `<`, `>`, `&` get interpreted as markup. Statement terminators (`\n`,
1789
+ // `\r`), C0 controls (except `\t`), DEL, bidi controls, and lone UTF-16
1790
+ // surrogates are encoded as numeric entities so they can't confuse the
1791
+ // tokenizer or flip text direction silently (#194).
1792
+ //
1793
+ // Printable Unicode (Cyrillic, CJK, emoji, accented Latin, etc.) passes
1794
+ // through unchanged — a tape alphabet of Cyrillic or Brainfuck glyphs
1795
+ // stays readable in the emitted `.mmd`.
1796
+ //
1797
+ // Escape is applied at the leaf — to each user-supplied fragment BEFORE
1798
+ // it's composed into a label. Structural pieces this module emits (`<br>`
1799
+ // tag separator, ` ∪ ` bare-name join, `[`, `]`, `,`, `|`, `/`, ` → `,
1800
+ // the `callable subtree of `/`callable scope: ` prefixes) are NOT escaped;
1801
+ // only user-controlled content is. fromMermaid mirrors with
1802
+ // `unescapeMermaidLabel` on each extracted leaf AFTER structural parsing,
1803
+ // so a literal `<br>` inside a state name (encoded as `&lt;br&gt;`)
1804
+ // survives the tag-split and decodes back at the leaf.
1805
// Characters that must be entity-encoded inside a Mermaid `"..."` label:
// `&`, `"`, `<`, `>`, line terminators, C0 controls other than `\t`, DEL,
// bidi controls, and LONE UTF-16 surrogates.
//
// The `u` flag is load-bearing: it makes the character class operate on code
// points, so a well-formed surrogate pair (emoji and other astral characters)
// is seen as one code point above U+FFFF and is left alone. Without the flag
// the class matches each half of every pair, which turned `😀` into
// `&#55357;&#56832;` — unreadable in the emitted `.mmd` and invalid as HTML
// numeric character references when the label is rendered.
const MERMAID_LABEL_ESCAPE_RE = /[&"<>\n\r\u0000-\u0008\u000B\u000C\u000E-\u001F\u007F\u202A-\u202E\u2066-\u2069\uD800-\uDFFF]/gu;

/**
 * Entity-encodes one user-supplied fragment for use inside a Mermaid label.
 *
 * The four markup-significant characters get named entities; every other
 * matched character (controls, bidi overrides, lone surrogates) is emitted
 * as a decimal numeric entity of its UTF-16 code unit. Printable Unicode,
 * including astral characters encoded as valid surrogate pairs, passes
 * through unchanged.
 *
 * @param {string} s - Raw user-controlled text (state name, tag, symbol).
 * @returns {string} The text with label-unsafe characters entity-encoded.
 */
function escapeMermaidLabel(s) {
    return s.replace(MERMAID_LABEL_ESCAPE_RE, (ch) => {
        switch (ch) {
            case '&': return '&amp;';
            case '"': return '&quot;';
            case '<': return '&lt;';
            case '>': return '&gt;';
            case '\n': return '&#10;';
            case '\r': return '&#13;';
            // Every remaining match is a single UTF-16 code unit (a lone
            // surrogate is still one unit), so charCodeAt(0) is exact.
            default: return `&#${ch.charCodeAt(0)};`;
        }
    });
}
+ }
1819
+ // Inverse of escapeMermaidLabel. Decodes the four named entities the
1820
+ // encoder emits (`&amp;`, `&quot;`, `&lt;`, `&gt;`) plus arbitrary
1821
+ // numeric entities (`&#NN;`, `&#xHH;`) — the latter to round-trip the
1822
+ // control / bidi / lone-surrogate cases from encode. Other named entities
1823
+ // pass through unchanged: fromMermaid is strict to the dialect toMermaid
1824
+ // emits, and a future-proof full HTML-entity decoder would muddle that.
1825
+ //
1826
+ // Replacement is single-pass: each `&...;` match is consumed once with
1827
+ // no re-scanning of the substitution, so nested-looking inputs like
1828
+ // `&amp;quot;` (literal `&quot;` as user text) decode to `&quot;` not `"`.
1829
// Matches exactly the entity forms the decoder understands: the four named
// entities the encoder emits, decimal `&#NN;`, and hexadecimal `&#xHH;`.
const MERMAID_LABEL_UNESCAPE_RE = /&(?:(amp|quot|lt|gt)|#(\d+)|#x([0-9a-fA-F]+));/g;

// Largest valid Unicode code point; String.fromCodePoint throws above it.
const MERMAID_LABEL_MAX_CODE_POINT = 0x10FFFF;

/**
 * Inverse of `escapeMermaidLabel`: decodes `&amp;`, `&quot;`, `&lt;`, `&gt;`
 * and numeric entities back to characters in a single pass (substitutions
 * are never re-scanned, so `&amp;quot;` decodes to the text `&quot;`).
 *
 * Numeric entities whose value is not a valid Unicode code point (above
 * U+10FFFF, or too large to parse to a finite integer) are left in the
 * output verbatim instead of throwing a `RangeError` from
 * `String.fromCodePoint` — the same pass-through treatment unknown named
 * entities get.
 *
 * @param {string} s - Label fragment possibly containing entities.
 * @returns {string} The decoded text.
 */
function unescapeMermaidLabel(s) {
    return s.replace(MERMAID_LABEL_UNESCAPE_RE, (match, named, dec, hex) => {
        switch (named) {
            case 'amp': return '&';
            case 'quot': return '"';
            case 'lt': return '<';
            case 'gt': return '>';
            default: break;
        }
        // Exactly one of `dec` / `hex` is defined when `named` is not.
        const n = dec !== undefined
            ? Number.parseInt(dec, 10)
            : Number.parseInt(hex, 16);
        if (!Number.isInteger(n) || n > MERMAID_LABEL_MAX_CODE_POINT) {
            return match;
        }
        // Code units up to U+FFFF decode via fromCharCode so lone surrogates
        // that were encoded per UTF-16 code unit round-trip exactly.
        // Supplementary code points (e.g. a hand-written `&#x1F600;`) need
        // fromCodePoint to produce the surrogate pair.
        return n <= 0xFFFF ? String.fromCharCode(n) : String.fromCodePoint(n);
    });
}
1554
1857
  function toMermaid(graph) {
1555
1858
  const lines = [
1556
1859
  'flowchart TD',
@@ -1589,9 +1892,18 @@ function toMermaid(graph) {
1589
1892
  // Mermaid line-break that works across renderers without `classDef`-
1590
1893
  // pseudo-element hacks (#186).
1591
1894
  const labelOf = (node) => {
1895
+ const name = escapeMermaidLabel(node.name);
1592
1896
  if (node.tags.length === 0)
1593
- return node.name;
1594
- return `${node.name}<br>${node.tags.join(', ')}`;
1897
+ return name;
1898
+ // Per-tag escape that ALSO encodes `,` — tags are joined with `, ` and
1899
+ // split on `,` in `splitLabelTags`, so a literal comma in user tag
1900
+ // content would be mistaken for a separator on the way back. `,` isn't
1901
+ // in the base escape set because it's structural in edge labels
1902
+ // (between per-tape cells in `writes`/`moves`), where the encode pass
1903
+ // happens after composition — different context, different escape.
1904
+ const tagFragments = node.tags
1905
+ .map((t) => escapeMermaidLabel(t).replace(/,/g, '&#44;'));
1906
+ return `${name}<br>${tagFragments.join(', ')}`;
1595
1907
  };
1596
1908
  // 1. Emit top-level nodes (real halt, non-wrapper regulars outside any frame).
1597
1909
  for (const node of topLevelNodes) {
@@ -1616,7 +1928,7 @@ function toMermaid(graph) {
1616
1928
  const frameBareNames = frameBares
1617
1929
  .slice()
1618
1930
  .sort((a, b) => a.id - b.id)
1619
- .map((n) => n.name);
1931
+ .map((n) => escapeMermaidLabel(n.name));
1620
1932
  const label = frameBareNames.length > 1
1621
1933
  ? `callable scope: ${frameBareNames.join(' ∪ ')}`
1622
1934
  : `callable subtree of ${frameBareNames[0] ?? frameId}`;
@@ -1718,7 +2030,12 @@ function toMermaid(graph) {
1718
2030
  const reads = alternatives.map((alt) => `[${alt}]`).join('|');
1719
2031
  const writes = `[${t.command.map((c) => c.symbol).join(',')}]`;
1720
2032
  const moves = `[${t.command.map((c) => c.movement).join(',')}]`;
1721
- const label = `${reads} ${writes}/${moves}`;
2033
+ // Escape the WHOLE composed label — structural separators ([, ], ,,
2034
+ // |, /, ' → ') all fall outside the escape set and pass through
2035
+ // unchanged; only embedded user alphabet symbols inside `'...'` get
2036
+ // entity-encoded. fromMermaid unescapes the captured label as the
2037
+ // first step before structural parsing.
2038
+ const label = escapeMermaidLabel(`${reads} → ${writes}/${moves}`);
1722
2039
  lines.push(` ${mermaidIdFor(node.id)} -- "${label}" --> ${mermaidIdFor(t.nextStateId)}`);
1723
2040
  }
1724
2041
  }
@@ -1849,14 +2166,23 @@ const classAssignTagRegex = /^class ([sc]\d+(?:,[sc]\d+)*) tag_([A-Za-z0-9_-]+)$
1849
2166
  // Labels without `<br>` have no tags. Tags are comma-joined; trimmed of
1850
2167
  // whitespace. The `<br>` is the single source of truth for tag-name parsing —
1851
2168
  // `class` lines are decorative-only and not consulted here.
2169
+ //
2170
+ // Mermaid-label entities (`&lt;`, `&quot;`, etc., #194) are decoded AFTER
2171
+ // structural splitting: the `<br>` separator and `,` tag delimiter survive
2172
+ // encode unchanged, and a user state name / tag containing a literal `<br>`
2173
+ // or `,` was encoded leaf-side so it can't be confused with the structural
2174
+ // form. Decode at the leaves recovers the original characters.
1852
2175
  function splitLabelTags(label) {
1853
2176
  const brIx = label.indexOf('<br>');
1854
2177
  if (brIx < 0) {
1855
- return { name: label, tags: [] };
2178
+ return { name: unescapeMermaidLabel(label), tags: [] };
1856
2179
  }
1857
- const name = label.slice(0, brIx);
2180
+ const name = unescapeMermaidLabel(label.slice(0, brIx));
1858
2181
  const tagsStr = label.slice(brIx + '<br>'.length);
1859
- const tags = tagsStr.split(',').map((t) => t.trim()).filter((t) => t.length > 0);
2182
+ const tags = tagsStr
2183
+ .split(',')
2184
+ .map((t) => unescapeMermaidLabel(t.trim()))
2185
+ .filter((t) => t.length > 0);
1860
2186
  return { name, tags };
1861
2187
  }
1862
2188
  function fromMermaid(text) {
@@ -2016,7 +2342,12 @@ function fromMermaid(text) {
2016
2342
  const tm = line.match(labeledTransitionRegex);
2017
2343
  if (tm) {
2018
2344
  const fromId = parseMermaidId(tm[1]);
2019
- const label = tm[2];
2345
+ // Decode the WHOLE captured label up front (#194). Structural
2346
+ // separators (`[`, `]`, `,`, `|`, `/`, ` → `) all fall
2347
+ // outside the escape set and pass through encode unchanged, so it's
2348
+ // safe to decode before structural parsing; only embedded alphabet
2349
+ // symbols inside `'...'` get reconstituted.
2350
+ const label = unescapeMermaidLabel(tm[2]);
2020
2351
  const toId = parseMermaidId(tm[3]);
2021
2352
  const arrowIx = label.indexOf(' → ');
2022
2353
  if (arrowIx === -1) {