@turing-machine-js/machine 7.0.0-alpha.2 → 7.0.0-alpha.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.mjs CHANGED
@@ -173,6 +173,153 @@ class Reference {
173
173
  }
174
174
  _Reference_referenceBinding = new WeakMap();
175
175
 
176
+ const movementDescriptionToLabel = {
177
+ 'move caret left command': 'L',
178
+ 'move caret right command': 'R',
179
+ 'do not move carer': 'S',
180
+ };
181
+ const symbolCommandDescriptionToLabel = {
182
+ 'keep symbol command': 'K',
183
+ 'erase symbol command': 'E',
184
+ };
185
+ // Reserved characters in the encoded pattern string:
186
+ // '*' ASCII asterisk (U+002A) — per-cell ifOtherSymbol, matches any symbol
187
+ // on that tape. ASCII (not a fancier glyph like U+1F7B0) so it renders
188
+ // in every Mermaid environment and every monospace font. A literal `*`
189
+ // in the alphabet is unambiguous from the marker because it's quoted
190
+ // (`'*'`).
191
+ // 'B' the tape's blank symbol shorthand (in read patterns). A literal `B`
192
+ // in the alphabet is unambiguous from the marker because it's quoted
193
+ // (`'B'`).
194
+ // ',' separates per-tape cells inside one pattern
195
+ // '|' separates alternative patterns
196
+ // "'" surrounds a literal alphabet symbol — e.g. `'0'` for literal `0`,
197
+ // `'X'` for literal `X`. The quoting is what visually separates literal
198
+ // symbols from the convention markers `*` / `B` and from the write
199
+ // commands `K` / `E`.
200
+ // '\\' escape prefix — to represent any of '*', 'B', ',', '|', "'", or '\\'
201
+ // as a *literal* alphabet symbol *inside* the quotes (e.g. `'\''` for
202
+ // a literal apostrophe).
203
+ const IF_OTHER_MARKER = '*';
204
+ const BLANK_MARKER = 'B';
205
+ function escapeAlphabetSymbol(s) {
206
+ return s
207
+ .replace(/\\/g, '\\\\')
208
+ .replace(/'/g, "\\'");
209
+ }
210
+ function decodePatternDescription(description, alphabets) {
211
+ if (!description) {
212
+ return '?';
213
+ }
214
+ if (description === 'other symbol') {
215
+ return IF_OTHER_MARKER;
216
+ }
217
+ try {
218
+ const patternList = JSON.parse(description);
219
+ return patternList
220
+ .map((pattern) => pattern
221
+ .map((s, tapeIx) => {
222
+ if (s === null) {
223
+ return IF_OTHER_MARKER;
224
+ }
225
+ if (s === alphabets[tapeIx]?.[0]) {
226
+ return BLANK_MARKER;
227
+ }
228
+ return `'${escapeAlphabetSymbol(s)}'`;
229
+ })
230
+ .join(','))
231
+ .join('|');
232
+ }
233
+ catch {
234
+ return description;
235
+ }
236
+ }
237
+ function decodeMovement(description) {
238
+ if (!description) {
239
+ return '?';
240
+ }
241
+ return movementDescriptionToLabel[description] ?? description;
242
+ }
243
+ function splitUnescaped(s, sep) {
244
+ const parts = [];
245
+ let current = '';
246
+ let i = 0;
247
+ while (i < s.length) {
248
+ if (s[i] === '\\' && i + 1 < s.length) {
249
+ current += s[i + 1];
250
+ i += 2;
251
+ }
252
+ else if (s[i] === sep) {
253
+ parts.push(current);
254
+ current = '';
255
+ i += 1;
256
+ }
257
+ else {
258
+ current += s[i];
259
+ i += 1;
260
+ }
261
+ }
262
+ parts.push(current);
263
+ return parts;
264
+ }
265
+ function parsePatternString(s, alphabets) {
266
+ if (s === IF_OTHER_MARKER) {
267
+ return null;
268
+ }
269
+ const alternatives = splitUnescaped(s, '|');
270
+ return alternatives.map((alt) => {
271
+ const cells = splitUnescaped(alt, ',');
272
+ return cells.map((cell, tapeIx) => {
273
+ if (cell === IF_OTHER_MARKER) {
274
+ return null;
275
+ }
276
+ if (cell === BLANK_MARKER) {
277
+ return alphabets[tapeIx]?.[0] ?? cell;
278
+ }
279
+ // Literal alphabet symbols are wrapped in single quotes by
280
+ // `decodePatternDescription` — strip them on the way back.
281
+ if (cell.length >= 2 && cell.startsWith("'") && cell.endsWith("'")) {
282
+ return cell.slice(1, -1);
283
+ }
284
+ return cell;
285
+ });
286
+ });
287
+ }
288
+ const movementLabelToSymbol = {
289
+ L: movements.left,
290
+ R: movements.right,
291
+ S: movements.stay,
292
+ };
293
+ function parseMovementLabel(label) {
294
+ const m = movementLabelToSymbol[label];
295
+ if (!m) {
296
+ throw new Error(`unknown movement label: ${label}`);
297
+ }
298
+ return m;
299
+ }
300
+ function parseWriteSymbolLabel(label) {
301
+ if (label === 'K') {
302
+ return symbolCommands.keep;
303
+ }
304
+ if (label === 'E') {
305
+ return symbolCommands.erase;
306
+ }
307
+ // Literal alphabet symbols are wrapped in single quotes by
308
+ // `decodeWriteSymbol` — strip them on the way back.
309
+ if (label.length >= 2 && label.startsWith("'") && label.endsWith("'")) {
310
+ return label.slice(1, -1);
311
+ }
312
+ return label;
313
+ }
314
+ function decodeWriteSymbol(symbol) {
315
+ if (typeof symbol === 'symbol') {
316
+ const description = symbol.description ?? '?';
317
+ return symbolCommandDescriptionToLabel[description] ?? description;
318
+ }
319
+ return `'${symbol}'`;
320
+ }
321
+ // Format converters (toMermaid / fromMermaid) live in ./graphFormats.
322
+
176
323
  var __classPrivateFieldSet$4 = (undefined && undefined.__classPrivateFieldSet) || function (receiver, state, value, kind, f) {
177
324
  if (kind === "m") throw new TypeError("Private method is not writable");
178
325
  if (kind === "a" && !f) throw new TypeError("Private accessor was defined without a setter");
@@ -529,152 +676,512 @@ _TapeBlock_generateSymbolHint = { value: (patternList) => JSON.stringify(pattern
529
676
  .map((pattern) => pattern
530
677
  .map((symbol) => (symbol === ifOtherSymbol ? null : symbol)))) };
531
678
 
532
- const movementDescriptionToLabel = {
533
- 'move caret left command': 'L',
534
- 'move caret right command': 'R',
535
- 'do not move carer': 'S',
536
- };
537
- const symbolCommandDescriptionToLabel = {
538
- 'keep symbol command': 'K',
539
- 'erase symbol command': 'E',
540
- };
541
- // Reserved characters in the encoded pattern string:
542
- // '*' ASCII asterisk (U+002A) — per-cell ifOtherSymbol, matches any symbol
543
- // on that tape. ASCII (not a fancier glyph like U+1F7B0) so it renders
544
- // in every Mermaid environment and every monospace font. A literal `*`
545
- // in the alphabet is unambiguous from the marker because it's quoted
546
- // (`'*'`).
547
- // 'B' the tape's blank symbol shorthand (in read patterns). A literal `B`
548
- // in the alphabet is unambiguous from the marker because it's quoted
549
- // (`'B'`).
550
- // ',' separates per-tape cells inside one pattern
551
- // '|' separates alternative patterns
552
- // "'" surrounds a literal alphabet symbol — e.g. `'0'` for literal `0`,
553
- // `'X'` for literal `X`. The quoting is what visually separates literal
554
- // symbols from the convention markers `*` / `B` and from the write
555
- // commands `K` / `E`.
556
- // '\\' escape prefix — to represent any of '*', 'B', ',', '|', "'", or '\\'
557
- // as a *literal* alphabet symbol *inside* the quotes (e.g. `'\''` for
558
- // a literal apostrophe).
559
- const IF_OTHER_MARKER = '*';
560
- const BLANK_MARKER = 'B';
561
- function escapeAlphabetSymbol(s) {
562
- return s
563
- .replace(/\\/g, '\\\\')
564
- .replace(/'/g, "\\'");
565
- }
566
- function decodePatternDescription(description, alphabets) {
567
- if (!description) {
568
- return '?';
569
- }
570
- if (description === 'other symbol') {
571
- return IF_OTHER_MARKER;
572
- }
573
- try {
574
- const patternList = JSON.parse(description);
575
- return patternList
576
- .map((pattern) => pattern
577
- .map((s, tapeIx) => {
578
- if (s === null) {
579
- return IF_OTHER_MARKER;
679
+ // Graph serialization/reconstruction for State graphs. Extracted from
680
+ // `classes/State.ts` (#180) so the State class stays focused on the runtime
681
+ // machinery (transitions, debug, halt-stack composition). Sibling-module
682
+ // private access to State's internals goes through the `STATE_INTERNAL`
683
+ // Symbol re-exported from State.ts — see the @internal JSDoc there.
684
+ //
685
+ // Public surface is preserved: `State.toGraph` and `State.fromGraph` static
686
+ // methods continue to exist as thin delegates to the functions in this
687
+ // module. New consumers (e.g. #195's planned `collectStates`) will live
688
+ // here too and share the BFS-walk shape with `toGraph`.
689
+ /**
690
+ * Walks the reachable graph from `initialState` and returns a serializable
691
+ * `Graph`. The walk is a BFS that visits each State exactly once (keyed by
692
+ * the State's internal id) and emits one `GraphNode` per State plus
693
+ * synthetic halt-marker nodes per callable-subtree frame.
694
+ *
695
+ * Round-trips losslessly with `fromGraph` in the sense that running the
696
+ * rebuilt machine on the same input produces the same output — but State
697
+ * instance identities are NOT preserved across the cycle.
698
+ *
699
+ * See `classes/State.ts` for the runtime model these graph nodes describe;
700
+ * see `utilities/graphFormats.ts` for the Mermaid-flavored serialization
701
+ * built on top of `Graph`.
702
+ */
703
+ function toGraph(initialState, tapeBlock) {
704
+ const nodes = {};
705
+ const alphabets = tapeBlock.alphabets.map((alphabet) => alphabet.symbols);
706
+ // Pass 1: BFS-discover all reachable States; emit one GraphNode per State
707
+ // (wrapper or bare/regular). Wrappers and bares are separate nodes.
708
+ const visited = new Set();
709
+ const queue = [initialState];
710
+ const bareIds = new Set(); // ids referenced as a wrapper's bareStateId
711
+ while (queue.length > 0) {
712
+ const state = queue.shift();
713
+ const stateInternal = state[STATE_INTERNAL]();
714
+ if (visited.has(stateInternal.id)) {
715
+ continue;
716
+ }
717
+ visited.add(stateInternal.id);
718
+ if (state.isHalt) {
719
+ if (!(0 in nodes)) {
720
+ nodes[0] = {
721
+ id: 0,
722
+ name: stateInternal.name,
723
+ isHalt: true,
724
+ isHaltMarker: false,
725
+ isWrapper: false,
726
+ bareStateId: null,
727
+ frameId: null,
728
+ transitions: [],
729
+ overriddenHaltStateId: null,
730
+ tags: [...stateInternal.tags],
731
+ };
580
732
  }
581
- if (s === alphabets[tapeIx]?.[0]) {
582
- return BLANK_MARKER;
733
+ continue;
734
+ }
735
+ // Wrapper? Emit wrapper node + queue bare and override target.
736
+ if (stateInternal.overriddenHaltState !== null && stateInternal.bareState !== null) {
737
+ const bareState = stateInternal.bareState;
738
+ const overrideTarget = stateInternal.overriddenHaltState;
739
+ const bareInternal = bareState[STATE_INTERNAL]();
740
+ const overrideInternal = overrideTarget[STATE_INTERNAL]();
741
+ nodes[stateInternal.id] = {
742
+ id: stateInternal.id,
743
+ name: stateInternal.name, // composite name like "A(target)"
744
+ isHalt: false,
745
+ isHaltMarker: false,
746
+ isWrapper: true,
747
+ bareStateId: bareInternal.id,
748
+ frameId: null,
749
+ transitions: [],
750
+ overriddenHaltStateId: overrideInternal.id,
751
+ tags: [...stateInternal.tags],
752
+ };
753
+ bareIds.add(bareInternal.id);
754
+ queue.push(bareState);
755
+ queue.push(overrideTarget);
756
+ continue;
757
+ }
758
+ // Regular (or bare) state — build node with transitions.
759
+ const node = {
760
+ id: stateInternal.id,
761
+ name: stateInternal.name,
762
+ isHalt: false,
763
+ isHaltMarker: false,
764
+ isWrapper: false,
765
+ bareStateId: null,
766
+ frameId: null,
767
+ transitions: [],
768
+ overriddenHaltStateId: null,
769
+ tags: [...stateInternal.tags],
770
+ };
771
+ nodes[stateInternal.id] = node;
772
+ let patternIx = 0;
773
+ for (const [sym, { command, nextState }] of stateInternal.symbolToDataMap) {
774
+ let target;
775
+ try {
776
+ target = nextState instanceof State ? nextState : nextState.ref;
583
777
  }
584
- return `'${escapeAlphabetSymbol(s)}'`;
585
- })
586
- .join(','))
587
- .join('|');
778
+ catch {
779
+ patternIx += 1;
780
+ continue;
781
+ }
782
+ const targetInternal = target[STATE_INTERNAL]();
783
+ node.transitions.push({
784
+ pattern: decodePatternDescription(sym.description, alphabets),
785
+ command: command.tapesCommands.map((tc) => ({
786
+ symbol: decodeWriteSymbol(tc.symbol),
787
+ movement: decodeMovement(tc.movement.description),
788
+ })),
789
+ nextStateId: targetInternal.id,
790
+ id: `${stateInternal.id}-${patternIx}`,
791
+ });
792
+ queue.push(target);
793
+ patternIx += 1;
794
+ }
795
+ }
796
+ // Always emit real halt as a sentinel, even if no transition targets it.
797
+ // It anchors the `subtree -. halt .-> s0` frame-level arrow whenever a
798
+ // frame demand-emits one, and it's the canonical machine-halt singleton.
799
+ if (!(0 in nodes)) {
800
+ nodes[0] = {
801
+ id: 0,
802
+ name: 'halt',
803
+ isHalt: true,
804
+ isHaltMarker: false,
805
+ isWrapper: false,
806
+ bareStateId: null,
807
+ frameId: null,
808
+ transitions: [],
809
+ overriddenHaltStateId: null,
810
+ tags: [...haltState[STATE_INTERNAL]().tags],
811
+ };
588
812
  }
589
- catch {
590
- return description;
813
+ // Pass 2: For each bare, compute its forward-reachable set (following
814
+ // transitions; stopping at halt and at wrappers — both are frame
815
+ // boundaries).
816
+ const computeReach = (startId) => {
817
+ const reach = new Set();
818
+ const stack = [startId];
819
+ while (stack.length > 0) {
820
+ const id = stack.pop();
821
+ if (reach.has(id)) {
822
+ continue;
823
+ }
824
+ const node = nodes[id];
825
+ // `nodes[id]` is always populated for `id` that the BFS reached, so
826
+ // a defensive `!node` check would be dead. `isHalt` / `isWrapper`
827
+ // are real boundaries — both stop reach-set expansion.
828
+ if (node.isHalt || node.isWrapper) {
829
+ continue;
830
+ }
831
+ reach.add(id);
832
+ for (const t of node.transitions) {
833
+ const target = nodes[t.nextStateId];
834
+ if (!target || target.isHalt || target.isWrapper) {
835
+ continue;
836
+ }
837
+ stack.push(t.nextStateId);
838
+ }
839
+ }
840
+ return reach;
841
+ };
842
+ const reachByBare = new Map();
843
+ for (const bareId of bareIds) {
844
+ reachByBare.set(bareId, computeReach(bareId));
845
+ }
846
+ // Pass 3: Union-find on bare overlaps. Two bares merge if their reach
847
+ // sets share any state. Canonical representative = smallest bare-id in
848
+ // the component.
849
+ const ufParent = new Map();
850
+ // Note: no path compression. The union policy below ("smaller root id
851
+ // becomes the parent") does not keep the tree flat: a root that already
852
+ // has children can later be attached under a smaller root, so chains may
853
+ // exceed one step. `ufFind` walks the whole chain, so results stay correct.
854
+ const ufFind = (id) => {
855
+ if (!ufParent.has(id)) {
856
+ ufParent.set(id, id);
857
+ }
858
+ let root = id;
859
+ while (ufParent.get(root) !== root) {
860
+ root = ufParent.get(root);
861
+ }
862
+ return root;
863
+ };
864
+ const ufUnion = (a, b) => {
865
+ const ra = ufFind(a);
866
+ const rb = ufFind(b);
867
+ if (ra === rb)
868
+ return;
869
+ if (ra < rb) {
870
+ ufParent.set(rb, ra);
871
+ }
872
+ else {
873
+ ufParent.set(ra, rb);
874
+ }
875
+ };
876
+ for (const bareId of bareIds) {
877
+ ufFind(bareId);
878
+ }
879
+ // For each state, collect the bares that reach it; union all bares that
880
+ // share a state.
881
+ const stateToReachingBares = new Map();
882
+ for (const [bareId, reachSet] of reachByBare) {
883
+ for (const stateId of reachSet) {
884
+ let bares = stateToReachingBares.get(stateId);
885
+ if (!bares) {
886
+ bares = [];
887
+ stateToReachingBares.set(stateId, bares);
888
+ }
889
+ bares.push(bareId);
890
+ }
891
+ }
892
+ for (const bares of stateToReachingBares.values()) {
893
+ for (let i = 1; i < bares.length; i += 1) {
894
+ ufUnion(bares[0], bares[i]);
895
+ }
896
+ }
897
+ // Assign frameId to each in-reach state.
898
+ const frameIds = new Set();
899
+ for (const [stateId, bares] of stateToReachingBares) {
900
+ const frameId = ufFind(bares[0]);
901
+ nodes[stateId].frameId = frameId;
902
+ frameIds.add(frameId);
903
+ }
904
+ // Pass 4: Retarget halt-bound transitions for in-frame states to the
905
+ // frame's halt marker. Out-of-frame states (top-level dispatcher, override
906
+ // targets, etc.) keep their halt-bound transitions pointing at real halt.
907
+ for (const node of Object.values(nodes)) {
908
+ if (node.frameId === null) {
909
+ continue;
910
+ }
911
+ const haltMarkerId = -node.frameId;
912
+ for (const t of node.transitions) {
913
+ const target = nodes[t.nextStateId];
914
+ if (target && target.isHalt && !target.isHaltMarker) {
915
+ t.nextStateId = haltMarkerId;
916
+ }
917
+ }
591
918
  }
592
- }
593
- function decodeMovement(description) {
594
- if (!description) {
595
- return '?';
919
+ // Pass 5: Emit one halt marker per frame.
920
+ for (const frameId of frameIds) {
921
+ const haltMarkerId = -frameId;
922
+ nodes[haltMarkerId] = {
923
+ id: haltMarkerId,
924
+ name: 'halt',
925
+ isHalt: true,
926
+ isHaltMarker: true,
927
+ isWrapper: false,
928
+ bareStateId: null,
929
+ frameId,
930
+ transitions: [],
931
+ overriddenHaltStateId: null,
932
+ tags: [],
933
+ };
596
934
  }
597
- return movementDescriptionToLabel[description] ?? description;
935
+ return { initialId: initialState[STATE_INTERNAL]().id, alphabets, nodes };
598
936
  }
599
- function splitUnescaped(s, sep) {
600
- const parts = [];
601
- let current = '';
602
- let i = 0;
603
- while (i < s.length) {
604
- if (s[i] === '\\' && i + 1 < s.length) {
605
- current += s[i + 1];
606
- i += 2;
937
+ /**
938
+ * Inverse of `toGraph`: rebuilds a State graph (and a fresh TapeBlock with
939
+ * the graph's alphabets) from a serialized Graph. Round-trips with `toGraph`
940
+ * in the sense that running the rebuilt machine on the same input gives the
941
+ * same output, but the rebuilt State instances have *new* internal IDs.
942
+ *
943
+ * Under the v7 callable-subtree model (#174), graph nodes split into:
944
+ * - Wrapper nodes (`isWrapper: true`, no transitions) — reconstructed via
945
+ * `bareStates[bareStateId].withOverriddenHaltState(finalStates[overriddenHaltStateId])`.
946
+ * - Bare/regular nodes — constructed as normal States with transitions.
947
+ * - Halt + halt-marker nodes — collapse to the singleton `haltState`.
948
+ */
949
+ function fromGraph(graph) {
950
+ const alphabetObjs = graph.alphabets.map((syms) => new Alphabet(syms));
951
+ const tapeBlock = TapeBlock.fromAlphabets(alphabetObjs);
952
+ const ids = Object.keys(graph.nodes).map(Number);
953
+ // Pass 1: pre-create a Reference for each non-halt non-halt-marker node
954
+ // (both wrappers and regulars). Halt and halt-marker nodes collapse to the
955
+ // singleton `haltState` and need no ref.
956
+ const refs = {};
957
+ for (const nodeId of ids) {
958
+ const node = graph.nodes[nodeId];
959
+ if (!node.isHalt) {
960
+ refs[nodeId] = new Reference();
961
+ }
962
+ }
963
+ // Convert a parsed pattern back to the symbol key the State expects.
964
+ const patternToKey = (parsed) => {
965
+ if (parsed === null) {
966
+ return ifOtherSymbol;
967
+ }
968
+ const flat = [];
969
+ for (const row of parsed) {
970
+ for (const cell of row) {
971
+ flat.push(cell === null ? ifOtherSymbol : cell);
972
+ }
973
+ }
974
+ return tapeBlock.symbol(flat);
975
+ };
976
+ // Pass 2: build a State for each non-wrapper non-halt non-halt-marker
977
+ // node. Transitions point at refs so cycles work; haltState (and halt
978
+ // markers, which collapse to haltState) are used directly.
979
+ const bareStates = {};
980
+ for (const nodeId of ids) {
981
+ const node = graph.nodes[nodeId];
982
+ if (node.isHalt || node.isWrapper) {
983
+ continue;
607
984
  }
608
- else if (s[i] === sep) {
609
- parts.push(current);
610
- current = '';
611
- i += 1;
985
+ const stateDefinition = {};
986
+ for (const t of node.transitions) {
987
+ const key = patternToKey(parsePatternString(t.pattern, graph.alphabets));
988
+ const target = graph.nodes[t.nextStateId];
989
+ const nextState = !target || target.isHalt
990
+ ? haltState
991
+ : refs[t.nextStateId];
992
+ stateDefinition[key] = {
993
+ command: t.command.map((c) => ({
994
+ symbol: parseWriteSymbolLabel(c.symbol),
995
+ movement: parseMovementLabel(c.movement),
996
+ })),
997
+ nextState,
998
+ };
999
+ }
1000
+ // Graph-sourced names may contain `(` and `)` (composite wrapper names —
1001
+ // although wrappers go through a separate path below, defensive
1002
+ // construction here keeps the bypass uniform). Construct without a name
1003
+ // and assign `name` directly through the internal accessor's setter to
1004
+ // skip the constructor's user-facing name validation.
1005
+ const bare = new State(stateDefinition);
1006
+ bare[STATE_INTERNAL]().name = node.name;
1007
+ if (node.tags.length > 0) {
1008
+ bare.tag(...node.tags);
1009
+ }
1010
+ bareStates[nodeId] = bare;
1011
+ }
1012
+ // Pass 3: resolve every node to its final State (memoized + cycle-safe).
1013
+ // Wrappers compose lazily via `withOverriddenHaltState` once their bare
1014
+ // and override are resolved.
1015
+ const finalStates = {};
1016
+ const inProgress = new Set();
1017
+ const getFinal = (nodeId) => {
1018
+ if (finalStates[nodeId]) {
1019
+ return finalStates[nodeId];
1020
+ }
1021
+ const node = graph.nodes[nodeId];
1022
+ if (!node || node.isHalt) {
1023
+ finalStates[nodeId] = haltState;
1024
+ return haltState;
1025
+ }
1026
+ if (inProgress.has(nodeId)) {
1027
+ throw new Error(`override-halt cycle at state #${nodeId}`);
1028
+ }
1029
+ inProgress.add(nodeId);
1030
+ let state;
1031
+ if (node.isWrapper) {
1032
+ const bare = getFinal(node.bareStateId);
1033
+ const override = getFinal(node.overriddenHaltStateId);
1034
+ state = bare.withOverriddenHaltState(override);
1035
+ // Apply wrapper-scoped tags (#186). Tags don't leak across wrappers
1036
+ // sharing a bare — the wrapper instance owns its own tag set, and
1037
+ // engine #175 memoization returns the same instance for the same
1038
+ // (bare, override) pair, so this is idempotent across rebuilds.
1039
+ if (node.tags.length > 0) {
1040
+ state.tag(...node.tags);
1041
+ }
612
1042
  }
613
1043
  else {
614
- current += s[i];
615
- i += 1;
1044
+ state = bareStates[nodeId];
1045
+ }
1046
+ inProgress.delete(nodeId);
1047
+ finalStates[nodeId] = state;
1048
+ return state;
1049
+ };
1050
+ for (const nodeId of ids) {
1051
+ getFinal(nodeId);
1052
+ }
1053
+ // Pass 4: bind each ref to the resolved final State so cross-node
1054
+ // transitions land on the right instance.
1055
+ for (const nodeId of ids) {
1056
+ if (!graph.nodes[nodeId].isHalt) {
1057
+ refs[nodeId].bind(finalStates[nodeId]);
616
1058
  }
617
1059
  }
618
- parts.push(current);
619
- return parts;
1060
+ return {
1061
+ start: finalStates[graph.initialId],
1062
+ tapeBlock,
1063
+ states: finalStates,
1064
+ };
620
1065
  }
621
- function parsePatternString(s, alphabets) {
622
- if (s === IF_OTHER_MARKER) {
623
- return null;
624
- }
625
- const alternatives = splitUnescaped(s, '|');
626
- return alternatives.map((alt) => {
627
- const cells = splitUnescaped(alt, ',');
628
- return cells.map((cell, tapeIx) => {
629
- if (cell === IF_OTHER_MARKER) {
630
- return null;
631
- }
632
- if (cell === BLANK_MARKER) {
633
- return alphabets[tapeIx]?.[0] ?? cell;
1066
+ /**
1067
+ * Returns a `Map<number, {state, transitionSymbols}>` keyed by engine
1068
+ * `GraphNode.id`, giving downstream tooling direct access to the `State`
1069
+ * instance + per-pattern Symbol references for breakpoint setup (#195).
1070
+ *
1071
+ * **Positional alignment contract.** For any `GraphTransition` whose id
1072
+ * is `${N}-${K}`, `result.get(N)!.transitionSymbols[K]` is the Symbol
1073
+ * the transition fires on (reference equality, not structural). The K-th
1074
+ * entry is the K-th key from the source State's `#symbolToDataMap` in
1075
+ * insertion order, including `ifOtherSymbol` when the user wrote one.
1076
+ * Consumers filtering the catch-all path identity-compare against the
1077
+ * engine-exported `ifOtherSymbol`.
1078
+ *
1079
+ * **Unbound-`Reference` slots.** `toGraph` increments `patternIx` even
1080
+ * when a transition's `nextState` is an unresolved `Reference` (it
1081
+ * `continue`s without pushing the GraphTransition). In that case
1082
+ * `transitionSymbols[K]` is still set to the K-th Map key, but no
1083
+ * `Graph.nodes[N].transitions` entry exists with id `${N}-${K}`. Sparse
1084
+ * on the Graph side, dense on the `transitionSymbols` side — same
1085
+ * indexing.
1086
+ *
1087
+ * **Coverage.** Map keys are the State-backed subset of `graph.nodes`:
1088
+ * regulars + bares + wrappers + the halt singleton (id `0`). Synthetic
1089
+ * halt markers (id `-frameId`) are excluded — they all reach the same
1090
+ * `haltState` object at runtime, and the named consumer
1091
+ * ([machines-demo#37](https://github.com/mellonis/machines-demo/issues/37))
1092
+ * surfaces halt-pause via a separate UI control, not via clicks on
1093
+ * halt glyphs. If a future consumer needs uniform-by-id lookup, the
1094
+ * helper can be extended additively.
1095
+ *
1096
+ * **Halt-singleton warning.** `result.get(0)!.state === haltState` — the
1097
+ * process-wide halt. Toggling `.debug` on that entry affects every
1098
+ * machine in the runtime, not just the one this map was built from.
1099
+ */
1100
+ function collectStates(initialState, tapeBlock) {
1101
+ // Anchor on toGraph's authoritative id set — it knows the canonical
1102
+ // ordering of wrapper/bare/regular emission and which nodes are
1103
+ // synthetic halt markers we have to skip. Building our own BFS would
1104
+ // duplicate that logic; reusing the Graph guarantees collectStates'
1105
+ // id keys never drift from toGraph's GraphTransition ids.
1106
+ const graph = toGraph(initialState, tapeBlock);
1107
+ // Walk the State graph to associate each State instance with its
1108
+ // engine id. The shape mirrors toGraph's Pass 1 — visit by id, branch
1109
+ // on halt / wrapper / regular — but only collects the (id → State)
1110
+ // mapping. Lighter than re-running the union-find passes; no
1111
+ // GraphNode construction.
1112
+ const stateById = new Map();
1113
+ const visited = new Set();
1114
+ const queue = [initialState];
1115
+ while (queue.length > 0) {
1116
+ const state = queue.shift();
1117
+ const internal = state[STATE_INTERNAL]();
1118
+ if (visited.has(internal.id))
1119
+ continue;
1120
+ visited.add(internal.id);
1121
+ stateById.set(internal.id, state);
1122
+ if (state.isHalt)
1123
+ continue;
1124
+ if (internal.bareState !== null && internal.overriddenHaltState !== null) {
1125
+ queue.push(internal.bareState);
1126
+ queue.push(internal.overriddenHaltState);
1127
+ continue;
1128
+ }
1129
+ for (const { nextState } of internal.symbolToDataMap.values()) {
1130
+ let target;
1131
+ try {
1132
+ target = nextState instanceof State ? nextState : nextState.ref;
634
1133
  }
635
- // Literal alphabet symbols are wrapped in single quotes by
636
- // `decodePatternDescription` — strip them on the way back.
637
- if (cell.length >= 2 && cell.startsWith("'") && cell.endsWith("'")) {
638
- return cell.slice(1, -1);
1134
+ catch {
1135
+ continue; // unbound Reference — skip silently, matches toGraph
639
1136
  }
640
- return cell;
641
- });
642
- });
643
- }
644
- const movementLabelToSymbol = {
645
- L: movements.left,
646
- R: movements.right,
647
- S: movements.stay,
648
- };
649
- function parseMovementLabel(label) {
650
- const m = movementLabelToSymbol[label];
651
- if (!m) {
652
- throw new Error(`unknown movement label: ${label}`);
653
- }
654
- return m;
655
- }
656
- function parseWriteSymbolLabel(label) {
657
- if (label === 'K') {
658
- return symbolCommands.keep;
659
- }
660
- if (label === 'E') {
661
- return symbolCommands.erase;
662
- }
663
- // Literal alphabet symbols are wrapped in single quotes by
664
- // `decodeWriteSymbol` — strip them on the way back.
665
- if (label.length >= 2 && label.startsWith("'") && label.endsWith("'")) {
666
- return label.slice(1, -1);
1137
+ queue.push(target);
1138
+ }
667
1139
  }
668
- return label;
669
- }
670
- function decodeWriteSymbol(symbol) {
671
- if (typeof symbol === 'symbol') {
672
- const description = symbol.description ?? '?';
673
- return symbolCommandDescriptionToLabel[description] ?? description;
1140
+ // Build the result by iterating graph.nodes — the authoritative id set
1141
+ // minus halt markers — and dispatching on node kind. The halt singleton
1142
+ // entry's `state` reads from `stateById` (the BFS visited haltState if
1143
+ // any path reached it) but falls back to the module-level singleton
1144
+ // for graphs whose only halt presence is the always-emitted sentinel.
1145
+ const result = new Map();
1146
+ for (const idStr of Object.keys(graph.nodes)) {
1147
+ const id = Number(idStr);
1148
+ const node = graph.nodes[id];
1149
+ if (node.isHaltMarker)
1150
+ continue; // synthetic; collapses to haltState at id 0
1151
+ if (node.isHalt) {
1152
+ // The real halt — always the engine-wide singleton. Prefer the
1153
+ // BFS-visited instance for identity-equality with whatever the
1154
+ // caller has; fall back to the module singleton when the BFS
1155
+ // didn't reach haltState (toGraph emits id 0 unconditionally).
1156
+ result.set(id, {
1157
+ state: stateById.get(0) ?? haltState,
1158
+ transitionSymbols: [],
1159
+ });
1160
+ continue;
1161
+ }
1162
+ if (node.isWrapper) {
1163
+ result.set(id, {
1164
+ state: stateById.get(id),
1165
+ transitionSymbols: [],
1166
+ });
1167
+ continue;
1168
+ }
1169
+ // Regular or bare State — enumerate `#symbolToDataMap.keys()` for
1170
+ // the patternIx alignment. The K-th key is the Symbol that
1171
+ // `${id}-${K}` GraphTransition fires on (positional contract).
1172
+ const state = stateById.get(id);
1173
+ const transitionSymbols = [...state[STATE_INTERNAL]().symbolToDataMap.keys()];
1174
+ result.set(id, { state, transitionSymbols });
674
1175
  }
675
- return `'${symbol}'`;
1176
+ return result;
676
1177
  }
677
- // Format converters (toMermaid / fromMermaid) live in ./graphFormats.
1178
+ // Note on the import cycle with `State.ts`: stateGraph.ts value-imports
1179
+ // `State`, `STATE_INTERNAL`, `haltState`, and `ifOtherSymbol`; State.ts
1180
+ // value-imports `toGraph` and `fromGraph` for its static-method delegates.
1181
+ // ESM resolves cycles via live bindings — both modules see each other's
1182
+ // exports as long as nothing at module-load reads a binding before its
1183
+ // source module finishes evaluating. All references here live inside
1184
+ // function bodies, so the cycle is safe.
678
1185
 
679
1186
  var __classPrivateFieldSet$1 = (undefined && undefined.__classPrivateFieldSet) || function (receiver, state, value, kind, f) {
680
1187
  if (kind === "m") throw new TypeError("Private method is not writable");
@@ -687,11 +1194,33 @@ var __classPrivateFieldGet$1 = (undefined && undefined.__classPrivateFieldGet) |
687
1194
  if (typeof state === "function" ? receiver !== state || !f : !state.has(receiver)) throw new TypeError("Cannot read private member from an object whose class did not declare it");
688
1195
  return kind === "m" ? f : kind === "a" ? f.call(receiver) : f ? f.value : state.get(receiver);
689
1196
  };
690
- var _DebugConfig_ownerState, _DebugConfig_before, _DebugConfig_after, _a, _State_wrapperCache, _State_id, _State_name, _State_overriddenHaltState, _State_bareState, _State_symbolToDataMap, _State_debugRef;
1197
+ var _DebugConfig_ownerState, _DebugConfig_before, _DebugConfig_after, _a, _State_wrapperCache, _State_id, _State_name, _State_overriddenHaltState, _State_bareState, _State_symbolToDataMap, _State_debugRef, _State_tags;
691
1198
  const ifOtherSymbol = Symbol('other symbol');
692
1199
  // Module-private symbol used by DebugConfig setters to call State's validator
693
1200
  // without exposing the validator on the public surface.
694
1201
  const validateDebugFilter = Symbol('validateDebugFilter');
1202
+ /**
1203
+ * @internal
1204
+ *
1205
+ * Package-private accessor key for sibling modules in
1206
+ * `packages/machine/src` (e.g. `utilities/stateGraph.ts`, and the planned
1207
+ * `utilities/stateCollect.ts` for #195). Re-exported from this module so
1208
+ * sibling files can import it; intentionally NOT re-exported from the
1209
+ * package's public `index.ts`, so downstream consumers don't see it on
1210
+ * the supported surface.
1211
+ *
1212
+ * Calling `state[STATE_INTERNAL]()` returns a getter/setter view onto the
1213
+ * State's private fields. Reads are live (they close over `this`), so the
1214
+ * view stays in sync with subsequent mutations on the State. There's one
1215
+ * mutating setter on the view — `name` — used exclusively by
1216
+ * `fromGraph` to assign graph-sourced composite names (e.g. `A(target)`)
1217
+ * that the public name validator would reject; see the JSDoc on the
1218
+ * accessor itself.
1219
+ *
1220
+ * Designed in #180 with #195 in mind so its surface doesn't need to grow
1221
+ * when `collectStates` lands.
1222
+ */
1223
+ const STATE_INTERNAL = Symbol('State.internal');
695
1224
  class DebugConfig {
696
1225
  constructor(ownerState, initial) {
697
1226
  _DebugConfig_ownerState.set(this, void 0);
@@ -748,6 +1277,14 @@ class State {
748
1277
  // Note: toGraph / fromGraph deliberately do not serialize debug — debug is
749
1278
  // a runtime concern, not part of the structural graph.
750
1279
  _State_debugRef.set(this, { current: null });
1280
+ // Out-of-band tags applied to this State (#186). Tags are visualization
1281
+ // and debugger-tooling metadata — they don't affect runtime transition
1282
+ // lookup or `equivalentOn` comparisons. Stored as a Set for de-duplication;
1283
+ // exposed via the `tags` getter as a frozen array snapshot. Lives on the
1284
+ // State INSTANCE so wrappers (from `withOverriddenHaltState`) carry tags
1285
+ // independently of their bare's tag set — see the #175 sharing test in
1286
+ // State.spec.ts.
1287
+ _State_tags.set(this, new Set());
751
1288
  if (stateDefinition) {
752
1289
  const keys = Object.getOwnPropertyNames(stateDefinition);
753
1290
  if (keys.length) {
@@ -831,8 +1368,38 @@ class State {
831
1368
  }
832
1369
  __classPrivateFieldGet$1(this, _State_debugRef, "f").current = new DebugConfig(this, value);
833
1370
  }
1371
+ /**
1372
+ * Add one or more tags to this State (#186). Tags are out-of-band metadata
1373
+ * used by visualization (`toMermaid` emits `classDef`/`class` lines) and
1374
+ * debugger tooling — they don't affect runtime transition lookup,
1375
+ * `equivalentOn` comparisons, or any structural identity. Chainable.
1376
+ */
1377
+ tag(...tags) {
1378
+ for (const t of tags) {
1379
+ __classPrivateFieldGet$1(this, _State_tags, "f").add(t);
1380
+ }
1381
+ return this;
1382
+ }
1383
+ /**
1384
+ * Remove one or more tags from this State (#186). Untagging a tag the
1385
+ * State doesn't carry is a no-op. Chainable.
1386
+ */
1387
+ untag(...tags) {
1388
+ for (const t of tags) {
1389
+ __classPrivateFieldGet$1(this, _State_tags, "f").delete(t);
1390
+ }
1391
+ return this;
1392
+ }
1393
+ /**
1394
+ * Frozen snapshot of this State's current tags (#186). The returned array
1395
+ * is `Object.freeze`d — mutating it throws in strict mode (which TS-emitted
1396
+ * code uses). Order matches insertion order of the underlying Set.
1397
+ */
1398
+ get tags() {
1399
+ return Object.freeze([...__classPrivateFieldGet$1(this, _State_tags, "f")]);
1400
+ }
834
1401
  /** @internal — invoked by DebugConfig setters via module-private symbol. */
835
- [(_State_id = new WeakMap(), _State_name = new WeakMap(), _State_overriddenHaltState = new WeakMap(), _State_bareState = new WeakMap(), _State_symbolToDataMap = new WeakMap(), _State_debugRef = new WeakMap(), validateDebugFilter)](fieldName, filter) {
1402
+ [(_State_id = new WeakMap(), _State_name = new WeakMap(), _State_overriddenHaltState = new WeakMap(), _State_bareState = new WeakMap(), _State_symbolToDataMap = new WeakMap(), _State_debugRef = new WeakMap(), _State_tags = new WeakMap(), validateDebugFilter)](fieldName, filter) {
836
1403
  if (filter === undefined)
837
1404
  return;
838
1405
  // #108 part 2: `.after` on haltState has no semantic anchor — halt is
@@ -916,6 +1483,47 @@ class State {
916
1483
  innerCache.set(overriddenHaltState, new WeakRef(state));
917
1484
  return state;
918
1485
  }
1486
+ /**
1487
+ * @internal
1488
+ *
1489
+ * Package-private getter/setter view onto this State's private fields,
1490
+ * for sibling modules in `packages/machine/src` (currently `stateGraph.ts`
1491
+ * for `toGraph` / `fromGraph`, and the planned `stateCollect.ts` for
1492
+ * #195's `collectStates`).
1493
+ *
1494
+ * Read access is live — the getters close over `this`, so the view
1495
+ * stays in sync with subsequent mutations on this State. There's a
1496
+ * single mutating setter on the view, `name`, which exists to let
1497
+ * `fromGraph` assign graph-sourced composite names (e.g. `A(target)`)
1498
+ * to freshly-constructed bare States. The constructor's name validator
1499
+ * rejects parens (reserved as wrapper-composition delimiters in
1500
+ * `withOverriddenHaltState`); the setter intentionally bypasses that
1501
+ * check because the same delimiters appear in legitimate wrapper-bare
1502
+ * names round-tripped through the graph.
1503
+ *
1504
+ * Returns a fresh view object on every call — cheap enough for the
1505
+ * BFS-once-per-build callers, and avoids holding a reference object on
1506
+ * every State instance. Keep this surface tight: callers should only
1507
+ * read what they need. Adding fields here is a deliberate decision —
1508
+ * each adds to the implicit contract sibling modules can rely on.
1509
+ */
1510
+ [STATE_INTERNAL]() {
1511
+ // Aliasing `this` so the nested object-literal getters/setters below
1512
+ // can read/write the enclosing State's private fields — getters in an
1513
+ // object literal can't be arrow functions, so the standard arrow-
1514
+ // captures-`this` trick doesn't apply here.
1515
+ // eslint-disable-next-line @typescript-eslint/no-this-alias
1516
+ const self = this;
1517
+ return {
1518
+ get id() { return __classPrivateFieldGet$1(self, _State_id, "f"); },
1519
+ get name() { return __classPrivateFieldGet$1(self, _State_name, "f"); },
1520
+ set name(v) { __classPrivateFieldSet$1(self, _State_name, v, "f"); },
1521
+ get bareState() { return __classPrivateFieldGet$1(self, _State_bareState, "f"); },
1522
+ get overriddenHaltState() { return __classPrivateFieldGet$1(self, _State_overriddenHaltState, "f"); },
1523
+ get symbolToDataMap() { return __classPrivateFieldGet$1(self, _State_symbolToDataMap, "f"); },
1524
+ get tags() { return __classPrivateFieldGet$1(self, _State_tags, "f"); },
1525
+ };
1526
+ }
919
1527
  // Single-state introspection — no traversal, no tapeBlock required.
920
1528
  // Returns id, name, halt-status, override-halt target, and the list of
921
1529
  // transitions out of this state with decoded write/movement labels.
@@ -950,367 +1558,36 @@ class State {
950
1558
  transitions,
951
1559
  };
952
1560
  }
953
- // Walks the State graph and emits a `Graph` data structure. v7 callable-
954
- // subtree emit shape (#174):
955
- //
956
- // Each `withOverriddenHaltState` wrapper produces TWO graph nodes:
957
- // - A wrapper node (`isWrapper: true`, `[[composite-name]]` shape) — the
958
- // call site. No transitions of its own. `bareStateId` points to the
959
- // bare's GraphNode; `overriddenHaltStateId` points to the override
960
- // target's GraphNode.
961
- // - A bare node (`isWrapper: false`, regular shape) — the callable body.
962
- // Has the bare's transitions. Shared across all wrappers that wrap
963
- // this bare (no per-context duplication).
964
- //
965
- // Frames are computed via union-find on bare reachability: two bares whose
966
- // forward-reachable sets overlap merge into one frame. Each frame contains
967
- // its bares + body states + a single halt marker (id = `-frameId`). The
968
- // canonical `frameId` is the smallest bare-id in the component.
969
- //
970
- // Halt-bound transitions of any in-frame state are retargeted to the
971
- // frame's halt marker. The frame's `subtree -. return .-> wrapper` and
972
- // `subtree -. halt .-> s0` arrows are demand-emitted by `toMermaid` from
973
- // the frame structure; they're not stored as graph edges.
1561
+ /**
1562
+ * Walks the reachable State graph from `initialState` and returns a
1563
+ * serializable `Graph`. Thin delegate to `utilities/stateGraph.ts`'s
1564
+ * `toGraph` (extracted in #180); see that module for the BFS shape and
1565
+ * v7 callable-subtree emit semantics.
1566
+ */
974
1567
  static toGraph(initialState, tapeBlock) {
975
- const nodes = {};
976
- const alphabets = tapeBlock.alphabets.map((alphabet) => alphabet.symbols);
977
- // Pass 1: BFS-discover all reachable States; emit one GraphNode per State
978
- // (wrapper or bare/regular). Wrappers and bares are separate nodes.
979
- const visited = new Set();
980
- const queue = [initialState];
981
- const bareIds = new Set(); // ids referenced as a wrapper's bareStateId
982
- while (queue.length > 0) {
983
- const state = queue.shift();
984
- if (visited.has(__classPrivateFieldGet$1(state, _State_id, "f"))) {
985
- continue;
986
- }
987
- visited.add(__classPrivateFieldGet$1(state, _State_id, "f"));
988
- if (state.isHalt) {
989
- if (!(0 in nodes)) {
990
- nodes[0] = {
991
- id: 0,
992
- name: __classPrivateFieldGet$1(state, _State_name, "f"),
993
- isHalt: true,
994
- isHaltMarker: false,
995
- isWrapper: false,
996
- bareStateId: null,
997
- frameId: null,
998
- transitions: [],
999
- overriddenHaltStateId: null,
1000
- };
1001
- }
1002
- continue;
1003
- }
1004
- // Wrapper? Emit wrapper node + queue bare and override target.
1005
- if (__classPrivateFieldGet$1(state, _State_overriddenHaltState, "f") !== null && __classPrivateFieldGet$1(state, _State_bareState, "f") !== null) {
1006
- const bareState = __classPrivateFieldGet$1(state, _State_bareState, "f");
1007
- const overrideTarget = __classPrivateFieldGet$1(state, _State_overriddenHaltState, "f");
1008
- nodes[__classPrivateFieldGet$1(state, _State_id, "f")] = {
1009
- id: __classPrivateFieldGet$1(state, _State_id, "f"),
1010
- name: __classPrivateFieldGet$1(state, _State_name, "f"), // composite name like "A(target)"
1011
- isHalt: false,
1012
- isHaltMarker: false,
1013
- isWrapper: true,
1014
- bareStateId: __classPrivateFieldGet$1(bareState, _State_id, "f"),
1015
- frameId: null,
1016
- transitions: [],
1017
- overriddenHaltStateId: __classPrivateFieldGet$1(overrideTarget, _State_id, "f"),
1018
- };
1019
- bareIds.add(__classPrivateFieldGet$1(bareState, _State_id, "f"));
1020
- queue.push(bareState);
1021
- queue.push(overrideTarget);
1022
- continue;
1023
- }
1024
- // Regular (or bare) state — build node with transitions.
1025
- const node = {
1026
- id: __classPrivateFieldGet$1(state, _State_id, "f"),
1027
- name: __classPrivateFieldGet$1(state, _State_name, "f"),
1028
- isHalt: false,
1029
- isHaltMarker: false,
1030
- isWrapper: false,
1031
- bareStateId: null,
1032
- frameId: null,
1033
- transitions: [],
1034
- overriddenHaltStateId: null,
1035
- };
1036
- nodes[__classPrivateFieldGet$1(state, _State_id, "f")] = node;
1037
- let patternIx = 0;
1038
- for (const [sym, { command, nextState }] of __classPrivateFieldGet$1(state, _State_symbolToDataMap, "f")) {
1039
- let target;
1040
- try {
1041
- target = nextState instanceof _a ? nextState : nextState.ref;
1042
- }
1043
- catch {
1044
- patternIx += 1;
1045
- continue;
1046
- }
1047
- node.transitions.push({
1048
- pattern: decodePatternDescription(sym.description, alphabets),
1049
- command: command.tapesCommands.map((tc) => ({
1050
- symbol: decodeWriteSymbol(tc.symbol),
1051
- movement: decodeMovement(tc.movement.description),
1052
- })),
1053
- nextStateId: __classPrivateFieldGet$1(target, _State_id, "f"),
1054
- id: `${__classPrivateFieldGet$1(state, _State_id, "f")}-${patternIx}`,
1055
- });
1056
- queue.push(target);
1057
- patternIx += 1;
1058
- }
1059
- }
1060
- // Always emit real halt as a sentinel, even if no transition targets it.
1061
- // It anchors the `subtree -. halt .-> s0` frame-level arrow whenever a
1062
- // frame demand-emits one, and it's the canonical machine-halt singleton.
1063
- if (!(0 in nodes)) {
1064
- nodes[0] = {
1065
- id: 0,
1066
- name: 'halt',
1067
- isHalt: true,
1068
- isHaltMarker: false,
1069
- isWrapper: false,
1070
- bareStateId: null,
1071
- frameId: null,
1072
- transitions: [],
1073
- overriddenHaltStateId: null,
1074
- };
1075
- }
1076
- // Pass 2: For each bare, compute its forward-reachable set (following
1077
- // transitions; stopping at halt and at wrappers — both are frame
1078
- // boundaries).
1079
- const computeReach = (startId) => {
1080
- const reach = new Set();
1081
- const stack = [startId];
1082
- while (stack.length > 0) {
1083
- const id = stack.pop();
1084
- if (reach.has(id)) {
1085
- continue;
1086
- }
1087
- const node = nodes[id];
1088
- if (!node || node.isHalt || node.isWrapper) {
1089
- continue;
1090
- }
1091
- reach.add(id);
1092
- for (const t of node.transitions) {
1093
- const target = nodes[t.nextStateId];
1094
- if (!target || target.isHalt || target.isWrapper) {
1095
- continue;
1096
- }
1097
- stack.push(t.nextStateId);
1098
- }
1099
- }
1100
- return reach;
1101
- };
1102
- const reachByBare = new Map();
1103
- for (const bareId of bareIds) {
1104
- reachByBare.set(bareId, computeReach(bareId));
1105
- }
1106
- // Pass 3: Union-find on bare overlaps. Two bares merge if their reach
1107
- // sets share any state. Canonical representative = smallest bare-id in
1108
- // the component.
1109
- const ufParent = new Map();
1110
- const ufFind = (id) => {
1111
- if (!ufParent.has(id)) {
1112
- ufParent.set(id, id);
1113
- }
1114
- let root = id;
1115
- while (ufParent.get(root) !== root) {
1116
- root = ufParent.get(root);
1117
- }
1118
- // Path compression
1119
- let cur = id;
1120
- while (ufParent.get(cur) !== root) {
1121
- const next = ufParent.get(cur);
1122
- ufParent.set(cur, root);
1123
- cur = next;
1124
- }
1125
- return root;
1126
- };
1127
- const ufUnion = (a, b) => {
1128
- const ra = ufFind(a);
1129
- const rb = ufFind(b);
1130
- if (ra === rb)
1131
- return;
1132
- if (ra < rb) {
1133
- ufParent.set(rb, ra);
1134
- }
1135
- else {
1136
- ufParent.set(ra, rb);
1137
- }
1138
- };
1139
- for (const bareId of bareIds) {
1140
- ufFind(bareId);
1141
- }
1142
- // For each state, collect the bares that reach it; union all bares that
1143
- // share a state.
1144
- const stateToReachingBares = new Map();
1145
- for (const [bareId, reachSet] of reachByBare) {
1146
- for (const stateId of reachSet) {
1147
- let bares = stateToReachingBares.get(stateId);
1148
- if (!bares) {
1149
- bares = [];
1150
- stateToReachingBares.set(stateId, bares);
1151
- }
1152
- bares.push(bareId);
1153
- }
1154
- }
1155
- for (const bares of stateToReachingBares.values()) {
1156
- for (let i = 1; i < bares.length; i += 1) {
1157
- ufUnion(bares[0], bares[i]);
1158
- }
1159
- }
1160
- // Assign frameId to each in-reach state.
1161
- const frameIds = new Set();
1162
- for (const [stateId, bares] of stateToReachingBares) {
1163
- const frameId = ufFind(bares[0]);
1164
- nodes[stateId].frameId = frameId;
1165
- frameIds.add(frameId);
1166
- }
1167
- // Pass 4: Retarget halt-bound transitions for in-frame states to the
1168
- // frame's halt marker. Out-of-frame states (top-level dispatcher, override
1169
- // targets, etc.) keep their halt-bound transitions pointing at real halt.
1170
- for (const node of Object.values(nodes)) {
1171
- if (node.frameId === null) {
1172
- continue;
1173
- }
1174
- const haltMarkerId = -node.frameId;
1175
- for (const t of node.transitions) {
1176
- const target = nodes[t.nextStateId];
1177
- if (target && target.isHalt && !target.isHaltMarker) {
1178
- t.nextStateId = haltMarkerId;
1179
- }
1180
- }
1181
- }
1182
- // Pass 5: Emit one halt marker per frame.
1183
- for (const frameId of frameIds) {
1184
- const haltMarkerId = -frameId;
1185
- nodes[haltMarkerId] = {
1186
- id: haltMarkerId,
1187
- name: 'halt',
1188
- isHalt: true,
1189
- isHaltMarker: true,
1190
- isWrapper: false,
1191
- bareStateId: null,
1192
- frameId,
1193
- transitions: [],
1194
- overriddenHaltStateId: null,
1195
- };
1196
- }
1197
- return { initialId: __classPrivateFieldGet$1(initialState, _State_id, "f"), alphabets, nodes };
1198
- }
1199
- // Inverse of toGraph: rebuilds a State graph (and a fresh TapeBlock with the
1200
- // graph's alphabets) from a serialized Graph. Round-trips with toGraph in
1201
- // the sense that running the rebuilt machine on the same input gives the
1202
- // same output, but the rebuilt State instances have *new* internal IDs.
1203
- //
1204
- // Under the v7 callable-subtree model (#174), graph nodes split into:
1205
- // - Wrapper nodes (`isWrapper: true`, no transitions) — reconstructed via
1206
- // `bareStates[bareStateId].withOverriddenHaltState(finalStates[overriddenHaltStateId])`.
1207
- // - Bare/regular nodes — constructed as normal States with transitions.
1208
- // - Halt + halt-marker nodes — collapse to the singleton `haltState`.
1568
+ return toGraph(initialState, tapeBlock);
1569
+ }
1570
+ /**
1571
+ * Inverse of `toGraph`: rebuilds a State graph and a fresh TapeBlock
1572
+ * from a serialized `Graph`. Thin delegate to `utilities/stateGraph.ts`'s
1573
+ * `fromGraph` (extracted in #180); see that module for the
1574
+ * reconstruction pass shape (Reference pre-create, bare build, wrapper
1575
+ * resolution via `withOverriddenHaltState`, ref binding).
1576
+ */
1209
1577
  static fromGraph(graph) {
1210
- const alphabetObjs = graph.alphabets.map((syms) => new Alphabet(syms));
1211
- const tapeBlock = TapeBlock.fromAlphabets(alphabetObjs);
1212
- const ids = Object.keys(graph.nodes).map(Number);
1213
- // Pass 1: pre-create a Reference for each non-halt non-halt-marker node
1214
- // (both wrappers and regulars). Halt and halt-marker nodes collapse to the
1215
- // singleton `haltState` and need no ref.
1216
- const refs = {};
1217
- for (const nodeId of ids) {
1218
- const node = graph.nodes[nodeId];
1219
- if (!node.isHalt) {
1220
- refs[nodeId] = new Reference();
1221
- }
1222
- }
1223
- // Convert a parsed pattern back to the symbol key the State expects.
1224
- const patternToKey = (parsed) => {
1225
- if (parsed === null) {
1226
- return ifOtherSymbol;
1227
- }
1228
- const flat = [];
1229
- for (const row of parsed) {
1230
- for (const cell of row) {
1231
- flat.push(cell === null ? ifOtherSymbol : cell);
1232
- }
1233
- }
1234
- return tapeBlock.symbol(flat);
1235
- };
1236
- // Pass 2: build a State for each non-wrapper non-halt non-halt-marker
1237
- // node. Transitions point at refs so cycles work; haltState (and halt
1238
- // markers, which collapse to haltState) are used directly.
1239
- const bareStates = {};
1240
- for (const nodeId of ids) {
1241
- const node = graph.nodes[nodeId];
1242
- if (node.isHalt || node.isWrapper) {
1243
- continue;
1244
- }
1245
- const stateDefinition = {};
1246
- for (const t of node.transitions) {
1247
- const key = patternToKey(parsePatternString(t.pattern, graph.alphabets));
1248
- const target = graph.nodes[t.nextStateId];
1249
- const nextState = !target || target.isHalt
1250
- ? haltState
1251
- : refs[t.nextStateId];
1252
- stateDefinition[key] = {
1253
- command: t.command.map((c) => ({
1254
- symbol: parseWriteSymbolLabel(c.symbol),
1255
- movement: parseMovementLabel(c.movement),
1256
- })),
1257
- nextState,
1258
- };
1259
- }
1260
- // Graph-sourced names may contain `(` and `)` (composite wrapper names —
1261
- // although wrappers go through a separate path below, defensive
1262
- // construction here keeps the bypass uniform). Construct without a name
1263
- // and assign `#name` directly to skip user-facing name validation.
1264
- const bare = new _a(stateDefinition);
1265
- __classPrivateFieldSet$1(bare, _State_name, node.name, "f");
1266
- bareStates[nodeId] = bare;
1267
- }
1268
- // Pass 3: resolve every node to its final State (memoized + cycle-safe).
1269
- // Wrappers compose lazily via `withOverriddenHaltState` once their bare
1270
- // and override are resolved.
1271
- const finalStates = {};
1272
- const inProgress = new Set();
1273
- const getFinal = (nodeId) => {
1274
- if (finalStates[nodeId]) {
1275
- return finalStates[nodeId];
1276
- }
1277
- const node = graph.nodes[nodeId];
1278
- if (!node || node.isHalt) {
1279
- finalStates[nodeId] = haltState;
1280
- return haltState;
1281
- }
1282
- if (inProgress.has(nodeId)) {
1283
- throw new Error(`override-halt cycle at state #${nodeId}`);
1284
- }
1285
- inProgress.add(nodeId);
1286
- let state;
1287
- if (node.isWrapper) {
1288
- const bare = getFinal(node.bareStateId);
1289
- const override = getFinal(node.overriddenHaltStateId);
1290
- state = bare.withOverriddenHaltState(override);
1291
- }
1292
- else {
1293
- state = bareStates[nodeId];
1294
- }
1295
- inProgress.delete(nodeId);
1296
- finalStates[nodeId] = state;
1297
- return state;
1298
- };
1299
- for (const nodeId of ids) {
1300
- getFinal(nodeId);
1301
- }
1302
- // Pass 4: bind each ref to the resolved final State so cross-node
1303
- // transitions land on the right instance.
1304
- for (const nodeId of ids) {
1305
- if (!graph.nodes[nodeId].isHalt) {
1306
- refs[nodeId].bind(finalStates[nodeId]);
1307
- }
1308
- }
1309
- return {
1310
- start: finalStates[graph.initialId],
1311
- tapeBlock,
1312
- states: finalStates,
1313
- };
1578
+ return fromGraph(graph);
1579
+ }
1580
+ /**
1581
+ * Returns a `Map<number, {state, transitionSymbols}>` keyed by engine
1582
+ * `GraphNode.id`, exposing the live `State` instance + per-pattern
1583
+ * Symbol references for each node so downstream tooling can mutate
1584
+ * `state.debug` by numeric id and set per-pattern breakpoints by
1585
+ * `GraphTransition.id` (#195). Thin delegate to
1586
+ * `utilities/stateGraph.ts`'s `collectStates`; see that module for
1587
+ * the alignment contract, coverage rules, and halt-singleton warning.
1588
+ */
1589
+ static collectStates(initialState, tapeBlock) {
1590
+ return collectStates(initialState, tapeBlock);
1314
1591
  }
1315
1592
  }
1316
1593
  _a = State;
@@ -1333,7 +1610,7 @@ var __classPrivateFieldGet = (undefined && undefined.__classPrivateFieldGet) ||
1333
1610
  if (typeof state === "function" ? receiver !== state || !f : !state.has(receiver)) throw new TypeError("Cannot read private member from an object whose class did not declare it");
1334
1611
  return kind === "m" ? f : kind === "a" ? f.call(receiver) : f ? f.value : state.get(receiver);
1335
1612
  };
1336
- var _TuringMachine_tapeBlock, _TuringMachine_stack;
1613
+ var _TuringMachine_tapeBlock;
1337
1614
  // True iff `filter` matches `symbol` per the DebugConfig semantics.
1338
1615
  // undefined / [] -> never; true -> always; symbol[] -> exact membership.
1339
1616
  function matchFilter(filter, symbol) {
@@ -1346,7 +1623,6 @@ function matchFilter(filter, symbol) {
1346
1623
  class TuringMachine {
1347
1624
  constructor({ tapeBlock, } = {}) {
1348
1625
  _TuringMachine_tapeBlock.set(this, void 0);
1349
- _TuringMachine_stack.set(this, []);
1350
1626
  if (!tapeBlock) {
1351
1627
  throw new Error('invalid tapeBlock');
1352
1628
  }
@@ -1380,7 +1656,14 @@ class TuringMachine {
1380
1656
  try {
1381
1657
  __classPrivateFieldGet(this, _TuringMachine_tapeBlock, "f")[lockSymbol].check(executionSymbol);
1382
1658
  __classPrivateFieldGet(this, _TuringMachine_tapeBlock, "f")[lockSymbol].lock(executionSymbol);
1383
- const stack = __classPrivateFieldGet(this, _TuringMachine_stack, "f");
1659
+ // Halt-stack is run-scoped, not machine-scoped (#196). Declaring it
1660
+ // local makes that lifetime explicit and prevents leftover entries
1661
+ // from a previous `runStepByStep` call (e.g. a build-time peek that
1662
+ // never drained the generator) from being popped during a subsequent
1663
+ // halt-bound transition. Before this change `#stack` was an instance
1664
+ // field and accumulated one extra push per call when the same machine
1665
+ // was reused.
1666
+ const stack = [];
1384
1667
  let state = initialState;
1385
1668
  if (state.overriddenHaltState) {
1386
1669
  stack.push(state.overriddenHaltState);
@@ -1455,7 +1738,7 @@ class TuringMachine {
1455
1738
  }
1456
1739
  }
1457
1740
  }
1458
- _TuringMachine_tapeBlock = new WeakMap(), _TuringMachine_stack = new WeakMap();
1741
+ _TuringMachine_tapeBlock = new WeakMap();
1459
1742
 
1460
1743
  // Format converters between a Graph (the data model produced by State.toGraph
1461
1744
  // and consumed by State.fromGraph) and external string representations.
@@ -1498,6 +1781,79 @@ function parseMermaidId(s) {
1498
1781
  function frameSubgraphId(frameId) {
1499
1782
  return `w_${frameId}`;
1500
1783
  }
1784
+ // User-controlled content (state names, tag names, alphabet symbols inside
1785
+ // edge labels) is interpolated into Mermaid label strings (`"..."` wrappers
1786
+ // on nodes, wrappers, subgraphs, and edges). Mermaid's grammar terminates
1787
+ // the string on a literal `"`, and labels render via HTML/foreignObject so
1788
+ // `<`, `>`, `&` get interpreted as markup. Statement terminators (`\n`,
1789
+ // `\r`), C0 controls (except `\t`), DEL, bidi controls, and lone UTF-16
1790
+ // surrogates are encoded as numeric entities so they can't confuse the
1791
+ // tokenizer or flip text direction silently (#194).
1792
+ //
1793
+ // Printable Unicode (Cyrillic, CJK, emoji, accented Latin, etc.) passes
1794
+ // through unchanged — a tape alphabet of Cyrillic or Brainfuck glyphs
1795
+ // stays readable in the emitted `.mmd`.
1796
+ //
1797
+ // Escape is applied at the leaf — to each user-supplied fragment BEFORE
1798
+ // it's composed into a label. Structural pieces this module emits (`<br>`
1799
+ // tag separator, ` ∪ ` bare-name join, `[`, `]`, `,`, `|`, `/`, ` → `,
1800
+ // the `callable subtree of `/`callable scope: ` prefixes) are NOT escaped;
1801
+ // only user-controlled content is. fromMermaid mirrors with
1802
+ // `unescapeMermaidLabel` on each extracted leaf AFTER structural parsing,
1803
+ // so a literal `<br>` inside a state name (encoded as `&lt;br&gt;`)
1804
+ // survives the tag-split and decodes back at the leaf.
1805
/**
 * Characters that must be entity-encoded before user text is placed inside a
 * Mermaid `"..."` label: `&`, `"`, `<`, `>`, `\n`, `\r`, the C0 controls other
 * than `\t`, DEL, the bidi controls U+202A-U+202E / U+2066-U+2069, and LONE
 * UTF-16 surrogates.
 *
 * The `u` flag is load-bearing. Without it the class is matched per UTF-16
 * code unit, so `\uD800-\uDFFF` would also hit both halves of every valid
 * surrogate pair and turn emoji / other astral characters into two numeric
 * entities. In Unicode mode a well-formed pair is a single code point above
 * U+FFFF and falls outside the range; only unpaired surrogates match.
 */
const MERMAID_LABEL_ESCAPE_RE = /[&"<>\n\r\u0000-\u0008\u000B\u000C\u000E-\u001F\u007F\u202A-\u202E\u2066-\u2069\uD800-\uDFFF]/gu;

/**
 * Encode one user-supplied fragment for safe interpolation into a Mermaid
 * label string.
 *
 * @param {string} s - Raw fragment (state name, tag, alphabet symbol, ...).
 * @returns {string} `s` with the four markup characters replaced by named
 *   entities and every other matched character replaced by a decimal numeric
 *   entity. Printable text, including astral characters such as emoji, is
 *   returned unchanged.
 */
function escapeMermaidLabel(s) {
  return s.replace(MERMAID_LABEL_ESCAPE_RE, (ch) => {
    switch (ch) {
      case '&': return '&amp;';
      case '"': return '&quot;';
      case '<': return '&lt;';
      case '>': return '&gt;';
      case '\n': return '&#10;';
      case '\r': return '&#13;';
      // Everything else the pattern can match is a single UTF-16 code unit
      // (a BMP control/bidi character or a lone surrogate), so
      // `charCodeAt(0)` is the complete value to encode.
      default: return `&#${ch.charCodeAt(0)};`;
    }
  });
}
1819
+ // Inverse of escapeMermaidLabel. Decodes the four named entities the
1820
+ // encoder emits (`&amp;`, `&quot;`, `&lt;`, `&gt;`) plus arbitrary
1821
+ // numeric entities (`&#NN;`, `&#xHH;`) — the latter to round-trip the
1822
+ // control / bidi / lone-surrogate cases from encode. Other named entities
1823
+ // pass through unchanged: fromMermaid is strict to the dialect toMermaid
1824
+ // emits, and a future-proof full HTML-entity decoder would muddle that.
1825
+ //
1826
+ // Replacement is single-pass: each `&...;` match is consumed once with
1827
+ // no re-scanning of the substitution, so nested-looking inputs like
1828
+ // `&amp;quot;` (literal `&quot;` as user text) decode to `&quot;` not `"`.
1829
/**
 * Matches the entities `unescapeMermaidLabel` decodes: the four named
 * entities (`amp`, `quot`, `lt`, `gt`), decimal `&#NN;`, and hex `&#xHH;`.
 * Capture groups: 1 = entity name, 2 = decimal digits, 3 = hex digits.
 */
const MERMAID_LABEL_UNESCAPE_RE = /&(?:(amp|quot|lt|gt)|#(\d+)|#x([0-9a-fA-F]+));/g;

/**
 * Decode the entities in one label fragment.
 *
 * Replacement is single-pass: the substituted text is never re-scanned, so
 * `&amp;quot;` decodes to the literal text `&quot;`, not to `"`.
 *
 * @param {string} s - Label fragment possibly containing entities.
 * @returns {string} The decoded fragment. Named entities other than the four
 *   above, and numeric entities whose value is not a valid Unicode code point
 *   (greater than U+10FFFF), are left exactly as written instead of throwing.
 */
function unescapeMermaidLabel(s) {
  return s.replace(MERMAID_LABEL_UNESCAPE_RE, (match, named, dec, hex) => {
    switch (named) {
      case 'amp': return '&';
      case 'quot': return '"';
      case 'lt': return '<';
      case 'gt': return '>';
      default: break;
    }
    // Exactly one of `dec` / `hex` is defined for a numeric entity. If
    // neither were, the parse yields NaN and the guard below returns `match`.
    const codePoint = dec !== undefined
      ? Number.parseInt(dec, 10)
      : Number.parseInt(hex, 16);
    // `String.fromCodePoint` throws RangeError above U+10FFFF. Such a value
    // can only come from hand-edited input, so pass it through untouched.
    // The negated comparison also catches NaN.
    if (!(codePoint <= 0x10FFFF)) {
      return match;
    }
    // Values up to U+FFFF go through fromCharCode so a lone surrogate that
    // was encoded as a single UTF-16 code unit decodes back to that same
    // unit. Larger values need fromCodePoint to build the surrogate pair.
    return codePoint <= 0xFFFF
      ? String.fromCharCode(codePoint)
      : String.fromCodePoint(codePoint);
  });
}
1501
1857
  function toMermaid(graph) {
1502
1858
  const lines = [
1503
1859
  'flowchart TD',
@@ -1530,6 +1886,25 @@ function toMermaid(graph) {
1530
1886
  bucket.push(node);
1531
1887
  }
1532
1888
  }
1889
+ // Build the visible-label string for a node — name plus, if tagged, a
1890
+ // `<br>tag1, tag2, ...` suffix so the rendered Mermaid shows both. Tags
1891
+ // are the source of truth on the GraphNode; `<br>` is the universal
1892
+ // Mermaid line-break that works across renderers without `classDef`-
1893
+ // pseudo-element hacks (#186).
1894
const labelOf = (node) => {
  // The escaped name is always the first (and possibly only) label line.
  const escapedName = escapeMermaidLabel(node.name);
  const { tags } = node;
  if (tags.length === 0) {
    return escapedName;
  }
  // Tags are joined with `, ` below, so a comma inside a tag is additionally
  // encoded as `&#44;` on top of the base escape; otherwise it would be
  // indistinguishable from a separator when the label is split back into
  // tags.
  const encodeTag = (tag) => escapeMermaidLabel(tag).replace(/,/g, '&#44;');
  const tagLine = tags.map((tag) => encodeTag(tag)).join(', ');
  // `<br>` is the structural line break between the name and the tag list;
  // it is emitted as-is, not escaped.
  return [escapedName, tagLine].join('<br>');
};
1533
1908
  // 1. Emit top-level nodes (real halt, non-wrapper regulars outside any frame).
1534
1909
  for (const node of topLevelNodes) {
1535
1910
  const mid = mermaidIdFor(node.id);
@@ -1537,12 +1912,12 @@ function toMermaid(graph) {
1537
1912
  lines.push(` ${mid}(((halt)))`);
1538
1913
  }
1539
1914
  else {
1540
- lines.push(` ${mid}["${node.name}"]`);
1915
+ lines.push(` ${mid}["${labelOf(node)}"]`);
1541
1916
  }
1542
1917
  }
1543
1918
  // 2. Emit wrappers at top level.
1544
1919
  for (const wrapper of wrapperNodes) {
1545
- lines.push(` ${mermaidIdFor(wrapper.id)}[["${wrapper.name}"]]`);
1920
+ lines.push(` ${mermaidIdFor(wrapper.id)}[["${labelOf(wrapper)}"]]`);
1546
1921
  }
1547
1922
  // 3. `idle` sentinel.
1548
1923
  lines.push(' idle([idle])');
@@ -1553,19 +1928,20 @@ function toMermaid(graph) {
1553
1928
  const frameBareNames = frameBares
1554
1929
  .slice()
1555
1930
  .sort((a, b) => a.id - b.id)
1556
- .map((n) => n.name);
1931
+ .map((n) => escapeMermaidLabel(n.name));
1557
1932
  const label = frameBareNames.length > 1
1558
1933
  ? `callable scope: ${frameBareNames.join(' ∪ ')}`
1559
1934
  : `callable subtree of ${frameBareNames[0] ?? frameId}`;
1560
1935
  lines.push(` subgraph ${frameSubgraphId(frameId)}["${label}"]`);
1561
1936
  // Inner nodes — sort by id for determinism.
1562
1937
  for (const node of (nodesByFrame.get(frameId) ?? []).slice().sort((a, b) => a.id - b.id)) {
1563
- lines.push(` ${mermaidIdFor(node.id)}["${node.name}"]`);
1938
+ lines.push(` ${mermaidIdFor(node.id)}["${labelOf(node)}"]`);
1564
1939
  }
1940
+ // Every frame has a halt marker — `State.toGraph`'s frame-emit pass
1941
+ // creates one for each frame. Non-null assertion is safe; a defensive
1942
+ // null check would be dead.
1565
1943
  const haltMarker = haltMarkerByFrame.get(frameId);
1566
- if (haltMarker) {
1567
- lines.push(` ${mermaidIdFor(haltMarker.id)}(((halt)))`);
1568
- }
1944
+ lines.push(` ${mermaidIdFor(haltMarker.id)}(((halt)))`);
1569
1945
  lines.push(' end');
1570
1946
  }
1571
1947
  // 5. Enter arrow.
@@ -1618,29 +1994,31 @@ function toMermaid(graph) {
1618
1994
  for (const frameId of frameIds) {
1619
1995
  if (!haltMarkerHasIncoming.get(frameId))
1620
1996
  continue;
1621
- // Return arrow — collapsed `&` ribbon over all wrappers calling this frame.
1997
+ // Return arrow — collapsed `&` ribbon over all wrappers calling this
1998
+ // frame. Frames only exist because at least one wrapper's bareStateId
1999
+ // points to a bare in the frame, so `callingWrappers` is always
2000
+ // non-empty for any frame that reached this code path.
1622
2001
  const callingWrappers = wrapperNodes.filter((w) => {
1623
- if (w.bareStateId === null)
1624
- return false;
1625
2002
  const bare = graph.nodes[w.bareStateId];
1626
- return !!bare && bare.frameId === frameId;
2003
+ return bare.frameId === frameId;
1627
2004
  });
1628
- if (callingWrappers.length > 0) {
1629
- const targets = callingWrappers
1630
- .slice()
1631
- .sort((a, b) => a.id - b.id)
1632
- .map((w) => mermaidIdFor(w.id))
1633
- .join(' & ');
1634
- lines.push(` ${frameSubgraphId(frameId)} -. "return" .-> ${targets}`);
1635
- }
2005
+ const targets = callingWrappers
2006
+ .slice()
2007
+ .sort((a, b) => a.id - b.id)
2008
+ .map((w) => mermaidIdFor(w.id))
2009
+ .join(' & ');
2010
+ lines.push(` ${frameSubgraphId(frameId)} -. "return" .-> ${targets}`);
1636
2011
  if (hasNonWrapperEntry.get(frameId)) {
1637
2012
  lines.push(` ${frameSubgraphId(frameId)} -. "halt" .-> s0`);
1638
2013
  }
1639
2014
  }
1640
2015
  // 8. Wrapper-to-override arrows (regular solid).
2016
+ //
2017
+ // `wrapper.overriddenHaltStateId` is always non-null on wrapper nodes
2018
+ // (set by `State.toGraph` for every `isWrapper: true` node — it's the
2019
+ // wrapper's override target, which a wrapper by definition has). The
2020
+ // non-null assertion is safe; a defensive null check would be dead.
1641
2021
  for (const wrapper of wrapperNodes) {
1642
- if (wrapper.overriddenHaltStateId === null)
1643
- continue;
1644
2022
  lines.push(` ${mermaidIdFor(wrapper.id)} --> ${mermaidIdFor(wrapper.overriddenHaltStateId)}`);
1645
2023
  }
1646
2024
  // 9. Regular transitions for non-wrapper non-halt-marker non-halt nodes.
@@ -1652,18 +2030,84 @@ function toMermaid(graph) {
1652
2030
  const reads = alternatives.map((alt) => `[${alt}]`).join('|');
1653
2031
  const writes = `[${t.command.map((c) => c.symbol).join(',')}]`;
1654
2032
  const moves = `[${t.command.map((c) => c.movement).join(',')}]`;
1655
- const label = `${reads} ${writes}/${moves}`;
2033
+ // Escape the WHOLE composed label — structural separators ([, ], ,,
2034
+ // |, /, ' → ') are all in our safe ASCII set and pass through
2035
+ // unchanged; only embedded user alphabet symbols inside `'...'` get
2036
+ // entity-encoded. fromMermaid unescapes the captured label as the
2037
+ // first step before structural parsing.
2038
+ const label = escapeMermaidLabel(`${reads} → ${writes}/${moves}`);
1656
2039
  lines.push(` ${mermaidIdFor(node.id)} -- "${label}" --> ${mermaidIdFor(t.nextStateId)}`);
1657
2040
  }
1658
2041
  }
2042
+ // 10. Tags (#186) — emit one `classDef tag_<name> fill:#...` per unique
2043
+ // tag across all nodes, then one `class <ids> tag_<name>` line per
2044
+ // tag listing every node that carries it (comma-joined for compact
2045
+ // emit). Tag-name → CSS-class identifier sanitization replaces any
2046
+ // char outside `[A-Za-z0-9_-]` with `_`; tag-name uniqueness in the
2047
+ // emit assumes user tags are already distinct after sanitization
2048
+ // (collisions are user error).
2049
+ emitTagAnnotations(lines, nodes);
1659
2050
  return lines.join('\n');
1660
2051
  }
2052
+ // Default Mermaid `classDef` palette — 6 visually distinct fill+stroke pairs,
2053
+ // selected by tag-name hash so multi-tag diagrams look readable out of the
2054
+ // box without user configuration. Users who want different colors can edit
2055
+ // the emitted Mermaid before rendering or override post-emit.
2056
+ const TAG_PALETTE = [
2057
+ ['#fef3c7', '#92400e'], // amber
2058
+ ['#dbeafe', '#1e40af'], // blue
2059
+ ['#dcfce7', '#166534'], // green
2060
+ ['#fce7f3', '#9d174d'], // pink
2061
+ ['#ede9fe', '#5b21b6'], // violet
2062
+ ['#fee2e2', '#991b1b'], // red
2063
+ ];
2064
+ function sanitizeTagName(tag) {
2065
+ return tag.replace(/[^A-Za-z0-9_-]/g, '_');
2066
+ }
2067
+ function tagColor(tag) {
2068
+ // Cheap deterministic hash — sum of char codes mod palette length. Stable
2069
+ // across runs; same tag name always picks the same color.
2070
+ let h = 0;
2071
+ for (let i = 0; i < tag.length; i += 1) {
2072
+ h = (h + tag.charCodeAt(i)) % TAG_PALETTE.length;
2073
+ }
2074
+ return TAG_PALETTE[h];
2075
+ }
2076
+ function emitTagAnnotations(lines, nodes) {
2077
+ // Collect nodes per tag in node-id order so output is deterministic.
2078
+ const nodesByTag = new Map();
2079
+ for (const node of nodes) {
2080
+ for (const tag of node.tags) {
2081
+ let list = nodesByTag.get(tag);
2082
+ if (!list) {
2083
+ list = [];
2084
+ nodesByTag.set(tag, list);
2085
+ }
2086
+ list.push(node.id);
2087
+ }
2088
+ }
2089
+ if (nodesByTag.size === 0)
2090
+ return;
2091
+ const sortedTags = [...nodesByTag.keys()].sort();
2092
+ for (const tag of sortedTags) {
2093
+ const sanitized = sanitizeTagName(tag);
2094
+ const [fill, stroke] = tagColor(tag);
2095
+ lines.push(` classDef tag_${sanitized} fill:${fill},stroke:${stroke}`);
2096
+ }
2097
+ for (const tag of sortedTags) {
2098
+ const sanitized = sanitizeTagName(tag);
2099
+ const ids = nodesByTag.get(tag).map((id) => mermaidIdFor(id)).join(',');
2100
+ lines.push(` class ${ids} tag_${sanitized}`);
2101
+ }
2102
+ }
1661
2103
  // Helper: identify "the bare states" that anchor a frame's name. A bare is a
1662
2104
  // node referenced as some wrapper's `bareStateId`. Body states (also in-frame
1663
2105
  // but not bare) are excluded from the frame label.
2106
+ //
2107
+ // The caller in `toMermaid` only passes non-wrapper, non-halt-marker nodes
2108
+ // (wrappers go to a separate bucket; halt markers go to `haltMarkerByFrame`).
2109
+ // No defensive `isHalt` / `isWrapper` guards needed here.
1664
2110
  function isFrameBare(node, graph) {
1665
- if (node.isWrapper || node.isHalt)
1666
- return false;
1667
2111
  for (const other of Object.values(graph.nodes)) {
1668
2112
  if (other.isWrapper && other.bareStateId === node.id) {
1669
2113
  return true;
@@ -1705,6 +2149,42 @@ const haltArrowRegex = /^w_(\d+)\s+-\.\s+"halt"\s+\.->\s+s0$/;
1705
2149
  // First capture char anchored as \S to avoid polynomial backtracking between
1706
2150
  // the preceding \s* and a permissive (.+); see CodeQL js/polynomial-redos.
1707
2151
  const alphabetsRegex = /^%%\s*alphabets:\s*(\S.*)$/;
2152
+ // Tag annotation lines (#186). Matches both `classDef tag_<sanitized>` and
2153
+ // `class <id-list> tag_<sanitized>`. ClassDef declarations are decorative
2154
+ // (palette) and discarded on parse — toMermaid will regenerate them from
2155
+ // the tag set on re-emit. `class` lines carry the actual graph-node
2156
+ // assignments; we strip the `tag_` prefix and assign each tag to each
2157
+ // listed node's `tags` array.
2158
+ //
2159
+ // Inter-token gaps are fixed at single literal spaces (matching toMermaid's
2160
+ // canonical emit) rather than `\s+`. This avoids the polynomial-ReDoS
2161
+ // pattern CodeQL flags when `\s+` surrounds a content group (see also
2162
+ // `callArrowRegex` / `returnArrowRegex` tightening in PR #182).
2163
+ const classDefTagRegex = /^classDef tag_([A-Za-z0-9_-]+) .+$/;
2164
+ const classAssignTagRegex = /^class ([sc]\d+(?:,[sc]\d+)*) tag_([A-Za-z0-9_-]+)$/;
2165
+ // Splits a node label like `"A<br>hot, sampled"` into its name and tags (#186).
2166
+ // Labels without `<br>` have no tags. Tags are comma-joined; trimmed of
2167
+ // whitespace. The `<br>` is the single source of truth for tag-name parsing —
2168
+ // `class` lines are not consulted here (`fromMermaid` parses them separately).
2169
+ //
2170
+ // Mermaid-label entities (`&lt;`, `&quot;`, etc., #194) are decoded AFTER
2171
+ // structural splitting: the `<br>` separator and `,` tag delimiter survive
2172
+ // encode unchanged, and a user state name / tag containing a literal `<br>`
2173
+ // or `,` was encoded leaf-side so it can't be confused with the structural
2174
+ // form. Decode at the leaves recovers the original characters.
2175
+ function splitLabelTags(label) {
2176
+ const brIx = label.indexOf('<br>');
2177
+ if (brIx < 0) {
2178
+ return { name: unescapeMermaidLabel(label), tags: [] };
2179
+ }
2180
+ const name = unescapeMermaidLabel(label.slice(0, brIx));
2181
+ const tagsStr = label.slice(brIx + '<br>'.length);
2182
+ const tags = tagsStr
2183
+ .split(',')
2184
+ .map((t) => unescapeMermaidLabel(t.trim()))
2185
+ .filter((t) => t.length > 0);
2186
+ return { name, tags };
2187
+ }
1708
2188
  function fromMermaid(text) {
1709
2189
  const lines = text.split('\n').map((l) => l.trim()).filter(Boolean);
1710
2190
  let alphabets = [];
@@ -1723,6 +2203,7 @@ function fromMermaid(text) {
1723
2203
  frameId: opts.frameId ?? null,
1724
2204
  transitions: [],
1725
2205
  overriddenHaltStateId: null,
2206
+ tags: opts.tags ? [...opts.tags] : [],
1726
2207
  };
1727
2208
  }
1728
2209
  else {
@@ -1738,6 +2219,12 @@ function fromMermaid(text) {
1738
2219
  nodes[id].bareStateId = opts.bareStateId;
1739
2220
  if (opts.frameId !== undefined)
1740
2221
  nodes[id].frameId = opts.frameId;
2222
+ if (opts.tags !== undefined) {
2223
+ for (const t of opts.tags) {
2224
+ if (!nodes[id].tags.includes(t))
2225
+ nodes[id].tags.push(t);
2226
+ }
2227
+ }
1741
2228
  }
1742
2229
  return nodes[id];
1743
2230
  };
@@ -1750,6 +2237,11 @@ function fromMermaid(text) {
1750
2237
  alphabets = JSON.parse(am[1]);
1751
2238
  continue;
1752
2239
  }
2240
+ // Tag annotations (#186) — classDef lines are decorative and skipped;
2241
+ // `class` lines are parsed in the edge pass since they reference nodes
2242
+ // by id and need those nodes already created in the first pass.
2243
+ if (classDefTagRegex.test(line))
2244
+ continue;
1753
2245
  const sgStart = line.match(subgraphStartRegex);
1754
2246
  if (sgStart) {
1755
2247
  currentFrameId = Number(sgStart[1]);
@@ -1775,17 +2267,21 @@ function fromMermaid(text) {
1775
2267
  }
1776
2268
  const wm = line.match(wrappedNodeRegex);
1777
2269
  if (wm) {
2270
+ const { name, tags } = splitLabelTags(wm[2]);
1778
2271
  ensureNode(parseMermaidId(wm[1]), {
1779
- name: wm[2],
2272
+ name,
1780
2273
  isWrapper: true,
2274
+ tags,
1781
2275
  });
1782
2276
  continue;
1783
2277
  }
1784
2278
  const rm = line.match(regularNodeRegex);
1785
2279
  if (rm) {
2280
+ const { name, tags } = splitLabelTags(rm[2]);
1786
2281
  ensureNode(parseMermaidId(rm[1]), {
1787
- name: rm[2],
2282
+ name,
1788
2283
  frameId: currentFrameId,
2284
+ tags,
1789
2285
  });
1790
2286
  continue;
1791
2287
  }
@@ -1802,6 +2298,19 @@ function fromMermaid(text) {
1802
2298
  if (returnArrowRegex.test(line) || haltArrowRegex.test(line)) {
1803
2299
  continue;
1804
2300
  }
2301
+ // Tag class-assignment line (#186): `class s1,s5 tag_hot` — adds
2302
+ // the tag to each listed node. Tag-name preserved as written
2303
+ // (sanitization on emit is lossy in principle; on parse we don't
2304
+ // un-sanitize, since the original could have any characters).
2305
+ const tagMatch = line.match(classAssignTagRegex);
2306
+ if (tagMatch) {
2307
+ const ids = tagMatch[1].split(',');
2308
+ const tagName = tagMatch[2];
2309
+ for (const idStr of ids) {
2310
+ ensureNode(parseMermaidId(idStr), { tags: [tagName] });
2311
+ }
2312
+ continue;
2313
+ }
1805
2314
  // `call` arrow — sets bareStateId on each source wrapper.
1806
2315
  const cm = line.match(callArrowRegex);
1807
2316
  if (cm) {
@@ -1818,7 +2327,12 @@ function fromMermaid(text) {
1818
2327
  if (wo) {
1819
2328
  const fromId = parseMermaidId(wo[1]);
1820
2329
  const toId = parseMermaidId(wo[2]);
1821
- if (nodes[fromId] && nodes[fromId].isWrapper) {
2330
+ // The wrapper-override regex only matches `sN --> sM` (unlabeled);
2331
+ // since `toMermaid` only emits this shape from wrappers, the source
2332
+ // is guaranteed to be a wrapper if `fromMermaid`'s input came from
2333
+ // `toMermaid`. `nodes[fromId]` is always populated (first pass parses
2334
+ // node declarations before any edge parsing).
2335
+ if (nodes[fromId].isWrapper) {
1822
2336
  nodes[fromId].overriddenHaltStateId = toId;
1823
2337
  continue;
1824
2338
  }
@@ -1828,7 +2342,12 @@ function fromMermaid(text) {
1828
2342
  const tm = line.match(labeledTransitionRegex);
1829
2343
  if (tm) {
1830
2344
  const fromId = parseMermaidId(tm[1]);
1831
- const label = tm[2];
2345
+ // Decode the WHOLE captured label up front (#194). Structural
2346
+ // separators (`[`, `]`, `,`, `|`, `/`, ` → `) are all safe ASCII
2347
+ // outside the escape set and pass through encode unchanged, so it's
2348
+ // safe to decode before structural parsing; only embedded alphabet
2349
+ // symbols inside `'...'` get reconstituted.
2350
+ const label = unescapeMermaidLabel(tm[2]);
1832
2351
  const toId = parseMermaidId(tm[3]);
1833
2352
  const arrowIx = label.indexOf(' → ');
1834
2353
  if (arrowIx === -1) {