@turing-machine-js/machine 7.0.0-alpha.2 → 7.0.0-alpha.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -175,6 +175,153 @@ class Reference {
175
175
  }
176
176
  _Reference_referenceBinding = new WeakMap();
177
177
 
178
+ const movementDescriptionToLabel = {
179
+ 'move caret left command': 'L',
180
+ 'move caret right command': 'R',
181
+ 'do not move carer': 'S',
182
+ };
183
+ const symbolCommandDescriptionToLabel = {
184
+ 'keep symbol command': 'K',
185
+ 'erase symbol command': 'E',
186
+ };
187
+ // Reserved characters in the encoded pattern string:
188
+ // '*' ASCII asterisk (U+002A) — per-cell ifOtherSymbol, matches any symbol
189
+ // on that tape. ASCII (not a fancier glyph like U+1F7B0) so it renders
190
+ // in every Mermaid environment and every monospace font. A literal `*`
191
+ // in the alphabet is unambiguous from the marker because it's quoted
192
+ // (`'*'`).
193
+ // 'B' the tape's blank symbol shorthand (in read patterns). A literal `B`
194
+ // in the alphabet is unambiguous from the marker because it's quoted
195
+ // (`'B'`).
196
+ // ',' separates per-tape cells inside one pattern
197
+ // '|' separates alternative patterns
198
+ // "'" surrounds a literal alphabet symbol — e.g. `'0'` for literal `0`,
199
+ // `'X'` for literal `X`. The quoting is what visually separates literal
200
+ // symbols from the convention markers `*` / `B` and from the write
201
+ // commands `K` / `E`.
202
+ // '\\' escape prefix — to represent any of '*', 'B', ',', '|', "'", or '\\'
203
+ // as a *literal* alphabet symbol *inside* the quotes (e.g. `'\''` for
204
+ // a literal apostrophe).
205
+ const IF_OTHER_MARKER = '*';
206
+ const BLANK_MARKER = 'B';
207
+ function escapeAlphabetSymbol(s) {
208
+ return s
209
+ .replace(/\\/g, '\\\\')
210
+ .replace(/'/g, "\\'");
211
+ }
212
+ function decodePatternDescription(description, alphabets) {
213
+ if (!description) {
214
+ return '?';
215
+ }
216
+ if (description === 'other symbol') {
217
+ return IF_OTHER_MARKER;
218
+ }
219
+ try {
220
+ const patternList = JSON.parse(description);
221
+ return patternList
222
+ .map((pattern) => pattern
223
+ .map((s, tapeIx) => {
224
+ if (s === null) {
225
+ return IF_OTHER_MARKER;
226
+ }
227
+ if (s === alphabets[tapeIx]?.[0]) {
228
+ return BLANK_MARKER;
229
+ }
230
+ return `'${escapeAlphabetSymbol(s)}'`;
231
+ })
232
+ .join(','))
233
+ .join('|');
234
+ }
235
+ catch {
236
+ return description;
237
+ }
238
+ }
239
+ function decodeMovement(description) {
240
+ if (!description) {
241
+ return '?';
242
+ }
243
+ return movementDescriptionToLabel[description] ?? description;
244
+ }
245
+ function splitUnescaped(s, sep) {
246
+ const parts = [];
247
+ let current = '';
248
+ let i = 0;
249
+ while (i < s.length) {
250
+ if (s[i] === '\\' && i + 1 < s.length) {
251
+ current += s[i + 1];
252
+ i += 2;
253
+ }
254
+ else if (s[i] === sep) {
255
+ parts.push(current);
256
+ current = '';
257
+ i += 1;
258
+ }
259
+ else {
260
+ current += s[i];
261
+ i += 1;
262
+ }
263
+ }
264
+ parts.push(current);
265
+ return parts;
266
+ }
267
+ function parsePatternString(s, alphabets) {
268
+ if (s === IF_OTHER_MARKER) {
269
+ return null;
270
+ }
271
+ const alternatives = splitUnescaped(s, '|');
272
+ return alternatives.map((alt) => {
273
+ const cells = splitUnescaped(alt, ',');
274
+ return cells.map((cell, tapeIx) => {
275
+ if (cell === IF_OTHER_MARKER) {
276
+ return null;
277
+ }
278
+ if (cell === BLANK_MARKER) {
279
+ return alphabets[tapeIx]?.[0] ?? cell;
280
+ }
281
+ // Literal alphabet symbols are wrapped in single quotes by
282
+ // `decodePatternDescription` — strip them on the way back.
283
+ if (cell.length >= 2 && cell.startsWith("'") && cell.endsWith("'")) {
284
+ return cell.slice(1, -1);
285
+ }
286
+ return cell;
287
+ });
288
+ });
289
+ }
290
+ const movementLabelToSymbol = {
291
+ L: movements.left,
292
+ R: movements.right,
293
+ S: movements.stay,
294
+ };
295
+ function parseMovementLabel(label) {
296
+ const m = movementLabelToSymbol[label];
297
+ if (!m) {
298
+ throw new Error(`unknown movement label: ${label}`);
299
+ }
300
+ return m;
301
+ }
302
+ function parseWriteSymbolLabel(label) {
303
+ if (label === 'K') {
304
+ return symbolCommands.keep;
305
+ }
306
+ if (label === 'E') {
307
+ return symbolCommands.erase;
308
+ }
309
+ // Literal alphabet symbols are wrapped in single quotes by
310
+ // `decodeWriteSymbol` — strip them on the way back.
311
+ if (label.length >= 2 && label.startsWith("'") && label.endsWith("'")) {
312
+ return label.slice(1, -1);
313
+ }
314
+ return label;
315
+ }
316
+ function decodeWriteSymbol(symbol) {
317
+ if (typeof symbol === 'symbol') {
318
+ const description = symbol.description ?? '?';
319
+ return symbolCommandDescriptionToLabel[description] ?? description;
320
+ }
321
+ return `'${symbol}'`;
322
+ }
323
+ // Format converters (toMermaid / fromMermaid) live in ./graphFormats.
324
+
178
325
  var __classPrivateFieldSet$4 = (undefined && undefined.__classPrivateFieldSet) || function (receiver, state, value, kind, f) {
179
326
  if (kind === "m") throw new TypeError("Private method is not writable");
180
327
  if (kind === "a" && !f) throw new TypeError("Private accessor was defined without a setter");
@@ -531,152 +678,512 @@ _TapeBlock_generateSymbolHint = { value: (patternList) => JSON.stringify(pattern
531
678
  .map((pattern) => pattern
532
679
  .map((symbol) => (symbol === ifOtherSymbol ? null : symbol)))) };
533
680
 
534
- const movementDescriptionToLabel = {
535
- 'move caret left command': 'L',
536
- 'move caret right command': 'R',
537
- 'do not move carer': 'S',
538
- };
539
- const symbolCommandDescriptionToLabel = {
540
- 'keep symbol command': 'K',
541
- 'erase symbol command': 'E',
542
- };
543
- // Reserved characters in the encoded pattern string:
544
- // '*' ASCII asterisk (U+002A) — per-cell ifOtherSymbol, matches any symbol
545
- // on that tape. ASCII (not a fancier glyph like U+1F7B0) so it renders
546
- // in every Mermaid environment and every monospace font. A literal `*`
547
- // in the alphabet is unambiguous from the marker because it's quoted
548
- // (`'*'`).
549
- // 'B' the tape's blank symbol shorthand (in read patterns). A literal `B`
550
- // in the alphabet is unambiguous from the marker because it's quoted
551
- // (`'B'`).
552
- // ',' separates per-tape cells inside one pattern
553
- // '|' separates alternative patterns
554
- // "'" surrounds a literal alphabet symbol — e.g. `'0'` for literal `0`,
555
- // `'X'` for literal `X`. The quoting is what visually separates literal
556
- // symbols from the convention markers `*` / `B` and from the write
557
- // commands `K` / `E`.
558
- // '\\' escape prefix — to represent any of '*', 'B', ',', '|', "'", or '\\'
559
- // as a *literal* alphabet symbol *inside* the quotes (e.g. `'\''` for
560
- // a literal apostrophe).
561
- const IF_OTHER_MARKER = '*';
562
- const BLANK_MARKER = 'B';
563
- function escapeAlphabetSymbol(s) {
564
- return s
565
- .replace(/\\/g, '\\\\')
566
- .replace(/'/g, "\\'");
567
- }
568
- function decodePatternDescription(description, alphabets) {
569
- if (!description) {
570
- return '?';
571
- }
572
- if (description === 'other symbol') {
573
- return IF_OTHER_MARKER;
574
- }
575
- try {
576
- const patternList = JSON.parse(description);
577
- return patternList
578
- .map((pattern) => pattern
579
- .map((s, tapeIx) => {
580
- if (s === null) {
581
- return IF_OTHER_MARKER;
681
+ // Graph serialization/reconstruction for State graphs. Extracted from
682
+ // `classes/State.ts` (#180) so the State class stays focused on the runtime
683
+ // machinery (transitions, debug, halt-stack composition). Sibling-module
684
+ // private access to State's internals goes through the `STATE_INTERNAL`
685
+ // Symbol re-exported from State.ts — see the @internal JSDoc there.
686
+ //
687
+ // Public surface is preserved: `State.toGraph` and `State.fromGraph` static
688
+ // methods continue to exist as thin delegates to the functions in this
689
+ // module. New consumers (e.g. #195's planned `collectStates`) will live
690
+ // here too and share the BFS-walk shape with `toGraph`.
691
+ /**
692
+ * Walks the reachable graph from `initialState` and returns a serializable
693
+ * `Graph`. The walk is a BFS that visits each State exactly once (keyed by
694
+ * the State's internal id) and emits one `GraphNode` per State plus
695
+ * synthetic halt-marker nodes per callable-subtree frame.
696
+ *
697
+ * Round-trips losslessly with `fromGraph` in the sense that running the
698
+ * rebuilt machine on the same input produces the same output — but State
699
+ * instance identities are NOT preserved across the cycle.
700
+ *
701
+ * See `classes/State.ts` for the runtime model these graph nodes describe;
702
+ * see `utilities/graphFormats.ts` for the Mermaid-flavored serialization
703
+ * built on top of `Graph`.
704
+ */
705
+ function toGraph(initialState, tapeBlock) {
706
+ const nodes = {};
707
+ const alphabets = tapeBlock.alphabets.map((alphabet) => alphabet.symbols);
708
+ // Pass 1: BFS-discover all reachable States; emit one GraphNode per State
709
+ // (wrapper or bare/regular). Wrappers and bares are separate nodes.
710
+ const visited = new Set();
711
+ const queue = [initialState];
712
+ const bareIds = new Set(); // ids referenced as a wrapper's bareStateId
713
+ while (queue.length > 0) {
714
+ const state = queue.shift();
715
+ const stateInternal = state[STATE_INTERNAL]();
716
+ if (visited.has(stateInternal.id)) {
717
+ continue;
718
+ }
719
+ visited.add(stateInternal.id);
720
+ if (state.isHalt) {
721
+ if (!(0 in nodes)) {
722
+ nodes[0] = {
723
+ id: 0,
724
+ name: stateInternal.name,
725
+ isHalt: true,
726
+ isHaltMarker: false,
727
+ isWrapper: false,
728
+ bareStateId: null,
729
+ frameId: null,
730
+ transitions: [],
731
+ overriddenHaltStateId: null,
732
+ tags: [...stateInternal.tags],
733
+ };
582
734
  }
583
- if (s === alphabets[tapeIx]?.[0]) {
584
- return BLANK_MARKER;
735
+ continue;
736
+ }
737
+ // Wrapper? Emit wrapper node + queue bare and override target.
738
+ if (stateInternal.overriddenHaltState !== null && stateInternal.bareState !== null) {
739
+ const bareState = stateInternal.bareState;
740
+ const overrideTarget = stateInternal.overriddenHaltState;
741
+ const bareInternal = bareState[STATE_INTERNAL]();
742
+ const overrideInternal = overrideTarget[STATE_INTERNAL]();
743
+ nodes[stateInternal.id] = {
744
+ id: stateInternal.id,
745
+ name: stateInternal.name, // composite name like "A(target)"
746
+ isHalt: false,
747
+ isHaltMarker: false,
748
+ isWrapper: true,
749
+ bareStateId: bareInternal.id,
750
+ frameId: null,
751
+ transitions: [],
752
+ overriddenHaltStateId: overrideInternal.id,
753
+ tags: [...stateInternal.tags],
754
+ };
755
+ bareIds.add(bareInternal.id);
756
+ queue.push(bareState);
757
+ queue.push(overrideTarget);
758
+ continue;
759
+ }
760
+ // Regular (or bare) state — build node with transitions.
761
+ const node = {
762
+ id: stateInternal.id,
763
+ name: stateInternal.name,
764
+ isHalt: false,
765
+ isHaltMarker: false,
766
+ isWrapper: false,
767
+ bareStateId: null,
768
+ frameId: null,
769
+ transitions: [],
770
+ overriddenHaltStateId: null,
771
+ tags: [...stateInternal.tags],
772
+ };
773
+ nodes[stateInternal.id] = node;
774
+ let patternIx = 0;
775
+ for (const [sym, { command, nextState }] of stateInternal.symbolToDataMap) {
776
+ let target;
777
+ try {
778
+ target = nextState instanceof State ? nextState : nextState.ref;
585
779
  }
586
- return `'${escapeAlphabetSymbol(s)}'`;
587
- })
588
- .join(','))
589
- .join('|');
780
+ catch {
781
+ patternIx += 1;
782
+ continue;
783
+ }
784
+ const targetInternal = target[STATE_INTERNAL]();
785
+ node.transitions.push({
786
+ pattern: decodePatternDescription(sym.description, alphabets),
787
+ command: command.tapesCommands.map((tc) => ({
788
+ symbol: decodeWriteSymbol(tc.symbol),
789
+ movement: decodeMovement(tc.movement.description),
790
+ })),
791
+ nextStateId: targetInternal.id,
792
+ id: `${stateInternal.id}-${patternIx}`,
793
+ });
794
+ queue.push(target);
795
+ patternIx += 1;
796
+ }
797
+ }
798
+ // Always emit real halt as a sentinel, even if no transition targets it.
799
+ // It anchors the `subtree -. halt .-> s0` frame-level arrow whenever a
800
+ // frame demand-emits one, and it's the canonical machine-halt singleton.
801
+ if (!(0 in nodes)) {
802
+ nodes[0] = {
803
+ id: 0,
804
+ name: 'halt',
805
+ isHalt: true,
806
+ isHaltMarker: false,
807
+ isWrapper: false,
808
+ bareStateId: null,
809
+ frameId: null,
810
+ transitions: [],
811
+ overriddenHaltStateId: null,
812
+ tags: [...haltState[STATE_INTERNAL]().tags],
813
+ };
590
814
  }
591
- catch {
592
- return description;
815
+ // Pass 2: For each bare, compute its forward-reachable set (following
816
+ // transitions; stopping at halt and at wrappers — both are frame
817
+ // boundaries).
818
+ const computeReach = (startId) => {
819
+ const reach = new Set();
820
+ const stack = [startId];
821
+ while (stack.length > 0) {
822
+ const id = stack.pop();
823
+ if (reach.has(id)) {
824
+ continue;
825
+ }
826
+ const node = nodes[id];
827
+ // `nodes[id]` is always populated for `id` that the BFS reached, so
828
+ // a defensive `!node` check would be dead. `isHalt` / `isWrapper`
829
+ // are real boundaries — both stop reach-set expansion.
830
+ if (node.isHalt || node.isWrapper) {
831
+ continue;
832
+ }
833
+ reach.add(id);
834
+ for (const t of node.transitions) {
835
+ const target = nodes[t.nextStateId];
836
+ if (!target || target.isHalt || target.isWrapper) {
837
+ continue;
838
+ }
839
+ stack.push(t.nextStateId);
840
+ }
841
+ }
842
+ return reach;
843
+ };
844
+ const reachByBare = new Map();
845
+ for (const bareId of bareIds) {
846
+ reachByBare.set(bareId, computeReach(bareId));
847
+ }
848
+ // Pass 3: Union-find on bare overlaps. Two bares merge if their reach
849
+ // sets share any state. Canonical representative = smallest bare-id in
850
+ // the component.
851
+ const ufParent = new Map();
852
+ // Note: no path compression. The union policy below ("smaller id always
853
+ // becomes root") keeps the tree flat — every union targets bares[0] as
854
+ // the root, so any node's parent IS the root. Walking up never exceeds
855
+ // one step. Path compression would be dead code under this invariant.
856
+ const ufFind = (id) => {
857
+ if (!ufParent.has(id)) {
858
+ ufParent.set(id, id);
859
+ }
860
+ let root = id;
861
+ while (ufParent.get(root) !== root) {
862
+ root = ufParent.get(root);
863
+ }
864
+ return root;
865
+ };
866
+ const ufUnion = (a, b) => {
867
+ const ra = ufFind(a);
868
+ const rb = ufFind(b);
869
+ if (ra === rb)
870
+ return;
871
+ if (ra < rb) {
872
+ ufParent.set(rb, ra);
873
+ }
874
+ else {
875
+ ufParent.set(ra, rb);
876
+ }
877
+ };
878
+ for (const bareId of bareIds) {
879
+ ufFind(bareId);
880
+ }
881
+ // For each state, collect the bares that reach it; union all bares that
882
+ // share a state.
883
+ const stateToReachingBares = new Map();
884
+ for (const [bareId, reachSet] of reachByBare) {
885
+ for (const stateId of reachSet) {
886
+ let bares = stateToReachingBares.get(stateId);
887
+ if (!bares) {
888
+ bares = [];
889
+ stateToReachingBares.set(stateId, bares);
890
+ }
891
+ bares.push(bareId);
892
+ }
893
+ }
894
+ for (const bares of stateToReachingBares.values()) {
895
+ for (let i = 1; i < bares.length; i += 1) {
896
+ ufUnion(bares[0], bares[i]);
897
+ }
898
+ }
899
+ // Assign frameId to each in-reach state.
900
+ const frameIds = new Set();
901
+ for (const [stateId, bares] of stateToReachingBares) {
902
+ const frameId = ufFind(bares[0]);
903
+ nodes[stateId].frameId = frameId;
904
+ frameIds.add(frameId);
905
+ }
906
+ // Pass 4: Retarget halt-bound transitions for in-frame states to the
907
+ // frame's halt marker. Out-of-frame states (top-level dispatcher, override
908
+ // targets, etc.) keep their halt-bound transitions pointing at real halt.
909
+ for (const node of Object.values(nodes)) {
910
+ if (node.frameId === null) {
911
+ continue;
912
+ }
913
+ const haltMarkerId = -node.frameId;
914
+ for (const t of node.transitions) {
915
+ const target = nodes[t.nextStateId];
916
+ if (target && target.isHalt && !target.isHaltMarker) {
917
+ t.nextStateId = haltMarkerId;
918
+ }
919
+ }
593
920
  }
594
- }
595
- function decodeMovement(description) {
596
- if (!description) {
597
- return '?';
921
+ // Pass 5: Emit one halt marker per frame.
922
+ for (const frameId of frameIds) {
923
+ const haltMarkerId = -frameId;
924
+ nodes[haltMarkerId] = {
925
+ id: haltMarkerId,
926
+ name: 'halt',
927
+ isHalt: true,
928
+ isHaltMarker: true,
929
+ isWrapper: false,
930
+ bareStateId: null,
931
+ frameId,
932
+ transitions: [],
933
+ overriddenHaltStateId: null,
934
+ tags: [],
935
+ };
598
936
  }
599
- return movementDescriptionToLabel[description] ?? description;
937
+ return { initialId: initialState[STATE_INTERNAL]().id, alphabets, nodes };
600
938
  }
601
- function splitUnescaped(s, sep) {
602
- const parts = [];
603
- let current = '';
604
- let i = 0;
605
- while (i < s.length) {
606
- if (s[i] === '\\' && i + 1 < s.length) {
607
- current += s[i + 1];
608
- i += 2;
939
+ /**
940
+ * Inverse of `toGraph`: rebuilds a State graph (and a fresh TapeBlock with
941
+ * the graph's alphabets) from a serialized Graph. Round-trips with `toGraph`
942
+ * in the sense that running the rebuilt machine on the same input gives the
943
+ * same output, but the rebuilt State instances have *new* internal IDs.
944
+ *
945
+ * Under the v7 callable-subtree model (#174), graph nodes split into:
946
+ * - Wrapper nodes (`isWrapper: true`, no transitions) — reconstructed via
947
+ * `bareStates[bareStateId].withOverriddenHaltState(finalStates[overriddenHaltStateId])`.
948
+ * - Bare/regular nodes — constructed as normal States with transitions.
949
+ * - Halt + halt-marker nodes — collapse to the singleton `haltState`.
950
+ */
951
+ function fromGraph(graph) {
952
+ const alphabetObjs = graph.alphabets.map((syms) => new Alphabet(syms));
953
+ const tapeBlock = TapeBlock.fromAlphabets(alphabetObjs);
954
+ const ids = Object.keys(graph.nodes).map(Number);
955
+ // Pass 1: pre-create a Reference for each non-halt non-halt-marker node
956
+ // (both wrappers and regulars). Halt and halt-marker nodes collapse to the
957
+ // singleton `haltState` and need no ref.
958
+ const refs = {};
959
+ for (const nodeId of ids) {
960
+ const node = graph.nodes[nodeId];
961
+ if (!node.isHalt) {
962
+ refs[nodeId] = new Reference();
963
+ }
964
+ }
965
+ // Convert a parsed pattern back to the symbol key the State expects.
966
+ const patternToKey = (parsed) => {
967
+ if (parsed === null) {
968
+ return ifOtherSymbol;
969
+ }
970
+ const flat = [];
971
+ for (const row of parsed) {
972
+ for (const cell of row) {
973
+ flat.push(cell === null ? ifOtherSymbol : cell);
974
+ }
975
+ }
976
+ return tapeBlock.symbol(flat);
977
+ };
978
+ // Pass 2: build a State for each non-wrapper non-halt non-halt-marker
979
+ // node. Transitions point at refs so cycles work; haltState (and halt
980
+ // markers, which collapse to haltState) are used directly.
981
+ const bareStates = {};
982
+ for (const nodeId of ids) {
983
+ const node = graph.nodes[nodeId];
984
+ if (node.isHalt || node.isWrapper) {
985
+ continue;
609
986
  }
610
- else if (s[i] === sep) {
611
- parts.push(current);
612
- current = '';
613
- i += 1;
987
+ const stateDefinition = {};
988
+ for (const t of node.transitions) {
989
+ const key = patternToKey(parsePatternString(t.pattern, graph.alphabets));
990
+ const target = graph.nodes[t.nextStateId];
991
+ const nextState = !target || target.isHalt
992
+ ? haltState
993
+ : refs[t.nextStateId];
994
+ stateDefinition[key] = {
995
+ command: t.command.map((c) => ({
996
+ symbol: parseWriteSymbolLabel(c.symbol),
997
+ movement: parseMovementLabel(c.movement),
998
+ })),
999
+ nextState,
1000
+ };
1001
+ }
1002
+ // Graph-sourced names may contain `(` and `)` (composite wrapper names —
1003
+ // although wrappers go through a separate path below, defensive
1004
+ // construction here keeps the bypass uniform). Construct without a name
1005
+ // and assign `name` directly through the internal accessor's setter to
1006
+ // skip the constructor's user-facing name validation.
1007
+ const bare = new State(stateDefinition);
1008
+ bare[STATE_INTERNAL]().name = node.name;
1009
+ if (node.tags.length > 0) {
1010
+ bare.tag(...node.tags);
1011
+ }
1012
+ bareStates[nodeId] = bare;
1013
+ }
1014
+ // Pass 3: resolve every node to its final State (memoized + cycle-safe).
1015
+ // Wrappers compose lazily via `withOverriddenHaltState` once their bare
1016
+ // and override are resolved.
1017
+ const finalStates = {};
1018
+ const inProgress = new Set();
1019
+ const getFinal = (nodeId) => {
1020
+ if (finalStates[nodeId]) {
1021
+ return finalStates[nodeId];
1022
+ }
1023
+ const node = graph.nodes[nodeId];
1024
+ if (!node || node.isHalt) {
1025
+ finalStates[nodeId] = haltState;
1026
+ return haltState;
1027
+ }
1028
+ if (inProgress.has(nodeId)) {
1029
+ throw new Error(`override-halt cycle at state #${nodeId}`);
1030
+ }
1031
+ inProgress.add(nodeId);
1032
+ let state;
1033
+ if (node.isWrapper) {
1034
+ const bare = getFinal(node.bareStateId);
1035
+ const override = getFinal(node.overriddenHaltStateId);
1036
+ state = bare.withOverriddenHaltState(override);
1037
+ // Apply wrapper-scoped tags (#186). Tags don't leak across wrappers
1038
+ // sharing a bare — the wrapper instance owns its own tag set, and
1039
+ // engine #175 memoization returns the same instance for the same
1040
+ // (bare, override) pair, so this is idempotent across rebuilds.
1041
+ if (node.tags.length > 0) {
1042
+ state.tag(...node.tags);
1043
+ }
614
1044
  }
615
1045
  else {
616
- current += s[i];
617
- i += 1;
1046
+ state = bareStates[nodeId];
1047
+ }
1048
+ inProgress.delete(nodeId);
1049
+ finalStates[nodeId] = state;
1050
+ return state;
1051
+ };
1052
+ for (const nodeId of ids) {
1053
+ getFinal(nodeId);
1054
+ }
1055
+ // Pass 4: bind each ref to the resolved final State so cross-node
1056
+ // transitions land on the right instance.
1057
+ for (const nodeId of ids) {
1058
+ if (!graph.nodes[nodeId].isHalt) {
1059
+ refs[nodeId].bind(finalStates[nodeId]);
618
1060
  }
619
1061
  }
620
- parts.push(current);
621
- return parts;
1062
+ return {
1063
+ start: finalStates[graph.initialId],
1064
+ tapeBlock,
1065
+ states: finalStates,
1066
+ };
622
1067
  }
623
- function parsePatternString(s, alphabets) {
624
- if (s === IF_OTHER_MARKER) {
625
- return null;
626
- }
627
- const alternatives = splitUnescaped(s, '|');
628
- return alternatives.map((alt) => {
629
- const cells = splitUnescaped(alt, ',');
630
- return cells.map((cell, tapeIx) => {
631
- if (cell === IF_OTHER_MARKER) {
632
- return null;
633
- }
634
- if (cell === BLANK_MARKER) {
635
- return alphabets[tapeIx]?.[0] ?? cell;
1068
+ /**
1069
+ * Returns a `Map<number, {state, transitionSymbols}>` keyed by engine
1070
+ * `GraphNode.id`, giving downstream tooling direct access to the `State`
1071
+ * instance + per-pattern Symbol references for breakpoint setup (#195).
1072
+ *
1073
+ * **Positional alignment contract.** For any `GraphTransition` whose id
1074
+ * is `${N}-${K}`, `result.get(N)!.transitionSymbols[K]` is the Symbol
1075
+ * the transition fires on (reference equality, not structural). The K-th
1076
+ * entry is the K-th key from the source State's `#symbolToDataMap` in
1077
+ * insertion order, including `ifOtherSymbol` when the user wrote one.
1078
+ * Consumers filtering the catch-all path identity-compare against the
1079
+ * engine-exported `ifOtherSymbol`.
1080
+ *
1081
+ * **Unbound-`Reference` slots.** `toGraph` increments `patternIx` even
1082
+ * when a transition's `nextState` is an unresolved `Reference` (it
1083
+ * `continue`s without pushing the GraphTransition). In that case
1084
+ * `transitionSymbols[K]` is still set to the K-th Map key, but no
1085
+ * `Graph.nodes[N].transitions` entry exists with id `${N}-${K}`. Sparse
1086
+ * on the Graph side, dense on the `transitionSymbols` side — same
1087
+ * indexing.
1088
+ *
1089
+ * **Coverage.** Map keys are the State-backed subset of `graph.nodes`:
1090
+ * regulars + bares + wrappers + the halt singleton (id `0`). Synthetic
1091
+ * halt markers (id `-frameId`) are excluded — they all reach the same
1092
+ * `haltState` object at runtime, and the named consumer
1093
+ * ([machines-demo#37](https://github.com/mellonis/machines-demo/issues/37))
1094
+ * surfaces halt-pause via a separate UI control, not via clicks on
1095
+ * halt glyphs. If a future consumer needs uniform-by-id lookup, the
1096
+ * helper can be extended additively.
1097
+ *
1098
+ * **Halt-singleton warning.** `result.get(0)!.state === haltState` — the
1099
+ * process-wide halt. Toggling `.debug` on that entry affects every
1100
+ * machine in the runtime, not just the one this map was built from.
1101
+ */
1102
+ function collectStates(initialState, tapeBlock) {
1103
+ // Anchor on toGraph's authoritative id set — it knows the canonical
1104
+ // ordering of wrapper/bare/regular emission and which nodes are
1105
+ // synthetic halt markers we have to skip. Building our own BFS would
1106
+ // duplicate that logic; reusing the Graph guarantees collectStates'
1107
+ // id keys never drift from toGraph's GraphTransition ids.
1108
+ const graph = toGraph(initialState, tapeBlock);
1109
+ // Walk the State graph to associate each State instance with its
1110
+ // engine id. The shape mirrors toGraph's Pass 1 — visit by id, branch
1111
+ // on halt / wrapper / regular — but only collects the (id → State)
1112
+ // mapping. Lighter than re-running the union-find passes; no
1113
+ // GraphNode construction.
1114
+ const stateById = new Map();
1115
+ const visited = new Set();
1116
+ const queue = [initialState];
1117
+ while (queue.length > 0) {
1118
+ const state = queue.shift();
1119
+ const internal = state[STATE_INTERNAL]();
1120
+ if (visited.has(internal.id))
1121
+ continue;
1122
+ visited.add(internal.id);
1123
+ stateById.set(internal.id, state);
1124
+ if (state.isHalt)
1125
+ continue;
1126
+ if (internal.bareState !== null && internal.overriddenHaltState !== null) {
1127
+ queue.push(internal.bareState);
1128
+ queue.push(internal.overriddenHaltState);
1129
+ continue;
1130
+ }
1131
+ for (const { nextState } of internal.symbolToDataMap.values()) {
1132
+ let target;
1133
+ try {
1134
+ target = nextState instanceof State ? nextState : nextState.ref;
636
1135
  }
637
- // Literal alphabet symbols are wrapped in single quotes by
638
- // `decodePatternDescription` — strip them on the way back.
639
- if (cell.length >= 2 && cell.startsWith("'") && cell.endsWith("'")) {
640
- return cell.slice(1, -1);
1136
+ catch {
1137
+ continue; // unbound Reference — skip silently, matches toGraph
641
1138
  }
642
- return cell;
643
- });
644
- });
645
- }
646
- const movementLabelToSymbol = {
647
- L: movements.left,
648
- R: movements.right,
649
- S: movements.stay,
650
- };
651
- function parseMovementLabel(label) {
652
- const m = movementLabelToSymbol[label];
653
- if (!m) {
654
- throw new Error(`unknown movement label: ${label}`);
655
- }
656
- return m;
657
- }
658
- function parseWriteSymbolLabel(label) {
659
- if (label === 'K') {
660
- return symbolCommands.keep;
661
- }
662
- if (label === 'E') {
663
- return symbolCommands.erase;
664
- }
665
- // Literal alphabet symbols are wrapped in single quotes by
666
- // `decodeWriteSymbol` — strip them on the way back.
667
- if (label.length >= 2 && label.startsWith("'") && label.endsWith("'")) {
668
- return label.slice(1, -1);
1139
+ queue.push(target);
1140
+ }
669
1141
  }
670
- return label;
671
- }
672
- function decodeWriteSymbol(symbol) {
673
- if (typeof symbol === 'symbol') {
674
- const description = symbol.description ?? '?';
675
- return symbolCommandDescriptionToLabel[description] ?? description;
1142
+ // Build the result by iterating graph.nodes — the authoritative id set
1143
+ // minus halt markers — and dispatching on node kind. The halt singleton
1144
+ // entry's `state` reads from `stateById` (the BFS visited haltState if
1145
+ // any path reached it) but falls back to the module-level singleton
1146
+ // for graphs whose only halt presence is the always-emitted sentinel.
1147
+ const result = new Map();
1148
+ for (const idStr of Object.keys(graph.nodes)) {
1149
+ const id = Number(idStr);
1150
+ const node = graph.nodes[id];
1151
+ if (node.isHaltMarker)
1152
+ continue; // synthetic; collapses to haltState at id 0
1153
+ if (node.isHalt) {
1154
+ // The real halt — always the engine-wide singleton. Prefer the
1155
+ // BFS-visited instance for identity-equality with whatever the
1156
+ // caller has; fall back to the module singleton when the BFS
1157
+ // didn't reach haltState (toGraph emits id 0 unconditionally).
1158
+ result.set(id, {
1159
+ state: stateById.get(0) ?? haltState,
1160
+ transitionSymbols: [],
1161
+ });
1162
+ continue;
1163
+ }
1164
+ if (node.isWrapper) {
1165
+ result.set(id, {
1166
+ state: stateById.get(id),
1167
+ transitionSymbols: [],
1168
+ });
1169
+ continue;
1170
+ }
1171
+ // Regular or bare State — enumerate `#symbolToDataMap.keys()` for
1172
+ // the patternIx alignment. The K-th key is the Symbol that
1173
+ // `${id}-${K}` GraphTransition fires on (positional contract).
1174
+ const state = stateById.get(id);
1175
+ const transitionSymbols = [...state[STATE_INTERNAL]().symbolToDataMap.keys()];
1176
+ result.set(id, { state, transitionSymbols });
676
1177
  }
677
- return `'${symbol}'`;
1178
+ return result;
678
1179
  }
679
- // Format converters (toMermaid / fromMermaid) live in ./graphFormats.
1180
+ // Note on the import cycle with `State.ts`: stateGraph.ts value-imports
1181
+ // `State`, `STATE_INTERNAL`, `haltState`, and `ifOtherSymbol`; State.ts
1182
+ // value-imports `toGraph` and `fromGraph` for its static-method delegates.
1183
+ // ESM resolves cycles via live bindings — both modules see each other's
1184
+ // exports as long as nothing at module-load reads a binding before its
1185
+ // source module finishes evaluating. All references here live inside
1186
+ // function bodies, so the cycle is safe.
680
1187
 
681
1188
  var __classPrivateFieldSet$1 = (undefined && undefined.__classPrivateFieldSet) || function (receiver, state, value, kind, f) {
682
1189
  if (kind === "m") throw new TypeError("Private method is not writable");
@@ -689,11 +1196,33 @@ var __classPrivateFieldGet$1 = (undefined && undefined.__classPrivateFieldGet) |
689
1196
  if (typeof state === "function" ? receiver !== state || !f : !state.has(receiver)) throw new TypeError("Cannot read private member from an object whose class did not declare it");
690
1197
  return kind === "m" ? f : kind === "a" ? f.call(receiver) : f ? f.value : state.get(receiver);
691
1198
  };
692
- var _DebugConfig_ownerState, _DebugConfig_before, _DebugConfig_after, _a, _State_wrapperCache, _State_id, _State_name, _State_overriddenHaltState, _State_bareState, _State_symbolToDataMap, _State_debugRef;
1199
+ var _DebugConfig_ownerState, _DebugConfig_before, _DebugConfig_after, _a, _State_wrapperCache, _State_id, _State_name, _State_overriddenHaltState, _State_bareState, _State_symbolToDataMap, _State_debugRef, _State_tags;
693
1200
  const ifOtherSymbol = Symbol('other symbol');
694
1201
  // Module-private symbol used by DebugConfig setters to call State's validator
695
1202
  // without exposing the validator on the public surface.
696
1203
  const validateDebugFilter = Symbol('validateDebugFilter');
1204
+ /**
1205
+ * @internal
1206
+ *
1207
+ * Package-private accessor key for sibling modules in
1208
+ * `packages/machine/src` (e.g. `utilities/stateGraph.ts`, and the planned
1209
+ * `utilities/stateCollect.ts` for #195). Re-exported from this module so
1210
+ * sibling files can import it; intentionally NOT re-exported from the
1211
+ * package's public `index.ts`, so downstream consumers don't see it on
1212
+ * the supported surface.
1213
+ *
1214
+ * Calling `state[STATE_INTERNAL]()` returns a getter/setter view onto the
1215
+ * State's private fields. Reads are live (they close over `this`), so the
1216
+ * view stays in sync with subsequent mutations on the State. There's one
1217
+ * mutating setter on the view — `name` — used exclusively by
1218
+ * `fromGraph` to assign graph-sourced composite names (e.g. `A(target)`)
1219
+ * that the public name validator would reject; see the JSDoc on the
1220
+ * accessor itself.
1221
+ *
1222
+ * Designed in #180 with #195 in mind so its surface doesn't need to grow
1223
+ * when `collectStates` lands.
1224
+ */
1225
+ const STATE_INTERNAL = Symbol('State.internal');
697
1226
  class DebugConfig {
698
1227
  constructor(ownerState, initial) {
699
1228
  _DebugConfig_ownerState.set(this, void 0);
@@ -750,6 +1279,14 @@ class State {
750
1279
  // Note: toGraph / fromGraph deliberately do not serialize debug — debug is
751
1280
  // a runtime concern, not part of the structural graph.
752
1281
  _State_debugRef.set(this, { current: null });
1282
+ // Out-of-band tags applied to this State (#186). Tags are visualization
1283
+ // and debugger-tooling metadata — they don't affect runtime transition
1284
+ // lookup or `equivalentOn` comparisons. Stored as a Set for de-duplication;
1285
+ // exposed via the `tags` getter as a frozen array snapshot. Lives on the
1286
+ // State INSTANCE so wrappers (from `withOverriddenHaltState`) carry tags
1287
+ // independently of their bare's tag set — see the #175 sharing test in
1288
+ // State.spec.ts.
1289
+ _State_tags.set(this, new Set());
753
1290
  if (stateDefinition) {
754
1291
  const keys = Object.getOwnPropertyNames(stateDefinition);
755
1292
  if (keys.length) {
@@ -833,8 +1370,38 @@ class State {
833
1370
  }
834
1371
  __classPrivateFieldGet$1(this, _State_debugRef, "f").current = new DebugConfig(this, value);
835
1372
  }
1373
+ /**
1374
+ * Add one or more tags to this State (#186). Tags are out-of-band metadata
1375
+ * used by visualization (`toMermaid` emits `classDef`/`class` lines) and
1376
+ * debugger tooling — they don't affect runtime transition lookup,
1377
+ * `equivalentOn` comparisons, or any structural identity. Chainable.
1378
+ */
1379
+ tag(...tags) {
1380
+ for (const t of tags) {
1381
+ __classPrivateFieldGet$1(this, _State_tags, "f").add(t);
1382
+ }
1383
+ return this;
1384
+ }
1385
+ /**
1386
+ * Remove one or more tags from this State (#186). Untagging a tag the
1387
+ * State doesn't carry is a no-op. Chainable.
1388
+ */
1389
+ untag(...tags) {
1390
+ for (const t of tags) {
1391
+ __classPrivateFieldGet$1(this, _State_tags, "f").delete(t);
1392
+ }
1393
+ return this;
1394
+ }
1395
+ /**
1396
+ * Frozen snapshot of this State's current tags (#186). The returned array
1397
+ * is `Object.freeze`d — mutating it throws in strict mode (which TS-emitted
1398
+ * code uses). Order matches insertion order of the underlying Set.
1399
+ */
1400
+ get tags() {
1401
+ return Object.freeze([...__classPrivateFieldGet$1(this, _State_tags, "f")]);
1402
+ }
836
1403
  /** @internal — invoked by DebugConfig setters via module-private symbol. */
837
- [(_State_id = new WeakMap(), _State_name = new WeakMap(), _State_overriddenHaltState = new WeakMap(), _State_bareState = new WeakMap(), _State_symbolToDataMap = new WeakMap(), _State_debugRef = new WeakMap(), validateDebugFilter)](fieldName, filter) {
1404
+ [(_State_id = new WeakMap(), _State_name = new WeakMap(), _State_overriddenHaltState = new WeakMap(), _State_bareState = new WeakMap(), _State_symbolToDataMap = new WeakMap(), _State_debugRef = new WeakMap(), _State_tags = new WeakMap(), validateDebugFilter)](fieldName, filter) {
838
1405
  if (filter === undefined)
839
1406
  return;
840
1407
  // #108 part 2: `.after` on haltState has no semantic anchor — halt is
@@ -918,6 +1485,47 @@ class State {
918
1485
  innerCache.set(overriddenHaltState, new WeakRef(state));
919
1486
  return state;
920
1487
  }
1488
+ /**
1489
+ * @internal
1490
+ *
1491
+ * Package-private getter/setter view onto this State's private fields,
1492
+ * for sibling modules in `packages/machine/src` (currently `stateGraph.ts`
1493
+ * for `toGraph` / `fromGraph`, and the planned `stateCollect.ts` for
1494
+ * #195's `collectStates`).
1495
+ *
1496
+ * Read access is live — the getters close over `this`, so the view
1497
+ * stays in sync with subsequent mutations on this State. There's a
1498
+ * single mutating setter on the view, `name`, which exists to let
1499
+ * `fromGraph` assign graph-sourced composite names (e.g. `A(target)`)
1500
+ * to freshly-constructed bare States. The constructor's name validator
1501
+ * rejects parens (reserved as wrapper-composition delimiters in
1502
+ * `withOverriddenHaltState`); the setter intentionally bypasses that
1503
+ * check because the same delimiters appear in legitimate wrapper-bare
1504
+ * names round-tripped through the graph.
1505
+ *
1506
+ * Returns a fresh view object on every call — cheap enough for the
1507
+ * BFS-once-per-build callers, and avoids holding a reference object on
1508
+ * every State instance. Keep this surface tight: callers should only
1509
+ * read what they need. Adding fields here is a deliberate decision —
1510
+ * each adds to the implicit contract sibling modules can rely on.
1511
+ */
1512
+ [STATE_INTERNAL]() {
1513
+ // Aliasing `this` so the nested object-literal getters/setters below
1514
+ // can read/write the enclosing State's private fields — getters in an
1515
+ // object literal can't be arrow functions, so the standard arrow-
1516
+ // captures-`this` trick doesn't apply here.
1517
+ // eslint-disable-next-line @typescript-eslint/no-this-alias
1518
+ const self = this;
1519
+ return {
1520
+ get id() { return __classPrivateFieldGet$1(self, _State_id, "f"); },
1521
+ get name() { return __classPrivateFieldGet$1(self, _State_name, "f"); },
1522
+ set name(v) { __classPrivateFieldSet$1(self, _State_name, v, "f"); },
1523
+ get bareState() { return __classPrivateFieldGet$1(self, _State_bareState, "f"); },
1524
+ get overriddenHaltState() { return __classPrivateFieldGet$1(self, _State_overriddenHaltState, "f"); },
1525
+ get symbolToDataMap() { return __classPrivateFieldGet$1(self, _State_symbolToDataMap, "f"); },
1526
+ get tags() { return __classPrivateFieldGet$1(self, _State_tags, "f"); },
1527
+ };
1528
+ }
921
1529
  // Single-state introspection — no traversal, no tapeBlock required.
922
1530
  // Returns id, name, halt-status, override-halt target, and the list of
923
1531
  // transitions out of this state with decoded write/movement labels.
@@ -952,367 +1560,36 @@ class State {
952
1560
  transitions,
953
1561
  };
954
1562
  }
955
- // Walks the State graph and emits a `Graph` data structure. v7 callable-
956
- // subtree emit shape (#174):
957
- //
958
- // Each `withOverriddenHaltState` wrapper produces TWO graph nodes:
959
- // - A wrapper node (`isWrapper: true`, `[[composite-name]]` shape) — the
960
- // call site. No transitions of its own. `bareStateId` points to the
961
- // bare's GraphNode; `overriddenHaltStateId` points to the override
962
- // target's GraphNode.
963
- // - A bare node (`isWrapper: false`, regular shape) — the callable body.
964
- // Has the bare's transitions. Shared across all wrappers that wrap
965
- // this bare (no per-context duplication).
966
- //
967
- // Frames are computed via union-find on bare reachability: two bares whose
968
- // forward-reachable sets overlap merge into one frame. Each frame contains
969
- // its bares + body states + a single halt marker (id = `-frameId`). The
970
- // canonical `frameId` is the smallest bare-id in the component.
971
- //
972
- // Halt-bound transitions of any in-frame state are retargeted to the
973
- // frame's halt marker. The frame's `subtree -. return .-> wrapper` and
974
- // `subtree -. halt .-> s0` arrows are demand-emitted by `toMermaid` from
975
- // the frame structure; they're not stored as graph edges.
1563
+ /**
1564
+ * Walks the reachable State graph from `initialState` and returns a
1565
+ * serializable `Graph`. Thin delegate to `utilities/stateGraph.ts`'s
1566
+ * `toGraph` (extracted in #180); see that module for the BFS shape and
1567
+ * v7 callable-subtree emit semantics.
1568
+ */
976
1569
  static toGraph(initialState, tapeBlock) {
977
- const nodes = {};
978
- const alphabets = tapeBlock.alphabets.map((alphabet) => alphabet.symbols);
979
- // Pass 1: BFS-discover all reachable States; emit one GraphNode per State
980
- // (wrapper or bare/regular). Wrappers and bares are separate nodes.
981
- const visited = new Set();
982
- const queue = [initialState];
983
- const bareIds = new Set(); // ids referenced as a wrapper's bareStateId
984
- while (queue.length > 0) {
985
- const state = queue.shift();
986
- if (visited.has(__classPrivateFieldGet$1(state, _State_id, "f"))) {
987
- continue;
988
- }
989
- visited.add(__classPrivateFieldGet$1(state, _State_id, "f"));
990
- if (state.isHalt) {
991
- if (!(0 in nodes)) {
992
- nodes[0] = {
993
- id: 0,
994
- name: __classPrivateFieldGet$1(state, _State_name, "f"),
995
- isHalt: true,
996
- isHaltMarker: false,
997
- isWrapper: false,
998
- bareStateId: null,
999
- frameId: null,
1000
- transitions: [],
1001
- overriddenHaltStateId: null,
1002
- };
1003
- }
1004
- continue;
1005
- }
1006
- // Wrapper? Emit wrapper node + queue bare and override target.
1007
- if (__classPrivateFieldGet$1(state, _State_overriddenHaltState, "f") !== null && __classPrivateFieldGet$1(state, _State_bareState, "f") !== null) {
1008
- const bareState = __classPrivateFieldGet$1(state, _State_bareState, "f");
1009
- const overrideTarget = __classPrivateFieldGet$1(state, _State_overriddenHaltState, "f");
1010
- nodes[__classPrivateFieldGet$1(state, _State_id, "f")] = {
1011
- id: __classPrivateFieldGet$1(state, _State_id, "f"),
1012
- name: __classPrivateFieldGet$1(state, _State_name, "f"), // composite name like "A(target)"
1013
- isHalt: false,
1014
- isHaltMarker: false,
1015
- isWrapper: true,
1016
- bareStateId: __classPrivateFieldGet$1(bareState, _State_id, "f"),
1017
- frameId: null,
1018
- transitions: [],
1019
- overriddenHaltStateId: __classPrivateFieldGet$1(overrideTarget, _State_id, "f"),
1020
- };
1021
- bareIds.add(__classPrivateFieldGet$1(bareState, _State_id, "f"));
1022
- queue.push(bareState);
1023
- queue.push(overrideTarget);
1024
- continue;
1025
- }
1026
- // Regular (or bare) state — build node with transitions.
1027
- const node = {
1028
- id: __classPrivateFieldGet$1(state, _State_id, "f"),
1029
- name: __classPrivateFieldGet$1(state, _State_name, "f"),
1030
- isHalt: false,
1031
- isHaltMarker: false,
1032
- isWrapper: false,
1033
- bareStateId: null,
1034
- frameId: null,
1035
- transitions: [],
1036
- overriddenHaltStateId: null,
1037
- };
1038
- nodes[__classPrivateFieldGet$1(state, _State_id, "f")] = node;
1039
- let patternIx = 0;
1040
- for (const [sym, { command, nextState }] of __classPrivateFieldGet$1(state, _State_symbolToDataMap, "f")) {
1041
- let target;
1042
- try {
1043
- target = nextState instanceof _a ? nextState : nextState.ref;
1044
- }
1045
- catch {
1046
- patternIx += 1;
1047
- continue;
1048
- }
1049
- node.transitions.push({
1050
- pattern: decodePatternDescription(sym.description, alphabets),
1051
- command: command.tapesCommands.map((tc) => ({
1052
- symbol: decodeWriteSymbol(tc.symbol),
1053
- movement: decodeMovement(tc.movement.description),
1054
- })),
1055
- nextStateId: __classPrivateFieldGet$1(target, _State_id, "f"),
1056
- id: `${__classPrivateFieldGet$1(state, _State_id, "f")}-${patternIx}`,
1057
- });
1058
- queue.push(target);
1059
- patternIx += 1;
1060
- }
1061
- }
1062
- // Always emit real halt as a sentinel, even if no transition targets it.
1063
- // It anchors the `subtree -. halt .-> s0` frame-level arrow whenever a
1064
- // frame demand-emits one, and it's the canonical machine-halt singleton.
1065
- if (!(0 in nodes)) {
1066
- nodes[0] = {
1067
- id: 0,
1068
- name: 'halt',
1069
- isHalt: true,
1070
- isHaltMarker: false,
1071
- isWrapper: false,
1072
- bareStateId: null,
1073
- frameId: null,
1074
- transitions: [],
1075
- overriddenHaltStateId: null,
1076
- };
1077
- }
1078
- // Pass 2: For each bare, compute its forward-reachable set (following
1079
- // transitions; stopping at halt and at wrappers — both are frame
1080
- // boundaries).
1081
- const computeReach = (startId) => {
1082
- const reach = new Set();
1083
- const stack = [startId];
1084
- while (stack.length > 0) {
1085
- const id = stack.pop();
1086
- if (reach.has(id)) {
1087
- continue;
1088
- }
1089
- const node = nodes[id];
1090
- if (!node || node.isHalt || node.isWrapper) {
1091
- continue;
1092
- }
1093
- reach.add(id);
1094
- for (const t of node.transitions) {
1095
- const target = nodes[t.nextStateId];
1096
- if (!target || target.isHalt || target.isWrapper) {
1097
- continue;
1098
- }
1099
- stack.push(t.nextStateId);
1100
- }
1101
- }
1102
- return reach;
1103
- };
1104
- const reachByBare = new Map();
1105
- for (const bareId of bareIds) {
1106
- reachByBare.set(bareId, computeReach(bareId));
1107
- }
1108
- // Pass 3: Union-find on bare overlaps. Two bares merge if their reach
1109
- // sets share any state. Canonical representative = smallest bare-id in
1110
- // the component.
1111
- const ufParent = new Map();
1112
- const ufFind = (id) => {
1113
- if (!ufParent.has(id)) {
1114
- ufParent.set(id, id);
1115
- }
1116
- let root = id;
1117
- while (ufParent.get(root) !== root) {
1118
- root = ufParent.get(root);
1119
- }
1120
- // Path compression
1121
- let cur = id;
1122
- while (ufParent.get(cur) !== root) {
1123
- const next = ufParent.get(cur);
1124
- ufParent.set(cur, root);
1125
- cur = next;
1126
- }
1127
- return root;
1128
- };
1129
- const ufUnion = (a, b) => {
1130
- const ra = ufFind(a);
1131
- const rb = ufFind(b);
1132
- if (ra === rb)
1133
- return;
1134
- if (ra < rb) {
1135
- ufParent.set(rb, ra);
1136
- }
1137
- else {
1138
- ufParent.set(ra, rb);
1139
- }
1140
- };
1141
- for (const bareId of bareIds) {
1142
- ufFind(bareId);
1143
- }
1144
- // For each state, collect the bares that reach it; union all bares that
1145
- // share a state.
1146
- const stateToReachingBares = new Map();
1147
- for (const [bareId, reachSet] of reachByBare) {
1148
- for (const stateId of reachSet) {
1149
- let bares = stateToReachingBares.get(stateId);
1150
- if (!bares) {
1151
- bares = [];
1152
- stateToReachingBares.set(stateId, bares);
1153
- }
1154
- bares.push(bareId);
1155
- }
1156
- }
1157
- for (const bares of stateToReachingBares.values()) {
1158
- for (let i = 1; i < bares.length; i += 1) {
1159
- ufUnion(bares[0], bares[i]);
1160
- }
1161
- }
1162
- // Assign frameId to each in-reach state.
1163
- const frameIds = new Set();
1164
- for (const [stateId, bares] of stateToReachingBares) {
1165
- const frameId = ufFind(bares[0]);
1166
- nodes[stateId].frameId = frameId;
1167
- frameIds.add(frameId);
1168
- }
1169
- // Pass 4: Retarget halt-bound transitions for in-frame states to the
1170
- // frame's halt marker. Out-of-frame states (top-level dispatcher, override
1171
- // targets, etc.) keep their halt-bound transitions pointing at real halt.
1172
- for (const node of Object.values(nodes)) {
1173
- if (node.frameId === null) {
1174
- continue;
1175
- }
1176
- const haltMarkerId = -node.frameId;
1177
- for (const t of node.transitions) {
1178
- const target = nodes[t.nextStateId];
1179
- if (target && target.isHalt && !target.isHaltMarker) {
1180
- t.nextStateId = haltMarkerId;
1181
- }
1182
- }
1183
- }
1184
- // Pass 5: Emit one halt marker per frame.
1185
- for (const frameId of frameIds) {
1186
- const haltMarkerId = -frameId;
1187
- nodes[haltMarkerId] = {
1188
- id: haltMarkerId,
1189
- name: 'halt',
1190
- isHalt: true,
1191
- isHaltMarker: true,
1192
- isWrapper: false,
1193
- bareStateId: null,
1194
- frameId,
1195
- transitions: [],
1196
- overriddenHaltStateId: null,
1197
- };
1198
- }
1199
- return { initialId: __classPrivateFieldGet$1(initialState, _State_id, "f"), alphabets, nodes };
1200
- }
1201
- // Inverse of toGraph: rebuilds a State graph (and a fresh TapeBlock with the
1202
- // graph's alphabets) from a serialized Graph. Round-trips with toGraph in
1203
- // the sense that running the rebuilt machine on the same input gives the
1204
- // same output, but the rebuilt State instances have *new* internal IDs.
1205
- //
1206
- // Under the v7 callable-subtree model (#174), graph nodes split into:
1207
- // - Wrapper nodes (`isWrapper: true`, no transitions) — reconstructed via
1208
- // `bareStates[bareStateId].withOverriddenHaltState(finalStates[overriddenHaltStateId])`.
1209
- // - Bare/regular nodes — constructed as normal States with transitions.
1210
- // - Halt + halt-marker nodes — collapse to the singleton `haltState`.
1570
+ return toGraph(initialState, tapeBlock);
1571
+ }
1572
+ /**
1573
+ * Inverse of `toGraph`: rebuilds a State graph and a fresh TapeBlock
1574
+ * from a serialized `Graph`. Thin delegate to `utilities/stateGraph.ts`'s
1575
+ * `fromGraph` (extracted in #180); see that module for the
1576
+ * reconstruction pass shape (Reference pre-create, bare build, wrapper
1577
+ * resolution via `withOverriddenHaltState`, ref binding).
1578
+ */
1211
1579
  static fromGraph(graph) {
1212
- const alphabetObjs = graph.alphabets.map((syms) => new Alphabet(syms));
1213
- const tapeBlock = TapeBlock.fromAlphabets(alphabetObjs);
1214
- const ids = Object.keys(graph.nodes).map(Number);
1215
- // Pass 1: pre-create a Reference for each non-halt non-halt-marker node
1216
- // (both wrappers and regulars). Halt and halt-marker nodes collapse to the
1217
- // singleton `haltState` and need no ref.
1218
- const refs = {};
1219
- for (const nodeId of ids) {
1220
- const node = graph.nodes[nodeId];
1221
- if (!node.isHalt) {
1222
- refs[nodeId] = new Reference();
1223
- }
1224
- }
1225
- // Convert a parsed pattern back to the symbol key the State expects.
1226
- const patternToKey = (parsed) => {
1227
- if (parsed === null) {
1228
- return ifOtherSymbol;
1229
- }
1230
- const flat = [];
1231
- for (const row of parsed) {
1232
- for (const cell of row) {
1233
- flat.push(cell === null ? ifOtherSymbol : cell);
1234
- }
1235
- }
1236
- return tapeBlock.symbol(flat);
1237
- };
1238
- // Pass 2: build a State for each non-wrapper non-halt non-halt-marker
1239
- // node. Transitions point at refs so cycles work; haltState (and halt
1240
- // markers, which collapse to haltState) are used directly.
1241
- const bareStates = {};
1242
- for (const nodeId of ids) {
1243
- const node = graph.nodes[nodeId];
1244
- if (node.isHalt || node.isWrapper) {
1245
- continue;
1246
- }
1247
- const stateDefinition = {};
1248
- for (const t of node.transitions) {
1249
- const key = patternToKey(parsePatternString(t.pattern, graph.alphabets));
1250
- const target = graph.nodes[t.nextStateId];
1251
- const nextState = !target || target.isHalt
1252
- ? haltState
1253
- : refs[t.nextStateId];
1254
- stateDefinition[key] = {
1255
- command: t.command.map((c) => ({
1256
- symbol: parseWriteSymbolLabel(c.symbol),
1257
- movement: parseMovementLabel(c.movement),
1258
- })),
1259
- nextState,
1260
- };
1261
- }
1262
- // Graph-sourced names may contain `(` and `)` (composite wrapper names —
1263
- // although wrappers go through a separate path below, defensive
1264
- // construction here keeps the bypass uniform). Construct without a name
1265
- // and assign `#name` directly to skip user-facing name validation.
1266
- const bare = new _a(stateDefinition);
1267
- __classPrivateFieldSet$1(bare, _State_name, node.name, "f");
1268
- bareStates[nodeId] = bare;
1269
- }
1270
- // Pass 3: resolve every node to its final State (memoized + cycle-safe).
1271
- // Wrappers compose lazily via `withOverriddenHaltState` once their bare
1272
- // and override are resolved.
1273
- const finalStates = {};
1274
- const inProgress = new Set();
1275
- const getFinal = (nodeId) => {
1276
- if (finalStates[nodeId]) {
1277
- return finalStates[nodeId];
1278
- }
1279
- const node = graph.nodes[nodeId];
1280
- if (!node || node.isHalt) {
1281
- finalStates[nodeId] = haltState;
1282
- return haltState;
1283
- }
1284
- if (inProgress.has(nodeId)) {
1285
- throw new Error(`override-halt cycle at state #${nodeId}`);
1286
- }
1287
- inProgress.add(nodeId);
1288
- let state;
1289
- if (node.isWrapper) {
1290
- const bare = getFinal(node.bareStateId);
1291
- const override = getFinal(node.overriddenHaltStateId);
1292
- state = bare.withOverriddenHaltState(override);
1293
- }
1294
- else {
1295
- state = bareStates[nodeId];
1296
- }
1297
- inProgress.delete(nodeId);
1298
- finalStates[nodeId] = state;
1299
- return state;
1300
- };
1301
- for (const nodeId of ids) {
1302
- getFinal(nodeId);
1303
- }
1304
- // Pass 4: bind each ref to the resolved final State so cross-node
1305
- // transitions land on the right instance.
1306
- for (const nodeId of ids) {
1307
- if (!graph.nodes[nodeId].isHalt) {
1308
- refs[nodeId].bind(finalStates[nodeId]);
1309
- }
1310
- }
1311
- return {
1312
- start: finalStates[graph.initialId],
1313
- tapeBlock,
1314
- states: finalStates,
1315
- };
1580
+ return fromGraph(graph);
1581
+ }
1582
+ /**
1583
+ * Returns a `Map<number, {state, transitionSymbols}>` keyed by engine
1584
+ * `GraphNode.id`, exposing the live `State` instance + per-pattern
1585
+ * Symbol references for each node so downstream tooling can mutate
1586
+ * `state.debug` by numeric id and set per-pattern breakpoints by
1587
+ * `GraphTransition.id` (#195). Thin delegate to
1588
+ * `utilities/stateGraph.ts`'s `collectStates`; see that module for
1589
+ * the alignment contract, coverage rules, and halt-singleton warning.
1590
+ */
1591
+ static collectStates(initialState, tapeBlock) {
1592
+ return collectStates(initialState, tapeBlock);
1316
1593
  }
1317
1594
  }
1318
1595
  _a = State;
@@ -1335,7 +1612,7 @@ var __classPrivateFieldGet = (undefined && undefined.__classPrivateFieldGet) ||
1335
1612
  if (typeof state === "function" ? receiver !== state || !f : !state.has(receiver)) throw new TypeError("Cannot read private member from an object whose class did not declare it");
1336
1613
  return kind === "m" ? f : kind === "a" ? f.call(receiver) : f ? f.value : state.get(receiver);
1337
1614
  };
1338
- var _TuringMachine_tapeBlock, _TuringMachine_stack;
1615
+ var _TuringMachine_tapeBlock;
1339
1616
  // True iff `filter` matches `symbol` per the DebugConfig semantics.
1340
1617
  // undefined / [] -> never; true -> always; symbol[] -> exact membership.
1341
1618
  function matchFilter(filter, symbol) {
@@ -1348,7 +1625,6 @@ function matchFilter(filter, symbol) {
1348
1625
  class TuringMachine {
1349
1626
  constructor({ tapeBlock, } = {}) {
1350
1627
  _TuringMachine_tapeBlock.set(this, void 0);
1351
- _TuringMachine_stack.set(this, []);
1352
1628
  if (!tapeBlock) {
1353
1629
  throw new Error('invalid tapeBlock');
1354
1630
  }
@@ -1382,7 +1658,14 @@ class TuringMachine {
1382
1658
  try {
1383
1659
  __classPrivateFieldGet(this, _TuringMachine_tapeBlock, "f")[lockSymbol].check(executionSymbol);
1384
1660
  __classPrivateFieldGet(this, _TuringMachine_tapeBlock, "f")[lockSymbol].lock(executionSymbol);
1385
- const stack = __classPrivateFieldGet(this, _TuringMachine_stack, "f");
1661
+ // Halt-stack is run-scoped, not machine-scoped (#196). Declaring it
1662
+ // local makes that lifetime explicit and prevents leftover entries
1663
+ // from a previous `runStepByStep` call (e.g. a build-time peek that
1664
+ // never drained the generator) from being popped during a subsequent
1665
+ // halt-bound transition. Before this change `#stack` was an instance
1666
+ // field and accumulated one extra push per call when the same machine
1667
+ // was reused.
1668
+ const stack = [];
1386
1669
  let state = initialState;
1387
1670
  if (state.overriddenHaltState) {
1388
1671
  stack.push(state.overriddenHaltState);
@@ -1457,7 +1740,7 @@ class TuringMachine {
1457
1740
  }
1458
1741
  }
1459
1742
  }
1460
- _TuringMachine_tapeBlock = new WeakMap(), _TuringMachine_stack = new WeakMap();
1743
+ _TuringMachine_tapeBlock = new WeakMap();
1461
1744
 
1462
1745
  // Format converters between a Graph (the data model produced by State.toGraph
1463
1746
  // and consumed by State.fromGraph) and external string representations.
@@ -1500,6 +1783,79 @@ function parseMermaidId(s) {
1500
1783
  function frameSubgraphId(frameId) {
1501
1784
  return `w_${frameId}`;
1502
1785
  }
1786
+ // User-controlled content (state names, tag names, alphabet symbols inside
1787
+ // edge labels) is interpolated into Mermaid label strings (`"..."` wrappers
1788
+ // on nodes, wrappers, subgraphs, and edges). Mermaid's grammar terminates
1789
+ // the string on a literal `"`, and labels render via HTML/foreignObject so
1790
+ // `<`, `>`, `&` get interpreted as markup. Statement terminators (`\n`,
1791
+ // `\r`), C0 controls (except `\t`), DEL, bidi controls, and lone UTF-16
1792
+ // surrogates are encoded as numeric entities so they can't confuse the
1793
+ // tokenizer or flip text direction silently (#194).
1794
+ //
1795
+ // Printable Unicode (Cyrillic, CJK, emoji, accented Latin, etc.) passes
1796
+ // through unchanged — a tape alphabet of Cyrillic or Brainfuck glyphs
1797
+ // stays readable in the emitted `.mmd`.
1798
+ //
1799
+ // Escape is applied at the leaf — to each user-supplied fragment BEFORE
1800
+ // it's composed into a label. Structural pieces this module emits (`<br>`
1801
+ // tag separator, ` ∪ ` bare-name join, `[`, `]`, `,`, `|`, `/`, ` → `,
1802
+ // the `callable subtree of `/`callable scope: ` prefixes) are NOT escaped;
1803
+ // only user-controlled content is. fromMermaid mirrors with
1804
+ // `unescapeMermaidLabel` on each extracted leaf AFTER structural parsing,
1805
+ // so a literal `<br>` inside a state name (encoded as `&lt;br&gt;`)
1806
+ // survives the tag-split and decodes back at the leaf.
1807
// Characters that must not appear raw inside a quoted Mermaid label:
// `&`, `"`, `<`, `>`, LF, CR, C0 controls other than TAB, DEL, bidi
// controls (U+202A–U+202E, U+2066–U+2069) and lone UTF-16 surrogates.
//
// The `u` flag is essential. Without it the pattern runs per UTF-16 code
// unit and `\uD800-\uDFFF` also matches both halves of every VALID
// surrogate pair, so emoji and other astral symbols would be split into two
// surrogate entities that renderers display as U+FFFD. In Unicode mode a
// well-formed pair is one code point outside that range, so only unpaired
// surrogates match.
const MERMAID_LABEL_ESCAPE_RE = /[&"<>\n\r\u0000-\u0008\u000B\u000C\u000E-\u001F\u007F\u202A-\u202E\u2066-\u2069\uD800-\uDFFF]/gu;
/**
 * Escapes one user-supplied fragment for interpolation into a quoted
 * Mermaid label.
 *
 * `&`, `"`, `<` and `>` become the named entities `&amp;`, `&quot;`,
 * `&lt;` and `&gt;`. Every other matched character (line terminators,
 * control characters, bidi controls, lone surrogates) becomes a decimal
 * numeric entity. All remaining text, including well-formed astral
 * characters such as emoji, is returned unchanged.
 *
 * @param {string} s - Raw fragment (state name, tag, alphabet symbol).
 * @returns {string} The fragment with unsafe characters entity-encoded.
 */
function escapeMermaidLabel(s) {
    return s.replace(MERMAID_LABEL_ESCAPE_RE, (ch) => {
        switch (ch) {
            case '&': return '&amp;';
            case '"': return '&quot;';
            case '<': return '&lt;';
            case '>': return '&gt;';
            case '\n': return '&#10;';
            case '\r': return '&#13;';
            // Every remaining match is a single UTF-16 code unit (a BMP
            // character or a lone surrogate), so charCodeAt(0) is its
            // full value.
            default: return `&#${ch.charCodeAt(0)};`;
        }
    });
}
1821
+ // Inverse of escapeMermaidLabel. Decodes the four named entities the
1822
+ // encoder emits (`&amp;`, `&quot;`, `&lt;`, `&gt;`) plus arbitrary
1823
+ // numeric entities (`&#NN;`, `&#xHH;`) — the latter to round-trip the
1824
+ // control / bidi / lone-surrogate cases from encode. Other named entities
1825
+ // pass through unchanged: fromMermaid is strict to the dialect toMermaid
1826
+ // emits, and a future-proof full HTML-entity decoder would muddle that.
1827
+ //
1828
+ // Replacement is single-pass: each `&...;` match is consumed once with
1829
+ // no re-scanning of the substitution, so nested-looking inputs like
1830
+ // `&amp;quot;` (literal `&quot;` as user text) decode to `&quot;` not `"`.
1831
// Matches exactly the entity forms the decoder understands: the four named
// entities `amp` / `quot` / `lt` / `gt`, decimal `&#NN;`, and hex `&#xHH;`.
// Capture groups: 1 = entity name, 2 = decimal digits, 3 = hex digits.
const MERMAID_LABEL_UNESCAPE_RE = /&(?:(amp|quot|lt|gt)|#(\d+)|#x([0-9a-fA-F]+));/g;
/**
 * Decodes the entities in one label fragment back to literal characters.
 *
 * Replacement is single-pass: the output of a substitution is never
 * re-scanned, so `&amp;quot;` decodes to the text `&quot;`, not to `"`.
 * Named entities other than the four recognised ones do not match the
 * pattern and are left as-is. Numeric entities above U+10FFFF are also left
 * unchanged, because `String.fromCodePoint` would throw a `RangeError` for
 * them and one malformed entity should not abort parsing.
 *
 * @param {string} s - Label fragment that may contain entities.
 * @returns {string} The fragment with recognised entities decoded.
 */
function unescapeMermaidLabel(s) {
    return s.replace(MERMAID_LABEL_UNESCAPE_RE, (match, named, dec, hex) => {
        switch (named) {
            case 'amp': return '&';
            case 'quot': return '"';
            case 'lt': return '<';
            case 'gt': return '>';
            default: {
                // Exactly one of `dec` / `hex` is defined when `named` is not.
                const n = dec !== undefined
                    ? Number.parseInt(dec, 10)
                    : Number.parseInt(hex, 16);
                // Not a Unicode code point (this also catches Infinity from
                // very long digit runs): keep the original text.
                if (n > 0x10FFFF) {
                    return match;
                }
                // Values up to U+FFFF decode as one UTF-16 code unit, so a
                // lone surrogate written as a numeric entity round-trips
                // exactly. Larger values need fromCodePoint to produce the
                // surrogate pair.
                return n <= 0xFFFF ? String.fromCharCode(n) : String.fromCodePoint(n);
            }
        }
    });
}
1503
1859
  function toMermaid(graph) {
1504
1860
  const lines = [
1505
1861
  'flowchart TD',
@@ -1532,6 +1888,25 @@ function toMermaid(graph) {
1532
1888
  bucket.push(node);
1533
1889
  }
1534
1890
  }
1891
+ // Build the visible-label string for a node — name plus, if tagged, a
1892
+ // `<br>tag1, tag2, ...` suffix so the rendered Mermaid shows both. Tags
1893
+ // are the source of truth on the GraphNode; `<br>` is the universal
1894
+ // Mermaid line-break that works across renderers without `classDef`-
1895
+ // pseudo-element hacks (#186).
1896
+ const labelOf = (node) => {
1897
+ const name = escapeMermaidLabel(node.name);
1898
+ if (node.tags.length === 0)
1899
+ return name;
1900
+ // Per-tag escape that ALSO encodes `,` — tags are joined with `, ` and
1901
+ // split on `,` in `splitLabelTags`, so a literal comma in user tag
1902
+ // content would be mistaken for a separator on the way back. `,` isn't
1903
+ // in the base escape set because it's structural in edge labels
1904
+ // (between per-tape cells in `writes`/`moves`), where the encode pass
1905
+ // happens after composition — different context, different escape.
1906
+ const tagFragments = node.tags
1907
+ .map((t) => escapeMermaidLabel(t).replace(/,/g, '&#44;'));
1908
+ return `${name}<br>${tagFragments.join(', ')}`;
1909
+ };
1535
1910
  // 1. Emit top-level nodes (real halt, non-wrapper regulars outside any frame).
1536
1911
  for (const node of topLevelNodes) {
1537
1912
  const mid = mermaidIdFor(node.id);
@@ -1539,12 +1914,12 @@ function toMermaid(graph) {
1539
1914
  lines.push(` ${mid}(((halt)))`);
1540
1915
  }
1541
1916
  else {
1542
- lines.push(` ${mid}["${node.name}"]`);
1917
+ lines.push(` ${mid}["${labelOf(node)}"]`);
1543
1918
  }
1544
1919
  }
1545
1920
  // 2. Emit wrappers at top level.
1546
1921
  for (const wrapper of wrapperNodes) {
1547
- lines.push(` ${mermaidIdFor(wrapper.id)}[["${wrapper.name}"]]`);
1922
+ lines.push(` ${mermaidIdFor(wrapper.id)}[["${labelOf(wrapper)}"]]`);
1548
1923
  }
1549
1924
  // 3. `idle` sentinel.
1550
1925
  lines.push(' idle([idle])');
@@ -1555,19 +1930,20 @@ function toMermaid(graph) {
1555
1930
  const frameBareNames = frameBares
1556
1931
  .slice()
1557
1932
  .sort((a, b) => a.id - b.id)
1558
- .map((n) => n.name);
1933
+ .map((n) => escapeMermaidLabel(n.name));
1559
1934
  const label = frameBareNames.length > 1
1560
1935
  ? `callable scope: ${frameBareNames.join(' ∪ ')}`
1561
1936
  : `callable subtree of ${frameBareNames[0] ?? frameId}`;
1562
1937
  lines.push(` subgraph ${frameSubgraphId(frameId)}["${label}"]`);
1563
1938
  // Inner nodes — sort by id for determinism.
1564
1939
  for (const node of (nodesByFrame.get(frameId) ?? []).slice().sort((a, b) => a.id - b.id)) {
1565
- lines.push(` ${mermaidIdFor(node.id)}["${node.name}"]`);
1940
+ lines.push(` ${mermaidIdFor(node.id)}["${labelOf(node)}"]`);
1566
1941
  }
1942
+ // Every frame has a halt marker — `State.toGraph`'s frame-emit pass
1943
+ // creates one for each frame. Non-null assertion is safe; a defensive
1944
+ // null check would be dead.
1567
1945
  const haltMarker = haltMarkerByFrame.get(frameId);
1568
- if (haltMarker) {
1569
- lines.push(` ${mermaidIdFor(haltMarker.id)}(((halt)))`);
1570
- }
1946
+ lines.push(` ${mermaidIdFor(haltMarker.id)}(((halt)))`);
1571
1947
  lines.push(' end');
1572
1948
  }
1573
1949
  // 5. Enter arrow.
@@ -1620,29 +1996,31 @@ function toMermaid(graph) {
1620
1996
  for (const frameId of frameIds) {
1621
1997
  if (!haltMarkerHasIncoming.get(frameId))
1622
1998
  continue;
1623
- // Return arrow — collapsed `&` ribbon over all wrappers calling this frame.
1999
+ // Return arrow — collapsed `&` ribbon over all wrappers calling this
2000
+ // frame. Frames only exist because at least one wrapper's bareStateId
2001
+ // points to a bare in the frame, so `callingWrappers` is always
2002
+ // non-empty for any frame that reached this code path.
1624
2003
  const callingWrappers = wrapperNodes.filter((w) => {
1625
- if (w.bareStateId === null)
1626
- return false;
1627
2004
  const bare = graph.nodes[w.bareStateId];
1628
- return !!bare && bare.frameId === frameId;
2005
+ return bare.frameId === frameId;
1629
2006
  });
1630
- if (callingWrappers.length > 0) {
1631
- const targets = callingWrappers
1632
- .slice()
1633
- .sort((a, b) => a.id - b.id)
1634
- .map((w) => mermaidIdFor(w.id))
1635
- .join(' & ');
1636
- lines.push(` ${frameSubgraphId(frameId)} -. "return" .-> ${targets}`);
1637
- }
2007
+ const targets = callingWrappers
2008
+ .slice()
2009
+ .sort((a, b) => a.id - b.id)
2010
+ .map((w) => mermaidIdFor(w.id))
2011
+ .join(' & ');
2012
+ lines.push(` ${frameSubgraphId(frameId)} -. "return" .-> ${targets}`);
1638
2013
  if (hasNonWrapperEntry.get(frameId)) {
1639
2014
  lines.push(` ${frameSubgraphId(frameId)} -. "halt" .-> s0`);
1640
2015
  }
1641
2016
  }
1642
2017
  // 8. Wrapper-to-override arrows (regular solid).
2018
+ //
2019
+ // `wrapper.overriddenHaltStateId` is always non-null on wrapper nodes
2020
+ // (set by `State.toGraph` for every `isWrapper: true` node — it's the
2021
+ // wrapper's override target, which a wrapper by definition has). The
2022
+ // non-null assertion is safe; a defensive null check would be dead.
1643
2023
  for (const wrapper of wrapperNodes) {
1644
- if (wrapper.overriddenHaltStateId === null)
1645
- continue;
1646
2024
  lines.push(` ${mermaidIdFor(wrapper.id)} --> ${mermaidIdFor(wrapper.overriddenHaltStateId)}`);
1647
2025
  }
1648
2026
  // 9. Regular transitions for non-wrapper non-halt-marker non-halt nodes.
@@ -1654,18 +2032,84 @@ function toMermaid(graph) {
1654
2032
  const reads = alternatives.map((alt) => `[${alt}]`).join('|');
1655
2033
  const writes = `[${t.command.map((c) => c.symbol).join(',')}]`;
1656
2034
  const moves = `[${t.command.map((c) => c.movement).join(',')}]`;
1657
- const label = `${reads} ${writes}/${moves}`;
2035
+ // Escape the WHOLE composed label — structural separators ([, ], ,,
2036
+ // |, /, ' → ') are all in our safe ASCII set and pass through
2037
+ // unchanged; only embedded user alphabet symbols inside `'...'` get
2038
+ // entity-encoded. fromMermaid unescapes the captured label as the
2039
+ // first step before structural parsing.
2040
+ const label = escapeMermaidLabel(`${reads} → ${writes}/${moves}`);
1658
2041
  lines.push(` ${mermaidIdFor(node.id)} -- "${label}" --> ${mermaidIdFor(t.nextStateId)}`);
1659
2042
  }
1660
2043
  }
2044
+ // 10. Tags (#186) — emit one `classDef tag_<name> fill:#...` per unique
2045
+ // tag across all nodes, then one `class <ids> tag_<name>` line per
2046
+ // tag listing every node that carries it (comma-joined for compact
2047
+ // emit). Tag-name → CSS-class identifier sanitization replaces any
2048
+ // char outside `[A-Za-z0-9_-]` with `_`; tag-name uniqueness in the
2049
+ // emit assumes user tags are already distinct after sanitization
2050
+ // (collisions are user error).
2051
+ emitTagAnnotations(lines, nodes);
1661
2052
  return lines.join('\n');
1662
2053
  }
2054
+ // Default Mermaid `classDef` palette — 6 visually distinct fill+stroke pairs,
2055
+ // selected by tag-name hash so multi-tag diagrams look readable out of the
2056
+ // box without user configuration. Users who want different colors can edit
2057
+ // the emitted Mermaid before rendering or override post-emit.
2058
+ const TAG_PALETTE = [
2059
+ ['#fef3c7', '#92400e'], // amber
2060
+ ['#dbeafe', '#1e40af'], // blue
2061
+ ['#dcfce7', '#166534'], // green
2062
+ ['#fce7f3', '#9d174d'], // pink
2063
+ ['#ede9fe', '#5b21b6'], // violet
2064
+ ['#fee2e2', '#991b1b'], // red
2065
+ ];
2066
+ function sanitizeTagName(tag) {
2067
+ return tag.replace(/[^A-Za-z0-9_-]/g, '_');
2068
+ }
2069
+ function tagColor(tag) {
2070
+ // Cheap deterministic hash — sum of char codes mod palette length. Stable
2071
+ // across runs; same tag name always picks the same color.
2072
+ let h = 0;
2073
+ for (let i = 0; i < tag.length; i += 1) {
2074
+ h = (h + tag.charCodeAt(i)) % TAG_PALETTE.length;
2075
+ }
2076
+ return TAG_PALETTE[h];
2077
+ }
2078
+ function emitTagAnnotations(lines, nodes) {
2079
+ // Collect node ids per tag in `nodes` iteration order (presumably id-sorted by the caller — verify) so output is deterministic.
2080
+ const nodesByTag = new Map();
2081
+ for (const node of nodes) {
2082
+ for (const tag of node.tags) {
2083
+ let list = nodesByTag.get(tag);
2084
+ if (!list) {
2085
+ list = [];
2086
+ nodesByTag.set(tag, list);
2087
+ }
2088
+ list.push(node.id);
2089
+ }
2090
+ }
2091
+ if (nodesByTag.size === 0)
2092
+ return;
2093
+ const sortedTags = [...nodesByTag.keys()].sort();
2094
+ for (const tag of sortedTags) {
2095
+ const sanitized = sanitizeTagName(tag);
2096
+ const [fill, stroke] = tagColor(tag);
2097
+ lines.push(` classDef tag_${sanitized} fill:${fill},stroke:${stroke}`);
2098
+ }
2099
+ for (const tag of sortedTags) {
2100
+ const sanitized = sanitizeTagName(tag);
2101
+ const ids = nodesByTag.get(tag).map((id) => mermaidIdFor(id)).join(',');
2102
+ lines.push(` class ${ids} tag_${sanitized}`);
2103
+ }
2104
+ }
1663
2105
  // Helper: identify "the bare states" that anchor a frame's name. A bare is a
1664
2106
  // node referenced as some wrapper's `bareStateId`. Body states (also in-frame
1665
2107
  // but not bare) are excluded from the frame label.
2108
+ //
2109
+ // The caller in `toMermaid` only passes non-wrapper, non-halt-marker nodes
2110
+ // (wrappers go to a separate bucket; halt markers go to `haltMarkerByFrame`).
2111
+ // No defensive `isHalt` / `isWrapper` guards needed here.
1666
2112
  function isFrameBare(node, graph) {
1667
- if (node.isWrapper || node.isHalt)
1668
- return false;
1669
2113
  for (const other of Object.values(graph.nodes)) {
1670
2114
  if (other.isWrapper && other.bareStateId === node.id) {
1671
2115
  return true;
@@ -1707,6 +2151,42 @@ const haltArrowRegex = /^w_(\d+)\s+-\.\s+"halt"\s+\.->\s+s0$/;
1707
2151
  // First capture char anchored as \S to avoid polynomial backtracking between
1708
2152
  // the preceding \s* and a permissive (.+); see CodeQL js/polynomial-redos.
1709
2153
  const alphabetsRegex = /^%%\s*alphabets:\s*(\S.*)$/;
2154
+ // Tag annotation lines (#186). Matches both `classDef tag_<sanitized>` and
2155
+ // `class <id-list> tag_<sanitized>`. ClassDef declarations are decorative
2156
+ // (palette) and discarded on parse — toMermaid will regenerate them from
2157
+ // the tag set on re-emit. `class` lines carry the actual graph-node
2158
+ // assignments; we strip the `tag_` prefix and assign each tag to each
2159
+ // listed node's `tags` array.
2160
+ //
2161
+ // Inter-token gaps are fixed at single literal spaces (matching toMermaid's
2162
+ // canonical emit) rather than `\s+`. This avoids the polynomial-ReDoS
2163
+ // pattern CodeQL flags when `\s+` surrounds a content group (see also
2164
+ // `callArrowRegex` / `returnArrowRegex` tightening in PR #182).
2165
+ const classDefTagRegex = /^classDef tag_([A-Za-z0-9_-]+) .+$/;
2166
+ const classAssignTagRegex = /^class ([sc]\d+(?:,[sc]\d+)*) tag_([A-Za-z0-9_-]+)$/;
2167
+ // Splits a node label like `"A<br>hot, sampled"` into its name and tags (#186).
2168
+ // Labels without `<br>` have no tags. Tags are comma-joined; trimmed of
2169
+ // whitespace. The `<br>` is the single source of truth for tag-name parsing —
2170
+ // `class` lines are not consulted here; `fromMermaid` parses them separately.
2171
+ //
2172
+ // Mermaid-label entities (`&lt;`, `&quot;`, etc., #194) are decoded AFTER
2173
+ // structural splitting: the `<br>` separator and `,` tag delimiter survive
2174
+ // encode unchanged, and a user state name / tag containing a literal `<br>`
2175
+ // or `,` was encoded leaf-side so it can't be confused with the structural
2176
+ // form. Decode at the leaves recovers the original characters.
2177
+ function splitLabelTags(label) {
2178
+ const brIx = label.indexOf('<br>');
2179
+ if (brIx < 0) {
2180
+ return { name: unescapeMermaidLabel(label), tags: [] };
2181
+ }
2182
+ const name = unescapeMermaidLabel(label.slice(0, brIx));
2183
+ const tagsStr = label.slice(brIx + '<br>'.length);
2184
+ const tags = tagsStr
2185
+ .split(',')
2186
+ .map((t) => unescapeMermaidLabel(t.trim()))
2187
+ .filter((t) => t.length > 0);
2188
+ return { name, tags };
2189
+ }
1710
2190
  function fromMermaid(text) {
1711
2191
  const lines = text.split('\n').map((l) => l.trim()).filter(Boolean);
1712
2192
  let alphabets = [];
@@ -1725,6 +2205,7 @@ function fromMermaid(text) {
1725
2205
  frameId: opts.frameId ?? null,
1726
2206
  transitions: [],
1727
2207
  overriddenHaltStateId: null,
2208
+ tags: opts.tags ? [...opts.tags] : [],
1728
2209
  };
1729
2210
  }
1730
2211
  else {
@@ -1740,6 +2221,12 @@ function fromMermaid(text) {
1740
2221
  nodes[id].bareStateId = opts.bareStateId;
1741
2222
  if (opts.frameId !== undefined)
1742
2223
  nodes[id].frameId = opts.frameId;
2224
+ if (opts.tags !== undefined) {
2225
+ for (const t of opts.tags) {
2226
+ if (!nodes[id].tags.includes(t))
2227
+ nodes[id].tags.push(t);
2228
+ }
2229
+ }
1743
2230
  }
1744
2231
  return nodes[id];
1745
2232
  };
@@ -1752,6 +2239,11 @@ function fromMermaid(text) {
1752
2239
  alphabets = JSON.parse(am[1]);
1753
2240
  continue;
1754
2241
  }
2242
+ // Tag annotations (#186) — classDef lines are decorative and skipped;
2243
+ // `class` lines are parsed in the edge pass since they reference nodes
2244
+ // by id and need those nodes already created in the first pass.
2245
+ if (classDefTagRegex.test(line))
2246
+ continue;
1755
2247
  const sgStart = line.match(subgraphStartRegex);
1756
2248
  if (sgStart) {
1757
2249
  currentFrameId = Number(sgStart[1]);
@@ -1777,17 +2269,21 @@ function fromMermaid(text) {
1777
2269
  }
1778
2270
  const wm = line.match(wrappedNodeRegex);
1779
2271
  if (wm) {
2272
+ const { name, tags } = splitLabelTags(wm[2]);
1780
2273
  ensureNode(parseMermaidId(wm[1]), {
1781
- name: wm[2],
2274
+ name,
1782
2275
  isWrapper: true,
2276
+ tags,
1783
2277
  });
1784
2278
  continue;
1785
2279
  }
1786
2280
  const rm = line.match(regularNodeRegex);
1787
2281
  if (rm) {
2282
+ const { name, tags } = splitLabelTags(rm[2]);
1788
2283
  ensureNode(parseMermaidId(rm[1]), {
1789
- name: rm[2],
2284
+ name,
1790
2285
  frameId: currentFrameId,
2286
+ tags,
1791
2287
  });
1792
2288
  continue;
1793
2289
  }
@@ -1804,6 +2300,19 @@ function fromMermaid(text) {
1804
2300
  if (returnArrowRegex.test(line) || haltArrowRegex.test(line)) {
1805
2301
  continue;
1806
2302
  }
2303
+ // Tag class-assignment line (#186): `class s1,s5 tag_hot` — adds
2304
+ // the tag to each listed node. Tag-name preserved as written
2305
+ // (sanitization on emit is lossy in principle; on parse we don't
2306
+ // un-sanitize, since the original could have any characters).
2307
+ const tagMatch = line.match(classAssignTagRegex);
2308
+ if (tagMatch) {
2309
+ const ids = tagMatch[1].split(',');
2310
+ const tagName = tagMatch[2];
2311
+ for (const idStr of ids) {
2312
+ ensureNode(parseMermaidId(idStr), { tags: [tagName] });
2313
+ }
2314
+ continue;
2315
+ }
1807
2316
  // `call` arrow — sets bareStateId on each source wrapper.
1808
2317
  const cm = line.match(callArrowRegex);
1809
2318
  if (cm) {
@@ -1820,7 +2329,12 @@ function fromMermaid(text) {
1820
2329
  if (wo) {
1821
2330
  const fromId = parseMermaidId(wo[1]);
1822
2331
  const toId = parseMermaidId(wo[2]);
1823
- if (nodes[fromId] && nodes[fromId].isWrapper) {
2332
+ // The wrapper-override regex only matches `sN --> sM` (unlabeled);
2333
+ // since `toMermaid` only emits this shape from wrappers, the source
2334
+ // is guaranteed to be a wrapper if `fromMermaid`'s input came from
2335
+ // `toMermaid`. `nodes[fromId]` is always populated (first pass parses
2336
+ // node declarations before any edge parsing).
2337
+ if (nodes[fromId].isWrapper) {
1824
2338
  nodes[fromId].overriddenHaltStateId = toId;
1825
2339
  continue;
1826
2340
  }
@@ -1830,7 +2344,12 @@ function fromMermaid(text) {
1830
2344
  const tm = line.match(labeledTransitionRegex);
1831
2345
  if (tm) {
1832
2346
  const fromId = parseMermaidId(tm[1]);
1833
- const label = tm[2];
2347
+ // Decode the WHOLE captured label up front (#194). Structural
2348
+ // separators (`[`, `]`, `,`, `|`, `/`, ` → `) are all safe ASCII
2349
+ // outside the escape set and pass through encode unchanged, so it's
2350
+ // safe to decode before structural parsing; only embedded alphabet
2351
+ // symbols inside `'...'` get reconstituted.
2352
+ const label = unescapeMermaidLabel(tm[2]);
1834
2353
  const toId = parseMermaidId(tm[3]);
1835
2354
  const arrowIx = label.indexOf(' → ');
1836
2355
  if (arrowIx === -1) {