@reicek/neataptic-ts 0.1.21 → 0.1.22

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (223)
  1. package/.github/agents/boundary-mapper.agent.md +29 -0
  2. package/.github/agents/docs-scout.agent.md +29 -0
  3. package/.github/agents/plan-scout.agent.md +29 -0
  4. package/.github/agents/solid-split.agent.md +138 -0
  5. package/.github/copilot-instructions.md +103 -0
  6. package/package.json +6 -3
  7. package/plans/ES2023 migration +13 -8
  8. package/plans/Evolution_Training_Interoperability_Contracts.md +1 -1
  9. package/plans/Interactive_Examples_and_Learning_Path.md +10 -2
  10. package/plans/Memory_Optimization.md +3 -3
  11. package/plans/README.md +63 -0
  12. package/plans/Roadmap.md +15 -3
  13. package/plans/asciiMaze_SOLID_split.done.md +130 -0
  14. package/plans/flappy_bird_SOLID_split.done.md +67 -0
  15. package/scripts/assets/theme.css +221 -34
  16. package/scripts/copy-examples.mjs +9 -5
  17. package/scripts/export-onnx.mjs +3 -3
  18. package/scripts/generate-bench-tables.mjs +10 -10
  19. package/scripts/generate-bench-tables.ts +10 -10
  20. package/scripts/generate-docs.ts +1415 -449
  21. package/scripts/render-docs-html.ts +15 -8
  22. package/src/README.md +101 -223
  23. package/src/architecture/README.md +57 -185
  24. package/src/architecture/layer/README.md +38 -38
  25. package/src/architecture/network/README.md +33 -31
  26. package/src/architecture/network/activate/README.md +77 -77
  27. package/src/architecture/network/connect/README.md +15 -13
  28. package/src/architecture/network/deterministic/README.md +7 -7
  29. package/src/architecture/network/evolve/README.md +44 -44
  30. package/src/architecture/network/gating/README.md +20 -20
  31. package/src/architecture/network/genetic/README.md +51 -51
  32. package/src/architecture/network/mutate/README.md +97 -97
  33. package/src/architecture/network/onnx/README.md +264 -264
  34. package/src/architecture/network/prune/README.md +39 -39
  35. package/src/architecture/network/remove/README.md +26 -26
  36. package/src/architecture/network/serialize/README.md +56 -56
  37. package/src/architecture/network/slab/README.md +61 -61
  38. package/src/architecture/network/standalone/README.md +24 -24
  39. package/src/architecture/network/stats/README.md +9 -9
  40. package/src/architecture/network/topology/README.md +46 -46
  41. package/src/architecture/network/training/README.md +21 -21
  42. package/src/methods/README.md +9 -87
  43. package/src/multithreading/README.md +8 -77
  44. package/src/multithreading/workers/README.md +2 -2
  45. package/src/multithreading/workers/browser/README.md +0 -6
  46. package/src/multithreading/workers/node/README.md +0 -3
  47. package/src/neat/README.md +562 -568
  48. package/src/utils/README.md +18 -18
  49. package/test/examples/asciiMaze/README.md +59 -59
  50. package/test/examples/asciiMaze/asciiMaze.e2e.test.ts +14 -9
  51. package/test/examples/asciiMaze/browser-entry/README.md +196 -0
  52. package/test/examples/asciiMaze/browser-entry/browser-entry.abort.services.ts +95 -0
  53. package/test/examples/asciiMaze/browser-entry/browser-entry.constants.ts +23 -0
  54. package/test/examples/asciiMaze/browser-entry/browser-entry.curriculum.services.ts +115 -0
  55. package/test/examples/asciiMaze/browser-entry/browser-entry.globals.services.ts +106 -0
  56. package/test/examples/asciiMaze/browser-entry/browser-entry.host.services.ts +157 -0
  57. package/test/examples/asciiMaze/browser-entry/browser-entry.services.ts +14 -0
  58. package/test/examples/asciiMaze/browser-entry/browser-entry.ts +129 -0
  59. package/test/examples/asciiMaze/browser-entry/browser-entry.types.ts +120 -0
  60. package/test/examples/asciiMaze/browser-entry/browser-entry.utils.ts +98 -0
  61. package/test/examples/asciiMaze/browser-entry.ts +10 -576
  62. package/test/examples/asciiMaze/dashboardManager/README.md +276 -0
  63. package/test/examples/asciiMaze/dashboardManager/archive/README.md +16 -0
  64. package/test/examples/asciiMaze/dashboardManager/archive/dashboardManager.archive.services.ts +267 -0
  65. package/test/examples/asciiMaze/dashboardManager/dashboardManager.constants.ts +35 -0
  66. package/test/examples/asciiMaze/dashboardManager/dashboardManager.services.ts +103 -0
  67. package/test/examples/asciiMaze/dashboardManager/dashboardManager.ts +181 -0
  68. package/test/examples/asciiMaze/dashboardManager/dashboardManager.types.ts +267 -0
  69. package/test/examples/asciiMaze/dashboardManager/dashboardManager.utils.ts +254 -0
  70. package/test/examples/asciiMaze/dashboardManager/live/README.md +14 -0
  71. package/test/examples/asciiMaze/dashboardManager/live/dashboardManager.live.services.ts +264 -0
  72. package/test/examples/asciiMaze/dashboardManager/telemetry/README.md +47 -0
  73. package/test/examples/asciiMaze/dashboardManager/telemetry/dashboardManager.telemetry.services.ts +513 -0
  74. package/test/examples/asciiMaze/dashboardManager.ts +13 -2335
  75. package/test/examples/asciiMaze/evolutionEngine/README.md +1058 -0
  76. package/test/examples/asciiMaze/evolutionEngine/curriculumPhase.ts +90 -0
  77. package/test/examples/asciiMaze/evolutionEngine/engineState.constants.ts +36 -0
  78. package/test/examples/asciiMaze/evolutionEngine/engineState.ts +58 -513
  79. package/test/examples/asciiMaze/evolutionEngine/engineState.types.ts +212 -0
  80. package/test/examples/asciiMaze/evolutionEngine/engineState.utils.ts +301 -0
  81. package/test/examples/asciiMaze/evolutionEngine/evolutionEngine.types.ts +445 -0
  82. package/test/examples/asciiMaze/evolutionEngine/evolutionLoop.ts +81 -50
  83. package/test/examples/asciiMaze/evolutionEngine/optionsAndSetup.ts +2 -4
  84. package/test/examples/asciiMaze/evolutionEngine/populationDynamics.ts +17 -33
  85. package/test/examples/asciiMaze/evolutionEngine/populationPruning.ts +1 -1
  86. package/test/examples/asciiMaze/evolutionEngine/rngAndTiming.ts +1 -2
  87. package/test/examples/asciiMaze/evolutionEngine/sampling.ts +1 -1
  88. package/test/examples/asciiMaze/evolutionEngine/scratchPools.ts +2 -5
  89. package/test/examples/asciiMaze/evolutionEngine/setupHelpers.ts +30 -37
  90. package/test/examples/asciiMaze/evolutionEngine/telemetryMetrics.ts +16 -58
  91. package/test/examples/asciiMaze/evolutionEngine/trainingWarmStart.ts +2 -2
  92. package/test/examples/asciiMaze/evolutionEngine.ts +55 -55
  93. package/test/examples/asciiMaze/fitness.ts +2 -2
  94. package/test/examples/asciiMaze/fitness.types.ts +65 -0
  95. package/test/examples/asciiMaze/interfaces.ts +64 -1352
  96. package/test/examples/asciiMaze/mazeMovement/README.md +356 -0
  97. package/test/examples/asciiMaze/mazeMovement/finalization/README.md +49 -0
  98. package/test/examples/asciiMaze/mazeMovement/finalization/mazeMovement.finalization.ts +138 -0
  99. package/test/examples/asciiMaze/mazeMovement/mazeMovement.constants.ts +101 -0
  100. package/test/examples/asciiMaze/mazeMovement/mazeMovement.services.ts +230 -0
  101. package/test/examples/asciiMaze/mazeMovement/mazeMovement.ts +299 -0
  102. package/test/examples/asciiMaze/mazeMovement/mazeMovement.types.ts +185 -0
  103. package/test/examples/asciiMaze/mazeMovement/mazeMovement.utils.ts +153 -0
  104. package/test/examples/asciiMaze/mazeMovement/policy/README.md +91 -0
  105. package/test/examples/asciiMaze/mazeMovement/policy/mazeMovement.policy.ts +467 -0
  106. package/test/examples/asciiMaze/mazeMovement/runtime/README.md +95 -0
  107. package/test/examples/asciiMaze/mazeMovement/runtime/mazeMovement.runtime.ts +354 -0
  108. package/test/examples/asciiMaze/mazeMovement/shaping/README.md +124 -0
  109. package/test/examples/asciiMaze/mazeMovement/shaping/mazeMovement.shaping.ts +459 -0
  110. package/test/examples/asciiMaze/mazeMovement.ts +12 -2978
  111. package/test/examples/flappy_bird/Trace-20260309T191949.json +24124 -0
  112. package/test/examples/flappy_bird/browser-entry/README.md +1129 -0
  113. package/test/examples/flappy_bird/browser-entry/browser-entry.host.utils.ts +4 -324
  114. package/test/examples/flappy_bird/browser-entry/browser-entry.network-view.utils.ts +6 -399
  115. package/test/examples/flappy_bird/browser-entry/browser-entry.playback.utils.ts +1 -717
  116. package/test/examples/flappy_bird/browser-entry/browser-entry.spawn.utils.ts +11 -31
  117. package/test/examples/flappy_bird/browser-entry/browser-entry.visualization.utils.ts +15 -893
  118. package/test/examples/flappy_bird/browser-entry/host/README.md +307 -0
  119. package/test/examples/flappy_bird/browser-entry/host/host.resize.service.ts +1 -295
  120. package/test/examples/flappy_bird/browser-entry/host/host.ts +562 -6
  121. package/test/examples/flappy_bird/browser-entry/host/resize/README.md +274 -0
  122. package/test/examples/flappy_bird/browser-entry/host/resize/host.resize.service.constants.ts +31 -0
  123. package/test/examples/flappy_bird/browser-entry/host/resize/host.resize.service.services.ts +360 -0
  124. package/test/examples/flappy_bird/browser-entry/host/resize/host.resize.service.ts +117 -0
  125. package/test/examples/flappy_bird/browser-entry/host/resize/host.resize.service.types.ts +63 -0
  126. package/test/examples/flappy_bird/browser-entry/host/resize/host.resize.service.utils.ts +250 -0
  127. package/test/examples/flappy_bird/browser-entry/network-view/README.md +399 -0
  128. package/test/examples/flappy_bird/browser-entry/network-view/network-view.topology.utils.ts +255 -0
  129. package/test/examples/flappy_bird/browser-entry/network-view/network-view.ts +802 -7
  130. package/test/examples/flappy_bird/browser-entry/playback/README.md +684 -0
  131. package/test/examples/flappy_bird/browser-entry/playback/background/README.md +277 -0
  132. package/test/examples/flappy_bird/browser-entry/playback/background/ground-grid/README.md +770 -0
  133. package/test/examples/flappy_bird/browser-entry/playback/background/ground-grid/playback.background.ground-grid.cache.services.ts +178 -0
  134. package/test/examples/flappy_bird/browser-entry/playback/background/ground-grid/playback.background.ground-grid.constants.ts +107 -0
  135. package/test/examples/flappy_bird/browser-entry/playback/background/ground-grid/playback.background.ground-grid.geometry.utils.ts +518 -0
  136. package/test/examples/flappy_bird/browser-entry/playback/background/ground-grid/playback.background.ground-grid.math.utils.ts +117 -0
  137. package/test/examples/flappy_bird/browser-entry/playback/background/ground-grid/playback.background.ground-grid.pulse.utils.ts +233 -0
  138. package/test/examples/flappy_bird/browser-entry/playback/background/ground-grid/playback.background.ground-grid.services.ts +211 -0
  139. package/test/examples/flappy_bird/browser-entry/playback/background/ground-grid/playback.background.ground-grid.ts +48 -0
  140. package/test/examples/flappy_bird/browser-entry/playback/background/ground-grid/playback.background.ground-grid.types.ts +212 -0
  141. package/test/examples/flappy_bird/browser-entry/playback/background/ground-grid/playback.background.ground-grid.utils.ts +81 -0
  142. package/test/examples/flappy_bird/browser-entry/playback/background/playback.background.cache.services.ts +96 -0
  143. package/test/examples/flappy_bird/browser-entry/playback/background/playback.background.constants.ts +62 -0
  144. package/test/examples/flappy_bird/browser-entry/playback/background/playback.background.services.ts +244 -0
  145. package/test/examples/flappy_bird/browser-entry/playback/background/playback.background.ts +53 -0
  146. package/test/examples/flappy_bird/browser-entry/playback/background/playback.background.types.ts +68 -0
  147. package/test/examples/flappy_bird/browser-entry/playback/background/playback.background.utils.ts +100 -0
  148. package/test/examples/flappy_bird/browser-entry/playback/frame-render/README.md +310 -0
  149. package/test/examples/flappy_bird/browser-entry/playback/frame-render/playback.frame-render.service.ts +92 -0
  150. package/test/examples/flappy_bird/browser-entry/playback/frame-render/playback.frame-render.services.ts +272 -0
  151. package/test/examples/flappy_bird/browser-entry/playback/frame-render/playback.frame-render.types.ts +39 -0
  152. package/test/examples/flappy_bird/browser-entry/playback/frame-render/playback.frame-render.utils.ts +493 -0
  153. package/test/examples/flappy_bird/browser-entry/playback/playback.constants.ts +1 -1
  154. package/test/examples/flappy_bird/browser-entry/playback/playback.frame-render.service.ts +4 -0
  155. package/test/examples/flappy_bird/browser-entry/playback/playback.snapshot.utils.ts +44 -0
  156. package/test/examples/flappy_bird/browser-entry/playback/playback.starfield.service.ts +39 -122
  157. package/test/examples/flappy_bird/browser-entry/playback/playback.starfield.services.ts +272 -0
  158. package/test/examples/flappy_bird/browser-entry/playback/playback.starfield.types.ts +62 -0
  159. package/test/examples/flappy_bird/browser-entry/playback/playback.starfield.utils.ts +11 -4
  160. package/test/examples/flappy_bird/browser-entry/playback/playback.ts +409 -8
  161. package/test/examples/flappy_bird/browser-entry/playback/playback.types.ts +4 -12
  162. package/test/examples/flappy_bird/browser-entry/runtime/README.md +235 -0
  163. package/test/examples/flappy_bird/browser-entry/runtime/runtime.evolution-launch.service.ts +45 -0
  164. package/test/examples/flappy_bird/browser-entry/runtime/runtime.lifecycle.service.ts +81 -0
  165. package/test/examples/flappy_bird/browser-entry/runtime/runtime.startup.service.ts +74 -0
  166. package/test/examples/flappy_bird/browser-entry/runtime/runtime.ts +31 -121
  167. package/test/examples/flappy_bird/browser-entry/runtime/runtime.types.ts +36 -0
  168. package/test/examples/flappy_bird/browser-entry/visualization/README.md +557 -0
  169. package/test/examples/flappy_bird/browser-entry/visualization/visualization.constants.ts +110 -0
  170. package/test/examples/flappy_bird/browser-entry/visualization/visualization.draw.service.ts +957 -19
  171. package/test/examples/flappy_bird/browser-entry/visualization/visualization.legend.utils.ts +138 -3
  172. package/test/examples/flappy_bird/browser-entry/visualization/visualization.topology.utils.ts +3 -27
  173. package/test/examples/flappy_bird/browser-entry/visualization/visualization.ts +1 -23
  174. package/test/examples/flappy_bird/browser-entry/worker-channel/README.md +156 -0
  175. package/test/examples/flappy_bird/constants/README.md +1179 -0
  176. package/test/examples/flappy_bird/constants/constants.network-view.ts +24 -0
  177. package/test/examples/flappy_bird/constants/constants.palette.ts +7 -0
  178. package/test/examples/flappy_bird/constants/constants.starfield.ts +78 -3
  179. package/test/examples/flappy_bird/environment/README.md +143 -0
  180. package/test/examples/flappy_bird/environment/environment.observation.utils.ts +1 -19
  181. package/test/examples/flappy_bird/environment/environment.step.service.ts +3 -66
  182. package/test/examples/flappy_bird/evaluation/README.md +130 -0
  183. package/test/examples/flappy_bird/evaluation/evaluation.fitness.utils.ts +1 -1
  184. package/test/examples/flappy_bird/evaluation/evaluation.rollout.service.ts +5 -375
  185. package/test/examples/flappy_bird/evaluation/rollout/README.md +291 -0
  186. package/test/examples/flappy_bird/evaluation/rollout/evaluation.rollout.constants.ts +30 -0
  187. package/test/examples/flappy_bird/evaluation/rollout/evaluation.rollout.service.ts +58 -0
  188. package/test/examples/flappy_bird/evaluation/rollout/evaluation.rollout.services.ts +310 -0
  189. package/test/examples/flappy_bird/evaluation/rollout/evaluation.rollout.types.ts +56 -0
  190. package/test/examples/flappy_bird/evaluation/rollout/evaluation.rollout.utils.ts +368 -0
  191. package/test/examples/flappy_bird/flappy-evolution-worker/README.md +618 -0
  192. package/test/examples/flappy_bird/flappy-evolution-worker/flappy-evolution-worker.playback.service.ts +7 -7
  193. package/test/examples/flappy_bird/flappy-evolution-worker/flappy-evolution-worker.simulation.frame.service.ts +364 -0
  194. package/test/examples/flappy_bird/flappy-evolution-worker/flappy-evolution-worker.simulation.types.ts +14 -0
  195. package/test/examples/flappy_bird/flappy-evolution-worker/flappy-evolution-worker.simulation.utils.ts +4 -201
  196. package/test/examples/flappy_bird/flappy-evolution-worker/flappy-evolution-worker.ts +184 -345
  197. package/test/examples/flappy_bird/flappy-evolution-worker/flappy-evolution-worker.warm-start.service.ts +291 -0
  198. package/test/examples/flappy_bird/flappy.simulation.shared.utils.ts +5 -0
  199. package/test/examples/flappy_bird/simulation-shared/README.md +417 -0
  200. package/test/examples/flappy_bird/simulation-shared/observation/README.md +183 -0
  201. package/test/examples/flappy_bird/simulation-shared/observation/observation.features.utils.ts +301 -0
  202. package/test/examples/flappy_bird/simulation-shared/observation/observation.ts +9 -0
  203. package/test/examples/flappy_bird/simulation-shared/observation/observation.vector.utils.ts +59 -0
  204. package/test/examples/flappy_bird/simulation-shared/simulation-shared.observation.utils.ts +5 -403
  205. package/test/examples/flappy_bird/simulation-shared/simulation-shared.spawn.utils.ts +20 -6
  206. package/test/examples/flappy_bird/{evaluation/evaluation.statistics.utils.ts → simulation-shared/simulation-shared.statistics.utils.ts} +23 -8
  207. package/test/examples/flappy_bird/trainer/README.md +563 -0
  208. package/test/examples/flappy_bird/trainer/evaluation/README.md +199 -0
  209. package/test/examples/flappy_bird/trainer/evaluation/trainer.evaluation.service.constants.ts +9 -0
  210. package/test/examples/flappy_bird/trainer/evaluation/trainer.evaluation.service.services.ts +73 -0
  211. package/test/examples/flappy_bird/trainer/evaluation/trainer.evaluation.service.ts +165 -0
  212. package/test/examples/flappy_bird/trainer/evaluation/trainer.evaluation.service.types.ts +25 -0
  213. package/test/examples/flappy_bird/trainer/evaluation/trainer.evaluation.service.utils.ts +161 -0
  214. package/test/examples/flappy_bird/trainer/trainer.evaluation.service.ts +13 -0
  215. package/test/examples/flappy_bird/trainer/trainer.report.service.services.ts +181 -0
  216. package/test/examples/flappy_bird/trainer/trainer.report.service.ts +126 -0
  217. package/test/examples/flappy_bird/trainer/trainer.selection.utils.ts +89 -0
  218. package/test/examples/flappy_bird/trainer/trainer.ts +11 -553
  219. package/test/examples/flappy_bird/browser-entry/browser-entry.utils.ts +0 -12
  220. package/test/examples/flappy_bird/environment/environment.ts +0 -7
  221. package/test/examples/flappy_bird/evaluation/evaluation.ts +0 -7
  222. package/test/examples/flappy_bird/simulation-shared/simulation-shared.ts +0 -15
  223. package/test/examples/flappy_bird/trainer/trainer.statistics.utils.ts +0 -78
@@ -1,2982 +1,16 @@
1
1
  /**
2
- * Maze Movement - Handles agent movement and simulation logic (Simplified)
2
+ * Compatibility entrypoint for the dedicated mazeMovement module.
3
3
  *
4
- * This module contains functions for agent movement and simulation in the maze environment,
5
- * focusing on simple navigation based primarily on neural network decisions.
6
- *
7
- * The agent movement system demonstrates:
8
- * - Decision making based on neural network outputs
9
- * - Basic reward calculations for reinforcement learning
10
- * - Simple goal-seeking behavior
11
- * - Simulation of movement with collision detection
12
- */
13
- import { INetwork, INodeStruct } from './interfaces';
14
- import { MazeUtils } from './mazeUtils';
15
- import { MazeVision } from './mazeVision';
16
-
17
- /**
18
- * Diagnostic telemetry produced when selecting a direction from network logits.
19
- *
20
- * Encapsulates the chosen direction along with entropy and probability data so
21
- * downstream helpers can apply shaping rewards and penalties without
22
- * rederiving softmax statistics on hot paths.
23
- */
24
- interface DirectionSelectionStats {
25
- /** Chosen action index (0..#ACTION_DIM-1) or -1 when no move is selected. */
26
- direction: number;
27
- /** Defensive copy of per-action softmax probabilities. */
28
- softmax: number[];
29
- /** Normalised entropy of the action distribution in [0,1]. */
30
- entropy: number;
31
- /** Probability assigned to the chosen action. */
32
- maxProb: number;
33
- /** Probability assigned to the runner-up action. */
34
- secondProb: number;
35
- }
36
-
37
- /**
38
- * Internal aggregate state used during a single agent simulation run.
39
- *
40
- * Purpose:
41
- * - Hold all derived runtime values, counters and diagnostic stats used by the
42
- * MazeMovement simulation helpers. This shape is intentionally rich so tests
43
- * and visualisers can inspect intermediate state when debugging.
44
- *
45
- * Notes:
46
- * - This interface is internal to the mazeMovement module and is not exported.
47
- * - Property descriptions are explicit to surface helpful tooltips in editors.
48
- */
49
- interface SimulationState {
50
- /** Current mutable agent position as [x, y]. */
51
- position: [number, number];
52
-
53
- /** Number of simulation steps executed so far (increments each loop). */
54
- steps: number;
55
-
56
- /** Number of entries in the recorded path (index into pooled PathX/PathY). */
57
- pathLength: number;
58
-
59
- /** Count of distinct cells visited during this run. */
60
- visitedUniqueCount: number;
61
-
62
- /** True when a precomputed distance map was supplied to the simulation. */
63
- hasDistanceMap: boolean;
64
-
65
- /** Optional precomputed distance map (rows × cols) used for fast heuristics. */
66
- distanceMap?: number[][];
67
-
68
- /** Minimum observed distance-to-exit reached so far (lower is better). */
69
- minDistanceToExit: number;
70
-
71
- /** Accumulated shaping reward derived from forward progress signals. */
72
- progressReward: number;
73
-
74
- /** Bonus accumulated when entering previously unvisited cells. */
75
- newCellExplorationBonus: number;
76
-
77
- /** Accumulated penalty from invalid moves, loops and other negative signals. */
78
- invalidMovePenalty: number;
79
-
80
- /** Index of the previous action/direction taken (-1 for no-move). */
81
- prevAction: number;
82
-
83
- /** Steps elapsed since the last observed improvement toward the goal. */
84
- stepsSinceImprovement: number;
85
-
86
- /** Last global distance-to-exit used for long-term improvement checks. */
87
- lastDistanceGlobal: number;
88
-
89
- /** Number of steps flagged as 'saturated' (network overconfident/flat outputs). */
90
- saturatedSteps: number;
91
-
92
- /** Recent positions sliding window used to detect local oscillation/stagnation. */
93
- recentPositions: [number, number][];
94
-
95
- /** Penalty applied when agent is oscillating in a tight local region. */
96
- localAreaPenalty: number;
97
-
98
- /** Counters of moves taken per direction index (N,E,S,W). */
99
- directionCounts: number[];
100
-
101
- /** Ring buffer storing recent visited cell indices for A↔B loop detection. */
102
- moveHistoryRing: Int32Array;
103
-
104
- /** Current number of populated entries in `moveHistoryRing`. */
105
- moveHistoryLength: number;
106
- /** Index pointer (head) into the circular moveHistoryRing. */
107
- moveHistoryHead: number;
108
- /** Current linearized cell index for the agent position. */
109
- currentCellIndex: number;
110
- /** Penalty accumulated for short A<->B oscillation detection. */
111
- loopPenalty: number;
112
- /** Penalty applied for returning to any recent cell (memory-based). */
113
- memoryPenalty: number;
114
- /** Dynamic revisit penalty scaled by per-cell visit counts. */
115
- revisitPenalty: number;
116
- /** Visit count at the current cell (derived from VisitCounts pool). */
117
- visitsAtCurrent: number;
118
- /** Current distance-to-goal measured at agent position. */
119
- distHere: number;
120
- /** Per-step perception/vision vector built for the network. */
121
- vision: number[];
122
- /** Network action statistics (softmax, entropy, etc.) populated each step. */
123
- actionStats: DirectionSelectionStats | null;
124
- /** Currently selected direction index (0..3) or #-NO_MOVE. */
125
- direction: number;
126
- /** Whether the agent moved on the last executed action. */
127
- moved: boolean;
128
- /** Distance value measured before executing the current action (previous step). */
129
- prevDistance: number;
130
-
131
- /** When true the simulation loop should terminate early due to safety triggers. */
132
- earlyTerminate: boolean;
133
- }
134
-
135
- /**
136
- * MazeMovement provides static methods for agent movement and simulation.
4
+ * The real implementation now lives under the folder-based module boundary at
5
+ * `mazeMovement/mazeMovement.ts`. This file remains so existing imports such as
6
+ * `./mazeMovement` continue to resolve without changes.
137
7
  */
138
- export class MazeMovement {
139
- /**
140
- * Maximum number of simulation steps before terminating (safety cap)
141
- * @internal
142
- */
143
- static #DEFAULT_MAX_STEPS = 3000;
144
-
145
- /**
146
- * Number of recent moves tracked for oscillation detection
147
- * @internal
148
- */
149
- static #MOVE_HISTORY_LENGTH = 6;
150
-
151
- // Named private constants to replace magic numbers and document intent.
152
- /** Reward scale applied to shaping terms (smaller reduces selection pressure) */
153
- static #REWARD_SCALE = 0.5;
154
- /** Strong penalty multiplier for short A->B oscillations */
155
- static #LOOP_PENALTY = 10; // multiplied by rewardScale
156
- /** Penalty applied when returning to a recent cell (memory-based) */
157
- static #MEMORY_RETURN_PENALTY = 2; // multiplied by rewardScale
158
- /** Per-visit penalty for repeated visits to same cell */
159
- static #REVISIT_PENALTY_PER_VISIT = 0.2; // per extra visit, multiplied by rewardScale
160
- /** Visits threshold to trigger termination/harsh penalty */
161
- static #VISIT_TERMINATION_THRESHOLD = 10;
162
- /** Extremely harsh penalty for invalid moves (used sparingly) */
163
- static #INVALID_MOVE_PENALTY_HARSH = 1000;
164
- /** Mild penalty for invalid moves to preserve learning signal */
165
- static #INVALID_MOVE_PENALTY_MILD = 10;
166
-
167
- // Saturation / collapse thresholds and penalties
168
- /** Probability threshold indicating overconfidence (near-deterministic) */
169
- static #OVERCONFIDENT_PROB = 0.985;
170
- /** Secondary-probability threshold used with overconfidence detection */
171
- static #SECOND_PROB_LOW = 0.01;
172
- /** Threshold for flat-collapse detection using log-std of outputs */
173
- static #LOGSTD_FLAT_THRESHOLD = 0.01;
174
- /** Penalty when network appears overconfident */
175
- static #OVERCONFIDENT_PENALTY = 0.25; // * rewardScale
176
- /** Penalty for flat collapse (no variance in outputs) */
177
- static #FLAT_COLLAPSE_PENALTY = 0.35; // * rewardScale
178
- /** Minimum saturations before applying bias adjustments */
179
- static #SATURATION_ADJUST_MIN = 6;
180
- /** Interval (in steps) used for saturation bias adjustment checks */
181
- static #SATURATION_ADJUST_INTERVAL = 5;
182
- /** Clamp for adaptive bias adjustments */
183
- static #BIAS_CLAMP = 5;
184
- /** Scaling factor used when adjusting biases to mitigate saturation */
185
- static #BIAS_ADJUST_FACTOR = 0.5;
186
-
187
- // Convenience thresholds and tuning knobs (centralized to avoid magic literals)
188
- /** Warmup steps where exploration is encouraged */
189
- static #EPSILON_WARMUP_STEPS = 10;
190
- /** Steps-stagnant threshold to consider very stagnant (high epsilon) */
191
- static #EPSILON_STAGNANT_HIGH_THRESHOLD = 12;
192
- /** Steps-stagnant threshold to consider moderate stagnation */
193
- static #EPSILON_STAGNANT_MED_THRESHOLD = 6;
194
- /** Saturation count that triggers epsilon-increase behavior */
195
- static #EPSILON_SATURATION_TRIGGER = 3;
196
- /** Length used to detect tiny A->B oscillations */
197
- static #OSCILLATION_DETECT_LENGTH = 4;
198
- /** Saturation penalty trigger (>=) */
199
- static #SATURATION_PENALTY_TRIGGER = 5;
200
- /** Period (in steps) to escalate saturation penalty */
201
- static #SATURATION_PENALTY_PERIOD = 10;
202
- /** Start step for global break bonus when breaking long stagnation */
203
- static #GLOBAL_BREAK_BONUS_START = 10;
204
- /** Per-step bonus for global break beyond the start threshold */
205
- static #GLOBAL_BREAK_BONUS_PER_STEP = 0.01;
206
- /** Cap for the global break bonus */
207
- static #GLOBAL_BREAK_BONUS_CAP = 0.5;
208
- /** Number of steps since improvement to begin repetition penalty scaling */
209
- static #REPETITION_PENALTY_START = 4;
210
- /** Weight for entropy bonus on failed runs */
211
- static #ENTROPY_BONUS_WEIGHT = 4;
212
-
213
- // Vision input layout indices (groups used by hasGuidance checks)
214
- /** Start index of LOS group within vision vector */
215
- static #VISION_LOS_START = 8;
216
- /** Start index of gradient group within vision vector */
217
- static #VISION_GRAD_START = 12;
218
- /** Number of elements in each vision group (LOS / Gradient) */
219
- static #VISION_GROUP_LEN = 4;
220
-
221
- // Proximity/exploration tuning
222
- /** Distance (in cells) within which greedy proximity moves are prioritized */
223
- static #PROXIMITY_GREEDY_DISTANCE = 2;
224
- /** Distance threshold to reduce epsilon exploration near goal */
225
- static #PROXIMITY_SUPPRESS_EXPLOR_DIST = 5;
226
- /** Initial epsilon for epsilon-greedy exploration */
227
- static #EPSILON_INITIAL = 0.35;
228
- /** Epsilon used when the agent is highly stagnant */
229
- static #EPSILON_STAGNANT_HIGH = 0.5;
230
- /** Epsilon used for moderate stagnation */
231
- static #EPSILON_STAGNANT_MED = 0.25;
232
- /** Epsilon used when network saturations are detected */
233
- static #EPSILON_SATURATIONS = 0.3;
234
- /** Minimum epsilon allowed when near the goal */
235
- static #EPSILON_MIN_NEAR_GOAL = 0.05;
236
- /** Streak length used to trigger forced exploration */
237
- static #NO_MOVE_STREAK_THRESHOLD = 5;
238
-
239
- // Local area stagnation
240
- /** Size of the recent-positions sliding window for local stagnation detection */
241
- static #LOCAL_WINDOW = 30;
242
- /** Max span (in cells) considered "local" for oscillation penalties */
243
- static #LOCAL_AREA_SPAN_THRESHOLD = 5;
244
- /** Steps without improvement before local-area stagnation penalty applies */
245
- static #LOCAL_AREA_STAGNATION_STEPS = 8;
246
- /** Amount applied to local area penalty when tight oscillation detected (multiplied by rewardScale) */
247
- static #LOCAL_AREA_PENALTY_AMOUNT = 0.05;
248
-
249
// Progress reward shaping
/** Base reward granted for forward progress toward the exit. */
static #PROGRESS_REWARD_BASE = 0.3;
/** Extra progress reward, scaled by network confidence. */
static #PROGRESS_REWARD_CONF_SCALE = 0.7;
/** Per-step multiplier (steps since improvement) for additional reward shaping. */
static #PROGRESS_STEPS_MULT = 0.02;
/** Cap on the steps-based progress contribution (times rewardScale). */
static #PROGRESS_STEPS_MAX = 0.5;
/** Scale applied to the raw distance delta when shaping reward. */
static #DISTANCE_DELTA_SCALE = 2.0;
/** Base confidence factor used in distance-delta shaping. */
static #DISTANCE_DELTA_CONF_BASE = 0.4;
/** Additional confidence scale used in distance-delta shaping. */
static #DISTANCE_DELTA_CONF_SCALE = 0.6;
/** Base penalty for a move that increases distance to the goal (multiplied by rewardScale). */
static #PROGRESS_AWAY_BASE_PENALTY = 0.05;
/** Extra scaling of the away penalty, proportional to network confidence. */
static #PROGRESS_AWAY_CONF_SCALE = 0.15;
268
-
269
// Entropy tuning
/** Entropy above this value marks the action distribution as too uniform. */
static #ENTROPY_HIGH_THRESHOLD = 0.95;
/** Entropy below this value marks the distribution as confident. */
static #ENTROPY_CONFIDENT_THRESHOLD = 0.55;
/** Gap required between the top two probabilities to count as confident. */
static #ENTROPY_CONFIDENT_DIFF = 0.25;
/** Small penalty for persistently high entropy (times rewardScale). */
static #ENTROPY_PENALTY = 0.03;
/** Tiny bonus for clear decisions that aid exploration (times rewardScale). */
static #EXPLORATION_BONUS_SMALL = 0.015;
/** Base repetition/backtrack penalty for repeating the same action without improvement. */
static #REPETITION_PENALTY_BASE = 0.05;
/** Penalty for taking the direct opposite move when it does not improve. */
static #BACK_MOVE_PENALTY = 0.2;

// Saturation penalties
/** Base penalty applied when saturation is detected (times rewardScale). */
static #SATURATION_PENALTY_BASE = 0.05;
/** Escalating penalty applied periodically while saturation persists (times rewardScale when escalation applies). */
static #SATURATION_PENALTY_ESCALATE = 0.1;

// Deep stagnation
/** Steps without improvement that trigger deep-stagnation handling. */
static #DEEP_STAGNATION_THRESHOLD = 40;
/** Penalty for detected deep stagnation in non-browser environments (times rewardScale). */
static #DEEP_STAGNATION_PENALTY = 2;
296
// Action/output dimension and softmax/entropy tuning
/** Number of cardinal actions (N,E,S,W). */
static #ACTION_DIM = 4;
/** Natural log of `#ACTION_DIM`; used to normalize entropy calculations. */
static #LOG_ACTIONS = Math.log(MazeMovement.#ACTION_DIM);
/**
 * Pooled scratch buffer used by `selectDirection` to avoid per-call
 * allocations on the softmax/entropy hot path.
 *
 * @remarks
 * - Class-private and reused across calls; `selectDirection` is therefore
 *   not reentrant and should not be called concurrently.
 * - Sized from `#ACTION_DIM` so the buffer length follows the action count
 *   instead of repeating the literal.
 */
static #SCRATCH_CENTERED = new Float64Array(MazeMovement.#ACTION_DIM);
/**
 * Second pooled scratch buffer for `selectDirection`; same reuse and
 * non-reentrancy caveats as `#SCRATCH_CENTERED`.
 */
static #SCRATCH_EXPS = new Float64Array(MazeMovement.#ACTION_DIM);
/** Small pooled scratch for temporary integer coordinate coercion. */
static #COORD_SCRATCH = new Int32Array(2);
/** Representation for the 'no move' direction. */
static #NO_MOVE = -1;
/** Minimum standard deviation, used to prevent division by zero. */
static #STD_MIN = 1e-6;
/** Std threshold for collapse-ratio decisions. */
static #COLLAPSE_STD_THRESHOLD = 0.01;
/** Secondary std threshold, used when std indicates medium collapse. */
static #COLLAPSE_STD_MED = 0.03;
/** Collapse ratio (adaptive temperature) used when std is extremely low: full collapse. */
static #COLLAPSE_RATIO_FULL = 1;
/** Collapse ratio (adaptive temperature) used for medium collapse: partial collapse. */
static #COLLAPSE_RATIO_HALF = 0.5;
/** Base term used to compute the softmax temperature. */
static #TEMPERATURE_BASE = 1;
/** Scale factor applied when computing the adaptive softmax temperature. */
static #TEMPERATURE_SCALE = 1.2;
330
-
331
// Network history and randomness
/**
 * Number of recent output snapshots kept for variance diagnostics.
 * Larger values smooth variance estimates at the cost of memory.
 */
static #OUTPUT_HISTORY_LENGTH = 80;
/** Small randomness added to fitness to break ties stably. */
static #FITNESS_RANDOMNESS = 0.01;

// Success fitness constants
/** Base fitness awarded for a successful maze completion. */
static #SUCCESS_BASE_FITNESS = 650;
/** Scale applied to remaining steps on success, rewarding efficiency. */
static #STEP_EFFICIENCY_SCALE = 0.2;
/** Weight of the action-entropy bonus on successful runs. */
static #SUCCESS_ACTION_ENTROPY_SCALE = 5;
/** Lower clamp for the fitness of any successful run. */
static #MIN_SUCCESS_FITNESS = 150;

// Exploration / revisiting tuning
/** Bonus reward for discovering a previously unvisited cell. */
static #NEW_CELL_EXPLORATION_BONUS = 0.3;
/** Strong penalty factor for revisiting cells. */
static #REVISIT_PENALTY_STRONG = 0.5;

// Progress shaping constants
/** Exponent used in non-linear progress shaping. */
static #PROGRESS_POWER = 1.3;
/** Scale that converts shaped progress into a fitness contribution. */
static #PROGRESS_SCALE = 500;

/** Node type string used in network node objects. */
static #NODE_TYPE_OUTPUT = 'output';

/** `[dx, dy]` deltas for the cardinal moves, indexed N, E, S, W. */
static #DIRECTION_DELTAS: readonly [number, number][] = [
  [0, -1], // North
  [1, 0], // East
  [0, 1], // South
  [-1, 0], // West
];
/** Opposite-direction lookup table (direction index -> opposite index). */
static #OPPOSITE_DIR: readonly number[] = [2, 3, 0, 1];
375
-
376
// ---------------------------------------------------------------------------
// Pooled / reusable typed-array buffers (non-reentrant) for simulation state
// ---------------------------------------------------------------------------
/** Visited flag per cell (0/1). Reused across simulations. @remarks Non-reentrant. */
static #VisitedFlags: Uint8Array | null = null;
/** Visit counts per cell (clamped). @remarks Non-reentrant. */
static #VisitCounts: Uint16Array | null = null;
/** Path X coordinates (index-aligned with #PathY). */
static #PathX: Int32Array | null = null;
/** Path Y coordinates (index-aligned with #PathX). */
static #PathY: Int32Array | null = null;
/** Capacity (cells) currently allocated for grid-dependent arrays. */
static #GridCapacity = 0;
/** Capacity (steps) currently allocated for path arrays. */
static #PathCapacity = 0;
/** Cached maze width for index calculations. */
static #CachedWidth = 0;
/** Cached maze height for bounds validation. */
static #CachedHeight = 0;

/**
 * Pooled softmax output (returned as a cloned plain array).
 * Sized from `#ACTION_DIM` so its length follows the action count rather
 * than repeating the literal.
 */
static #SOFTMAX = new Float64Array(MazeMovement.#ACTION_DIM);

/**
 * Seedable PRNG state (Mulberry32 style) stored in a pooled Uint32Array.
 * - When `null`, the implementation falls back to `Math.random()`.
 * - Keeping the single-word state in a typed array makes in-place updates
 *   explicit and avoids reallocating when reseeding.
 */
static #PRNGState: Uint32Array | null = null;

// ---------------------------------------------------------------------------
// Internal mutable run-scoped state (replaces (MazeMovement as any).foo uses)
// ---------------------------------------------------------------------------
/** Rolling saturation counter used for adaptive penalties. */
static #StateSaturations = 0;
/** Consecutive steps with no movement; used to trigger forced exploration. */
static #StateNoMoveStreak = 0;
/** Previous distance value supplied to the vision builder. */
static #StatePrevDistanceStep: number | undefined = undefined;
416
-
417
/**
 * Report whether a proposed target cell can be entered, i.e. it lies inside
 * the maze and is not a wall.
 *
 * Two call shapes are supported: a coordinate tuple (`[x, y]`) or separate
 * numeric `x, y` arguments. Both are normalised to a pair of 32-bit
 * integers (via the pooled `#COORD_SCRATCH` buffer) and then handed to
 * `#isCellOpen`, which performs the actual bounds and wall test.
 *
 * Details:
 * - Numeric form: a missing `y` is treated as `0`.
 * - Tuple form: anything that is not an array of exactly two entries yields
 *   `false` without consulting the maze.
 *
 * @param encodedMaze - 2D read-only numeric maze (-1 === wall)
 * @param position - tuple `[x, y]` OR the numeric `x` coordinate
 * @param y - numeric `y` coordinate when `x` and `y` are passed separately
 * @returns `true` when the coordinates are within bounds and not a wall
 * @example
 * // tuple form
 * MazeMovement.isValidMove(encodedMaze, [3, 2]);
 * // numeric form
 * MazeMovement.isValidMove(encodedMaze, 3, 2);
 */
static isValidMove(
  encodedMaze: ReadonlyArray<ReadonlyArray<number>>,
  position: readonly [number, number],
): boolean;
static isValidMove(
  encodedMaze: ReadonlyArray<ReadonlyArray<number>>,
  x: number,
  y: number,
): boolean;
static isValidMove(
  encodedMaze: ReadonlyArray<ReadonlyArray<number>>,
  positionOrX: readonly [number, number] | number,
  yMaybe?: number,
): boolean {
  // Resolve both overloads to one raw (x, y) pair.
  let targetX: number;
  let targetY: number;
  if (typeof positionOrX === 'number') {
    targetX = positionOrX;
    targetY = yMaybe ?? 0;
  } else {
    // Reject malformed tuples before touching the maze.
    if (!Array.isArray(positionOrX) || positionOrX.length !== 2) return false;
    [targetX, targetY] = positionOrX;
  }

  // Truncate to int32 through the pooled scratch, then delegate.
  const scratch = MazeMovement.#COORD_SCRATCH;
  scratch[0] = targetX | 0;
  scratch[1] = targetY | 0;
  return MazeMovement.#isCellOpen(encodedMaze, scratch[0], scratch[1]);
}
484
-
485
/**
 * Produce a pseudo-random number in [0,1).
 *
 * - With a seeded state (`#PRNGState` holding at least one word) the value
 *   comes from a Mulberry32-style generator that advances that pooled word
 *   in place, so a given seed always yields the same sequence.
 * - With no state (`null` or an empty array) the call is forwarded to
 *   `Math.random()`.
 *
 * Seeded path, in order:
 * 1) add the odd constant `0x6d2b79f5` to the state word (wrapping to uint32)
 *    and store it back;
 * 2) mix the bits with two xor/shift/`Math.imul` stages;
 * 3) apply a final xor-shift and divide by 2^32.
 *
 * @returns number in range [0,1)
 */
static #rand(): number {
  const seedCell = MazeMovement.#PRNGState;

  // Unseeded: defer to the host engine.
  if (seedCell == null || seedCell.length === 0) return Math.random();

  // Advance and persist the state word; `>>> 0` keeps it an unsigned 32-bit value.
  const advanced = (seedCell[0] + 0x6d2b79f5) >>> 0;
  seedCell[0] = advanced;

  // First multiply/xor mixing stage.
  const stageOne = Math.imul(advanced ^ (advanced >>> 15), advanced | 1) >>> 0;

  // Second multiply/xor mixing stage.
  const stageTwo =
    (stageOne ^
      (stageOne + Math.imul(stageOne ^ (stageOne >>> 7), stageOne | 61))) >>>
    0;

  // Final xor-shift, then map the uint32 onto [0,1).
  const TWO_POW_32 = 4294967296;
  return ((stageTwo ^ (stageTwo >>> 14)) >>> 0) / TWO_POW_32;
}
536
-
537
/**
 * Map a cell coordinate to its position in the pooled, row-major grid
 * buffers: `index = y * width + x`, with `width` taken from `#CachedWidth`.
 *
 * The row offset is computed with `Math.imul`, i.e. as a 32-bit integer
 * multiplication.
 *
 * @param x - Column coordinate (0-based)
 * @param y - Row coordinate (0-based)
 * @returns Linearized cell index used for indexing pooled arrays
 * @example
 * // With a cached width of 10, (x=3, y=2) -> 2*10 + 3 = 23
 * const idx = MazeMovement.#index(3, 2); // 23
 */
static #index(x: number, y: number): number {
  return Math.imul(y, MazeMovement.#CachedWidth) + x;
}
569
-
570
/**
 * Prepare the pooled typed-array buffers for a simulation over a maze of the
 * given size and step budget.
 *
 * What happens:
 * - Grid buffers (`#VisitedFlags`, `#VisitCounts`): if they are missing or
 *   too small for `width * height` cells, fresh (zeroed) arrays are
 *   allocated with a power-of-two capacity from `#nextPow2`. Otherwise the
 *   existing arrays are kept and only their first `width * height` entries
 *   are reset to 0.
 * - Path buffers (`#PathX`, `#PathY`): reallocated to a power-of-two
 *   capacity only when missing or smaller than `maxSteps + 1`; existing
 *   contents are not cleared.
 * - `#CachedWidth` / `#CachedHeight` are updated for the index helpers.
 *
 * @param width - maze width (columns)
 * @param height - maze height (rows)
 * @param maxSteps - maximum expected path length (safety bound)
 * @example
 * MazeMovement.#initBuffers(32, 20, 1500);
 */
static #initBuffers(width: number, height: number, maxSteps: number) {
  // Grid buffers: grow when absent/too small, else clear the active prefix.
  const cellsNeeded = width * height;
  const visitedFlags = MazeMovement.#VisitedFlags;
  if (!visitedFlags || cellsNeeded > MazeMovement.#GridCapacity) {
    const gridCapacity = MazeMovement.#nextPow2(cellsNeeded);
    MazeMovement.#VisitedFlags = new Uint8Array(gridCapacity);
    MazeMovement.#VisitCounts = new Uint16Array(gridCapacity);
    MazeMovement.#GridCapacity = gridCapacity;
  } else {
    visitedFlags.fill(0, 0, cellsNeeded);
    MazeMovement.#VisitCounts!.fill(0, 0, cellsNeeded);
  }

  // Path buffers: one slot per step plus the starting position.
  const pathEntriesNeeded = maxSteps + 1;
  if (
    !MazeMovement.#PathX ||
    pathEntriesNeeded > MazeMovement.#PathCapacity
  ) {
    const pathCapacity = MazeMovement.#nextPow2(pathEntriesNeeded);
    MazeMovement.#PathX = new Int32Array(pathCapacity);
    MazeMovement.#PathY = new Int32Array(pathCapacity);
    MazeMovement.#PathCapacity = pathCapacity;
  }

  // Remember the dimensions for index arithmetic elsewhere.
  MazeMovement.#CachedWidth = width;
  MazeMovement.#CachedHeight = height;
}
625
-
626
/**
 * Return the smallest power-of-two integer >= `n`.
 *
 * Implementation notes:
 * - `n` is floored and clamped to at least 1; anything <= 1 yields 1.
 * - For values within the unsigned 32-bit range the exponent is derived
 *   from `Math.clz32(n - 1)`. The power itself is computed with `**` rather
 *   than `<<`: the shift operators work on signed 32-bit integers, so
 *   `1 << 31` is negative and a further shift wraps to 0. With the shift
 *   form, any request above 2^30 produced 0 instead of 2^31 or 2^32.
 * - Larger values fall back to a plain doubling loop.
 *
 * @param n - Target minimum integer (expected positive)
 * @returns The smallest power of two >= n
 * @example
 * MazeMovement.#nextPow2(13) === 16
 * MazeMovement.#nextPow2(2 ** 30 + 1) === 2 ** 31
 */
static #nextPow2(n: number): number {
  // Step 1: sanitize input and handle trivial cases
  const requested = Math.max(1, Math.floor(n));
  if (requested <= 1) return 1;

  // Step 2: uint32 range — bit length of (requested - 1) is the exponent.
  if (requested <= 0xffffffff) {
    const exponent = 32 - Math.clz32(requested - 1);
    // `**` stays exact for exponents 31 and 32, where `<<` would overflow.
    return 2 ** exponent;
  }

  // Step 3: safe fallback for very large numbers — doubling loop (rare)
  let power = 1;
  while (power < requested) power *= 2;
  return power;
}
668
-
669
/**
 * Type guard: is `candidate` an array whose elements are all finite numbers?
 *
 * @param candidate - Value to test for numeric array semantics.
 * @returns True when candidate is an array of finite numbers.
 */
static #isNumberArray(candidate: unknown): candidate is number[] {
  if (!Array.isArray(candidate)) return false;
  // `Number.isFinite` never coerces: non-number values simply fail the test.
  return candidate.every((entry: unknown) => Number.isFinite(entry));
}
683
-
684
/**
 * Fetch the optional `_lastStepOutputs` history from the network.
 *
 * The property is read reflectively and only returned when it is an array
 * in which every entry passes `#isNumberArray`.
 *
 * @param network - Network instance that may provide an outputs history.
 * @returns Sanitised history buffer or `undefined` when absent/invalid.
 */
static #readOutputHistory(network: INetwork): number[][] | undefined {
  const stored: unknown = Reflect.get(network as object, '_lastStepOutputs');
  if (Array.isArray(stored) && stored.every(MazeMovement.#isNumberArray)) {
    return stored as number[][];
  }
  return undefined;
}
697
-
698
/**
 * Store the outputs history on the network under `_lastStepOutputs`,
 * using reflection.
 *
 * @param network - Target network to mutate.
 * @param history - Updated history buffer.
 */
static #writeOutputHistory(network: INetwork, history: number[][]): void {
  const target = network as object;
  Reflect.set(target, '_lastStepOutputs', history);
}
707
-
708
/**
 * Copy the leading `length` entries of the pooled `#PathX` / `#PathY`
 * buffers into a brand-new array of `[x, y]` tuples.
 *
 * The pooled buffers are reused between simulations, so callers that need
 * a path they can keep or mutate get an independent snapshot here.
 *
 * - `length` is floored and clamped to be non-negative; 0 returns `[]`.
 * - The result array is allocated once at its final size and then filled.
 *
 * @param length - number of path entries to materialize (usually `state.pathLength`)
 * @returns A newly allocated array of `[x, y]` tuples with `length` entries.
 * @example
 * const pathSnapshot = MazeMovement.#materializePath(state.pathLength);
 */
static #materializePath(length: number): [number, number][] {
  const count = Math.max(0, Math.floor(length));
  if (count === 0) return [];

  // Hoist the pooled buffers out of the loop.
  const xs = MazeMovement.#PathX!;
  const ys = MazeMovement.#PathY!;

  const snapshot = new Array<[number, number]>(count);
  let cursor = 0;
  while (cursor < count) {
    snapshot[cursor] = [xs[cursor], ys[cursor]];
    cursor += 1;
  }
  return snapshot;
}
750
-
751
/**
 * Sum a contiguous group of `#VISION_GROUP_LEN` elements in the vision
 * vector starting at `start`.
 *
 * Behaviour:
 * - `start` is truncated to an integer and clamped to be non-negative; the
 *   end of the group is clamped to `vision.length`, so a malformed `start`
 *   or a short `vision` array yields a partial sum or 0 instead of throwing.
 * - Missing (`null`/`undefined`) entries count as 0.
 * - The function is pure: it only reads `vision`. It no longer copies the
 *   values into the shared `#SCRATCH_CENTERED` buffer, which had no effect
 *   on the result and silently overwrote a pool shared with other code.
 *
 * @param vision - flat array of numeric vision inputs
 * @param start - start index of the group to sum
 * @returns numeric sum of the group (0 for empty/out-of-range input)
 * @example
 * const losSum = MazeMovement.#sumVisionGroup(visionVector, MazeMovement.#VISION_LOS_START);
 */
static #sumVisionGroup(vision: number[], start: number) {
  const groupStart = Math.max(0, start | 0);
  const groupEnd = Math.min(
    vision.length,
    groupStart + MazeMovement.#VISION_GROUP_LEN,
  );

  // An empty or inverted range skips the loop and returns 0.
  let total = 0;
  for (let readIndex = groupStart; readIndex < groupEnd; readIndex++) {
    total += vision[readIndex] ?? 0;
  }
  return total;
}
801
-
802
/**
 * Pick the epsilon for epsilon-greedy exploration at the current step.
 *
 * The base value comes from the first matching condition, in this order:
 * 1) warmup (`stepNumber` below `#EPSILON_WARMUP_STEPS`) -> `#EPSILON_INITIAL`
 * 2) high stagnation (`stepsSinceImprovement` above
 *    `#EPSILON_STAGNANT_HIGH_THRESHOLD`) -> `#EPSILON_STAGNANT_HIGH`
 * 3) moderate stagnation (above `#EPSILON_STAGNANT_MED_THRESHOLD`)
 *    -> `#EPSILON_STAGNANT_MED`
 * 4) saturation (`saturations` above `#EPSILON_SATURATION_TRIGGER`)
 *    -> `#EPSILON_SATURATIONS`
 * 5) otherwise 0.
 *
 * Finally, when `distHere` is within `#PROXIMITY_SUPPRESS_EXPLOR_DIST`, the
 * value is capped at `#EPSILON_MIN_NEAR_GOAL` to suppress exploration near
 * the goal.
 *
 * @param stepNumber - global step index inside the simulation loop
 * @param stepsSinceImprovement - number of steps since last improvement
 * @param distHere - current distance-to-goal (used to suppress exploration)
 * @param saturations - rolling saturation count used for bias adjustments
 * @returns epsilon value used for epsilon-greedy exploration
 * @example
 * const eps = MazeMovement.#computeEpsilon(step, state.stepsSinceImprovement, state.distHere, MazeMovement.#StateSaturations);
 */
static #computeEpsilon(
  stepNumber: number,
  stepsSinceImprovement: number,
  distHere: number,
  saturations: number,
): number {
  // Base epsilon: first matching condition wins.
  let epsilon: number;
  if (stepNumber < MazeMovement.#EPSILON_WARMUP_STEPS) {
    epsilon = MazeMovement.#EPSILON_INITIAL;
  } else if (
    stepsSinceImprovement > MazeMovement.#EPSILON_STAGNANT_HIGH_THRESHOLD
  ) {
    epsilon = MazeMovement.#EPSILON_STAGNANT_HIGH;
  } else if (
    stepsSinceImprovement > MazeMovement.#EPSILON_STAGNANT_MED_THRESHOLD
  ) {
    epsilon = MazeMovement.#EPSILON_STAGNANT_MED;
  } else if (saturations > MazeMovement.#EPSILON_SATURATION_TRIGGER) {
    epsilon = MazeMovement.#EPSILON_SATURATIONS;
  } else {
    epsilon = 0;
  }

  // Near the goal, never exceed the near-goal minimum.
  const isNearGoal = distHere <= MazeMovement.#PROXIMITY_SUPPRESS_EXPLOR_DIST;
  return isNearGoal
    ? Math.min(epsilon, MazeMovement.#EPSILON_MIN_NEAR_GOAL)
    : epsilon;
}
876
-
877
/**
 * Check whether a cell at (x, y) is inside the maze bounds and not a wall.
 *
 * Behaviour / rationale:
 * - Maze dimensions are read from the provided maze (row count, and the
 *   first row's length as the column count). The earlier cached-dimension
 *   selection always resolved to these same values and has been removed.
 * - A missing maze, a missing row, or a row too short to contain the
 *   requested column is treated as non-open. Previously a short row in a
 *   ragged maze produced an `undefined` cell, which compared unequal to the
 *   wall sentinel and was reported as open.
 * - Coordinates are truncated to 32-bit integers through `#COORD_SCRATCH`.
 *
 * Steps:
 * 1) Resolve maze width/height from the provided maze.
 * 2) Truncate the coordinates and run the bounds checks.
 * 3) Read the row once, confirm the cell exists, and compare against the
 *    wall sentinel (-1).
 *
 * @param encodedMaze - 2D read-only numeric maze representation (-1 == wall)
 * @param x - zero-based column index to test
 * @param y - zero-based row index to test
 * @returns true when the cell exists and is not a wall
 * @example
 * const open = MazeMovement.#isCellOpen(encodedMaze, x, y);
 */
static #isCellOpen(
  encodedMaze: ReadonlyArray<ReadonlyArray<number>>,
  x: number,
  y: number,
): boolean {
  // Step 1: dimensions of the provided maze (0 when absent/empty)
  const mazeRowCount = encodedMaze?.length ?? 0;
  const mazeColumnCount = encodedMaze?.[0]?.length ?? 0;

  // Step 2: truncate to int32 through the pooled scratch, then bounds-check
  MazeMovement.#COORD_SCRATCH[0] = x | 0;
  MazeMovement.#COORD_SCRATCH[1] = y | 0;
  const col = MazeMovement.#COORD_SCRATCH[0];
  const row = MazeMovement.#COORD_SCRATCH[1];

  if (row < 0 || row >= mazeRowCount) return false;
  if (col < 0 || col >= mazeColumnCount) return false;

  // Step 3: the row must exist and actually contain the column
  const targetRow = encodedMaze[row];
  if (!targetRow || col >= targetRow.length) return false;
  return targetRow[col] !== -1;
}
943
-
944
/**
 * Unified distance lookup for a cell coordinate.
 *
 * Behaviour:
 * - Coordinates are truncated to 32-bit integers.
 * - When `distanceMap` holds a finite number at `[y][x]`, that value is
 *   returned.
 * - In every other case (no map, missing row, missing or non-finite entry)
 *   the result is `Infinity`, meaning unknown/unreachable.
 *
 * Notes:
 * - The previous implementation followed the map lookup with a bounds
 *   check against the maze, but every branch of it returned `Infinity`;
 *   that dead code is removed. No on-demand distance computation is done.
 * - A `null` or missing row in `distanceMap` now resolves to `Infinity`
 *   instead of throwing.
 *
 * @param encodedMaze - 2D read-only numeric maze representation (not consulted)
 * @param coords - readonly tuple [x, y] of zero-based coordinates
 * @param distanceMap - optional precomputed distance map (same shape as maze)
 * @returns finite distance number when available, otherwise `Infinity`
 * @example
 * const d = MazeMovement.#distanceAt(encodedMaze, [3,2], distanceMap);
 */
static #distanceAt(
  encodedMaze: ReadonlyArray<ReadonlyArray<number>>,
  [x, y]: readonly [number, number],
  distanceMap?: number[][],
): number {
  // Kept for signature compatibility; the lookup depends only on the map.
  void encodedMaze;

  const mapped = distanceMap?.[y | 0]?.[x | 0];
  return typeof mapped === 'number' && Number.isFinite(mapped)
    ? mapped
    : Infinity;
}
1018
-
1019
- // ...existing code...
1020
-
1021
/**
 * Move the agent one cell in the requested direction when the move is legal.
 *
 * The candidate cell is the current position offset by the matching entry of
 * the direction-deltas table. It is accepted only when
 * `MazeMovement.isValidMove` approves it; otherwise the agent stays in place.
 * The input tuple is never mutated and a fresh tuple is returned on every path.
 *
 * A direction that is not a whole number inside the action range (including
 * the no-move sentinel) leaves the agent in place. Previously a fractional
 * in-range value such as `1.5` passed the range check, looked up `undefined`
 * in the deltas table and threw while destructuring.
 *
 * @param encodedMaze - 2D array representation of the maze.
 * @param position - Current [x, y] position of the agent.
 * @param direction - Direction index (0=North, 1=East, 2=South, 3=West, -1=No move).
 * @returns { [number, number] } New position after movement, or a copy of the original position if the move was invalid.
 */
static moveAgent(
  encodedMaze: ReadonlyArray<ReadonlyArray<number>>,
  position: readonly [number, number],
  direction: number,
): [number, number] {
  // Fresh mutable copy for callers that expect a mutable tuple.
  const stayInPlace = (): [number, number] => [position[0], position[1]];

  if (direction === MazeMovement.#NO_MOVE) return stayInPlace();

  // Only whole-number indices within the action range address the deltas table.
  const isKnownDirection =
    Number.isInteger(direction) &&
    direction >= 0 &&
    direction < MazeMovement.#ACTION_DIM;
  if (!isKnownDirection) return stayInPlace();

  const [dx, dy] = MazeMovement.#DIRECTION_DELTAS[direction];
  const candidate: [number, number] = [position[0] + dx, position[1] + dy];

  // Walls and out-of-bounds cells are rejected by the shared validity check.
  return MazeMovement.isValidMove(encodedMaze, candidate)
    ? candidate
    : stayInPlace();
}
1064
-
1065
/**
 * Choose an action index from network outputs.
 *
 * Behaviour:
 * - Centers the raw outputs (logits), derives a temperature from their
 *   standard deviation, performs a max-subtracted softmax into the pooled
 *   scratch buffers, and returns the argmax plus diagnostics.
 * - Writes into class-level scratch buffers (`#SCRATCH_CENTERED`,
 *   `#SCRATCH_EXPS`, `#SOFTMAX`); the returned `softmax` is a copy.
 *
 * Malformed input handling:
 * - A non-Array argument, a wrong length, or any non-finite logit
 *   (NaN / ±Infinity, or a sum that overflows) yields the safe default:
 *   `#NO_MOVE`, an all-zero softmax, and zeroed diagnostics.
 * - Previously non-finite logits fell through the arithmetic and produced
 *   `direction: 0` with `maxProb: -Infinity` and a NaN softmax, and the
 *   malformed-input default exposed whatever probabilities the previous call
 *   had left in the pooled buffer.
 *
 * Steps:
 * 1) Validate inputs and early-return the safe default for malformed inputs.
 * 2) Center logits and compute variance/std for the adaptive temperature.
 * 3) Compute softmax in pooled buffers, subtracting the max before `exp`.
 * 4) Determine argmax (best action) and second-best probability.
 * 5) Compute normalized entropy and return a defensive copy of softmax.
 *
 * @param outputs - Array of raw network outputs (logits), expected length === #ACTION_DIM
 * @returns An object with:
 * - direction: chosen action index (0..#ACTION_DIM-1) or #NO_MOVE on invalid input
 * - softmax: fresh array of probabilities (length #ACTION_DIM; all zeros on invalid input)
 * - entropy: softmax entropy divided by #LOG_ACTIONS
 * - maxProb: probability of the chosen action
 * - secondProb: probability of the runner-up action
 * @example
 * const result = MazeMovement.selectDirection([0.2, 1.4, -0.1, 0]);
 * // result.direction -> 1 (for example)
 */
static selectDirection(outputs: number[]): DirectionSelectionStats {
  const actionCount = MazeMovement.#ACTION_DIM;

  // Safe default shared by every rejection path. Built fresh per call so no
  // stale pooled probabilities leak to the caller.
  const rejectedSelection = (): DirectionSelectionStats => ({
    direction: MazeMovement.#NO_MOVE,
    softmax: new Array<number>(actionCount).fill(0),
    entropy: 0,
    maxProb: 0,
    secondProb: 0,
  });

  // Step 1a: structural validation.
  if (!Array.isArray(outputs) || outputs.length !== actionCount) {
    return rejectedSelection();
  }

  // Local references to pooled scratch buffers.
  const centered = MazeMovement.#SCRATCH_CENTERED;
  const exps = MazeMovement.#SCRATCH_EXPS;
  const softmaxPooled = MazeMovement.#SOFTMAX;

  // Step 2: mean of the logits.
  let logitSum = 0;
  for (let actionIndex = 0; actionIndex < actionCount; actionIndex++) {
    logitSum += outputs[actionIndex];
  }

  // Step 1b: numeric validation. Any NaN / ±Infinity entry (or an overflowing
  // total) makes the sum non-finite, so one check covers every logit.
  if (!Number.isFinite(logitSum)) return rejectedSelection();

  const meanOutput = logitSum / actionCount;

  // Centered logits and population variance.
  let varianceAccumulator = 0;
  for (let actionIndex = 0; actionIndex < actionCount; actionIndex++) {
    const delta = outputs[actionIndex] - meanOutput;
    centered[actionIndex] = delta;
    varianceAccumulator += delta * delta;
  }
  varianceAccumulator /= actionCount;

  let standardDeviation = Math.sqrt(varianceAccumulator);
  if (
    !Number.isFinite(standardDeviation) ||
    standardDeviation < MazeMovement.#STD_MIN
  ) {
    standardDeviation = MazeMovement.#STD_MIN;
  }

  // Low spread between logits selects a larger collapse ratio, which is
  // scaled into the softmax temperature below.
  let collapseRatio = 0;
  if (standardDeviation < MazeMovement.#COLLAPSE_STD_THRESHOLD) {
    collapseRatio = MazeMovement.#COLLAPSE_RATIO_FULL;
  } else if (standardDeviation < MazeMovement.#COLLAPSE_STD_MED) {
    collapseRatio = MazeMovement.#COLLAPSE_RATIO_HALF;
  }
  const temperature =
    MazeMovement.#TEMPERATURE_BASE +
    MazeMovement.#TEMPERATURE_SCALE * collapseRatio;

  // Step 3: subtract the largest centered logit before exponentiating so the
  // biggest exponent is exp(0) = 1.
  let maxCentered = -Infinity;
  for (let actionIndex = 0; actionIndex < actionCount; actionIndex++) {
    const centeredValue = centered[actionIndex];
    if (centeredValue > maxCentered) maxCentered = centeredValue;
  }

  let expSum = 0;
  for (let actionIndex = 0; actionIndex < actionCount; actionIndex++) {
    const exponentiated = Math.exp(
      (centered[actionIndex] - maxCentered) / temperature,
    );
    exps[actionIndex] = exponentiated;
    expSum += exponentiated;
  }
  if (expSum === 0) expSum = 1; // defensive: avoid dividing by zero

  // Step 4: probabilities into the pooled softmax buffer + top-two tracking.
  let chosenDirection = 0;
  let bestProb = -Infinity;
  let runnerUpProb = 0;
  for (let actionIndex = 0; actionIndex < actionCount; actionIndex++) {
    const prob = exps[actionIndex] / expSum;
    softmaxPooled[actionIndex] = prob;
    if (prob > bestProb) {
      runnerUpProb = bestProb;
      bestProb = prob;
      chosenDirection = actionIndex;
    } else if (prob > runnerUpProb) {
      runnerUpProb = prob;
    }
  }

  // Step 5: entropy, divided by #LOG_ACTIONS.
  let entropy = 0;
  for (let actionIndex = 0; actionIndex < actionCount; actionIndex++) {
    const probability = softmaxPooled[actionIndex];
    if (probability > 0) entropy += -probability * Math.log(probability);
  }
  entropy /= MazeMovement.#LOG_ACTIONS;

  // Return a copy of the pooled softmax so callers cannot mutate the pool.
  return {
    direction: chosenDirection,
    softmax: Array.from(softmaxPooled),
    entropy,
    maxProb: bestProb,
    secondProb: runnerUpProb,
  };
}
1194
-
1195
/**
 * Run one complete maze episode for an agent driven by a neural network.
 *
 * The run state is created once, then each loop iteration advances the step
 * counter and executes the per-step pipeline in a fixed order: visit
 * bookkeeping, perception, network decision, the three optional direction
 * overrides, the move itself, and post-action penalties. The loop ends when
 * the deep-stagnation helper requests termination, when the agent's position
 * equals `exitPos`, or when `maxSteps` steps have been taken.
 *
 * Note that the stagnation check runs before the exit check within a step.
 *
 * @param network - Neural network controlling the agent.
 * @param encodedMaze - 2D array representation of the maze.
 * @param startPos - Starting position [x,y] of the agent.
 * @param exitPos - Exit/goal position [x,y] of the maze.
 * @param distanceMap - Optional precomputed distance map forwarded to the step helpers.
 * @param maxSteps - Maximum steps allowed before terminating (defaults to `#DEFAULT_MAX_STEPS`).
 * @returns Object containing:
 * - success: Boolean indicating if exit was reached.
 * - steps: Number of steps taken.
 * - path: Array of positions visited.
 * - fitness: Calculated fitness score for evolution.
 * - progress: Progress toward the exit.
 * - saturationFraction / actionEntropy: optional diagnostics.
 */
static simulateAgent(
  network: INetwork,
  encodedMaze: number[][],
  startPos: readonly [number, number],
  exitPos: readonly [number, number],
  distanceMap?: number[][],
  maxSteps = MazeMovement.#DEFAULT_MAX_STEPS,
): {
  success: boolean;
  steps: number;
  path: readonly [number, number][];
  fitness: number;
  progress: number;
  saturationFraction?: number;
  actionEntropy?: number;
} {
  const run = MazeMovement.#initRunState(
    encodedMaze,
    startPos,
    distanceMap,
    maxSteps,
  );

  while (run.steps < maxSteps) {
    run.steps += 1;

    // 1. Visit bookkeeping: loop / memory / revisit penalties.
    MazeMovement.#recordVisitAndUpdatePenalties(run);

    // 2. Perception inputs and the distance at the current cell.
    MazeMovement.#buildVisionAndDistance(run, encodedMaze, exitPos, distanceMap);

    // 3. Network activation and action selection.
    MazeMovement.#decideDirection(run, network);

    // 4. Direction overrides: proximity-greedy, epsilon, forced exploration.
    MazeMovement.#maybeApplyProximityGreedy(run, encodedMaze, distanceMap);
    MazeMovement.#maybeApplyEpsilonExploration(run, encodedMaze);
    MazeMovement.#maybeForceExploration(run, encodedMaze);

    // 5. Perform the move and account for its rewards.
    MazeMovement.#executeMoveAndRewards(run, encodedMaze, distanceMap);

    // 6. Post-action repetition / entropy / saturation penalties.
    MazeMovement.#applyPostActionPenalties(run);

    // 7. Abort the episode when the stagnation helper says so.
    const shouldAbort = MazeMovement.#maybeTerminateDeepStagnation(run);
    if (shouldAbort) break;

    // 8. Finished as soon as the agent stands on the exit cell.
    const standsOnExit =
      run.position[0] === exitPos[0] && run.position[1] === exitPos[1];
    if (standsOnExit) {
      return MazeMovement.#finalizeSuccess(run, maxSteps);
    }
  }

  // Step budget exhausted or episode aborted.
  return MazeMovement.#finalizeFailure(
    run,
    encodedMaze,
    startPos,
    exitPos,
    distanceMap,
  );
}
1288
-
1289
- // ---------------------------------------------------------------------------
1290
- // Private helper methods (refactored from large simulateAgent body)
1291
- // ---------------------------------------------------------------------------
1292
-
1293
/**
 * Create the mutable state object for one simulation run.
 *
 * Side effects:
 * - Resets the class-level counters `#StateSaturations`, `#StateNoMoveStreak`
 *   and `#StatePrevDistanceStep`.
 * - Calls `#initBuffers(width, height, maxSteps)` and writes the start
 *   position into slot 0 of `#PathX` / `#PathY`.
 *
 * A distance map is treated as usable only when it is an array with as many
 * rows as the maze.
 *
 * @param encodedMaze - maze grid; its row count and first-row length give the dimensions
 * @param startPos - starting [x, y] coordinate (copied, never mutated)
 * @param distanceMap - optional precomputed distance map
 * @param maxSteps - step budget forwarded to `#initBuffers`
 * @returns freshly initialised simulation state
 */
static #initRunState(
  encodedMaze: number[][],
  startPos: readonly [number, number],
  distanceMap: number[][] | undefined,
  maxSteps: number,
): SimulationState {
  // Class-level counters are shared between runs and must start clean.
  MazeMovement.#StateSaturations = 0;
  MazeMovement.#StateNoMoveStreak = 0;
  MazeMovement.#StatePrevDistanceStep = undefined;

  const rowCount = encodedMaze.length;
  const columnCount = encodedMaze[0].length;
  const mapMatchesMaze =
    Array.isArray(distanceMap) && distanceMap.length === rowCount;

  MazeMovement.#initBuffers(columnCount, rowCount, maxSteps);

  // The recorded path begins at the start cell.
  const startCell: [number, number] = [startPos[0], startPos[1]];
  MazeMovement.#PathX![0] = startCell[0];
  MazeMovement.#PathY![0] = startCell[1];

  const ringCapacity = MazeMovement.#MOVE_HISTORY_LENGTH;

  // Best known distance so far: direct table read when the map is usable,
  // otherwise the shared accessor.
  let initialMinDistance: number;
  if (mapMatchesMaze) {
    initialMinDistance = distanceMap![startCell[1]]?.[startCell[0]] ?? Infinity;
  } else {
    initialMinDistance = MazeMovement.#distanceAt(
      encodedMaze,
      startCell,
      distanceMap,
    );
  }

  const initialGlobalDistance = MazeMovement.#distanceAt(
    encodedMaze,
    startCell,
    distanceMap,
  );

  const runState: SimulationState = {
    position: startCell,
    steps: 0,
    pathLength: 1,
    visitedUniqueCount: 0,
    hasDistanceMap: mapMatchesMaze,
    distanceMap,
    minDistanceToExit: initialMinDistance,
    progressReward: 0,
    newCellExplorationBonus: 0,
    invalidMovePenalty: 0,
    prevAction: MazeMovement.#NO_MOVE,
    stepsSinceImprovement: 0,
    lastDistanceGlobal: initialGlobalDistance,
    saturatedSteps: 0,
    recentPositions: [] as [number, number][],
    localAreaPenalty: 0,
    directionCounts: [0, 0, 0, 0] as number[],
    moveHistoryRing: new Int32Array(ringCapacity),
    moveHistoryLength: 0,
    moveHistoryHead: 0,
    currentCellIndex: 0,
    loopPenalty: 0,
    memoryPenalty: 0,
    revisitPenalty: 0,
    visitsAtCurrent: 0,
    distHere: Infinity,
    vision: [] as number[],
    actionStats: null,
    direction: MazeMovement.#NO_MOVE,
    moved: false,
    prevDistance: Infinity,
    earlyTerminate: false,
  };
  return runState;
}
1356
-
1357
/**
 * Append a cell index to the circular move-history buffer on `state`.
 *
 * The value is written at the current head slot, the head then advances by
 * one with wrap-around, and the stored length grows until it reaches the
 * ring's capacity (after which the oldest entries are overwritten). Nothing
 * is allocated. A zero-capacity ring is left untouched.
 *
 * @param state - simulation state containing `moveHistoryRing`, `moveHistoryHead`, `moveHistoryLength`
 * @param cellIndex - linearized cell index to push into history
 * @returns void
 * @example
 * MazeMovement.#pushHistory(state, currentCellIndex);
 */
static #pushHistory(state: SimulationState, cellIndex: number) {
  const { moveHistoryRing: ring, moveHistoryLength: storedCount } = state;
  const slotCount = ring.length;

  // Nothing can be stored in an empty ring.
  if (slotCount === 0) return;

  // Write at the head (coerced to an integer), then move the head forward.
  const writeSlot = state.moveHistoryHead | 0;
  ring[writeSlot] = cellIndex;
  state.moveHistoryHead = (writeSlot + 1) % slotCount;

  // The length only grows while the ring still has unused slots.
  if (storedCount < slotCount) {
    state.moveHistoryLength = storedCount + 1;
  }
}
1398
-
1399
/**
 * Read an entry from the circular move history, counting back from the most
 * recent push.
 *
 * `nth` is 1-based: `1` is the value pushed last, `2` the one before it, and
 * so on. Requests outside `1..moveHistoryLength`, or against a zero-capacity
 * ring, yield `undefined`. The lookup is a pure read and allocates nothing.
 *
 * @param state - simulation state containing `moveHistoryRing`, `moveHistoryHead`, `moveHistoryLength`
 * @param nth - 1-based index from the end (1 === last pushed)
 * @returns linearized cell index when present, otherwise `undefined`
 * @example
 * const last = MazeMovement.#nthFromHistoryEnd(state, 1);
 */
static #nthFromHistoryEnd(
  state: SimulationState,
  nth: number,
): number | undefined {
  // Integer-coerce both values before comparing them.
  const stepsBack = nth | 0;
  const storedCount = state.moveHistoryLength | 0;
  const isStored = stepsBack >= 1 && stepsBack <= storedCount;
  if (!isStored) return undefined;

  const { moveHistoryRing: ring } = state;
  const slotCount = ring.length;
  if (slotCount === 0) return undefined;

  // The head points one past the newest entry, so step back from it and fold
  // a possibly negative remainder into [0, slotCount).
  const offsetFromHead = ((state.moveHistoryHead | 0) - stepsBack) % slotCount;
  const slot = (offsetFromHead + slotCount) % slotCount;

  return ring[slot];
}
1443
-
1444
/**
 * Register a visit to the agent's current cell and recompute the three
 * per-step shaping penalties stored on `state`.
 *
 * What is updated:
 * - `#VisitedFlags` / `state.visitedUniqueCount` on the first visit to a cell.
 * - `#VisitCounts` for the cell, mirrored into `state.visitsAtCurrent`.
 * - The move-history ring (the current cell becomes its newest entry).
 * - `state.loopPenalty`: set when the four newest history entries alternate
 *   (newest equals third-newest and second-newest equals fourth-newest).
 * - `state.memoryPenalty`: set when the current cell appears anywhere in the
 *   history older than the entry just pushed. The scan starts at the entry
 *   recorded by the previous call.
 * - `state.revisitPenalty`: grows linearly with visits beyond the first.
 * - When the visit count exceeds `#VISIT_TERMINATION_THRESHOLD`, a harsh
 *   amount is subtracted from `state.invalidMovePenalty` and
 *   `state.earlyTerminate` is raised.
 *
 * All penalty magnitudes are multiplied by `#REWARD_SCALE`.
 *
 * @param state - current simulation state (modified in-place)
 * @returns void
 * @example
 * MazeMovement.#recordVisitAndUpdatePenalties(state);
 */
static #recordVisitAndUpdatePenalties(state: SimulationState) {
  const scale = MazeMovement.#REWARD_SCALE;
  const seenCells = MazeMovement.#VisitedFlags!;
  const perCellVisits = MazeMovement.#VisitCounts!;

  // Linearize the position and account for first-time visits.
  const here = MazeMovement.#index(state.position[0], state.position[1]);
  state.currentCellIndex = here;
  if (!seenCells[here]) {
    seenCells[here] = 1;
    state.visitedUniqueCount++;
  }

  // Count the visit and make this cell the newest history entry. The count is
  // read back from the buffer after the write.
  perCellVisits[here] = (perCellVisits[here] + 1) as number;
  MazeMovement.#pushHistory(state, here);
  const visitsHere = perCellVisits[here];
  state.visitsAtCurrent = visitsHere;

  // Oscillation check over the four newest entries.
  let loopPenalty = 0;
  if (state.moveHistoryLength >= MazeMovement.#OSCILLATION_DETECT_LENGTH) {
    const newest = MazeMovement.#nthFromHistoryEnd(state, 1)!;
    const secondNewest = MazeMovement.#nthFromHistoryEnd(state, 2);
    const thirdNewest = MazeMovement.#nthFromHistoryEnd(state, 3);
    const fourthNewest = MazeMovement.#nthFromHistoryEnd(state, 4);
    const alternates =
      newest === thirdNewest &&
      secondNewest !== undefined &&
      fourthNewest !== undefined &&
      secondNewest === fourthNewest;
    if (alternates) {
      loopPenalty = -MazeMovement.#LOOP_PENALTY * scale;
    }
  }
  state.loopPenalty = loopPenalty;

  // Memory-return check: look for the current cell among older entries,
  // skipping only the entry pushed a moment ago (stepsBack === 1).
  let memoryPenalty = 0;
  for (let stepsBack = 2; stepsBack <= state.moveHistoryLength; stepsBack++) {
    if (MazeMovement.#nthFromHistoryEnd(state, stepsBack) === here) {
      memoryPenalty = -MazeMovement.#MEMORY_RETURN_PENALTY * scale;
      break;
    }
  }
  state.memoryPenalty = memoryPenalty;

  // Revisit penalty scales with the number of visits beyond the first.
  state.revisitPenalty =
    visitsHere > 1
      ? -MazeMovement.#REVISIT_PENALTY_PER_VISIT * (visitsHere - 1) * scale
      : 0;

  // Too many visits to one cell: apply the harsh penalty and flag the run.
  if (visitsHere > MazeMovement.#VISIT_TERMINATION_THRESHOLD) {
    state.invalidMovePenalty -=
      MazeMovement.#INVALID_MOVE_PENALTY_HARSH * scale;
    state.earlyTerminate = true;
  }
}
1534
-
1535
/**
 * Produce the network's input vector for the current step and record the
 * distance value at the agent's current cell.
 *
 * Behaviour:
 * - Does nothing when `state.earlyTerminate` is already set.
 * - Determines the pre-move distance: a direct `distanceMap[y][x]` read when
 *   `state.hasDistanceMap` is true (may be `undefined`), otherwise the value
 *   from `#distanceAt`.
 * - Calls `MazeVision.buildInputs6` with the previous step's distance
 *   (`#StatePrevDistanceStep`), the pre-move distance and `state.prevAction`,
 *   and stores the result on `state.vision`. A non-Array result is converted
 *   with `Array.from`; an Array result is stored as-is (no defensive copy).
 * - Afterwards `#StatePrevDistanceStep` becomes the pre-move distance and
 *   `state.distHere` holds the same lookup with `Infinity` substituted for a
 *   missing map entry.
 *
 * @param state - Simulation state object (mutated in-place)
 * @param encodedMaze - 2D maze array
 * @param exitPos - Exit coordinate tuple [x, y] passed to the vision builder
 * @param distanceMap - Optional precomputed distance map indexed as `[y][x]`
 * @returns void
 * @example
 * // inside the simulation loop
 * MazeMovement.#buildVisionAndDistance(state, encodedMaze, exitPos, distanceMap);
 */
static #buildVisionAndDistance(
  state: SimulationState,
  encodedMaze: number[][],
  exitPos: readonly [number, number],
  distanceMap?: number[][],
) {
  if (state.earlyTerminate) return;

  const here = state.position;
  const column = here[0] | 0;
  const row = here[1] | 0;
  const useDistanceTable = state.hasDistanceMap;

  // Distance at the current cell before any move is made this step.
  let distanceBeforeMove: number | undefined;
  if (useDistanceTable) {
    distanceBeforeMove = distanceMap![row]?.[column] ?? undefined;
  } else {
    distanceBeforeMove = MazeMovement.#distanceAt(encodedMaze, here, distanceMap);
  }

  // Perception inputs come from the shared vision builder.
  const builtInputs = MazeVision.buildInputs6(
    encodedMaze,
    here,
    exitPos,
    distanceMap,
    MazeMovement.#StatePrevDistanceStep,
    distanceBeforeMove,
    state.prevAction,
  );

  // Keep an Array result untouched; convert anything else exactly once.
  if (Array.isArray(builtInputs)) {
    state.vision = builtInputs as number[];
  } else {
    state.vision = Array.from(builtInputs as Iterable<number>) as number[];
  }

  // Roll the previous-distance tracker forward for the next step.
  MazeMovement.#StatePrevDistanceStep = distanceBeforeMove;

  // Distance consumed by the proximity / epsilon logic later in the step.
  if (useDistanceTable) {
    state.distHere = distanceMap![row]?.[column] ?? Infinity;
  } else {
    state.distHere = MazeMovement.#distanceAt(encodedMaze, here, distanceMap);
  }
}
1620
-
1621
/**
 * Ask the network for an action, keep a record of its raw outputs, and store
 * the selected direction and its statistics on the simulation state.
 *
 * Sequence:
 * 1) Return immediately when `state.earlyTerminate` is set.
 * 2) Activate the network with `state.vision`.
 * 3) Copy the outputs element by element into a new Array and push that copy
 *    into the network's output history via `MazeUtils.pushHistory`, bounded
 *    by `#OUTPUT_HISTORY_LENGTH`.
 * 4) Pass the original outputs to `selectDirection` and store the result on
 *    `state.actionStats`.
 * 5) Run `#applySaturationAndBiasAdjust`, then store the chosen direction on
 *    `state.direction`.
 *
 * Any exception thrown along the way is logged with `console.error` and the
 * direction falls back to `#NO_MOVE`; `state.actionStats` is not reset in
 * that case.
 *
 * NOTE(review): `selectDirection` rejects anything that fails `Array.isArray`,
 * so the network is expected to return a plain Array here.
 *
 * @param state - simulation state (mutated in-place)
 * @param network - neural network providing `activate(vision)`
 * @returns void
 *
 * @example
 * // inside the simulation loop
 * MazeMovement.#decideDirection(state, network);
 */
static #decideDirection(state: SimulationState, network: INetwork) {
  if (state.earlyTerminate) return;

  try {
    const rawOutputs = network.activate(state.vision);

    // Snapshot for the history: a separate Array, so later changes to the
    // network's output object cannot alter recorded entries.
    const outputCount = rawOutputs.length | 0;
    const snapshot: number[] = Array.from(
      { length: outputCount },
      (_unused, slot) => rawOutputs[slot],
    );

    const historyBefore = MazeMovement.#readOutputHistory(network);
    const historyAfter = MazeUtils.pushHistory(
      historyBefore,
      snapshot,
      MazeMovement.#OUTPUT_HISTORY_LENGTH,
    );
    MazeMovement.#writeOutputHistory(network, historyAfter);

    // Softmax-based selection works on the original outputs.
    const selection = MazeMovement.selectDirection(rawOutputs);
    state.actionStats = selection;

    // Saturation / bias handling receives the network and may adjust it.
    MazeMovement.#applySaturationAndBiasAdjust(state, rawOutputs, network);

    state.direction = selection.direction;
  } catch (error: unknown) {
    // A failing activation must not abort the run: log and stand still.
    console.error('Error activating network:', error);
    state.direction = MazeMovement.#NO_MOVE;
  }
}
1699
-
1700
/**
 * Greedy override used when the agent is already near the exit.
 *
 * Once `state.distHere` is at or below `#PROXIMITY_GREEDY_DISTANCE`, every
 * direction in `#DIRECTION_DELTAS` is scored by the distance-to-exit of the
 * cell it leads to, and `state.direction` is replaced by the best-scoring one.
 *
 * Selection rules, as implemented:
 * - Cells rejected by `MazeMovement.isValidMove` are ignored.
 * - The comparison is strict (`<`), so among equally distant neighbours the
 *   lowest direction index wins.
 * - The incoming `state.direction` survives only when no valid neighbour
 *   reports a distance below `Infinity`.
 *
 * NOTE(review): if ties were meant to favour the currently selected
 * direction, the strict comparison below does not implement that — confirm
 * the intended tie-break.
 *
 * Candidate coordinates are also written to `#COORD_SCRATCH`; nothing in this
 * method reads them back.
 *
 * @param state - simulation state; only `direction` may be modified
 * @param encodedMaze - maze grid handed to the validity and distance helpers
 * @param distanceMap - optional distance map forwarded to `#distanceAt`
 * @example
 * MazeMovement.#maybeApplyProximityGreedy(state, encodedMaze, distanceMap);
 */
static #maybeApplyProximityGreedy(
  state: SimulationState,
  encodedMaze: number[][],
  distanceMap?: number[][],
) {
  // Skip terminated runs and agents that are still far from the exit.
  if (state.earlyTerminate) return;
  if (state.distHere > MazeMovement.#PROXIMITY_GREEDY_DISTANCE) return;

  let bestDirection = state.direction;
  let bestDistance = Infinity;
  const scratch = MazeMovement.#COORD_SCRATCH;

  for (let candidate = 0; candidate < MazeMovement.#ACTION_DIM; candidate++) {
    const [stepX, stepY] = MazeMovement.#DIRECTION_DELTAS[candidate];

    // Neighbour cell reached by this direction, coerced to 32-bit integers.
    const cellX = (state.position[0] + stepX) | 0;
    const cellY = (state.position[1] + stepY) | 0;
    scratch[0] = cellX;
    scratch[1] = cellY;

    if (MazeMovement.isValidMove(encodedMaze, cellX, cellY)) {
      const cellDistance = MazeMovement.#distanceAt(
        encodedMaze,
        [cellX, cellY],
        distanceMap,
      );

      // Strictly-better only: the first of several equal candidates is kept.
      if (cellDistance < bestDistance) {
        bestDistance = cellDistance;
        bestDirection = candidate;
      }
    }
  }

  // `bestDirection` still equals the incoming direction when nothing better
  // was found, in which case the state is left untouched.
  if (bestDirection !== state.direction) {
    state.direction = bestDirection;
  }
}
1781
-
1782
/**
 * Epsilon-greedy exploration: occasionally replace the selected direction
 * with a randomly drawn valid one.
 *
 * Flow:
 * 1) Return immediately when `state.earlyTerminate` is set.
 * 2) Obtain `epsilon` from `#computeEpsilon`, passing the step count, the
 *    stagnation counter, the current distance and `#StateSaturations`.
 * 3) Draw one value from `#rand()`; continue only when it is below `epsilon`.
 * 4) Make up to `#ACTION_DIM` random draws. A draw equal to
 *    `state.prevAction` is discarded (it still uses up one attempt). The
 *    first draw that leads to a valid cell becomes `state.direction`.
 *
 * When no draw yields a valid cell `state.direction` is left as it was.
 * Candidate coordinates are also written to `#COORD_SCRATCH`; this method
 * does not read them back.
 *
 * @param state - simulation state; only `direction` may be modified
 * @param encodedMaze - maze grid used for move validity checks
 * @example
 * MazeMovement.#maybeApplyEpsilonExploration(state, encodedMaze);
 */
static #maybeApplyEpsilonExploration(
  state: SimulationState,
  encodedMaze: number[][],
) {
  if (state.earlyTerminate) return;

  const epsilon = MazeMovement.#computeEpsilon(
    state.steps,
    state.stepsSinceImprovement,
    state.distHere,
    MazeMovement.#StateSaturations,
  );

  // One random draw decides whether exploration happens on this step.
  const explorationTriggered = MazeMovement.#rand() < epsilon;
  if (!explorationTriggered) return;

  const directionCount = MazeMovement.#ACTION_DIM;
  const lastAction = state.prevAction;
  const originX = state.position[0] | 0;
  const originY = state.position[1] | 0;
  const scratch = MazeMovement.#COORD_SCRATCH;

  let triesLeft = directionCount;
  while (triesLeft-- > 0) {
    // Uniform integer pick in [0, directionCount) via 32-bit truncation.
    const pick = (MazeMovement.#rand() * directionCount) | 0;

    // Repeating the previous action is not considered exploration.
    if (pick !== lastAction) {
      const [stepX, stepY] = MazeMovement.#DIRECTION_DELTAS[pick];
      const targetX = (originX + stepX) | 0;
      const targetY = (originY + stepY) | 0;
      scratch[0] = targetX;
      scratch[1] = targetY;

      if (MazeMovement.isValidMove(encodedMaze, targetX, targetY)) {
        state.direction = pick;
        return;
      }
    }
  }
}
1853
-
1854
/**
 * Break out of a run of "no move" decisions by forcing a random valid move.
 *
 * Flow:
 * 1) Return immediately when `state.earlyTerminate` is set.
 * 2) Update the class-level counter `#StateNoMoveStreak`: incremented when
 *    `state.direction` equals `#NO_MOVE`, reset to zero otherwise.
 * 3) While the counter is below `#NO_MOVE_STREAK_THRESHOLD`, do nothing more.
 * 4) Otherwise make up to `#ACTION_DIM` random draws and adopt the first
 *    direction that leads to a valid cell.
 * 5) Reset the counter to zero, whether or not a valid direction was found.
 *
 * Candidate coordinates are also written to `#COORD_SCRATCH`; this method
 * does not read them back.
 *
 * @param state - simulation state; only `direction` may be modified
 * @param encodedMaze - maze grid used for move validity checks
 * @example
 * MazeMovement.#maybeForceExploration(state, encodedMaze);
 */
static #maybeForceExploration(
  state: SimulationState,
  encodedMaze: number[][],
) {
  if (state.earlyTerminate) return;

  // Track consecutive no-move decisions in the shared static counter.
  MazeMovement.#StateNoMoveStreak =
    state.direction === MazeMovement.#NO_MOVE
      ? MazeMovement.#StateNoMoveStreak + 1
      : 0;

  const thresholdReached =
    MazeMovement.#StateNoMoveStreak >= MazeMovement.#NO_MOVE_STREAK_THRESHOLD;
  if (!thresholdReached) return;

  const directionCount = MazeMovement.#ACTION_DIM;
  const originX = state.position[0] | 0;
  const originY = state.position[1] | 0;
  const scratch = MazeMovement.#COORD_SCRATCH;

  let triesLeft = directionCount;
  while (triesLeft-- > 0) {
    // Uniform integer pick in [0, directionCount) via 32-bit truncation.
    const pick = (MazeMovement.#rand() * directionCount) | 0;
    const [stepX, stepY] = MazeMovement.#DIRECTION_DELTAS[pick];

    const targetX = (originX + stepX) | 0;
    const targetY = (originY + stepY) | 0;
    scratch[0] = targetX;
    scratch[1] = targetY;

    if (MazeMovement.isValidMove(encodedMaze, targetX, targetY)) {
      state.direction = pick;
      break;
    }
  }

  // The streak restarts even when every draw hit an invalid cell.
  MazeMovement.#StateNoMoveStreak = 0;
}
1925
-
1926
/**
 * Carry out the action stored in `state.direction` and update the reward
 * and penalty accumulators that depend on the outcome.
 *
 * Sequence:
 * 1) Return immediately when `state.earlyTerminate` is set.
 * 2) Record the pre-move distance (via `#distanceAt` with the `distanceMap`
 *    argument) in `state.prevDistance`.
 * 3) If the action index lies in `[0, #ACTION_DIM)` and the target cell
 *    passes `MazeMovement.isValidMove`, update `state.position` in place and
 *    set `state.moved = true`; otherwise `state.moved` stays `false`.
 * 4) After a move: append the position to `#PathX` / `#PathY`, push it into
 *    `state.recentPositions`, run the local-area penalty check, resolve the
 *    post-move distance, apply progress shaping and the visit adjustment,
 *    bump `state.directionCounts` and tighten `state.minDistanceToExit`.
 * 5) Without a move: subtract `#INVALID_MOVE_PENALTY_MILD * #REWARD_SCALE`
 *    from `state.invalidMovePenalty`.
 * 6) In both cases call `#applyGlobalDistanceImprovementBonus`.
 *
 * The post-move distance is read from `state.distanceMap` rather than from
 * the `distanceMap` argument — presumably the same map; verify against the
 * caller.
 *
 * NOTE(review): `#applyProgressShaping` zeroes `state.stepsSinceImprovement`
 * on an improving move, and it runs before
 * `#applyGlobalDistanceImprovementBonus`, which reads that same counter to
 * size its bonus. Confirm this ordering is intended.
 *
 * @param state - simulation state (mutated in place)
 * @param encodedMaze - maze grid used for validity and distance lookups
 * @param distanceMap - optional distance map used for the pre-move distance
 * @example
 * MazeMovement.#executeMoveAndRewards(state, encodedMaze, distanceMap);
 */
static #executeMoveAndRewards(
  state: SimulationState,
  encodedMaze: number[][],
  distanceMap?: number[][],
) {
  if (state.earlyTerminate) return;

  // Distance before the move; also kept on the state.
  const distanceBeforeMove = MazeMovement.#distanceAt(
    encodedMaze,
    state.position,
    distanceMap,
  );
  state.prevDistance = distanceBeforeMove;

  // Attempt the move. Out-of-range actions never move the agent.
  state.moved = false;
  const action = state.direction;
  const actionInRange = action >= 0 && action < MazeMovement.#ACTION_DIM;
  if (actionInRange) {
    const [stepX, stepY] = MazeMovement.#DIRECTION_DELTAS[action];
    const targetX = (state.position[0] + stepX) | 0;
    const targetY = (state.position[1] + stepY) | 0;

    const scratch = MazeMovement.#COORD_SCRATCH;
    scratch[0] = targetX;
    scratch[1] = targetY;

    if (MazeMovement.isValidMove(encodedMaze, targetX, targetY)) {
      state.position[0] = targetX;
      state.position[1] = targetY;
      state.moved = true;
    }
  }

  const scale = MazeMovement.#REWARD_SCALE;
  const pathXs = MazeMovement.#PathX!;
  const pathYs = MazeMovement.#PathY!;

  if (!state.moved) {
    // The agent stayed put: charge the mild invalid-move penalty.
    state.invalidMovePenalty -= MazeMovement.#INVALID_MOVE_PENALTY_MILD * scale;
  } else {
    // Record the new position at the end of the pooled path buffers.
    const slot = state.pathLength | 0;
    pathXs[slot] = state.position[0];
    pathYs[slot] = state.position[1];
    state.pathLength = slot + 1;

    // A fresh tuple is pushed because the history keeps a reference to it.
    MazeUtils.pushHistory(
      state.recentPositions,
      [state.position[0], state.position[1]] as [number, number],
      MazeMovement.#LOCAL_WINDOW,
    );

    MazeMovement.#maybeApplyLocalAreaPenalty(state, scale);

    // Post-move distance: direct lookup when the state says a map exists,
    // otherwise defer to the distance helper.
    let distanceAfterMove: number;
    if (state.hasDistanceMap) {
      distanceAfterMove =
        state.distanceMap?.[state.position[1]]?.[state.position[0]] ??
        Infinity;
    } else {
      distanceAfterMove = MazeMovement.#distanceAt(
        encodedMaze,
        state.position,
        state.distanceMap,
      );
    }

    // A positive delta means the agent got closer to the exit.
    const delta = distanceBeforeMove - distanceAfterMove;
    const gotCloser = delta > 0;
    const gotFarther = !gotCloser && distanceAfterMove > distanceBeforeMove;
    MazeMovement.#applyProgressShaping(
      state,
      delta,
      gotCloser,
      gotFarther,
      scale,
    );

    MazeMovement.#applyExplorationVisitAdjustment(state, scale);

    if (state.direction >= 0) {
      state.directionCounts[state.direction]++;
    }
    state.minDistanceToExit = Math.min(
      state.minDistanceToExit,
      distanceAfterMove,
    );
  }

  // Runs for moved and non-moved steps alike.
  MazeMovement.#applyGlobalDistanceImprovementBonus(state, encodedMaze, scale);

  // `state.prevAction` is not touched here; see #applyPostActionPenalties.
}
2063
-
2064
/**
 * Finalize the per-step penalties once the action has been executed.
 *
 * Sequence:
 * 1) Return immediately when `state.earlyTerminate` is set.
 * 2) Apply repetition/backtrack penalties. This happens before
 *    `state.prevAction` is updated, so the helper compares the new direction
 *    against the previous step's action.
 * 3) Copy `state.direction` into `state.prevAction`, only when the agent
 *    actually moved.
 * 4) Apply entropy-guidance shaping and the saturation penalty cycle.
 * 5) Add `loopPenalty + memoryPenalty + revisitPenalty` (each falling back to
 *    0 when falsy) to `state.invalidMovePenalty`.
 *
 * The sum in step 5 is kept in an ordinary local. It was previously written
 * to `#COORD_SCRATCH` and read back; that buffer is described elsewhere in
 * this class as a typed array for integer coordinates, so the read-back
 * could return a truncated or rounded penalty depending on the buffer's
 * element type (its declaration is outside this block).
 *
 * @param state - simulation state object mutated in place
 * @returns void
 * @example
 * MazeMovement.#applyPostActionPenalties(state);
 */
static #applyPostActionPenalties(state: SimulationState) {
  if (state.earlyTerminate) return;

  const scale = MazeMovement.#REWARD_SCALE;

  // Must run while `state.prevAction` still holds the previous step's action.
  MazeMovement.#applyRepetitionAndBacktrackPenalties(state, scale);

  // A step that did not move the agent leaves the remembered action alone.
  if (state.moved) state.prevAction = state.direction;

  MazeMovement.#applyEntropyGuidanceShaping(state, scale);
  MazeMovement.#applySaturationPenaltyCycle(state, scale);

  // `|| 0` also neutralises NaN/undefined contributions, as before.
  const carriedPenalties =
    (state.loopPenalty || 0) +
    (state.memoryPenalty || 0) +
    (state.revisitPenalty || 0);
  state.invalidMovePenalty += carriedPenalties;
}
2119
-
2120
/**
 * Penalise an agent that keeps moving inside a very small area without
 * improving.
 *
 * Flow:
 * 1) Do nothing unless `state.recentPositions` holds exactly `#LOCAL_WINDOW`
 *    entries.
 * 2) Compute the bounding box of those positions (coordinates coerced to
 *    32-bit integers).
 * 3) Let `span = width + height` of that box. When `span` is at most
 *    `#LOCAL_AREA_SPAN_THRESHOLD` and `state.stepsSinceImprovement` exceeds
 *    `#LOCAL_AREA_STAGNATION_STEPS`, subtract
 *    `#LOCAL_AREA_PENALTY_AMOUNT * rewardScale` from `state.localAreaPenalty`.
 *
 * The box's minimum corner is also written to `#COORD_SCRATCH`; this method
 * does not read it back.
 *
 * @param state - simulation state mutated in place
 * @param rewardScale - multiplier applied to the penalty amount
 * @example
 * MazeMovement.#maybeApplyLocalAreaPenalty(state, MazeMovement.#REWARD_SCALE);
 */
static #maybeApplyLocalAreaPenalty(
  state: SimulationState,
  rewardScale: number,
) {
  const history = state.recentPositions;
  if (history.length !== MazeMovement.#LOCAL_WINDOW) return;

  let left = Number.POSITIVE_INFINITY;
  let right = Number.NEGATIVE_INFINITY;
  let top = Number.POSITIVE_INFINITY;
  let bottom = Number.NEGATIVE_INFINITY;

  for (const position of history) {
    const px = position[0] | 0;
    const py = position[1] | 0;
    left = Math.min(left, px);
    right = Math.max(right, px);
    top = Math.min(top, py);
    bottom = Math.max(bottom, py);
  }

  const scratch = MazeMovement.#COORD_SCRATCH;
  scratch[0] = left;
  scratch[1] = top;

  // Width plus height of the bounding box.
  const span = right - left + (bottom - top);
  const confined = span <= MazeMovement.#LOCAL_AREA_SPAN_THRESHOLD;
  const stagnant =
    state.stepsSinceImprovement > MazeMovement.#LOCAL_AREA_STAGNATION_STEPS;

  if (confined && stagnant) {
    state.localAreaPenalty -=
      MazeMovement.#LOCAL_AREA_PENALTY_AMOUNT * rewardScale;
  }
}
2186
-
2187
/**
 * Reward or penalise the step according to how the distance-to-goal changed.
 *
 * Outcomes:
 * - Not improved and worsened: subtract a confidence-scaled "away" penalty
 *   from `state.progressReward` and increment `state.stepsSinceImprovement`.
 * - Not improved and not worsened: only increment the stagnation counter.
 * - Improved: add, in this order, (a) a stagnation bonus capped at
 *   `#PROGRESS_STEPS_MAX * rewardScale` when the counter is positive,
 *   (b) the confidence-scaled base reward, (c) a distance-delta term; the
 *   stagnation counter is reset to zero.
 *
 * Confidence is `state.actionStats.maxProb`; when that is missing it falls
 * back to 1 for an improving step and 0.5 otherwise. The distance-delta term
 * is not multiplied by `rewardScale`.
 *
 * @param state - simulation state (mutated in place)
 * @param distanceDelta - previous distance minus current distance
 * @param improved - whether the step reduced the distance
 * @param worsened - whether the step increased the distance
 * @param rewardScale - multiplier applied to the scaled reward terms
 * @example
 * MazeMovement.#applyProgressShaping(state, prevDist - currDist, improved, worsened, MazeMovement.#REWARD_SCALE);
 */
static #applyProgressShaping(
  state: SimulationState,
  distanceDelta: number,
  improved: boolean,
  worsened: boolean,
  rewardScale: number,
) {
  const fallbackConfidence = improved ? 1 : 0.5;
  const confidence = state.actionStats?.maxProb ?? fallbackConfidence;

  if (!improved) {
    if (worsened) {
      // Moving away costs more the more confident the network was.
      state.progressReward -=
        (MazeMovement.#PROGRESS_AWAY_BASE_PENALTY +
          MazeMovement.#PROGRESS_AWAY_CONF_SCALE * confidence) *
        rewardScale;
    }
    // Both "worse" and "unchanged" count as another step without progress.
    state.stepsSinceImprovement++;
    return;
  }

  const baseReward =
    (MazeMovement.#PROGRESS_REWARD_BASE +
      MazeMovement.#PROGRESS_REWARD_CONF_SCALE * confidence) *
    rewardScale;

  // Extra bonus for ending a stagnant stretch, capped by the configured max.
  if (state.stepsSinceImprovement > 0) {
    const uncapped =
      state.stepsSinceImprovement *
      MazeMovement.#PROGRESS_STEPS_MULT *
      rewardScale;
    const cap = MazeMovement.#PROGRESS_STEPS_MAX * rewardScale;
    state.progressReward += Math.min(uncapped, cap);
  }

  state.progressReward += baseReward;
  state.stepsSinceImprovement = 0;

  const confidenceWeight =
    MazeMovement.#DISTANCE_DELTA_CONF_BASE +
    MazeMovement.#DISTANCE_DELTA_CONF_SCALE * confidence;
  state.progressReward +=
    distanceDelta * MazeMovement.#DISTANCE_DELTA_SCALE * confidenceWeight;
}
2267
-
2268
/**
 * Apply an exploration bonus or a revisit penalty for the cell the agent
 * just entered.
 *
 * Behaviour:
 * - `state.visitsAtCurrent` (coerced to a 32-bit integer) equal to 1 adds
 *   `#NEW_CELL_EXPLORATION_BONUS * rewardScale` to
 *   `state.newCellExplorationBonus`.
 * - Any other count subtracts `#REVISIT_PENALTY_STRONG * rewardScale`.
 *
 * The adjustment is kept in an ordinary local. It was previously written to
 * `#COORD_SCRATCH` and read back; that buffer is described elsewhere in this
 * class as a typed array for integer coordinates, so the read-back could
 * return a truncated or rounded value depending on the buffer's element type
 * (its declaration is outside this block).
 *
 * @param state - simulation state mutated in place
 * @param rewardScale - multiplier applied to both magnitudes
 * @example
 * MazeMovement.#applyExplorationVisitAdjustment(state, MazeMovement.#REWARD_SCALE);
 */
static #applyExplorationVisitAdjustment(
  state: SimulationState,
  rewardScale: number,
) {
  const visitsAtThisCell = state.visitsAtCurrent | 0;

  const isFirstVisit = visitsAtThisCell === 1;
  const adjustment = isFirstVisit
    ? MazeMovement.#NEW_CELL_EXPLORATION_BONUS * rewardScale
    : -(MazeMovement.#REVISIT_PENALTY_STRONG * rewardScale);

  state.newCellExplorationBonus += adjustment;
}
2310
-
2311
/**
 * Bonus for reducing the distance-to-exit after a stagnant stretch.
 *
 * Flow:
 * 1) Resolve the distance at `state.position`: a direct lookup in
 *    `state.distanceMap` (defaulting to `Infinity`) when
 *    `state.hasDistanceMap` is set, otherwise via `#distanceAt`.
 * 2) Compare it with `state.lastDistanceGlobal` (treated as `Infinity` when
 *    unset). On a strict decrease:
 *    - if `state.stepsSinceImprovement` exceeds `#GLOBAL_BREAK_BONUS_START`,
 *      add `min(excessSteps * #GLOBAL_BREAK_BONUS_PER_STEP,
 *      #GLOBAL_BREAK_BONUS_CAP) * rewardScale` to `state.progressReward`;
 *    - reset `state.stepsSinceImprovement` to zero.
 * 3) Always store the resolved distance in `state.lastDistanceGlobal`.
 *
 * The resolved distance is also written to `#COORD_SCRATCH[0]`; this method
 * does not read it back.
 *
 * @param state - Mutable simulation state for the current run.
 * @param encodedMaze - Maze grid forwarded to `#distanceAt` when needed.
 * @param rewardScale - Multiplier applied to the bonus and its cap.
 * @example
 * MazeMovement.#applyGlobalDistanceImprovementBonus(state, maze, 1.0);
 */
static #applyGlobalDistanceImprovementBonus(
  state: SimulationState,
  encodedMaze: number[][],
  rewardScale: number,
) {
  const scratch = MazeMovement.#COORD_SCRATCH;

  const column = state.position[0] | 0;
  const row = state.position[1] | 0;

  let distanceNow;
  if (state.hasDistanceMap) {
    distanceNow = state.distanceMap?.[row]?.[column] ?? Infinity;
  } else {
    distanceNow = MazeMovement.#distanceAt(
      encodedMaze,
      state.position,
      state.distanceMap,
    );
  }

  scratch[0] = distanceNow as number;

  const distanceBefore = state.lastDistanceGlobal ?? Infinity;
  const improvedGlobally = distanceNow < distanceBefore;

  if (improvedGlobally) {
    const stalledSteps = (state.stepsSinceImprovement | 0) as number;
    const bonusStart = MazeMovement.#GLOBAL_BREAK_BONUS_START;

    // Only stagnation beyond the start threshold earns a bonus.
    if (stalledSteps > bonusStart) {
      const rawBonus =
        (stalledSteps - bonusStart) *
        MazeMovement.#GLOBAL_BREAK_BONUS_PER_STEP *
        rewardScale;
      const bonusCeiling = MazeMovement.#GLOBAL_BREAK_BONUS_CAP * rewardScale;
      state.progressReward += Math.min(rawBonus, bonusCeiling);
    }

    state.stepsSinceImprovement = 0;
  }

  // Remember this distance for the next call's comparison.
  state.lastDistanceGlobal = distanceNow;
}
2389
-
2390
/**
 * Apply repetition and backtrack penalties to `state.invalidMovePenalty`.
 *
 * Rules:
 * - Repetition: when `state.direction` equals `state.prevAction` and
 *   `state.stepsSinceImprovement` exceeds `#REPETITION_PENALTY_START`,
 *   subtract `#REPETITION_PENALTY_BASE * (steps - start) * rewardScale`.
 * - Backtrack: when both actions are non-negative, the stagnation counter is
 *   positive, and the current action is `#OPPOSITE_DIR[prevAction]`, subtract
 *   `#BACK_MOVE_PENALTY * rewardScale`.
 *
 * Both rules are checked independently on every call; nothing happens when
 * `state.earlyTerminate` is set.
 *
 * The penalties are subtracted directly. They were previously written to
 * `#COORD_SCRATCH` and read back; that buffer is described elsewhere in this
 * class as a typed array for integer coordinates, so the read-back could
 * return a truncated or rounded penalty depending on the buffer's element
 * type (its declaration is outside this block).
 *
 * @param state - Mutable simulation state for the current run.
 * @param rewardScale - Multiplier applied to both penalty magnitudes.
 * @example
 * MazeMovement.#applyRepetitionAndBacktrackPenalties(state, MazeMovement.#REWARD_SCALE);
 */
static #applyRepetitionAndBacktrackPenalties(
  state: SimulationState,
  rewardScale: number,
) {
  if (state.earlyTerminate) return;

  const previousAction = state.prevAction;
  const currentAction = state.direction;
  const stagnationSteps = state.stepsSinceImprovement | 0;

  // Repetition: the penalty grows linearly with steps past the threshold.
  const repetitionStart = MazeMovement.#REPETITION_PENALTY_START;
  if (previousAction === currentAction && stagnationSteps > repetitionStart) {
    const stepsOverThreshold = stagnationSteps - repetitionStart;
    state.invalidMovePenalty -=
      MazeMovement.#REPETITION_PENALTY_BASE * stepsOverThreshold * rewardScale;
  }

  // Backtrack: fixed penalty for reversing the previous action while stagnant.
  const isBacktrack =
    previousAction >= 0 &&
    currentAction >= 0 &&
    stagnationSteps > 0 &&
    currentAction === MazeMovement.#OPPOSITE_DIR[previousAction];
  if (isBacktrack) {
    state.invalidMovePenalty -= MazeMovement.#BACK_MOVE_PENALTY * rewardScale;
  }
}
2461
-
2462
- /**
2463
- * Entropy-guided shaping: apply small penalties or exploration bonuses
2464
- * based on the network's action entropy and whether perceptual guidance
2465
- * (line-of-sight or gradient cues) is present.
2466
- *
2467
- * Behaviour / steps:
2468
- * 1) Early-exit when there are no recorded action statistics.
2469
- * 2) Compute whether the current perception provides guidance.
2470
- * 3) If entropy is very high, apply a small penalty to discourage
2471
- * aimless, highly-uncertain behaviour.
2472
- * 4) If perception provides guidance and the network is confident
2473
- * (low entropy and clear max-vs-second gap), award a small
2474
- * exploration bonus to encourage exploiting the useful cue.
2475
- *
2476
- * Implementation notes:
2477
- * - Uses descriptive local names and the pooled `#COORD_SCRATCH` typed
2478
- * array for tiny temporaries to avoid transient allocation on hot paths.
2479
- * - Preserves existing numeric thresholds and multipliers.
2480
- *
2481
- * @param state - Mutable simulation state for the current run.
2482
- * @param rewardScale - Global scalar applied to penalty/bonus magnitudes.
2483
- * @example
2484
- * // Called as part of per-step penalty finalization
2485
- * MazeMovement.#applyEntropyGuidanceShaping(state, MazeMovement.#REWARD_SCALE);
2486
- */
2487
- static #applyEntropyGuidanceShaping(
2488
- state: SimulationState,
2489
- rewardScale: number,
2490
- ) {
2491
- // Step 1: require action stats
2492
- if (state.earlyTerminate || !state.actionStats) return;
2493
-
2494
- // Local copies for clarity and fewer property loads
2495
- const { entropy, maxProb, secondProb } = state.actionStats;
2496
- const entropyHighThreshold = MazeMovement.#ENTROPY_HIGH_THRESHOLD;
2497
- const entropyConfidentThreshold = MazeMovement.#ENTROPY_CONFIDENT_THRESHOLD;
2498
- const confidentDiffThreshold = MazeMovement.#ENTROPY_CONFIDENT_DIFF;
2499
-
2500
- // Step 2: detect whether perceptual guidance exists (LOS or gradient cues)
2501
- const hasLineOfSightGuidance =
2502
- MazeMovement.#sumVisionGroup(
2503
- state.vision,
2504
- MazeMovement.#VISION_LOS_START,
2505
- ) > 0;
2506
- const hasGradientGuidance =
2507
- MazeMovement.#sumVisionGroup(
2508
- state.vision,
2509
- MazeMovement.#VISION_GRAD_START,
2510
- ) > 0;
2511
- const hasGuidance = hasLineOfSightGuidance || hasGradientGuidance;
2512
-
2513
- // Pooled scratch for tiny temporary values (avoid boxed Number allocations)
2514
- const scratch = MazeMovement.#COORD_SCRATCH;
2515
-
2516
- // Step 3: high-entropy penalty (discourage dithering/ambivalence)
2517
- if (entropy > entropyHighThreshold) {
2518
- scratch[0] = -MazeMovement.#ENTROPY_PENALTY * rewardScale;
2519
- state.invalidMovePenalty += scratch[0];
2520
- return; // high-entropy is dominant; bail early
2521
- }
2522
-
2523
- // Step 4: confident + guided => small exploration bonus
2524
- const maxMinusSecond = (maxProb ?? 0) - (secondProb ?? 0);
2525
- if (
2526
- hasGuidance &&
2527
- entropy < entropyConfidentThreshold &&
2528
- maxMinusSecond > confidentDiffThreshold
2529
- ) {
2530
- scratch[0] = MazeMovement.#EXPLORATION_BONUS_SMALL * rewardScale;
2531
- state.newCellExplorationBonus += scratch[0];
2532
- }
2533
- }
2534
-
2535
- /**
2536
- * Periodic saturation penalty cycle.
2537
- *
2538
- * Purpose:
2539
- * - When the global saturation counter (`#StateSaturations`) exceeds a
2540
- * trigger, apply a base saturation penalty to discourage chronic
2541
- * overconfidence. On configured periods apply an additional escalate
2542
- * penalty to increase pressure over time.
2543
- *
2544
- * Behaviour / steps:
2545
- * 1) Early-exit when saturations have not reached the configured trigger.
2546
- * 2) Apply the base saturation penalty scaled by `rewardScale`.
2547
- * 3) If the saturations counter aligns with the configured period, apply
2548
- * an extra escalate penalty (also scaled by `rewardScale`).
2549
- *
2550
- * Implementation notes:
2551
- * - Uses the pooled `#COORD_SCRATCH` typed array as a tiny allocation-free
2552
- * temporary for computed penalty values to keep the hot path GC-friendly.
2553
- * - Local descriptive names improve readability without changing logic.
2554
- *
2555
- * @param state - Mutable simulation state (penalties are accumulated here)
2556
- * @param rewardScale - Global scalar used to scale penalty magnitudes
2557
- * @example
2558
- * MazeMovement.#applySaturationPenaltyCycle(state, MazeMovement.#REWARD_SCALE);
2559
- */
2560
- static #applySaturationPenaltyCycle(
2561
- state: SimulationState,
2562
- rewardScale: number,
2563
- ) {
2564
- // Step 1: quick-exit when under the configured trigger
2565
- const saturations = MazeMovement.#StateSaturations;
2566
- const triggerThreshold = MazeMovement.#SATURATION_PENALTY_TRIGGER;
2567
- if (saturations < triggerThreshold) return;
2568
-
2569
- // Pooled tiny scratch to hold negative penalty values (avoid boxed numbers)
2570
- const scratch = MazeMovement.#COORD_SCRATCH;
2571
-
2572
- // Step 2: apply base saturation penalty (negative value folded into accumulator)
2573
- const basePenalty = MazeMovement.#SATURATION_PENALTY_BASE * rewardScale;
2574
- scratch[0] = -basePenalty;
2575
- state.invalidMovePenalty += scratch[0];
2576
-
2577
- // Step 3: periodic escalation on configured period boundaries
2578
- const period = MazeMovement.#SATURATION_PENALTY_PERIOD;
2579
- if (period > 0 && saturations % period === 0) {
2580
- const escalatePenalty =
2581
- MazeMovement.#SATURATION_PENALTY_ESCALATE * rewardScale;
2582
- scratch[1] = -escalatePenalty;
2583
- state.invalidMovePenalty += scratch[1];
2584
- }
2585
- }
2586
-
2587
- /**
2588
- * Detect saturation/overconfidence, apply shaping penalties, and
2589
- * optionally perform adaptive output-node bias dampening.
2590
- *
2591
- * Behaviour / steps:
2592
- * 1) Read action confidence statistics and decide whether the network is
2593
- * overconfident (sharp winner) or has flat logit collapse (low variance).
2594
- * 2) Update a rolling `#StateSaturations` counter and the run-local
2595
- * `state.saturatedSteps` when either condition holds.
2596
- * 3) Apply fixed penalties for overconfidence and flat collapse scaled by
2597
- * `rewardScale`.
2598
- * 4) When chronic saturation persists, periodically adjust output-node
2599
- * biases to dampen runaway confidence (best-effort; errors are swallowed).
2600
- *
2601
- * Implementation notes:
2602
- * - Uses descriptive local variables for readability and fewer property loads.
2603
- * - Reuses the pooled `#COORD_SCRATCH` typed-array for tiny temporaries to
2604
- * avoid boxed Number allocations on hot paths.
2605
- * - Preserves existing numeric thresholds and update semantics.
2606
- *
2607
- * @param state - Mutable simulation state for the current run.
2608
- * @param outputs - Raw network logits for the current activation.
2609
- * @param network - The neural network instance (used for optional bias adjust).
2610
- * @example
2611
- * MazeMovement.#applySaturationAndBiasAdjust(state, outputs, network);
2612
- */
2613
- static #applySaturationAndBiasAdjust(
2614
- state: SimulationState,
2615
- outputs: number[],
2616
- network: INetwork,
2617
- ) {
2618
- // Step 0: locals & pooled scratch
2619
- const rewardScale = MazeMovement.#REWARD_SCALE;
2620
- const scratch = MazeMovement.#COORD_SCRATCH;
2621
-
2622
- // Defensive: require actionStats to compute confidence; callers normally set this.
2623
- const actionStats = state.actionStats;
2624
- if (!actionStats) return;
2625
-
2626
- // Step 1: overconfidence detection (max probability vs second-best)
2627
- const maxProbability = actionStats.maxProb ?? 0;
2628
- const secondProbability = actionStats.secondProb ?? 0;
2629
- const isOverConfident =
2630
- maxProbability > MazeMovement.#OVERCONFIDENT_PROB &&
2631
- secondProbability < MazeMovement.#SECOND_PROB_LOW;
2632
-
2633
- // Step 1b: detect flat collapse using logits variance (population std-dev)
2634
- const actionCount = MazeMovement.#ACTION_DIM;
2635
- // compute mean logit
2636
- let sumLogits = 0;
2637
- for (let i = 0; i < outputs.length; i++) sumLogits += outputs[i];
2638
- const meanLogit = sumLogits / actionCount;
2639
-
2640
- // compute variance (avoid intermediate arrays)
2641
- let varianceAccumulator = 0;
2642
- for (let i = 0; i < outputs.length; i++) {
2643
- const delta = outputs[i] - meanLogit;
2644
- varianceAccumulator += delta * delta;
2645
- }
2646
- const variance = varianceAccumulator / actionCount;
2647
- const stdDev = Math.sqrt(variance);
2648
- const isFlatCollapsed = stdDev < MazeMovement.#LOGSTD_FLAT_THRESHOLD;
2649
-
2650
- // Step 2: update rolling saturation counter and saturated steps
2651
- let saturationCounter = MazeMovement.#StateSaturations;
2652
- if (isOverConfident || isFlatCollapsed) {
2653
- saturationCounter++;
2654
- state.saturatedSteps++;
2655
- } else if (saturationCounter > 0) {
2656
- saturationCounter--;
2657
- }
2658
- MazeMovement.#StateSaturations = saturationCounter;
2659
-
2660
- // Step 3: fold in penalties using pooled scratch to avoid boxed temporaries
2661
- if (isOverConfident) {
2662
- scratch[0] = -MazeMovement.#OVERCONFIDENT_PENALTY * rewardScale;
2663
- state.invalidMovePenalty += scratch[0];
2664
- }
2665
- if (isFlatCollapsed) {
2666
- scratch[0] = -MazeMovement.#FLAT_COLLAPSE_PENALTY * rewardScale;
2667
- state.invalidMovePenalty += scratch[0];
2668
- }
2669
-
2670
- // Step 4: adaptive bias dampening when chronic saturation persists
2671
- const shouldAdjustBiases =
2672
- MazeMovement.#StateSaturations > MazeMovement.#SATURATION_ADJUST_MIN &&
2673
- state.steps % MazeMovement.#SATURATION_ADJUST_INTERVAL === 0;
2674
-
2675
- if (shouldAdjustBiases) {
2676
- try {
2677
- const outputNodes = network.nodes?.filter(
2678
- (node: INodeStruct): node is INodeStruct & { bias: number } =>
2679
- node.type === MazeMovement.#NODE_TYPE_OUTPUT &&
2680
- typeof node.bias === 'number',
2681
- );
2682
- if (outputNodes && outputNodes.length > 0) {
2683
- // compute mean bias (simple loop to avoid higher-order helpers)
2684
- let biasSum = 0;
2685
- for (let i = 0; i < outputNodes.length; i++)
2686
- biasSum += outputNodes[i].bias;
2687
- const meanBias = biasSum / outputNodes.length;
2688
-
2689
- // adjust each node bias towards zero by removing a scaled meanBias
2690
- const adjustFactor = MazeMovement.#BIAS_ADJUST_FACTOR;
2691
- const clamp = MazeMovement.#BIAS_CLAMP;
2692
- for (let i = 0; i < outputNodes.length; i++) {
2693
- const node = outputNodes[i];
2694
- const adjusted = node.bias - meanBias * adjustFactor;
2695
- // clamp adjusted bias into allowed range
2696
- node.bias = Math.max(-clamp, Math.min(clamp, adjusted));
2697
- }
2698
- }
2699
- } catch {
2700
- // Best-effort: swallow errors (network shapes vary in tests)
2701
- }
2702
- }
2703
- }
2704
-
2705
- /**
2706
- * Check deep stagnation and optionally mark the run for termination.
2707
- *
2708
- * Purpose:
2709
- * - If the run has been without improvement for longer than
2710
- * `#DEEP_STAGNATION_THRESHOLD` we may apply a deep-stagnation penalty
2711
- * and terminate the run in non-browser environments (node / CI). The
2712
- * method avoids allocations by reusing the pooled `#COORD_SCRATCH`.
2713
- *
2714
- * Steps:
2715
- * 1) Fast-path: compare `state.stepsSinceImprovement` against the
2716
- * configured threshold.
2717
- * 2) Detect whether we are running outside a browser (only then apply
2718
- * the penalty and return `true`).
2719
- * 3) Use `#COORD_SCRATCH[0]` to hold the negative penalty (allocation-free)
2720
- * and fold it into `state.invalidMovePenalty`.
2721
- * 4) Return `true` when we applied the penalty (indicating termination),
2722
- * otherwise preserve and return `state.earlyTerminate`.
2723
- *
2724
- * @param state - mutable simulation state (mutated in-place when penalty applies)
2725
- * @returns boolean - `true` when the run should be terminated (penalty applied),
2726
- * otherwise the existing `state.earlyTerminate` value.
2727
- * @example
2728
- * // inside the simulation loop
2729
- * if (MazeMovement.#maybeTerminateDeepStagnation(state)) break;
2730
- */
2731
- static #maybeTerminateDeepStagnation(state: SimulationState): boolean {
2732
- // Step 1: quick guard using 32-bit coercion for stable comparisons
2733
- const stagnationSteps = state.stepsSinceImprovement | 0;
2734
- if (stagnationSteps <= MazeMovement.#DEEP_STAGNATION_THRESHOLD)
2735
- return state.earlyTerminate;
2736
-
2737
- // Step 2: prepare locals and pooled scratch for allocation-free penalty write
2738
- const rewardScale = MazeMovement.#REWARD_SCALE;
2739
- const scratch = MazeMovement.#COORD_SCRATCH;
2740
-
2741
- // Step 3: apply penalty and request termination only when not running in a
2742
- // browser environment (preserve original behaviour that avoids applying
2743
- // the penalty when `window` exists). Keep a try/catch as a defensive
2744
- // fallback in case environment detection throws in unusual hosts.
2745
- try {
2746
- const runningOutsideBrowser = typeof window === 'undefined';
2747
- if (runningOutsideBrowser) {
2748
- scratch[0] = -MazeMovement.#DEEP_STAGNATION_PENALTY * rewardScale;
2749
- state.invalidMovePenalty += scratch[0];
2750
- return true;
2751
- }
2752
- } catch {
2753
- // Best-effort fallback: if detection failed, still apply the penalty.
2754
- scratch[0] = -MazeMovement.#DEEP_STAGNATION_PENALTY * rewardScale;
2755
- state.invalidMovePenalty += scratch[0];
2756
- return true;
2757
- }
2758
-
2759
- // Step 4: no change to termination state in browser-like hosts
2760
- return state.earlyTerminate;
2761
- }
2762
-
2763
- /**
2764
- * Compute the normalized action entropy from recorded direction counts.
2765
- *
2766
- * Behaviour / rationale:
2767
- * - Converts direction visit counts into a probability distribution and
2768
- * computes the Shannon entropy. The result is normalised by
2769
- * `#LOG_ACTIONS` so the returned value lies in a stable range used by
2770
- * the rest of the scoring heuristics.
2771
- * - The implementation is allocation-free and uses the pooled
2772
- * `#COORD_SCRATCH` typed-array as a tiny scratch accumulator to avoid
2773
- * creating transient Number objects on hot paths.
2774
- *
2775
- * Steps:
2776
- * 1) Sum the provided `directionCounts` and fall back to 1 to avoid
2777
- * division-by-zero.
2778
- * 2) Iterate counts, skip zeros, compute per-action probability and
2779
- * accumulate -p * log(p) into a pooled accumulator.
2780
- * 3) Normalise the accumulated entropy by `#LOG_ACTIONS` and return it.
2781
- *
2782
- * @param directionCounts - array of non-negative integers counting how often each action was chosen
2783
- * @returns normalised entropy number used in fitness shaping
2784
- * @example
2785
- * const entropy = MazeMovement.#computeActionEntropyFromCounts(state.directionCounts);
2786
- */
2787
- static #computeActionEntropyFromCounts(directionCounts: number[]): number {
2788
- // Step 1: sum counts (coerce to number) and avoid dividing by zero
2789
- const totalCount =
2790
- directionCounts.reduce((sum, value) => sum + (value | 0), 0) || 1;
2791
-
2792
- // Use pooled scratch to hold the running entropy accumulator (allocation-free)
2793
- const scratch = MazeMovement.#COORD_SCRATCH;
2794
- scratch[0] = 0;
2795
-
2796
- // Local alias for performance-sensitive globals
2797
- const logFn = Math.log;
2798
-
2799
- // Step 2: accumulate entropy = -sum(p * log(p)) skipping zero-counts
2800
- for (let i = 0, len = directionCounts.length; i < len; i++) {
2801
- const count = directionCounts[i] | 0;
2802
- if (count === 0) continue;
2803
- const probability = count / totalCount;
2804
- scratch[0] -= probability * logFn(probability);
2805
- }
2806
-
2807
- // Step 3: normalise by the project's LOG_ACTIONS constant and return
2808
- return scratch[0] / MazeMovement.#LOG_ACTIONS;
2809
- }
2810
-
2811
- /**
2812
- * Build and return the finalized result object for a successful run.
2813
- *
2814
- * Behaviour / rationale:
2815
- * - Aggregates progress, exploration and penalty terms into a single
2816
- * fitness score. The final fitness is clamped by `#MIN_SUCCESS_FITNESS`.
2817
- * - Returns a compact result object including steps, materialized path,
2818
- * a progress metric and a normalised action-entropy value used by scoring.
2819
- * - Uses the pooled `#COORD_SCRATCH` for a tiny, allocation-free saturation
2820
- * fraction calculation to reduce transient allocations on hot code paths.
2821
- *
2822
- * Steps:
2823
- * 1) Compute step efficiency (how many steps under the maximum were used).
2824
- * 2) Compute action entropy from recorded direction counts.
2825
- * 3) Aggregate fitness components (base, efficiency, rewards, penalties).
2826
- * 4) Materialize the executed path and compute saturation fraction.
2827
- * 5) Clamp the fitness to the configured minimum and return the result.
2828
- *
2829
- * @param state - simulation state containing run accumulators and diagnostics
2830
- * @param maxSteps - configured maximum steps for the run (used to compute efficiency)
2831
- * @returns result object describing success, steps, path, fitness, progress and diagnostics
2832
- * @example
2833
- * const result = MazeMovement.#finalizeSuccess(state, maxSteps);
2834
- */
2835
- static #finalizeSuccess(state: SimulationState, maxSteps: number) {
2836
- // Step 1: compute steps and efficiency (coerce to 32-bit ints for stability)
2837
- const stepsTaken = state.steps | 0;
2838
- const stepEfficiency = (maxSteps | 0) - stepsTaken;
2839
-
2840
- // Step 2: entropy of the action distribution (normalised by #LOG_ACTIONS)
2841
- const actionEntropy = MazeMovement.#computeActionEntropyFromCounts(
2842
- state.directionCounts,
2843
- );
2844
-
2845
- // Step 3: aggregate fitness components using descriptive locals
2846
- const baseFitness =
2847
- MazeMovement.#SUCCESS_BASE_FITNESS +
2848
- stepEfficiency * MazeMovement.#STEP_EFFICIENCY_SCALE +
2849
- state.progressReward +
2850
- state.newCellExplorationBonus +
2851
- state.invalidMovePenalty;
2852
-
2853
- const totalFitness =
2854
- baseFitness + actionEntropy * MazeMovement.#SUCCESS_ACTION_ENTROPY_SCALE;
2855
-
2856
- // Step 4: materialize the path and compute saturation fraction using pooled scratch
2857
- const pathMaterialized = MazeMovement.#materializePath(state.pathLength);
2858
- const scratch = MazeMovement.#COORD_SCRATCH;
2859
- scratch[0] = stepsTaken ? state.saturatedSteps / stepsTaken : 0;
2860
- const saturationFraction = scratch[0];
2861
-
2862
- // Step 5: ensure final fitness meets the configured minimum for successes
2863
- const finalFitness = Math.max(
2864
- MazeMovement.#MIN_SUCCESS_FITNESS,
2865
- totalFitness,
2866
- );
2867
-
2868
- return {
2869
- success: true,
2870
- steps: stepsTaken,
2871
- path: pathMaterialized,
2872
- fitness: finalFitness,
2873
- progress: 100,
2874
- saturationFraction,
2875
- actionEntropy,
2876
- };
2877
- }
2878
-
2879
- /**
2880
- * Build and return the finalized result object for a failed run.
2881
- *
2882
- * Behaviour / rationale:
2883
- * - Computes shaped progress, exploration contributions, entropy bonus and
2884
- * aggregates penalties into a single fitness score. For failures the
2885
- * fitness is transformed to avoid negative-heavy values using the same
2886
- * heuristic as the original implementation.
2887
- * - Uses the pooled `#COORD_SCRATCH` for a tiny, allocation-free
2888
- * saturation fraction calculation.
2889
- *
2890
- * Steps:
2891
- * 1) Materialize the executed path and determine the last visited cell.
2892
- * 2) Compute progress (via distance map or geometry), then shape it.
2893
- * 3) Aggregate exploration, reward and penalty contributions including
2894
- * an entropy-derived bonus.
2895
- * 4) Mix in small random noise and transform negative raw fitness using
2896
- * the project's stabilizing mapping.
2897
- * 5) Return the failure result object with diagnostics.
2898
- *
2899
- * @param state - simulation state containing run accumulators and diagnostics
2900
- * @param encodedMaze - read-only maze grid (rows of numeric columns)
2901
- * @param startPos - starting coordinate tuple [x, y]
2902
- * @param exitPos - exit coordinate tuple [x, y]
2903
- * @param distanceMap - optional precomputed distance map aligned to maze
2904
- * @returns failure result object with fitness, path and diagnostics
2905
- * @example
2906
- * const result = MazeMovement.#finalizeFailure(state, maze, startPos, exitPos, distanceMap);
2907
- */
2908
- static #finalizeFailure(
2909
- state: SimulationState,
2910
- encodedMaze: number[][],
2911
- startPos: readonly [number, number],
2912
- exitPos: readonly [number, number],
2913
- distanceMap?: number[][],
2914
- ) {
2915
- // Step 1: materialize path and compute last visited position
2916
- const pathX = MazeMovement.#PathX!;
2917
- const pathY = MazeMovement.#PathY!;
2918
- const lastIndex = (state.pathLength | 0) - 1;
2919
- const lastPos: [number, number] = [
2920
- pathX[lastIndex] ?? 0,
2921
- pathY[lastIndex] ?? 0,
2922
- ];
2923
-
2924
- // Step 2: compute progress using an optional distance map or geometry
2925
- const progress = distanceMap
2926
- ? MazeUtils.calculateProgressFromDistanceMap(
2927
- distanceMap,
2928
- lastPos,
2929
- startPos,
2930
- )
2931
- : MazeUtils.calculateProgress(encodedMaze, lastPos, startPos, exitPos);
2932
- const progressFraction = progress / 100;
2933
- const shapedProgress =
2934
- Math.pow(progressFraction, MazeMovement.#PROGRESS_POWER) *
2935
- MazeMovement.#PROGRESS_SCALE;
2936
-
2937
- // Step 3: aggregate exploration and entropy-derived components
2938
- const explorationScore = state.visitedUniqueCount * 1.0;
2939
- const actionEntropy = MazeMovement.#computeActionEntropyFromCounts(
2940
- state.directionCounts,
2941
- );
2942
- const entropyBonus = actionEntropy * MazeMovement.#ENTROPY_BONUS_WEIGHT;
2943
-
2944
- // Placeholders for future heuristics (preserve original behaviour)
2945
- const saturationPenalty = 0;
2946
- const outputVarPenalty = 0;
2947
-
2948
- // Aggregate base fitness components
2949
- const baseFitness =
2950
- shapedProgress +
2951
- explorationScore +
2952
- state.progressReward +
2953
- state.newCellExplorationBonus +
2954
- state.invalidMovePenalty +
2955
- entropyBonus +
2956
- state.localAreaPenalty +
2957
- saturationPenalty +
2958
- outputVarPenalty;
2959
-
2960
- // Step 4: add a small random factor and stabilise negative values
2961
- const raw =
2962
- baseFitness + MazeMovement.#rand() * MazeMovement.#FITNESS_RANDOMNESS;
2963
- const fitness = raw >= 0 ? raw : -Math.log1p(1 - raw);
2964
-
2965
- // Step 5: produce materialized path and saturation fraction (allocation-free)
2966
- const pathMaterialized = MazeMovement.#materializePath(state.pathLength);
2967
- const scratch = MazeMovement.#COORD_SCRATCH;
2968
- const stepsTaken = state.steps | 0;
2969
- scratch[0] = stepsTaken ? state.saturatedSteps / stepsTaken : 0;
2970
- const saturationFraction = scratch[0];
2971
8
 
2972
- return {
2973
- success: false,
2974
- steps: state.steps,
2975
- path: pathMaterialized,
2976
- fitness,
2977
- progress,
2978
- saturationFraction,
2979
- actionEntropy,
2980
- };
2981
- }
2982
- }
9
+ export { MazeMovement } from './mazeMovement/mazeMovement';
10
+ export type {
11
+ DirectionSelectionStats,
12
+ MazeMovementBufferPools,
13
+ MazeMovementRunServiceState,
14
+ MazeMovementSimulationResult,
15
+ SimulationState,
16
+ } from './mazeMovement/mazeMovement.types';