@reicek/neataptic-ts 0.1.21 → 0.1.22
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/agents/boundary-mapper.agent.md +29 -0
- package/.github/agents/docs-scout.agent.md +29 -0
- package/.github/agents/plan-scout.agent.md +29 -0
- package/.github/agents/solid-split.agent.md +138 -0
- package/.github/copilot-instructions.md +103 -0
- package/package.json +6 -3
- package/plans/ES2023 migration +13 -8
- package/plans/Evolution_Training_Interoperability_Contracts.md +1 -1
- package/plans/Interactive_Examples_and_Learning_Path.md +10 -2
- package/plans/Memory_Optimization.md +3 -3
- package/plans/README.md +63 -0
- package/plans/Roadmap.md +15 -3
- package/plans/asciiMaze_SOLID_split.done.md +130 -0
- package/plans/flappy_bird_SOLID_split.done.md +67 -0
- package/scripts/assets/theme.css +221 -34
- package/scripts/copy-examples.mjs +9 -5
- package/scripts/export-onnx.mjs +3 -3
- package/scripts/generate-bench-tables.mjs +10 -10
- package/scripts/generate-bench-tables.ts +10 -10
- package/scripts/generate-docs.ts +1415 -449
- package/scripts/render-docs-html.ts +15 -8
- package/src/README.md +101 -223
- package/src/architecture/README.md +57 -185
- package/src/architecture/layer/README.md +38 -38
- package/src/architecture/network/README.md +33 -31
- package/src/architecture/network/activate/README.md +77 -77
- package/src/architecture/network/connect/README.md +15 -13
- package/src/architecture/network/deterministic/README.md +7 -7
- package/src/architecture/network/evolve/README.md +44 -44
- package/src/architecture/network/gating/README.md +20 -20
- package/src/architecture/network/genetic/README.md +51 -51
- package/src/architecture/network/mutate/README.md +97 -97
- package/src/architecture/network/onnx/README.md +264 -264
- package/src/architecture/network/prune/README.md +39 -39
- package/src/architecture/network/remove/README.md +26 -26
- package/src/architecture/network/serialize/README.md +56 -56
- package/src/architecture/network/slab/README.md +61 -61
- package/src/architecture/network/standalone/README.md +24 -24
- package/src/architecture/network/stats/README.md +9 -9
- package/src/architecture/network/topology/README.md +46 -46
- package/src/architecture/network/training/README.md +21 -21
- package/src/methods/README.md +9 -87
- package/src/multithreading/README.md +8 -77
- package/src/multithreading/workers/README.md +2 -2
- package/src/multithreading/workers/browser/README.md +0 -6
- package/src/multithreading/workers/node/README.md +0 -3
- package/src/neat/README.md +562 -568
- package/src/utils/README.md +18 -18
- package/test/examples/asciiMaze/README.md +59 -59
- package/test/examples/asciiMaze/asciiMaze.e2e.test.ts +14 -9
- package/test/examples/asciiMaze/browser-entry/README.md +196 -0
- package/test/examples/asciiMaze/browser-entry/browser-entry.abort.services.ts +95 -0
- package/test/examples/asciiMaze/browser-entry/browser-entry.constants.ts +23 -0
- package/test/examples/asciiMaze/browser-entry/browser-entry.curriculum.services.ts +115 -0
- package/test/examples/asciiMaze/browser-entry/browser-entry.globals.services.ts +106 -0
- package/test/examples/asciiMaze/browser-entry/browser-entry.host.services.ts +157 -0
- package/test/examples/asciiMaze/browser-entry/browser-entry.services.ts +14 -0
- package/test/examples/asciiMaze/browser-entry/browser-entry.ts +129 -0
- package/test/examples/asciiMaze/browser-entry/browser-entry.types.ts +120 -0
- package/test/examples/asciiMaze/browser-entry/browser-entry.utils.ts +98 -0
- package/test/examples/asciiMaze/browser-entry.ts +10 -576
- package/test/examples/asciiMaze/dashboardManager/README.md +276 -0
- package/test/examples/asciiMaze/dashboardManager/archive/README.md +16 -0
- package/test/examples/asciiMaze/dashboardManager/archive/dashboardManager.archive.services.ts +267 -0
- package/test/examples/asciiMaze/dashboardManager/dashboardManager.constants.ts +35 -0
- package/test/examples/asciiMaze/dashboardManager/dashboardManager.services.ts +103 -0
- package/test/examples/asciiMaze/dashboardManager/dashboardManager.ts +181 -0
- package/test/examples/asciiMaze/dashboardManager/dashboardManager.types.ts +267 -0
- package/test/examples/asciiMaze/dashboardManager/dashboardManager.utils.ts +254 -0
- package/test/examples/asciiMaze/dashboardManager/live/README.md +14 -0
- package/test/examples/asciiMaze/dashboardManager/live/dashboardManager.live.services.ts +264 -0
- package/test/examples/asciiMaze/dashboardManager/telemetry/README.md +47 -0
- package/test/examples/asciiMaze/dashboardManager/telemetry/dashboardManager.telemetry.services.ts +513 -0
- package/test/examples/asciiMaze/dashboardManager.ts +13 -2335
- package/test/examples/asciiMaze/evolutionEngine/README.md +1058 -0
- package/test/examples/asciiMaze/evolutionEngine/curriculumPhase.ts +90 -0
- package/test/examples/asciiMaze/evolutionEngine/engineState.constants.ts +36 -0
- package/test/examples/asciiMaze/evolutionEngine/engineState.ts +58 -513
- package/test/examples/asciiMaze/evolutionEngine/engineState.types.ts +212 -0
- package/test/examples/asciiMaze/evolutionEngine/engineState.utils.ts +301 -0
- package/test/examples/asciiMaze/evolutionEngine/evolutionEngine.types.ts +445 -0
- package/test/examples/asciiMaze/evolutionEngine/evolutionLoop.ts +81 -50
- package/test/examples/asciiMaze/evolutionEngine/optionsAndSetup.ts +2 -4
- package/test/examples/asciiMaze/evolutionEngine/populationDynamics.ts +17 -33
- package/test/examples/asciiMaze/evolutionEngine/populationPruning.ts +1 -1
- package/test/examples/asciiMaze/evolutionEngine/rngAndTiming.ts +1 -2
- package/test/examples/asciiMaze/evolutionEngine/sampling.ts +1 -1
- package/test/examples/asciiMaze/evolutionEngine/scratchPools.ts +2 -5
- package/test/examples/asciiMaze/evolutionEngine/setupHelpers.ts +30 -37
- package/test/examples/asciiMaze/evolutionEngine/telemetryMetrics.ts +16 -58
- package/test/examples/asciiMaze/evolutionEngine/trainingWarmStart.ts +2 -2
- package/test/examples/asciiMaze/evolutionEngine.ts +55 -55
- package/test/examples/asciiMaze/fitness.ts +2 -2
- package/test/examples/asciiMaze/fitness.types.ts +65 -0
- package/test/examples/asciiMaze/interfaces.ts +64 -1352
- package/test/examples/asciiMaze/mazeMovement/README.md +356 -0
- package/test/examples/asciiMaze/mazeMovement/finalization/README.md +49 -0
- package/test/examples/asciiMaze/mazeMovement/finalization/mazeMovement.finalization.ts +138 -0
- package/test/examples/asciiMaze/mazeMovement/mazeMovement.constants.ts +101 -0
- package/test/examples/asciiMaze/mazeMovement/mazeMovement.services.ts +230 -0
- package/test/examples/asciiMaze/mazeMovement/mazeMovement.ts +299 -0
- package/test/examples/asciiMaze/mazeMovement/mazeMovement.types.ts +185 -0
- package/test/examples/asciiMaze/mazeMovement/mazeMovement.utils.ts +153 -0
- package/test/examples/asciiMaze/mazeMovement/policy/README.md +91 -0
- package/test/examples/asciiMaze/mazeMovement/policy/mazeMovement.policy.ts +467 -0
- package/test/examples/asciiMaze/mazeMovement/runtime/README.md +95 -0
- package/test/examples/asciiMaze/mazeMovement/runtime/mazeMovement.runtime.ts +354 -0
- package/test/examples/asciiMaze/mazeMovement/shaping/README.md +124 -0
- package/test/examples/asciiMaze/mazeMovement/shaping/mazeMovement.shaping.ts +459 -0
- package/test/examples/asciiMaze/mazeMovement.ts +12 -2978
- package/test/examples/flappy_bird/Trace-20260309T191949.json +24124 -0
- package/test/examples/flappy_bird/browser-entry/README.md +1129 -0
- package/test/examples/flappy_bird/browser-entry/browser-entry.host.utils.ts +4 -324
- package/test/examples/flappy_bird/browser-entry/browser-entry.network-view.utils.ts +6 -399
- package/test/examples/flappy_bird/browser-entry/browser-entry.playback.utils.ts +1 -717
- package/test/examples/flappy_bird/browser-entry/browser-entry.spawn.utils.ts +11 -31
- package/test/examples/flappy_bird/browser-entry/browser-entry.visualization.utils.ts +15 -893
- package/test/examples/flappy_bird/browser-entry/host/README.md +307 -0
- package/test/examples/flappy_bird/browser-entry/host/host.resize.service.ts +1 -295
- package/test/examples/flappy_bird/browser-entry/host/host.ts +562 -6
- package/test/examples/flappy_bird/browser-entry/host/resize/README.md +274 -0
- package/test/examples/flappy_bird/browser-entry/host/resize/host.resize.service.constants.ts +31 -0
- package/test/examples/flappy_bird/browser-entry/host/resize/host.resize.service.services.ts +360 -0
- package/test/examples/flappy_bird/browser-entry/host/resize/host.resize.service.ts +117 -0
- package/test/examples/flappy_bird/browser-entry/host/resize/host.resize.service.types.ts +63 -0
- package/test/examples/flappy_bird/browser-entry/host/resize/host.resize.service.utils.ts +250 -0
- package/test/examples/flappy_bird/browser-entry/network-view/README.md +399 -0
- package/test/examples/flappy_bird/browser-entry/network-view/network-view.topology.utils.ts +255 -0
- package/test/examples/flappy_bird/browser-entry/network-view/network-view.ts +802 -7
- package/test/examples/flappy_bird/browser-entry/playback/README.md +684 -0
- package/test/examples/flappy_bird/browser-entry/playback/background/README.md +277 -0
- package/test/examples/flappy_bird/browser-entry/playback/background/ground-grid/README.md +770 -0
- package/test/examples/flappy_bird/browser-entry/playback/background/ground-grid/playback.background.ground-grid.cache.services.ts +178 -0
- package/test/examples/flappy_bird/browser-entry/playback/background/ground-grid/playback.background.ground-grid.constants.ts +107 -0
- package/test/examples/flappy_bird/browser-entry/playback/background/ground-grid/playback.background.ground-grid.geometry.utils.ts +518 -0
- package/test/examples/flappy_bird/browser-entry/playback/background/ground-grid/playback.background.ground-grid.math.utils.ts +117 -0
- package/test/examples/flappy_bird/browser-entry/playback/background/ground-grid/playback.background.ground-grid.pulse.utils.ts +233 -0
- package/test/examples/flappy_bird/browser-entry/playback/background/ground-grid/playback.background.ground-grid.services.ts +211 -0
- package/test/examples/flappy_bird/browser-entry/playback/background/ground-grid/playback.background.ground-grid.ts +48 -0
- package/test/examples/flappy_bird/browser-entry/playback/background/ground-grid/playback.background.ground-grid.types.ts +212 -0
- package/test/examples/flappy_bird/browser-entry/playback/background/ground-grid/playback.background.ground-grid.utils.ts +81 -0
- package/test/examples/flappy_bird/browser-entry/playback/background/playback.background.cache.services.ts +96 -0
- package/test/examples/flappy_bird/browser-entry/playback/background/playback.background.constants.ts +62 -0
- package/test/examples/flappy_bird/browser-entry/playback/background/playback.background.services.ts +244 -0
- package/test/examples/flappy_bird/browser-entry/playback/background/playback.background.ts +53 -0
- package/test/examples/flappy_bird/browser-entry/playback/background/playback.background.types.ts +68 -0
- package/test/examples/flappy_bird/browser-entry/playback/background/playback.background.utils.ts +100 -0
- package/test/examples/flappy_bird/browser-entry/playback/frame-render/README.md +310 -0
- package/test/examples/flappy_bird/browser-entry/playback/frame-render/playback.frame-render.service.ts +92 -0
- package/test/examples/flappy_bird/browser-entry/playback/frame-render/playback.frame-render.services.ts +272 -0
- package/test/examples/flappy_bird/browser-entry/playback/frame-render/playback.frame-render.types.ts +39 -0
- package/test/examples/flappy_bird/browser-entry/playback/frame-render/playback.frame-render.utils.ts +493 -0
- package/test/examples/flappy_bird/browser-entry/playback/playback.constants.ts +1 -1
- package/test/examples/flappy_bird/browser-entry/playback/playback.frame-render.service.ts +4 -0
- package/test/examples/flappy_bird/browser-entry/playback/playback.snapshot.utils.ts +44 -0
- package/test/examples/flappy_bird/browser-entry/playback/playback.starfield.service.ts +39 -122
- package/test/examples/flappy_bird/browser-entry/playback/playback.starfield.services.ts +272 -0
- package/test/examples/flappy_bird/browser-entry/playback/playback.starfield.types.ts +62 -0
- package/test/examples/flappy_bird/browser-entry/playback/playback.starfield.utils.ts +11 -4
- package/test/examples/flappy_bird/browser-entry/playback/playback.ts +409 -8
- package/test/examples/flappy_bird/browser-entry/playback/playback.types.ts +4 -12
- package/test/examples/flappy_bird/browser-entry/runtime/README.md +235 -0
- package/test/examples/flappy_bird/browser-entry/runtime/runtime.evolution-launch.service.ts +45 -0
- package/test/examples/flappy_bird/browser-entry/runtime/runtime.lifecycle.service.ts +81 -0
- package/test/examples/flappy_bird/browser-entry/runtime/runtime.startup.service.ts +74 -0
- package/test/examples/flappy_bird/browser-entry/runtime/runtime.ts +31 -121
- package/test/examples/flappy_bird/browser-entry/runtime/runtime.types.ts +36 -0
- package/test/examples/flappy_bird/browser-entry/visualization/README.md +557 -0
- package/test/examples/flappy_bird/browser-entry/visualization/visualization.constants.ts +110 -0
- package/test/examples/flappy_bird/browser-entry/visualization/visualization.draw.service.ts +957 -19
- package/test/examples/flappy_bird/browser-entry/visualization/visualization.legend.utils.ts +138 -3
- package/test/examples/flappy_bird/browser-entry/visualization/visualization.topology.utils.ts +3 -27
- package/test/examples/flappy_bird/browser-entry/visualization/visualization.ts +1 -23
- package/test/examples/flappy_bird/browser-entry/worker-channel/README.md +156 -0
- package/test/examples/flappy_bird/constants/README.md +1179 -0
- package/test/examples/flappy_bird/constants/constants.network-view.ts +24 -0
- package/test/examples/flappy_bird/constants/constants.palette.ts +7 -0
- package/test/examples/flappy_bird/constants/constants.starfield.ts +78 -3
- package/test/examples/flappy_bird/environment/README.md +143 -0
- package/test/examples/flappy_bird/environment/environment.observation.utils.ts +1 -19
- package/test/examples/flappy_bird/environment/environment.step.service.ts +3 -66
- package/test/examples/flappy_bird/evaluation/README.md +130 -0
- package/test/examples/flappy_bird/evaluation/evaluation.fitness.utils.ts +1 -1
- package/test/examples/flappy_bird/evaluation/evaluation.rollout.service.ts +5 -375
- package/test/examples/flappy_bird/evaluation/rollout/README.md +291 -0
- package/test/examples/flappy_bird/evaluation/rollout/evaluation.rollout.constants.ts +30 -0
- package/test/examples/flappy_bird/evaluation/rollout/evaluation.rollout.service.ts +58 -0
- package/test/examples/flappy_bird/evaluation/rollout/evaluation.rollout.services.ts +310 -0
- package/test/examples/flappy_bird/evaluation/rollout/evaluation.rollout.types.ts +56 -0
- package/test/examples/flappy_bird/evaluation/rollout/evaluation.rollout.utils.ts +368 -0
- package/test/examples/flappy_bird/flappy-evolution-worker/README.md +618 -0
- package/test/examples/flappy_bird/flappy-evolution-worker/flappy-evolution-worker.playback.service.ts +7 -7
- package/test/examples/flappy_bird/flappy-evolution-worker/flappy-evolution-worker.simulation.frame.service.ts +364 -0
- package/test/examples/flappy_bird/flappy-evolution-worker/flappy-evolution-worker.simulation.types.ts +14 -0
- package/test/examples/flappy_bird/flappy-evolution-worker/flappy-evolution-worker.simulation.utils.ts +4 -201
- package/test/examples/flappy_bird/flappy-evolution-worker/flappy-evolution-worker.ts +184 -345
- package/test/examples/flappy_bird/flappy-evolution-worker/flappy-evolution-worker.warm-start.service.ts +291 -0
- package/test/examples/flappy_bird/flappy.simulation.shared.utils.ts +5 -0
- package/test/examples/flappy_bird/simulation-shared/README.md +417 -0
- package/test/examples/flappy_bird/simulation-shared/observation/README.md +183 -0
- package/test/examples/flappy_bird/simulation-shared/observation/observation.features.utils.ts +301 -0
- package/test/examples/flappy_bird/simulation-shared/observation/observation.ts +9 -0
- package/test/examples/flappy_bird/simulation-shared/observation/observation.vector.utils.ts +59 -0
- package/test/examples/flappy_bird/simulation-shared/simulation-shared.observation.utils.ts +5 -403
- package/test/examples/flappy_bird/simulation-shared/simulation-shared.spawn.utils.ts +20 -6
- package/test/examples/flappy_bird/{evaluation/evaluation.statistics.utils.ts → simulation-shared/simulation-shared.statistics.utils.ts} +23 -8
- package/test/examples/flappy_bird/trainer/README.md +563 -0
- package/test/examples/flappy_bird/trainer/evaluation/README.md +199 -0
- package/test/examples/flappy_bird/trainer/evaluation/trainer.evaluation.service.constants.ts +9 -0
- package/test/examples/flappy_bird/trainer/evaluation/trainer.evaluation.service.services.ts +73 -0
- package/test/examples/flappy_bird/trainer/evaluation/trainer.evaluation.service.ts +165 -0
- package/test/examples/flappy_bird/trainer/evaluation/trainer.evaluation.service.types.ts +25 -0
- package/test/examples/flappy_bird/trainer/evaluation/trainer.evaluation.service.utils.ts +161 -0
- package/test/examples/flappy_bird/trainer/trainer.evaluation.service.ts +13 -0
- package/test/examples/flappy_bird/trainer/trainer.report.service.services.ts +181 -0
- package/test/examples/flappy_bird/trainer/trainer.report.service.ts +126 -0
- package/test/examples/flappy_bird/trainer/trainer.selection.utils.ts +89 -0
- package/test/examples/flappy_bird/trainer/trainer.ts +11 -553
- package/test/examples/flappy_bird/browser-entry/browser-entry.utils.ts +0 -12
- package/test/examples/flappy_bird/environment/environment.ts +0 -7
- package/test/examples/flappy_bird/evaluation/evaluation.ts +0 -7
- package/test/examples/flappy_bird/simulation-shared/simulation-shared.ts +0 -15
- package/test/examples/flappy_bird/trainer/trainer.statistics.utils.ts +0 -78
|
@@ -1,2982 +1,16 @@
|
|
|
1
1
|
/**
|
|
2
|
-
*
|
|
2
|
+
* Compatibility entrypoint for the dedicated mazeMovement module.
|
|
3
3
|
*
|
|
4
|
-
*
|
|
5
|
-
*
|
|
6
|
-
*
|
|
7
|
-
* The agent movement system demonstrates:
|
|
8
|
-
* - Decision making based on neural network outputs
|
|
9
|
-
* - Basic reward calculations for reinforcement learning
|
|
10
|
-
* - Simple goal-seeking behavior
|
|
11
|
-
* - Simulation of movement with collision detection
|
|
12
|
-
*/
|
|
13
|
-
import { INetwork, INodeStruct } from './interfaces';
|
|
14
|
-
import { MazeUtils } from './mazeUtils';
|
|
15
|
-
import { MazeVision } from './mazeVision';
|
|
16
|
-
|
|
17
|
-
/**
|
|
18
|
-
* Diagnostic telemetry produced when selecting a direction from network logits.
|
|
19
|
-
*
|
|
20
|
-
* Encapsulates the chosen direction along with entropy and probability data so
|
|
21
|
-
* downstream helpers can apply shaping rewards and penalties without
|
|
22
|
-
* rederiving softmax statistics on hot paths.
|
|
23
|
-
*/
|
|
24
|
-
interface DirectionSelectionStats {
|
|
25
|
-
/** Chosen action index (0..#ACTION_DIM-1) or -1 when no move is selected. */
|
|
26
|
-
direction: number;
|
|
27
|
-
/** Defensive copy of per-action softmax probabilities. */
|
|
28
|
-
softmax: number[];
|
|
29
|
-
/** Normalised entropy of the action distribution in [0,1]. */
|
|
30
|
-
entropy: number;
|
|
31
|
-
/** Probability assigned to the chosen action. */
|
|
32
|
-
maxProb: number;
|
|
33
|
-
/** Probability assigned to the runner-up action. */
|
|
34
|
-
secondProb: number;
|
|
35
|
-
}
|
|
36
|
-
|
|
37
|
-
/**
|
|
38
|
-
* Internal aggregate state used during a single agent simulation run.
|
|
39
|
-
*
|
|
40
|
-
* Purpose:
|
|
41
|
-
* - Hold all derived runtime values, counters and diagnostic stats used by the
|
|
42
|
-
* MazeMovement simulation helpers. This shape is intentionally rich so tests
|
|
43
|
-
* and visualisers can inspect intermediate state when debugging.
|
|
44
|
-
*
|
|
45
|
-
* Notes:
|
|
46
|
-
* - This interface is internal to the mazeMovement module and is not exported.
|
|
47
|
-
* - Property descriptions are explicit to surface helpful tooltips in editors.
|
|
48
|
-
*/
|
|
49
|
-
interface SimulationState {
|
|
50
|
-
/** Current mutable agent position as [x, y]. */
|
|
51
|
-
position: [number, number];
|
|
52
|
-
|
|
53
|
-
/** Number of simulation steps executed so far (increments each loop). */
|
|
54
|
-
steps: number;
|
|
55
|
-
|
|
56
|
-
/** Number of entries in the recorded path (index into pooled PathX/PathY). */
|
|
57
|
-
pathLength: number;
|
|
58
|
-
|
|
59
|
-
/** Count of distinct cells visited during this run. */
|
|
60
|
-
visitedUniqueCount: number;
|
|
61
|
-
|
|
62
|
-
/** True when a precomputed distance map was supplied to the simulation. */
|
|
63
|
-
hasDistanceMap: boolean;
|
|
64
|
-
|
|
65
|
-
/** Optional precomputed distance map (rows × cols) used for fast heuristics. */
|
|
66
|
-
distanceMap?: number[][];
|
|
67
|
-
|
|
68
|
-
/** Minimum observed distance-to-exit reached so far (lower is better). */
|
|
69
|
-
minDistanceToExit: number;
|
|
70
|
-
|
|
71
|
-
/** Accumulated shaping reward derived from forward progress signals. */
|
|
72
|
-
progressReward: number;
|
|
73
|
-
|
|
74
|
-
/** Bonus accumulated when entering previously unvisited cells. */
|
|
75
|
-
newCellExplorationBonus: number;
|
|
76
|
-
|
|
77
|
-
/** Accumulated penalty from invalid moves, loops and other negative signals. */
|
|
78
|
-
invalidMovePenalty: number;
|
|
79
|
-
|
|
80
|
-
/** Index of the previous action/direction taken (-1 for no-move). */
|
|
81
|
-
prevAction: number;
|
|
82
|
-
|
|
83
|
-
/** Steps elapsed since the last observed improvement toward the goal. */
|
|
84
|
-
stepsSinceImprovement: number;
|
|
85
|
-
|
|
86
|
-
/** Last global distance-to-exit used for long-term improvement checks. */
|
|
87
|
-
lastDistanceGlobal: number;
|
|
88
|
-
|
|
89
|
-
/** Number of steps flagged as 'saturated' (network overconfident/flat outputs). */
|
|
90
|
-
saturatedSteps: number;
|
|
91
|
-
|
|
92
|
-
/** Recent positions sliding window used to detect local oscillation/stagnation. */
|
|
93
|
-
recentPositions: [number, number][];
|
|
94
|
-
|
|
95
|
-
/** Penalty applied when agent is oscillating in a tight local region. */
|
|
96
|
-
localAreaPenalty: number;
|
|
97
|
-
|
|
98
|
-
/** Counters of moves taken per direction index (N,E,S,W). */
|
|
99
|
-
directionCounts: number[];
|
|
100
|
-
|
|
101
|
-
/** Ring buffer storing recent visited cell indices for A↔B loop detection. */
|
|
102
|
-
moveHistoryRing: Int32Array;
|
|
103
|
-
|
|
104
|
-
/** Current number of populated entries in `moveHistoryRing`. */
|
|
105
|
-
moveHistoryLength: number;
|
|
106
|
-
/** Index pointer (head) into the circular moveHistoryRing. */
|
|
107
|
-
moveHistoryHead: number;
|
|
108
|
-
/** Current linearized cell index for the agent position. */
|
|
109
|
-
currentCellIndex: number;
|
|
110
|
-
/** Penalty accumulated for short A<->B oscillation detection. */
|
|
111
|
-
loopPenalty: number;
|
|
112
|
-
/** Penalty applied for returning to any recent cell (memory-based). */
|
|
113
|
-
memoryPenalty: number;
|
|
114
|
-
/** Dynamic revisit penalty scaled by per-cell visit counts. */
|
|
115
|
-
revisitPenalty: number;
|
|
116
|
-
/** Visit count at the current cell (derived from VisitCounts pool). */
|
|
117
|
-
visitsAtCurrent: number;
|
|
118
|
-
/** Current distance-to-goal measured at agent position. */
|
|
119
|
-
distHere: number;
|
|
120
|
-
/** Per-step perception/vision vector built for the network. */
|
|
121
|
-
vision: number[];
|
|
122
|
-
/** Network action statistics (softmax, entropy, etc.) populated each step. */
|
|
123
|
-
actionStats: DirectionSelectionStats | null;
|
|
124
|
-
/** Currently selected direction index (0..3) or #-NO_MOVE. */
|
|
125
|
-
direction: number;
|
|
126
|
-
/** Whether the agent moved on the last executed action. */
|
|
127
|
-
moved: boolean;
|
|
128
|
-
/** Distance value measured before executing the current action (previous step). */
|
|
129
|
-
prevDistance: number;
|
|
130
|
-
|
|
131
|
-
/** When true the simulation loop should terminate early due to safety triggers. */
|
|
132
|
-
earlyTerminate: boolean;
|
|
133
|
-
}
|
|
134
|
-
|
|
135
|
-
/**
|
|
136
|
-
* MazeMovement provides static methods for agent movement and simulation.
|
|
4
|
+
* The real implementation now lives under the folder-based module boundary at
|
|
5
|
+
* `mazeMovement/mazeMovement.ts`. This file remains so existing imports such as
|
|
6
|
+
* `./mazeMovement` continue to resolve without changes.
|
|
137
7
|
*/
|
|
138
|
-
export class MazeMovement {
|
|
139
|
-
/**
|
|
140
|
-
* Maximum number of simulation steps before terminating (safety cap)
|
|
141
|
-
* @internal
|
|
142
|
-
*/
|
|
143
|
-
static #DEFAULT_MAX_STEPS = 3000;
|
|
144
|
-
|
|
145
|
-
/**
|
|
146
|
-
* Number of recent moves tracked for oscillation detection
|
|
147
|
-
* @internal
|
|
148
|
-
*/
|
|
149
|
-
static #MOVE_HISTORY_LENGTH = 6;
|
|
150
|
-
|
|
151
|
-
// Named private constants to replace magic numbers and document intent.
|
|
152
|
-
/** Reward scale applied to shaping terms (smaller reduces selection pressure) */
|
|
153
|
-
static #REWARD_SCALE = 0.5;
|
|
154
|
-
/** Strong penalty multiplier for short A->B oscillations */
|
|
155
|
-
static #LOOP_PENALTY = 10; // multiplied by rewardScale
|
|
156
|
-
/** Penalty applied when returning to a recent cell (memory-based) */
|
|
157
|
-
static #MEMORY_RETURN_PENALTY = 2; // multiplied by rewardScale
|
|
158
|
-
/** Per-visit penalty for repeated visits to same cell */
|
|
159
|
-
static #REVISIT_PENALTY_PER_VISIT = 0.2; // per extra visit, multiplied by rewardScale
|
|
160
|
-
/** Visits threshold to trigger termination/harsh penalty */
|
|
161
|
-
static #VISIT_TERMINATION_THRESHOLD = 10;
|
|
162
|
-
/** Extremely harsh penalty for invalid moves (used sparingly) */
|
|
163
|
-
static #INVALID_MOVE_PENALTY_HARSH = 1000;
|
|
164
|
-
/** Mild penalty for invalid moves to preserve learning signal */
|
|
165
|
-
static #INVALID_MOVE_PENALTY_MILD = 10;
|
|
166
|
-
|
|
167
|
-
// Saturation / collapse thresholds and penalties
|
|
168
|
-
/** Probability threshold indicating overconfidence (near-deterministic) */
|
|
169
|
-
static #OVERCONFIDENT_PROB = 0.985;
|
|
170
|
-
/** Secondary-probability threshold used with overconfidence detection */
|
|
171
|
-
static #SECOND_PROB_LOW = 0.01;
|
|
172
|
-
/** Threshold for flat-collapse detection using log-std of outputs */
|
|
173
|
-
static #LOGSTD_FLAT_THRESHOLD = 0.01;
|
|
174
|
-
/** Penalty when network appears overconfident */
|
|
175
|
-
static #OVERCONFIDENT_PENALTY = 0.25; // * rewardScale
|
|
176
|
-
/** Penalty for flat collapse (no variance in outputs) */
|
|
177
|
-
static #FLAT_COLLAPSE_PENALTY = 0.35; // * rewardScale
|
|
178
|
-
/** Minimum saturations before applying bias adjustments */
|
|
179
|
-
static #SATURATION_ADJUST_MIN = 6;
|
|
180
|
-
/** Interval (in steps) used for saturation bias adjustment checks */
|
|
181
|
-
static #SATURATION_ADJUST_INTERVAL = 5;
|
|
182
|
-
/** Clamp for adaptive bias adjustments */
|
|
183
|
-
static #BIAS_CLAMP = 5;
|
|
184
|
-
/** Scaling factor used when adjusting biases to mitigate saturation */
|
|
185
|
-
static #BIAS_ADJUST_FACTOR = 0.5;
|
|
186
|
-
|
|
187
|
-
// Convenience thresholds and tuning knobs (centralized to avoid magic literals)
|
|
188
|
-
/** Warmup steps where exploration is encouraged */
|
|
189
|
-
static #EPSILON_WARMUP_STEPS = 10;
|
|
190
|
-
/** Steps-stagnant threshold to consider very stagnant (high epsilon) */
|
|
191
|
-
static #EPSILON_STAGNANT_HIGH_THRESHOLD = 12;
|
|
192
|
-
/** Steps-stagnant threshold to consider moderate stagnation */
|
|
193
|
-
static #EPSILON_STAGNANT_MED_THRESHOLD = 6;
|
|
194
|
-
/** Saturation count that triggers epsilon-increase behavior */
|
|
195
|
-
static #EPSILON_SATURATION_TRIGGER = 3;
|
|
196
|
-
/** Length used to detect tiny A->B oscillations */
|
|
197
|
-
static #OSCILLATION_DETECT_LENGTH = 4;
|
|
198
|
-
/** Saturation penalty trigger (>=) */
|
|
199
|
-
static #SATURATION_PENALTY_TRIGGER = 5;
|
|
200
|
-
/** Period (in steps) to escalate saturation penalty */
|
|
201
|
-
static #SATURATION_PENALTY_PERIOD = 10;
|
|
202
|
-
/** Start step for global break bonus when breaking long stagnation */
|
|
203
|
-
static #GLOBAL_BREAK_BONUS_START = 10;
|
|
204
|
-
/** Per-step bonus for global break beyond the start threshold */
|
|
205
|
-
static #GLOBAL_BREAK_BONUS_PER_STEP = 0.01;
|
|
206
|
-
/** Cap for the global break bonus */
|
|
207
|
-
static #GLOBAL_BREAK_BONUS_CAP = 0.5;
|
|
208
|
-
/** Number of steps since improvement to begin repetition penalty scaling */
|
|
209
|
-
static #REPETITION_PENALTY_START = 4;
|
|
210
|
-
/** Weight for entropy bonus on failed runs */
|
|
211
|
-
static #ENTROPY_BONUS_WEIGHT = 4;
|
|
212
|
-
|
|
213
|
-
// Vision input layout indices (groups used by hasGuidance checks)
|
|
214
|
-
/** Start index of LOS group within vision vector */
|
|
215
|
-
static #VISION_LOS_START = 8;
|
|
216
|
-
/** Start index of gradient group within vision vector */
|
|
217
|
-
static #VISION_GRAD_START = 12;
|
|
218
|
-
/** Number of elements in each vision group (LOS / Gradient) */
|
|
219
|
-
static #VISION_GROUP_LEN = 4;
|
|
220
|
-
|
|
221
|
-
// Proximity/exploration tuning
|
|
222
|
-
/** Distance (in cells) within which greedy proximity moves are prioritized */
|
|
223
|
-
static #PROXIMITY_GREEDY_DISTANCE = 2;
|
|
224
|
-
/** Distance threshold to reduce epsilon exploration near goal */
|
|
225
|
-
static #PROXIMITY_SUPPRESS_EXPLOR_DIST = 5;
|
|
226
|
-
/** Initial epsilon for epsilon-greedy exploration */
|
|
227
|
-
static #EPSILON_INITIAL = 0.35;
|
|
228
|
-
/** Epsilon used when the agent is highly stagnant */
|
|
229
|
-
static #EPSILON_STAGNANT_HIGH = 0.5;
|
|
230
|
-
/** Epsilon used for moderate stagnation */
|
|
231
|
-
static #EPSILON_STAGNANT_MED = 0.25;
|
|
232
|
-
/** Epsilon used when network saturations are detected */
|
|
233
|
-
static #EPSILON_SATURATIONS = 0.3;
|
|
234
|
-
/** Minimum epsilon allowed when near the goal */
|
|
235
|
-
static #EPSILON_MIN_NEAR_GOAL = 0.05;
|
|
236
|
-
/** Streak length used to trigger forced exploration */
|
|
237
|
-
static #NO_MOVE_STREAK_THRESHOLD = 5;
|
|
238
|
-
|
|
239
|
-
// Local area stagnation
|
|
240
|
-
/** Size of the recent-positions sliding window for local stagnation detection */
|
|
241
|
-
static #LOCAL_WINDOW = 30;
|
|
242
|
-
/** Max span (in cells) considered "local" for oscillation penalties */
|
|
243
|
-
static #LOCAL_AREA_SPAN_THRESHOLD = 5;
|
|
244
|
-
/** Steps without improvement before local-area stagnation penalty applies */
|
|
245
|
-
static #LOCAL_AREA_STAGNATION_STEPS = 8;
|
|
246
|
-
/** Amount applied to local area penalty when tight oscillation detected (multiplied by rewardScale) */
|
|
247
|
-
static #LOCAL_AREA_PENALTY_AMOUNT = 0.05;
|
|
248
|
-
|
|
249
|
-
// Progress reward shaping
|
|
250
|
-
/** Base reward for making forward progress toward the exit */
|
|
251
|
-
static #PROGRESS_REWARD_BASE = 0.3;
|
|
252
|
-
/** Additional progress reward scaled by network confidence */
|
|
253
|
-
static #PROGRESS_REWARD_CONF_SCALE = 0.7;
|
|
254
|
-
/** Multiplier applied per step-since-improvement for extra reward shaping */
|
|
255
|
-
static #PROGRESS_STEPS_MULT = 0.02;
|
|
256
|
-
/** Maximum steps-based progress contribution (times rewardScale) */
|
|
257
|
-
static #PROGRESS_STEPS_MAX = 0.5; // times rewardScale
|
|
258
|
-
/** Scale applied to raw distance-delta when shaping reward */
|
|
259
|
-
static #DISTANCE_DELTA_SCALE = 2.0;
|
|
260
|
-
/** Base confidence factor for distance-delta shaping */
|
|
261
|
-
static #DISTANCE_DELTA_CONF_BASE = 0.4;
|
|
262
|
-
/** Additional confidence scale applied to distance-delta shaping */
|
|
263
|
-
static #DISTANCE_DELTA_CONF_SCALE = 0.6;
|
|
264
|
-
/** Base penalty applied when a move increases distance to goal (multiplied by rewardScale) */
|
|
265
|
-
static #PROGRESS_AWAY_BASE_PENALTY = 0.05;
|
|
266
|
-
/** Additional scaling applied to away penalty proportional to network confidence */
|
|
267
|
-
static #PROGRESS_AWAY_CONF_SCALE = 0.15;
|
|
268
|
-
|
|
269
|
-
// Entropy tuning
|
|
270
|
-
/** Entropy value above which the action distribution is considered too uniform */
|
|
271
|
-
static #ENTROPY_HIGH_THRESHOLD = 0.95;
|
|
272
|
-
/** Entropy value below which the distribution is considered confident */
|
|
273
|
-
static #ENTROPY_CONFIDENT_THRESHOLD = 0.55;
|
|
274
|
-
/** Required gap between top two probs to treat as confident */
|
|
275
|
-
static #ENTROPY_CONFIDENT_DIFF = 0.25;
|
|
276
|
-
/** Small penalty applied when entropy is persistently high */
|
|
277
|
-
static #ENTROPY_PENALTY = 0.03; // * rewardScale
|
|
278
|
-
/** Tiny bonus for clear decisions that aid exploration */
|
|
279
|
-
static #EXPLORATION_BONUS_SMALL = 0.015; // * rewardScale
|
|
280
|
-
/** Base repetition/backtrack penalty applied when repeating same action without improvement */
|
|
281
|
-
static #REPETITION_PENALTY_BASE = 0.05;
|
|
282
|
-
/** Penalty for making the direct opposite move (when it doesn't improve) */
|
|
283
|
-
static #BACK_MOVE_PENALTY = 0.2;
|
|
284
|
-
|
|
285
|
-
// Saturation penalties
|
|
286
|
-
/** Base penalty applied when saturation is detected */
|
|
287
|
-
static #SATURATION_PENALTY_BASE = 0.05; // * rewardScale
|
|
288
|
-
/** Escalating penalty applied periodically when saturation persists */
|
|
289
|
-
static #SATURATION_PENALTY_ESCALATE = 0.1; // * rewardScale when escalation applies
|
|
290
|
-
|
|
291
|
-
// Deep stagnation
|
|
292
|
-
/** Steps without improvement that trigger deep-stagnation handling */
|
|
293
|
-
static #DEEP_STAGNATION_THRESHOLD = 40;
|
|
294
|
-
/** Penalty applied when deep stagnation is detected (non-browser environments) */
|
|
295
|
-
static #DEEP_STAGNATION_PENALTY = 2; // * rewardScale
|
|
296
|
-
// Action/output dimension and softmax/entropy tuning
|
|
297
|
-
/** Number of cardinal actions (N,E,S,W) */
|
|
298
|
-
static #ACTION_DIM = 4;
|
|
299
|
-
/** Natural log of ACTION_DIM; used to normalize entropy calculations */
|
|
300
|
-
static #LOG_ACTIONS = Math.log(MazeMovement.#ACTION_DIM);
|
|
301
|
-
/**
|
|
302
|
-
* Pooled scratch buffers used by `selectDirection` to avoid per-call
|
|
303
|
-
* allocations on the softmax/entropy hot path.
|
|
304
|
-
*
|
|
305
|
-
* @remarks
|
|
306
|
-
* - These are class-private and reused across calls; `selectDirection` is
|
|
307
|
-
* therefore not reentrant and should not be called concurrently.
|
|
308
|
-
*/
|
|
309
|
-
static #SCRATCH_CENTERED = new Float64Array(4);
|
|
310
|
-
static #SCRATCH_EXPS = new Float64Array(4);
|
|
311
|
-
/** Small pooled scratch for temporary integer coordinate coercion. */
|
|
312
|
-
static #COORD_SCRATCH = new Int32Array(2);
|
|
313
|
-
/** Representation for 'no move' direction */
|
|
314
|
-
static #NO_MOVE = -1;
|
|
315
|
-
/** Minimum standard deviation used to prevent division by zero */
|
|
316
|
-
static #STD_MIN = 1e-6;
|
|
317
|
-
/** Thresholds for collapse ratio decisions based on std */
|
|
318
|
-
static #COLLAPSE_STD_THRESHOLD = 0.01;
|
|
319
|
-
/** Secondary threshold used when std indicates medium collapse */
|
|
320
|
-
static #COLLAPSE_STD_MED = 0.03;
|
|
321
|
-
// Collapse ratio constants used for adaptive temperature
|
|
322
|
-
/** Full collapse ratio used when std is extremely low */
|
|
323
|
-
static #COLLAPSE_RATIO_FULL = 1;
|
|
324
|
-
/** Partial collapse ratio used for medium collapse */
|
|
325
|
-
static #COLLAPSE_RATIO_HALF = 0.5;
|
|
326
|
-
/** Base term used when computing the adaptive softmax temperature */
|
|
327
|
-
static #TEMPERATURE_BASE = 1;
|
|
328
|
-
/** Scale factor applied when computing adaptive softmax temperature */
|
|
329
|
-
static #TEMPERATURE_SCALE = 1.2;
|
|
330
|
-
|
|
331
|
-
// Network history and randomness
|
|
332
|
-
/** History length for recent output snapshots (used for variance diagnostics) */
|
|
333
|
-
static #OUTPUT_HISTORY_LENGTH = 80;
|
|
334
|
-
/**
|
|
335
|
-
* Applies to `#OUTPUT_HISTORY_LENGTH` above: number of output snapshots kept for variance diagnostics.
|
|
336
|
-
* Larger values smooth variance estimates at the cost of memory.
|
|
337
|
-
*/
|
|
338
|
-
/** Small randomness added to fitness to break ties stably */
|
|
339
|
-
static #FITNESS_RANDOMNESS = 0.01;
|
|
340
|
-
|
|
341
|
-
// Success fitness constants
|
|
342
|
-
/** Base fitness given for successful maze completion */
|
|
343
|
-
static #SUCCESS_BASE_FITNESS = 650;
|
|
344
|
-
/** Scale applied for remaining steps on success to reward efficiency */
|
|
345
|
-
static #STEP_EFFICIENCY_SCALE = 0.2;
|
|
346
|
-
/** Weight for action-entropy bonus on successful runs */
|
|
347
|
-
static #SUCCESS_ACTION_ENTROPY_SCALE = 5;
|
|
348
|
-
/** Minimum clamp for any successful-run fitness */
|
|
349
|
-
static #MIN_SUCCESS_FITNESS = 150;
|
|
350
|
-
|
|
351
|
-
// Exploration / revisiting tuning
|
|
352
|
-
/** Bonus reward for discovering a previously unvisited cell */
|
|
353
|
-
static #NEW_CELL_EXPLORATION_BONUS = 0.3;
|
|
354
|
-
/** Strong penalty factor for revisiting cells */
|
|
355
|
-
static #REVISIT_PENALTY_STRONG = 0.5;
|
|
356
|
-
|
|
357
|
-
// Progress shaping constants
|
|
358
|
-
/** Exponent used in non-linear progress shaping */
|
|
359
|
-
static #PROGRESS_POWER = 1.3;
|
|
360
|
-
/** Scale used to convert shaped progress into fitness contribution */
|
|
361
|
-
static #PROGRESS_SCALE = 500;
|
|
362
|
-
|
|
363
|
-
/** Node type string used in network node objects */
|
|
364
|
-
static #NODE_TYPE_OUTPUT = 'output';
|
|
365
|
-
|
|
366
|
-
/** Direction deltas for cardinal moves: N, E, S, W */
|
|
367
|
-
static #DIRECTION_DELTAS: readonly [number, number][] = [
|
|
368
|
-
[0, -1], // North
|
|
369
|
-
[1, 0], // East
|
|
370
|
-
[0, 1], // South
|
|
371
|
-
[-1, 0], // West
|
|
372
|
-
];
|
|
373
|
-
/** Lookup table for opposite directions (index -> opposite index). */
|
|
374
|
-
static #OPPOSITE_DIR: readonly number[] = [2, 3, 0, 1];
|
|
375
|
-
|
|
376
|
-
// ---------------------------------------------------------------------------
|
|
377
|
-
// Pooled / reusable typed-array buffers (non‑reentrant) for simulation state
|
|
378
|
-
// ---------------------------------------------------------------------------
|
|
379
|
-
/** Visited flag per cell (0/1). Reused across simulations. @remarks Non-reentrant. */
|
|
380
|
-
static #VisitedFlags: Uint8Array | null = null;
|
|
381
|
-
/** Visit counts per cell (clamped). @remarks Non-reentrant. */
|
|
382
|
-
static #VisitCounts: Uint16Array | null = null;
|
|
383
|
-
/** Path X coordinates (index-aligned with #PathY). */
|
|
384
|
-
static #PathX: Int32Array | null = null;
|
|
385
|
-
/** Path Y coordinates (index-aligned with #PathX). */
|
|
386
|
-
static #PathY: Int32Array | null = null;
|
|
387
|
-
/** Capacity (cells) currently allocated for grid‑dependent arrays. */
|
|
388
|
-
static #GridCapacity = 0;
|
|
389
|
-
/** Capacity (steps) currently allocated for path arrays. */
|
|
390
|
-
static #PathCapacity = 0;
|
|
391
|
-
/** Cached maze width for index calculations. */
|
|
392
|
-
static #CachedWidth = 0;
|
|
393
|
-
/** Cached maze height for bounds validation. */
|
|
394
|
-
static #CachedHeight = 0;
|
|
395
|
-
|
|
396
|
-
/** Pooled softmax output (returned as a cloned plain array). */
|
|
397
|
-
static #SOFTMAX = new Float64Array(4);
|
|
398
|
-
|
|
399
|
-
/**
|
|
400
|
-
* Seedable PRNG state (Mulberry32 style) stored in a pooled Uint32Array.
|
|
401
|
-
* - When `null`, the implementation falls back to `Math.random()`.
|
|
402
|
-
* - Using a typed-array for the single-word state avoids repeated
|
|
403
|
-
* heap allocations when reseeding and makes in-place updates explicit.
|
|
404
|
-
*/
|
|
405
|
-
static #PRNGState: Uint32Array | null = null;
|
|
406
|
-
|
|
407
|
-
// ---------------------------------------------------------------------------
|
|
408
|
-
// Internal mutable run-scoped state (replaces (MazeMovement as any).foo uses)
|
|
409
|
-
// ---------------------------------------------------------------------------
|
|
410
|
-
/** Rolling saturation counter used for adaptive penalties */
|
|
411
|
-
static #StateSaturations = 0;
|
|
412
|
-
/** Consecutive steps with no movement to trigger forced exploration */
|
|
413
|
-
static #StateNoMoveStreak = 0;
|
|
414
|
-
/** Previous distance value supplied to vision builder */
|
|
415
|
-
static #StatePrevDistanceStep: number | undefined = undefined;
|
|
416
|
-
|
|
417
|
-
/**
 * Report whether a candidate move target can be entered.
 *
 * Two call shapes are accepted:
 * - tuple form:   `isValidMove(encodedMaze, [x, y])`
 * - numeric form: `isValidMove(encodedMaze, x, y)`
 *
 * How it works:
 * 1) The coordinates are truncated to 32-bit integers (`| 0`) and staged in
 *    the pooled `#COORD_SCRATCH` buffer, so no temporary objects are created.
 * 2) The actual bounds / wall decision is delegated to `#isCellOpen`.
 *
 * Edge cases:
 * - Numeric form with `y` omitted treats `y` as `0`.
 * - A second argument that is neither a number nor an array of exactly two
 *   entries yields `false` without consulting the maze.
 *
 * @remarks Writes to the shared `#COORD_SCRATCH` buffer on every call.
 *
 * @param encodedMaze - 2D read-only numeric maze (-1 === wall)
 * @param position - tuple `[x, y]` OR the numeric `x` coordinate
 * @param y - numeric `y` coordinate when `x` and `y` are passed separately
 * @returns whatever `#isCellOpen` reports for the normalized coordinates
 * @example
 * MazeMovement.isValidMove(encodedMaze, [3, 2]); // tuple form
 * MazeMovement.isValidMove(encodedMaze, 3, 2);   // numeric form
 */
static isValidMove(
  encodedMaze: ReadonlyArray<ReadonlyArray<number>>,
  position: readonly [number, number],
): boolean;
static isValidMove(
  encodedMaze: ReadonlyArray<ReadonlyArray<number>>,
  x: number,
  y: number,
): boolean;
static isValidMove(
  encodedMaze: ReadonlyArray<ReadonlyArray<number>>,
  positionOrX: readonly [number, number] | number,
  yMaybe?: number,
): boolean {
  const scratch = MazeMovement.#COORD_SCRATCH;

  if (typeof positionOrX === 'number') {
    // Numeric form: a missing y falls back to 0.
    scratch[0] = positionOrX | 0;
    scratch[1] = (yMaybe ?? 0) | 0;
  } else if (Array.isArray(positionOrX) && positionOrX.length === 2) {
    // Tuple form: only arrays of exactly two entries are accepted.
    const [tupleX, tupleY] = positionOrX;
    scratch[0] = tupleX | 0;
    scratch[1] = tupleY | 0;
  } else {
    return false;
  }

  // Single delegation point for both call shapes.
  return MazeMovement.#isCellOpen(encodedMaze, scratch[0], scratch[1]);
}
|
|
484
|
-
|
|
485
|
-
/**
 * Draw the next pseudo-random number in [0,1).
 *
 * Two modes:
 * - Unseeded: when `#PRNGState` is `null` (or a zero-length array) the call
 *   is forwarded to `Math.random()`.
 * - Seeded: when `#PRNGState` holds a state word, a Mulberry32-style step is
 *   applied. The state word is advanced in place, so the same starting word
 *   always produces the same sequence.
 *
 * Seeded steps:
 * 1) Add the odd constant `0x6d2b79f5` to the state word (wraps at 2^32).
 * 2) Mix the new word with two xor / `Math.imul` rounds.
 * 3) Apply a final xor-shift and divide the unsigned result by 2^32.
 *
 * @returns number in range [0,1)
 */
static #rand(): number {
  const state = MazeMovement.#PRNGState;
  if (state == null || state.length === 0) return Math.random();

  // Step 1: advance the seed word; the Uint32Array store wraps modulo 2^32.
  state[0] += 0x6d2b79f5;
  const seed = state[0];

  // Step 2: two multiply/xor mixing rounds, each kept as an unsigned 32-bit value.
  const roundOne = Math.imul(seed ^ (seed >>> 15), seed | 1) >>> 0;
  const roundTwo =
    (roundOne ^
      (roundOne + Math.imul(roundOne ^ (roundOne >>> 7), roundOne | 61))) >>>
    0;

  // Step 3: final xor-shift, then scale the uint32 into [0,1).
  const scrambled = (roundTwo ^ (roundTwo >>> 14)) >>> 0;
  return scrambled / 4294967296; // 2^32
}
|
|
536
|
-
|
|
537
|
-
/**
 * Flatten a 2D cell coordinate into a linear index: `y * width + x`.
 *
 * The width is read from `#CachedWidth`, and the row offset is computed with
 * `Math.imul` (32-bit integer multiplication).
 *
 * @param x - Column coordinate (0-based)
 * @param y - Row coordinate (0-based)
 * @returns Linear index for the cell
 * @example
 * // With a cached width of 10: (x=3, y=2) -> 2 * 10 + 3 = 23
 * const idx = MazeMovement.#index(3, 2);
 */
static #index(x: number, y: number): number {
  // Cells in the full rows above `y`, plus the column offset within row `y`.
  return Math.imul(y, MazeMovement.#CachedWidth) + x;
}
|
|
569
|
-
|
|
570
|
-
/**
 * Prepare the pooled typed-array buffers for a maze of the given size and a
 * path of at most `maxSteps + 1` entries.
 *
 * What happens:
 * 1) Grid buffers (`#VisitedFlags`, `#VisitCounts`): if none exist yet, or
 *    `width * height` exceeds `#GridCapacity`, fresh (zeroed) arrays sized by
 *    `#nextPow2` are allocated and the capacity is recorded. Otherwise the
 *    existing arrays are kept and only their first `width * height` entries
 *    are reset to 0.
 * 2) Path buffers (`#PathX`, `#PathY`): reallocated (sized by `#nextPow2`)
 *    only when missing or smaller than `maxSteps + 1`. Reused path buffers
 *    are NOT cleared here.
 * 3) `#CachedWidth` / `#CachedHeight` are updated to the supplied dimensions.
 *
 * @param width - maze width (columns)
 * @param height - maze height (rows)
 * @param maxSteps - maximum expected path length (safety bound)
 * @example
 * MazeMovement.#initBuffers(32, 20, 1500);
 */
static #initBuffers(width: number, height: number, maxSteps: number) {
  // --- Grid buffers -------------------------------------------------------
  const cellsNeeded = width * height;
  const visitedFlags = MazeMovement.#VisitedFlags;

  if (visitedFlags && !(cellsNeeded > MazeMovement.#GridCapacity)) {
    // Existing pool is big enough: wipe just the region this maze will use.
    visitedFlags.fill(0, 0, cellsNeeded);
    MazeMovement.#VisitCounts!.fill(0, 0, cellsNeeded);
  } else {
    // No pool yet, or it is too small: allocate new zero-initialised arrays.
    const gridCapacity = MazeMovement.#nextPow2(cellsNeeded);
    MazeMovement.#VisitedFlags = new Uint8Array(gridCapacity);
    MazeMovement.#VisitCounts = new Uint16Array(gridCapacity);
    MazeMovement.#GridCapacity = gridCapacity;
  }

  // --- Path buffers (one extra entry for the starting position) -----------
  const pathEntriesNeeded = maxSteps + 1;
  if (
    !MazeMovement.#PathX ||
    pathEntriesNeeded > MazeMovement.#PathCapacity
  ) {
    const pathCapacity = MazeMovement.#nextPow2(pathEntriesNeeded);
    MazeMovement.#PathX = new Int32Array(pathCapacity);
    MazeMovement.#PathY = new Int32Array(pathCapacity);
    MazeMovement.#PathCapacity = pathCapacity;
  }

  // --- Dimension cache used by the indexing helpers -----------------------
  MazeMovement.#CachedWidth = width;
  MazeMovement.#CachedHeight = height;
}
|
|
625
|
-
|
|
626
|
-
/**
 * Return the smallest power-of-two integer >= `n`.
 *
 * Implementation notes:
 * - `n` is floored and clamped to a minimum of 1 before use.
 * - For requests that fit in an unsigned 32-bit integer the exponent is
 *   derived from `Math.clz32(requested - 1)`.
 * - The power itself is computed with `2 ** exponent` rather than a left
 *   shift: shift results are signed 32-bit values, so `1 << 31` is negative
 *   and a further `<< 1` is 0, which would give a wrong answer for any
 *   request above 2^30.
 * - Requests above the 32-bit range fall back to a doubling loop.
 *
 * @param n - Target minimum integer (expected positive)
 * @returns The smallest power of two >= n (1 for `n <= 1` or `NaN`)
 * @example
 * MazeMovement.#nextPow2(13) === 16
 * MazeMovement.#nextPow2(2 ** 30 + 1) === 2 ** 31
 */
static #nextPow2(n: number): number {
  // Step 1: sanitize input and handle trivial cases
  const requested = Math.max(1, Math.floor(n));
  if (requested <= 1) return 1;

  // Step 2: 32-bit path. `requested - 1` lies in [1, 2^32 - 2], so clz32
  // returns 0..31 and the exponent lies in 1..32.
  if (requested <= 0xffffffff) {
    const exponent = 32 - Math.clz32(requested - 1);
    // Exponentiation stays exact up to 2^32; a shift would overflow at 2^31.
    return 2 ** exponent;
  }

  // Step 3: safe fallback for very large numbers — doubling loop (rare)
  let power = 1;
  while (power < requested) power *= 2;
  return power;
}
|
|
668
|
-
|
|
669
|
-
/**
 * Type guard: is `candidate` an array whose entries are all finite numbers?
 *
 * An empty array passes, because there is no offending entry.
 *
 * @param candidate - Value to test for numeric array semantics.
 * @returns True when candidate is an array with no non-finite / non-number entry.
 */
static #isNumberArray(candidate: unknown): candidate is number[] {
  if (!Array.isArray(candidate)) return false;

  // Reject as soon as one entry is not a finite number.
  const hasInvalidEntry = candidate.some(
    (entry: unknown) => typeof entry !== 'number' || !Number.isFinite(entry),
  );
  return !hasInvalidEntry;
}
|
|
683
|
-
|
|
684
|
-
/**
 * Fetch the `_lastStepOutputs` history attached to a network, if it is usable.
 *
 * The property is read reflectively and accepted only when it is an array
 * whose every row passes `#isNumberArray`.
 *
 * @param network - Network instance that may provide an outputs history.
 * @returns The stored history, or `undefined` when absent or malformed.
 */
static #readOutputHistory(network: INetwork): number[][] | undefined {
  const stored: unknown = Reflect.get(network as object, '_lastStepOutputs');
  if (!Array.isArray(stored)) return undefined;

  const everyRowNumeric = stored.every((row: unknown) =>
    MazeMovement.#isNumberArray(row),
  );
  if (!everyRowNumeric) return undefined;

  return stored as number[][];
}
|
|
697
|
-
|
|
698
|
-
/**
 * Store the outputs history on the network under `_lastStepOutputs`.
 *
 * The write goes through `Reflect.set`; its boolean result is ignored.
 *
 * @param network - Target network to mutate.
 * @param history - History buffer to attach.
 */
static #writeOutputHistory(network: INetwork, history: number[][]): void {
  const target = network as object;
  Reflect.set(target, '_lastStepOutputs', history);
}
|
|
707
|
-
|
|
708
|
-
/**
 * Copy the first `length` entries of the pooled `#PathX` / `#PathY` buffers
 * into a brand-new array of `[x, y]` tuples.
 *
 * The returned array is independent of the pooled buffers, so callers can
 * keep or modify it without affecting later simulations.
 *
 * Steps:
 * 1) Floor `length` and clamp it to a minimum of 0; return `[]` when it is 0.
 * 2) Grab the pooled coordinate buffers once.
 * 3) Pre-size the result and fill it tuple by tuple.
 *
 * @param length - number of path entries to materialize (usually `state.pathLength`)
 * @returns A newly allocated array of `[x, y]` tuples with `length` entries.
 * @example
 * const pathSnapshot = MazeMovement.#materializePath(state.pathLength);
 */
static #materializePath(length: number): [number, number][] {
  const count = Math.max(0, Math.floor(length));
  if (count === 0) return [];

  // Non-null assertions: the buffers are expected to exist once a path has entries.
  const xs = MazeMovement.#PathX!;
  const ys = MazeMovement.#PathY!;

  const snapshot = new Array<[number, number]>(count);
  let cursor = 0;
  while (cursor < count) {
    snapshot[cursor] = [xs[cursor], ys[cursor]];
    cursor++;
  }
  return snapshot;
}
|
|
750
|
-
|
|
751
|
-
/**
 * Add up one group of `#VISION_GROUP_LEN` consecutive entries of the vision
 * vector, beginning at `start`.
 *
 * Behaviour:
 * - `start` is truncated to a 32-bit integer and clamped to a minimum of 0.
 * - The group is cut short at the end of `vision`, so an out-of-range start
 *   or a short vector produces a smaller (possibly empty) sum instead of an
 *   error.
 * - `null` / `undefined` entries count as 0.
 *
 * @remarks
 * Side effect: each summed value is also written into the shared
 * `#SCRATCH_CENTERED` buffer (starting at slot 0), overwriting whatever was
 * there. The buffer is not cleared afterwards.
 * NOTE(review): assumes `#VISION_GROUP_LEN` does not exceed that buffer's
 * length — confirm against the constant's declaration.
 *
 * @param vision - flat array of numeric vision inputs
 * @param start - start index of the group to sum
 * @returns numeric sum of the group (0 for empty/out-of-range input)
 * @example
 * const losSum = MazeMovement.#sumVisionGroup(visionVector, MazeMovement.#VISION_LOS_START);
 */
static #sumVisionGroup(vision: number[], start: number) {
  // Resolve the half-open range [from, to) covered by this group.
  const from = Math.max(0, start | 0);
  const to = Math.min(vision.length, from + MazeMovement.#VISION_GROUP_LEN);
  if (from >= to) return 0;

  const scratch = MazeMovement.#SCRATCH_CENTERED;
  let total = 0;
  for (let offset = 0; from + offset < to; offset++) {
    const sample = vision[from + offset] ?? 0;
    scratch[offset] = sample; // mirrored into the pooled scratch buffer
    total += sample;
  }
  return total;
}
|
|
801
|
-
|
|
802
|
-
/**
 * Pick the exploration rate (epsilon) for the current simulation step.
 *
 * The first matching rule wins:
 * 1) warmup   — `stepNumber` below `#EPSILON_WARMUP_STEPS`
 *               -> `#EPSILON_INITIAL`
 * 2) high stagnation — `stepsSinceImprovement` above
 *               `#EPSILON_STAGNANT_HIGH_THRESHOLD` -> `#EPSILON_STAGNANT_HIGH`
 * 3) moderate stagnation — `stepsSinceImprovement` above
 *               `#EPSILON_STAGNANT_MED_THRESHOLD` -> `#EPSILON_STAGNANT_MED`
 * 4) saturation — `saturations` above `#EPSILON_SATURATION_TRIGGER`
 *               -> `#EPSILON_SATURATIONS`
 * 5) otherwise -> 0 (no forced exploration)
 *
 * Afterwards, if `distHere` is at or below
 * `#PROXIMITY_SUPPRESS_EXPLOR_DIST`, the result is capped at
 * `#EPSILON_MIN_NEAR_GOAL` so the agent explores less close to the goal.
 *
 * @param stepNumber - global step index inside the simulation loop
 * @param stepsSinceImprovement - number of steps since last improvement
 * @param distHere - current distance-to-goal (used to suppress exploration)
 * @param saturations - rolling saturation count used for bias adjustments
 * @returns epsilon value used for epsilon-greedy exploration
 * @example
 * const eps = MazeMovement.#computeEpsilon(step, state.stepsSinceImprovement, state.distHere, MazeMovement.#StateSaturations);
 */
static #computeEpsilon(
  stepNumber: number,
  stepsSinceImprovement: number,
  distHere: number,
  saturations: number,
): number {
  // Priority chain: the order of these branches is the rule priority.
  let epsilon: number;
  if (stepNumber < MazeMovement.#EPSILON_WARMUP_STEPS) {
    epsilon = MazeMovement.#EPSILON_INITIAL;
  } else if (
    stepsSinceImprovement > MazeMovement.#EPSILON_STAGNANT_HIGH_THRESHOLD
  ) {
    epsilon = MazeMovement.#EPSILON_STAGNANT_HIGH;
  } else if (
    stepsSinceImprovement > MazeMovement.#EPSILON_STAGNANT_MED_THRESHOLD
  ) {
    epsilon = MazeMovement.#EPSILON_STAGNANT_MED;
  } else if (saturations > MazeMovement.#EPSILON_SATURATION_TRIGGER) {
    epsilon = MazeMovement.#EPSILON_SATURATIONS;
  } else {
    epsilon = 0;
  }

  // Near the goal, never exceed the small near-goal epsilon.
  const isNearGoal = distHere <= MazeMovement.#PROXIMITY_SUPPRESS_EXPLOR_DIST;
  return isNearGoal
    ? Math.min(epsilon, MazeMovement.#EPSILON_MIN_NEAR_GOAL)
    : epsilon;
}
|
|
876
|
-
|
|
877
|
-
/**
|
|
878
|
-
* Check whether a cell at (x, y) is inside the maze bounds and not a wall.
|
|
879
|
-
*
|
|
880
|
-
* Behaviour / rationale:
|
|
881
|
-
* - Prefers cached maze dimensions (when they match the provided maze)
|
|
882
|
-
* to avoid repeated nested property accesses inside hot loops.
|
|
883
|
-
* - Defensively guards against malformed inputs (empty rows / missing data)
|
|
884
|
-
* and treats those as non-open (equivalent to wall/out-of-bounds).
|
|
885
|
-
*
|
|
886
|
-
* Steps:
|
|
887
|
-
* 1) Resolve maze width/height (prefer cached values when appropriate).
|
|
888
|
-
* 2) Perform fast, descriptive bounds checks.
|
|
889
|
-
* 3) Read the cell once and compare against the wall sentinel (-1).
|
|
890
|
-
*
|
|
891
|
-
* @param encodedMaze - 2D read-only numeric maze representation (-1 == wall)
|
|
892
|
-
* @param x - zero-based column index to test
|
|
893
|
-
* @param y - zero-based row index to test
|
|
894
|
-
* @returns true when the cell exists and is not a wall
|
|
895
|
-
* @example
|
|
896
|
-
* // Typical usage inside simulation loop
|
|
897
|
-
* const open = MazeMovement.#isCellOpen(encodedMaze, x, y);
|
|
898
|
-
*/
|
|
899
|
-
static #isCellOpen(
  encodedMaze: ReadonlyArray<ReadonlyArray<number>>,
  x: number,
  y: number,
): boolean {
  // Dimensions of the maze as passed in. A missing maze or missing first
  // row counts as size 0, which makes every coordinate fail the bounds test.
  const providedRowCount = encodedMaze?.length ?? 0;
  const providedColumnCount = encodedMaze?.[0]?.length ?? 0;

  // The cached dimensions are used only when both of them equal the
  // dimensions of the maze that was actually provided.
  const cachedColumnCount = MazeMovement.#CachedWidth;
  const cachedRowCount = MazeMovement.#CachedHeight;
  const cacheMatchesMaze =
    cachedRowCount === providedRowCount &&
    cachedColumnCount === providedColumnCount;
  const mazeColumnCount =
    cachedColumnCount > 0 && cacheMatchesMaze
      ? cachedColumnCount
      : providedColumnCount;
  const mazeRowCount =
    cachedRowCount > 0 && cacheMatchesMaze ? cachedRowCount : providedRowCount;

  // Truncate the coordinates to 32-bit integers and round-trip them through
  // the shared coordinate scratch buffer, as the original implementation did.
  // NOTE(review): the buffer write is kept in case other code reads it.
  MazeMovement.#COORD_SCRATCH[0] = x | 0;
  MazeMovement.#COORD_SCRATCH[1] = y | 0;
  const col = MazeMovement.#COORD_SCRATCH[0];
  const row = MazeMovement.#COORD_SCRATCH[1];

  // Bounds checks against the resolved dimensions.
  if (row < 0 || row >= mazeRowCount) return false;
  if (col < 0 || col >= mazeColumnCount) return false;

  // A missing row is treated like a wall / out-of-bounds cell.
  const targetRow = encodedMaze[row];
  if (!targetRow) return false;

  // The column bound above is derived from the first row only, so a shorter
  // row yields `undefined` here. A cell that does not exist is not open;
  // without this guard `undefined !== -1` would report it as open.
  const cellValue = targetRow[col];
  if (cellValue === undefined) return false;

  // -1 is the wall sentinel; any other stored value is an open cell.
  return cellValue !== -1;
}
|
|
943
|
-
|
|
944
|
-
/**
|
|
945
|
-
* Unified distance lookup for a cell coordinate.
|
|
946
|
-
*
|
|
947
|
-
* Behaviour / rationale:
|
|
948
|
-
* - Fast-path: when a `distanceMap` is supplied and contains a finite
|
|
949
|
-
* numeric entry for the coordinate, that value is returned immediately.
|
|
950
|
-
* - Defensive: performs robust bounds checking and uses cached maze
|
|
951
|
-
* dimensions (when they match the provided maze) to avoid repeated
|
|
952
|
-
* nested property lookups in hot code paths.
|
|
953
|
-
* - Fallback: when no finite distance is available, returns `Infinity` to
|
|
954
|
-
* indicate unknown/unreachable distance (preserves previous behaviour).
|
|
955
|
-
*
|
|
956
|
-
* Steps:
|
|
957
|
-
* 1) Coerce incoming coordinates to 32-bit integers.
|
|
958
|
-
* 2) Fast-path check for a finite value in the optional `distanceMap`.
|
|
959
|
-
* 3) Validate bounds using cached dimensions when they align with the
|
|
960
|
-
* provided maze to reduce property access overhead.
|
|
961
|
-
* 4) If no distance found, return `Infinity` (unknown/unreachable).
|
|
962
|
-
*
|
|
963
|
-
* @param encodedMaze - 2D read-only numeric maze representation
|
|
964
|
-
* @param coords - readonly tuple [x, y] of zero-based coordinates
|
|
965
|
-
* @param distanceMap - optional precomputed distance map (same shape as maze)
|
|
966
|
-
* @returns finite distance number when available, otherwise `Infinity`
|
|
967
|
-
* @example
|
|
968
|
-
* const d = MazeMovement.#distanceAt(encodedMaze, [3,2], distanceMap);
|
|
969
|
-
*/
|
|
970
|
-
static #distanceAt(
  encodedMaze: ReadonlyArray<ReadonlyArray<number>>,
  [x, y]: readonly [number, number],
  distanceMap?: number[][],
): number {
  // Truncate both coordinates to 32-bit integers before indexing.
  const column = x | 0;
  const row = y | 0;

  // Fast path: a finite entry in the supplied distance map wins immediately.
  if (distanceMap) {
    const distanceRow = distanceMap[row];
    if (distanceRow !== undefined && Number.isFinite(distanceRow[column])) {
      return distanceRow[column];
    }
  }

  // Resolve the maze dimensions; the cached values are used only when both
  // of them equal the dimensions of the maze that was passed in.
  const providedHeight = encodedMaze?.length ?? 0;
  const providedWidth = encodedMaze?.[0]?.length ?? 0;
  const cachedWidth = MazeMovement.#CachedWidth;
  const cachedHeight = MazeMovement.#CachedHeight;
  const cacheMatchesMaze =
    cachedHeight === providedHeight && cachedWidth === providedWidth;
  const mazeWidth =
    cachedWidth > 0 && cacheMatchesMaze ? cachedWidth : providedWidth;
  const mazeHeight =
    cachedHeight > 0 && cacheMatchesMaze ? cachedHeight : providedHeight;

  // Coordinates outside the maze have no distance.
  const isOutsideMaze =
    column < 0 || column >= mazeWidth || row < 0 || row >= mazeHeight;
  if (isOutsideMaze) return Infinity;

  // In bounds but no finite precomputed distance: report unknown/unreachable.
  // No on-demand search is performed here, so the result is Infinity as well.
  return Infinity;
}
|
|
1018
|
-
|
|
1019
|
-
// ...existing code...
|
|
1020
|
-
|
|
1021
|
-
/**
|
|
1022
|
-
* Moves the agent in the specified direction if the move is valid.
|
|
1023
|
-
*
|
|
1024
|
-
* Handles collision detection with walls and maze boundaries,
|
|
1025
|
-
* preventing the agent from making invalid moves.
|
|
1026
|
-
*
|
|
1027
|
-
* @param encodedMaze - 2D array representation of the maze.
|
|
1028
|
-
* @param position - Current [x, y] position of the agent.
|
|
1029
|
-
* @param direction - Direction index (0=North, 1=East, 2=South, 3=West, -1=No move).
|
|
1030
|
-
* @returns { [number, number] } New position after movement, or original position if move was invalid.
|
|
1031
|
-
*/
|
|
1032
|
-
static moveAgent(
  encodedMaze: ReadonlyArray<ReadonlyArray<number>>,
  position: readonly [number, number],
  direction: number,
): [number, number] {
  // "No move" sentinel: return a fresh mutable copy of the current position
  // so callers never receive the readonly input tuple itself.
  if (direction === MazeMovement.#NO_MOVE) {
    return [position[0], position[1]];
  }

  // Work on a mutable copy; the input tuple is never modified.
  const nextPosition: [number, number] = [position[0], position[1]];

  // Apply the delta only for whole-number indices inside the action range.
  // A fractional value such as 1.5 passes a pure range check but has no entry
  // in the deltas table, which made the destructuring below throw.
  if (
    Number.isInteger(direction) &&
    direction >= 0 &&
    direction < MazeMovement.#ACTION_DIM
  ) {
    const [dx, dy] = MazeMovement.#DIRECTION_DELTAS[direction];
    nextPosition[0] += dx;
    nextPosition[1] += dy;
  }

  // Accept the candidate only when the maze allows it; otherwise stay put.
  if (MazeMovement.isValidMove(encodedMaze, nextPosition)) {
    return nextPosition;
  }
  return [position[0], position[1]];
}
|
|
1064
|
-
|
|
1065
|
-
/**
|
|
1066
|
-
* Choose an action index from network outputs.
|
|
1067
|
-
*
|
|
1068
|
-
* Behaviour:
|
|
1069
|
-
* - Centers the raw outputs (logits), computes an adaptive temperature
|
|
1070
|
-
* based on collapse heuristics, performs a numerically-stable softmax
|
|
1071
|
-
* into pooled scratch buffers, and returns argmax plus diagnostics.
|
|
1072
|
-
* - Reuses pooled typed-array scratch buffers to avoid per-call
|
|
1073
|
-
* allocations; the method is therefore non-reentrant.
|
|
1074
|
-
*
|
|
1075
|
-
* Steps (implemented inline with comments):
|
|
1076
|
-
* 1) Validate inputs and early-return a safe default for malformed inputs.
|
|
1077
|
-
* 2) Center logits and compute variance/std for adaptive temperature.
|
|
1078
|
-
* 3) Compute softmax in pooled buffers with numerical-stability trick.
|
|
1079
|
-
* 4) Determine argmax (best action) and second-best probability.
|
|
1080
|
-
* 5) Compute normalized entropy and return a defensive copy of softmax.
|
|
1081
|
-
*
|
|
1082
|
-
* @param outputs - Array of raw network outputs (logits), expected length === #ACTION_DIM
|
|
1083
|
-
* @returns An object with:
|
|
1084
|
-
* - direction: chosen action index (0..#ACTION_DIM-1) or #NO_MOVE on invalid input
|
|
1085
|
-
* - softmax: fresh array copy of probabilities (length #ACTION_DIM)
|
|
1086
|
-
* - entropy: normalized entropy in [0,1]
|
|
1087
|
-
* - maxProb: probability of the chosen action
|
|
1088
|
-
* - secondProb: probability of the runner-up action
|
|
1089
|
-
* @example
|
|
1090
|
-
* const result = MazeMovement.selectDirection([0.2, 1.4, -0.1, 0]);
|
|
1091
|
-
* // result.direction -> 1 (for example)
|
|
1092
|
-
*/
|
|
1093
|
-
static selectDirection(outputs: number[]): DirectionSelectionStats {
  const actionCount = MazeMovement.#ACTION_DIM;
  const softmaxBuffer = MazeMovement.#SOFTMAX;

  // Malformed input: report "no move" with zeroed statistics and a copy of
  // whatever the pooled softmax buffer currently holds.
  const isWellFormed =
    Array.isArray(outputs) && outputs.length === actionCount;
  if (!isWellFormed) {
    return {
      direction: MazeMovement.#NO_MOVE,
      softmax: Array.from(softmaxBuffer),
      entropy: 0,
      maxProb: 0,
      secondProb: 0,
    };
  }

  const centeredBuffer = MazeMovement.#SCRATCH_CENTERED;
  const expBuffer = MazeMovement.#SCRATCH_EXPS;

  // Mean of the raw outputs.
  let total = 0;
  for (let i = 0; i < actionCount; i++) {
    total += outputs[i];
  }
  const mean = total / actionCount;

  // Centre each output around the mean, accumulate the squared deviations
  // and track the largest centred value in the same pass. The maximum is
  // taken from the value read back out of the pooled buffer, because the
  // buffer's element type is not visible here and may round on store.
  let squaredDeviationSum = 0;
  let largestCentered = -Infinity;
  for (let i = 0; i < actionCount; i++) {
    const deviation = outputs[i] - mean;
    centeredBuffer[i] = deviation;
    squaredDeviationSum += deviation * deviation;
    const storedDeviation = centeredBuffer[i];
    if (storedDeviation > largestCentered) largestCentered = storedDeviation;
  }

  // Population standard deviation, floored at the configured minimum.
  let spread = Math.sqrt(squaredDeviationSum / actionCount);
  if (!Number.isFinite(spread) || spread < MazeMovement.#STD_MIN) {
    spread = MazeMovement.#STD_MIN;
  }

  // A smaller spread selects a larger collapse ratio, which raises the
  // softmax temperature above its base value.
  let collapseRatio = 0;
  if (spread < MazeMovement.#COLLAPSE_STD_THRESHOLD) {
    collapseRatio = MazeMovement.#COLLAPSE_RATIO_FULL;
  } else if (spread < MazeMovement.#COLLAPSE_STD_MED) {
    collapseRatio = MazeMovement.#COLLAPSE_RATIO_HALF;
  }
  const temperature =
    MazeMovement.#TEMPERATURE_BASE +
    MazeMovement.#TEMPERATURE_SCALE * collapseRatio;

  // Exponentials shifted by the maximum so the largest exponent is zero.
  let expTotal = 0;
  for (let i = 0; i < actionCount; i++) {
    const weight = Math.exp(
      (centeredBuffer[i] - largestCentered) / temperature,
    );
    expBuffer[i] = weight;
    expTotal += weight;
  }
  if (expTotal === 0) expTotal = 1; // avoid dividing by zero below

  // Normalise into the pooled softmax buffer while tracking the best and
  // runner-up probabilities and accumulating the entropy. The entropy term
  // uses the value read back from the buffer, as the separate pass did.
  let bestIndex = 0;
  let bestProb = -Infinity;
  let runnerUpProb = 0;
  let entropy = 0;
  for (let i = 0; i < actionCount; i++) {
    const probability = expBuffer[i] / expTotal;
    softmaxBuffer[i] = probability;

    if (probability > bestProb) {
      runnerUpProb = bestProb;
      bestProb = probability;
      bestIndex = i;
    } else if (probability > runnerUpProb) {
      runnerUpProb = probability;
    }

    const storedProbability = softmaxBuffer[i];
    if (storedProbability > 0) {
      entropy += -storedProbability * Math.log(storedProbability);
    }
  }
  // Scale the entropy by the class's log-actions constant.
  entropy /= MazeMovement.#LOG_ACTIONS;

  // Hand out a copy so callers cannot mutate the pooled buffer.
  return {
    direction: bestIndex,
    softmax: Array.from(softmaxBuffer),
    entropy,
    maxProb: bestProb,
    secondProb: runnerUpProb,
  };
}
|
|
1194
|
-
|
|
1195
|
-
/**
|
|
1196
|
-
* Simulates the agent navigating the maze using its neural network.
|
|
1197
|
-
*
|
|
1198
|
-
* Runs a complete simulation of an agent traversing a maze,
|
|
1199
|
-
* using its neural network for decision making. This implementation focuses
|
|
1200
|
-
* on a minimalist approach, putting more responsibility on the neural network.
|
|
1201
|
-
*
|
|
1202
|
-
* @param network - Neural network controlling the agent.
|
|
1203
|
-
* @param encodedMaze - 2D array representation of the maze.
|
|
1204
|
-
* @param startPos - Starting position [x,y] of the agent.
|
|
1205
|
-
* @param exitPos - Exit/goal position [x,y] of the maze.
* @param distanceMap - Optional precomputed distance map, passed through to the per-step helpers.
|
|
1206
|
-
* @param maxSteps - Maximum steps allowed before terminating (default 3000).
|
|
1207
|
-
* @returns Object containing:
|
|
1208
|
-
* - success: Boolean indicating if exit was reached.
|
|
1209
|
-
* - steps: Number of steps taken.
|
|
1210
|
-
* - path: Array of positions visited.
|
|
1211
|
-
* - fitness: Calculated fitness score for evolution.
|
|
1212
|
-
* - progress: Percentage progress toward exit (0-100).
|
|
1213
|
-
*/
|
|
1214
|
-
static simulateAgent(
  network: INetwork,
  encodedMaze: number[][],
  startPos: readonly [number, number],
  exitPos: readonly [number, number],
  distanceMap?: number[][],
  maxSteps = MazeMovement.#DEFAULT_MAX_STEPS,
): {
  success: boolean;
  steps: number;
  path: readonly [number, number][];
  fitness: number;
  progress: number;
  saturationFraction?: number;
  actionEntropy?: number;
} {
  const state = MazeMovement.#initRunState(
    encodedMaze,
    startPos,
    distanceMap,
    maxSteps,
  );

  // One iteration per simulated step. The helpers all mutate `state` and
  // are order-dependent, so the sequence below must not be rearranged.
  while (state.steps < maxSteps) {
    state.steps += 1;

    // 1) Visit bookkeeping and loop / memory / revisit penalties.
    MazeMovement.#recordVisitAndUpdatePenalties(state);

    // 2) Perception inputs and the distance at the current cell.
    MazeMovement.#buildVisionAndDistance(
      state,
      encodedMaze,
      exitPos,
      distanceMap,
    );

    // 3) Network decision, followed by the three possible overrides:
    //    proximity-greedy, epsilon exploration and forced exploration.
    MazeMovement.#decideDirection(state, network);
    MazeMovement.#maybeApplyProximityGreedy(state, encodedMaze, distanceMap);
    MazeMovement.#maybeApplyEpsilonExploration(state, encodedMaze);
    MazeMovement.#maybeForceExploration(state, encodedMaze);

    // 4) Perform the move, then apply rewards and post-action penalties.
    MazeMovement.#executeMoveAndRewards(state, encodedMaze, distanceMap);
    MazeMovement.#applyPostActionPenalties(state);

    // 5) Stop early when the stagnation helper asks for termination.
    const shouldStop = MazeMovement.#maybeTerminateDeepStagnation(state);
    if (shouldStop) break;

    // 6) Success: the agent stands on the exit cell.
    const agentX = state.position[0];
    const agentY = state.position[1];
    if (agentX === exitPos[0] && agentY === exitPos[1]) {
      return MazeMovement.#finalizeSuccess(state, maxSteps);
    }
  }

  // Step budget exhausted or run terminated early without reaching the exit.
  return MazeMovement.#finalizeFailure(
    state,
    encodedMaze,
    startPos,
    exitPos,
    distanceMap,
  );
}
|
|
1288
|
-
|
|
1289
|
-
// ---------------------------------------------------------------------------
|
|
1290
|
-
// Private helper methods (refactored from large simulateAgent body)
|
|
1291
|
-
// ---------------------------------------------------------------------------
|
|
1292
|
-
|
|
1293
|
-
/** Reset the run-level counters, prepare the pooled buffers, and build the initial simulation state for one run (internal helper). */
|
|
1294
|
-
static #initRunState(
  encodedMaze: number[][],
  startPos: readonly [number, number],
  distanceMap: number[][] | undefined,
  maxSteps: number,
): SimulationState {
  // Class-level counters are shared between runs and must start clean.
  MazeMovement.#StateSaturations = 0;
  MazeMovement.#StateNoMoveStreak = 0;
  MazeMovement.#StatePrevDistanceStep = undefined;

  // Size the pooled buffers for this maze and step budget.
  const rowCount = encodedMaze.length;
  const columnCount = encodedMaze[0].length;
  MazeMovement.#initBuffers(columnCount, rowCount, maxSteps);

  // The path buffers start with the start position in slot 0.
  const startX = startPos[0];
  const startY = startPos[1];
  const position: [number, number] = [startX, startY];
  MazeMovement.#PathX![0] = startX;
  MazeMovement.#PathY![0] = startY;

  // A distance map only counts when it has one row per maze row.
  const hasDistanceMap =
    Array.isArray(distanceMap) && distanceMap.length === rowCount;

  // Distance at the start cell via the shared accessor. It is reused for
  // `lastDistanceGlobal` and, when no usable map exists, as the initial
  // minimum distance.
  const startDistance = MazeMovement.#distanceAt(
    encodedMaze,
    position,
    distanceMap,
  );
  const minDistanceToExit = hasDistanceMap
    ? (distanceMap![startY]?.[startX] ?? Infinity)
    : startDistance;

  return {
    position,
    steps: 0,
    pathLength: 1,
    visitedUniqueCount: 0,
    hasDistanceMap,
    distanceMap,
    minDistanceToExit,
    progressReward: 0,
    newCellExplorationBonus: 0,
    invalidMovePenalty: 0,
    prevAction: MazeMovement.#NO_MOVE,
    stepsSinceImprovement: 0,
    lastDistanceGlobal: startDistance,
    saturatedSteps: 0,
    recentPositions: [] as [number, number][],
    localAreaPenalty: 0,
    directionCounts: [0, 0, 0, 0] as number[],
    moveHistoryRing: new Int32Array(MazeMovement.#MOVE_HISTORY_LENGTH),
    moveHistoryLength: 0,
    moveHistoryHead: 0,
    currentCellIndex: 0,
    loopPenalty: 0,
    memoryPenalty: 0,
    revisitPenalty: 0,
    visitsAtCurrent: 0,
    distHere: Infinity,
    vision: [] as number[],
    actionStats: null,
    direction: MazeMovement.#NO_MOVE,
    moved: false,
    prevDistance: Infinity,
    earlyTerminate: false,
  };
}
|
|
1356
|
-
|
|
1357
|
-
/**
|
|
1358
|
-
* Push a cell index into the circular move-history ring buffer.
|
|
1359
|
-
*
|
|
1360
|
-
* Behaviour / rationale:
|
|
1361
|
-
* - The history is stored in a preallocated `Int32Array` (`moveHistoryRing`) to
|
|
1362
|
-
* avoid allocations. This helper updates the head pointer and length in-place.
|
|
1363
|
-
* - The method is deliberately allocation-free and fast; callers use the
|
|
1364
|
-
* ring to detect tiny oscillations like A->B->A->B.
|
|
1365
|
-
*
|
|
1366
|
-
* Steps:
|
|
1367
|
-
* 1) Read local references to the ring, head and length for faster hot-path access.
|
|
1368
|
-
* 2) Write the provided `cellIndex` at the current head slot.
|
|
1369
|
-
* 3) Advance the head index modulo the ring capacity and store it back on state.
|
|
1370
|
-
* 4) If the ring was not yet full, increment the stored length.
|
|
1371
|
-
*
|
|
1372
|
-
* @param state - simulation state containing `moveHistoryRing`, `moveHistoryHead`, `moveHistoryLength`
|
|
1373
|
-
* @param cellIndex - linearized cell index to push into history
|
|
1374
|
-
* @returns void
|
|
1375
|
-
* @example
|
|
1376
|
-
* MazeMovement.#pushHistory(state, currentCellIndex);
|
|
1377
|
-
*/
|
|
1378
|
-
static #pushHistory(state: SimulationState, cellIndex: number) {
  const ring = state.moveHistoryRing;
  const capacity = ring.length;

  // A zero-capacity ring cannot store anything.
  if (capacity === 0) return;

  // Write at the current head slot, then advance the head with wrap-around.
  const writeSlot = state.moveHistoryHead | 0;
  ring[writeSlot] = cellIndex;
  state.moveHistoryHead = (writeSlot + 1) % capacity;

  // The stored length grows until the ring is full; after that each push
  // overwrites the oldest entry and the length stays at capacity.
  if (state.moveHistoryLength < capacity) {
    state.moveHistoryLength += 1;
  }
}
|
|
1398
|
-
|
|
1399
|
-
/**
|
|
1400
|
-
* Return the nth-most-recent entry from the circular move history.
|
|
1401
|
-
*
|
|
1402
|
-
* Behaviour:
|
|
1403
|
-
* - `n` is 1-based: `1` returns the last pushed entry, `2` the one
|
|
1404
|
-
* before that, etc. Returns `undefined` when `n` is out of range.
|
|
1405
|
-
* - Uses only preallocated `Int32Array` ring storage and integer
|
|
1406
|
-
* arithmetic; allocation-free and safe for hot paths.
|
|
1407
|
-
*
|
|
1408
|
-
* Steps:
|
|
1409
|
-
* 1) Coerce inputs to 32-bit integers and validate `n` against stored length.
|
|
1410
|
-
* 2) Compute the wrapped index by subtracting `n` from the head and
|
|
1411
|
-
* normalizing into `[0, capacity)` via addition + modulo.
|
|
1412
|
-
* 3) Return the ring value at the computed slot or `undefined` when invalid.
|
|
1413
|
-
*
|
|
1414
|
-
* @param state - simulation state containing `moveHistoryRing`, `moveHistoryHead`, `moveHistoryLength`
|
|
1415
|
-
* @param nth - 1-based index from the end (1 === last pushed)
|
|
1416
|
-
* @returns linearized cell index when present, otherwise `undefined`
|
|
1417
|
-
* @example
|
|
1418
|
-
* const last = MazeMovement.#nthFromHistoryEnd(state, 1);
|
|
1419
|
-
*/
|
|
1420
|
-
static #nthFromHistoryEnd(
  state: SimulationState,
  nth: number,
): number | undefined {
  // `nth` is 1-based and must fall within the number of stored entries.
  const stepsBack = nth | 0;
  const storedCount = state.moveHistoryLength | 0;
  const isInRange = stepsBack > 0 && stepsBack <= storedCount;
  if (!isInRange) return undefined;

  const ring = state.moveHistoryRing;
  const capacity = ring.length;
  if (capacity === 0) return undefined; // nothing can be stored in an empty ring

  // The head points at the next write slot, so the nth-most-recent entry
  // sits `stepsBack` slots behind it. The double modulo maps a negative
  // offset back into [0, capacity).
  const head = state.moveHistoryHead | 0;
  const slot = (((head - stepsBack) % capacity) + capacity) % capacity;
  return ring[slot];
}
|
|
1443
|
-
|
|
1444
|
-
/**
|
|
1445
|
-
* Record a visit to the current cell and derive shaping penalties.
|
|
1446
|
-
*
|
|
1447
|
-
* Behaviour / rationale:
|
|
1448
|
-
* - Updates pooled visit flags and visit counts (allocation-free).
|
|
1449
|
-
* - Pushes the cell into the fixed-size circular `moveHistoryRing` and
|
|
1450
|
-
* derives three shaping penalties used to discourage trivial oscillation
|
|
1451
|
-
* and revisiting behavior: loopPenalty, memoryPenalty, revisitPenalty.
|
|
1452
|
-
* - May mark the run `earlyTerminate` when a visit count exceeds a hard threshold.
|
|
1453
|
-
*
|
|
1454
|
-
* Steps:
|
|
1455
|
-
* 1) Compute linearized cell index and mark it visited (unique-visit accounting).
|
|
1456
|
-
* 2) Increment the per-cell visit counter and push into the circular history.
|
|
1457
|
-
* 3) Detect tiny A↔B oscillations (A->B->A->B) and apply loop penalty.
|
|
1458
|
-
* 4) Scan recent history (excluding last entry) for returning-to-recent-cell
|
|
1459
|
-
* and apply memory-return penalty if found.
|
|
1460
|
-
* 5) Compute revisit penalty scaling with visit counts and enforce termination
|
|
1461
|
-
* when visits exceed `#VISIT_TERMINATION_THRESHOLD`.
|
|
1462
|
-
*
|
|
1463
|
-
* @param state - current simulation state (modified in-place)
|
|
1464
|
-
* @returns void
|
|
1465
|
-
* @example
|
|
1466
|
-
* MazeMovement.#recordVisitAndUpdatePenalties(state);
|
|
1467
|
-
*/
|
|
1468
|
-
static #recordVisitAndUpdatePenalties(state: SimulationState) {
  const visitedFlags = MazeMovement.#VisitedFlags!;
  const visitCounts = MazeMovement.#VisitCounts!;
  const rewardScale = MazeMovement.#REWARD_SCALE;

  // Linearise the current position and count it as a unique visit the
  // first time its flag is seen unset.
  const cellIndex = MazeMovement.#index(state.position[0], state.position[1]);
  state.currentCellIndex = cellIndex;
  if (!visitedFlags[cellIndex]) {
    visitedFlags[cellIndex] = 1;
    state.visitedUniqueCount++;
  }

  // Bump the per-cell counter and append the cell to the move history.
  visitCounts[cellIndex] = (visitCounts[cellIndex] + 1) as number;
  MazeMovement.#pushHistory(state, cellIndex);
  const visitsAtCell = visitCounts[cellIndex];
  state.visitsAtCurrent = visitsAtCell;

  // Loop penalty: the four newest history entries alternate (A, B, A, B).
  let loopPenalty = 0;
  if (state.moveHistoryLength >= MazeMovement.#OSCILLATION_DETECT_LENGTH) {
    const newest = MazeMovement.#nthFromHistoryEnd(state, 1)!;
    const previous = MazeMovement.#nthFromHistoryEnd(state, 2);
    const beforePrevious = MazeMovement.#nthFromHistoryEnd(state, 3);
    const oldest = MazeMovement.#nthFromHistoryEnd(state, 4);
    const isAlternating =
      newest === beforePrevious &&
      previous !== undefined &&
      oldest !== undefined &&
      previous === oldest;
    if (isAlternating) {
      loopPenalty = -MazeMovement.#LOOP_PENALTY * rewardScale;
    }
  }
  state.loopPenalty = loopPenalty;

  // Memory penalty: the current cell already appears somewhere in the
  // history. The scan starts at offset 2, which skips only the entry that
  // was pushed just above; the first match is enough.
  let memoryPenalty = 0;
  const historyLength = state.moveHistoryLength;
  for (let stepsBack = 2; stepsBack <= historyLength; stepsBack++) {
    if (MazeMovement.#nthFromHistoryEnd(state, stepsBack) === cellIndex) {
      memoryPenalty = -MazeMovement.#MEMORY_RETURN_PENALTY * rewardScale;
      break;
    }
  }
  state.memoryPenalty = memoryPenalty;

  // Revisit penalty: grows linearly with every visit beyond the first.
  state.revisitPenalty =
    visitsAtCell > 1
      ? -MazeMovement.#REVISIT_PENALTY_PER_VISIT *
        (visitsAtCell - 1) *
        rewardScale
      : 0;

  // Too many visits to a single cell: add the harsh penalty and flag the
  // run for early termination.
  if (visitsAtCell > MazeMovement.#VISIT_TERMINATION_THRESHOLD) {
    state.invalidMovePenalty -=
      MazeMovement.#INVALID_MOVE_PENALTY_HARSH * rewardScale;
    state.earlyTerminate = true;
  }
}
|
|
1534
|
-
|
|
1535
|
-
/**
 * Refresh the agent's perception for the current step.
 *
 * Builds the vision vector through `MazeVision.buildInputs6`, stores it on
 * `state.vision`, rolls `#StatePrevDistanceStep` forward and records the
 * distance of the current cell on `state.distHere`.
 *
 * Steps:
 * 1) Return immediately when `state.earlyTerminate` is set.
 * 2) Resolve the pre-move distance: a direct `distanceMap` lookup when
 *    `state.hasDistanceMap` is true (a missing cell yields `undefined`),
 *    otherwise the value returned by `#distanceAt`.
 * 3) Call `MazeVision.buildInputs6(...)` with the previous-step distance,
 *    the pre-move distance and `state.prevAction`.
 * 4) Store the result on `state.vision`. An Array is stored by reference
 *    (no copy); anything else is converted once with `Array.from`.
 * 5) Write the pre-move distance to `#StatePrevDistanceStep` and resolve
 *    `state.distHere` (a missing map cell becomes `Infinity`).
 *
 * @param state - Simulation state object (mutated in-place)
 * @param encodedMaze - 2D maze array forwarded to the vision builder
 * @param exitPos - Exit coordinate tuple forwarded to the vision builder
 * @param distanceMap - Optional precomputed distance map; it is dereferenced
 *   unconditionally whenever `state.hasDistanceMap` is true
 * @returns void
 * @example
 * // inside the simulation loop
 * MazeMovement.#buildVisionAndDistance(state, encodedMaze, exitPos, distanceMap);
 */
static #buildVisionAndDistance(
  state: SimulationState,
  encodedMaze: number[][],
  exitPos: readonly [number, number],
  distanceMap?: number[][],
) {
  if (state.earlyTerminate) return;

  // Integer-coerced cell coordinates are only used for the table lookups;
  // helpers receive the original position reference.
  const agentPosition = state.position;
  const column = agentPosition[0] | 0;
  const row = agentPosition[1] | 0;
  const useDistanceTable = state.hasDistanceMap;

  // Distance of the cell the agent currently stands on, before any move.
  let distanceBeforeMove: number | undefined;
  if (useDistanceTable) {
    distanceBeforeMove = distanceMap![row]?.[column] ?? undefined;
  } else {
    distanceBeforeMove = MazeMovement.#distanceAt(
      encodedMaze,
      agentPosition,
      distanceMap,
    );
  }

  const perception = MazeVision.buildInputs6(
    encodedMaze,
    agentPosition,
    exitPos,
    distanceMap,
    MazeMovement.#StatePrevDistanceStep,
    distanceBeforeMove,
    state.prevAction,
  );

  // Keep the builder's array as-is when possible; convert only non-Array results.
  if (Array.isArray(perception)) {
    state.vision = perception as number[];
  } else {
    state.vision = Array.from(perception as Iterable<number>) as number[];
  }

  // The value just used becomes the "previous distance" for the next step.
  MazeMovement.#StatePrevDistanceStep = distanceBeforeMove;

  if (useDistanceTable) {
    state.distHere = distanceMap![row]?.[column] ?? Infinity;
  } else {
    state.distHere = MazeMovement.#distanceAt(
      encodedMaze,
      agentPosition,
      distanceMap,
    );
  }
}
|
|
1620
|
-
|
|
1621
|
-
/**
 * Run the network on the current vision vector and pick a direction.
 *
 * Steps:
 * 1) Return immediately when `state.earlyTerminate` is set.
 * 2) Call `network.activate(state.vision)` to obtain the raw outputs.
 * 3) Copy the outputs element-by-element into a fresh plain Array and push
 *    that copy into the network's output history via `MazeUtils.pushHistory`
 *    (bounded by `#OUTPUT_HISTORY_LENGTH`), then write the history back.
 * 4) Pass the raw outputs (not the copy) to `MazeMovement.selectDirection`
 *    and store the returned stats on `state.actionStats`.
 * 5) Call `#applySaturationAndBiasAdjust`, then store the selected direction
 *    on `state.direction`.
 *
 * Error handling: any exception thrown inside steps 2-5 is logged with
 * `console.error` and `state.direction` is set to `#NO_MOVE`.
 *
 * @param state - simulation state (mutated in-place)
 * @param network - neural network whose `activate` is called with `state.vision`
 * @returns void
 *
 * @example
 * // inside the simulation loop
 * MazeMovement.#decideDirection(state, network);
 */
static #decideDirection(state: SimulationState, network: INetwork) {
  if (state.earlyTerminate) return;

  try {
    const logits = network.activate(state.vision);

    // History needs its own Array so later steps cannot alias this step's outputs.
    const logitCount = logits.length | 0;
    const snapshot: number[] = new Array(logitCount);
    let cursor = 0;
    while (cursor < logitCount) {
      snapshot[cursor] = logits[cursor];
      cursor++;
    }

    MazeMovement.#writeOutputHistory(
      network,
      MazeUtils.pushHistory(
        MazeMovement.#readOutputHistory(network),
        snapshot,
        MazeMovement.#OUTPUT_HISTORY_LENGTH,
      ),
    );

    // Selection works on the raw outputs, not on the history snapshot.
    const decision = MazeMovement.selectDirection(logits);
    state.actionStats = decision;

    MazeMovement.#applySaturationAndBiasAdjust(state, logits, network);

    state.direction = decision.direction;
  } catch (error: unknown) {
    // Best-effort fallback: report the failure and stay in place this step.
    console.error('Error activating network:', error);
    state.direction = MazeMovement.#NO_MOVE;
  }
}
|
|
1699
|
-
|
|
1700
|
-
/**
 * Proximity greedy override: once `state.distHere` is at or below
 * `#PROXIMITY_GREEDY_DISTANCE`, replace the chosen direction with the valid
 * neighbour that has the smallest distance reported by `#distanceAt`.
 *
 * Selection details:
 * - Directions are scanned in index order `0 .. #ACTION_DIM - 1`.
 * - A neighbour only replaces the running best when its distance is strictly
 *   smaller, so among equal-distance neighbours the lowest index wins.
 * - When no valid neighbour has a distance below `Infinity`, the direction
 *   already stored on `state.direction` is left untouched.
 *
 * Each candidate coordinate pair is also written into `#COORD_SCRATCH`;
 * nothing in this method reads it back.
 *
 * @param state - simulation state (modified in-place)
 * @param encodedMaze - maze grid used for move validity checks
 * @param distanceMap - optional precomputed distance map forwarded to `#distanceAt`
 * @example
 * // inside the simulation loop
 * MazeMovement.#maybeApplyProximityGreedy(state, encodedMaze, distanceMap);
 */
static #maybeApplyProximityGreedy(
  state: SimulationState,
  encodedMaze: number[][],
  distanceMap?: number[][],
) {
  if (
    state.earlyTerminate ||
    state.distHere > MazeMovement.#PROXIMITY_GREEDY_DISTANCE
  ) {
    return;
  }

  let bestDirection = state.direction;
  let bestDistance = Infinity;
  const scratch = MazeMovement.#COORD_SCRATCH;

  let candidate = 0;
  while (candidate < MazeMovement.#ACTION_DIM) {
    const delta = MazeMovement.#DIRECTION_DELTAS[candidate];

    // Neighbour cell reached by taking `candidate` from the current position.
    const targetX = (state.position[0] + delta[0]) | 0;
    const targetY = (state.position[1] + delta[1]) | 0;
    scratch[0] = targetX;
    scratch[1] = targetY;

    if (MazeMovement.isValidMove(encodedMaze, targetX, targetY)) {
      const targetDistance = MazeMovement.#distanceAt(
        encodedMaze,
        [targetX, targetY],
        distanceMap,
      );

      // Strict comparison: the first neighbour at the minimal distance is kept.
      if (targetDistance < bestDistance) {
        bestDistance = targetDistance;
        bestDirection = candidate;
      }
    }

    candidate++;
  }

  // Nothing to write when the scan did not change the choice.
  if (bestDirection === undefined || bestDirection === state.direction) {
    return;
  }
  state.direction = bestDirection;
}
|
|
1781
|
-
|
|
1782
|
-
/**
 * Epsilon-greedy exploration override.
 *
 * Steps:
 * 1) Return immediately when `state.earlyTerminate` is set.
 * 2) Compute epsilon with `#computeEpsilon` from the step count, the
 *    stagnation counter, `state.distHere` and `#StateSaturations`.
 * 3) Draw one value from `#rand()`; continue only when it is below epsilon.
 * 4) Make up to `#ACTION_DIM` attempts. Each attempt draws a random direction
 *    index; a draw equal to `state.prevAction` consumes the attempt without
 *    being tested. The first draw whose target cell passes `isValidMove` is
 *    written to `state.direction` and ends the search.
 *
 * When no attempt succeeds `state.direction` is left unchanged. Each tested
 * candidate coordinate pair is also written into `#COORD_SCRATCH`; nothing
 * in this method reads it back.
 *
 * @param state - simulation state (mutated in-place)
 * @param encodedMaze - maze used for move validity checks
 * @example
 * MazeMovement.#maybeApplyEpsilonExploration(state, encodedMaze);
 */
static #maybeApplyEpsilonExploration(
  state: SimulationState,
  encodedMaze: number[][],
) {
  if (state.earlyTerminate) return;

  const explorationRate = MazeMovement.#computeEpsilon(
    state.steps,
    state.stepsSinceImprovement,
    state.distHere,
    MazeMovement.#StateSaturations,
  );

  // Single random draw decides whether this step explores at all.
  const explorationTriggered = MazeMovement.#rand() < explorationRate;
  if (!explorationTriggered) return;

  const directionCount = MazeMovement.#ACTION_DIM;
  const lastAction = state.prevAction;
  const originX = state.position[0] | 0;
  const originY = state.position[1] | 0;
  const scratch = MazeMovement.#COORD_SCRATCH;

  for (let attemptsLeft = directionCount; attemptsLeft > 0; attemptsLeft--) {
    // Truncating multiply maps the random draw onto a direction index.
    const pick = (MazeMovement.#rand() * directionCount) | 0;

    // Repeating the previous action is not exploration; spend the attempt.
    if (pick !== lastAction) {
      const delta = MazeMovement.#DIRECTION_DELTAS[pick];
      const targetX = (originX + delta[0]) | 0;
      const targetY = (originY + delta[1]) | 0;

      scratch[0] = targetX;
      scratch[1] = targetY;

      if (MazeMovement.isValidMove(encodedMaze, targetX, targetY)) {
        state.direction = pick;
        return;
      }
    }
  }
}
|
|
1853
|
-
|
|
1854
|
-
/**
 * Force a random move after a run of consecutive `#NO_MOVE` decisions.
 *
 * Steps:
 * 1) Return immediately when `state.earlyTerminate` is set.
 * 2) Update `#StateNoMoveStreak`: add one when `state.direction` is
 *    `#NO_MOVE`, otherwise reset it to zero.
 * 3) Return while the streak is still below `#NO_MOVE_STREAK_THRESHOLD`.
 * 4) Make up to `#ACTION_DIM` random draws and write the first direction
 *    whose target cell passes `isValidMove` to `state.direction`.
 * 5) Reset `#StateNoMoveStreak` to zero, whether or not a valid direction
 *    was found.
 *
 * Each candidate coordinate pair is also written into `#COORD_SCRATCH`;
 * nothing in this method reads it back.
 *
 * @param state - simulation state (mutated in-place)
 * @param encodedMaze - maze used for move validity tests
 * @example
 * // inside simulation loop to recover from stuck states
 * MazeMovement.#maybeForceExploration(state, encodedMaze);
 */
static #maybeForceExploration(
  state: SimulationState,
  encodedMaze: number[][],
) {
  if (state.earlyTerminate) return;

  // The streak counter lives on the class, not on the per-run state object.
  MazeMovement.#StateNoMoveStreak =
    state.direction === MazeMovement.#NO_MOVE
      ? MazeMovement.#StateNoMoveStreak + 1
      : 0;

  const streakTooShort =
    MazeMovement.#StateNoMoveStreak < MazeMovement.#NO_MOVE_STREAK_THRESHOLD;
  if (streakTooShort) return;

  const directionCount = MazeMovement.#ACTION_DIM;
  const originX = state.position[0] | 0;
  const originY = state.position[1] | 0;
  const scratch = MazeMovement.#COORD_SCRATCH;

  for (let attemptsLeft = directionCount; attemptsLeft > 0; attemptsLeft--) {
    // Truncating multiply maps the random draw onto a direction index.
    const pick = (MazeMovement.#rand() * directionCount) | 0;
    const delta = MazeMovement.#DIRECTION_DELTAS[pick];

    const targetX = (originX + delta[0]) | 0;
    const targetY = (originY + delta[1]) | 0;
    scratch[0] = targetX;
    scratch[1] = targetY;

    if (MazeMovement.isValidMove(encodedMaze, targetX, targetY)) {
      state.direction = pick;
      break;
    }
  }

  // Start counting afresh even when every draw hit an invalid cell.
  MazeMovement.#StateNoMoveStreak = 0;
}
|
|
1925
|
-
|
|
1926
|
-
/**
 * Execute the move stored in `state.direction` (when valid) and update the
 * per-step progress, exploration and penalty bookkeeping.
 *
 * Steps:
 * 1) Return immediately when `state.earlyTerminate` is set.
 * 2) Record the pre-move distance (via `#distanceAt` with the `distanceMap`
 *    argument) on `state.prevDistance`.
 * 3) Reset `state.moved`. When the direction index lies in
 *    `[0, #ACTION_DIM)` and the target cell passes `isValidMove`, update
 *    `state.position` in place and set `state.moved = true`.
 * 4) When moved:
 *    - append the new position to `#PathX` / `#PathY` and bump
 *      `state.pathLength`;
 *    - push the position into `state.recentPositions` (bounded by
 *      `#LOCAL_WINDOW`) and run `#maybeApplyLocalAreaPenalty`;
 *    - resolve the post-move distance. NOTE: this lookup reads
 *      `state.distanceMap`, not the `distanceMap` parameter;
 *    - run `#applyProgressShaping` and `#applyExplorationVisitAdjustment`;
 *    - increment `state.directionCounts` for non-negative directions and
 *      lower `state.minDistanceToExit` when the new distance is smaller.
 * 5) When not moved: subtract `#INVALID_MOVE_PENALTY_MILD * #REWARD_SCALE`
 *    from `state.invalidMovePenalty`.
 * 6) Always finish with `#applyGlobalDistanceImprovementBonus`.
 *
 * `state.prevAction` is not touched here.
 *
 * @param state - simulation state (mutated in-place)
 * @param encodedMaze - 2D maze array
 * @param distanceMap - optional precomputed distance map used for the
 *   pre-move distance
 * @example
 * // Typical usage inside the simulation loop
 * MazeMovement.#executeMoveAndRewards(state, encodedMaze, distanceMap);
 */
static #executeMoveAndRewards(
  state: SimulationState,
  encodedMaze: number[][],
  distanceMap?: number[][],
) {
  if (state.earlyTerminate) return;

  // Distance before moving; kept locally for the delta computed below.
  const distanceBefore = MazeMovement.#distanceAt(
    encodedMaze,
    state.position,
    distanceMap,
  );
  state.prevDistance = distanceBefore;

  // Attempt the move. `moved` stays false for out-of-range actions
  // (for example a negative "no move" index) and for blocked target cells.
  state.moved = false;
  const action = state.direction;
  const actionInRange = action >= 0 && action < MazeMovement.#ACTION_DIM;
  if (actionInRange) {
    const delta = MazeMovement.#DIRECTION_DELTAS[action];
    const targetX = (state.position[0] + delta[0]) | 0;
    const targetY = (state.position[1] + delta[1]) | 0;

    const scratch = MazeMovement.#COORD_SCRATCH;
    scratch[0] = targetX;
    scratch[1] = targetY;

    if (MazeMovement.isValidMove(encodedMaze, targetX, targetY)) {
      state.position[0] = targetX;
      state.position[1] = targetY;
      state.moved = true;
    }
  }

  const scale = MazeMovement.#REWARD_SCALE;
  const pathXs = MazeMovement.#PathX!;
  const pathYs = MazeMovement.#PathY!;

  if (!state.moved) {
    // Mild penalty for a step that did not change the position.
    state.invalidMovePenalty -=
      MazeMovement.#INVALID_MOVE_PENALTY_MILD * scale;
  } else {
    // Record the new cell at the end of the pooled path buffers.
    const slot = state.pathLength | 0;
    pathXs[slot] = state.position[0];
    pathYs[slot] = state.position[1];
    state.pathLength = slot + 1;

    // Bounded window of recent cells feeds the local-area stagnation check.
    MazeUtils.pushHistory(
      state.recentPositions,
      [state.position[0], state.position[1]] as [number, number],
      MazeMovement.#LOCAL_WINDOW,
    );
    MazeMovement.#maybeApplyLocalAreaPenalty(state, scale);

    // Post-move distance (read from the map stored on the state object).
    let distanceAfter: number;
    if (state.hasDistanceMap) {
      distanceAfter =
        state.distanceMap?.[state.position[1]]?.[state.position[0]] ??
        Infinity;
    } else {
      distanceAfter = MazeMovement.#distanceAt(
        encodedMaze,
        state.position,
        state.distanceMap,
      );
    }

    // Positive delta means the agent got closer to the exit.
    const delta = distanceBefore - distanceAfter;
    const movedCloser = delta > 0;
    const movedAway = !movedCloser && distanceAfter > distanceBefore;
    MazeMovement.#applyProgressShaping(
      state,
      delta,
      movedCloser,
      movedAway,
      scale,
    );

    MazeMovement.#applyExplorationVisitAdjustment(state, scale);

    if (state.direction >= 0) {
      state.directionCounts[state.direction] += 1;
    }
    state.minDistanceToExit = Math.min(state.minDistanceToExit, distanceAfter);
  }

  // Runs for both outcomes (moved and not moved).
  MazeMovement.#applyGlobalDistanceImprovementBonus(state, encodedMaze, scale);
}
|
|
2063
|
-
|
|
2064
|
-
/**
 * Finalize per-step penalties after an action has been executed.
 *
 * Steps:
 * 1) Return immediately when `state.earlyTerminate` is set.
 * 2) Run `#applyRepetitionAndBacktrackPenalties`.
 * 3) Copy `state.direction` into `state.prevAction`, but only when
 *    `state.moved` is true.
 * 4) Run `#applyEntropyGuidanceShaping`.
 * 5) Run `#applySaturationPenaltyCycle`.
 * 6) Add `loopPenalty + memoryPenalty + revisitPenalty` (each treated as 0
 *    when falsy) to `state.invalidMovePenalty`.
 *
 * Implementation note: the aggregated penalty is held in a plain local.
 * It is deliberately NOT staged in `#COORD_SCRATCH`: that buffer is shared
 * coordinate scratch, and storing a fractional penalty in a typed array
 * would coerce it to the array's element type (truncation for integer
 * arrays, rounding for 32-bit floats) before it is added to the accumulator.
 * A local `number` costs no allocation and keeps full precision.
 *
 * @param state - simulation state object mutated in-place
 * @returns void
 * @example
 * // call after moving/deciding action to finalize penalties for the step
 * MazeMovement.#applyPostActionPenalties(state);
 */
static #applyPostActionPenalties(state: SimulationState) {
  if (state.earlyTerminate) return;

  const scale = MazeMovement.#REWARD_SCALE;

  // Must run before `prevAction` is overwritten below.
  // NOTE(review): presumably the helper compares against the previous action — confirm.
  MazeMovement.#applyRepetitionAndBacktrackPenalties(state, scale);

  // A step that did not move keeps the previous action unchanged.
  if (state.moved) state.prevAction = state.direction;

  MazeMovement.#applyEntropyGuidanceShaping(state, scale);
  MazeMovement.#applySaturationPenaltyCycle(state, scale);

  // `|| 0` (not `??`) so that NaN as well as undefined contributes nothing.
  const aggregatedLocalPenalty =
    (state.loopPenalty || 0) +
    (state.memoryPenalty || 0) +
    (state.revisitPenalty || 0);
  state.invalidMovePenalty += aggregatedLocalPenalty;
}
|
|
2119
|
-
|
|
2120
|
-
/**
 * Penalise dithering inside a small area.
 *
 * Steps:
 * 1) Return unless `state.recentPositions` holds exactly `#LOCAL_WINDOW`
 *    entries.
 * 2) Compute the bounding box (min/max X and Y) of those positions using
 *    integer-coerced coordinates.
 * 3) Write the box's minimum corner into `#COORD_SCRATCH`; nothing in this
 *    method reads it back.
 * 4) Compute `span = width + height` of the box. When the span is at most
 *    `#LOCAL_AREA_SPAN_THRESHOLD` and `state.stepsSinceImprovement` exceeds
 *    `#LOCAL_AREA_STAGNATION_STEPS`, subtract
 *    `#LOCAL_AREA_PENALTY_AMOUNT * rewardScale` from `state.localAreaPenalty`.
 *
 * There is no `earlyTerminate` guard in this helper.
 *
 * @param state - simulation state mutated in-place
 * @param rewardScale - multiplier applied to the penalty amount
 * @example
 * // called after moving to decide if a local-area penalty is warranted
 * MazeMovement.#maybeApplyLocalAreaPenalty(state, MazeMovement.#REWARD_SCALE);
 */
static #maybeApplyLocalAreaPenalty(
  state: SimulationState,
  rewardScale: number,
) {
  const window = state.recentPositions;
  if (window.length !== MazeMovement.#LOCAL_WINDOW) return;

  // Bounding box of the recent positions.
  let left = Number.POSITIVE_INFINITY;
  let right = Number.NEGATIVE_INFINITY;
  let top = Number.POSITIVE_INFINITY;
  let bottom = Number.NEGATIVE_INFINITY;

  for (const cell of window) {
    const cellX = cell[0] | 0;
    const cellY = cell[1] | 0;
    left = Math.min(left, cellX);
    right = Math.max(right, cellX);
    top = Math.min(top, cellY);
    bottom = Math.max(bottom, cellY);
  }

  const scratch = MazeMovement.#COORD_SCRATCH;
  scratch[0] = left;
  scratch[1] = top;

  // Width plus height of the box: small values mean the agent stayed local.
  const span = right - left + (bottom - top);
  const confinedToSmallArea = span <= MazeMovement.#LOCAL_AREA_SPAN_THRESHOLD;
  const stagnantTooLong =
    state.stepsSinceImprovement > MazeMovement.#LOCAL_AREA_STAGNATION_STEPS;

  if (confinedToSmallArea && stagnantTooLong) {
    state.localAreaPenalty -=
      MazeMovement.#LOCAL_AREA_PENALTY_AMOUNT * rewardScale;
  }
}
|
|
2186
|
-
|
|
2187
|
-
/**
 * Distance-to-goal progress shaping for a single step.
 *
 * Outcomes:
 * - Improved: add, in this order, a capped stagnation-break bonus (only when
 *   the stagnation counter is positive), a confidence-scaled base reward and
 *   a confidence-weighted distance-delta term to `state.progressReward`;
 *   the stagnation counter is reset to zero.
 * - Worsened: subtract a confidence-scaled "moved away" penalty from
 *   `state.progressReward` and increment the stagnation counter.
 * - Unchanged: only increment the stagnation counter.
 *
 * Confidence is `state.actionStats.maxProb`; when it is missing the fallback
 * is `1` for an improving step and `0.5` otherwise.
 *
 * @param state - simulation state (mutated in-place)
 * @param distanceDelta - positive when the agent moved closer to goal
 * @param improved - boolean indicating whether distanceDelta > 0
 * @param worsened - boolean indicating whether distance increased
 * @param rewardScale - global reward scaling constant
 * @example
 * MazeMovement.#applyProgressShaping(state, prevDist - currDist, improved, worsened, MazeMovement.#REWARD_SCALE);
 */
static #applyProgressShaping(
  state: SimulationState,
  distanceDelta: number,
  improved: boolean,
  worsened: boolean,
  rewardScale: number,
) {
  // Confidence of the action that produced this step (with fallbacks).
  const confidence = state.actionStats?.maxProb ?? (improved ? 1 : 0.5);

  // Non-improving steps: optional away-penalty, then count the stagnant step.
  if (!improved) {
    if (worsened) {
      const awayPenalty =
        (MazeMovement.#PROGRESS_AWAY_BASE_PENALTY +
          MazeMovement.#PROGRESS_AWAY_CONF_SCALE * confidence) *
        rewardScale;
      state.progressReward -= awayPenalty;
    }
    state.stepsSinceImprovement++;
    return;
  }

  // Improving step, part 1: bonus for ending a stagnant stretch, capped.
  const stagnantSteps = state.stepsSinceImprovement;
  if (stagnantSteps > 0) {
    const uncappedBonus =
      stagnantSteps * MazeMovement.#PROGRESS_STEPS_MULT * rewardScale;
    const bonusCap = MazeMovement.#PROGRESS_STEPS_MAX * rewardScale;
    state.progressReward += Math.min(uncappedBonus, bonusCap);
  }

  // Part 2: base progress reward, scaled by confidence.
  const baseReward =
    (MazeMovement.#PROGRESS_REWARD_BASE +
      MazeMovement.#PROGRESS_REWARD_CONF_SCALE * confidence) *
    rewardScale;
  state.progressReward += baseReward;

  // Part 3: contribution proportional to how much closer the agent got.
  // Note this term is not multiplied by `rewardScale`.
  const confidenceWeight =
    MazeMovement.#DISTANCE_DELTA_CONF_BASE +
    MazeMovement.#DISTANCE_DELTA_CONF_SCALE * confidence;
  state.progressReward +=
    distanceDelta * MazeMovement.#DISTANCE_DELTA_SCALE * confidenceWeight;

  // The agent improved, so the stagnation streak is over.
  state.stepsSinceImprovement = 0;
}
|
|
2267
|
-
|
|
2268
|
-
/**
 * Apply an exploration bonus or a revisit penalty for the cell that was
 * just visited.
 *
 * Behaviour:
 * - When `state.visitsAtCurrent` (coerced to a 32-bit integer) is exactly 1,
 *   add `NEW_CELL_EXPLORATION_BONUS * rewardScale` to
 *   `state.newCellExplorationBonus`.
 * - For every other count, subtract `REVISIT_PENALTY_STRONG * rewardScale`
 *   from the same accumulator.
 *
 * @param state - simulation state mutated in-place
 * @param rewardScale - global reward scaling constant used to scale magnitudes
 * @example
 * MazeMovement.#applyExplorationVisitAdjustment(state, MazeMovement.#REWARD_SCALE);
 */
static #applyExplorationVisitAdjustment(
  state: SimulationState,
  rewardScale: number,
) {
  const visitsAtThisCell = state.visitsAtCurrent | 0;
  const isFirstVisit = visitsAtThisCell === 1;

  // Keep the adjustment in a plain local. Numbers are unboxed primitives, so
  // a pooled typed-array slot saves no allocation, and writing a fractional
  // value into such a slot can coerce it (integer element types truncate).
  const adjustment = isFirstVisit
    ? MazeMovement.#NEW_CELL_EXPLORATION_BONUS * rewardScale
    : -(MazeMovement.#REVISIT_PENALTY_STRONG * rewardScale);

  state.newCellExplorationBonus += adjustment;
}
|
|
2310
|
-
|
|
2311
|
-
/**
 * Global distance-improvement bonus.
 *
 * Purpose:
 * - When the run improves its global distance-to-exit after a long stagnant
 *   stretch, grant a capped, step-scaled bonus to `state.progressReward`.
 *
 * Steps:
 * 1) Resolve the current global distance: when `state.hasDistanceMap` is set,
 *    read `state.distanceMap[y][x]` (missing entries count as `Infinity`);
 *    otherwise delegate to `#distanceAt`.
 * 2) If the current distance is strictly smaller than
 *    `state.lastDistanceGlobal` (a missing value counts as `Infinity`):
 *    a) when the stagnation counter exceeds `#GLOBAL_BREAK_BONUS_START`, add
 *       a per-step bonus for the excess steps, capped at
 *       `#GLOBAL_BREAK_BONUS_CAP * rewardScale`;
 *    b) reset `state.stepsSinceImprovement` to zero.
 * 3) Always store the current distance in `state.lastDistanceGlobal`.
 *
 * @param state - Mutable simulation state for the current run.
 * @param encodedMaze - Readonly maze grid (rows of numeric columns).
 * @param rewardScale - Global scalar applied to reward magnitudes.
 * @example
 * MazeMovement.#applyGlobalDistanceImprovementBonus(state, maze, 1.0);
 */
static #applyGlobalDistanceImprovementBonus(
  state: SimulationState,
  encodedMaze: number[][],
  rewardScale: number,
) {
  // Step 1: resolve the current global distance.
  const posX = state.position[0] | 0;
  const posY = state.position[1] | 0;
  const currentGlobalDistance = state.hasDistanceMap
    ? (state.distanceMap?.[posY]?.[posX] ?? Infinity)
    : MazeMovement.#distanceAt(
        encodedMaze,
        state.position,
        state.distanceMap,
      );

  // The distance is deliberately NOT copied into the shared scratch buffer:
  // the value was never read back, and the write would overwrite whatever
  // another routine left in that pooled slot.

  // Step 2: compare against the previously recorded global distance.
  const previousGlobalDistance = state.lastDistanceGlobal ?? Infinity;
  if (currentGlobalDistance < previousGlobalDistance) {
    const stagnationSteps = state.stepsSinceImprovement | 0;
    const bonusStart = MazeMovement.#GLOBAL_BREAK_BONUS_START;

    if (stagnationSteps > bonusStart) {
      // Only the steps beyond the start threshold earn a bonus.
      const uncappedBonus =
        (stagnationSteps - bonusStart) *
        MazeMovement.#GLOBAL_BREAK_BONUS_PER_STEP *
        rewardScale;
      const bonusCap = MazeMovement.#GLOBAL_BREAK_BONUS_CAP * rewardScale;
      state.progressReward += Math.min(uncappedBonus, bonusCap);
    }

    // A global improvement ends the stagnation streak.
    state.stepsSinceImprovement = 0;
  }

  // Step 3: remember the current distance for the next comparison.
  state.lastDistanceGlobal = currentGlobalDistance;
}
|
|
2389
|
-
|
|
2390
|
-
/**
 * Apply repetition and backtrack penalties.
 *
 * Both penalties are subtracted from `state.invalidMovePenalty`; nothing
 * happens when `state.earlyTerminate` is already set.
 *
 * Steps:
 * 1) Repetition: when the current direction equals the previous action and
 *    the stagnation counter exceeds `#REPETITION_PENALTY_START`, subtract
 *    `#REPETITION_PENALTY_BASE * (stagnation - start) * rewardScale`.
 * 2) Backtrack: when both actions are non-negative, the run is stagnant
 *    (counter > 0) and the current direction is the `#OPPOSITE_DIR` of the
 *    previous action, subtract `#BACK_MOVE_PENALTY * rewardScale`.
 *
 * @param state - Mutable simulation state for the current run.
 * @param rewardScale - Global scalar applied to penalty magnitudes.
 * @example
 * // Called during post-action penalty finalization
 * MazeMovement.#applyRepetitionAndBacktrackPenalties(state, MazeMovement.#REWARD_SCALE);
 */
static #applyRepetitionAndBacktrackPenalties(
  state: SimulationState,
  rewardScale: number,
) {
  if (state.earlyTerminate) return;

  const previousAction = state.prevAction;
  const currentAction = state.direction;
  const stagnationSteps = state.stepsSinceImprovement | 0;

  // Penalties are applied straight from locals. Passing them through a
  // pooled typed-array slot saves no allocation (numbers are primitives) and
  // can coerce a fractional penalty (integer element types truncate).

  // Step 1: repetition penalty grows with the steps beyond the threshold.
  const repetitionStart = MazeMovement.#REPETITION_PENALTY_START;
  const isRepeating = previousAction === currentAction;
  if (isRepeating && stagnationSteps > repetitionStart) {
    const excessSteps = stagnationSteps - repetitionStart;
    state.invalidMovePenalty -=
      MazeMovement.#REPETITION_PENALTY_BASE * excessSteps * rewardScale;
  }

  // Step 2: fixed penalty for immediately reversing while stagnant.
  const isBacktrack =
    previousAction >= 0 &&
    currentAction >= 0 &&
    stagnationSteps > 0 &&
    currentAction === MazeMovement.#OPPOSITE_DIR[previousAction];
  if (isBacktrack) {
    state.invalidMovePenalty -= MazeMovement.#BACK_MOVE_PENALTY * rewardScale;
  }
}
|
|
2461
|
-
|
|
2462
|
-
/**
 * Entropy-guided shaping: apply a small penalty or exploration bonus based
 * on the recorded action entropy and on whether the vision inputs carry
 * guidance (line-of-sight or gradient groups).
 *
 * Steps:
 * 1) Return immediately when `state.earlyTerminate` is set or no action
 *    statistics were recorded.
 * 2) Guidance is present when the `#sumVisionGroup` total of either the
 *    line-of-sight group or the gradient group is positive.
 * 3) If entropy exceeds `#ENTROPY_HIGH_THRESHOLD`, subtract
 *    `#ENTROPY_PENALTY * rewardScale` from `state.invalidMovePenalty` and
 *    stop (the bonus below is then never granted).
 * 4) Otherwise, if guidance is present, entropy is below
 *    `#ENTROPY_CONFIDENT_THRESHOLD` and `maxProb - secondProb` (missing
 *    values count as 0) exceeds `#ENTROPY_CONFIDENT_DIFF`, add
 *    `#EXPLORATION_BONUS_SMALL * rewardScale` to
 *    `state.newCellExplorationBonus`.
 *
 * @param state - Mutable simulation state for the current run.
 * @param rewardScale - Global scalar applied to penalty/bonus magnitudes.
 * @example
 * // Called as part of per-step penalty finalization
 * MazeMovement.#applyEntropyGuidanceShaping(state, MazeMovement.#REWARD_SCALE);
 */
static #applyEntropyGuidanceShaping(
  state: SimulationState,
  rewardScale: number,
) {
  // Step 1: nothing to shape without action statistics.
  if (state.earlyTerminate || !state.actionStats) return;

  const { entropy, maxProb, secondProb } = state.actionStats;

  // Step 2: does the current perception provide any guidance?
  const lineOfSightTotal = MazeMovement.#sumVisionGroup(
    state.vision,
    MazeMovement.#VISION_LOS_START,
  );
  const gradientTotal = MazeMovement.#sumVisionGroup(
    state.vision,
    MazeMovement.#VISION_GRAD_START,
  );
  const hasGuidance = lineOfSightTotal > 0 || gradientTotal > 0;

  // Adjustments below are applied directly: a pooled typed-array slot saves
  // no allocation for a primitive number and can coerce small fractional
  // values on write (integer element types truncate them).

  // Step 3: high entropy dominates; penalise and bail out.
  if (entropy > MazeMovement.#ENTROPY_HIGH_THRESHOLD) {
    state.invalidMovePenalty -= MazeMovement.#ENTROPY_PENALTY * rewardScale;
    return;
  }

  // Step 4: confident and guided => small exploration bonus.
  const maxMinusSecond = (maxProb ?? 0) - (secondProb ?? 0);
  const isConfident =
    entropy < MazeMovement.#ENTROPY_CONFIDENT_THRESHOLD &&
    maxMinusSecond > MazeMovement.#ENTROPY_CONFIDENT_DIFF;
  if (hasGuidance && isConfident) {
    state.newCellExplorationBonus +=
      MazeMovement.#EXPLORATION_BONUS_SMALL * rewardScale;
  }
}
|
|
2534
|
-
|
|
2535
|
-
/**
 * Periodic saturation penalty cycle.
 *
 * Steps:
 * 1) Return immediately while the class-level `#StateSaturations` counter is
 *    below `#SATURATION_PENALTY_TRIGGER`.
 * 2) Subtract `#SATURATION_PENALTY_BASE * rewardScale` from
 *    `state.invalidMovePenalty`.
 * 3) When `#SATURATION_PENALTY_PERIOD` is positive and the counter is an
 *    exact multiple of it, additionally subtract
 *    `#SATURATION_PENALTY_ESCALATE * rewardScale`.
 *
 * @param state - Mutable simulation state (penalties are accumulated here)
 * @param rewardScale - Global scalar used to scale penalty magnitudes
 * @example
 * MazeMovement.#applySaturationPenaltyCycle(state, MazeMovement.#REWARD_SCALE);
 */
static #applySaturationPenaltyCycle(
  state: SimulationState,
  rewardScale: number,
) {
  // Step 1: below the trigger there is nothing to penalise.
  const saturations = MazeMovement.#StateSaturations;
  if (saturations < MazeMovement.#SATURATION_PENALTY_TRIGGER) return;

  // Penalties are subtracted directly; staging them in a pooled typed-array
  // slot saves nothing for primitive numbers and can coerce fractional
  // values on write (integer element types truncate them).

  // Step 2: base penalty.
  state.invalidMovePenalty -=
    MazeMovement.#SATURATION_PENALTY_BASE * rewardScale;

  // Step 3: extra escalation on period boundaries (period 0 disables it and
  // also avoids a modulo-by-zero).
  const period = MazeMovement.#SATURATION_PENALTY_PERIOD;
  const isPeriodBoundary = period > 0 && saturations % period === 0;
  if (isPeriodBoundary) {
    state.invalidMovePenalty -=
      MazeMovement.#SATURATION_PENALTY_ESCALATE * rewardScale;
  }
}
|
|
2586
|
-
|
|
2587
|
-
/**
 * Detect saturation/overconfidence, apply shaping penalties, and
 * optionally perform output-node bias dampening.
 *
 * Steps:
 * 1) Return immediately when `state.actionStats` is missing.
 * 2) Overconfidence: `maxProb` above `#OVERCONFIDENT_PROB` while `secondProb`
 *    is below `#SECOND_PROB_LOW` (missing values count as 0).
 * 3) Flat collapse: population standard deviation of `outputs` below
 *    `#LOGSTD_FLAT_THRESHOLD`.
 * 4) If either holds, increment the class-level `#StateSaturations` counter
 *    and `state.saturatedSteps`; otherwise decay the counter by one (never
 *    below zero).
 * 5) Subtract `#OVERCONFIDENT_PENALTY` and/or `#FLAT_COLLAPSE_PENALTY`
 *    (each scaled by `#REWARD_SCALE`) from `state.invalidMovePenalty`.
 * 6) When the counter exceeds `#SATURATION_ADJUST_MIN` and `state.steps` is a
 *    multiple of `#SATURATION_ADJUST_INTERVAL`, shift every output-node bias
 *    by `-meanBias * #BIAS_ADJUST_FACTOR` and clamp it to
 *    `[-#BIAS_CLAMP, #BIAS_CLAMP]`. Errors in this step are swallowed on
 *    purpose (best-effort).
 *
 * @param state - Mutable simulation state for the current run.
 * @param outputs - Raw network logits for the current activation.
 * @param network - The neural network instance (used for optional bias adjust).
 * @example
 * MazeMovement.#applySaturationAndBiasAdjust(state, outputs, network);
 */
static #applySaturationAndBiasAdjust(
  state: SimulationState,
  outputs: number[],
  network: INetwork,
) {
  // Step 1: confidence statistics are required.
  const actionStats = state.actionStats;
  if (!actionStats) return;

  const rewardScale = MazeMovement.#REWARD_SCALE;

  // Step 2: overconfidence (sharp winner, negligible runner-up).
  const maxProbability = actionStats.maxProb ?? 0;
  const secondProbability = actionStats.secondProb ?? 0;
  const isOverConfident =
    maxProbability > MazeMovement.#OVERCONFIDENT_PROB &&
    secondProbability < MazeMovement.#SECOND_PROB_LOW;

  // Step 3: flat collapse via the population std-dev of the logits.
  // The divisor is the number of values actually summed, so the statistic
  // stays correct even if `outputs` does not have the usual action count.
  // An empty array falls back to 1, which yields a std-dev of 0.
  const logitCount = outputs.length;
  const divisor = logitCount || 1;

  let logitSum = 0;
  for (let i = 0; i < logitCount; i++) logitSum += outputs[i];
  const meanLogit = logitSum / divisor;

  let squaredDeviationSum = 0;
  for (let i = 0; i < logitCount; i++) {
    const deviation = outputs[i] - meanLogit;
    squaredDeviationSum += deviation * deviation;
  }
  const stdDev = Math.sqrt(squaredDeviationSum / divisor);
  const isFlatCollapsed = stdDev < MazeMovement.#LOGSTD_FLAT_THRESHOLD;

  // Step 4: update the rolling saturation counter and per-run tally.
  let saturationCounter = MazeMovement.#StateSaturations;
  if (isOverConfident || isFlatCollapsed) {
    saturationCounter++;
    state.saturatedSteps++;
  } else if (saturationCounter > 0) {
    saturationCounter--;
  }
  MazeMovement.#StateSaturations = saturationCounter;

  // Step 5: penalties, subtracted directly. A pooled typed-array slot saves
  // no allocation for primitive numbers and can coerce fractional values on
  // write (integer element types truncate them).
  if (isOverConfident) {
    state.invalidMovePenalty -=
      MazeMovement.#OVERCONFIDENT_PENALTY * rewardScale;
  }
  if (isFlatCollapsed) {
    state.invalidMovePenalty -=
      MazeMovement.#FLAT_COLLAPSE_PENALTY * rewardScale;
  }

  // Step 6: periodic bias dampening while saturation is chronic.
  const shouldAdjustBiases =
    saturationCounter > MazeMovement.#SATURATION_ADJUST_MIN &&
    state.steps % MazeMovement.#SATURATION_ADJUST_INTERVAL === 0;
  if (!shouldAdjustBiases) return;

  try {
    const outputNodes = network.nodes?.filter(
      (node: INodeStruct): node is INodeStruct & { bias: number } =>
        node.type === MazeMovement.#NODE_TYPE_OUTPUT &&
        typeof node.bias === 'number',
    );
    if (!outputNodes || outputNodes.length === 0) return;

    // Mean bias across the output nodes.
    let biasSum = 0;
    for (let i = 0; i < outputNodes.length; i++) {
      biasSum += outputNodes[i].bias;
    }
    const meanBias = biasSum / outputNodes.length;

    // Shift each bias by the scaled mean, then clamp into the allowed range.
    const biasShift = meanBias * MazeMovement.#BIAS_ADJUST_FACTOR;
    const clamp = MazeMovement.#BIAS_CLAMP;
    for (let i = 0; i < outputNodes.length; i++) {
      const node = outputNodes[i];
      const adjusted = node.bias - biasShift;
      node.bias = Math.max(-clamp, Math.min(clamp, adjusted));
    }
  } catch {
    // Best-effort: swallow errors (network shapes vary in tests)
  }
}
|
|
2704
|
-
|
|
2705
|
-
/**
 * Check deep stagnation and optionally mark the run for termination.
 *
 * Steps:
 * 1) While `state.stepsSinceImprovement` (coerced to a 32-bit integer) does
 *    not exceed `#DEEP_STAGNATION_THRESHOLD`, return `state.earlyTerminate`
 *    unchanged.
 * 2) When a global `window` exists (browser-like host), also return
 *    `state.earlyTerminate` unchanged; no penalty is applied.
 * 3) Otherwise subtract `#DEEP_STAGNATION_PENALTY * #REWARD_SCALE` from
 *    `state.invalidMovePenalty` and return `true`.
 *
 * @param state - mutable simulation state (mutated in-place when penalty applies)
 * @returns boolean - `true` when the run should be terminated (penalty applied),
 * otherwise the existing `state.earlyTerminate` value.
 * @example
 * // inside the simulation loop
 * if (MazeMovement.#maybeTerminateDeepStagnation(state)) break;
 */
static #maybeTerminateDeepStagnation(state: SimulationState): boolean {
  // Step 1: not stagnant long enough.
  const stagnationSteps = state.stepsSinceImprovement | 0;
  if (stagnationSteps <= MazeMovement.#DEEP_STAGNATION_THRESHOLD) {
    return state.earlyTerminate;
  }

  // Step 2: browser-like hosts keep running. `typeof` on an undeclared
  // identifier evaluates to 'undefined' instead of throwing, so this check
  // needs no try/catch fallback.
  const runningInBrowser = typeof window !== 'undefined';
  if (runningInBrowser) return state.earlyTerminate;

  // Step 3: apply the penalty directly (a pooled typed-array slot would save
  // no allocation and could coerce the value on write) and request
  // termination.
  state.invalidMovePenalty -=
    MazeMovement.#DEEP_STAGNATION_PENALTY * MazeMovement.#REWARD_SCALE;
  return true;
}
|
|
2762
|
-
|
|
2763
|
-
/**
 * Compute the normalized action entropy from recorded direction counts.
 *
 * Steps:
 * 1) Sum the counts (each coerced to a 32-bit integer); a zero total is
 *    replaced by 1 to avoid division by zero.
 * 2) For every non-zero count, compute `p = count / total` and accumulate
 *    `-p * log(p)` (Shannon entropy, natural logarithm).
 * 3) Divide the accumulated entropy by `#LOG_ACTIONS` and return it.
 *
 * An all-zero or empty input yields `0 / #LOG_ACTIONS`.
 *
 * @param directionCounts - array of non-negative integers counting how often each action was chosen
 * @returns normalised entropy number used in fitness shaping
 * @example
 * const entropy = MazeMovement.#computeActionEntropyFromCounts(state.directionCounts);
 */
static #computeActionEntropyFromCounts(directionCounts: number[]): number {
  // Step 1: total number of recorded actions (never zero).
  const totalCount =
    directionCounts.reduce((sum, value) => sum + (value | 0), 0) || 1;

  // Step 2: accumulate in a plain local. Each term lies in (0, 1), so an
  // accumulator stored in a typed-array slot with an integer element type
  // would truncate every partial sum; a local number is also already
  // allocation-free.
  let entropy = 0;
  for (let i = 0, len = directionCounts.length; i < len; i++) {
    const count = directionCounts[i] | 0;
    if (count === 0) continue; // p = 0 contributes nothing (and log(0) = -Infinity)
    const probability = count / totalCount;
    entropy -= probability * Math.log(probability);
  }

  // Step 3: normalise by the project's LOG_ACTIONS constant.
  return entropy / MazeMovement.#LOG_ACTIONS;
}
|
|
2810
|
-
|
|
2811
|
-
/**
 * Build and return the finalized result object for a successful run.
 *
 * Steps:
 * 1) Step efficiency = `maxSteps - state.steps` (both coerced to 32-bit ints).
 * 2) Action entropy from `state.directionCounts`.
 * 3) Fitness = `#SUCCESS_BASE_FITNESS`
 *    + efficiency * `#STEP_EFFICIENCY_SCALE`
 *    + `state.progressReward` + `state.newCellExplorationBonus`
 *    + `state.invalidMovePenalty`
 *    + entropy * `#SUCCESS_ACTION_ENTROPY_SCALE`.
 * 4) Materialize the path via `#materializePath(state.pathLength)` and compute
 *    the saturation fraction `saturatedSteps / steps` (0 when no steps).
 * 5) Raise the fitness to at least `#MIN_SUCCESS_FITNESS`.
 *
 * @param state - simulation state containing run accumulators and diagnostics
 * @param maxSteps - configured maximum steps for the run (used to compute efficiency)
 * @returns result object describing success, steps, path, fitness, progress and diagnostics
 * @example
 * const result = MazeMovement.#finalizeSuccess(state, maxSteps);
 */
static #finalizeSuccess(state: SimulationState, maxSteps: number) {
  // Step 1: steps used and how many were left over.
  const stepsTaken = state.steps | 0;
  const stepEfficiency = (maxSteps | 0) - stepsTaken;

  // Step 2: normalised entropy of the chosen directions.
  const actionEntropy = MazeMovement.#computeActionEntropyFromCounts(
    state.directionCounts,
  );

  // Step 3: aggregate the fitness components.
  const baseFitness =
    MazeMovement.#SUCCESS_BASE_FITNESS +
    stepEfficiency * MazeMovement.#STEP_EFFICIENCY_SCALE +
    state.progressReward +
    state.newCellExplorationBonus +
    state.invalidMovePenalty;
  const totalFitness =
    baseFitness + actionEntropy * MazeMovement.#SUCCESS_ACTION_ENTROPY_SCALE;

  // Step 4: path plus saturation fraction. The fraction is a ratio in
  // [0, 1], so it is kept in a plain local; a typed-array slot with an
  // integer element type would truncate it.
  const pathMaterialized = MazeMovement.#materializePath(state.pathLength);
  const saturationFraction =
    stepsTaken === 0 ? 0 : state.saturatedSteps / stepsTaken;

  // Step 5: successes never score below the configured minimum.
  const finalFitness = Math.max(
    MazeMovement.#MIN_SUCCESS_FITNESS,
    totalFitness,
  );

  return {
    success: true,
    steps: stepsTaken,
    path: pathMaterialized,
    fitness: finalFitness,
    progress: 100,
    saturationFraction,
    actionEntropy,
  };
}
|
|
2878
|
-
|
|
2879
|
-
/**
|
|
2880
|
-
* Build and return the finalized result object for a failed run.
|
|
2881
|
-
*
|
|
2882
|
-
* Behaviour / rationale:
|
|
2883
|
-
* - Computes shaped progress, exploration contributions, entropy bonus and
|
|
2884
|
-
* aggregates penalties into a single fitness score. For failures the
|
|
2885
|
-
* fitness is transformed to avoid negative-heavy values using the same
|
|
2886
|
-
* heuristic as the original implementation.
|
|
2887
|
-
* - Uses the pooled `#COORD_SCRATCH` for a tiny, allocation-free
|
|
2888
|
-
* saturation fraction calculation.
|
|
2889
|
-
*
|
|
2890
|
-
* Steps:
|
|
2891
|
-
* 1) Materialize the executed path and determine the last visited cell.
|
|
2892
|
-
* 2) Compute progress (via distance map or geometry), then shape it.
|
|
2893
|
-
* 3) Aggregate exploration, reward and penalty contributions including
|
|
2894
|
-
* an entropy-derived bonus.
|
|
2895
|
-
* 4) Mix in small random noise and transform negative raw fitness using
|
|
2896
|
-
* the project's stabilizing mapping.
|
|
2897
|
-
* 5) Return the failure result object with diagnostics.
|
|
2898
|
-
*
|
|
2899
|
-
* @param state - simulation state containing run accumulators and diagnostics
|
|
2900
|
-
* @param encodedMaze - read-only maze grid (rows of numeric columns)
|
|
2901
|
-
* @param startPos - starting coordinate tuple [x, y]
|
|
2902
|
-
* @param exitPos - exit coordinate tuple [x, y]
|
|
2903
|
-
* @param distanceMap - optional precomputed distance map aligned to maze
|
|
2904
|
-
* @returns failure result object with fitness, path and diagnostics
|
|
2905
|
-
* @example
|
|
2906
|
-
* const result = MazeMovement.#finalizeFailure(state, maze, startPos, exitPos, distanceMap);
|
|
2907
|
-
*/
|
|
2908
|
-
static #finalizeFailure(
|
|
2909
|
-
state: SimulationState,
|
|
2910
|
-
encodedMaze: number[][],
|
|
2911
|
-
startPos: readonly [number, number],
|
|
2912
|
-
exitPos: readonly [number, number],
|
|
2913
|
-
distanceMap?: number[][],
|
|
2914
|
-
) {
|
|
2915
|
-
// Step 1: materialize path and compute last visited position
|
|
2916
|
-
const pathX = MazeMovement.#PathX!;
|
|
2917
|
-
const pathY = MazeMovement.#PathY!;
|
|
2918
|
-
const lastIndex = (state.pathLength | 0) - 1;
|
|
2919
|
-
const lastPos: [number, number] = [
|
|
2920
|
-
pathX[lastIndex] ?? 0,
|
|
2921
|
-
pathY[lastIndex] ?? 0,
|
|
2922
|
-
];
|
|
2923
|
-
|
|
2924
|
-
// Step 2: compute progress using an optional distance map or geometry
|
|
2925
|
-
const progress = distanceMap
|
|
2926
|
-
? MazeUtils.calculateProgressFromDistanceMap(
|
|
2927
|
-
distanceMap,
|
|
2928
|
-
lastPos,
|
|
2929
|
-
startPos,
|
|
2930
|
-
)
|
|
2931
|
-
: MazeUtils.calculateProgress(encodedMaze, lastPos, startPos, exitPos);
|
|
2932
|
-
const progressFraction = progress / 100;
|
|
2933
|
-
const shapedProgress =
|
|
2934
|
-
Math.pow(progressFraction, MazeMovement.#PROGRESS_POWER) *
|
|
2935
|
-
MazeMovement.#PROGRESS_SCALE;
|
|
2936
|
-
|
|
2937
|
-
// Step 3: aggregate exploration and entropy-derived components
|
|
2938
|
-
const explorationScore = state.visitedUniqueCount * 1.0;
|
|
2939
|
-
const actionEntropy = MazeMovement.#computeActionEntropyFromCounts(
|
|
2940
|
-
state.directionCounts,
|
|
2941
|
-
);
|
|
2942
|
-
const entropyBonus = actionEntropy * MazeMovement.#ENTROPY_BONUS_WEIGHT;
|
|
2943
|
-
|
|
2944
|
-
// Placeholders for future heuristics (preserve original behaviour)
|
|
2945
|
-
const saturationPenalty = 0;
|
|
2946
|
-
const outputVarPenalty = 0;
|
|
2947
|
-
|
|
2948
|
-
// Aggregate base fitness components
|
|
2949
|
-
const baseFitness =
|
|
2950
|
-
shapedProgress +
|
|
2951
|
-
explorationScore +
|
|
2952
|
-
state.progressReward +
|
|
2953
|
-
state.newCellExplorationBonus +
|
|
2954
|
-
state.invalidMovePenalty +
|
|
2955
|
-
entropyBonus +
|
|
2956
|
-
state.localAreaPenalty +
|
|
2957
|
-
saturationPenalty +
|
|
2958
|
-
outputVarPenalty;
|
|
2959
|
-
|
|
2960
|
-
// Step 4: add a small random factor and stabilise negative values
|
|
2961
|
-
const raw =
|
|
2962
|
-
baseFitness + MazeMovement.#rand() * MazeMovement.#FITNESS_RANDOMNESS;
|
|
2963
|
-
const fitness = raw >= 0 ? raw : -Math.log1p(1 - raw);
|
|
2964
|
-
|
|
2965
|
-
// Step 5: produce materialized path and saturation fraction (allocation-free)
|
|
2966
|
-
const pathMaterialized = MazeMovement.#materializePath(state.pathLength);
|
|
2967
|
-
const scratch = MazeMovement.#COORD_SCRATCH;
|
|
2968
|
-
const stepsTaken = state.steps | 0;
|
|
2969
|
-
scratch[0] = stepsTaken ? state.saturatedSteps / stepsTaken : 0;
|
|
2970
|
-
const saturationFraction = scratch[0];
|
|
2971
8
|
|
|
2972
|
-
|
|
2973
|
-
|
|
2974
|
-
|
|
2975
|
-
|
|
2976
|
-
|
|
2977
|
-
|
|
2978
|
-
|
|
2979
|
-
|
|
2980
|
-
};
|
|
2981
|
-
}
|
|
2982
|
-
}
|
|
9
|
+
export { MazeMovement } from './mazeMovement/mazeMovement';
|
|
10
|
+
export type {
|
|
11
|
+
DirectionSelectionStats,
|
|
12
|
+
MazeMovementBufferPools,
|
|
13
|
+
MazeMovementRunServiceState,
|
|
14
|
+
MazeMovementSimulationResult,
|
|
15
|
+
SimulationState,
|
|
16
|
+
} from './mazeMovement/mazeMovement.types';
|