@reicek/neataptic-ts 0.1.21 → 0.1.22
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/agents/boundary-mapper.agent.md +29 -0
- package/.github/agents/docs-scout.agent.md +29 -0
- package/.github/agents/plan-scout.agent.md +29 -0
- package/.github/agents/solid-split.agent.md +138 -0
- package/.github/copilot-instructions.md +103 -0
- package/package.json +6 -3
- package/plans/ES2023 migration +13 -8
- package/plans/Evolution_Training_Interoperability_Contracts.md +1 -1
- package/plans/Interactive_Examples_and_Learning_Path.md +10 -2
- package/plans/Memory_Optimization.md +3 -3
- package/plans/README.md +63 -0
- package/plans/Roadmap.md +15 -3
- package/plans/asciiMaze_SOLID_split.done.md +130 -0
- package/plans/flappy_bird_SOLID_split.done.md +67 -0
- package/scripts/assets/theme.css +221 -34
- package/scripts/copy-examples.mjs +9 -5
- package/scripts/export-onnx.mjs +3 -3
- package/scripts/generate-bench-tables.mjs +10 -10
- package/scripts/generate-bench-tables.ts +10 -10
- package/scripts/generate-docs.ts +1415 -449
- package/scripts/render-docs-html.ts +15 -8
- package/src/README.md +101 -223
- package/src/architecture/README.md +57 -185
- package/src/architecture/layer/README.md +38 -38
- package/src/architecture/network/README.md +33 -31
- package/src/architecture/network/activate/README.md +77 -77
- package/src/architecture/network/connect/README.md +15 -13
- package/src/architecture/network/deterministic/README.md +7 -7
- package/src/architecture/network/evolve/README.md +44 -44
- package/src/architecture/network/gating/README.md +20 -20
- package/src/architecture/network/genetic/README.md +51 -51
- package/src/architecture/network/mutate/README.md +97 -97
- package/src/architecture/network/onnx/README.md +264 -264
- package/src/architecture/network/prune/README.md +39 -39
- package/src/architecture/network/remove/README.md +26 -26
- package/src/architecture/network/serialize/README.md +56 -56
- package/src/architecture/network/slab/README.md +61 -61
- package/src/architecture/network/standalone/README.md +24 -24
- package/src/architecture/network/stats/README.md +9 -9
- package/src/architecture/network/topology/README.md +46 -46
- package/src/architecture/network/training/README.md +21 -21
- package/src/methods/README.md +9 -87
- package/src/multithreading/README.md +8 -77
- package/src/multithreading/workers/README.md +2 -2
- package/src/multithreading/workers/browser/README.md +0 -6
- package/src/multithreading/workers/node/README.md +0 -3
- package/src/neat/README.md +562 -568
- package/src/utils/README.md +18 -18
- package/test/examples/asciiMaze/README.md +59 -59
- package/test/examples/asciiMaze/asciiMaze.e2e.test.ts +14 -9
- package/test/examples/asciiMaze/browser-entry/README.md +196 -0
- package/test/examples/asciiMaze/browser-entry/browser-entry.abort.services.ts +95 -0
- package/test/examples/asciiMaze/browser-entry/browser-entry.constants.ts +23 -0
- package/test/examples/asciiMaze/browser-entry/browser-entry.curriculum.services.ts +115 -0
- package/test/examples/asciiMaze/browser-entry/browser-entry.globals.services.ts +106 -0
- package/test/examples/asciiMaze/browser-entry/browser-entry.host.services.ts +157 -0
- package/test/examples/asciiMaze/browser-entry/browser-entry.services.ts +14 -0
- package/test/examples/asciiMaze/browser-entry/browser-entry.ts +129 -0
- package/test/examples/asciiMaze/browser-entry/browser-entry.types.ts +120 -0
- package/test/examples/asciiMaze/browser-entry/browser-entry.utils.ts +98 -0
- package/test/examples/asciiMaze/browser-entry.ts +10 -576
- package/test/examples/asciiMaze/dashboardManager/README.md +276 -0
- package/test/examples/asciiMaze/dashboardManager/archive/README.md +16 -0
- package/test/examples/asciiMaze/dashboardManager/archive/dashboardManager.archive.services.ts +267 -0
- package/test/examples/asciiMaze/dashboardManager/dashboardManager.constants.ts +35 -0
- package/test/examples/asciiMaze/dashboardManager/dashboardManager.services.ts +103 -0
- package/test/examples/asciiMaze/dashboardManager/dashboardManager.ts +181 -0
- package/test/examples/asciiMaze/dashboardManager/dashboardManager.types.ts +267 -0
- package/test/examples/asciiMaze/dashboardManager/dashboardManager.utils.ts +254 -0
- package/test/examples/asciiMaze/dashboardManager/live/README.md +14 -0
- package/test/examples/asciiMaze/dashboardManager/live/dashboardManager.live.services.ts +264 -0
- package/test/examples/asciiMaze/dashboardManager/telemetry/README.md +47 -0
- package/test/examples/asciiMaze/dashboardManager/telemetry/dashboardManager.telemetry.services.ts +513 -0
- package/test/examples/asciiMaze/dashboardManager.ts +13 -2335
- package/test/examples/asciiMaze/evolutionEngine/README.md +1058 -0
- package/test/examples/asciiMaze/evolutionEngine/curriculumPhase.ts +90 -0
- package/test/examples/asciiMaze/evolutionEngine/engineState.constants.ts +36 -0
- package/test/examples/asciiMaze/evolutionEngine/engineState.ts +58 -513
- package/test/examples/asciiMaze/evolutionEngine/engineState.types.ts +212 -0
- package/test/examples/asciiMaze/evolutionEngine/engineState.utils.ts +301 -0
- package/test/examples/asciiMaze/evolutionEngine/evolutionEngine.types.ts +445 -0
- package/test/examples/asciiMaze/evolutionEngine/evolutionLoop.ts +81 -50
- package/test/examples/asciiMaze/evolutionEngine/optionsAndSetup.ts +2 -4
- package/test/examples/asciiMaze/evolutionEngine/populationDynamics.ts +17 -33
- package/test/examples/asciiMaze/evolutionEngine/populationPruning.ts +1 -1
- package/test/examples/asciiMaze/evolutionEngine/rngAndTiming.ts +1 -2
- package/test/examples/asciiMaze/evolutionEngine/sampling.ts +1 -1
- package/test/examples/asciiMaze/evolutionEngine/scratchPools.ts +2 -5
- package/test/examples/asciiMaze/evolutionEngine/setupHelpers.ts +30 -37
- package/test/examples/asciiMaze/evolutionEngine/telemetryMetrics.ts +16 -58
- package/test/examples/asciiMaze/evolutionEngine/trainingWarmStart.ts +2 -2
- package/test/examples/asciiMaze/evolutionEngine.ts +55 -55
- package/test/examples/asciiMaze/fitness.ts +2 -2
- package/test/examples/asciiMaze/fitness.types.ts +65 -0
- package/test/examples/asciiMaze/interfaces.ts +64 -1352
- package/test/examples/asciiMaze/mazeMovement/README.md +356 -0
- package/test/examples/asciiMaze/mazeMovement/finalization/README.md +49 -0
- package/test/examples/asciiMaze/mazeMovement/finalization/mazeMovement.finalization.ts +138 -0
- package/test/examples/asciiMaze/mazeMovement/mazeMovement.constants.ts +101 -0
- package/test/examples/asciiMaze/mazeMovement/mazeMovement.services.ts +230 -0
- package/test/examples/asciiMaze/mazeMovement/mazeMovement.ts +299 -0
- package/test/examples/asciiMaze/mazeMovement/mazeMovement.types.ts +185 -0
- package/test/examples/asciiMaze/mazeMovement/mazeMovement.utils.ts +153 -0
- package/test/examples/asciiMaze/mazeMovement/policy/README.md +91 -0
- package/test/examples/asciiMaze/mazeMovement/policy/mazeMovement.policy.ts +467 -0
- package/test/examples/asciiMaze/mazeMovement/runtime/README.md +95 -0
- package/test/examples/asciiMaze/mazeMovement/runtime/mazeMovement.runtime.ts +354 -0
- package/test/examples/asciiMaze/mazeMovement/shaping/README.md +124 -0
- package/test/examples/asciiMaze/mazeMovement/shaping/mazeMovement.shaping.ts +459 -0
- package/test/examples/asciiMaze/mazeMovement.ts +12 -2978
- package/test/examples/flappy_bird/Trace-20260309T191949.json +24124 -0
- package/test/examples/flappy_bird/browser-entry/README.md +1129 -0
- package/test/examples/flappy_bird/browser-entry/browser-entry.host.utils.ts +4 -324
- package/test/examples/flappy_bird/browser-entry/browser-entry.network-view.utils.ts +6 -399
- package/test/examples/flappy_bird/browser-entry/browser-entry.playback.utils.ts +1 -717
- package/test/examples/flappy_bird/browser-entry/browser-entry.spawn.utils.ts +11 -31
- package/test/examples/flappy_bird/browser-entry/browser-entry.visualization.utils.ts +15 -893
- package/test/examples/flappy_bird/browser-entry/host/README.md +307 -0
- package/test/examples/flappy_bird/browser-entry/host/host.resize.service.ts +1 -295
- package/test/examples/flappy_bird/browser-entry/host/host.ts +562 -6
- package/test/examples/flappy_bird/browser-entry/host/resize/README.md +274 -0
- package/test/examples/flappy_bird/browser-entry/host/resize/host.resize.service.constants.ts +31 -0
- package/test/examples/flappy_bird/browser-entry/host/resize/host.resize.service.services.ts +360 -0
- package/test/examples/flappy_bird/browser-entry/host/resize/host.resize.service.ts +117 -0
- package/test/examples/flappy_bird/browser-entry/host/resize/host.resize.service.types.ts +63 -0
- package/test/examples/flappy_bird/browser-entry/host/resize/host.resize.service.utils.ts +250 -0
- package/test/examples/flappy_bird/browser-entry/network-view/README.md +399 -0
- package/test/examples/flappy_bird/browser-entry/network-view/network-view.topology.utils.ts +255 -0
- package/test/examples/flappy_bird/browser-entry/network-view/network-view.ts +802 -7
- package/test/examples/flappy_bird/browser-entry/playback/README.md +684 -0
- package/test/examples/flappy_bird/browser-entry/playback/background/README.md +277 -0
- package/test/examples/flappy_bird/browser-entry/playback/background/ground-grid/README.md +770 -0
- package/test/examples/flappy_bird/browser-entry/playback/background/ground-grid/playback.background.ground-grid.cache.services.ts +178 -0
- package/test/examples/flappy_bird/browser-entry/playback/background/ground-grid/playback.background.ground-grid.constants.ts +107 -0
- package/test/examples/flappy_bird/browser-entry/playback/background/ground-grid/playback.background.ground-grid.geometry.utils.ts +518 -0
- package/test/examples/flappy_bird/browser-entry/playback/background/ground-grid/playback.background.ground-grid.math.utils.ts +117 -0
- package/test/examples/flappy_bird/browser-entry/playback/background/ground-grid/playback.background.ground-grid.pulse.utils.ts +233 -0
- package/test/examples/flappy_bird/browser-entry/playback/background/ground-grid/playback.background.ground-grid.services.ts +211 -0
- package/test/examples/flappy_bird/browser-entry/playback/background/ground-grid/playback.background.ground-grid.ts +48 -0
- package/test/examples/flappy_bird/browser-entry/playback/background/ground-grid/playback.background.ground-grid.types.ts +212 -0
- package/test/examples/flappy_bird/browser-entry/playback/background/ground-grid/playback.background.ground-grid.utils.ts +81 -0
- package/test/examples/flappy_bird/browser-entry/playback/background/playback.background.cache.services.ts +96 -0
- package/test/examples/flappy_bird/browser-entry/playback/background/playback.background.constants.ts +62 -0
- package/test/examples/flappy_bird/browser-entry/playback/background/playback.background.services.ts +244 -0
- package/test/examples/flappy_bird/browser-entry/playback/background/playback.background.ts +53 -0
- package/test/examples/flappy_bird/browser-entry/playback/background/playback.background.types.ts +68 -0
- package/test/examples/flappy_bird/browser-entry/playback/background/playback.background.utils.ts +100 -0
- package/test/examples/flappy_bird/browser-entry/playback/frame-render/README.md +310 -0
- package/test/examples/flappy_bird/browser-entry/playback/frame-render/playback.frame-render.service.ts +92 -0
- package/test/examples/flappy_bird/browser-entry/playback/frame-render/playback.frame-render.services.ts +272 -0
- package/test/examples/flappy_bird/browser-entry/playback/frame-render/playback.frame-render.types.ts +39 -0
- package/test/examples/flappy_bird/browser-entry/playback/frame-render/playback.frame-render.utils.ts +493 -0
- package/test/examples/flappy_bird/browser-entry/playback/playback.constants.ts +1 -1
- package/test/examples/flappy_bird/browser-entry/playback/playback.frame-render.service.ts +4 -0
- package/test/examples/flappy_bird/browser-entry/playback/playback.snapshot.utils.ts +44 -0
- package/test/examples/flappy_bird/browser-entry/playback/playback.starfield.service.ts +39 -122
- package/test/examples/flappy_bird/browser-entry/playback/playback.starfield.services.ts +272 -0
- package/test/examples/flappy_bird/browser-entry/playback/playback.starfield.types.ts +62 -0
- package/test/examples/flappy_bird/browser-entry/playback/playback.starfield.utils.ts +11 -4
- package/test/examples/flappy_bird/browser-entry/playback/playback.ts +409 -8
- package/test/examples/flappy_bird/browser-entry/playback/playback.types.ts +4 -12
- package/test/examples/flappy_bird/browser-entry/runtime/README.md +235 -0
- package/test/examples/flappy_bird/browser-entry/runtime/runtime.evolution-launch.service.ts +45 -0
- package/test/examples/flappy_bird/browser-entry/runtime/runtime.lifecycle.service.ts +81 -0
- package/test/examples/flappy_bird/browser-entry/runtime/runtime.startup.service.ts +74 -0
- package/test/examples/flappy_bird/browser-entry/runtime/runtime.ts +31 -121
- package/test/examples/flappy_bird/browser-entry/runtime/runtime.types.ts +36 -0
- package/test/examples/flappy_bird/browser-entry/visualization/README.md +557 -0
- package/test/examples/flappy_bird/browser-entry/visualization/visualization.constants.ts +110 -0
- package/test/examples/flappy_bird/browser-entry/visualization/visualization.draw.service.ts +957 -19
- package/test/examples/flappy_bird/browser-entry/visualization/visualization.legend.utils.ts +138 -3
- package/test/examples/flappy_bird/browser-entry/visualization/visualization.topology.utils.ts +3 -27
- package/test/examples/flappy_bird/browser-entry/visualization/visualization.ts +1 -23
- package/test/examples/flappy_bird/browser-entry/worker-channel/README.md +156 -0
- package/test/examples/flappy_bird/constants/README.md +1179 -0
- package/test/examples/flappy_bird/constants/constants.network-view.ts +24 -0
- package/test/examples/flappy_bird/constants/constants.palette.ts +7 -0
- package/test/examples/flappy_bird/constants/constants.starfield.ts +78 -3
- package/test/examples/flappy_bird/environment/README.md +143 -0
- package/test/examples/flappy_bird/environment/environment.observation.utils.ts +1 -19
- package/test/examples/flappy_bird/environment/environment.step.service.ts +3 -66
- package/test/examples/flappy_bird/evaluation/README.md +130 -0
- package/test/examples/flappy_bird/evaluation/evaluation.fitness.utils.ts +1 -1
- package/test/examples/flappy_bird/evaluation/evaluation.rollout.service.ts +5 -375
- package/test/examples/flappy_bird/evaluation/rollout/README.md +291 -0
- package/test/examples/flappy_bird/evaluation/rollout/evaluation.rollout.constants.ts +30 -0
- package/test/examples/flappy_bird/evaluation/rollout/evaluation.rollout.service.ts +58 -0
- package/test/examples/flappy_bird/evaluation/rollout/evaluation.rollout.services.ts +310 -0
- package/test/examples/flappy_bird/evaluation/rollout/evaluation.rollout.types.ts +56 -0
- package/test/examples/flappy_bird/evaluation/rollout/evaluation.rollout.utils.ts +368 -0
- package/test/examples/flappy_bird/flappy-evolution-worker/README.md +618 -0
- package/test/examples/flappy_bird/flappy-evolution-worker/flappy-evolution-worker.playback.service.ts +7 -7
- package/test/examples/flappy_bird/flappy-evolution-worker/flappy-evolution-worker.simulation.frame.service.ts +364 -0
- package/test/examples/flappy_bird/flappy-evolution-worker/flappy-evolution-worker.simulation.types.ts +14 -0
- package/test/examples/flappy_bird/flappy-evolution-worker/flappy-evolution-worker.simulation.utils.ts +4 -201
- package/test/examples/flappy_bird/flappy-evolution-worker/flappy-evolution-worker.ts +184 -345
- package/test/examples/flappy_bird/flappy-evolution-worker/flappy-evolution-worker.warm-start.service.ts +291 -0
- package/test/examples/flappy_bird/flappy.simulation.shared.utils.ts +5 -0
- package/test/examples/flappy_bird/simulation-shared/README.md +417 -0
- package/test/examples/flappy_bird/simulation-shared/observation/README.md +183 -0
- package/test/examples/flappy_bird/simulation-shared/observation/observation.features.utils.ts +301 -0
- package/test/examples/flappy_bird/simulation-shared/observation/observation.ts +9 -0
- package/test/examples/flappy_bird/simulation-shared/observation/observation.vector.utils.ts +59 -0
- package/test/examples/flappy_bird/simulation-shared/simulation-shared.observation.utils.ts +5 -403
- package/test/examples/flappy_bird/simulation-shared/simulation-shared.spawn.utils.ts +20 -6
- package/test/examples/flappy_bird/{evaluation/evaluation.statistics.utils.ts → simulation-shared/simulation-shared.statistics.utils.ts} +23 -8
- package/test/examples/flappy_bird/trainer/README.md +563 -0
- package/test/examples/flappy_bird/trainer/evaluation/README.md +199 -0
- package/test/examples/flappy_bird/trainer/evaluation/trainer.evaluation.service.constants.ts +9 -0
- package/test/examples/flappy_bird/trainer/evaluation/trainer.evaluation.service.services.ts +73 -0
- package/test/examples/flappy_bird/trainer/evaluation/trainer.evaluation.service.ts +165 -0
- package/test/examples/flappy_bird/trainer/evaluation/trainer.evaluation.service.types.ts +25 -0
- package/test/examples/flappy_bird/trainer/evaluation/trainer.evaluation.service.utils.ts +161 -0
- package/test/examples/flappy_bird/trainer/trainer.evaluation.service.ts +13 -0
- package/test/examples/flappy_bird/trainer/trainer.report.service.services.ts +181 -0
- package/test/examples/flappy_bird/trainer/trainer.report.service.ts +126 -0
- package/test/examples/flappy_bird/trainer/trainer.selection.utils.ts +89 -0
- package/test/examples/flappy_bird/trainer/trainer.ts +11 -553
- package/test/examples/flappy_bird/browser-entry/browser-entry.utils.ts +0 -12
- package/test/examples/flappy_bird/environment/environment.ts +0 -7
- package/test/examples/flappy_bird/evaluation/evaluation.ts +0 -7
- package/test/examples/flappy_bird/simulation-shared/simulation-shared.ts +0 -15
- package/test/examples/flappy_bird/trainer/trainer.statistics.utils.ts +0 -78
|
@@ -0,0 +1,153 @@
|
|
|
1
|
+
import type { INetwork } from '../interfaces';
|
|
2
|
+
|
|
3
|
+
/**
 * Return the smallest power-of-two integer greater than or equal to `n`.
 *
 * The input is floored and clamped to a minimum of 1, so fractional, zero,
 * negative, and `NaN` inputs all resolve to `1`.
 *
 * @param n - Target minimum integer capacity.
 * @returns The smallest power of two greater than or equal to `n`.
 */
export function nextPowerOfTwo(n: number): number {
  const requested = Math.max(1, Math.floor(n));
  if (requested <= 1) return 1;

  if (requested <= 0xffffffff) {
    // `requested - 1` fits in an unsigned 32-bit integer, so the position of its
    // highest set bit gives the exponent of the enclosing power of two.
    const exponent = 32 - Math.clz32(requested - 1);
    // Use double arithmetic rather than `1 << exponent`: a 32-bit shift turns
    // negative at exponent 31 and wraps at 32, which previously produced 0 for
    // every request above 2^30.
    return 2 ** exponent;
  }

  // Beyond the 32-bit range, fall back to repeated doubling (exact for powers
  // of two in double precision).
  let power = 0x100000000;
  while (power < requested) power *= 2;
  return power;
}
|
|
26
|
+
|
|
27
|
+
/**
 * Type guard: report whether `candidate` is an array whose every present
 * element is a finite number (no `NaN`, no infinities, no non-number values).
 *
 * @param candidate - Value to inspect.
 * @returns True when the input is an array of finite numbers.
 */
export function isFiniteNumberArray(candidate: unknown): candidate is number[] {
  if (!Array.isArray(candidate)) {
    return false;
  }

  // Search for a single offending element instead of validating every element.
  const hasInvalidEntry = candidate.some(
    (entry: unknown) => typeof entry !== 'number' || !Number.isFinite(entry),
  );
  return !hasInvalidEntry;
}
|
|
41
|
+
|
|
42
|
+
/**
 * Look up the optional `_lastStepOutputs` property on a network via reflection.
 *
 * The stored value is returned as-is (same array reference) only when it is an
 * array in which every row passes `isFiniteNumberArray`.
 *
 * @param network - Network instance that may expose a reflected outputs history.
 * @returns The stored history, or `undefined` when absent or invalid.
 */
export function readOutputHistory(network: INetwork): number[][] | undefined {
  const stored: unknown = Reflect.get(network as object, '_lastStepOutputs');

  if (Array.isArray(stored) && stored.every(isFiniteNumberArray)) {
    return stored as number[][];
  }

  return undefined;
}
|
|
55
|
+
|
|
56
|
+
/**
 * Store an outputs history on the network under the `_lastStepOutputs`
 * property using `Reflect.set`.
 *
 * The array is stored by reference; no copy or validation is performed here.
 *
 * @param network - Target network to mutate.
 * @param history - Updated history buffer.
 */
export function writeOutputHistory(
  network: INetwork,
  history: number[][],
): void {
  const target = network as object;
  Reflect.set(target, '_lastStepOutputs', history);
}
|
|
68
|
+
|
|
69
|
+
/**
 * Materialize the active prefix of pooled path buffers into a fresh array.
 *
 * The requested length is floored and then clamped to the shorter of the two
 * buffers, so the result never contains entries read past the end of a buffer.
 * Non-positive and `NaN` lengths produce an empty array instead of reaching the
 * array constructor (which throws `RangeError` on invalid lengths).
 *
 * @param length - Number of path entries to materialize.
 * @param pathX - Pooled X-coordinate buffer.
 * @param pathY - Pooled Y-coordinate buffer.
 * @returns A newly allocated array of path tuples.
 */
export function materializePath(
  length: number,
  pathX: Int32Array,
  pathY: Int32Array,
): [number, number][] {
  const requested = Math.floor(length);
  // Written as a negated `>` so that NaN is rejected as well.
  if (!(requested > 0)) return [];

  // Also bounds `Infinity` and oversized requests to the readable range.
  const entries = Math.min(requested, pathX.length, pathY.length);
  if (entries === 0) return [];

  const output = new Array<[number, number]>(entries);
  for (let index = 0; index < entries; index++) {
    output[index] = [pathX[index], pathY[index]];
  }

  return output;
}
|
|
92
|
+
|
|
93
|
+
/**
 * Sum a contiguous group of entries from a vision vector, copying each visited
 * value into a reusable scratch buffer along the way.
 *
 * Both `start` and `groupLength` are treated as integers: `start` is truncated
 * and clamped to zero, and `groupLength` is truncated so a fractional length
 * never pulls in an extra trailing entry. The group is clipped to the end of
 * `vision`. Missing (`undefined`/`null`) entries count as `0`.
 *
 * @param vision - Flat perception vector.
 * @param start - Start index of the group to sum.
 * @param groupLength - Number of entries in the group.
 * @param scratch - Reusable scratch buffer populated with copied values,
 *   starting at index 0. Writes beyond its length are dropped by the typed array.
 * @returns Numeric sum of the selected group, or `0` when the group is empty.
 */
export function sumVisionGroup(
  vision: number[],
  start: number,
  groupLength: number,
  scratch: Float64Array,
): number {
  const sanitizedStart = Math.max(0, start | 0);
  // `Math.trunc` keeps `Infinity` meaning "through the end of the vector".
  const sanitizedLength = Math.trunc(groupLength);
  const clampedEnd = Math.min(vision.length, sanitizedStart + sanitizedLength);
  // Negated `<` also rejects a NaN end produced by a NaN group length.
  if (!(sanitizedStart < clampedEnd)) return 0;

  let sumAccumulator = 0;
  let writeIndex = 0;
  for (let readIndex = sanitizedStart; readIndex < clampedEnd; readIndex++) {
    const value = vision[readIndex] ?? 0;
    scratch[writeIndex++] = value;
    sumAccumulator += value;
  }

  return sumAccumulator;
}
|
|
122
|
+
|
|
123
|
+
/**
 * Compute normalized action entropy from direction counts.
 *
 * Each count is coerced to a 32-bit integer; zero counts are skipped. The
 * Shannon entropy (natural log) of the resulting distribution is divided by
 * `logActions`. When `logActions` is not a positive finite number the
 * normalization is undefined, so `0` is returned instead of `NaN`/`Infinity`.
 *
 * @param directionCounts - Number of moves taken in each direction.
 * @param logActions - Precomputed normalization factor for the action space.
 * @param scratch - Single-value floating-point scratch buffer reused by the
 *   caller; slot 0 receives the unnormalized entropy. The buffer may be empty.
 * @returns Normalized entropy in the range `[0, 1]` for non-negative counts
 *   and a `logActions` equal to the log of the action-space size.
 */
export function computeActionEntropyFromCounts(
  directionCounts: number[],
  logActions: number,
  scratch: Float64Array,
): number {
  // Fall back to 1 so an all-zero histogram does not divide by zero.
  const totalCount =
    directionCounts.reduce((sum, value) => sum + (value | 0), 0) || 1;

  // Accumulate locally so the result does not depend on the scratch buffer
  // actually having a writable slot.
  let entropy = 0;
  for (
    let directionIndex = 0, directionCount = directionCounts.length;
    directionIndex < directionCount;
    directionIndex++
  ) {
    const count = directionCounts[directionIndex] | 0;
    if (count === 0) continue;
    const probability = count / totalCount;
    entropy -= probability * Math.log(probability);
  }

  // Preserve the original side effect for callers that inspect the scratch slot.
  scratch[0] = entropy;

  // Negated `>` rejects NaN as well as zero and negative normalizers.
  if (!(logActions > 0) || !Number.isFinite(logActions)) return 0;

  return entropy / logActions;
}
|
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
# mazeMovement/policy
|
|
2
|
+
|
|
3
|
+
## mazeMovement/policy/mazeMovement.policy.ts
|
|
4
|
+
|
|
5
|
+
### mazeMovement.policy
|
|
6
|
+
|
|
7
|
+
Action policy helpers for the dedicated mazeMovement module.
|
|
8
|
+
|
|
9
|
+
This file owns direction selection, epsilon handling, short-horizon policy
|
|
10
|
+
overrides, and saturation-driven bias control.
|
|
11
|
+
|
|
12
|
+
### applyMazeMovementEpsilonExploration
|
|
13
|
+
|
|
14
|
+
`(state: import("test/examples/asciiMaze/mazeMovement/mazeMovement.types").SimulationState, encodedMaze: number[][], coordinateScratch: Int32Array<ArrayBufferLike>) => void`
|
|
15
|
+
|
|
16
|
+
Apply epsilon-greedy exploration to the current action choice.
|
|
17
|
+
|
|
18
|
+
Parameters:
|
|
19
|
+
- `state` - Mutable simulation state for the active run.
|
|
20
|
+
- `encodedMaze` - Maze grid used for move validity checks.
|
|
21
|
+
- `coordinateScratch` - Reused coordinate scratch buffer.
|
|
22
|
+
|
|
23
|
+
### applyMazeMovementForcedExploration
|
|
24
|
+
|
|
25
|
+
`(state: import("test/examples/asciiMaze/mazeMovement/mazeMovement.types").SimulationState, encodedMaze: number[][], coordinateScratch: Int32Array<ArrayBufferLike>) => void`
|
|
26
|
+
|
|
27
|
+
Force a random valid move when the policy has stalled with repeated no-move outputs.
|
|
28
|
+
|
|
29
|
+
Parameters:
|
|
30
|
+
- `state` - Mutable simulation state for the active run.
|
|
31
|
+
- `encodedMaze` - Maze grid used for move validity checks.
|
|
32
|
+
- `coordinateScratch` - Reused coordinate scratch buffer.
|
|
33
|
+
|
|
34
|
+
### applyMazeMovementProximityGreedy
|
|
35
|
+
|
|
36
|
+
`(state: import("test/examples/asciiMaze/mazeMovement/mazeMovement.types").SimulationState, encodedMaze: number[][], distanceMap: number[][] | undefined, coordinateScratch: Int32Array<ArrayBufferLike>) => void`
|
|
37
|
+
|
|
38
|
+
Apply the short-horizon proximity-greedy override near the maze exit.
|
|
39
|
+
|
|
40
|
+
Parameters:
|
|
41
|
+
- `state` - Mutable simulation state for the active run.
|
|
42
|
+
- `encodedMaze` - Maze grid used for move validity checks.
|
|
43
|
+
- `distanceMap` - Optional precomputed distance map.
|
|
44
|
+
- `coordinateScratch` - Reused coordinate scratch buffer.
|
|
45
|
+
|
|
46
|
+
### applyMazeMovementSaturationAndBiasAdjust
|
|
47
|
+
|
|
48
|
+
`(state: import("test/examples/asciiMaze/mazeMovement/mazeMovement.types").SimulationState, outputs: number[], network: import("test/examples/asciiMaze/interfaces").INetwork, coordinateScratch: Int32Array<ArrayBufferLike>) => void`
|
|
49
|
+
|
|
50
|
+
Detect saturation and optionally damp output-node biases.
|
|
51
|
+
|
|
52
|
+
Parameters:
|
|
53
|
+
- `state` - Mutable simulation state for the active run.
|
|
54
|
+
- `outputs` - Raw network logits for the current step.
|
|
55
|
+
- `network` - Policy network that produced the logits.
|
|
56
|
+
- `coordinateScratch` - Reused scratch buffer for temporary penalties.
|
|
57
|
+
|
|
58
|
+
### computeMazeMovementEpsilon
|
|
59
|
+
|
|
60
|
+
`(stepNumber: number, stepsSinceImprovement: number, distHere: number, saturations: number) => number`
|
|
61
|
+
|
|
62
|
+
Compute the adaptive epsilon used for policy exploration.
|
|
63
|
+
|
|
64
|
+
Parameters:
|
|
65
|
+
- `stepNumber` - Global step number inside the active simulation.
|
|
66
|
+
- `stepsSinceImprovement` - Number of steps without improvement.
|
|
67
|
+
- `distHere` - Current distance to goal for the active position.
|
|
68
|
+
- `saturations` - Rolling saturation count from the shared run state.
|
|
69
|
+
|
|
70
|
+
Returns: Exploration epsilon in the range `[0, 1]`.
|
|
71
|
+
|
|
72
|
+
### decideMazeMovementDirection
|
|
73
|
+
|
|
74
|
+
`(state: import("test/examples/asciiMaze/mazeMovement/mazeMovement.types").SimulationState, network: import("test/examples/asciiMaze/interfaces").INetwork, coordinateScratch: Int32Array<ArrayBufferLike>) => void`
|
|
75
|
+
|
|
76
|
+
Activate the network, record output history, and choose the next direction.
|
|
77
|
+
|
|
78
|
+
Parameters:
|
|
79
|
+
- `state` - Mutable simulation state for the active run.
|
|
80
|
+
- `network` - Policy network used for the current step.
|
|
81
|
+
|
|
82
|
+
### selectMazeMovementDirection
|
|
83
|
+
|
|
84
|
+
`(outputs: number[]) => import("test/examples/asciiMaze/mazeMovement/mazeMovement.types").DirectionSelectionStats`
|
|
85
|
+
|
|
86
|
+
Convert raw network outputs into a chosen direction plus diagnostics.
|
|
87
|
+
|
|
88
|
+
Parameters:
|
|
89
|
+
- `outputs` - Raw action logits for the four maze directions.
|
|
90
|
+
|
|
91
|
+
Returns: Chosen direction plus softmax and entropy diagnostics.
|
|
@@ -0,0 +1,467 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Action policy helpers for the dedicated mazeMovement module.
|
|
3
|
+
*
|
|
4
|
+
* This file owns direction selection, epsilon handling, short-horizon policy
|
|
5
|
+
* overrides, and saturation-driven bias control.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import type { INetwork, INodeStruct } from '../../interfaces';
|
|
9
|
+
import { MazeUtils } from '../../mazeUtils';
|
|
10
|
+
import { MAZE_MOVEMENT_CONSTANTS } from '../mazeMovement.constants';
|
|
11
|
+
import {
|
|
12
|
+
getMazeMovementRunServiceState,
|
|
13
|
+
randomMazeMovementUnit,
|
|
14
|
+
readMazeMovementOutputHistory,
|
|
15
|
+
writeMazeMovementOutputHistory,
|
|
16
|
+
} from '../mazeMovement.services';
|
|
17
|
+
import type {
|
|
18
|
+
DirectionSelectionStats,
|
|
19
|
+
SimulationState,
|
|
20
|
+
} from '../mazeMovement.types';
|
|
21
|
+
import {
|
|
22
|
+
getMazeMovementDistance,
|
|
23
|
+
isMazeMovementCellOpen,
|
|
24
|
+
} from '../runtime/mazeMovement.runtime';
|
|
25
|
+
|
|
26
|
+
// Short alias for the shared mazeMovement constants table used throughout this file.
const C = MAZE_MOVEMENT_CONSTANTS;
|
|
27
|
+
// Module-level 4-slot scratch buffer; direction selection writes each output's
// deviation from the mean output into it.
const POLICY_SCRATCH_CENTERED = new Float64Array(4);
|
|
28
|
+
// Module-level 4-slot scratch buffer. NOTE(review): presumably holds the
// exponentials for the softmax step; its use is not visible in this view.
const POLICY_SCRATCH_EXPS = new Float64Array(4);
|
|
29
|
+
// Module-level 4-slot buffer; a copy of it is returned as the `softmax` field
// when direction selection receives malformed outputs.
const POLICY_SOFTMAX = new Float64Array(4);
|
|
30
|
+
|
|
31
|
+
/**
 * Pick the exploration epsilon for the current step.
 *
 * The first matching rule wins, in this priority order: warmup, high
 * stagnation, moderate stagnation, saturation trigger; otherwise epsilon is 0.
 * When the agent is within the proximity-suppression distance of the goal the
 * chosen value is additionally capped at the near-goal minimum.
 *
 * @param stepNumber - Global step number inside the active simulation.
 * @param stepsSinceImprovement - Number of steps without improvement.
 * @param distHere - Current distance to goal for the active position.
 * @param saturations - Rolling saturation count from the shared run state.
 * @returns Exploration epsilon selected from the configured constants.
 */
export function computeMazeMovementEpsilon(
  stepNumber: number,
  stepsSinceImprovement: number,
  distHere: number,
  saturations: number,
): number {
  let epsilon: number;

  if (stepNumber < C.EPSILON_WARMUP_STEPS) {
    epsilon = C.EPSILON_INITIAL;
  } else if (stepsSinceImprovement > C.EPSILON_STAGNANT_HIGH_THRESHOLD) {
    epsilon = C.EPSILON_STAGNANT_HIGH;
  } else if (stepsSinceImprovement > C.EPSILON_STAGNANT_MED_THRESHOLD) {
    epsilon = C.EPSILON_STAGNANT_MED;
  } else if (saturations > C.EPSILON_SATURATION_TRIGGER) {
    epsilon = C.EPSILON_SATURATIONS;
  } else {
    epsilon = 0;
  }

  // Close to the goal, exploration is capped regardless of which rule fired.
  const isNearGoal = distHere <= C.PROXIMITY_SUPPRESS_EXPLOR_DIST;
  return isNearGoal ? Math.min(epsilon, C.EPSILON_MIN_NEAR_GOAL) : epsilon;
}
|
|
77
|
+
|
|
78
|
+
/**
 * Convert raw network outputs into a chosen direction plus diagnostics.
 *
 * The outputs are mean-centred, passed through a temperature-scaled softmax
 * (the temperature rises when the output spread is small), and the most
 * probable action is selected. The module-level scratch buffers are
 * overwritten on every valid call; the returned `softmax` is a copy.
 *
 * When `outputs` is not an array of exactly `C.ACTION_DIM` entries the result
 * is `C.NO_MOVE` with all-zero diagnostics, including an all-zero `softmax`.
 *
 * @param outputs - Raw action logits for the four maze directions.
 * @returns Chosen direction plus softmax, normalised entropy, and the top two
 * probabilities.
 */
export function selectMazeMovementDirection(
  outputs: number[],
): DirectionSelectionStats {
  const actionCount = C.ACTION_DIM;
  if (!Array.isArray(outputs) || outputs.length !== actionCount) {
    return {
      direction: C.NO_MOVE,
      // Fresh zero vector: POLICY_SOFTMAX still holds the probabilities of the
      // previous valid call, so copying it here would leak stale diagnostics
      // that contradict the zeroed entropy/maxProb/secondProb fields.
      softmax: new Array<number>(POLICY_SOFTMAX.length).fill(0),
      entropy: 0,
      maxProb: 0,
      secondProb: 0,
    };
  }

  // Mean of the raw outputs.
  let outputSum = 0;
  for (let actionIndex = 0; actionIndex < actionCount; actionIndex++) {
    outputSum += outputs[actionIndex];
  }
  const meanOutput = outputSum / actionCount;

  // Centre the outputs and measure their (population) standard deviation.
  let varianceAccumulator = 0;
  for (let actionIndex = 0; actionIndex < actionCount; actionIndex++) {
    const delta = outputs[actionIndex] - meanOutput;
    POLICY_SCRATCH_CENTERED[actionIndex] = delta;
    varianceAccumulator += delta * delta;
  }
  varianceAccumulator /= actionCount;
  let standardDeviation = Math.sqrt(varianceAccumulator);
  if (!Number.isFinite(standardDeviation) || standardDeviation < C.STD_MIN) {
    standardDeviation = C.STD_MIN;
  }

  // A small spread raises the softmax temperature in two tiers.
  const collapseRatio =
    standardDeviation < C.COLLAPSE_STD_THRESHOLD
      ? C.COLLAPSE_RATIO_FULL
      : standardDeviation < C.COLLAPSE_STD_MED
        ? C.COLLAPSE_RATIO_HALF
        : 0;
  const temperature = C.TEMPERATURE_BASE + C.TEMPERATURE_SCALE * collapseRatio;

  // Subtract the maximum before exponentiating so every exponent is <= 0.
  let maxCentered = -Infinity;
  for (let actionIndex = 0; actionIndex < actionCount; actionIndex++) {
    const centeredValue = POLICY_SCRATCH_CENTERED[actionIndex];
    if (centeredValue > maxCentered) maxCentered = centeredValue;
  }

  let expSum = 0;
  for (let actionIndex = 0; actionIndex < actionCount; actionIndex++) {
    const expValue = Math.exp(
      (POLICY_SCRATCH_CENTERED[actionIndex] - maxCentered) / temperature,
    );
    POLICY_SCRATCH_EXPS[actionIndex] = expValue;
    expSum += expValue;
  }
  // Guard the division below against a zero denominator.
  if (expSum === 0) expSum = 1;

  // Normalise and track the best and runner-up probabilities in one pass.
  let chosenDirection = 0;
  let bestProbability = -Infinity;
  let runnerUpProbability = 0;
  for (let actionIndex = 0; actionIndex < actionCount; actionIndex++) {
    const probability = POLICY_SCRATCH_EXPS[actionIndex] / expSum;
    POLICY_SOFTMAX[actionIndex] = probability;
    if (probability > bestProbability) {
      runnerUpProbability = bestProbability;
      bestProbability = probability;
      chosenDirection = actionIndex;
    } else if (probability > runnerUpProbability) {
      runnerUpProbability = probability;
    }
  }

  // Shannon entropy of the distribution, divided by C.LOG_ACTIONS.
  let entropy = 0;
  for (let actionIndex = 0; actionIndex < actionCount; actionIndex++) {
    const probability = POLICY_SOFTMAX[actionIndex];
    if (probability > 0) entropy += -probability * Math.log(probability);
  }
  entropy /= C.LOG_ACTIONS;

  return {
    direction: chosenDirection,
    // Copy so callers never alias the shared scratch buffer.
    softmax: Array.from(POLICY_SOFTMAX),
    entropy,
    maxProb: bestProbability,
    secondProb: runnerUpProbability,
  };
}
|
|
170
|
+
|
|
171
|
+
/**
 * Run one policy step: activate the network on the current vision input,
 * append the outputs to the network's output history, and store the selected
 * direction and its statistics on the simulation state.
 *
 * Does nothing when the run is already flagged for early termination. Any
 * exception thrown during the step is logged and the direction falls back to
 * `C.NO_MOVE`.
 *
 * @param state - Mutable simulation state for the active run.
 * @param network - Policy network used for the current step.
 * @param coordinateScratch - Scratch buffer forwarded to the saturation and
 * bias adjustment.
 */
export function decideMazeMovementDirection(
  state: SimulationState,
  network: INetwork,
  coordinateScratch: Int32Array,
): void {
  if (state.earlyTerminate) return;

  try {
    const logits = network.activate(state.vision);

    // Snapshot the outputs into a plain array before storing them in history.
    const logitCount = logits.length | 0;
    const logitSnapshot: number[] = Array.from(
      { length: logitCount },
      (_unused, slot) => logits[slot],
    );

    const historyBefore = readMazeMovementOutputHistory(network);
    const historyAfter = MazeUtils.pushHistory(
      historyBefore,
      logitSnapshot,
      C.OUTPUT_HISTORY_LENGTH,
    );
    writeMazeMovementOutputHistory(network, historyAfter);

    // The stats must be on the state before the saturation pass reads them.
    const stats = selectMazeMovementDirection(logits);
    state.actionStats = stats;
    applyMazeMovementSaturationAndBiasAdjust(
      state,
      logits,
      network,
      coordinateScratch,
    );
    state.direction = stats.direction;
  } catch (error: unknown) {
    console.error('Error activating network:', error);
    state.direction = C.NO_MOVE;
  }
}
|
|
214
|
+
|
|
215
|
+
/**
 * Near the exit, replace the current direction with the open neighbouring
 * cell that has the smallest distance to the goal.
 *
 * Skipped when the run is flagged for early termination or when the current
 * distance exceeds `C.PROXIMITY_GREEDY_DISTANCE`. Ties keep the
 * lowest-indexed direction because only a strictly smaller distance replaces
 * the current best.
 *
 * @param state - Mutable simulation state for the active run.
 * @param encodedMaze - Maze grid used for move validity checks.
 * @param distanceMap - Optional precomputed distance map.
 * @param coordinateScratch - Reused coordinate scratch buffer.
 */
export function applyMazeMovementProximityGreedy(
  state: SimulationState,
  encodedMaze: number[][],
  distanceMap: number[][] | undefined,
  coordinateScratch: Int32Array,
): void {
  if (state.earlyTerminate) return;
  if (state.distHere > C.PROXIMITY_GREEDY_DISTANCE) return;

  let bestDirection = state.direction;
  let bestDistance = Infinity;

  for (let dir = 0; dir < C.ACTION_DIM; dir++) {
    const delta = C.DIRECTION_DELTAS[dir];
    const nextX = (state.position[0] + delta[0]) | 0;
    const nextY = (state.position[1] + delta[1]) | 0;

    coordinateScratch[0] = nextX;
    coordinateScratch[1] = nextY;

    const isOpen = isMazeMovementCellOpen(
      encodedMaze,
      nextX,
      nextY,
      coordinateScratch,
    );
    if (isOpen) {
      const distance = getMazeMovementDistance(
        encodedMaze,
        [nextX, nextY],
        distanceMap,
      );
      if (distance < bestDistance) {
        bestDistance = distance;
        bestDirection = dir;
      }
    }
  }

  // Only write back when the greedy pick actually differs.
  const hasNewChoice =
    bestDirection !== undefined && bestDirection !== state.direction;
  if (hasNewChoice) {
    state.direction = bestDirection;
  }
}
|
|
274
|
+
|
|
275
|
+
/**
 * With probability epsilon, replace the current direction with a randomly
 * sampled one that leads to an open cell.
 *
 * Up to `C.ACTION_DIM` samples are drawn. A sample equal to the previous
 * action, or one pointing at a closed cell, is discarded. If every sample is
 * discarded the direction is left unchanged.
 *
 * @param state - Mutable simulation state for the active run.
 * @param encodedMaze - Maze grid used for move validity checks.
 * @param coordinateScratch - Reused coordinate scratch buffer.
 */
export function applyMazeMovementEpsilonExploration(
  state: SimulationState,
  encodedMaze: number[][],
  coordinateScratch: Int32Array,
): void {
  if (state.earlyTerminate) return;

  const explorationRate = computeMazeMovementEpsilon(
    state.steps,
    state.stepsSinceImprovement,
    state.distHere,
    getMazeMovementRunServiceState().saturations,
  );
  const shouldExplore = randomMazeMovementUnit() < explorationRate;
  if (!shouldExplore) return;

  const directionCount = C.ACTION_DIM;
  const lastAction = state.prevAction;
  const originX = state.position[0] | 0;
  const originY = state.position[1] | 0;

  let attemptsLeft = directionCount;
  while (attemptsLeft-- > 0) {
    const sampled = (randomMazeMovementUnit() * directionCount) | 0;
    if (sampled === lastAction) continue;

    const [stepX, stepY] = C.DIRECTION_DELTAS[sampled];
    const targetX = (originX + stepX) | 0;
    const targetY = (originY + stepY) | 0;
    coordinateScratch[0] = targetX;
    coordinateScratch[1] = targetY;

    const isOpen = isMazeMovementCellOpen(
      encodedMaze,
      targetX,
      targetY,
      coordinateScratch,
    );
    if (!isOpen) continue;

    // First open sample wins; stop sampling.
    state.direction = sampled;
    return;
  }
}
|
|
326
|
+
|
|
327
|
+
/**
 * Break a run of consecutive no-move decisions by forcing a random open move.
 *
 * The shared no-move streak is incremented when the current direction is
 * `C.NO_MOVE` and reset otherwise. Once the streak reaches
 * `C.NO_MOVE_STREAK_THRESHOLD`, up to `C.ACTION_DIM` random directions are
 * sampled and the first one pointing at an open cell is taken. The streak is
 * reset afterwards whether or not an open cell was found.
 *
 * @param state - Mutable simulation state for the active run.
 * @param encodedMaze - Maze grid used for move validity checks.
 * @param coordinateScratch - Reused coordinate scratch buffer.
 */
export function applyMazeMovementForcedExploration(
  state: SimulationState,
  encodedMaze: number[][],
  coordinateScratch: Int32Array,
): void {
  if (state.earlyTerminate) return;

  const runState = getMazeMovementRunServiceState();
  runState.noMoveStreak =
    state.direction === C.NO_MOVE ? runState.noMoveStreak + 1 : 0;

  if (runState.noMoveStreak < C.NO_MOVE_STREAK_THRESHOLD) return;

  const directionCount = C.ACTION_DIM;
  const originX = state.position[0] | 0;
  const originY = state.position[1] | 0;

  for (let remaining = directionCount; remaining > 0; remaining--) {
    const sampled = (randomMazeMovementUnit() * directionCount) | 0;
    const [stepX, stepY] = C.DIRECTION_DELTAS[sampled];
    const targetX = (originX + stepX) | 0;
    const targetY = (originY + stepY) | 0;
    coordinateScratch[0] = targetX;
    coordinateScratch[1] = targetY;

    const isOpen = isMazeMovementCellOpen(
      encodedMaze,
      targetX,
      targetY,
      coordinateScratch,
    );
    if (isOpen) {
      state.direction = sampled;
      break;
    }
  }

  // Start a new streak regardless of whether a move was found.
  runState.noMoveStreak = 0;
}
|
|
377
|
+
|
|
378
|
+
/**
 * Detect saturation and optionally damp output-node biases.
 *
 * A step counts as saturated when the policy is over-confident (top
 * probability above `C.OVERCONFIDENT_PROB` with the runner-up below
 * `C.SECOND_PROB_LOW`) or when the logits are nearly flat (standard deviation
 * below `C.LOGSTD_FLAT_THRESHOLD`). Saturated steps increment the shared
 * saturation counter and `state.saturatedSteps`; other steps decrement the
 * counter down to zero. Each condition also subtracts its own penalty, scaled
 * by `C.REWARD_SCALE`, from `state.invalidMovePenalty`.
 *
 * When the counter exceeds `C.SATURATION_ADJUST_MIN` on a step that is a
 * multiple of `C.SATURATION_ADJUST_INTERVAL`, a fraction of the mean
 * output-node bias is subtracted from every output node and the result is
 * clamped to `[-C.BIAS_CLAMP, C.BIAS_CLAMP]`. That adjustment is best-effort:
 * errors while touching the network are ignored.
 *
 * @param state - Mutable simulation state for the active run.
 * @param outputs - Raw network logits for the current step.
 * @param network - Policy network that produced the logits.
 * @param coordinateScratch - Kept for call-signature compatibility. It is not
 * read or written: an integer buffer cannot hold fractional penalties.
 */
export function applyMazeMovementSaturationAndBiasAdjust(
  state: SimulationState,
  outputs: number[],
  network: INetwork,
  coordinateScratch: Int32Array,
): void {
  // Intentionally unused; referenced so unused-parameter checks stay quiet.
  void coordinateScratch;

  const rewardScale = C.REWARD_SCALE;
  const actionStats = state.actionStats;
  if (!actionStats) return;

  const maxProbability = actionStats.maxProb ?? 0;
  const secondProbability = actionStats.secondProb ?? 0;
  const isOverConfident =
    maxProbability > C.OVERCONFIDENT_PROB &&
    secondProbability < C.SECOND_PROB_LOW;

  // Spread of the raw logits, used to detect a flat (collapsed) policy.
  const actionCount = C.ACTION_DIM;
  let logitsSum = 0;
  for (let outputIndex = 0; outputIndex < outputs.length; outputIndex++) {
    logitsSum += outputs[outputIndex];
  }
  const meanLogit = logitsSum / actionCount;

  let varianceAccumulator = 0;
  for (let outputIndex = 0; outputIndex < outputs.length; outputIndex++) {
    const delta = outputs[outputIndex] - meanLogit;
    varianceAccumulator += delta * delta;
  }
  const variance = varianceAccumulator / actionCount;
  const standardDeviation = Math.sqrt(variance);
  const isFlatCollapsed = standardDeviation < C.LOGSTD_FLAT_THRESHOLD;

  // Rolling counter: up on saturated steps, down (to zero) otherwise.
  const runServices = getMazeMovementRunServiceState();
  let saturationCounter = runServices.saturations;
  if (isOverConfident || isFlatCollapsed) {
    saturationCounter++;
    state.saturatedSteps++;
  } else if (saturationCounter > 0) {
    saturationCounter--;
  }
  runServices.saturations = saturationCounter;

  // Apply penalties as plain numbers. Storing them in the Int32Array scratch
  // first would truncate any fractional penalty toward zero.
  if (isOverConfident) {
    state.invalidMovePenalty -= C.OVERCONFIDENT_PENALTY * rewardScale;
  }
  if (isFlatCollapsed) {
    state.invalidMovePenalty -= C.FLAT_COLLAPSE_PENALTY * rewardScale;
  }

  const shouldAdjustBiases =
    runServices.saturations > C.SATURATION_ADJUST_MIN &&
    state.steps % C.SATURATION_ADJUST_INTERVAL === 0;
  if (!shouldAdjustBiases) return;

  try {
    const outputNodes = network.nodes?.filter(
      (node: INodeStruct): node is INodeStruct & { bias: number } =>
        node.type === C.NODE_TYPE_OUTPUT && typeof node.bias === 'number',
    );
    if (!outputNodes || outputNodes.length === 0) return;

    let biasSum = 0;
    for (let outputIndex = 0; outputIndex < outputNodes.length; outputIndex++) {
      biasSum += outputNodes[outputIndex].bias;
    }
    const meanBias = biasSum / outputNodes.length;

    // Pull each bias toward zero-mean, then clamp it to the allowed range.
    for (let outputIndex = 0; outputIndex < outputNodes.length; outputIndex++) {
      const outputNode = outputNodes[outputIndex];
      const adjustedBias = outputNode.bias - meanBias * C.BIAS_ADJUST_FACTOR;
      outputNode.bias = Math.max(
        -C.BIAS_CLAMP,
        Math.min(C.BIAS_CLAMP, adjustedBias),
      );
    }
  } catch {
    // Best-effort only; network shapes vary across tests and demos.
  }
}
|
|
466
|
+
|
|
467
|
+
// Empty export statement; it declares no bindings of its own.
export {};
|