@reicek/neataptic-ts 0.1.21 → 0.1.22
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/agents/boundary-mapper.agent.md +29 -0
- package/.github/agents/docs-scout.agent.md +29 -0
- package/.github/agents/plan-scout.agent.md +29 -0
- package/.github/agents/solid-split.agent.md +138 -0
- package/.github/copilot-instructions.md +103 -0
- package/package.json +6 -3
- package/plans/ES2023 migration +13 -8
- package/plans/Evolution_Training_Interoperability_Contracts.md +1 -1
- package/plans/Interactive_Examples_and_Learning_Path.md +10 -2
- package/plans/Memory_Optimization.md +3 -3
- package/plans/README.md +63 -0
- package/plans/Roadmap.md +15 -3
- package/plans/asciiMaze_SOLID_split.done.md +130 -0
- package/plans/flappy_bird_SOLID_split.done.md +67 -0
- package/scripts/assets/theme.css +221 -34
- package/scripts/copy-examples.mjs +9 -5
- package/scripts/export-onnx.mjs +3 -3
- package/scripts/generate-bench-tables.mjs +10 -10
- package/scripts/generate-bench-tables.ts +10 -10
- package/scripts/generate-docs.ts +1415 -449
- package/scripts/render-docs-html.ts +15 -8
- package/src/README.md +101 -223
- package/src/architecture/README.md +57 -185
- package/src/architecture/layer/README.md +38 -38
- package/src/architecture/network/README.md +33 -31
- package/src/architecture/network/activate/README.md +77 -77
- package/src/architecture/network/connect/README.md +15 -13
- package/src/architecture/network/deterministic/README.md +7 -7
- package/src/architecture/network/evolve/README.md +44 -44
- package/src/architecture/network/gating/README.md +20 -20
- package/src/architecture/network/genetic/README.md +51 -51
- package/src/architecture/network/mutate/README.md +97 -97
- package/src/architecture/network/onnx/README.md +264 -264
- package/src/architecture/network/prune/README.md +39 -39
- package/src/architecture/network/remove/README.md +26 -26
- package/src/architecture/network/serialize/README.md +56 -56
- package/src/architecture/network/slab/README.md +61 -61
- package/src/architecture/network/standalone/README.md +24 -24
- package/src/architecture/network/stats/README.md +9 -9
- package/src/architecture/network/topology/README.md +46 -46
- package/src/architecture/network/training/README.md +21 -21
- package/src/methods/README.md +9 -87
- package/src/multithreading/README.md +8 -77
- package/src/multithreading/workers/README.md +2 -2
- package/src/multithreading/workers/browser/README.md +0 -6
- package/src/multithreading/workers/node/README.md +0 -3
- package/src/neat/README.md +562 -568
- package/src/utils/README.md +18 -18
- package/test/examples/asciiMaze/README.md +59 -59
- package/test/examples/asciiMaze/asciiMaze.e2e.test.ts +14 -9
- package/test/examples/asciiMaze/browser-entry/README.md +196 -0
- package/test/examples/asciiMaze/browser-entry/browser-entry.abort.services.ts +95 -0
- package/test/examples/asciiMaze/browser-entry/browser-entry.constants.ts +23 -0
- package/test/examples/asciiMaze/browser-entry/browser-entry.curriculum.services.ts +115 -0
- package/test/examples/asciiMaze/browser-entry/browser-entry.globals.services.ts +106 -0
- package/test/examples/asciiMaze/browser-entry/browser-entry.host.services.ts +157 -0
- package/test/examples/asciiMaze/browser-entry/browser-entry.services.ts +14 -0
- package/test/examples/asciiMaze/browser-entry/browser-entry.ts +129 -0
- package/test/examples/asciiMaze/browser-entry/browser-entry.types.ts +120 -0
- package/test/examples/asciiMaze/browser-entry/browser-entry.utils.ts +98 -0
- package/test/examples/asciiMaze/browser-entry.ts +10 -576
- package/test/examples/asciiMaze/dashboardManager/README.md +276 -0
- package/test/examples/asciiMaze/dashboardManager/archive/README.md +16 -0
- package/test/examples/asciiMaze/dashboardManager/archive/dashboardManager.archive.services.ts +267 -0
- package/test/examples/asciiMaze/dashboardManager/dashboardManager.constants.ts +35 -0
- package/test/examples/asciiMaze/dashboardManager/dashboardManager.services.ts +103 -0
- package/test/examples/asciiMaze/dashboardManager/dashboardManager.ts +181 -0
- package/test/examples/asciiMaze/dashboardManager/dashboardManager.types.ts +267 -0
- package/test/examples/asciiMaze/dashboardManager/dashboardManager.utils.ts +254 -0
- package/test/examples/asciiMaze/dashboardManager/live/README.md +14 -0
- package/test/examples/asciiMaze/dashboardManager/live/dashboardManager.live.services.ts +264 -0
- package/test/examples/asciiMaze/dashboardManager/telemetry/README.md +47 -0
- package/test/examples/asciiMaze/dashboardManager/telemetry/dashboardManager.telemetry.services.ts +513 -0
- package/test/examples/asciiMaze/dashboardManager.ts +13 -2335
- package/test/examples/asciiMaze/evolutionEngine/README.md +1058 -0
- package/test/examples/asciiMaze/evolutionEngine/curriculumPhase.ts +90 -0
- package/test/examples/asciiMaze/evolutionEngine/engineState.constants.ts +36 -0
- package/test/examples/asciiMaze/evolutionEngine/engineState.ts +58 -513
- package/test/examples/asciiMaze/evolutionEngine/engineState.types.ts +212 -0
- package/test/examples/asciiMaze/evolutionEngine/engineState.utils.ts +301 -0
- package/test/examples/asciiMaze/evolutionEngine/evolutionEngine.types.ts +445 -0
- package/test/examples/asciiMaze/evolutionEngine/evolutionLoop.ts +81 -50
- package/test/examples/asciiMaze/evolutionEngine/optionsAndSetup.ts +2 -4
- package/test/examples/asciiMaze/evolutionEngine/populationDynamics.ts +17 -33
- package/test/examples/asciiMaze/evolutionEngine/populationPruning.ts +1 -1
- package/test/examples/asciiMaze/evolutionEngine/rngAndTiming.ts +1 -2
- package/test/examples/asciiMaze/evolutionEngine/sampling.ts +1 -1
- package/test/examples/asciiMaze/evolutionEngine/scratchPools.ts +2 -5
- package/test/examples/asciiMaze/evolutionEngine/setupHelpers.ts +30 -37
- package/test/examples/asciiMaze/evolutionEngine/telemetryMetrics.ts +16 -58
- package/test/examples/asciiMaze/evolutionEngine/trainingWarmStart.ts +2 -2
- package/test/examples/asciiMaze/evolutionEngine.ts +55 -55
- package/test/examples/asciiMaze/fitness.ts +2 -2
- package/test/examples/asciiMaze/fitness.types.ts +65 -0
- package/test/examples/asciiMaze/interfaces.ts +64 -1352
- package/test/examples/asciiMaze/mazeMovement/README.md +356 -0
- package/test/examples/asciiMaze/mazeMovement/finalization/README.md +49 -0
- package/test/examples/asciiMaze/mazeMovement/finalization/mazeMovement.finalization.ts +138 -0
- package/test/examples/asciiMaze/mazeMovement/mazeMovement.constants.ts +101 -0
- package/test/examples/asciiMaze/mazeMovement/mazeMovement.services.ts +230 -0
- package/test/examples/asciiMaze/mazeMovement/mazeMovement.ts +299 -0
- package/test/examples/asciiMaze/mazeMovement/mazeMovement.types.ts +185 -0
- package/test/examples/asciiMaze/mazeMovement/mazeMovement.utils.ts +153 -0
- package/test/examples/asciiMaze/mazeMovement/policy/README.md +91 -0
- package/test/examples/asciiMaze/mazeMovement/policy/mazeMovement.policy.ts +467 -0
- package/test/examples/asciiMaze/mazeMovement/runtime/README.md +95 -0
- package/test/examples/asciiMaze/mazeMovement/runtime/mazeMovement.runtime.ts +354 -0
- package/test/examples/asciiMaze/mazeMovement/shaping/README.md +124 -0
- package/test/examples/asciiMaze/mazeMovement/shaping/mazeMovement.shaping.ts +459 -0
- package/test/examples/asciiMaze/mazeMovement.ts +12 -2978
- package/test/examples/flappy_bird/Trace-20260309T191949.json +24124 -0
- package/test/examples/flappy_bird/browser-entry/README.md +1129 -0
- package/test/examples/flappy_bird/browser-entry/browser-entry.host.utils.ts +4 -324
- package/test/examples/flappy_bird/browser-entry/browser-entry.network-view.utils.ts +6 -399
- package/test/examples/flappy_bird/browser-entry/browser-entry.playback.utils.ts +1 -717
- package/test/examples/flappy_bird/browser-entry/browser-entry.spawn.utils.ts +11 -31
- package/test/examples/flappy_bird/browser-entry/browser-entry.visualization.utils.ts +15 -893
- package/test/examples/flappy_bird/browser-entry/host/README.md +307 -0
- package/test/examples/flappy_bird/browser-entry/host/host.resize.service.ts +1 -295
- package/test/examples/flappy_bird/browser-entry/host/host.ts +562 -6
- package/test/examples/flappy_bird/browser-entry/host/resize/README.md +274 -0
- package/test/examples/flappy_bird/browser-entry/host/resize/host.resize.service.constants.ts +31 -0
- package/test/examples/flappy_bird/browser-entry/host/resize/host.resize.service.services.ts +360 -0
- package/test/examples/flappy_bird/browser-entry/host/resize/host.resize.service.ts +117 -0
- package/test/examples/flappy_bird/browser-entry/host/resize/host.resize.service.types.ts +63 -0
- package/test/examples/flappy_bird/browser-entry/host/resize/host.resize.service.utils.ts +250 -0
- package/test/examples/flappy_bird/browser-entry/network-view/README.md +399 -0
- package/test/examples/flappy_bird/browser-entry/network-view/network-view.topology.utils.ts +255 -0
- package/test/examples/flappy_bird/browser-entry/network-view/network-view.ts +802 -7
- package/test/examples/flappy_bird/browser-entry/playback/README.md +684 -0
- package/test/examples/flappy_bird/browser-entry/playback/background/README.md +277 -0
- package/test/examples/flappy_bird/browser-entry/playback/background/ground-grid/README.md +770 -0
- package/test/examples/flappy_bird/browser-entry/playback/background/ground-grid/playback.background.ground-grid.cache.services.ts +178 -0
- package/test/examples/flappy_bird/browser-entry/playback/background/ground-grid/playback.background.ground-grid.constants.ts +107 -0
- package/test/examples/flappy_bird/browser-entry/playback/background/ground-grid/playback.background.ground-grid.geometry.utils.ts +518 -0
- package/test/examples/flappy_bird/browser-entry/playback/background/ground-grid/playback.background.ground-grid.math.utils.ts +117 -0
- package/test/examples/flappy_bird/browser-entry/playback/background/ground-grid/playback.background.ground-grid.pulse.utils.ts +233 -0
- package/test/examples/flappy_bird/browser-entry/playback/background/ground-grid/playback.background.ground-grid.services.ts +211 -0
- package/test/examples/flappy_bird/browser-entry/playback/background/ground-grid/playback.background.ground-grid.ts +48 -0
- package/test/examples/flappy_bird/browser-entry/playback/background/ground-grid/playback.background.ground-grid.types.ts +212 -0
- package/test/examples/flappy_bird/browser-entry/playback/background/ground-grid/playback.background.ground-grid.utils.ts +81 -0
- package/test/examples/flappy_bird/browser-entry/playback/background/playback.background.cache.services.ts +96 -0
- package/test/examples/flappy_bird/browser-entry/playback/background/playback.background.constants.ts +62 -0
- package/test/examples/flappy_bird/browser-entry/playback/background/playback.background.services.ts +244 -0
- package/test/examples/flappy_bird/browser-entry/playback/background/playback.background.ts +53 -0
- package/test/examples/flappy_bird/browser-entry/playback/background/playback.background.types.ts +68 -0
- package/test/examples/flappy_bird/browser-entry/playback/background/playback.background.utils.ts +100 -0
- package/test/examples/flappy_bird/browser-entry/playback/frame-render/README.md +310 -0
- package/test/examples/flappy_bird/browser-entry/playback/frame-render/playback.frame-render.service.ts +92 -0
- package/test/examples/flappy_bird/browser-entry/playback/frame-render/playback.frame-render.services.ts +272 -0
- package/test/examples/flappy_bird/browser-entry/playback/frame-render/playback.frame-render.types.ts +39 -0
- package/test/examples/flappy_bird/browser-entry/playback/frame-render/playback.frame-render.utils.ts +493 -0
- package/test/examples/flappy_bird/browser-entry/playback/playback.constants.ts +1 -1
- package/test/examples/flappy_bird/browser-entry/playback/playback.frame-render.service.ts +4 -0
- package/test/examples/flappy_bird/browser-entry/playback/playback.snapshot.utils.ts +44 -0
- package/test/examples/flappy_bird/browser-entry/playback/playback.starfield.service.ts +39 -122
- package/test/examples/flappy_bird/browser-entry/playback/playback.starfield.services.ts +272 -0
- package/test/examples/flappy_bird/browser-entry/playback/playback.starfield.types.ts +62 -0
- package/test/examples/flappy_bird/browser-entry/playback/playback.starfield.utils.ts +11 -4
- package/test/examples/flappy_bird/browser-entry/playback/playback.ts +409 -8
- package/test/examples/flappy_bird/browser-entry/playback/playback.types.ts +4 -12
- package/test/examples/flappy_bird/browser-entry/runtime/README.md +235 -0
- package/test/examples/flappy_bird/browser-entry/runtime/runtime.evolution-launch.service.ts +45 -0
- package/test/examples/flappy_bird/browser-entry/runtime/runtime.lifecycle.service.ts +81 -0
- package/test/examples/flappy_bird/browser-entry/runtime/runtime.startup.service.ts +74 -0
- package/test/examples/flappy_bird/browser-entry/runtime/runtime.ts +31 -121
- package/test/examples/flappy_bird/browser-entry/runtime/runtime.types.ts +36 -0
- package/test/examples/flappy_bird/browser-entry/visualization/README.md +557 -0
- package/test/examples/flappy_bird/browser-entry/visualization/visualization.constants.ts +110 -0
- package/test/examples/flappy_bird/browser-entry/visualization/visualization.draw.service.ts +957 -19
- package/test/examples/flappy_bird/browser-entry/visualization/visualization.legend.utils.ts +138 -3
- package/test/examples/flappy_bird/browser-entry/visualization/visualization.topology.utils.ts +3 -27
- package/test/examples/flappy_bird/browser-entry/visualization/visualization.ts +1 -23
- package/test/examples/flappy_bird/browser-entry/worker-channel/README.md +156 -0
- package/test/examples/flappy_bird/constants/README.md +1179 -0
- package/test/examples/flappy_bird/constants/constants.network-view.ts +24 -0
- package/test/examples/flappy_bird/constants/constants.palette.ts +7 -0
- package/test/examples/flappy_bird/constants/constants.starfield.ts +78 -3
- package/test/examples/flappy_bird/environment/README.md +143 -0
- package/test/examples/flappy_bird/environment/environment.observation.utils.ts +1 -19
- package/test/examples/flappy_bird/environment/environment.step.service.ts +3 -66
- package/test/examples/flappy_bird/evaluation/README.md +130 -0
- package/test/examples/flappy_bird/evaluation/evaluation.fitness.utils.ts +1 -1
- package/test/examples/flappy_bird/evaluation/evaluation.rollout.service.ts +5 -375
- package/test/examples/flappy_bird/evaluation/rollout/README.md +291 -0
- package/test/examples/flappy_bird/evaluation/rollout/evaluation.rollout.constants.ts +30 -0
- package/test/examples/flappy_bird/evaluation/rollout/evaluation.rollout.service.ts +58 -0
- package/test/examples/flappy_bird/evaluation/rollout/evaluation.rollout.services.ts +310 -0
- package/test/examples/flappy_bird/evaluation/rollout/evaluation.rollout.types.ts +56 -0
- package/test/examples/flappy_bird/evaluation/rollout/evaluation.rollout.utils.ts +368 -0
- package/test/examples/flappy_bird/flappy-evolution-worker/README.md +618 -0
- package/test/examples/flappy_bird/flappy-evolution-worker/flappy-evolution-worker.playback.service.ts +7 -7
- package/test/examples/flappy_bird/flappy-evolution-worker/flappy-evolution-worker.simulation.frame.service.ts +364 -0
- package/test/examples/flappy_bird/flappy-evolution-worker/flappy-evolution-worker.simulation.types.ts +14 -0
- package/test/examples/flappy_bird/flappy-evolution-worker/flappy-evolution-worker.simulation.utils.ts +4 -201
- package/test/examples/flappy_bird/flappy-evolution-worker/flappy-evolution-worker.ts +184 -345
- package/test/examples/flappy_bird/flappy-evolution-worker/flappy-evolution-worker.warm-start.service.ts +291 -0
- package/test/examples/flappy_bird/flappy.simulation.shared.utils.ts +5 -0
- package/test/examples/flappy_bird/simulation-shared/README.md +417 -0
- package/test/examples/flappy_bird/simulation-shared/observation/README.md +183 -0
- package/test/examples/flappy_bird/simulation-shared/observation/observation.features.utils.ts +301 -0
- package/test/examples/flappy_bird/simulation-shared/observation/observation.ts +9 -0
- package/test/examples/flappy_bird/simulation-shared/observation/observation.vector.utils.ts +59 -0
- package/test/examples/flappy_bird/simulation-shared/simulation-shared.observation.utils.ts +5 -403
- package/test/examples/flappy_bird/simulation-shared/simulation-shared.spawn.utils.ts +20 -6
- package/test/examples/flappy_bird/{evaluation/evaluation.statistics.utils.ts → simulation-shared/simulation-shared.statistics.utils.ts} +23 -8
- package/test/examples/flappy_bird/trainer/README.md +563 -0
- package/test/examples/flappy_bird/trainer/evaluation/README.md +199 -0
- package/test/examples/flappy_bird/trainer/evaluation/trainer.evaluation.service.constants.ts +9 -0
- package/test/examples/flappy_bird/trainer/evaluation/trainer.evaluation.service.services.ts +73 -0
- package/test/examples/flappy_bird/trainer/evaluation/trainer.evaluation.service.ts +165 -0
- package/test/examples/flappy_bird/trainer/evaluation/trainer.evaluation.service.types.ts +25 -0
- package/test/examples/flappy_bird/trainer/evaluation/trainer.evaluation.service.utils.ts +161 -0
- package/test/examples/flappy_bird/trainer/trainer.evaluation.service.ts +13 -0
- package/test/examples/flappy_bird/trainer/trainer.report.service.services.ts +181 -0
- package/test/examples/flappy_bird/trainer/trainer.report.service.ts +126 -0
- package/test/examples/flappy_bird/trainer/trainer.selection.utils.ts +89 -0
- package/test/examples/flappy_bird/trainer/trainer.ts +11 -553
- package/test/examples/flappy_bird/browser-entry/browser-entry.utils.ts +0 -12
- package/test/examples/flappy_bird/environment/environment.ts +0 -7
- package/test/examples/flappy_bird/evaluation/evaluation.ts +0 -7
- package/test/examples/flappy_bird/simulation-shared/simulation-shared.ts +0 -15
- package/test/examples/flappy_bird/trainer/trainer.statistics.utils.ts +0 -78
|
@@ -0,0 +1,368 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Rollout shaping and result helpers.
|
|
3
|
+
*
|
|
4
|
+
 * This file hosts rollout-local fitness composition, shaping utilities,
|
|
5
|
+
* and terminal result assembly helpers.
|
|
6
|
+
*/
|
|
7
|
+
import {
|
|
8
|
+
FLAPPY_FITNESS_ALIGNMENT_WEIGHT_PER_FRAME,
|
|
9
|
+
FLAPPY_FITNESS_APPROACH_PROGRESS_WEIGHT,
|
|
10
|
+
FLAPPY_FITNESS_BONUS_PER_PIPE,
|
|
11
|
+
FLAPPY_FITNESS_CENTERING_PROGRESS_WEIGHT,
|
|
12
|
+
FLAPPY_FITNESS_CLEARANCE_WEIGHT_PER_FRAME,
|
|
13
|
+
FLAPPY_FITNESS_SECOND_GAP_ALIGNMENT_WEIGHT_PER_FRAME,
|
|
14
|
+
FLAPPY_FITNESS_STABLE_VELOCITY_WEIGHT_PER_FRAME,
|
|
15
|
+
FLAPPY_FITNESS_SURVIVAL_WEIGHT,
|
|
16
|
+
FLAPPY_FITNESS_TERMINAL_ALIGNMENT_BONUS_WEIGHT,
|
|
17
|
+
FLAPPY_FITNESS_TERMINAL_PROGRESS_BONUS_WEIGHT,
|
|
18
|
+
} from '../../constants/constants';
|
|
19
|
+
import {
|
|
20
|
+
FLAPPY_EVALUATION_DEFAULT_PIPE_PROGRESS_TARGET,
|
|
21
|
+
FLAPPY_EVALUATION_DENSE_SHAPING_FRAMES_NORMALIZER,
|
|
22
|
+
FLAPPY_EVALUATION_NORMALIZED_DENSE_WEIGHT,
|
|
23
|
+
FLAPPY_EVALUATION_NORMALIZED_PROGRESS_WEIGHT,
|
|
24
|
+
FLAPPY_EVALUATION_NORMALIZED_SURVIVAL_WEIGHT,
|
|
25
|
+
FLAPPY_EVALUATION_NORMALIZED_TERMINAL_WEIGHT,
|
|
26
|
+
FLAPPY_EVALUATION_UNRECOVERABLE_ABOVE_GAP_DELTA,
|
|
27
|
+
FLAPPY_EVALUATION_UNRECOVERABLE_BELOW_GAP_DELTA,
|
|
28
|
+
FLAPPY_EVALUATION_UNRECOVERABLE_CLEARANCE_THRESHOLD,
|
|
29
|
+
FLAPPY_EVALUATION_UNRECOVERABLE_FALLING_VELOCITY,
|
|
30
|
+
FLAPPY_EVALUATION_UNRECOVERABLE_RISING_VELOCITY,
|
|
31
|
+
} from '../evaluation.constants';
|
|
32
|
+
import {
|
|
33
|
+
getFlappyObservationFeatures,
|
|
34
|
+
type FlappyGameState,
|
|
35
|
+
type FlappyObservationFeatures,
|
|
36
|
+
} from '../../flappyEnvironment.ts';
|
|
37
|
+
import { clampValue } from '../../flappy.simulation.shared.utils';
|
|
38
|
+
import {
|
|
39
|
+
FLAPPY_ROLLOUT_MIN_MAX_FRAMES,
|
|
40
|
+
FLAPPY_ROLLOUT_ZERO_FITNESS,
|
|
41
|
+
} from './evaluation.rollout.constants';
|
|
42
|
+
import type {
|
|
43
|
+
DenseShapingRewardComponents,
|
|
44
|
+
RolloutEpisodeContext,
|
|
45
|
+
RolloutEpisodeRuntimeState,
|
|
46
|
+
RolloutFitnessBreakdown,
|
|
47
|
+
} from './evaluation.rollout.types';
|
|
48
|
+
import type { FlappyEpisodeResult } from '../evaluation.types';
|
|
49
|
+
|
|
50
|
+
/**
 * Builds the public episode result from the rollout's current game state.
 *
 * Frame and pipe counters are read from the runtime game state, the
 * per-channel fitness breakdown is resolved from them, and the scalar fitness
 * is either the normalized composite or the plain channel sum, depending on
 * `rolloutEpisodeContext.normalizeFitness`.
 *
 * @param rolloutEpisodeContext - Rollout configuration; this function reads
 * `normalizeFitness`, `maxFramesPerEpisode` and `pipeProgressTarget`.
 * @param rolloutEpisodeRuntimeState - Runtime state carrying the game state
 * and the accumulated dense shaping fitness.
 * @returns Episode result with counters, termination info, scalar fitness and
 * the per-channel fitness breakdown.
 */
export function composeRolloutEpisodeResult(
  rolloutEpisodeContext: RolloutEpisodeContext,
  rolloutEpisodeRuntimeState: RolloutEpisodeRuntimeState,
): FlappyEpisodeResult {
  // Step 1: Read the final counters and resolve the per-channel breakdown.
  const finalState = rolloutEpisodeRuntimeState.state;
  const framesSurvived = finalState.frameIndex;
  const pipesPassed = finalState.pipesPassed;
  const channels = resolveRolloutFitnessBreakdown(
    rolloutEpisodeContext,
    rolloutEpisodeRuntimeState,
    framesSurvived,
    pipesPassed,
  );

  // Step 2: Pick the scalar objective - normalized composite or raw sum.
  let fitness: number;
  if (rolloutEpisodeContext.normalizeFitness) {
    fitness = composeNormalizedFitness(
      framesSurvived,
      pipesPassed,
      channels.denseShapingFitness,
      channels.terminalShapingFitness,
      rolloutEpisodeContext.maxFramesPerEpisode,
      rolloutEpisodeContext.pipeProgressTarget,
    );
  } else {
    fitness = resolveUnnormalizedRolloutFitness(channels);
  }

  // Step 3: Assemble the payload; termination fields are read last.
  return {
    framesSurvived,
    pipesPassed,
    done: finalState.done,
    doneReason: finalState.doneReason,
    fitness,
    fitnessBreakdown: {
      survival: channels.survivalFitness,
      pipeProgress: channels.pipePassFitness,
      denseShaping: channels.denseShapingFitness,
      terminalShaping: channels.terminalShapingFitness,
    },
  };
}
|
|
98
|
+
|
|
99
|
+
/**
 * Collapses the dense-shaping components for one step into a single reward.
 *
 * @param previousFeatures - Observation features captured before the step.
 * @param currentFeatures - Observation features captured after the step.
 * @returns Sum of all dense-shaping components for this step.
 */
export function computeDenseShapingReward(
  previousFeatures: FlappyObservationFeatures,
  currentFeatures: FlappyObservationFeatures,
): number {
  // Step 1: Resolve the individual shaping components.
  const {
    nextGapAlignmentReward,
    approachProgressReward,
    centeringProgressReward,
    clearanceReward,
    secondGapAlignmentReward,
    velocityStabilityReward,
  } = resolveDenseShapingRewardComponents(previousFeatures, currentFeatures);

  // Step 2: Add them left to right. No seed value is passed to `reduce`, so
  // the accumulation starts from the first component itself.
  return [
    nextGapAlignmentReward,
    approachProgressReward,
    centeringProgressReward,
    clearanceReward,
    secondGapAlignmentReward,
    velocityStabilityReward,
  ].reduce((runningTotal, componentReward) => runningTotal + componentReward);
}
|
|
126
|
+
|
|
127
|
+
/**
 * Flags observations that look unrecoverable.
 *
 * The bird is flagged when its next-gap clearance is under the configured
 * threshold AND either:
 * - its delta to the next gap exceeds the "below gap" limit while its velocity
 *   exceeds the "falling" limit, or
 * - its delta to the next gap is under the "above gap" limit while its
 *   velocity is under the "rising" limit.
 *
 * @param observationFeatures - Post-step observation features.
 * @returns `true` when the trajectory appears unrecoverable, else `false`.
 */
export function isBirdLikelyUnrecoverable(
  observationFeatures: FlappyObservationFeatures,
): boolean {
  const {
    normalizedNextGapClearance,
    normalizedDeltaToNextGap,
    normalizedVelocity,
  } = observationFeatures;

  // Guard: without a clearance deficit nothing else matters. Written as a
  // negated `<` so that a NaN clearance still yields `false`.
  if (
    !(
      normalizedNextGapClearance <
      FLAPPY_EVALUATION_UNRECOVERABLE_CLEARANCE_THRESHOLD
    )
  ) {
    return false;
  }

  // Case 1: past the "below gap" delta and past the "falling" velocity.
  if (
    normalizedDeltaToNextGap >
      FLAPPY_EVALUATION_UNRECOVERABLE_BELOW_GAP_DELTA &&
    normalizedVelocity > FLAPPY_EVALUATION_UNRECOVERABLE_FALLING_VELOCITY
  ) {
    return true;
  }

  // Case 2: past the "above gap" delta and past the "rising" velocity.
  return (
    normalizedDeltaToNextGap <
      FLAPPY_EVALUATION_UNRECOVERABLE_ABOVE_GAP_DELTA &&
    normalizedVelocity < FLAPPY_EVALUATION_UNRECOVERABLE_RISING_VELOCITY
  );
}
|
|
152
|
+
|
|
153
|
+
/**
 * Produces the four raw fitness channels for an episode.
 *
 * Survival and pipe-pass channels are the counters scaled by their weights,
 * the dense channel is taken as accumulated on the runtime state, and the
 * terminal channel is computed from the game state and difficulty scale.
 *
 * @param rolloutEpisodeContext - Rollout configuration; supplies
 * `difficultyScale`.
 * @param rolloutEpisodeRuntimeState - Runtime state; supplies the game state
 * and the accumulated `denseShapingFitness`.
 * @param framesSurvived - Final frame count.
 * @param pipesPassed - Final pipe-pass count.
 * @returns Fitness-channel breakdown.
 */
function resolveRolloutFitnessBreakdown(
  rolloutEpisodeContext: RolloutEpisodeContext,
  rolloutEpisodeRuntimeState: RolloutEpisodeRuntimeState,
  framesSurvived: number,
  pipesPassed: number,
): RolloutFitnessBreakdown {
  // Step 1: Terminal bonus derived from the final game state.
  const terminalShapingFitness = computeTerminalShapingFitness(
    rolloutEpisodeRuntimeState.state,
    rolloutEpisodeContext.difficultyScale,
  );

  // Step 2: Direct channels are plain counter-times-weight products; the
  // dense channel is read from the runtime accumulator.
  return {
    survivalFitness: framesSurvived * FLAPPY_FITNESS_SURVIVAL_WEIGHT,
    pipePassFitness: pipesPassed * FLAPPY_FITNESS_BONUS_PER_PIPE,
    denseShapingFitness: rolloutEpisodeRuntimeState.denseShapingFitness,
    terminalShapingFitness,
  };
}
|
|
185
|
+
|
|
186
|
+
/**
 * Adds the four fitness channels together without any normalization.
 *
 * @param rolloutFitnessBreakdown - Fitness-channel breakdown.
 * @returns Raw unnormalized fitness (survival + pipe pass + dense + terminal).
 */
function resolveUnnormalizedRolloutFitness(
  rolloutFitnessBreakdown: RolloutFitnessBreakdown,
): number {
  const {
    survivalFitness,
    pipePassFitness,
    denseShapingFitness,
    terminalShapingFitness,
  } = rolloutFitnessBreakdown;

  // Straight channel sum, in the same order as the breakdown fields.
  return (
    survivalFitness +
    pipePassFitness +
    denseShapingFitness +
    terminalShapingFitness
  );
}
|
|
203
|
+
|
|
204
|
+
/**
 * Derives each dense-shaping reward component from two consecutive
 * observations.
 *
 * Every component is floored at `FLAPPY_ROLLOUT_ZERO_FITNESS` before its
 * weight is applied.
 *
 * NOTE(review): the alignment and velocity components use
 * `FLAPPY_ROLLOUT_MIN_MAX_FRAMES` as the ceiling that the absolute error is
 * subtracted from. Its name suggests a frame-budget bound rather than a score
 * ceiling - confirm the reuse is intentional.
 *
 * @param previousFeatures - Observation features captured before the step.
 * @param currentFeatures - Observation features captured after the step.
 * @returns Dense-shaping reward components.
 */
function resolveDenseShapingRewardComponents(
  previousFeatures: FlappyObservationFeatures,
  currentFeatures: FlappyObservationFeatures,
): DenseShapingRewardComponents {
  // Floors a raw score at the zero-fitness constant.
  const floorAtZero = (rawScore: number): number =>
    Math.max(FLAPPY_ROLLOUT_ZERO_FITNESS, rawScore);

  // Scores how close a signed value is to zero: ceiling minus |value|, floored.
  const scoreClosenessToZero = (signedValue: number): number =>
    floorAtZero(FLAPPY_ROLLOUT_MIN_MAX_FRAMES - Math.abs(signedValue));

  const previousGapError = Math.abs(previousFeatures.normalizedDeltaToNextGap);
  const currentGapError = Math.abs(currentFeatures.normalizedDeltaToNextGap);

  return {
    // Step 1: Closeness of the bird to the next gap.
    nextGapAlignmentReward:
      scoreClosenessToZero(currentFeatures.normalizedDeltaToNextGap) *
      FLAPPY_FITNESS_ALIGNMENT_WEIGHT_PER_FRAME,

    // Step 2: Reduction in distance to the next pipe over this step.
    approachProgressReward:
      floorAtZero(
        previousFeatures.normalizedDistanceToNextPipe -
          currentFeatures.normalizedDistanceToNextPipe,
      ) * FLAPPY_FITNESS_APPROACH_PROGRESS_WEIGHT,

    // Step 3: Reduction in absolute next-gap error over this step.
    centeringProgressReward:
      floorAtZero(previousGapError - currentGapError) *
      FLAPPY_FITNESS_CENTERING_PROGRESS_WEIGHT,

    // Step 4: Positive clearance around the next gap.
    clearanceReward:
      floorAtZero(currentFeatures.normalizedNextGapClearance) *
      FLAPPY_FITNESS_CLEARANCE_WEIGHT_PER_FRAME,

    // Step 5: Closeness of the bird to the second upcoming gap.
    secondGapAlignmentReward:
      scoreClosenessToZero(currentFeatures.normalizedDeltaToSecondGap) *
      FLAPPY_FITNESS_SECOND_GAP_ALIGNMENT_WEIGHT_PER_FRAME,

    // Step 6: Small velocity magnitude.
    velocityStabilityReward:
      scoreClosenessToZero(currentFeatures.normalizedVelocity) *
      FLAPPY_FITNESS_STABLE_VELOCITY_WEIGHT_PER_FRAME,
  };
}
|
|
271
|
+
|
|
272
|
+
/**
 * Computes the terminal bonus from the final observation.
 *
 * Two terms are added:
 * - alignment: `max(0, 1 - |normalizedDeltaToNextGap|)` times the terminal
 *   alignment weight;
 * - progress: `max(0, 1 - clamp01(normalizedDistanceToNextPipe))` times the
 *   terminal progress weight.
 *
 * @param episodeState - Final rollout state.
 * @param difficultyScale - Active rollout difficulty scale, forwarded to the
 * observation-feature extractor.
 * @returns Terminal shaping reward.
 */
function computeTerminalShapingFitness(
  episodeState: FlappyGameState,
  difficultyScale: number,
): number {
  // Step 1: Extract the two signals needed from the final observation.
  const { normalizedDeltaToNextGap, normalizedDistanceToNextPipe } =
    getFlappyObservationFeatures(episodeState, difficultyScale);

  // Step 2: Alignment score - 1 at zero gap error, floored at 0.
  const alignmentScore = Math.max(0, 1 - Math.abs(normalizedDeltaToNextGap));

  // Step 3: Progress score - distance is bounded to [0, 1] before inverting.
  const boundedDistance = Math.max(
    0,
    Math.min(1, normalizedDistanceToNextPipe),
  );
  const progressScore = Math.max(0, 1 - boundedDistance);

  // Step 4: Weight and combine both bonuses.
  return (
    alignmentScore * FLAPPY_FITNESS_TERMINAL_ALIGNMENT_BONUS_WEIGHT +
    progressScore * FLAPPY_FITNESS_TERMINAL_PROGRESS_BONUS_WEIGHT
  );
}
|
|
304
|
+
|
|
305
|
+
/**
 * Combines the fitness channels into one normalized composite score.
 *
 * Each channel is divided by its own scale, passed through
 * `clampValue(..., 0, 1)`, and then multiplied by its normalized weight before
 * the four terms are added.
 *
 * NOTE(review): the survival term is the only one multiplied by two weights
 * (`FLAPPY_EVALUATION_NORMALIZED_SURVIVAL_WEIGHT` and
 * `FLAPPY_FITNESS_SURVIVAL_WEIGHT`) - confirm this asymmetry is intended.
 *
 * @param framesValue - Frames survived for the episode.
 * @param pipesPassedValue - Pipes passed during the episode.
 * @param denseShapingValue - Accumulated dense shaping reward.
 * @param terminalShapingValue - Terminal shaping reward.
 * @param maxFramesValue - Frame budget used for the episode.
 * @param pipeProgressTarget - Optional target used to normalize pipe progress;
 * falls back to `FLAPPY_EVALUATION_DEFAULT_PIPE_PROGRESS_TARGET` when nullish.
 * @returns Normalized composite fitness.
 */
function composeNormalizedFitness(
  framesValue: number,
  pipesPassedValue: number,
  denseShapingValue: number,
  terminalShapingValue: number,
  maxFramesValue: number,
  pipeProgressTarget: number | undefined,
): number {
  // Passes a ratio through the shared clamp helper with bounds 0 and 1.
  const toUnitInterval = (ratio: number): number => clampValue(ratio, 0, 1);

  // Step 1: Resolve every divisor; each one is kept at 1 or above.
  const pipeTargetDivisor = Math.max(
    1,
    Math.trunc(
      pipeProgressTarget ?? FLAPPY_EVALUATION_DEFAULT_PIPE_PROGRESS_TARGET,
    ),
  );
  const frameBudgetDivisor = Math.max(1, maxFramesValue);
  const denseDivisor = Math.max(
    1,
    framesValue * FLAPPY_EVALUATION_DENSE_SHAPING_FRAMES_NORMALIZER,
  );
  const terminalDivisor = Math.max(
    1,
    FLAPPY_FITNESS_TERMINAL_ALIGNMENT_BONUS_WEIGHT +
      FLAPPY_FITNESS_TERMINAL_PROGRESS_BONUS_WEIGHT,
  );

  // Step 2: Normalize each channel.
  const survivalShare = toUnitInterval(framesValue / frameBudgetDivisor);
  const pipeShare = toUnitInterval(pipesPassedValue / pipeTargetDivisor);
  const denseShare = toUnitInterval(denseShapingValue / denseDivisor);
  const terminalShare = toUnitInterval(terminalShapingValue / terminalDivisor);

  // Step 3: Apply the per-channel weights.
  const weightedSurvival =
    survivalShare *
    FLAPPY_EVALUATION_NORMALIZED_SURVIVAL_WEIGHT *
    FLAPPY_FITNESS_SURVIVAL_WEIGHT;
  const weightedPipeProgress =
    pipeShare * FLAPPY_EVALUATION_NORMALIZED_PROGRESS_WEIGHT;
  const weightedDense = denseShare * FLAPPY_EVALUATION_NORMALIZED_DENSE_WEIGHT;
  const weightedTerminal =
    terminalShare * FLAPPY_EVALUATION_NORMALIZED_TERMINAL_WEIGHT;

  // Step 4: Sum the weighted channels.
  return (
    weightedSurvival + weightedPipeProgress + weightedDense + weightedTerminal
  );
}
|