moflo 4.8.31 → 4.8.33
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +13 -10
- package/src/@claude-flow/cli/dist/src/commands/doctor.js +1298 -1107
- package/src/@claude-flow/cli/dist/src/memory/memory-initializer.js +4 -7
- package/src/@claude-flow/cli/package.json +106 -106
- package/src/@claude-flow/neural/README.md +260 -0
- package/src/@claude-flow/neural/dist/algorithms/a2c.js +361 -0
- package/src/@claude-flow/neural/dist/algorithms/curiosity.js +392 -0
- package/src/@claude-flow/neural/dist/algorithms/decision-transformer.js +415 -0
- package/src/@claude-flow/neural/dist/algorithms/dqn.js +303 -0
- package/src/@claude-flow/neural/dist/algorithms/index.js +74 -0
- package/src/@claude-flow/neural/dist/algorithms/ppo.js +331 -0
- package/src/@claude-flow/neural/dist/algorithms/q-learning.js +259 -0
- package/src/@claude-flow/neural/dist/algorithms/sarsa.js +297 -0
- package/src/@claude-flow/neural/dist/application/index.js +7 -0
- package/src/@claude-flow/neural/dist/application/services/neural-application-service.js +161 -0
- package/src/@claude-flow/neural/dist/domain/entities/pattern.js +134 -0
- package/src/@claude-flow/neural/dist/domain/index.js +8 -0
- package/src/@claude-flow/neural/dist/domain/services/learning-service.js +195 -0
- package/src/@claude-flow/neural/dist/index.js +201 -0
- package/src/@claude-flow/neural/dist/modes/balanced.js +234 -0
- package/src/@claude-flow/neural/dist/modes/base.js +77 -0
- package/src/@claude-flow/neural/dist/modes/batch.js +316 -0
- package/src/@claude-flow/neural/dist/modes/edge.js +310 -0
- package/src/@claude-flow/neural/dist/modes/index.js +13 -0
- package/src/@claude-flow/neural/dist/modes/real-time.js +196 -0
- package/src/@claude-flow/neural/dist/modes/research.js +389 -0
- package/src/@claude-flow/neural/dist/pattern-learner.js +603 -0
- package/src/@claude-flow/neural/dist/reasoning-bank.js +993 -0
- package/src/@claude-flow/neural/dist/reasoningbank-adapter.js +463 -0
- package/src/@claude-flow/neural/dist/sona-integration.js +316 -0
- package/src/@claude-flow/neural/dist/sona-manager.js +695 -0
- package/src/@claude-flow/neural/dist/types.js +11 -0
- package/src/@claude-flow/neural/package.json +26 -0
- package/.claude/agents/browser/browser-agent.yaml +0 -182
- package/.claude/agents/database-specialist.yaml +0 -21
- package/.claude/agents/index.yaml +0 -17
- package/.claude/agents/project-coordinator.yaml +0 -15
- package/.claude/agents/python-specialist.yaml +0 -21
- package/.claude/agents/security-auditor.yaml +0 -20
- package/.claude/agents/typescript-specialist.yaml +0 -21
- package/.claude/checkpoints/1767754460.json +0 -8
- package/.claude/config/v3-dependency-optimization.json +0 -266
- package/.claude/config/v3-performance-targets.json +0 -251
- package/.claude/guidance/moflo-bootstrap.md +0 -129
- package/.claude/mcp.json +0 -13
- package/.claude/settings.json +0 -351
- package/.claude/settings.local.json +0 -18
- package/.claude/skills/agentdb-advanced/SKILL.md +0 -550
- package/.claude/skills/agentdb-learning/SKILL.md +0 -545
- package/.claude/skills/agentdb-memory-patterns/SKILL.md +0 -339
- package/.claude/skills/agentdb-optimization/SKILL.md +0 -509
- package/.claude/skills/agentdb-vector-search/SKILL.md +0 -339
- package/.claude/skills/browser/SKILL.md +0 -204
- package/.claude/skills/fl/SKILL.md +0 -583
- package/.claude/skills/flo/SKILL.md +0 -583
- package/.claude/skills/github-code-review/SKILL.md +0 -1140
- package/.claude/skills/github-multi-repo/SKILL.md +0 -874
- package/.claude/skills/github-project-management/SKILL.md +0 -1277
- package/.claude/skills/github-release-management/SKILL.md +0 -1081
- package/.claude/skills/github-workflow-automation/SKILL.md +0 -1065
- package/.claude/skills/hive-mind-advanced/SKILL.md +0 -712
- package/.claude/skills/hooks-automation/SKILL.md +0 -1201
- package/.claude/skills/pair-programming/SKILL.md +0 -1202
- package/.claude/skills/performance-analysis/SKILL.md +0 -563
- package/.claude/skills/reasoningbank-agentdb/SKILL.md +0 -446
- package/.claude/skills/reasoningbank-intelligence/SKILL.md +0 -201
- package/.claude/skills/skill-builder/SKILL.md +0 -910
- package/.claude/skills/sparc-methodology/SKILL.md +0 -1115
- package/.claude/skills/stream-chain/SKILL.md +0 -563
- package/.claude/skills/swarm-advanced/SKILL.md +0 -973
- package/.claude/skills/swarm-orchestration/SKILL.md +0 -179
- package/.claude/skills/v3-cli-modernization/SKILL.md +0 -872
- package/.claude/skills/v3-core-implementation/SKILL.md +0 -797
- package/.claude/skills/v3-ddd-architecture/SKILL.md +0 -442
- package/.claude/skills/v3-integration-deep/SKILL.md +0 -241
- package/.claude/skills/v3-mcp-optimization/SKILL.md +0 -777
- package/.claude/skills/v3-memory-unification/SKILL.md +0 -174
- package/.claude/skills/v3-performance-optimization/SKILL.md +0 -390
- package/.claude/skills/v3-security-overhaul/SKILL.md +0 -82
- package/.claude/skills/v3-swarm-coordination/SKILL.md +0 -340
- package/.claude/skills/verification-quality/SKILL.md +0 -649
- package/.claude/skills/worker-benchmarks/skill.md +0 -135
- package/.claude/skills/worker-integration/skill.md +0 -154
- package/.claude/workflow-state.json +0 -9
- package/src/@claude-flow/cli/dist/src/appliance/gguf-engine.d.ts +0 -91
- package/src/@claude-flow/cli/dist/src/appliance/ruvllm-bridge.d.ts +0 -102
- package/src/@claude-flow/cli/dist/src/appliance/rvfa-builder.d.ts +0 -44
- package/src/@claude-flow/cli/dist/src/appliance/rvfa-distribution.d.ts +0 -97
- package/src/@claude-flow/cli/dist/src/appliance/rvfa-format.d.ts +0 -111
- package/src/@claude-flow/cli/dist/src/appliance/rvfa-runner.d.ts +0 -69
- package/src/@claude-flow/cli/dist/src/appliance/rvfa-signing.d.ts +0 -123
- package/src/@claude-flow/cli/dist/src/benchmarks/pretrain/index.d.ts +0 -58
- package/src/@claude-flow/cli/dist/src/commands/agent.d.ts +0 -8
- package/src/@claude-flow/cli/dist/src/commands/analyze.d.ts +0 -19
- package/src/@claude-flow/cli/dist/src/commands/appliance-advanced.d.ts +0 -9
- package/src/@claude-flow/cli/dist/src/commands/appliance.d.ts +0 -8
- package/src/@claude-flow/cli/dist/src/commands/benchmark.d.ts +0 -10
- package/src/@claude-flow/cli/dist/src/commands/claims.d.ts +0 -10
- package/src/@claude-flow/cli/dist/src/commands/completions.d.ts +0 -10
- package/src/@claude-flow/cli/dist/src/commands/config.d.ts +0 -8
- package/src/@claude-flow/cli/dist/src/commands/daemon.d.ts +0 -8
- package/src/@claude-flow/cli/dist/src/commands/deployment.d.ts +0 -10
- package/src/@claude-flow/cli/dist/src/commands/diagnose.d.ts +0 -16
- package/src/@claude-flow/cli/dist/src/commands/doctor.d.ts +0 -10
- package/src/@claude-flow/cli/dist/src/commands/embeddings.d.ts +0 -18
- package/src/@claude-flow/cli/dist/src/commands/gate.d.ts +0 -23
- package/src/@claude-flow/cli/dist/src/commands/github.d.ts +0 -12
- package/src/@claude-flow/cli/dist/src/commands/guidance.d.ts +0 -8
- package/src/@claude-flow/cli/dist/src/commands/hive-mind.d.ts +0 -11
- package/src/@claude-flow/cli/dist/src/commands/hooks.d.ts +0 -8
- package/src/@claude-flow/cli/dist/src/commands/index.d.ts +0 -115
- package/src/@claude-flow/cli/dist/src/commands/init.d.ts +0 -8
- package/src/@claude-flow/cli/dist/src/commands/issues.d.ts +0 -21
- package/src/@claude-flow/cli/dist/src/commands/mcp.d.ts +0 -11
- package/src/@claude-flow/cli/dist/src/commands/memory.d.ts +0 -8
- package/src/@claude-flow/cli/dist/src/commands/migrate.d.ts +0 -8
- package/src/@claude-flow/cli/dist/src/commands/neural.d.ts +0 -10
- package/src/@claude-flow/cli/dist/src/commands/orc.d.ts +0 -21
- package/src/@claude-flow/cli/dist/src/commands/performance.d.ts +0 -10
- package/src/@claude-flow/cli/dist/src/commands/plugins.d.ts +0 -11
- package/src/@claude-flow/cli/dist/src/commands/process.d.ts +0 -10
- package/src/@claude-flow/cli/dist/src/commands/progress.d.ts +0 -11
- package/src/@claude-flow/cli/dist/src/commands/providers.d.ts +0 -10
- package/src/@claude-flow/cli/dist/src/commands/route.d.ts +0 -16
- package/src/@claude-flow/cli/dist/src/commands/ruvector/backup.d.ts +0 -11
- package/src/@claude-flow/cli/dist/src/commands/ruvector/benchmark.d.ts +0 -11
- package/src/@claude-flow/cli/dist/src/commands/ruvector/import.d.ts +0 -18
- package/src/@claude-flow/cli/dist/src/commands/ruvector/index.d.ts +0 -29
- package/src/@claude-flow/cli/dist/src/commands/ruvector/init.d.ts +0 -11
- package/src/@claude-flow/cli/dist/src/commands/ruvector/migrate.d.ts +0 -11
- package/src/@claude-flow/cli/dist/src/commands/ruvector/optimize.d.ts +0 -11
- package/src/@claude-flow/cli/dist/src/commands/ruvector/setup.d.ts +0 -18
- package/src/@claude-flow/cli/dist/src/commands/ruvector/status.d.ts +0 -11
- package/src/@claude-flow/cli/dist/src/commands/security.d.ts +0 -10
- package/src/@claude-flow/cli/dist/src/commands/session.d.ts +0 -8
- package/src/@claude-flow/cli/dist/src/commands/start.d.ts +0 -8
- package/src/@claude-flow/cli/dist/src/commands/status.d.ts +0 -8
- package/src/@claude-flow/cli/dist/src/commands/swarm.d.ts +0 -8
- package/src/@claude-flow/cli/dist/src/commands/task.d.ts +0 -8
- package/src/@claude-flow/cli/dist/src/commands/transfer-store.d.ts +0 -13
- package/src/@claude-flow/cli/dist/src/commands/update.d.ts +0 -8
- package/src/@claude-flow/cli/dist/src/commands/workflow.d.ts +0 -8
- package/src/@claude-flow/cli/dist/src/config/moflo-config.d.ts +0 -93
- package/src/@claude-flow/cli/dist/src/config-adapter.d.ts +0 -15
- package/src/@claude-flow/cli/dist/src/index.d.ts +0 -81
- package/src/@claude-flow/cli/dist/src/infrastructure/in-memory-repositories.d.ts +0 -68
- package/src/@claude-flow/cli/dist/src/init/claudemd-generator.d.ts +0 -30
- package/src/@claude-flow/cli/dist/src/init/executor.d.ts +0 -41
- package/src/@claude-flow/cli/dist/src/init/helpers-generator.d.ts +0 -51
- package/src/@claude-flow/cli/dist/src/init/index.d.ts +0 -13
- package/src/@claude-flow/cli/dist/src/init/mcp-generator.d.ts +0 -29
- package/src/@claude-flow/cli/dist/src/init/moflo-init.d.ts +0 -31
- package/src/@claude-flow/cli/dist/src/init/settings-generator.d.ts +0 -14
- package/src/@claude-flow/cli/dist/src/init/statusline-generator.d.ts +0 -28
- package/src/@claude-flow/cli/dist/src/init/types.d.ts +0 -285
- package/src/@claude-flow/cli/dist/src/mcp-client.d.ts +0 -92
- package/src/@claude-flow/cli/dist/src/mcp-server.d.ts +0 -161
- package/src/@claude-flow/cli/dist/src/mcp-tools/agent-tools.d.ts +0 -9
- package/src/@claude-flow/cli/dist/src/mcp-tools/agentdb-tools.d.ts +0 -30
- package/src/@claude-flow/cli/dist/src/mcp-tools/analyze-tools.d.ts +0 -38
- package/src/@claude-flow/cli/dist/src/mcp-tools/auto-install.d.ts +0 -83
- package/src/@claude-flow/cli/dist/src/mcp-tools/browser-tools.d.ts +0 -13
- package/src/@claude-flow/cli/dist/src/mcp-tools/claims-tools.d.ts +0 -12
- package/src/@claude-flow/cli/dist/src/mcp-tools/config-tools.d.ts +0 -8
- package/src/@claude-flow/cli/dist/src/mcp-tools/coordination-tools.d.ts +0 -13
- package/src/@claude-flow/cli/dist/src/mcp-tools/daa-tools.d.ts +0 -13
- package/src/@claude-flow/cli/dist/src/mcp-tools/embeddings-tools.d.ts +0 -9
- package/src/@claude-flow/cli/dist/src/mcp-tools/github-tools.d.ts +0 -13
- package/src/@claude-flow/cli/dist/src/mcp-tools/hive-mind-tools.d.ts +0 -8
- package/src/@claude-flow/cli/dist/src/mcp-tools/hooks-tools.d.ts +0 -44
- package/src/@claude-flow/cli/dist/src/mcp-tools/index.d.ts +0 -23
- package/src/@claude-flow/cli/dist/src/mcp-tools/memory-tools.d.ts +0 -14
- package/src/@claude-flow/cli/dist/src/mcp-tools/neural-tools.d.ts +0 -16
- package/src/@claude-flow/cli/dist/src/mcp-tools/performance-tools.d.ts +0 -16
- package/src/@claude-flow/cli/dist/src/mcp-tools/progress-tools.d.ts +0 -14
- package/src/@claude-flow/cli/dist/src/mcp-tools/security-tools.d.ts +0 -18
- package/src/@claude-flow/cli/dist/src/mcp-tools/session-tools.d.ts +0 -8
- package/src/@claude-flow/cli/dist/src/mcp-tools/swarm-tools.d.ts +0 -8
- package/src/@claude-flow/cli/dist/src/mcp-tools/system-tools.d.ts +0 -13
- package/src/@claude-flow/cli/dist/src/mcp-tools/task-tools.d.ts +0 -8
- package/src/@claude-flow/cli/dist/src/mcp-tools/terminal-tools.d.ts +0 -13
- package/src/@claude-flow/cli/dist/src/mcp-tools/transfer-tools.d.ts +0 -14
- package/src/@claude-flow/cli/dist/src/mcp-tools/types.d.ts +0 -31
- package/src/@claude-flow/cli/dist/src/mcp-tools/workflow-tools.d.ts +0 -8
- package/src/@claude-flow/cli/dist/src/memory/ewc-consolidation.d.ts +0 -271
- package/src/@claude-flow/cli/dist/src/memory/intelligence.d.ts +0 -285
- package/src/@claude-flow/cli/dist/src/memory/memory-bridge.d.ts +0 -413
- package/src/@claude-flow/cli/dist/src/memory/memory-initializer.d.ts +0 -405
- package/src/@claude-flow/cli/dist/src/memory/sona-optimizer.d.ts +0 -227
- package/src/@claude-flow/cli/dist/src/output.d.ts +0 -133
- package/src/@claude-flow/cli/dist/src/parser.d.ts +0 -51
- package/src/@claude-flow/cli/dist/src/plugins/manager.d.ts +0 -133
- package/src/@claude-flow/cli/dist/src/plugins/store/discovery.d.ts +0 -88
- package/src/@claude-flow/cli/dist/src/plugins/store/index.d.ts +0 -76
- package/src/@claude-flow/cli/dist/src/plugins/store/search.d.ts +0 -46
- package/src/@claude-flow/cli/dist/src/plugins/store/types.d.ts +0 -274
- package/src/@claude-flow/cli/dist/src/plugins/tests/demo-plugin-store.d.ts +0 -7
- package/src/@claude-flow/cli/dist/src/plugins/tests/standalone-test.d.ts +0 -12
- package/src/@claude-flow/cli/dist/src/plugins/tests/test-plugin-store.d.ts +0 -7
- package/src/@claude-flow/cli/dist/src/production/circuit-breaker.d.ts +0 -101
- package/src/@claude-flow/cli/dist/src/production/error-handler.d.ts +0 -92
- package/src/@claude-flow/cli/dist/src/production/index.d.ts +0 -23
- package/src/@claude-flow/cli/dist/src/production/monitoring.d.ts +0 -161
- package/src/@claude-flow/cli/dist/src/production/rate-limiter.d.ts +0 -80
- package/src/@claude-flow/cli/dist/src/production/retry.d.ts +0 -48
- package/src/@claude-flow/cli/dist/src/prompt.d.ts +0 -44
- package/src/@claude-flow/cli/dist/src/runtime/headless.d.ts +0 -60
- package/src/@claude-flow/cli/dist/src/ruvector/ast-analyzer.d.ts +0 -67
- package/src/@claude-flow/cli/dist/src/ruvector/coverage-router.d.ts +0 -160
- package/src/@claude-flow/cli/dist/src/ruvector/coverage-tools.d.ts +0 -33
- package/src/@claude-flow/cli/dist/src/ruvector/diff-classifier.d.ts +0 -175
- package/src/@claude-flow/cli/dist/src/ruvector/enhanced-model-router.d.ts +0 -146
- package/src/@claude-flow/cli/dist/src/ruvector/flash-attention.d.ts +0 -195
- package/src/@claude-flow/cli/dist/src/ruvector/graph-analyzer.d.ts +0 -187
- package/src/@claude-flow/cli/dist/src/ruvector/index.d.ts +0 -34
- package/src/@claude-flow/cli/dist/src/ruvector/lora-adapter.d.ts +0 -218
- package/src/@claude-flow/cli/dist/src/ruvector/model-router.d.ts +0 -220
- package/src/@claude-flow/cli/dist/src/ruvector/moe-router.d.ts +0 -206
- package/src/@claude-flow/cli/dist/src/ruvector/q-learning-router.d.ts +0 -211
- package/src/@claude-flow/cli/dist/src/ruvector/semantic-router.d.ts +0 -77
- package/src/@claude-flow/cli/dist/src/ruvector/vector-db.d.ts +0 -69
- package/src/@claude-flow/cli/dist/src/services/agent-router.d.ts +0 -63
- package/src/@claude-flow/cli/dist/src/services/agentic-flow-bridge.d.ts +0 -50
- package/src/@claude-flow/cli/dist/src/services/claim-service.d.ts +0 -204
- package/src/@claude-flow/cli/dist/src/services/container-worker-pool.d.ts +0 -197
- package/src/@claude-flow/cli/dist/src/services/daemon-lock.d.ts +0 -60
- package/src/@claude-flow/cli/dist/src/services/headless-worker-executor.d.ts +0 -304
- package/src/@claude-flow/cli/dist/src/services/index.d.ts +0 -13
- package/src/@claude-flow/cli/dist/src/services/learning-service.d.ts +0 -161
- package/src/@claude-flow/cli/dist/src/services/moflo-require.d.ts +0 -34
- package/src/@claude-flow/cli/dist/src/services/registry-api.d.ts +0 -58
- package/src/@claude-flow/cli/dist/src/services/ruvector-training.d.ts +0 -214
- package/src/@claude-flow/cli/dist/src/services/worker-daemon.d.ts +0 -225
- package/src/@claude-flow/cli/dist/src/services/worker-queue.d.ts +0 -194
- package/src/@claude-flow/cli/dist/src/services/workflow-gate.d.ts +0 -88
- package/src/@claude-flow/cli/dist/src/suggest.d.ts +0 -53
- package/src/@claude-flow/cli/dist/src/transfer/anonymization/index.d.ts +0 -25
- package/src/@claude-flow/cli/dist/src/transfer/deploy-seraphine.d.ts +0 -13
- package/src/@claude-flow/cli/dist/src/transfer/export.d.ts +0 -25
- package/src/@claude-flow/cli/dist/src/transfer/index.d.ts +0 -12
- package/src/@claude-flow/cli/dist/src/transfer/ipfs/client.d.ts +0 -109
- package/src/@claude-flow/cli/dist/src/transfer/ipfs/upload.d.ts +0 -95
- package/src/@claude-flow/cli/dist/src/transfer/models/seraphine.d.ts +0 -72
- package/src/@claude-flow/cli/dist/src/transfer/serialization/cfp.d.ts +0 -49
- package/src/@claude-flow/cli/dist/src/transfer/storage/gcs.d.ts +0 -82
- package/src/@claude-flow/cli/dist/src/transfer/storage/index.d.ts +0 -6
- package/src/@claude-flow/cli/dist/src/transfer/store/discovery.d.ts +0 -84
- package/src/@claude-flow/cli/dist/src/transfer/store/download.d.ts +0 -70
- package/src/@claude-flow/cli/dist/src/transfer/store/index.d.ts +0 -84
- package/src/@claude-flow/cli/dist/src/transfer/store/publish.d.ts +0 -76
- package/src/@claude-flow/cli/dist/src/transfer/store/registry.d.ts +0 -58
- package/src/@claude-flow/cli/dist/src/transfer/store/search.d.ts +0 -54
- package/src/@claude-flow/cli/dist/src/transfer/store/tests/standalone-test.d.ts +0 -12
- package/src/@claude-flow/cli/dist/src/transfer/store/types.d.ts +0 -193
- package/src/@claude-flow/cli/dist/src/transfer/test-seraphine.d.ts +0 -6
- package/src/@claude-flow/cli/dist/src/transfer/tests/test-store.d.ts +0 -7
- package/src/@claude-flow/cli/dist/src/transfer/types.d.ts +0 -245
- package/src/@claude-flow/cli/dist/src/types.d.ts +0 -198
- package/src/@claude-flow/cli/dist/src/update/checker.d.ts +0 -34
- package/src/@claude-flow/cli/dist/src/update/executor.d.ts +0 -32
- package/src/@claude-flow/cli/dist/src/update/index.d.ts +0 -33
- package/src/@claude-flow/cli/dist/src/update/rate-limiter.d.ts +0 -20
- package/src/@claude-flow/cli/dist/src/update/validator.d.ts +0 -17
- package/src/@claude-flow/guidance/dist/adversarial.d.ts +0 -284
- package/src/@claude-flow/guidance/dist/analyzer.d.ts +0 -530
- package/src/@claude-flow/guidance/dist/artifacts.d.ts +0 -283
- package/src/@claude-flow/guidance/dist/authority.d.ts +0 -290
- package/src/@claude-flow/guidance/dist/capabilities.d.ts +0 -209
- package/src/@claude-flow/guidance/dist/coherence.d.ts +0 -233
- package/src/@claude-flow/guidance/dist/compiler.d.ts +0 -87
- package/src/@claude-flow/guidance/dist/conformance-kit.d.ts +0 -225
- package/src/@claude-flow/guidance/dist/continue-gate.d.ts +0 -214
- package/src/@claude-flow/guidance/dist/crypto-utils.d.ts +0 -17
- package/src/@claude-flow/guidance/dist/evolution.d.ts +0 -282
- package/src/@claude-flow/guidance/dist/gates.d.ts +0 -79
- package/src/@claude-flow/guidance/dist/gateway.d.ts +0 -206
- package/src/@claude-flow/guidance/dist/generators.d.ts +0 -153
- package/src/@claude-flow/guidance/dist/headless.d.ts +0 -177
- package/src/@claude-flow/guidance/dist/hooks.d.ts +0 -109
- package/src/@claude-flow/guidance/dist/index.d.ts +0 -205
- package/src/@claude-flow/guidance/dist/ledger.d.ts +0 -162
- package/src/@claude-flow/guidance/dist/manifest-validator.d.ts +0 -289
- package/src/@claude-flow/guidance/dist/memory-gate.d.ts +0 -222
- package/src/@claude-flow/guidance/dist/meta-governance.d.ts +0 -265
- package/src/@claude-flow/guidance/dist/optimizer.d.ts +0 -104
- package/src/@claude-flow/guidance/dist/persistence.d.ts +0 -189
- package/src/@claude-flow/guidance/dist/proof.d.ts +0 -185
- package/src/@claude-flow/guidance/dist/retriever.d.ts +0 -116
- package/src/@claude-flow/guidance/dist/ruvbot-integration.d.ts +0 -370
- package/src/@claude-flow/guidance/dist/temporal.d.ts +0 -426
- package/src/@claude-flow/guidance/dist/trust.d.ts +0 -283
- package/src/@claude-flow/guidance/dist/truth-anchors.d.ts +0 -276
- package/src/@claude-flow/guidance/dist/types.d.ts +0 -378
- package/src/@claude-flow/guidance/dist/uncertainty.d.ts +0 -372
- package/src/@claude-flow/guidance/dist/wasm-kernel.d.ts +0 -48
- package/src/@claude-flow/memory/dist/agent-memory-scope.d.ts +0 -131
- package/src/@claude-flow/memory/dist/agent-memory-scope.test.d.ts +0 -8
- package/src/@claude-flow/memory/dist/agentdb-adapter.d.ts +0 -165
- package/src/@claude-flow/memory/dist/agentdb-backend.d.ts +0 -212
- package/src/@claude-flow/memory/dist/agentdb-backend.test.d.ts +0 -7
- package/src/@claude-flow/memory/dist/application/commands/delete-memory.command.d.ts +0 -65
- package/src/@claude-flow/memory/dist/application/commands/store-memory.command.d.ts +0 -48
- package/src/@claude-flow/memory/dist/application/index.d.ts +0 -12
- package/src/@claude-flow/memory/dist/application/queries/search-memory.query.d.ts +0 -72
- package/src/@claude-flow/memory/dist/application/services/memory-application-service.d.ts +0 -121
- package/src/@claude-flow/memory/dist/auto-memory-bridge.d.ts +0 -226
- package/src/@claude-flow/memory/dist/auto-memory-bridge.test.d.ts +0 -8
- package/src/@claude-flow/memory/dist/benchmark.test.d.ts +0 -2
- package/src/@claude-flow/memory/dist/cache-manager.d.ts +0 -134
- package/src/@claude-flow/memory/dist/controller-registry.d.ts +0 -216
- package/src/@claude-flow/memory/dist/controller-registry.test.d.ts +0 -14
- package/src/@claude-flow/memory/dist/database-provider.d.ts +0 -87
- package/src/@claude-flow/memory/dist/database-provider.test.d.ts +0 -7
- package/src/@claude-flow/memory/dist/domain/entities/memory-entry.d.ts +0 -143
- package/src/@claude-flow/memory/dist/domain/index.d.ts +0 -11
- package/src/@claude-flow/memory/dist/domain/repositories/memory-repository.interface.d.ts +0 -102
- package/src/@claude-flow/memory/dist/domain/services/memory-domain-service.d.ts +0 -105
- package/src/@claude-flow/memory/dist/hnsw-index.d.ts +0 -111
- package/src/@claude-flow/memory/dist/hnsw-lite.d.ts +0 -23
- package/src/@claude-flow/memory/dist/hybrid-backend.d.ts +0 -245
- package/src/@claude-flow/memory/dist/hybrid-backend.test.d.ts +0 -8
- package/src/@claude-flow/memory/dist/index.d.ts +0 -204
- package/src/@claude-flow/memory/dist/infrastructure/index.d.ts +0 -17
- package/src/@claude-flow/memory/dist/infrastructure/repositories/hybrid-memory-repository.d.ts +0 -66
- package/src/@claude-flow/memory/dist/learning-bridge.d.ts +0 -137
- package/src/@claude-flow/memory/dist/learning-bridge.test.d.ts +0 -8
- package/src/@claude-flow/memory/dist/memory-graph.d.ts +0 -100
- package/src/@claude-flow/memory/dist/memory-graph.test.d.ts +0 -8
- package/src/@claude-flow/memory/dist/migration.d.ts +0 -68
- package/src/@claude-flow/memory/dist/persistent-sona.d.ts +0 -144
- package/src/@claude-flow/memory/dist/query-builder.d.ts +0 -211
- package/src/@claude-flow/memory/dist/rvf-backend.d.ts +0 -51
- package/src/@claude-flow/memory/dist/rvf-learning-store.d.ts +0 -139
- package/src/@claude-flow/memory/dist/rvf-migration.d.ts +0 -45
- package/src/@claude-flow/memory/dist/sqlite-backend.d.ts +0 -121
- package/src/@claude-flow/memory/dist/sqljs-backend.d.ts +0 -127
- package/src/@claude-flow/memory/dist/types.d.ts +0 -484
- package/src/@claude-flow/shared/dist/core/config/defaults.d.ts +0 -41
- package/src/@claude-flow/shared/dist/core/config/index.d.ts +0 -8
- package/src/@claude-flow/shared/dist/core/config/loader.d.ts +0 -45
- package/src/@claude-flow/shared/dist/core/config/schema.d.ts +0 -1134
- package/src/@claude-flow/shared/dist/core/config/validator.d.ts +0 -92
- package/src/@claude-flow/shared/dist/core/event-bus.d.ts +0 -31
- package/src/@claude-flow/shared/dist/core/index.d.ts +0 -15
- package/src/@claude-flow/shared/dist/core/interfaces/agent.interface.d.ts +0 -200
- package/src/@claude-flow/shared/dist/core/interfaces/coordinator.interface.d.ts +0 -310
- package/src/@claude-flow/shared/dist/core/interfaces/event.interface.d.ts +0 -224
- package/src/@claude-flow/shared/dist/core/interfaces/index.d.ts +0 -10
- package/src/@claude-flow/shared/dist/core/interfaces/memory.interface.d.ts +0 -298
- package/src/@claude-flow/shared/dist/core/interfaces/task.interface.d.ts +0 -185
- package/src/@claude-flow/shared/dist/core/orchestrator/event-coordinator.d.ts +0 -35
- package/src/@claude-flow/shared/dist/core/orchestrator/health-monitor.d.ts +0 -60
- package/src/@claude-flow/shared/dist/core/orchestrator/index.d.ts +0 -46
- package/src/@claude-flow/shared/dist/core/orchestrator/lifecycle-manager.d.ts +0 -56
- package/src/@claude-flow/shared/dist/core/orchestrator/session-manager.d.ts +0 -83
- package/src/@claude-flow/shared/dist/core/orchestrator/task-manager.d.ts +0 -49
- package/src/@claude-flow/shared/dist/events/domain-events.d.ts +0 -282
- package/src/@claude-flow/shared/dist/events/event-store.d.ts +0 -126
- package/src/@claude-flow/shared/dist/events/event-store.test.d.ts +0 -8
- package/src/@claude-flow/shared/dist/events/example-usage.d.ts +0 -10
- package/src/@claude-flow/shared/dist/events/index.d.ts +0 -21
- package/src/@claude-flow/shared/dist/events/projections.d.ts +0 -177
- package/src/@claude-flow/shared/dist/events/rvf-event-log.d.ts +0 -82
- package/src/@claude-flow/shared/dist/events/state-reconstructor.d.ts +0 -101
- package/src/@claude-flow/shared/dist/events.d.ts +0 -80
- package/src/@claude-flow/shared/dist/hooks/example-usage.d.ts +0 -42
- package/src/@claude-flow/shared/dist/hooks/executor.d.ts +0 -100
- package/src/@claude-flow/shared/dist/hooks/hooks.test.d.ts +0 -9
- package/src/@claude-flow/shared/dist/hooks/index.d.ts +0 -52
- package/src/@claude-flow/shared/dist/hooks/registry.d.ts +0 -133
- package/src/@claude-flow/shared/dist/hooks/safety/bash-safety.d.ts +0 -105
- package/src/@claude-flow/shared/dist/hooks/safety/file-organization.d.ts +0 -144
- package/src/@claude-flow/shared/dist/hooks/safety/git-commit.d.ts +0 -158
- package/src/@claude-flow/shared/dist/hooks/safety/index.d.ts +0 -17
- package/src/@claude-flow/shared/dist/hooks/session-hooks.d.ts +0 -234
- package/src/@claude-flow/shared/dist/hooks/task-hooks.d.ts +0 -163
- package/src/@claude-flow/shared/dist/hooks/types.d.ts +0 -267
- package/src/@claude-flow/shared/dist/hooks/verify-exports.test.d.ts +0 -9
- package/src/@claude-flow/shared/dist/index.d.ts +0 -20
- package/src/@claude-flow/shared/dist/mcp/connection-pool.d.ts +0 -98
- package/src/@claude-flow/shared/dist/mcp/index.d.ts +0 -69
- package/src/@claude-flow/shared/dist/mcp/server.d.ts +0 -166
- package/src/@claude-flow/shared/dist/mcp/session-manager.d.ts +0 -136
- package/src/@claude-flow/shared/dist/mcp/tool-registry.d.ts +0 -178
- package/src/@claude-flow/shared/dist/mcp/transport/http.d.ts +0 -104
- package/src/@claude-flow/shared/dist/mcp/transport/index.d.ts +0 -102
- package/src/@claude-flow/shared/dist/mcp/transport/stdio.d.ts +0 -104
- package/src/@claude-flow/shared/dist/mcp/transport/websocket.d.ts +0 -133
- package/src/@claude-flow/shared/dist/mcp/types.d.ts +0 -438
- package/src/@claude-flow/shared/dist/plugin-interface.d.ts +0 -544
- package/src/@claude-flow/shared/dist/plugin-loader.d.ts +0 -139
- package/src/@claude-flow/shared/dist/plugin-registry.d.ts +0 -183
- package/src/@claude-flow/shared/dist/plugins/index.d.ts +0 -10
- package/src/@claude-flow/shared/dist/plugins/official/hive-mind-plugin.d.ts +0 -106
- package/src/@claude-flow/shared/dist/plugins/official/index.d.ts +0 -10
- package/src/@claude-flow/shared/dist/plugins/official/maestro-plugin.d.ts +0 -121
- package/src/@claude-flow/shared/dist/plugins/types.d.ts +0 -93
- package/src/@claude-flow/shared/dist/resilience/bulkhead.d.ts +0 -105
- package/src/@claude-flow/shared/dist/resilience/circuit-breaker.d.ts +0 -132
- package/src/@claude-flow/shared/dist/resilience/index.d.ts +0 -19
- package/src/@claude-flow/shared/dist/resilience/rate-limiter.d.ts +0 -168
- package/src/@claude-flow/shared/dist/resilience/retry.d.ts +0 -91
- package/src/@claude-flow/shared/dist/security/index.d.ts +0 -10
- package/src/@claude-flow/shared/dist/security/input-validation.d.ts +0 -73
- package/src/@claude-flow/shared/dist/security/secure-random.d.ts +0 -92
- package/src/@claude-flow/shared/dist/services/index.d.ts +0 -7
- package/src/@claude-flow/shared/dist/services/v3-progress.service.d.ts +0 -124
- package/src/@claude-flow/shared/dist/types/agent.types.d.ts +0 -137
- package/src/@claude-flow/shared/dist/types/index.d.ts +0 -11
- package/src/@claude-flow/shared/dist/types/mcp.types.d.ts +0 -266
- package/src/@claude-flow/shared/dist/types/memory.types.d.ts +0 -236
- package/src/@claude-flow/shared/dist/types/swarm.types.d.ts +0 -186
- package/src/@claude-flow/shared/dist/types/task.types.d.ts +0 -178
- package/src/@claude-flow/shared/dist/types.d.ts +0 -197
- package/src/@claude-flow/shared/dist/utils/secure-logger.d.ts +0 -69
|
@@ -0,0 +1,303 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Deep Q-Network (DQN)
|
|
3
|
+
*
|
|
4
|
+
* Implements DQN with enhancements:
|
|
5
|
+
* - Experience replay
|
|
6
|
+
* - Target network
|
|
7
|
+
* - Double DQN (optional)
|
|
8
|
+
* - Dueling architecture (optional)
|
|
9
|
+
* - Epsilon-greedy exploration
|
|
10
|
+
*
|
|
11
|
+
* Performance Target: <10ms per update step
|
|
12
|
+
*/
|
|
13
|
+
/**
|
|
14
|
+
* Default DQN configuration
|
|
15
|
+
*/
|
|
16
|
+
// Frozen so that no importer can mutate the shared defaults in place; every
// DQNAlgorithm instance merges these values into its own (mutable) config copy.
export const DEFAULT_DQN_CONFIG = Object.freeze({
    // Algorithm identifier.
    algorithm: 'dqn',
    // Optimizer / loss settings. NOTE(review): the code that consumes these
    // five fields is outside the visible part of the file — confirm semantics.
    learningRate: 0.0001,
    // Multiplier applied to the next-state Q-value when building the TD target.
    gamma: 0.99,
    entropyCoef: 0,
    valueLossCoef: 1,
    maxGradNorm: 10,
    epochs: 1,
    // update() is a no-op until the replay buffer holds at least this many entries.
    miniBatchSize: 32,
    // Capacity of the circular replay buffer.
    bufferSize: 10000,
    // Starting value of epsilon.
    explorationInitial: 1.0,
    // NOTE(review): presumably the epsilon floor and the decay horizon in
    // steps — the decay code is not visible here, confirm.
    explorationFinal: 0.01,
    explorationDecay: 10000,
    // NOTE(review): presumably the number of updates between target-network
    // syncs — the sync code is not visible here, confirm.
    targetUpdateFreq: 100,
    // When true, the online network picks the next action and the target
    // network evaluates it; when false, the max target-network Q-value is used.
    doubleDQN: true,
    // NOTE(review): dueling architecture toggle per the file header; usage not
    // visible here.
    duelingNetwork: false,
});
|
|
33
|
+
/**
 * DQN Algorithm Implementation
 *
 * Q-learning agent backed by a small bias-free two-layer network
 * (inputDim -> hiddenDim with ReLU -> numActions). Experiences are kept in a
 * fixed-capacity circular replay buffer; `update()` samples one mini-batch,
 * takes a momentum step on the squared TD error, periodically refreshes the
 * target network and decays the exploration rate.
 */
export class DQNAlgorithm {
    /** Effective configuration: DEFAULT_DQN_CONFIG overlaid with caller overrides. */
    config;
    // Q-network weights: [layer 1 (inputDim x hiddenDim), layer 2 (hiddenDim x numActions)]
    qWeights;
    // Snapshot of qWeights used to evaluate bootstrap targets
    targetWeights;
    // Optimizer state (one momentum buffer per weight layer)
    qMomentum;
    // Replay buffer (circular): bufferIdx is the next slot to overwrite once full
    buffer = [];
    bufferIdx = 0;
    // Exploration
    epsilon;
    stepCount = 0;
    // Network dimensions
    numActions = 4;
    inputDim = 768;
    hiddenDim = 64;
    // Statistics
    updateCount = 0;
    avgLoss = 0;
    /**
     * @param {object} [config] - Partial configuration; missing keys fall back
     *   to DEFAULT_DQN_CONFIG.
     */
    constructor(config = {}) {
        this.config = { ...DEFAULT_DQN_CONFIG, ...config };
        this.epsilon = this.config.explorationInitial;
        this.qWeights = this.initializeNetwork();
        this.targetWeights = this.copyNetwork(this.qWeights);
        this.qMomentum = this.qWeights.map(w => new Float32Array(w.length));
    }
    /**
     * Add experience from trajectory.
     *
     * Every step becomes one (state, action, reward, nextState, done)
     * transition; only the final step of the trajectory is marked `done`.
     *
     * @param {{steps: Array<object>}} trajectory - Steps exposing
     *   `stateBefore`, `stateAfter`, `action` (string) and `reward`.
     */
    addExperience(trajectory) {
        const lastIndex = trajectory.steps.length - 1;
        for (let i = 0; i <= lastIndex; i++) {
            const step = trajectory.steps[i];
            const experience = {
                state: step.stateBefore,
                action: this.hashAction(step.action),
                reward: step.reward,
                nextState: step.stateAfter,
                done: i === lastIndex,
            };
            // Append until the buffer is full, then overwrite the oldest slot.
            if (this.buffer.length < this.config.bufferSize) {
                this.buffer.push(experience);
            }
            else {
                this.buffer[this.bufferIdx] = experience;
            }
            this.bufferIdx = (this.bufferIdx + 1) % this.config.bufferSize;
        }
    }
    /**
     * Perform DQN update
     * Target: <10ms
     *
     * @returns {{loss: number, epsilon: number}} Mean squared TD error of the
     *   sampled batch and the exploration rate after the update. Returns
     *   `loss: 0` without touching the network when there is not enough data.
     */
    update() {
        const startTime = performance.now();
        if (this.buffer.length < this.config.miniBatchSize) {
            return { loss: 0, epsilon: this.epsilon };
        }
        // Sample mini-batch
        const batch = this.sampleBatch();
        if (batch.length === 0) {
            // A non-positive (or NaN) miniBatchSize yields an empty batch.
            // Proceeding would divide the learning rate by zero and write NaN
            // into every weight, so treat it as "nothing to learn from".
            return { loss: 0, epsilon: this.epsilon };
        }
        let totalLoss = 0;
        const gradients = this.qWeights.map(w => new Float32Array(w.length));
        for (const exp of batch) {
            // The hidden activations are computed once and reused for both the
            // Q-value read-out and the backward pass.
            const hidden = this.computeHidden(exp.state, this.qWeights);
            const currentQ = this.computeOutput(hidden, this.qWeights)[exp.action];
            // TD error
            const tdError = this.computeTarget(exp) - currentQ;
            totalLoss += tdError * tdError;
            // Accumulate gradients
            this.accumulateGradients(gradients, exp.state, exp.action, tdError, hidden);
        }
        // Apply gradients
        this.applyGradients(gradients, batch.length);
        // Update target network periodically
        this.stepCount++;
        if (this.stepCount % this.config.targetUpdateFreq === 0) {
            this.targetWeights = this.copyNetwork(this.qWeights);
        }
        // Decay exploration linearly with the number of updates, floored at explorationFinal
        this.epsilon = Math.max(this.config.explorationFinal, this.config.explorationInitial - this.stepCount / this.config.explorationDecay);
        this.updateCount++;
        // Despite the name this is the mean loss of the most recent batch only.
        this.avgLoss = totalLoss / batch.length;
        const elapsed = performance.now() - startTime;
        if (elapsed > 10) {
            console.warn(`DQN update exceeded target: ${elapsed.toFixed(2)}ms > 10ms`);
        }
        return {
            loss: this.avgLoss,
            epsilon: this.epsilon,
        };
    }
    /**
     * Get action using epsilon-greedy
     *
     * @param {ArrayLike<number>} state - State vector.
     * @param {boolean} [explore=true] - When false the greedy action is always returned.
     * @returns {number} Action index in [0, numActions).
     */
    getAction(state, explore = true) {
        if (explore && Math.random() < this.epsilon) {
            return Math.floor(Math.random() * this.numActions);
        }
        return this.argmax(this.forward(state, this.qWeights));
    }
    /**
     * Get Q-values for a state
     *
     * @param {ArrayLike<number>} state - State vector.
     * @returns {Float32Array} One Q-value per action from the online network.
     */
    getQValues(state) {
        return this.forward(state, this.qWeights);
    }
    /**
     * Get statistics
     *
     * @returns {{updateCount: number, bufferSize: number, epsilon: number, avgLoss: number, stepCount: number}}
     */
    getStats() {
        return {
            updateCount: this.updateCount,
            bufferSize: this.buffer.length,
            epsilon: this.epsilon,
            avgLoss: this.avgLoss,
            stepCount: this.stepCount,
        };
    }
    // ==========================================================================
    // Private Methods
    // ==========================================================================
    /**
     * Build the two weight matrices, each filled with uniform random values in
     * +/- sqrt(2 / fanIn) / 2.
     */
    initializeNetwork() {
        const randomLayer = (fanIn, fanOut) => {
            const layer = new Float32Array(fanIn * fanOut);
            const scale = Math.sqrt(2 / fanIn);
            for (let i = 0; i < layer.length; i++) {
                layer[i] = (Math.random() - 0.5) * scale;
            }
            return layer;
        };
        return [
            // Layer 1: input_dim -> hidden
            randomLayer(this.inputDim, this.hiddenDim),
            // Layer 2: hidden -> num_actions
            randomLayer(this.hiddenDim, this.numActions),
        ];
    }
    /** Deep-copy a list of weight layers. */
    copyNetwork(weights) {
        return weights.map(w => new Float32Array(w));
    }
    /** Full forward pass: state -> Q-values under the given weights. */
    forward(state, weights) {
        return this.computeOutput(this.computeHidden(state, weights), weights);
    }
    /**
     * Layer 1: ReLU(W1 * x). Only the first `inputDim` entries of `state` are
     * read; a shorter state leaves the remaining inputs at zero.
     */
    computeHidden(state, weights) {
        const hidden = new Float32Array(this.hiddenDim);
        const inputs = Math.min(state.length, this.inputDim);
        for (let h = 0; h < this.hiddenDim; h++) {
            let sum = 0;
            for (let i = 0; i < inputs; i++) {
                sum += state[i] * weights[0][i * this.hiddenDim + h];
            }
            hidden[h] = Math.max(0, sum); // ReLU
        }
        return hidden;
    }
    /** Layer 2: W2 * hidden (no activation for Q-values). */
    computeOutput(hidden, weights) {
        const output = new Float32Array(this.numActions);
        for (let a = 0; a < this.numActions; a++) {
            let sum = 0;
            for (let h = 0; h < this.hiddenDim; h++) {
                sum += hidden[h] * weights[1][h * this.numActions + a];
            }
            output[a] = sum;
        }
        return output;
    }
    /**
     * Bootstrap target for one transition: the reward alone when the
     * transition is terminal, otherwise reward + gamma * next-state value.
     */
    computeTarget(exp) {
        if (exp.done) {
            return exp.reward;
        }
        const nextQTarget = this.forward(exp.nextState, this.targetWeights);
        if (this.config.doubleDQN) {
            // Double DQN: use online network to select action, target to evaluate
            const bestAction = this.argmax(this.forward(exp.nextState, this.qWeights));
            return exp.reward + this.config.gamma * nextQTarget[bestAction];
        }
        // Standard DQN
        return exp.reward + this.config.gamma * Math.max(...nextQTarget);
    }
    /**
     * Add one sample's contribution to `gradients` (in the direction that
     * reduces the TD error, hence the `+=` in applyGradients).
     *
     * @param {Float32Array[]} gradients - Per-layer accumulators, mutated in place.
     * @param {ArrayLike<number>} state - State the action was taken in.
     * @param {number} action - Index of the action whose Q-value is trained.
     * @param {number} tdError - target - current Q-value.
     * @param {Float32Array} [hidden] - Pre-computed hidden activations for
     *   `state` under the online weights; recomputed when omitted.
     */
    accumulateGradients(gradients, state, action, tdError, hidden = this.computeHidden(state, this.qWeights)) {
        const inputs = Math.min(state.length, this.inputDim);
        // Gradient for layer 2 (only for selected action)
        for (let h = 0; h < this.hiddenDim; h++) {
            gradients[1][h * this.numActions + action] += hidden[h] * tdError;
        }
        // Gradient for layer 1 (backprop through ReLU)
        for (let h = 0; h < this.hiddenDim; h++) {
            if (hidden[h] > 0) { // ReLU gradient
                const grad = tdError * this.qWeights[1][h * this.numActions + action];
                for (let i = 0; i < inputs; i++) {
                    gradients[0][i * this.hiddenDim + h] += state[i] * grad;
                }
            }
        }
    }
    /**
     * Momentum step on the online network using the batch-summed gradients.
     *
     * @param {Float32Array[]} gradients - Summed per-layer gradients.
     * @param {number} batchSize - Number of samples summed; must be > 0.
     */
    applyGradients(gradients, batchSize) {
        const lr = this.config.learningRate / batchSize;
        const beta = 0.9;
        for (let layer = 0; layer < gradients.length; layer++) {
            for (let i = 0; i < gradients[layer].length; i++) {
                // Gradient clipping: each component of the summed gradient is
                // clamped to +/- maxGradNorm (element-wise, not a norm rescale).
                const grad = Math.max(Math.min(gradients[layer][i], this.config.maxGradNorm), -this.config.maxGradNorm);
                // Momentum update
                this.qMomentum[layer][i] = beta * this.qMomentum[layer][i] + (1 - beta) * grad;
                this.qWeights[layer][i] += lr * this.qMomentum[layer][i];
            }
        }
    }
    /**
     * Draw min(miniBatchSize, buffer.length) distinct experiences uniformly at
     * random (rejection sampling on indices).
     */
    sampleBatch() {
        const indices = new Set();
        while (indices.size < this.config.miniBatchSize && indices.size < this.buffer.length) {
            indices.add(Math.floor(Math.random() * this.buffer.length));
        }
        return Array.from(indices, idx => this.buffer[idx]);
    }
    /** Map an action string onto an action index in [0, numActions). */
    hashAction(action) {
        let hash = 0;
        for (let i = 0; i < action.length; i++) {
            hash = (hash * 31 + action.charCodeAt(i)) % this.numActions;
        }
        return hash;
    }
    /** Index of the first maximum of `values`. */
    argmax(values) {
        let maxIdx = 0;
        let maxVal = values[0];
        for (let i = 1; i < values.length; i++) {
            if (values[i] > maxVal) {
                maxVal = values[i];
                maxIdx = i;
            }
        }
        return maxIdx;
    }
}
|
|
297
|
+
/**
 * Factory function: build a DQN agent.
 *
 * @param {object} [config] - Configuration passed straight through to the
 *   DQNAlgorithm constructor.
 * @returns {DQNAlgorithm} A freshly constructed agent.
 */
export function createDQN(config) {
    const agent = new DQNAlgorithm(config);
    return agent;
}
|
|
303
|
+
//# sourceMappingURL=dqn.js.map
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* RL Algorithms Index
|
|
3
|
+
*
|
|
4
|
+
* Exports all reinforcement learning algorithm implementations.
|
|
5
|
+
*/
|
|
6
|
+
// PPO - Proximal Policy Optimization
|
|
7
|
+
export { PPOAlgorithm, createPPO, DEFAULT_PPO_CONFIG, } from './ppo.js';
|
|
8
|
+
// DQN - Deep Q-Network
|
|
9
|
+
export { DQNAlgorithm, createDQN, DEFAULT_DQN_CONFIG, } from './dqn.js';
|
|
10
|
+
// A2C - Advantage Actor-Critic
|
|
11
|
+
export { A2CAlgorithm, createA2C, DEFAULT_A2C_CONFIG, } from './a2c.js';
|
|
12
|
+
// Decision Transformer
|
|
13
|
+
export { DecisionTransformer, createDecisionTransformer, DEFAULT_DT_CONFIG, } from './decision-transformer.js';
|
|
14
|
+
// Q-Learning (Tabular)
|
|
15
|
+
export { QLearning, createQLearning, DEFAULT_QLEARNING_CONFIG, } from './q-learning.js';
|
|
16
|
+
// SARSA
|
|
17
|
+
export { SARSAAlgorithm, createSARSA, DEFAULT_SARSA_CONFIG, } from './sarsa.js';
|
|
18
|
+
// Curiosity-Driven Exploration
|
|
19
|
+
export { CuriosityModule, createCuriosity, DEFAULT_CURIOSITY_CONFIG, } from './curiosity.js';
|
|
20
|
+
import { createPPO, DEFAULT_PPO_CONFIG } from './ppo.js';
|
|
21
|
+
import { createDQN, DEFAULT_DQN_CONFIG } from './dqn.js';
|
|
22
|
+
import { createA2C, DEFAULT_A2C_CONFIG } from './a2c.js';
|
|
23
|
+
import { createDecisionTransformer, DEFAULT_DT_CONFIG } from './decision-transformer.js';
|
|
24
|
+
import { createQLearning, DEFAULT_QLEARNING_CONFIG } from './q-learning.js';
|
|
25
|
+
import { createSARSA, DEFAULT_SARSA_CONFIG } from './sarsa.js';
|
|
26
|
+
import { createCuriosity, DEFAULT_CURIOSITY_CONFIG } from './curiosity.js';
|
|
27
|
+
/**
 * Create an RL algorithm by name.
 *
 * @param {string} algorithm - One of 'ppo', 'dqn', 'a2c',
 *   'decision-transformer', 'q-learning', 'sarsa' or 'curiosity'.
 * @param {object} [config] - Passed unchanged to the matching factory.
 * @returns {object} Whatever the matching factory returns.
 * @throws {Error} If `algorithm` is not one of the names above.
 */
export function createAlgorithm(algorithm, config) {
    // Name -> factory lookup; a Map keeps inherited object keys from matching.
    const factories = new Map([
        ['ppo', createPPO],
        ['dqn', createDQN],
        ['a2c', createA2C],
        ['decision-transformer', createDecisionTransformer],
        ['q-learning', createQLearning],
        ['sarsa', createSARSA],
        ['curiosity', createCuriosity],
    ]);
    const factory = factories.get(algorithm);
    if (factory === undefined) {
        throw new Error(`Unknown algorithm: ${algorithm}`);
    }
    return factory(config);
}
|
|
51
|
+
/**
 * Get default configuration for an algorithm.
 *
 * @param {string} algorithm - One of 'ppo', 'dqn', 'a2c',
 *   'decision-transformer', 'q-learning', 'sarsa' or 'curiosity'.
 * @returns {object} A shallow copy of that algorithm's default configuration,
 *   so callers can modify it without touching the shared constant.
 * @throws {Error} If `algorithm` is not one of the names above.
 */
export function getDefaultConfig(algorithm) {
    const defaultsByName = new Map([
        ['ppo', DEFAULT_PPO_CONFIG],
        ['dqn', DEFAULT_DQN_CONFIG],
        ['a2c', DEFAULT_A2C_CONFIG],
        ['decision-transformer', DEFAULT_DT_CONFIG],
        ['q-learning', DEFAULT_QLEARNING_CONFIG],
        ['sarsa', DEFAULT_SARSA_CONFIG],
        ['curiosity', DEFAULT_CURIOSITY_CONFIG],
    ]);
    if (!defaultsByName.has(algorithm)) {
        throw new Error(`Unknown algorithm: ${algorithm}`);
    }
    return { ...defaultsByName.get(algorithm) };
}
|
|
74
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1,331 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Proximal Policy Optimization (PPO)
|
|
3
|
+
*
|
|
4
|
+
* Implements PPO algorithm for stable policy learning with:
|
|
5
|
+
* - Clipped surrogate objective
|
|
6
|
+
* - GAE (Generalized Advantage Estimation)
|
|
7
|
+
* - Value function clipping
|
|
8
|
+
* - Entropy bonus
|
|
9
|
+
*
|
|
10
|
+
* Performance Target: <10ms per update step
|
|
11
|
+
*/
|
|
12
|
+
/**
 * Default PPO configuration.
 *
 * PPOAlgorithm merges caller overrides on top of these values.
 */
export const DEFAULT_PPO_CONFIG = {
    algorithm: 'ppo',
    // Step size applied to the momentum-smoothed gradients.
    learningRate: 0.0003,
    // Discount factor used for both returns and GAE.
    gamma: 0.99,
    // Loss-term weights and gradient clip bound.
    // NOTE(review): presumably consumed by the optimizer; confirm against PPOAlgorithm.
    entropyCoef: 0.01,
    valueLossCoef: 0.5,
    maxGradNorm: 0.5,
    // Passes over the experience buffer per update() call.
    epochs: 4,
    // Mini-batch length; update() is a no-op until the buffer holds this many samples.
    miniBatchSize: 64,
    // Probability-ratio clip range of the surrogate objective.
    clipRange: 0.2,
    // Value-function clip range; null disables value clipping.
    clipRangeVf: null,
    // Mini-batch KL level; updates stop early above 1.5x this value.
    targetKL: 0.01,
    // Lambda of Generalized Advantage Estimation.
    gaeLambda: 0.95,
};
|
|
29
|
+
/**
 * PPO Algorithm Implementation
 *
 * Linear policy and linear value function over a 768-dimensional state.
 * `addExperience()` turns a trajectory into buffered samples carrying GAE
 * advantages and discounted returns; `update()` runs several epochs of
 * clipped-surrogate mini-batch updates and then clears the buffer.
 */
export class PPOAlgorithm {
    /** Effective configuration: DEFAULT_PPO_CONFIG overlaid with caller overrides. */
    config;
    // Policy network weights (simplified linear model for speed)
    policyWeights;
    valueWeights;
    // Optimizer state
    policyMomentum;
    valueMomentum;
    // Experience buffer
    buffer = [];
    // Number of discrete actions
    numActions = 4;
    // Statistics
    updateCount = 0;
    totalLoss = 0;
    approxKL = 0;
    clipFraction = 0;
    /**
     * @param {object} [config] - Partial configuration; missing keys fall back
     *   to DEFAULT_PPO_CONFIG.
     */
    constructor(config = {}) {
        this.config = { ...DEFAULT_PPO_CONFIG, ...config };
        // Initialize weights (768 input dim, simplified)
        const dim = 768;
        this.policyWeights = new Float32Array(dim);
        this.valueWeights = new Float32Array(dim);
        this.policyMomentum = new Float32Array(dim);
        this.valueMomentum = new Float32Array(dim);
        // Uniform random initialization in +/- sqrt(2 / dim) / 2
        const scale = Math.sqrt(2 / dim);
        for (let i = 0; i < dim; i++) {
            this.policyWeights[i] = (Math.random() - 0.5) * scale;
            this.valueWeights[i] = (Math.random() - 0.5) * scale;
        }
    }
    /**
     * Add experience from trajectory.
     *
     * Value estimates, log-probabilities and GAE are evaluated on the state
     * the action was taken in (`stateBefore`), so that `values[t + 1]` really
     * is the value of the successor state. Steps without `stateBefore` fall
     * back to `stateAfter`.
     *
     * @param {{steps: Array<object>}} trajectory - Steps exposing
     *   `stateBefore`/`stateAfter`, `action` (string) and `reward`.
     */
    addExperience(trajectory) {
        const { steps } = trajectory;
        if (steps.length === 0) {
            return;
        }
        const states = steps.map(step => step.stateBefore ?? step.stateAfter);
        const rewards = steps.map(step => step.reward);
        // Compute values for each step
        const values = states.map(state => this.computeValue(state));
        // Compute advantages using GAE
        const advantages = this.computeGAE(rewards, values);
        // Compute returns
        const returns = this.computeReturns(rewards);
        // Add to buffer
        steps.forEach((step, i) => {
            this.buffer.push({
                state: states[i],
                action: this.hashAction(step.action),
                reward: step.reward,
                value: values[i],
                logProb: this.computeLogProb(states[i], step.action),
                advantage: advantages[i],
                return_: returns[i],
            });
        });
    }
    /**
     * Perform PPO update
     * Target: <10ms
     *
     * @returns {{policyLoss: number, valueLoss: number, entropy: number}}
     *   Means over the mini-batches that were processed; all zero when the
     *   buffer holds fewer than `miniBatchSize` samples (buffer is kept).
     */
    update() {
        const startTime = performance.now();
        if (this.buffer.length < this.config.miniBatchSize) {
            return { policyLoss: 0, valueLoss: 0, entropy: 0 };
        }
        // Normalize advantages
        const advantages = this.buffer.map(e => e.advantage);
        const advMean = advantages.reduce((a, b) => a + b, 0) / advantages.length;
        const advStd = Math.sqrt(advantages.reduce((a, b) => a + (b - advMean) ** 2, 0) / advantages.length) + 1e-8;
        for (const exp of this.buffer) {
            exp.advantage = (exp.advantage - advMean) / advStd;
        }
        let totalPolicyLoss = 0;
        let totalValueLoss = 0;
        let totalEntropy = 0;
        let totalClipFrac = 0;
        let totalKL = 0;
        let numUpdates = 0;
        // A stride below 1 would never advance the mini-batch cursor.
        const stride = Math.max(1, this.config.miniBatchSize);
        let klExceeded = false;
        // Multiple epochs
        for (let epoch = 0; epoch < this.config.epochs && !klExceeded; epoch++) {
            // Shuffle buffer
            this.shuffleBuffer();
            // Process mini-batches
            for (let i = 0; i < this.buffer.length; i += stride) {
                const batch = this.buffer.slice(i, i + stride);
                // Skip a trailing remainder smaller than half a mini-batch.
                if (batch.length < this.config.miniBatchSize / 2) {
                    continue;
                }
                const result = this.updateMiniBatch(batch);
                totalPolicyLoss += result.policyLoss;
                totalValueLoss += result.valueLoss;
                totalEntropy += result.entropy;
                totalClipFrac += result.clipFrac;
                totalKL += result.kl;
                numUpdates++;
                // Early stopping if KL too high: abandon the remaining
                // mini-batches AND the remaining epochs.
                if (result.kl > this.config.targetKL * 1.5) {
                    klExceeded = true;
                    break;
                }
            }
        }
        const policyLoss = numUpdates > 0 ? totalPolicyLoss / numUpdates : 0;
        const valueLoss = numUpdates > 0 ? totalValueLoss / numUpdates : 0;
        const entropy = numUpdates > 0 ? totalEntropy / numUpdates : 0;
        // Record the diagnostics reported by getStats().
        if (numUpdates > 0) {
            this.approxKL = totalKL / numUpdates;
            this.clipFraction = totalClipFrac / numUpdates;
            this.totalLoss += policyLoss
                + this.config.valueLossCoef * valueLoss
                - this.config.entropyCoef * entropy;
        }
        // Clear buffer
        this.buffer = [];
        this.updateCount++;
        const elapsed = performance.now() - startTime;
        if (elapsed > 10) {
            console.warn(`PPO update exceeded target: ${elapsed.toFixed(2)}ms > 10ms`);
        }
        return { policyLoss, valueLoss, entropy };
    }
    /**
     * Get action from policy
     *
     * @param {ArrayLike<number>} state - State vector.
     * @returns {{action: number, logProb: number, value: number}} Sampled
     *   action index, its log-probability and the state-value estimate.
     */
    getAction(state) {
        const probs = this.softmax(this.computeLogits(state));
        const action = this.sampleAction(probs);
        return {
            action,
            logProb: Math.log(probs[action] + 1e-8),
            value: this.computeValue(state),
        };
    }
    /**
     * Get statistics
     *
     * @returns {{updateCount: number, bufferSize: number, avgLoss: number, approxKL: number, clipFraction: number}}
     *   `avgLoss` is the combined loss averaged over all update() calls;
     *   `approxKL` and `clipFraction` describe the most recent update.
     */
    getStats() {
        return {
            updateCount: this.updateCount,
            bufferSize: this.buffer.length,
            avgLoss: this.updateCount > 0 ? this.totalLoss / this.updateCount : 0,
            approxKL: this.approxKL,
            clipFraction: this.clipFraction,
        };
    }
    // ==========================================================================
    // Private Methods
    // ==========================================================================
    /** Linear state value: valueWeights . state (over the shared prefix). */
    computeValue(state) {
        let value = 0;
        for (let i = 0; i < Math.min(state.length, this.valueWeights.length); i++) {
            value += state[i] * this.valueWeights[i];
        }
        return value;
    }
    /**
     * Action logits. All actions share one weight vector; action `a` scales
     * the shared dot product by (1 + 0.1 * a).
     */
    computeLogits(state) {
        const logits = new Float32Array(this.numActions);
        for (let a = 0; a < this.numActions; a++) {
            for (let i = 0; i < Math.min(state.length, this.policyWeights.length); i++) {
                logits[a] += state[i] * this.policyWeights[i] * (1 + a * 0.1);
            }
        }
        return logits;
    }
    /** Log-probability of the (string) action under the current policy. */
    computeLogProb(state, action) {
        const probs = this.softmax(this.computeLogits(state));
        return Math.log(probs[this.hashAction(action)] + 1e-8);
    }
    /** Simple hash from an action string to an action index in [0, numActions). */
    hashAction(action) {
        let hash = 0;
        for (let i = 0; i < action.length; i++) {
            hash = (hash * 31 + action.charCodeAt(i)) % this.numActions;
        }
        return hash;
    }
    /** Numerically stable softmax (logits are shifted by their maximum). */
    softmax(logits) {
        const max = Math.max(...logits);
        const exps = new Float32Array(logits.length);
        let sum = 0;
        for (let i = 0; i < logits.length; i++) {
            exps[i] = Math.exp(logits[i] - max);
            sum += exps[i];
        }
        for (let i = 0; i < exps.length; i++) {
            exps[i] /= sum;
        }
        return exps;
    }
    /** Sample an index from a categorical distribution by inverse CDF. */
    sampleAction(probs) {
        const r = Math.random();
        let cumSum = 0;
        for (let i = 0; i < probs.length; i++) {
            cumSum += probs[i];
            if (r < cumSum) {
                return i;
            }
        }
        // Rounding can leave the cumulative sum marginally below 1.
        return probs.length - 1;
    }
    /**
     * Generalized Advantage Estimation, computed backwards through the
     * trajectory. The value after the last step is taken to be 0.
     */
    computeGAE(rewards, values) {
        const advantages = new Array(rewards.length).fill(0);
        let lastGae = 0;
        for (let t = rewards.length - 1; t >= 0; t--) {
            const nextValue = t < rewards.length - 1 ? values[t + 1] : 0;
            const delta = rewards[t] + this.config.gamma * nextValue - values[t];
            lastGae = delta + this.config.gamma * this.config.gaeLambda * lastGae;
            advantages[t] = lastGae;
        }
        return advantages;
    }
    /** Discounted reward-to-go for every step. */
    computeReturns(rewards) {
        const returns = new Array(rewards.length).fill(0);
        let cumReturn = 0;
        for (let t = rewards.length - 1; t >= 0; t--) {
            cumReturn = rewards[t] + this.config.gamma * cumReturn;
            returns[t] = cumReturn;
        }
        return returns;
    }
    /** In-place Fisher-Yates shuffle of the experience buffer. */
    shuffleBuffer() {
        for (let i = this.buffer.length - 1; i > 0; i--) {
            const j = Math.floor(Math.random() * (i + 1));
            [this.buffer[i], this.buffer[j]] = [this.buffer[j], this.buffer[i]];
        }
    }
    /**
     * One optimizer step on a mini-batch.
     *
     * @param {Array<object>} batch - Non-empty slice of the experience buffer.
     * @returns {{policyLoss: number, valueLoss: number, entropy: number, clipFrac: number, kl: number}}
     *   Per-sample means measured before the weights are changed.
     */
    updateMiniBatch(batch) {
        // Step scale carried over from the original simplified update rule.
        const GRAD_SCALE = 0.01;
        const { clipRange, clipRangeVf } = this.config;
        let policyLoss = 0;
        let valueLoss = 0;
        let entropy = 0;
        let clipFrac = 0;
        let kl = 0;
        const policyGrad = new Float32Array(this.policyWeights.length);
        const valueGrad = new Float32Array(this.valueWeights.length);
        for (const exp of batch) {
            // Current policy
            const probs = this.softmax(this.computeLogits(exp.state));
            const newLogProb = Math.log(probs[exp.action] + 1e-8);
            const currentValue = this.computeValue(exp.state);
            // Ratio for PPO
            const ratio = Math.exp(newLogProb - exp.logProb);
            // Clipped surrogate objective
            const surr1 = ratio * exp.advantage;
            const surr2 = Math.max(Math.min(ratio, 1 + clipRange), 1 - clipRange) * exp.advantage;
            policyLoss += -Math.min(surr1, surr2);
            // Track clipping
            if (Math.abs(ratio - 1) > clipRange) {
                clipFrac++;
            }
            // KL divergence approximation
            kl += (exp.logProb - newLogProb);
            // Value loss. valueErr is the signed error that drives the value
            // gradient; it is zeroed when the clipped (constant) branch wins.
            let valueErr = currentValue - exp.return_;
            let valueLossI = valueErr ** 2;
            if (clipRangeVf != null) {
                const valueClipped = exp.value + Math.max(Math.min(currentValue - exp.value, clipRangeVf), -clipRangeVf);
                const clippedLoss = (valueClipped - exp.return_) ** 2;
                if (clippedLoss > valueLossI) {
                    valueLossI = clippedLoss;
                    valueErr = 0;
                }
            }
            valueLoss += valueLossI;
            // Entropy
            let entropyI = 0;
            for (const p of probs) {
                if (p > 0) {
                    entropyI -= p * Math.log(p);
                }
            }
            entropy += entropyI;
            // Policy gradient. With u = policyWeights . state and logit_a = c_a * u
            // (c_a = 1 + 0.1 * a), d log pi(a) / du = c_a - sum_b pi(b) * c_b.
            // The surrogate only has a gradient while the unclipped term is
            // the active minimum.
            let policyCoef = 0;
            if (surr1 <= surr2) {
                let meanScale = 0;
                for (let a = 0; a < probs.length; a++) {
                    meanScale += probs[a] * (1 + a * 0.1);
                }
                policyCoef = -exp.advantage * ratio * ((1 + exp.action * 0.1) - meanScale);
            }
            const inputs = Math.min(exp.state.length, policyGrad.length);
            for (let i = 0; i < inputs; i++) {
                policyGrad[i] += exp.state[i] * policyCoef * GRAD_SCALE;
                valueGrad[i] += exp.state[i] * valueErr * GRAD_SCALE;
            }
        }
        // Apply gradients with momentum
        const lr = this.config.learningRate;
        const beta = 0.9;
        for (let i = 0; i < this.policyWeights.length; i++) {
            this.policyMomentum[i] = beta * this.policyMomentum[i] + (1 - beta) * policyGrad[i];
            this.policyWeights[i] -= lr * this.policyMomentum[i];
            this.valueMomentum[i] = beta * this.valueMomentum[i] + (1 - beta) * valueGrad[i];
            this.valueWeights[i] -= lr * this.valueMomentum[i];
        }
        return {
            policyLoss: policyLoss / batch.length,
            valueLoss: valueLoss / batch.length,
            entropy: entropy / batch.length,
            clipFrac: clipFrac / batch.length,
            kl: kl / batch.length,
        };
    }
}
|
|
325
|
+
/**
 * Factory function: build a PPO agent.
 *
 * @param {object} [config] - Configuration passed straight through to the
 *   PPOAlgorithm constructor.
 * @returns {PPOAlgorithm} A freshly constructed agent.
 */
export function createPPO(config) {
    const agent = new PPOAlgorithm(config);
    return agent;
}
|
|
331
|
+
//# sourceMappingURL=ppo.js.map
|