agentdb 1.0.12 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +196 -838
- package/dist/benchmarks/comprehensive-benchmark.js +664 -0
- package/dist/benchmarks/frontier-benchmark.js +419 -0
- package/dist/benchmarks/reflexion-benchmark.js +370 -0
- package/dist/cli/agentdb-cli.d.ts +117 -0
- package/dist/cli/agentdb-cli.d.ts.map +1 -0
- package/dist/cli/agentdb-cli.js +718 -0
- package/dist/cli/agentdb-cli.js.map +1 -0
- package/dist/controllers/CausalMemoryGraph.d.ts +125 -0
- package/dist/controllers/CausalMemoryGraph.d.ts.map +1 -0
- package/dist/controllers/CausalMemoryGraph.js +323 -0
- package/dist/controllers/CausalMemoryGraph.js.map +1 -0
- package/dist/controllers/CausalRecall.d.ts +111 -0
- package/dist/controllers/CausalRecall.d.ts.map +1 -0
- package/dist/controllers/CausalRecall.js +282 -0
- package/dist/controllers/CausalRecall.js.map +1 -0
- package/dist/controllers/EmbeddingService.d.ts +37 -0
- package/dist/controllers/EmbeddingService.d.ts.map +1 -0
- package/dist/controllers/EmbeddingService.js +119 -0
- package/dist/controllers/EmbeddingService.js.map +1 -0
- package/dist/controllers/ExplainableRecall.d.ts +138 -0
- package/dist/controllers/ExplainableRecall.d.ts.map +1 -0
- package/dist/controllers/ExplainableRecall.js +388 -0
- package/dist/controllers/ExplainableRecall.js.map +1 -0
- package/dist/controllers/NightlyLearner.d.ts +113 -0
- package/dist/controllers/NightlyLearner.d.ts.map +1 -0
- package/dist/controllers/NightlyLearner.js +383 -0
- package/dist/controllers/NightlyLearner.js.map +1 -0
- package/dist/controllers/ReflexionMemory.d.ts +84 -0
- package/dist/controllers/ReflexionMemory.d.ts.map +1 -0
- package/dist/controllers/ReflexionMemory.js +240 -0
- package/dist/controllers/ReflexionMemory.js.map +1 -0
- package/dist/controllers/SkillLibrary.d.ts +96 -0
- package/dist/controllers/SkillLibrary.d.ts.map +1 -0
- package/dist/controllers/SkillLibrary.js +277 -0
- package/dist/controllers/SkillLibrary.js.map +1 -0
- package/dist/controllers/frontier-index.d.ts +14 -0
- package/dist/controllers/frontier-index.d.ts.map +1 -0
- package/dist/controllers/frontier-index.js +10 -0
- package/dist/controllers/frontier-index.js.map +1 -0
- package/dist/controllers/index.d.ts +12 -0
- package/dist/controllers/index.d.ts.map +1 -0
- package/dist/controllers/index.js +9 -0
- package/dist/controllers/index.js.map +1 -0
- package/dist/optimizations/BatchOperations.d.ts +61 -0
- package/dist/optimizations/BatchOperations.d.ts.map +1 -0
- package/dist/optimizations/BatchOperations.js +199 -0
- package/dist/optimizations/BatchOperations.js.map +1 -0
- package/dist/optimizations/QueryOptimizer.d.ts +82 -0
- package/dist/optimizations/QueryOptimizer.d.ts.map +1 -0
- package/dist/optimizations/QueryOptimizer.js +226 -0
- package/dist/optimizations/QueryOptimizer.js.map +1 -0
- package/dist/optimizations/index.d.ts +10 -0
- package/dist/optimizations/index.d.ts.map +1 -0
- package/dist/optimizations/index.js +8 -0
- package/dist/optimizations/index.js.map +1 -0
- package/dist/schemas/frontier-schema.sql +341 -0
- package/dist/schemas/schema.sql +382 -0
- package/dist/tests/frontier-features.test.js +665 -0
- package/package.json +54 -117
- package/src/cli/agentdb-cli.ts +861 -0
- package/src/cli/examples.sh +83 -0
- package/src/controllers/CausalMemoryGraph.ts +504 -0
- package/src/controllers/CausalRecall.ts +395 -0
- package/src/controllers/EmbeddingService.ts +141 -0
- package/src/controllers/ExplainableRecall.ts +577 -0
- package/src/controllers/NightlyLearner.ts +475 -0
- package/src/controllers/ReflexionMemory.ts +349 -0
- package/src/controllers/SkillLibrary.ts +391 -0
- package/src/controllers/frontier-index.ts +35 -0
- package/src/controllers/index.ts +13 -0
- package/src/optimizations/BatchOperations.ts +292 -0
- package/src/optimizations/QueryOptimizer.ts +294 -0
- package/src/optimizations/index.ts +11 -0
- package/src/schemas/frontier-schema.sql +341 -0
- package/src/schemas/schema.sql +382 -0
- package/CHANGELOG.md +0 -166
- package/LICENSE +0 -38
- package/LICENSE-APACHE +0 -190
- package/LICENSE-MIT +0 -21
- package/bin/agentdb.js +0 -733
- package/bin/benchmark.js +0 -165
- package/bin/plugin-cli-wrapper.mjs +0 -21
- package/dist/agentdb.js +0 -5352
- package/dist/agentdb.js.map +0 -7
- package/dist/agentdb.min.js +0 -143
- package/dist/agentdb.min.js.map +0 -7
- package/dist/cache/query-cache.d.ts +0 -105
- package/dist/cache/query-cache.d.ts.map +0 -1
- package/dist/cache/query-cache.js +0 -224
- package/dist/cache/query-cache.js.map +0 -1
- package/dist/cache/query-cache.mjs +0 -219
- package/dist/cli/cache/query-cache.d.ts +0 -104
- package/dist/cli/cache/query-cache.js +0 -244
- package/dist/cli/cli/db-commands.d.ts +0 -48
- package/dist/cli/cli/db-commands.js +0 -613
- package/dist/cli/commands.d.ts +0 -7
- package/dist/cli/commands.d.ts.map +0 -1
- package/dist/cli/commands.js +0 -113
- package/dist/cli/commands.js.map +0 -1
- package/dist/cli/commands.mjs +0 -104
- package/dist/cli/core/backend-interface.d.ts +0 -70
- package/dist/cli/core/backend-interface.js +0 -15
- package/dist/cli/core/native-backend.d.ts +0 -140
- package/dist/cli/core/native-backend.js +0 -432
- package/dist/cli/core/vector-db.d.ts +0 -126
- package/dist/cli/core/vector-db.js +0 -338
- package/dist/cli/core/wasm-backend.d.ts +0 -95
- package/dist/cli/core/wasm-backend.js +0 -418
- package/dist/cli/db-commands.d.ts +0 -49
- package/dist/cli/db-commands.d.ts.map +0 -1
- package/dist/cli/db-commands.js +0 -533
- package/dist/cli/db-commands.js.map +0 -1
- package/dist/cli/db-commands.mjs +0 -522
- package/dist/cli/generator.d.ts +0 -11
- package/dist/cli/generator.d.ts.map +0 -1
- package/dist/cli/generator.js +0 -567
- package/dist/cli/generator.js.map +0 -1
- package/dist/cli/generator.mjs +0 -527
- package/dist/cli/help.d.ts +0 -18
- package/dist/cli/help.d.ts.map +0 -1
- package/dist/cli/help.js +0 -676
- package/dist/cli/help.js.map +0 -1
- package/dist/cli/help.mjs +0 -667
- package/dist/cli/index/hnsw.d.ts +0 -164
- package/dist/cli/index/hnsw.js +0 -558
- package/dist/cli/plugin-cli.d.ts +0 -7
- package/dist/cli/plugin-cli.d.ts.map +0 -1
- package/dist/cli/plugin-cli.js +0 -295
- package/dist/cli/plugin-cli.js.map +0 -1
- package/dist/cli/plugin-cli.mjs +0 -289
- package/dist/cli/quantization/product-quantization.d.ts +0 -108
- package/dist/cli/quantization/product-quantization.js +0 -350
- package/dist/cli/query/query-builder.d.ts +0 -322
- package/dist/cli/query/query-builder.js +0 -600
- package/dist/cli/templates.d.ts +0 -14
- package/dist/cli/templates.d.ts.map +0 -1
- package/dist/cli/templates.js +0 -182
- package/dist/cli/templates.js.map +0 -1
- package/dist/cli/templates.mjs +0 -176
- package/dist/cli/types/index.d.ts +0 -116
- package/dist/cli/types/index.js +0 -5
- package/dist/cli/types.d.ts +0 -91
- package/dist/cli/types.d.ts.map +0 -1
- package/dist/cli/types.js +0 -6
- package/dist/cli/types.js.map +0 -1
- package/dist/cli/types.mjs +0 -4
- package/dist/cli/wizard/index.d.ts +0 -6
- package/dist/cli/wizard/index.d.ts.map +0 -1
- package/dist/cli/wizard/index.js +0 -138
- package/dist/cli/wizard/index.js.map +0 -1
- package/dist/cli/wizard/index.mjs +0 -131
- package/dist/cli/wizard/prompts.d.ts +0 -11
- package/dist/cli/wizard/prompts.d.ts.map +0 -1
- package/dist/cli/wizard/prompts.js +0 -482
- package/dist/cli/wizard/prompts.js.map +0 -1
- package/dist/cli/wizard/prompts.mjs +0 -470
- package/dist/cli/wizard/validator.d.ts +0 -13
- package/dist/cli/wizard/validator.d.ts.map +0 -1
- package/dist/cli/wizard/validator.js +0 -234
- package/dist/cli/wizard/validator.js.map +0 -1
- package/dist/cli/wizard/validator.mjs +0 -224
- package/dist/core/backend-interface.d.ts +0 -71
- package/dist/core/backend-interface.d.ts.map +0 -1
- package/dist/core/backend-interface.js +0 -16
- package/dist/core/backend-interface.js.map +0 -1
- package/dist/core/backend-interface.mjs +0 -12
- package/dist/core/native-backend.d.ts +0 -141
- package/dist/core/native-backend.d.ts.map +0 -1
- package/dist/core/native-backend.js +0 -457
- package/dist/core/native-backend.js.map +0 -1
- package/dist/core/native-backend.mjs +0 -449
- package/dist/core/vector-db.d.ts +0 -127
- package/dist/core/vector-db.d.ts.map +0 -1
- package/dist/core/vector-db.js +0 -270
- package/dist/core/vector-db.js.map +0 -1
- package/dist/core/vector-db.mjs +0 -265
- package/dist/core/wasm-backend.d.ts +0 -96
- package/dist/core/wasm-backend.d.ts.map +0 -1
- package/dist/core/wasm-backend.js +0 -393
- package/dist/core/wasm-backend.js.map +0 -1
- package/dist/core/wasm-backend.mjs +0 -385
- package/dist/index/hnsw-optimized.d.ts +0 -75
- package/dist/index/hnsw-optimized.d.ts.map +0 -1
- package/dist/index/hnsw-optimized.js +0 -412
- package/dist/index/hnsw-optimized.js.map +0 -1
- package/dist/index/hnsw-optimized.mjs +0 -407
- package/dist/index/hnsw.d.ts +0 -165
- package/dist/index/hnsw.d.ts.map +0 -1
- package/dist/index/hnsw.js +0 -521
- package/dist/index/hnsw.js.map +0 -1
- package/dist/index/hnsw.mjs +0 -516
- package/dist/index.browser.d.ts +0 -47
- package/dist/index.browser.d.ts.map +0 -1
- package/dist/index.browser.js +0 -72
- package/dist/index.browser.js.map +0 -1
- package/dist/index.browser.mjs +0 -54
- package/dist/index.d.ts +0 -59
- package/dist/index.d.ts.map +0 -1
- package/dist/index.js +0 -84
- package/dist/index.js.map +0 -1
- package/dist/index.mjs +0 -65
- package/dist/mcp/learning/core/experience-buffer.d.ts +0 -61
- package/dist/mcp/learning/core/experience-buffer.d.ts.map +0 -1
- package/dist/mcp/learning/core/experience-buffer.js +0 -175
- package/dist/mcp/learning/core/experience-buffer.js.map +0 -1
- package/dist/mcp/learning/core/experience-buffer.mjs +0 -170
- package/dist/mcp/learning/core/experience-recorder.d.ts +0 -40
- package/dist/mcp/learning/core/experience-recorder.d.ts.map +0 -1
- package/dist/mcp/learning/core/experience-recorder.js +0 -200
- package/dist/mcp/learning/core/experience-recorder.js.map +0 -1
- package/dist/mcp/learning/core/experience-recorder.mjs +0 -195
- package/dist/mcp/learning/core/learning-manager.d.ts +0 -66
- package/dist/mcp/learning/core/learning-manager.d.ts.map +0 -1
- package/dist/mcp/learning/core/learning-manager.js +0 -252
- package/dist/mcp/learning/core/learning-manager.js.map +0 -1
- package/dist/mcp/learning/core/learning-manager.mjs +0 -247
- package/dist/mcp/learning/core/policy-optimizer.d.ts +0 -53
- package/dist/mcp/learning/core/policy-optimizer.d.ts.map +0 -1
- package/dist/mcp/learning/core/policy-optimizer.js +0 -251
- package/dist/mcp/learning/core/policy-optimizer.js.map +0 -1
- package/dist/mcp/learning/core/policy-optimizer.mjs +0 -246
- package/dist/mcp/learning/core/reward-estimator.d.ts +0 -44
- package/dist/mcp/learning/core/reward-estimator.d.ts.map +0 -1
- package/dist/mcp/learning/core/reward-estimator.js +0 -158
- package/dist/mcp/learning/core/reward-estimator.js.map +0 -1
- package/dist/mcp/learning/core/reward-estimator.mjs +0 -153
- package/dist/mcp/learning/core/session-manager.d.ts +0 -63
- package/dist/mcp/learning/core/session-manager.d.ts.map +0 -1
- package/dist/mcp/learning/core/session-manager.js +0 -202
- package/dist/mcp/learning/core/session-manager.js.map +0 -1
- package/dist/mcp/learning/core/session-manager.mjs +0 -197
- package/dist/mcp/learning/index.d.ts +0 -19
- package/dist/mcp/learning/index.d.ts.map +0 -1
- package/dist/mcp/learning/index.js +0 -30
- package/dist/mcp/learning/index.js.map +0 -1
- package/dist/mcp/learning/index.mjs +0 -19
- package/dist/mcp/learning/tools/mcp-learning-tools.d.ts +0 -369
- package/dist/mcp/learning/tools/mcp-learning-tools.d.ts.map +0 -1
- package/dist/mcp/learning/tools/mcp-learning-tools.js +0 -361
- package/dist/mcp/learning/tools/mcp-learning-tools.js.map +0 -1
- package/dist/mcp/learning/tools/mcp-learning-tools.mjs +0 -356
- package/dist/mcp/learning/types/index.d.ts +0 -138
- package/dist/mcp/learning/types/index.d.ts.map +0 -1
- package/dist/mcp/learning/types/index.js +0 -6
- package/dist/mcp/learning/types/index.js.map +0 -1
- package/dist/mcp/learning/types/index.mjs +0 -4
- package/dist/mcp-server.d.ts +0 -29
- package/dist/mcp-server.d.ts.map +0 -1
- package/dist/mcp-server.js +0 -862
- package/dist/mcp-server.js.map +0 -1
- package/dist/mcp-server.mjs +0 -857
- package/dist/plugins/base-plugin.d.ts +0 -114
- package/dist/plugins/base-plugin.d.ts.map +0 -1
- package/dist/plugins/base-plugin.js +0 -313
- package/dist/plugins/base-plugin.js.map +0 -1
- package/dist/plugins/base-plugin.mjs +0 -275
- package/dist/plugins/implementations/active-learning.d.ts +0 -135
- package/dist/plugins/implementations/active-learning.d.ts.map +0 -1
- package/dist/plugins/implementations/active-learning.js +0 -372
- package/dist/plugins/implementations/active-learning.js.map +0 -1
- package/dist/plugins/implementations/active-learning.mjs +0 -367
- package/dist/plugins/implementations/actor-critic.d.ts +0 -64
- package/dist/plugins/implementations/actor-critic.d.ts.map +0 -1
- package/dist/plugins/implementations/actor-critic.js +0 -363
- package/dist/plugins/implementations/actor-critic.js.map +0 -1
- package/dist/plugins/implementations/actor-critic.mjs +0 -358
- package/dist/plugins/implementations/adversarial-training.d.ts +0 -133
- package/dist/plugins/implementations/adversarial-training.d.ts.map +0 -1
- package/dist/plugins/implementations/adversarial-training.js +0 -409
- package/dist/plugins/implementations/adversarial-training.js.map +0 -1
- package/dist/plugins/implementations/adversarial-training.mjs +0 -404
- package/dist/plugins/implementations/curriculum-learning.d.ts +0 -132
- package/dist/plugins/implementations/curriculum-learning.d.ts.map +0 -1
- package/dist/plugins/implementations/curriculum-learning.js +0 -354
- package/dist/plugins/implementations/curriculum-learning.js.map +0 -1
- package/dist/plugins/implementations/curriculum-learning.mjs +0 -349
- package/dist/plugins/implementations/decision-transformer.d.ts +0 -77
- package/dist/plugins/implementations/decision-transformer.d.ts.map +0 -1
- package/dist/plugins/implementations/decision-transformer.js +0 -422
- package/dist/plugins/implementations/decision-transformer.js.map +0 -1
- package/dist/plugins/implementations/decision-transformer.mjs +0 -417
- package/dist/plugins/implementations/federated-learning.d.ts +0 -126
- package/dist/plugins/implementations/federated-learning.d.ts.map +0 -1
- package/dist/plugins/implementations/federated-learning.js +0 -436
- package/dist/plugins/implementations/federated-learning.js.map +0 -1
- package/dist/plugins/implementations/federated-learning.mjs +0 -431
- package/dist/plugins/implementations/index.d.ts +0 -30
- package/dist/plugins/implementations/index.d.ts.map +0 -1
- package/dist/plugins/implementations/index.js +0 -45
- package/dist/plugins/implementations/index.js.map +0 -1
- package/dist/plugins/implementations/index.mjs +0 -31
- package/dist/plugins/implementations/multi-task-learning.d.ts +0 -115
- package/dist/plugins/implementations/multi-task-learning.d.ts.map +0 -1
- package/dist/plugins/implementations/multi-task-learning.js +0 -369
- package/dist/plugins/implementations/multi-task-learning.js.map +0 -1
- package/dist/plugins/implementations/multi-task-learning.mjs +0 -364
- package/dist/plugins/implementations/neural-architecture-search.d.ts +0 -148
- package/dist/plugins/implementations/neural-architecture-search.d.ts.map +0 -1
- package/dist/plugins/implementations/neural-architecture-search.js +0 -379
- package/dist/plugins/implementations/neural-architecture-search.js.map +0 -1
- package/dist/plugins/implementations/neural-architecture-search.mjs +0 -374
- package/dist/plugins/implementations/q-learning.d.ts +0 -98
- package/dist/plugins/implementations/q-learning.d.ts.map +0 -1
- package/dist/plugins/implementations/q-learning.js +0 -435
- package/dist/plugins/implementations/q-learning.js.map +0 -1
- package/dist/plugins/implementations/q-learning.mjs +0 -430
- package/dist/plugins/implementations/sarsa.d.ts +0 -103
- package/dist/plugins/implementations/sarsa.d.ts.map +0 -1
- package/dist/plugins/implementations/sarsa.js +0 -347
- package/dist/plugins/implementations/sarsa.js.map +0 -1
- package/dist/plugins/implementations/sarsa.mjs +0 -342
- package/dist/plugins/index.d.ts +0 -107
- package/dist/plugins/index.d.ts.map +0 -1
- package/dist/plugins/index.js +0 -179
- package/dist/plugins/index.js.map +0 -1
- package/dist/plugins/index.mjs +0 -168
- package/dist/plugins/interface.d.ts +0 -439
- package/dist/plugins/interface.d.ts.map +0 -1
- package/dist/plugins/interface.js +0 -12
- package/dist/plugins/interface.js.map +0 -1
- package/dist/plugins/interface.mjs +0 -10
- package/dist/plugins/learning-plugin.interface.d.ts +0 -257
- package/dist/plugins/learning-plugin.interface.d.ts.map +0 -1
- package/dist/plugins/learning-plugin.interface.js +0 -7
- package/dist/plugins/learning-plugin.interface.js.map +0 -1
- package/dist/plugins/learning-plugin.interface.mjs +0 -5
- package/dist/plugins/plugin-exports.d.ts +0 -71
- package/dist/plugins/plugin-exports.d.ts.map +0 -1
- package/dist/plugins/plugin-exports.js +0 -78
- package/dist/plugins/plugin-exports.js.map +0 -1
- package/dist/plugins/plugin-exports.mjs +0 -69
- package/dist/plugins/registry.d.ts +0 -206
- package/dist/plugins/registry.d.ts.map +0 -1
- package/dist/plugins/registry.js +0 -365
- package/dist/plugins/registry.js.map +0 -1
- package/dist/plugins/registry.mjs +0 -356
- package/dist/plugins/validator.d.ts +0 -63
- package/dist/plugins/validator.d.ts.map +0 -1
- package/dist/plugins/validator.js +0 -464
- package/dist/plugins/validator.js.map +0 -1
- package/dist/plugins/validator.mjs +0 -458
- package/dist/presets.d.ts +0 -65
- package/dist/presets.d.ts.map +0 -1
- package/dist/presets.js +0 -145
- package/dist/presets.js.map +0 -1
- package/dist/presets.mjs +0 -140
- package/dist/quantization/binary-quantization.d.ts +0 -104
- package/dist/quantization/binary-quantization.d.ts.map +0 -1
- package/dist/quantization/binary-quantization.js +0 -246
- package/dist/quantization/binary-quantization.js.map +0 -1
- package/dist/quantization/binary-quantization.mjs +0 -240
- package/dist/quantization/optimized-pq.d.ts +0 -138
- package/dist/quantization/optimized-pq.d.ts.map +0 -1
- package/dist/quantization/optimized-pq.js +0 -320
- package/dist/quantization/optimized-pq.js.map +0 -1
- package/dist/quantization/optimized-pq.mjs +0 -313
- package/dist/quantization/product-quantization.d.ts +0 -109
- package/dist/quantization/product-quantization.d.ts.map +0 -1
- package/dist/quantization/product-quantization.js +0 -287
- package/dist/quantization/product-quantization.js.map +0 -1
- package/dist/quantization/product-quantization.mjs +0 -282
- package/dist/quantization/scalar-quantization.d.ts +0 -100
- package/dist/quantization/scalar-quantization.d.ts.map +0 -1
- package/dist/quantization/scalar-quantization.js +0 -324
- package/dist/quantization/scalar-quantization.js.map +0 -1
- package/dist/quantization/scalar-quantization.mjs +0 -319
- package/dist/query/index.d.ts +0 -6
- package/dist/query/index.d.ts.map +0 -1
- package/dist/query/index.js +0 -9
- package/dist/query/index.js.map +0 -1
- package/dist/query/index.mjs +0 -4
- package/dist/query/query-builder.d.ts +0 -323
- package/dist/query/query-builder.d.ts.map +0 -1
- package/dist/query/query-builder.js +0 -524
- package/dist/query/query-builder.js.map +0 -1
- package/dist/query/query-builder.mjs +0 -519
- package/dist/reasoning/context-synthesizer.d.ts +0 -57
- package/dist/reasoning/context-synthesizer.d.ts.map +0 -1
- package/dist/reasoning/context-synthesizer.js +0 -224
- package/dist/reasoning/context-synthesizer.js.map +0 -1
- package/dist/reasoning/context-synthesizer.mjs +0 -219
- package/dist/reasoning/experience-curator.d.ts +0 -66
- package/dist/reasoning/experience-curator.d.ts.map +0 -1
- package/dist/reasoning/experience-curator.js +0 -288
- package/dist/reasoning/experience-curator.js.map +0 -1
- package/dist/reasoning/experience-curator.mjs +0 -283
- package/dist/reasoning/memory-optimizer.d.ts +0 -69
- package/dist/reasoning/memory-optimizer.d.ts.map +0 -1
- package/dist/reasoning/memory-optimizer.js +0 -331
- package/dist/reasoning/memory-optimizer.js.map +0 -1
- package/dist/reasoning/memory-optimizer.mjs +0 -326
- package/dist/reasoning/pattern-matcher.d.ts +0 -59
- package/dist/reasoning/pattern-matcher.d.ts.map +0 -1
- package/dist/reasoning/pattern-matcher.js +0 -229
- package/dist/reasoning/pattern-matcher.js.map +0 -1
- package/dist/reasoning/pattern-matcher.mjs +0 -224
- package/dist/reasoningbank/adapter/agentdb-adapter.d.ts +0 -118
- package/dist/reasoningbank/adapter/agentdb-adapter.d.ts.map +0 -1
- package/dist/reasoningbank/adapter/agentdb-adapter.js +0 -477
- package/dist/reasoningbank/adapter/agentdb-adapter.js.map +0 -1
- package/dist/reasoningbank/adapter/types.d.ts +0 -113
- package/dist/reasoningbank/adapter/types.d.ts.map +0 -1
- package/dist/reasoningbank/adapter/types.js +0 -9
- package/dist/reasoningbank/adapter/types.js.map +0 -1
- package/dist/reasoningbank/cli/commands.d.ts +0 -16
- package/dist/reasoningbank/cli/commands.d.ts.map +0 -1
- package/dist/reasoningbank/cli/commands.js +0 -272
- package/dist/reasoningbank/cli/commands.js.map +0 -1
- package/dist/reasoningbank/mcp/agentdb-tools.d.ts +0 -319
- package/dist/reasoningbank/mcp/agentdb-tools.d.ts.map +0 -1
- package/dist/reasoningbank/mcp/agentdb-tools.js +0 -301
- package/dist/reasoningbank/mcp/agentdb-tools.js.map +0 -1
- package/dist/reasoningbank/migration/migrate.d.ts +0 -25
- package/dist/reasoningbank/migration/migrate.d.ts.map +0 -1
- package/dist/reasoningbank/migration/migrate.js +0 -178
- package/dist/reasoningbank/migration/migrate.js.map +0 -1
- package/dist/reasoningbank/reasoning/context-synthesizer.d.ts +0 -37
- package/dist/reasoningbank/reasoning/context-synthesizer.d.ts.map +0 -1
- package/dist/reasoningbank/reasoning/context-synthesizer.js +0 -114
- package/dist/reasoningbank/reasoning/context-synthesizer.js.map +0 -1
- package/dist/reasoningbank/reasoning/experience-curator.d.ts +0 -39
- package/dist/reasoningbank/reasoning/experience-curator.d.ts.map +0 -1
- package/dist/reasoningbank/reasoning/experience-curator.js +0 -98
- package/dist/reasoningbank/reasoning/experience-curator.js.map +0 -1
- package/dist/reasoningbank/reasoning/memory-optimizer.d.ts +0 -44
- package/dist/reasoningbank/reasoning/memory-optimizer.d.ts.map +0 -1
- package/dist/reasoningbank/reasoning/memory-optimizer.js +0 -184
- package/dist/reasoningbank/reasoning/memory-optimizer.js.map +0 -1
- package/dist/reasoningbank/reasoning/pattern-matcher.d.ts +0 -40
- package/dist/reasoningbank/reasoning/pattern-matcher.d.ts.map +0 -1
- package/dist/reasoningbank/reasoning/pattern-matcher.js +0 -87
- package/dist/reasoningbank/reasoning/pattern-matcher.js.map +0 -1
- package/dist/reasoningbank/sync/quic-sync.d.ts +0 -77
- package/dist/reasoningbank/sync/quic-sync.d.ts.map +0 -1
- package/dist/reasoningbank/sync/quic-sync.js +0 -165
- package/dist/reasoningbank/sync/quic-sync.js.map +0 -1
- package/dist/sync/conflict.d.ts +0 -78
- package/dist/sync/conflict.d.ts.map +0 -1
- package/dist/sync/conflict.js +0 -202
- package/dist/sync/conflict.js.map +0 -1
- package/dist/sync/conflict.mjs +0 -196
- package/dist/sync/coordinator.d.ts +0 -111
- package/dist/sync/coordinator.d.ts.map +0 -1
- package/dist/sync/coordinator.js +0 -256
- package/dist/sync/coordinator.js.map +0 -1
- package/dist/sync/coordinator.mjs +0 -250
- package/dist/sync/delta.d.ts +0 -81
- package/dist/sync/delta.d.ts.map +0 -1
- package/dist/sync/delta.js +0 -245
- package/dist/sync/delta.js.map +0 -1
- package/dist/sync/delta.mjs +0 -238
- package/dist/sync/index.d.ts +0 -11
- package/dist/sync/index.d.ts.map +0 -1
- package/dist/sync/index.js +0 -22
- package/dist/sync/index.js.map +0 -1
- package/dist/sync/index.mjs +0 -9
- package/dist/sync/quic-sync.d.ts +0 -81
- package/dist/sync/quic-sync.d.ts.map +0 -1
- package/dist/sync/quic-sync.js +0 -329
- package/dist/sync/quic-sync.js.map +0 -1
- package/dist/sync/quic-sync.mjs +0 -323
- package/dist/sync/types.d.ts +0 -168
- package/dist/sync/types.d.ts.map +0 -1
- package/dist/sync/types.js +0 -8
- package/dist/sync/types.js.map +0 -1
- package/dist/sync/types.mjs +0 -6
- package/dist/types/index.d.ts +0 -117
- package/dist/types/index.d.ts.map +0 -1
- package/dist/types/index.js +0 -6
- package/dist/types/index.js.map +0 -1
- package/dist/types/index.mjs +0 -4
- package/dist/wasm/sql-wasm-debug.js +0 -6989
- package/dist/wasm/sql-wasm-debug.wasm +0 -0
- package/dist/wasm/sql-wasm.js +0 -188
- package/dist/wasm/sql-wasm.wasm +0 -0
- package/dist/wasm-loader.d.ts +0 -32
- package/dist/wasm-loader.d.ts.map +0 -1
- package/dist/wasm-loader.js +0 -78
- package/dist/wasm-loader.js.map +0 -1
- package/dist/wasm-loader.mjs +0 -67
- package/examples/adaptive-learning.ts +0 -284
- package/examples/browser/README.md +0 -732
- package/examples/browser/adaptive-recommendations/index.html +0 -427
- package/examples/browser/collaborative-filtering/index.html +0 -310
- package/examples/browser/continual-learning/index.html +0 -736
- package/examples/browser/experience-replay/index.html +0 -616
- package/examples/browser/index.html +0 -369
- package/examples/browser/meta-learning/index.html +0 -789
- package/examples/browser/neuro-symbolic/index.html +0 -692
- package/examples/browser/pattern-learning/index.html +0 -620
- package/examples/browser/quantum-inspired/index.html +0 -728
- package/examples/browser/rag/index.html +0 -624
- package/examples/browser/swarm-intelligence/index.html +0 -811
- package/examples/browser-basic.html +0 -170
- package/examples/browser-wasm-real.html +0 -231
- package/examples/hnsw-example.ts +0 -148
- package/examples/mcp-learning-example.ts +0 -220
- package/examples/node-basic.js +0 -70
- package/examples/quic-sync-example.ts +0 -310
- package/examples/quick-start.js +0 -68
- package/examples/test-v1.0.7-cdn.html +0 -190
- package/examples/wasm-example.ts +0 -222
|
@@ -1,251 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
/**
|
|
3
|
-
* PolicyOptimizer - Optimizes action selection policy using reinforcement learning
|
|
4
|
-
*/
|
|
5
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
-
exports.PolicyOptimizer = void 0;
|
|
7
|
-
const experience_buffer_js_1 = require("./experience-buffer.js");
|
|
8
|
-
class PolicyOptimizer {
|
|
9
|
-
constructor(learningRate = 0.1, discountFactor = 0.95, bufferSize = 10000) {
|
|
10
|
-
this.qTable = new Map();
|
|
11
|
-
this.learningRate = 0.1;
|
|
12
|
-
this.discountFactor = 0.95;
|
|
13
|
-
this.explorationRate = 0.1;
|
|
14
|
-
this.learningRate = learningRate;
|
|
15
|
-
this.discountFactor = discountFactor;
|
|
16
|
-
this.experienceBuffer = new experience_buffer_js_1.ExperienceBuffer(bufferSize);
|
|
17
|
-
}
|
|
18
|
-
/**
|
|
19
|
-
* Predict best action for current state
|
|
20
|
-
*/
|
|
21
|
-
async predictAction(state, availableActions) {
|
|
22
|
-
const stateKey = this.encodeState(state);
|
|
23
|
-
const qValues = this.qTable.get(stateKey) || new Map();
|
|
24
|
-
// Get Q-values for available actions
|
|
25
|
-
const actionValues = [];
|
|
26
|
-
for (const action of availableActions) {
|
|
27
|
-
const value = qValues.get(action) || 0;
|
|
28
|
-
actionValues.push({ tool: action, value });
|
|
29
|
-
}
|
|
30
|
-
// Sort by Q-value (descending)
|
|
31
|
-
actionValues.sort((a, b) => b.value - a.value);
|
|
32
|
-
// Epsilon-greedy exploration
|
|
33
|
-
let recommendedAction;
|
|
34
|
-
if (Math.random() < this.explorationRate && actionValues.length > 1) {
|
|
35
|
-
// Explore: pick random action
|
|
36
|
-
const randomIdx = Math.floor(Math.random() * actionValues.length);
|
|
37
|
-
const action = actionValues[randomIdx];
|
|
38
|
-
recommendedAction = {
|
|
39
|
-
tool: action.tool,
|
|
40
|
-
params: {},
|
|
41
|
-
confidence: 0.5, // Lower confidence for exploration
|
|
42
|
-
reasoning: 'Exploration: trying alternative action to discover better strategies',
|
|
43
|
-
};
|
|
44
|
-
}
|
|
45
|
-
else {
|
|
46
|
-
// Exploit: pick best action
|
|
47
|
-
const action = actionValues[0];
|
|
48
|
-
const maxValue = actionValues[0].value;
|
|
49
|
-
const minValue = actionValues[actionValues.length - 1].value;
|
|
50
|
-
const range = maxValue - minValue || 1;
|
|
51
|
-
const confidence = Math.min(0.95, 0.5 + (action.value - minValue) / range / 2);
|
|
52
|
-
recommendedAction = {
|
|
53
|
-
tool: action.tool,
|
|
54
|
-
params: {},
|
|
55
|
-
confidence,
|
|
56
|
-
reasoning: `Best action based on ${this.getExperienceCount(stateKey)} past experiences with average reward ${action.value.toFixed(3)}`,
|
|
57
|
-
};
|
|
58
|
-
}
|
|
59
|
-
// Prepare alternatives
|
|
60
|
-
const alternatives = actionValues.slice(1, 4).map((action) => ({
|
|
61
|
-
tool: action.tool,
|
|
62
|
-
params: {}, // Empty params for alternatives
|
|
63
|
-
confidence: Math.max(0.1, action.value / (actionValues[0].value || 1)),
|
|
64
|
-
reasoning: `Alternative with Q-value ${action.value.toFixed(3)}`,
|
|
65
|
-
}));
|
|
66
|
-
return {
|
|
67
|
-
recommendedAction,
|
|
68
|
-
alternatives,
|
|
69
|
-
};
|
|
70
|
-
}
|
|
71
|
-
/**
|
|
72
|
-
* Update policy based on experience
|
|
73
|
-
*/
|
|
74
|
-
async updatePolicy(experience) {
|
|
75
|
-
// Add to experience buffer
|
|
76
|
-
this.experienceBuffer.add(experience);
|
|
77
|
-
// Q-learning update
|
|
78
|
-
const stateKey = this.encodeState(experience.state);
|
|
79
|
-
const nextStateKey = this.encodeState(experience.nextState);
|
|
80
|
-
const action = experience.action.tool;
|
|
81
|
-
// Get or initialize Q-values
|
|
82
|
-
if (!this.qTable.has(stateKey)) {
|
|
83
|
-
this.qTable.set(stateKey, new Map());
|
|
84
|
-
}
|
|
85
|
-
const qValues = this.qTable.get(stateKey);
|
|
86
|
-
// Get current Q-value
|
|
87
|
-
const currentQ = qValues.get(action) || 0;
|
|
88
|
-
// Get max Q-value for next state
|
|
89
|
-
let maxNextQ = 0;
|
|
90
|
-
if (!experience.done) {
|
|
91
|
-
const nextQValues = this.qTable.get(nextStateKey);
|
|
92
|
-
if (nextQValues) {
|
|
93
|
-
maxNextQ = Math.max(...Array.from(nextQValues.values()));
|
|
94
|
-
}
|
|
95
|
-
}
|
|
96
|
-
// Q-learning update: Q(s,a) = Q(s,a) + α[r + γ max Q(s',a') - Q(s,a)]
|
|
97
|
-
const newQ = currentQ +
|
|
98
|
-
this.learningRate *
|
|
99
|
-
(experience.reward + this.discountFactor * maxNextQ - currentQ);
|
|
100
|
-
qValues.set(action, newQ);
|
|
101
|
-
}
|
|
102
|
-
/**
|
|
103
|
-
* Train policy on batch of experiences
|
|
104
|
-
*/
|
|
105
|
-
async train(options = {}) {
|
|
106
|
-
const { batchSize = 32, epochs = 10, learningRate = this.learningRate, minExperiences = 100, } = options;
|
|
107
|
-
const startTime = Date.now();
|
|
108
|
-
let totalLoss = 0;
|
|
109
|
-
let experiencesProcessed = 0;
|
|
110
|
-
// Check if we have enough experiences
|
|
111
|
-
if (this.experienceBuffer.size() < minExperiences) {
|
|
112
|
-
return {
|
|
113
|
-
loss: 0,
|
|
114
|
-
accuracy: 0,
|
|
115
|
-
experiencesProcessed: 0,
|
|
116
|
-
trainingTime: 0,
|
|
117
|
-
improvements: {
|
|
118
|
-
taskCompletionTime: 'N/A',
|
|
119
|
-
tokenEfficiency: 'N/A',
|
|
120
|
-
successRate: 'N/A',
|
|
121
|
-
},
|
|
122
|
-
};
|
|
123
|
-
}
|
|
124
|
-
const oldLearningRate = this.learningRate;
|
|
125
|
-
this.learningRate = learningRate;
|
|
126
|
-
// Training loop
|
|
127
|
-
for (let epoch = 0; epoch < epochs; epoch++) {
|
|
128
|
-
// Sample prioritized batch
|
|
129
|
-
const batch = this.experienceBuffer.samplePrioritized(batchSize);
|
|
130
|
-
for (const experience of batch) {
|
|
131
|
-
// Calculate TD error (used as loss)
|
|
132
|
-
const stateKey = this.encodeState(experience.state);
|
|
133
|
-
const nextStateKey = this.encodeState(experience.nextState);
|
|
134
|
-
const action = experience.action.tool;
|
|
135
|
-
const qValues = this.qTable.get(stateKey) || new Map();
|
|
136
|
-
const currentQ = qValues.get(action) || 0;
|
|
137
|
-
let maxNextQ = 0;
|
|
138
|
-
if (!experience.done) {
|
|
139
|
-
const nextQValues = this.qTable.get(nextStateKey);
|
|
140
|
-
if (nextQValues) {
|
|
141
|
-
maxNextQ = Math.max(...Array.from(nextQValues.values()));
|
|
142
|
-
}
|
|
143
|
-
}
|
|
144
|
-
const targetQ = experience.reward + this.discountFactor * maxNextQ;
|
|
145
|
-
const tdError = Math.abs(targetQ - currentQ);
|
|
146
|
-
totalLoss += tdError;
|
|
147
|
-
// Update Q-value
|
|
148
|
-
await this.updatePolicy(experience);
|
|
149
|
-
experiencesProcessed++;
|
|
150
|
-
}
|
|
151
|
-
}
|
|
152
|
-
this.learningRate = oldLearningRate;
|
|
153
|
-
const trainingTime = Date.now() - startTime;
|
|
154
|
-
const avgLoss = totalLoss / experiencesProcessed;
|
|
155
|
-
// Calculate improvements
|
|
156
|
-
const stats = this.experienceBuffer.getStats();
|
|
157
|
-
const improvements = {
|
|
158
|
-
taskCompletionTime: stats.avgReward > 0 ? '+15%' : 'N/A',
|
|
159
|
-
tokenEfficiency: stats.avgReward > 0.5 ? '+20%' : 'N/A',
|
|
160
|
-
successRate: stats.avgReward > 0.7 ? '+25%' : 'N/A',
|
|
161
|
-
};
|
|
162
|
-
return {
|
|
163
|
-
loss: avgLoss,
|
|
164
|
-
accuracy: Math.max(0, 1 - avgLoss), // Simple accuracy estimate
|
|
165
|
-
experiencesProcessed,
|
|
166
|
-
trainingTime,
|
|
167
|
-
improvements,
|
|
168
|
-
};
|
|
169
|
-
}
|
|
170
|
-
/**
|
|
171
|
-
* Get policy statistics
|
|
172
|
-
*/
|
|
173
|
-
getPolicyStats() {
|
|
174
|
-
let totalQValue = 0;
|
|
175
|
-
let qValueCount = 0;
|
|
176
|
-
for (const qValues of this.qTable.values()) {
|
|
177
|
-
for (const value of qValues.values()) {
|
|
178
|
-
totalQValue += value;
|
|
179
|
-
qValueCount++;
|
|
180
|
-
}
|
|
181
|
-
}
|
|
182
|
-
return {
|
|
183
|
-
statesLearned: this.qTable.size,
|
|
184
|
-
totalExperiences: this.experienceBuffer.size(),
|
|
185
|
-
avgQValue: qValueCount > 0 ? totalQValue / qValueCount : 0,
|
|
186
|
-
};
|
|
187
|
-
}
|
|
188
|
-
/**
|
|
189
|
-
* Export policy for persistence
|
|
190
|
-
*/
|
|
191
|
-
exportPolicy() {
|
|
192
|
-
const policy = {};
|
|
193
|
-
for (const [stateKey, qValues] of this.qTable.entries()) {
|
|
194
|
-
policy[stateKey] = Object.fromEntries(qValues);
|
|
195
|
-
}
|
|
196
|
-
return {
|
|
197
|
-
qTable: policy,
|
|
198
|
-
learningRate: this.learningRate,
|
|
199
|
-
discountFactor: this.discountFactor,
|
|
200
|
-
explorationRate: this.explorationRate,
|
|
201
|
-
stats: this.getPolicyStats(),
|
|
202
|
-
};
|
|
203
|
-
}
|
|
204
|
-
/**
|
|
205
|
-
* Import policy from persistence
|
|
206
|
-
*/
|
|
207
|
-
importPolicy(policyData) {
|
|
208
|
-
this.qTable.clear();
|
|
209
|
-
if (policyData.qTable) {
|
|
210
|
-
for (const [stateKey, actions] of Object.entries(policyData.qTable)) {
|
|
211
|
-
this.qTable.set(stateKey, new Map(Object.entries(actions)));
|
|
212
|
-
}
|
|
213
|
-
}
|
|
214
|
-
if (policyData.learningRate) {
|
|
215
|
-
this.learningRate = policyData.learningRate;
|
|
216
|
-
}
|
|
217
|
-
if (policyData.discountFactor) {
|
|
218
|
-
this.discountFactor = policyData.discountFactor;
|
|
219
|
-
}
|
|
220
|
-
if (policyData.explorationRate) {
|
|
221
|
-
this.explorationRate = policyData.explorationRate;
|
|
222
|
-
}
|
|
223
|
-
}
|
|
224
|
-
/**
|
|
225
|
-
* Encode state as string key for Q-table
|
|
226
|
-
*/
|
|
227
|
-
encodeState(state) {
|
|
228
|
-
// Simple encoding: hash of task description and available tools
|
|
229
|
-
const parts = [
|
|
230
|
-
state.taskDescription.substring(0, 50),
|
|
231
|
-
state.availableTools.sort().join(','),
|
|
232
|
-
state.context?.taskType || 'general',
|
|
233
|
-
];
|
|
234
|
-
return parts.join('|');
|
|
235
|
-
}
|
|
236
|
-
/**
|
|
237
|
-
* Get experience count for state
|
|
238
|
-
*/
|
|
239
|
-
getExperienceCount(stateKey) {
|
|
240
|
-
const qValues = this.qTable.get(stateKey);
|
|
241
|
-
return qValues ? qValues.size : 0;
|
|
242
|
-
}
|
|
243
|
-
/**
|
|
244
|
-
* Decay exploration rate over time
|
|
245
|
-
*/
|
|
246
|
-
decayExploration(decayRate = 0.995) {
|
|
247
|
-
this.explorationRate = Math.max(0.01, this.explorationRate * decayRate);
|
|
248
|
-
}
|
|
249
|
-
}
|
|
250
|
-
exports.PolicyOptimizer = PolicyOptimizer;
|
|
251
|
-
//# sourceMappingURL=policy-optimizer.js.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"policy-optimizer.js","sourceRoot":"","sources":["../../../../src/mcp/learning/core/policy-optimizer.ts"],"names":[],"mappings":";AAAA;;GAEG;;;AAUH,iEAA0D;AAE1D,MAAa,eAAe;IAO1B,YACE,eAAuB,GAAG,EAC1B,iBAAyB,IAAI,EAC7B,aAAqB,KAAK;QATpB,WAAM,GAAqC,IAAI,GAAG,EAAE,CAAC;QACrD,iBAAY,GAAW,GAAG,CAAC;QAC3B,mBAAc,GAAW,IAAI,CAAC;QAC9B,oBAAe,GAAW,GAAG,CAAC;QAQpC,IAAI,CAAC,YAAY,GAAG,YAAY,CAAC;QACjC,IAAI,CAAC,cAAc,GAAG,cAAc,CAAC;QACrC,IAAI,CAAC,gBAAgB,GAAG,IAAI,uCAAgB,CAAC,UAAU,CAAC,CAAC;IAC3D,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,aAAa,CACjB,KAAY,EACZ,gBAA0B;QAE1B,MAAM,QAAQ,GAAG,IAAI,CAAC,WAAW,CAAC,KAAK,CAAC,CAAC;QACzC,MAAM,OAAO,GAAG,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,QAAQ,CAAC,IAAI,IAAI,GAAG,EAAE,CAAC;QAEvD,qCAAqC;QACrC,MAAM,YAAY,GAA2C,EAAE,CAAC;QAChE,KAAK,MAAM,MAAM,IAAI,gBAAgB,EAAE,CAAC;YACtC,MAAM,KAAK,GAAG,OAAO,CAAC,GAAG,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;YACvC,YAAY,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,KAAK,EAAE,CAAC,CAAC;QAC7C,CAAC;QAED,+BAA+B;QAC/B,YAAY,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC;QAE/C,6BAA6B;QAC7B,IAAI,iBAAuG,CAAC;QAE5G,IAAI,IAAI,CAAC,MAAM,EAAE,GAAG,IAAI,CAAC,eAAe,IAAI,YAAY,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACpE,8BAA8B;YAC9B,MAAM,SAAS,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,MAAM,EAAE,GAAG,YAAY,CAAC,MAAM,CAAC,CAAC;YAClE,MAAM,MAAM,GAAG,YAAY,CAAC,SAAS,CAAC,CAAC;YACvC,iBAAiB,GAAG;gBAClB,IAAI,EAAE,MAAM,CAAC,IAAI;gBACjB,MAAM,EAAE,EAAE;gBACV,UAAU,EAAE,GAAG,EAAE,mCAAmC;gBACpD,SAAS,EAAE,sEAAsE;aAClF,CAAC;QACJ,CAAC;aAAM,CAAC;YACN,4BAA4B;YAC5B,MAAM,MAAM,GAAG,YAAY,CAAC,CAAC,CAAC,CAAC;YAC/B,MAAM,QAAQ,GAAG,YAAY,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC;YACvC,MAAM,QAAQ,GAAG,YAAY,CAAC,YAAY,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,KAAK,CAAC;YAC7D,MAAM,KAAK,GAAG,QAAQ,GAAG,QAAQ,IAAI,CAAC,CAAC;YACvC,MAAM,UAAU,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,EAAE,GAAG,GAAG,CAAC,MAAM,CAAC,KAAK,GAAG,QAAQ,CAAC,GAAG,KAAK,GAAG,CAAC,CAAC,CAAC;YAE/E,iBAAiB,GAAG;gBAClB,IAAI,EAAE,MAAM,CAAC,IAAI;gBACjB,MAAM,EAAE,EAAE;gBACV,UAAU;gBACV,SAAS,EAAE,wBAAwB,IAAI,CAAC,kBAAkB,CAAC,QAAQ,CAAC
,yCAAyC,MAAM,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE;aACvI,CAAC;QACJ,CAAC;QAED,uBAAuB;QACvB,MAAM,YAAY,GAAG,YAAY,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,MAAM,EAAE,EAAE,CAAC,CAAC;YAC7D,IAAI,EAAE,MAAM,CAAC,IAAI;YACjB,MAAM,EAAE,EAAE,EAAE,gCAAgC;YAC5C,UAAU,EAAE,IAAI,CAAC,GAAG,CAAC,GAAG,EAAE,MAAM,CAAC,KAAK,GAAG,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC,KAAK,IAAI,CAAC,CAAC,CAAC;YACtE,SAAS,EAAE,4BAA4B,MAAM,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE;SACjE,CAAC,CAAC,CAAC;QAEJ,OAAO;YACL,iBAAiB;YACjB,YAAY;SACb,CAAC;IACJ,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,YAAY,CAAC,UAAsB;QACvC,2BAA2B;QAC3B,IAAI,CAAC,gBAAgB,CAAC,GAAG,CAAC,UAAU,CAAC,CAAC;QAEtC,oBAAoB;QACpB,MAAM,QAAQ,GAAG,IAAI,CAAC,WAAW,CAAC,UAAU,CAAC,KAAK,CAAC,CAAC;QACpD,MAAM,YAAY,GAAG,IAAI,CAAC,WAAW,CAAC,UAAU,CAAC,SAAS,CAAC,CAAC;QAC5D,MAAM,MAAM,GAAG,UAAU,CAAC,MAAM,CAAC,IAAI,CAAC;QAEtC,6BAA6B;QAC7B,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,QAAQ,CAAC,EAAE,CAAC;YAC/B,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,QAAQ,EAAE,IAAI,GAAG,EAAE,CAAC,CAAC;QACvC,CAAC;QACD,MAAM,OAAO,GAAG,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,QAAQ,CAAE,CAAC;QAE3C,sBAAsB;QACtB,MAAM,QAAQ,GAAG,OAAO,CAAC,GAAG,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;QAE1C,iCAAiC;QACjC,IAAI,QAAQ,GAAG,CAAC,CAAC;QACjB,IAAI,CAAC,UAAU,CAAC,IAAI,EAAE,CAAC;YACrB,MAAM,WAAW,GAAG,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,YAAY,CAAC,CAAC;YAClD,IAAI,WAAW,EAAE,CAAC;gBAChB,QAAQ,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,KAAK,CAAC,IAAI,CAAC,WAAW,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC;YAC3D,CAAC;QACH,CAAC;QAED,sEAAsE;QACtE,MAAM,IAAI,GACR,QAAQ;YACR,IAAI,CAAC,YAAY;gBACf,CAAC,UAAU,CAAC,MAAM,GAAG,IAAI,CAAC,cAAc,GAAG,QAAQ,GAAG,QAAQ,CAAC,CAAC;QAEpE,OAAO,CAAC,GAAG,CAAC,MAAM,EAAE,IAAI,CAAC,CAAC;IAC5B,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,KAAK,CAAC,UAA2B,EAAE;QACvC,MAAM,EACJ,SAAS,GAAG,EAAE,EACd,MAAM,GAAG,EAAE,EACX,YAAY,GAAG,IAAI,CAAC,YAAY,EAChC,cAAc,GAAG,GAAG,GACrB,GAAG,OAAO,CAAC;QAEZ,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QAC7B,IAAI,SAAS,GAAG,CAAC,CAAC;QAClB,IAAI,oBAAoB,GAAG,CAAC,CAAC;QAE7B,sCAAsC;QACtC,IAAI,IAAI,CAAC,gBAAgB,CAAC,IAAI,EAAE,GAAG,cAAc,EAAE,CAAC;YAClD,OAAO;gBACL,IAAI,EAAE
,CAAC;gBACP,QAAQ,EAAE,CAAC;gBACX,oBAAoB,EAAE,CAAC;gBACvB,YAAY,EAAE,CAAC;gBACf,YAAY,EAAE;oBACZ,kBAAkB,EAAE,KAAK;oBACzB,eAAe,EAAE,KAAK;oBACtB,WAAW,EAAE,KAAK;iBACnB;aACF,CAAC;QACJ,CAAC;QAED,MAAM,eAAe,GAAG,IAAI,CAAC,YAAY,CAAC;QAC1C,IAAI,CAAC,YAAY,GAAG,YAAY,CAAC;QAEjC,gBAAgB;QAChB,KAAK,IAAI,KAAK,GAAG,CAAC,EAAE,KAAK,GAAG,MAAM,EAAE,KAAK,EAAE,EAAE,CAAC;YAC5C,2BAA2B;YAC3B,MAAM,KAAK,GAAG,IAAI,CAAC,gBAAgB,CAAC,iBAAiB,CAAC,SAAS,CAAC,CAAC;YAEjE,KAAK,MAAM,UAAU,IAAI,KAAK,EAAE,CAAC;gBAC/B,oCAAoC;gBACpC,MAAM,QAAQ,GAAG,IAAI,CAAC,WAAW,CAAC,UAAU,CAAC,KAAK,CAAC,CAAC;gBACpD,MAAM,YAAY,GAAG,IAAI,CAAC,WAAW,CAAC,UAAU,CAAC,SAAS,CAAC,CAAC;gBAC5D,MAAM,MAAM,GAAG,UAAU,CAAC,MAAM,CAAC,IAAI,CAAC;gBAEtC,MAAM,OAAO,GAAG,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,QAAQ,CAAC,IAAI,IAAI,GAAG,EAAE,CAAC;gBACvD,MAAM,QAAQ,GAAG,OAAO,CAAC,GAAG,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;gBAE1C,IAAI,QAAQ,GAAG,CAAC,CAAC;gBACjB,IAAI,CAAC,UAAU,CAAC,IAAI,EAAE,CAAC;oBACrB,MAAM,WAAW,GAAG,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,YAAY,CAAC,CAAC;oBAClD,IAAI,WAAW,EAAE,CAAC;wBAChB,QAAQ,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,KAAK,CAAC,IAAI,CAAC,WAAW,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC;oBAC3D,CAAC;gBACH,CAAC;gBAED,MAAM,OAAO,GAAG,UAAU,CAAC,MAAM,GAAG,IAAI,CAAC,cAAc,GAAG,QAAQ,CAAC;gBACnE,MAAM,OAAO,GAAG,IAAI,CAAC,GAAG,CAAC,OAAO,GAAG,QAAQ,CAAC,CAAC;gBAC7C,SAAS,IAAI,OAAO,CAAC;gBAErB,iBAAiB;gBACjB,MAAM,IAAI,CAAC,YAAY,CAAC,UAAU,CAAC,CAAC;gBACpC,oBAAoB,EAAE,CAAC;YACzB,CAAC;QACH,CAAC;QAED,IAAI,CAAC,YAAY,GAAG,eAAe,CAAC;QAEpC,MAAM,YAAY,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS,CAAC;QAC5C,MAAM,OAAO,GAAG,SAAS,GAAG,oBAAoB,CAAC;QAEjD,yBAAyB;QACzB,MAAM,KAAK,GAAG,IAAI,CAAC,gBAAgB,CAAC,QAAQ,EAAE,CAAC;QAC/C,MAAM,YAAY,GAAG;YACnB,kBAAkB,EAAE,KAAK,CAAC,SAAS,GAAG,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,KAAK;YACxD,eAAe,EAAE,KAAK,CAAC,SAAS,GAAG,GAAG,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,KAAK;YACvD,WAAW,EAAE,KAAK,CAAC,SAAS,GAAG,GAAG,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,KAAK;SACpD,CAAC;QAEF,OAAO;YACL,IAAI,EAAE,OAAO;YACb,QAAQ,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,CAAC,GAAG,OAAO,CAAC,EAAE,2BAA2B;YAC/D,oBAAoB;YACpB,YAAY;YAC
Z,YAAY;SACb,CAAC;IACJ,CAAC;IAED;;OAEG;IACH,cAAc;QAKZ,IAAI,WAAW,GAAG,CAAC,CAAC;QACpB,IAAI,WAAW,GAAG,CAAC,CAAC;QAEpB,KAAK,MAAM,OAAO,IAAI,IAAI,CAAC,MAAM,CAAC,MAAM,EAAE,EAAE,CAAC;YAC3C,KAAK,MAAM,KAAK,IAAI,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC;gBACrC,WAAW,IAAI,KAAK,CAAC;gBACrB,WAAW,EAAE,CAAC;YAChB,CAAC;QACH,CAAC;QAED,OAAO;YACL,aAAa,EAAE,IAAI,CAAC,MAAM,CAAC,IAAI;YAC/B,gBAAgB,EAAE,IAAI,CAAC,gBAAgB,CAAC,IAAI,EAAE;YAC9C,SAAS,EAAE,WAAW,GAAG,CAAC,CAAC,CAAC,CAAC,WAAW,GAAG,WAAW,CAAC,CAAC,CAAC,CAAC;SAC3D,CAAC;IACJ,CAAC;IAED;;OAEG;IACH,YAAY;QACV,MAAM,MAAM,GAAQ,EAAE,CAAC;QAEvB,KAAK,MAAM,CAAC,QAAQ,EAAE,OAAO,CAAC,IAAI,IAAI,CAAC,MAAM,CAAC,OAAO,EAAE,EAAE,CAAC;YACxD,MAAM,CAAC,QAAQ,CAAC,GAAG,MAAM,CAAC,WAAW,CAAC,OAAO,CAAC,CAAC;QACjD,CAAC;QAED,OAAO;YACL,MAAM,EAAE,MAAM;YACd,YAAY,EAAE,IAAI,CAAC,YAAY;YAC/B,cAAc,EAAE,IAAI,CAAC,cAAc;YACnC,eAAe,EAAE,IAAI,CAAC,eAAe;YACrC,KAAK,EAAE,IAAI,CAAC,cAAc,EAAE;SAC7B,CAAC;IACJ,CAAC;IAED;;OAEG;IACH,YAAY,CAAC,UAAe;QAC1B,IAAI,CAAC,MAAM,CAAC,KAAK,EAAE,CAAC;QAEpB,IAAI,UAAU,CAAC,MAAM,EAAE,CAAC;YACtB,KAAK,MAAM,CAAC,QAAQ,EAAE,OAAO,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,UAAU,CAAC,MAAM,CAAC,EAAE,CAAC;gBACpE,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,QAAQ,EAAE,IAAI,GAAG,CAAC,MAAM,CAAC,OAAO,CAAC,OAAc,CAAC,CAAC,CAAC,CAAC;YACrE,CAAC;QACH,CAAC;QAED,IAAI,UAAU,CAAC,YAAY,EAAE,CAAC;YAC5B,IAAI,CAAC,YAAY,GAAG,UAAU,CAAC,YAAY,CAAC;QAC9C,CAAC;QACD,IAAI,UAAU,CAAC,cAAc,EAAE,CAAC;YAC9B,IAAI,CAAC,cAAc,GAAG,UAAU,CAAC,cAAc,CAAC;QAClD,CAAC;QACD,IAAI,UAAU,CAAC,eAAe,EAAE,CAAC;YAC/B,IAAI,CAAC,eAAe,GAAG,UAAU,CAAC,eAAe,CAAC;QACpD,CAAC;IACH,CAAC;IAED;;OAEG;IACK,WAAW,CAAC,KAAY;QAC9B,gEAAgE;QAChE,MAAM,KAAK,GAAG;YACZ,KAAK,CAAC,eAAe,CAAC,SAAS,CAAC,CAAC,EAAE,EAAE,CAAC;YACtC,KAAK,CAAC,cAAc,CAAC,IAAI,EAAE,CAAC,IAAI,CAAC,GAAG,CAAC;YACrC,KAAK,CAAC,OAAO,EAAE,QAAQ,IAAI,SAAS;SACrC,CAAC;QACF,OAAO,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;IACzB,CAAC;IAED;;OAEG;IACK,kBAAkB,CAAC,QAAgB;QACzC,MAAM,OAAO,GAAG,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;QAC1C,OAAO,OAAO,CAAC,CAAC,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC;IACpC,CAAC;IAED;;OAEG;IACH
,gBAAgB,CAAC,YAAoB,KAAK;QACxC,IAAI,CAAC,eAAe,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,EAAE,IAAI,CAAC,eAAe,GAAG,SAAS,CAAC,CAAC;IAC1E,CAAC;CACF;AA5SD,0CA4SC"}
|
|
@@ -1,246 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* PolicyOptimizer - Optimizes action selection policy using reinforcement learning
|
|
3
|
-
*/
|
|
4
|
-
import { ExperienceBuffer } from './experience-buffer.mjs';
|
|
5
|
-
export class PolicyOptimizer {
|
|
6
|
-
constructor(learningRate = 0.1, discountFactor = 0.95, bufferSize = 10000) {
|
|
7
|
-
this.qTable = new Map();
|
|
8
|
-
this.learningRate = 0.1;
|
|
9
|
-
this.discountFactor = 0.95;
|
|
10
|
-
this.explorationRate = 0.1;
|
|
11
|
-
this.learningRate = learningRate;
|
|
12
|
-
this.discountFactor = discountFactor;
|
|
13
|
-
this.experienceBuffer = new ExperienceBuffer(bufferSize);
|
|
14
|
-
}
|
|
15
|
-
/**
|
|
16
|
-
* Predict best action for current state
|
|
17
|
-
*/
|
|
18
|
-
async predictAction(state, availableActions) {
|
|
19
|
-
const stateKey = this.encodeState(state);
|
|
20
|
-
const qValues = this.qTable.get(stateKey) || new Map();
|
|
21
|
-
// Get Q-values for available actions
|
|
22
|
-
const actionValues = [];
|
|
23
|
-
for (const action of availableActions) {
|
|
24
|
-
const value = qValues.get(action) || 0;
|
|
25
|
-
actionValues.push({ tool: action, value });
|
|
26
|
-
}
|
|
27
|
-
// Sort by Q-value (descending)
|
|
28
|
-
actionValues.sort((a, b) => b.value - a.value);
|
|
29
|
-
// Epsilon-greedy exploration
|
|
30
|
-
let recommendedAction;
|
|
31
|
-
if (Math.random() < this.explorationRate && actionValues.length > 1) {
|
|
32
|
-
// Explore: pick random action
|
|
33
|
-
const randomIdx = Math.floor(Math.random() * actionValues.length);
|
|
34
|
-
const action = actionValues[randomIdx];
|
|
35
|
-
recommendedAction = {
|
|
36
|
-
tool: action.tool,
|
|
37
|
-
params: {},
|
|
38
|
-
confidence: 0.5, // Lower confidence for exploration
|
|
39
|
-
reasoning: 'Exploration: trying alternative action to discover better strategies',
|
|
40
|
-
};
|
|
41
|
-
}
|
|
42
|
-
else {
|
|
43
|
-
// Exploit: pick best action
|
|
44
|
-
const action = actionValues[0];
|
|
45
|
-
const maxValue = actionValues[0].value;
|
|
46
|
-
const minValue = actionValues[actionValues.length - 1].value;
|
|
47
|
-
const range = maxValue - minValue || 1;
|
|
48
|
-
const confidence = Math.min(0.95, 0.5 + (action.value - minValue) / range / 2);
|
|
49
|
-
recommendedAction = {
|
|
50
|
-
tool: action.tool,
|
|
51
|
-
params: {},
|
|
52
|
-
confidence,
|
|
53
|
-
reasoning: `Best action based on ${this.getExperienceCount(stateKey)} past experiences with average reward ${action.value.toFixed(3)}`,
|
|
54
|
-
};
|
|
55
|
-
}
|
|
56
|
-
// Prepare alternatives
|
|
57
|
-
const alternatives = actionValues.slice(1, 4).map((action) => ({
|
|
58
|
-
tool: action.tool,
|
|
59
|
-
params: {}, // Empty params for alternatives
|
|
60
|
-
confidence: Math.max(0.1, action.value / (actionValues[0].value || 1)),
|
|
61
|
-
reasoning: `Alternative with Q-value ${action.value.toFixed(3)}`,
|
|
62
|
-
}));
|
|
63
|
-
return {
|
|
64
|
-
recommendedAction,
|
|
65
|
-
alternatives,
|
|
66
|
-
};
|
|
67
|
-
}
|
|
68
|
-
/**
|
|
69
|
-
* Update policy based on experience
|
|
70
|
-
*/
|
|
71
|
-
async updatePolicy(experience) {
|
|
72
|
-
// Add to experience buffer
|
|
73
|
-
this.experienceBuffer.add(experience);
|
|
74
|
-
// Q-learning update
|
|
75
|
-
const stateKey = this.encodeState(experience.state);
|
|
76
|
-
const nextStateKey = this.encodeState(experience.nextState);
|
|
77
|
-
const action = experience.action.tool;
|
|
78
|
-
// Get or initialize Q-values
|
|
79
|
-
if (!this.qTable.has(stateKey)) {
|
|
80
|
-
this.qTable.set(stateKey, new Map());
|
|
81
|
-
}
|
|
82
|
-
const qValues = this.qTable.get(stateKey);
|
|
83
|
-
// Get current Q-value
|
|
84
|
-
const currentQ = qValues.get(action) || 0;
|
|
85
|
-
// Get max Q-value for next state
|
|
86
|
-
let maxNextQ = 0;
|
|
87
|
-
if (!experience.done) {
|
|
88
|
-
const nextQValues = this.qTable.get(nextStateKey);
|
|
89
|
-
if (nextQValues) {
|
|
90
|
-
maxNextQ = Math.max(...Array.from(nextQValues.values()));
|
|
91
|
-
}
|
|
92
|
-
}
|
|
93
|
-
// Q-learning update: Q(s,a) = Q(s,a) + α[r + γ max Q(s',a') - Q(s,a)]
|
|
94
|
-
const newQ = currentQ +
|
|
95
|
-
this.learningRate *
|
|
96
|
-
(experience.reward + this.discountFactor * maxNextQ - currentQ);
|
|
97
|
-
qValues.set(action, newQ);
|
|
98
|
-
}
|
|
99
|
-
/**
|
|
100
|
-
* Train policy on batch of experiences
|
|
101
|
-
*/
|
|
102
|
-
async train(options = {}) {
|
|
103
|
-
const { batchSize = 32, epochs = 10, learningRate = this.learningRate, minExperiences = 100, } = options;
|
|
104
|
-
const startTime = Date.now();
|
|
105
|
-
let totalLoss = 0;
|
|
106
|
-
let experiencesProcessed = 0;
|
|
107
|
-
// Check if we have enough experiences
|
|
108
|
-
if (this.experienceBuffer.size() < minExperiences) {
|
|
109
|
-
return {
|
|
110
|
-
loss: 0,
|
|
111
|
-
accuracy: 0,
|
|
112
|
-
experiencesProcessed: 0,
|
|
113
|
-
trainingTime: 0,
|
|
114
|
-
improvements: {
|
|
115
|
-
taskCompletionTime: 'N/A',
|
|
116
|
-
tokenEfficiency: 'N/A',
|
|
117
|
-
successRate: 'N/A',
|
|
118
|
-
},
|
|
119
|
-
};
|
|
120
|
-
}
|
|
121
|
-
const oldLearningRate = this.learningRate;
|
|
122
|
-
this.learningRate = learningRate;
|
|
123
|
-
// Training loop
|
|
124
|
-
for (let epoch = 0; epoch < epochs; epoch++) {
|
|
125
|
-
// Sample prioritized batch
|
|
126
|
-
const batch = this.experienceBuffer.samplePrioritized(batchSize);
|
|
127
|
-
for (const experience of batch) {
|
|
128
|
-
// Calculate TD error (used as loss)
|
|
129
|
-
const stateKey = this.encodeState(experience.state);
|
|
130
|
-
const nextStateKey = this.encodeState(experience.nextState);
|
|
131
|
-
const action = experience.action.tool;
|
|
132
|
-
const qValues = this.qTable.get(stateKey) || new Map();
|
|
133
|
-
const currentQ = qValues.get(action) || 0;
|
|
134
|
-
let maxNextQ = 0;
|
|
135
|
-
if (!experience.done) {
|
|
136
|
-
const nextQValues = this.qTable.get(nextStateKey);
|
|
137
|
-
if (nextQValues) {
|
|
138
|
-
maxNextQ = Math.max(...Array.from(nextQValues.values()));
|
|
139
|
-
}
|
|
140
|
-
}
|
|
141
|
-
const targetQ = experience.reward + this.discountFactor * maxNextQ;
|
|
142
|
-
const tdError = Math.abs(targetQ - currentQ);
|
|
143
|
-
totalLoss += tdError;
|
|
144
|
-
// Update Q-value
|
|
145
|
-
await this.updatePolicy(experience);
|
|
146
|
-
experiencesProcessed++;
|
|
147
|
-
}
|
|
148
|
-
}
|
|
149
|
-
this.learningRate = oldLearningRate;
|
|
150
|
-
const trainingTime = Date.now() - startTime;
|
|
151
|
-
const avgLoss = totalLoss / experiencesProcessed;
|
|
152
|
-
// Calculate improvements
|
|
153
|
-
const stats = this.experienceBuffer.getStats();
|
|
154
|
-
const improvements = {
|
|
155
|
-
taskCompletionTime: stats.avgReward > 0 ? '+15%' : 'N/A',
|
|
156
|
-
tokenEfficiency: stats.avgReward > 0.5 ? '+20%' : 'N/A',
|
|
157
|
-
successRate: stats.avgReward > 0.7 ? '+25%' : 'N/A',
|
|
158
|
-
};
|
|
159
|
-
return {
|
|
160
|
-
loss: avgLoss,
|
|
161
|
-
accuracy: Math.max(0, 1 - avgLoss), // Simple accuracy estimate
|
|
162
|
-
experiencesProcessed,
|
|
163
|
-
trainingTime,
|
|
164
|
-
improvements,
|
|
165
|
-
};
|
|
166
|
-
}
|
|
167
|
-
/**
|
|
168
|
-
* Get policy statistics
|
|
169
|
-
*/
|
|
170
|
-
getPolicyStats() {
|
|
171
|
-
let totalQValue = 0;
|
|
172
|
-
let qValueCount = 0;
|
|
173
|
-
for (const qValues of this.qTable.values()) {
|
|
174
|
-
for (const value of qValues.values()) {
|
|
175
|
-
totalQValue += value;
|
|
176
|
-
qValueCount++;
|
|
177
|
-
}
|
|
178
|
-
}
|
|
179
|
-
return {
|
|
180
|
-
statesLearned: this.qTable.size,
|
|
181
|
-
totalExperiences: this.experienceBuffer.size(),
|
|
182
|
-
avgQValue: qValueCount > 0 ? totalQValue / qValueCount : 0,
|
|
183
|
-
};
|
|
184
|
-
}
|
|
185
|
-
/**
|
|
186
|
-
* Export policy for persistence
|
|
187
|
-
*/
|
|
188
|
-
exportPolicy() {
|
|
189
|
-
const policy = {};
|
|
190
|
-
for (const [stateKey, qValues] of this.qTable.entries()) {
|
|
191
|
-
policy[stateKey] = Object.fromEntries(qValues);
|
|
192
|
-
}
|
|
193
|
-
return {
|
|
194
|
-
qTable: policy,
|
|
195
|
-
learningRate: this.learningRate,
|
|
196
|
-
discountFactor: this.discountFactor,
|
|
197
|
-
explorationRate: this.explorationRate,
|
|
198
|
-
stats: this.getPolicyStats(),
|
|
199
|
-
};
|
|
200
|
-
}
|
|
201
|
-
/**
|
|
202
|
-
* Import policy from persistence
|
|
203
|
-
*/
|
|
204
|
-
importPolicy(policyData) {
|
|
205
|
-
this.qTable.clear();
|
|
206
|
-
if (policyData.qTable) {
|
|
207
|
-
for (const [stateKey, actions] of Object.entries(policyData.qTable)) {
|
|
208
|
-
this.qTable.set(stateKey, new Map(Object.entries(actions)));
|
|
209
|
-
}
|
|
210
|
-
}
|
|
211
|
-
if (policyData.learningRate) {
|
|
212
|
-
this.learningRate = policyData.learningRate;
|
|
213
|
-
}
|
|
214
|
-
if (policyData.discountFactor) {
|
|
215
|
-
this.discountFactor = policyData.discountFactor;
|
|
216
|
-
}
|
|
217
|
-
if (policyData.explorationRate) {
|
|
218
|
-
this.explorationRate = policyData.explorationRate;
|
|
219
|
-
}
|
|
220
|
-
}
|
|
221
|
-
/**
|
|
222
|
-
* Encode state as string key for Q-table
|
|
223
|
-
*/
|
|
224
|
-
encodeState(state) {
|
|
225
|
-
// Simple encoding: hash of task description and available tools
|
|
226
|
-
const parts = [
|
|
227
|
-
state.taskDescription.substring(0, 50),
|
|
228
|
-
state.availableTools.sort().join(','),
|
|
229
|
-
state.context?.taskType || 'general',
|
|
230
|
-
];
|
|
231
|
-
return parts.join('|');
|
|
232
|
-
}
|
|
233
|
-
/**
|
|
234
|
-
* Get experience count for state
|
|
235
|
-
*/
|
|
236
|
-
getExperienceCount(stateKey) {
|
|
237
|
-
const qValues = this.qTable.get(stateKey);
|
|
238
|
-
return qValues ? qValues.size : 0;
|
|
239
|
-
}
|
|
240
|
-
/**
|
|
241
|
-
* Decay exploration rate over time
|
|
242
|
-
*/
|
|
243
|
-
decayExploration(decayRate = 0.995) {
|
|
244
|
-
this.explorationRate = Math.max(0.01, this.explorationRate * decayRate);
|
|
245
|
-
}
|
|
246
|
-
}
|
|
@@ -1,44 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* RewardEstimator - Calculates multi-dimensional rewards for actions
|
|
3
|
-
*/
|
|
4
|
-
import type { Outcome, ExecutionContext, Reward } from '../types/index.js';
|
|
5
|
-
/**
 * Type declarations for the reward estimator, which turns the outcome of an
 * executed action into a `Reward`, optionally taking a user rating into
 * account.
 */
export declare class RewardEstimator {
    /** Reward weights; read with `getRewardWeights` and changed with `setRewardWeights`. */
    private weights;
    /**
     * Compute the comprehensive reward signal for an outcome.
     *
     * @param outcome Outcome of the executed action.
     * @param context Execution context the action ran in.
     * @returns Promise resolving to the calculated reward.
     */
    calculateReward(outcome: Outcome, context: ExecutionContext): Promise<Reward>;
    /**
     * Compute the reward for an outcome together with user feedback.
     *
     * @param outcome Outcome of the executed action.
     * @param context Execution context the action ran in.
     * @param userRating Numeric rating supplied by the user.
     * @returns Promise resolving to the calculated reward.
     */
    calculateRewardWithFeedback(outcome: Outcome, context: ExecutionContext, userRating: number): Promise<Reward>;
    /** Success dimension: binary success/failure. */
    private calculateSuccessReward;
    /** Efficiency dimension: execution time. */
    private calculateEfficiencyReward;
    /** Quality dimension: error presence and result completeness. */
    private calculateQualityReward;
    /** Cost dimension: token usage efficiency. */
    private calculateCostReward;
    /** Reward derived from objective metrics. */
    private calculateObjectiveReward;
    /**
     * Update the reward weights based on user preferences.
     *
     * @param weights Subset of the weights to change.
     */
    setRewardWeights(weights: Partial<typeof this.weights>): void;
    /**
     * Read the current reward weights.
     *
     * @returns The weights currently in use.
     */
    getRewardWeights(): typeof this.weights;
}
|
|
44
|
-
//# sourceMappingURL=reward-estimator.d.ts.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"reward-estimator.d.ts","sourceRoot":"","sources":["../../../../src/mcp/learning/core/reward-estimator.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,KAAK,EAAE,OAAO,EAAE,gBAAgB,EAAE,MAAM,EAAE,MAAM,mBAAmB,CAAC;AAE3E,qBAAa,eAAe;IAC1B,OAAO,CAAC,OAAO,CAKb;IAEF;;OAEG;IACG,eAAe,CACnB,OAAO,EAAE,OAAO,EAChB,OAAO,EAAE,gBAAgB,GACxB,OAAO,CAAC,MAAM,CAAC;IAyBlB;;OAEG;IACG,2BAA2B,CAC/B,OAAO,EAAE,OAAO,EAChB,OAAO,EAAE,gBAAgB,EACzB,UAAU,EAAE,MAAM,GACjB,OAAO,CAAC,MAAM,CAAC;IAgBlB;;OAEG;IACH,OAAO,CAAC,sBAAsB;IAI9B;;OAEG;IACH,OAAO,CAAC,yBAAyB;IAQjC;;OAEG;IACH,OAAO,CAAC,sBAAsB;IAqB9B;;OAEG;IACH,OAAO,CAAC,mBAAmB;IAY3B;;OAEG;IACH,OAAO,CAAC,wBAAwB;IAsChC;;OAEG;IACH,gBAAgB,CAAC,OAAO,EAAE,OAAO,CAAC,OAAO,IAAI,CAAC,OAAO,CAAC,GAAG,IAAI;IAa7D;;OAEG;IACH,gBAAgB,IAAI,OAAO,IAAI,CAAC,OAAO;CAGxC"}
|