nodebench-mcp 2.70.0 → 3.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +95 -41
- package/dist/agents/alertRouter.d.ts +38 -0
- package/dist/agents/alertRouter.js +151 -0
- package/dist/agents/alertRouter.js.map +1 -0
- package/dist/agents/entityMemory.d.ts +40 -0
- package/dist/agents/entityMemory.js +64 -0
- package/dist/agents/entityMemory.js.map +1 -0
- package/dist/agents/subAgents.d.ts +35 -0
- package/dist/agents/subAgents.js +62 -0
- package/dist/agents/subAgents.js.map +1 -0
- package/dist/benchmarks/benchmarkRunner.js +14 -0
- package/dist/benchmarks/benchmarkRunner.js.map +1 -1
- package/dist/benchmarks/chainEval.js +107 -0
- package/dist/benchmarks/chainEval.js.map +1 -1
- package/dist/benchmarks/llmJudgeEval.js +85 -0
- package/dist/benchmarks/llmJudgeEval.js.map +1 -1
- package/dist/benchmarks/searchQualityEval.js +118 -5
- package/dist/benchmarks/searchQualityEval.js.map +1 -1
- package/dist/cli/search.d.ts +13 -0
- package/dist/cli/search.js +130 -0
- package/dist/cli/search.js.map +1 -0
- package/dist/dashboard/operatingDashboardHtml.js +2 -1
- package/dist/dashboard/operatingDashboardHtml.js.map +1 -1
- package/dist/dashboard/operatingServer.js +3 -2
- package/dist/dashboard/operatingServer.js.map +1 -1
- package/dist/db.d.ts +6 -2
- package/dist/db.js +521 -6
- package/dist/db.js.map +1 -1
- package/dist/index.js +349 -67
- package/dist/index.js.map +1 -1
- package/dist/packageInfo.d.ts +3 -0
- package/dist/packageInfo.js +32 -0
- package/dist/packageInfo.js.map +1 -0
- package/dist/profiler/behaviorStore.d.ts +97 -0
- package/dist/profiler/behaviorStore.js +276 -0
- package/dist/profiler/behaviorStore.js.map +1 -0
- package/dist/profiler/eventCollector.d.ts +119 -0
- package/dist/profiler/eventCollector.js +267 -0
- package/dist/profiler/eventCollector.js.map +1 -0
- package/dist/profiler/index.d.ts +15 -0
- package/dist/profiler/index.js +16 -0
- package/dist/profiler/index.js.map +1 -0
- package/dist/profiler/mcpProxy.d.ts +49 -0
- package/dist/profiler/mcpProxy.js +123 -0
- package/dist/profiler/mcpProxy.js.map +1 -0
- package/dist/profiler/modelRouter.d.ts +30 -0
- package/dist/profiler/modelRouter.js +99 -0
- package/dist/profiler/modelRouter.js.map +1 -0
- package/dist/profiler/otelReceiver.d.ts +17 -0
- package/dist/profiler/otelReceiver.js +62 -0
- package/dist/profiler/otelReceiver.js.map +1 -0
- package/dist/profiler/proofEngine.d.ts +41 -0
- package/dist/profiler/proofEngine.js +93 -0
- package/dist/profiler/proofEngine.js.map +1 -0
- package/dist/profiler/workflowTemplates.d.ts +41 -0
- package/dist/profiler/workflowTemplates.js +95 -0
- package/dist/profiler/workflowTemplates.js.map +1 -0
- package/dist/providers/localMemoryProvider.js +3 -2
- package/dist/providers/localMemoryProvider.js.map +1 -1
- package/dist/runtimeConfig.d.ts +11 -0
- package/dist/runtimeConfig.js +27 -0
- package/dist/runtimeConfig.js.map +1 -0
- package/dist/sandboxApi.js +2 -1
- package/dist/sandboxApi.js.map +1 -1
- package/dist/security/auditLog.js +8 -3
- package/dist/security/auditLog.js.map +1 -1
- package/dist/subconscious/blocks.d.ts +43 -0
- package/dist/subconscious/blocks.js +158 -0
- package/dist/subconscious/blocks.js.map +1 -0
- package/dist/subconscious/classifier.d.ts +22 -0
- package/dist/subconscious/classifier.js +118 -0
- package/dist/subconscious/classifier.js.map +1 -0
- package/dist/subconscious/graphEngine.d.ts +65 -0
- package/dist/subconscious/graphEngine.js +234 -0
- package/dist/subconscious/graphEngine.js.map +1 -0
- package/dist/subconscious/index.d.ts +19 -0
- package/dist/subconscious/index.js +20 -0
- package/dist/subconscious/index.js.map +1 -0
- package/dist/subconscious/tools.d.ts +5 -0
- package/dist/subconscious/tools.js +255 -0
- package/dist/subconscious/tools.js.map +1 -0
- package/dist/subconscious/whisperPolicy.d.ts +20 -0
- package/dist/subconscious/whisperPolicy.js +171 -0
- package/dist/subconscious/whisperPolicy.js.map +1 -0
- package/dist/sweep/engine.d.ts +27 -0
- package/dist/sweep/engine.js +244 -0
- package/dist/sweep/engine.js.map +1 -0
- package/dist/sweep/index.d.ts +9 -0
- package/dist/sweep/index.js +8 -0
- package/dist/sweep/index.js.map +1 -0
- package/dist/sweep/sources/github_trending.d.ts +6 -0
- package/dist/sweep/sources/github_trending.js +37 -0
- package/dist/sweep/sources/github_trending.js.map +1 -0
- package/dist/sweep/sources/hackernews.d.ts +7 -0
- package/dist/sweep/sources/hackernews.js +57 -0
- package/dist/sweep/sources/hackernews.js.map +1 -0
- package/dist/sweep/sources/openbb_finance.d.ts +9 -0
- package/dist/sweep/sources/openbb_finance.js +46 -0
- package/dist/sweep/sources/openbb_finance.js.map +1 -0
- package/dist/sweep/sources/producthunt.d.ts +6 -0
- package/dist/sweep/sources/producthunt.js +41 -0
- package/dist/sweep/sources/producthunt.js.map +1 -0
- package/dist/sweep/sources/web_signals.d.ts +7 -0
- package/dist/sweep/sources/web_signals.js +63 -0
- package/dist/sweep/sources/web_signals.js.map +1 -0
- package/dist/sweep/sources/yahoo_finance.d.ts +6 -0
- package/dist/sweep/sources/yahoo_finance.js +47 -0
- package/dist/sweep/sources/yahoo_finance.js.map +1 -0
- package/dist/sweep/types.d.ts +50 -0
- package/dist/sweep/types.js +9 -0
- package/dist/sweep/types.js.map +1 -0
- package/dist/sync/founderEpisodeStore.d.ts +98 -0
- package/dist/sync/founderEpisodeStore.js +230 -0
- package/dist/sync/founderEpisodeStore.js.map +1 -0
- package/dist/sync/hyperloopArchive.d.ts +51 -0
- package/dist/sync/hyperloopArchive.js +153 -0
- package/dist/sync/hyperloopArchive.js.map +1 -0
- package/dist/sync/hyperloopEval.d.ts +123 -0
- package/dist/sync/hyperloopEval.js +389 -0
- package/dist/sync/hyperloopEval.js.map +1 -0
- package/dist/sync/protocol.d.ts +172 -0
- package/dist/sync/protocol.js +9 -0
- package/dist/sync/protocol.js.map +1 -0
- package/dist/sync/sessionMemory.d.ts +47 -0
- package/dist/sync/sessionMemory.js +138 -0
- package/dist/sync/sessionMemory.js.map +1 -0
- package/dist/sync/store.d.ts +384 -0
- package/dist/sync/store.js +1435 -0
- package/dist/sync/store.js.map +1 -0
- package/dist/sync/syncBridgeClient.d.ts +30 -0
- package/dist/sync/syncBridgeClient.js +172 -0
- package/dist/sync/syncBridgeClient.js.map +1 -0
- package/dist/tools/autonomousDeliveryTools.d.ts +2 -0
- package/dist/tools/autonomousDeliveryTools.js +1104 -0
- package/dist/tools/autonomousDeliveryTools.js.map +1 -0
- package/dist/tools/boilerplateTools.js +10 -9
- package/dist/tools/boilerplateTools.js.map +1 -1
- package/dist/tools/claudeCodeIngestTools.d.ts +10 -0
- package/dist/tools/claudeCodeIngestTools.js +347 -0
- package/dist/tools/claudeCodeIngestTools.js.map +1 -0
- package/dist/tools/coreWorkflowTools.d.ts +2 -0
- package/dist/tools/coreWorkflowTools.js +488 -0
- package/dist/tools/coreWorkflowTools.js.map +1 -0
- package/dist/tools/deltaTools.d.ts +15 -0
- package/dist/tools/deltaTools.js +1522 -0
- package/dist/tools/deltaTools.js.map +1 -0
- package/dist/tools/documentationTools.js +2 -1
- package/dist/tools/documentationTools.js.map +1 -1
- package/dist/tools/entityLookupTools.d.ts +14 -0
- package/dist/tools/entityLookupTools.js +159 -0
- package/dist/tools/entityLookupTools.js.map +1 -0
- package/dist/tools/entityTemporalTools.d.ts +12 -0
- package/dist/tools/entityTemporalTools.js +330 -0
- package/dist/tools/entityTemporalTools.js.map +1 -0
- package/dist/tools/founderLocalPipeline.d.ts +215 -0
- package/dist/tools/founderLocalPipeline.js +1516 -2
- package/dist/tools/founderLocalPipeline.js.map +1 -1
- package/dist/tools/founderOperatingModel.d.ts +120 -0
- package/dist/tools/founderOperatingModel.js +469 -0
- package/dist/tools/founderOperatingModel.js.map +1 -0
- package/dist/tools/founderOperatingModelTools.d.ts +2 -0
- package/dist/tools/founderOperatingModelTools.js +169 -0
- package/dist/tools/founderOperatingModelTools.js.map +1 -0
- package/dist/tools/founderStrategicOpsTools.d.ts +2 -0
- package/dist/tools/founderStrategicOpsTools.js +1310 -0
- package/dist/tools/founderStrategicOpsTools.js.map +1 -0
- package/dist/tools/graphifyTools.d.ts +19 -0
- package/dist/tools/graphifyTools.js +375 -0
- package/dist/tools/graphifyTools.js.map +1 -0
- package/dist/tools/index.d.ts +3 -0
- package/dist/tools/index.js +4 -0
- package/dist/tools/index.js.map +1 -1
- package/dist/tools/monteCarloTools.d.ts +16 -0
- package/dist/tools/monteCarloTools.js +225 -0
- package/dist/tools/monteCarloTools.js.map +1 -0
- package/dist/tools/packetCompilerTools.d.ts +12 -0
- package/dist/tools/packetCompilerTools.js +322 -0
- package/dist/tools/packetCompilerTools.js.map +1 -0
- package/dist/tools/planSynthesisTools.d.ts +15 -0
- package/dist/tools/planSynthesisTools.js +455 -0
- package/dist/tools/planSynthesisTools.js.map +1 -0
- package/dist/tools/profilerTools.d.ts +20 -0
- package/dist/tools/profilerTools.js +364 -0
- package/dist/tools/profilerTools.js.map +1 -0
- package/dist/tools/progressiveDiscoveryTools.js +2 -1
- package/dist/tools/progressiveDiscoveryTools.js.map +1 -1
- package/dist/tools/savingsTools.d.ts +11 -0
- package/dist/tools/savingsTools.js +155 -0
- package/dist/tools/savingsTools.js.map +1 -0
- package/dist/tools/scenarioCompilerTools.d.ts +14 -0
- package/dist/tools/scenarioCompilerTools.js +290 -0
- package/dist/tools/scenarioCompilerTools.js.map +1 -0
- package/dist/tools/sharedContextTools.d.ts +2 -0
- package/dist/tools/sharedContextTools.js +423 -0
- package/dist/tools/sharedContextTools.js.map +1 -0
- package/dist/tools/sitemapTools.d.ts +15 -0
- package/dist/tools/sitemapTools.js +560 -0
- package/dist/tools/sitemapTools.js.map +1 -0
- package/dist/tools/sweepTools.d.ts +9 -0
- package/dist/tools/sweepTools.js +112 -0
- package/dist/tools/sweepTools.js.map +1 -0
- package/dist/tools/syncBridgeTools.d.ts +2 -0
- package/dist/tools/syncBridgeTools.js +258 -0
- package/dist/tools/syncBridgeTools.js.map +1 -0
- package/dist/tools/toolRegistry.js +1223 -45
- package/dist/tools/toolRegistry.js.map +1 -1
- package/dist/tools/workspaceTools.d.ts +19 -0
- package/dist/tools/workspaceTools.js +762 -0
- package/dist/tools/workspaceTools.js.map +1 -0
- package/dist/toolsetRegistry.js +162 -3
- package/dist/toolsetRegistry.js.map +1 -1
- package/package.json +39 -38
- package/rules/nodebench-agentic-reliability.md +32 -0
- package/rules/nodebench-analyst-diagnostic.md +25 -0
- package/rules/nodebench-auto-qa.md +31 -0
- package/rules/nodebench-completion-traceability.md +22 -0
- package/rules/nodebench-flywheel-continuous.md +25 -0
- package/rules/nodebench-pre-release-review.md +24 -0
- package/rules/nodebench-qa-dogfood.md +26 -0
- package/rules/nodebench-scenario-testing.md +30 -0
- package/rules/nodebench-self-direction.md +23 -0
- package/rules/nodebench-self-judge-loop.md +24 -0
- package/scripts/install.sh +215 -0
- package/dist/__tests__/analytics.test.d.ts +0 -11
- package/dist/__tests__/analytics.test.js +0 -546
- package/dist/__tests__/analytics.test.js.map +0 -1
- package/dist/__tests__/architectComplex.test.d.ts +0 -1
- package/dist/__tests__/architectComplex.test.js +0 -373
- package/dist/__tests__/architectComplex.test.js.map +0 -1
- package/dist/__tests__/architectSmoke.test.d.ts +0 -1
- package/dist/__tests__/architectSmoke.test.js +0 -92
- package/dist/__tests__/architectSmoke.test.js.map +0 -1
- package/dist/__tests__/audit-registry.d.ts +0 -1
- package/dist/__tests__/audit-registry.js +0 -60
- package/dist/__tests__/audit-registry.js.map +0 -1
- package/dist/__tests__/batchAutopilot.test.d.ts +0 -8
- package/dist/__tests__/batchAutopilot.test.js +0 -218
- package/dist/__tests__/batchAutopilot.test.js.map +0 -1
- package/dist/__tests__/cliSubcommands.test.d.ts +0 -1
- package/dist/__tests__/cliSubcommands.test.js +0 -138
- package/dist/__tests__/cliSubcommands.test.js.map +0 -1
- package/dist/__tests__/comparativeBench.test.d.ts +0 -1
- package/dist/__tests__/comparativeBench.test.js +0 -722
- package/dist/__tests__/comparativeBench.test.js.map +0 -1
- package/dist/__tests__/critterCalibrationEval.d.ts +0 -8
- package/dist/__tests__/critterCalibrationEval.js +0 -370
- package/dist/__tests__/critterCalibrationEval.js.map +0 -1
- package/dist/__tests__/dynamicLoading.test.d.ts +0 -1
- package/dist/__tests__/dynamicLoading.test.js +0 -280
- package/dist/__tests__/dynamicLoading.test.js.map +0 -1
- package/dist/__tests__/embeddingProvider.test.d.ts +0 -1
- package/dist/__tests__/embeddingProvider.test.js +0 -86
- package/dist/__tests__/embeddingProvider.test.js.map +0 -1
- package/dist/__tests__/evalDatasetBench.test.d.ts +0 -1
- package/dist/__tests__/evalDatasetBench.test.js +0 -738
- package/dist/__tests__/evalDatasetBench.test.js.map +0 -1
- package/dist/__tests__/evalHarness.test.d.ts +0 -1
- package/dist/__tests__/evalHarness.test.js +0 -1107
- package/dist/__tests__/evalHarness.test.js.map +0 -1
- package/dist/__tests__/fixtures/bfcl_v3_long_context.sample.json +0 -264
- package/dist/__tests__/fixtures/generateBfclLongContextFixture.d.ts +0 -10
- package/dist/__tests__/fixtures/generateBfclLongContextFixture.js +0 -135
- package/dist/__tests__/fixtures/generateBfclLongContextFixture.js.map +0 -1
- package/dist/__tests__/fixtures/generateSwebenchVerifiedFixture.d.ts +0 -14
- package/dist/__tests__/fixtures/generateSwebenchVerifiedFixture.js +0 -189
- package/dist/__tests__/fixtures/generateSwebenchVerifiedFixture.js.map +0 -1
- package/dist/__tests__/fixtures/generateToolbenchInstructionFixture.d.ts +0 -16
- package/dist/__tests__/fixtures/generateToolbenchInstructionFixture.js +0 -154
- package/dist/__tests__/fixtures/generateToolbenchInstructionFixture.js.map +0 -1
- package/dist/__tests__/fixtures/swebench_verified.sample.json +0 -162
- package/dist/__tests__/fixtures/toolbench_instruction.sample.json +0 -109
- package/dist/__tests__/forecastingDogfood.test.d.ts +0 -9
- package/dist/__tests__/forecastingDogfood.test.js +0 -284
- package/dist/__tests__/forecastingDogfood.test.js.map +0 -1
- package/dist/__tests__/forecastingScoring.test.d.ts +0 -9
- package/dist/__tests__/forecastingScoring.test.js +0 -202
- package/dist/__tests__/forecastingScoring.test.js.map +0 -1
- package/dist/__tests__/gaiaCapabilityAudioEval.test.d.ts +0 -15
- package/dist/__tests__/gaiaCapabilityAudioEval.test.js +0 -265
- package/dist/__tests__/gaiaCapabilityAudioEval.test.js.map +0 -1
- package/dist/__tests__/gaiaCapabilityEval.test.d.ts +0 -14
- package/dist/__tests__/gaiaCapabilityEval.test.js +0 -1259
- package/dist/__tests__/gaiaCapabilityEval.test.js.map +0 -1
- package/dist/__tests__/gaiaCapabilityFilesEval.test.d.ts +0 -15
- package/dist/__tests__/gaiaCapabilityFilesEval.test.js +0 -914
- package/dist/__tests__/gaiaCapabilityFilesEval.test.js.map +0 -1
- package/dist/__tests__/gaiaCapabilityMediaEval.test.d.ts +0 -15
- package/dist/__tests__/gaiaCapabilityMediaEval.test.js +0 -1101
- package/dist/__tests__/gaiaCapabilityMediaEval.test.js.map +0 -1
- package/dist/__tests__/helpers/answerMatch.d.ts +0 -41
- package/dist/__tests__/helpers/answerMatch.js +0 -267
- package/dist/__tests__/helpers/answerMatch.js.map +0 -1
- package/dist/__tests__/helpers/textLlm.d.ts +0 -25
- package/dist/__tests__/helpers/textLlm.js +0 -214
- package/dist/__tests__/helpers/textLlm.js.map +0 -1
- package/dist/__tests__/localDashboard.test.d.ts +0 -1
- package/dist/__tests__/localDashboard.test.js +0 -226
- package/dist/__tests__/localDashboard.test.js.map +0 -1
- package/dist/__tests__/multiHopDogfood.test.d.ts +0 -12
- package/dist/__tests__/multiHopDogfood.test.js +0 -303
- package/dist/__tests__/multiHopDogfood.test.js.map +0 -1
- package/dist/__tests__/openDatasetParallelEval.test.d.ts +0 -7
- package/dist/__tests__/openDatasetParallelEval.test.js +0 -209
- package/dist/__tests__/openDatasetParallelEval.test.js.map +0 -1
- package/dist/__tests__/openDatasetParallelEvalGaia.test.d.ts +0 -7
- package/dist/__tests__/openDatasetParallelEvalGaia.test.js +0 -279
- package/dist/__tests__/openDatasetParallelEvalGaia.test.js.map +0 -1
- package/dist/__tests__/openDatasetParallelEvalSwebench.test.d.ts +0 -7
- package/dist/__tests__/openDatasetParallelEvalSwebench.test.js +0 -220
- package/dist/__tests__/openDatasetParallelEvalSwebench.test.js.map +0 -1
- package/dist/__tests__/openDatasetParallelEvalToolbench.test.d.ts +0 -7
- package/dist/__tests__/openDatasetParallelEvalToolbench.test.js +0 -218
- package/dist/__tests__/openDatasetParallelEvalToolbench.test.js.map +0 -1
- package/dist/__tests__/openDatasetPerfComparison.test.d.ts +0 -10
- package/dist/__tests__/openDatasetPerfComparison.test.js +0 -318
- package/dist/__tests__/openDatasetPerfComparison.test.js.map +0 -1
- package/dist/__tests__/openclawDogfood.test.d.ts +0 -23
- package/dist/__tests__/openclawDogfood.test.js +0 -535
- package/dist/__tests__/openclawDogfood.test.js.map +0 -1
- package/dist/__tests__/openclawMessaging.test.d.ts +0 -14
- package/dist/__tests__/openclawMessaging.test.js +0 -232
- package/dist/__tests__/openclawMessaging.test.js.map +0 -1
- package/dist/__tests__/presetRealWorldBench.test.d.ts +0 -1
- package/dist/__tests__/presetRealWorldBench.test.js +0 -859
- package/dist/__tests__/presetRealWorldBench.test.js.map +0 -1
- package/dist/__tests__/tools.test.d.ts +0 -1
- package/dist/__tests__/tools.test.js +0 -3201
- package/dist/__tests__/tools.test.js.map +0 -1
- package/dist/__tests__/toolsetGatingEval.test.d.ts +0 -1
- package/dist/__tests__/toolsetGatingEval.test.js +0 -1099
- package/dist/__tests__/toolsetGatingEval.test.js.map +0 -1
- package/dist/__tests__/traceabilityDogfood.test.d.ts +0 -12
- package/dist/__tests__/traceabilityDogfood.test.js +0 -241
- package/dist/__tests__/traceabilityDogfood.test.js.map +0 -1
- package/dist/__tests__/webmcpTools.test.d.ts +0 -7
- package/dist/__tests__/webmcpTools.test.js +0 -195
- package/dist/__tests__/webmcpTools.test.js.map +0 -1
- package/dist/benchmarks/testProviderBus.d.ts +0 -7
- package/dist/benchmarks/testProviderBus.js +0 -272
- package/dist/benchmarks/testProviderBus.js.map +0 -1
- package/dist/hooks/postCompaction.d.ts +0 -14
- package/dist/hooks/postCompaction.js +0 -51
- package/dist/hooks/postCompaction.js.map +0 -1
- package/dist/security/__tests__/security.test.d.ts +0 -8
- package/dist/security/__tests__/security.test.js +0 -295
- package/dist/security/__tests__/security.test.js.map +0 -1
- package/dist/tools/documentTools.d.ts +0 -5
- package/dist/tools/documentTools.js +0 -524
- package/dist/tools/documentTools.js.map +0 -1
- package/dist/tools/financialTools.d.ts +0 -10
- package/dist/tools/financialTools.js +0 -403
- package/dist/tools/financialTools.js.map +0 -1
- package/dist/tools/memoryTools.d.ts +0 -5
- package/dist/tools/memoryTools.js +0 -137
- package/dist/tools/memoryTools.js.map +0 -1
- package/dist/tools/planningTools.d.ts +0 -5
- package/dist/tools/planningTools.js +0 -147
- package/dist/tools/planningTools.js.map +0 -1
- package/dist/tools/searchTools.d.ts +0 -5
- package/dist/tools/searchTools.js +0 -145
- package/dist/tools/searchTools.js.map +0 -1
|
@@ -0,0 +1,1104 @@
|
|
|
1
|
+
import { getDb, genId } from "../db.js";
|
|
2
|
+
import { safeExec } from "../security/index.js";
|
|
3
|
+
import { reconTools } from "./reconTools.js";
|
|
4
|
+
import { verificationTools } from "./verificationTools.js";
|
|
5
|
+
import { qualityGateTools } from "./qualityGateTools.js";
|
|
6
|
+
import { flywheelTools } from "./flywheelTools.js";
|
|
7
|
+
import { dogfoodJudgeTools } from "./dogfoodJudgeTools.js";
|
|
8
|
+
import { llmJudgeLoopTools } from "./llmJudgeLoop.js";
|
|
9
|
+
import { learningTools } from "./learningTools.js";
|
|
10
|
+
import { sessionMemoryTools } from "./sessionMemoryTools.js";
|
|
11
|
+
import { linkDurableObjects, recordExecutionReceipt, recordLocalOutcome, upsertDurableObject, } from "../sync/store.js";
|
|
12
|
+
let schemaReady = false;
|
|
13
|
+
/**
 * Creates the autonomous-delivery tables and indexes if they do not exist yet.
 *
 * The DDL is executed at most once per process: the module-level
 * `schemaReady` flag is flipped after the first successful call, and every
 * later call returns immediately. All statements use IF NOT EXISTS.
 */
function ensureSchema() {
    if (!schemaReady) {
        getDb().exec(`
    CREATE TABLE IF NOT EXISTS autonomous_delivery_runs (
      run_id TEXT PRIMARY KEY,
      goal TEXT NOT NULL,
      status TEXT NOT NULL DEFAULT 'running',
      summary TEXT,
      research_summary TEXT,
      plan_json TEXT,
      implementation_summary TEXT,
      dogfood_summary TEXT,
      judge_summary TEXT,
      verification_summary TEXT,
      learning_summary TEXT,
      created_at TEXT NOT NULL,
      updated_at TEXT NOT NULL,
      completed_at TEXT
    );

    CREATE TABLE IF NOT EXISTS autonomous_delivery_steps (
      step_id TEXT PRIMARY KEY,
      run_id TEXT NOT NULL REFERENCES autonomous_delivery_runs(run_id) ON DELETE CASCADE,
      stage TEXT NOT NULL,
      status TEXT NOT NULL DEFAULT 'running',
      summary TEXT,
      details_json TEXT,
      started_at TEXT NOT NULL,
      completed_at TEXT
    );

    CREATE INDEX IF NOT EXISTS idx_autonomous_delivery_runs_created
      ON autonomous_delivery_runs(created_at DESC);
    CREATE INDEX IF NOT EXISTS idx_autonomous_delivery_steps_run
      ON autonomous_delivery_steps(run_id, started_at);
  `);
        // Only mark ready after exec() returned without throwing.
        schemaReady = true;
    }
}
|
|
53
|
+
/**
 * Finds a tool definition by exact name.
 *
 * @param {Array<{name: string}>} tools - Candidate tool entries.
 * @param {string} name - Name to match with strict equality.
 * @returns {object} The first entry whose `name` equals `name`.
 * @throws {Error} `Tool not found: <name>` when no entry matches.
 */
function requireTool(tools, name) {
    const match = tools.find(({ name: candidateName }) => candidateName === name);
    if (match) {
        return match;
    }
    throw new Error(`Tool not found: ${name}`);
}
|
|
59
|
+
/**
 * Shortens text to at most `limit` characters, appending "..." when cut.
 *
 * @param {*} text - Value to shorten. Falsy values yield "". Non-string
 *   values are converted with String() first; previously a truthy
 *   non-string (e.g. a number) threw because it has no `slice` method.
 * @param {number} [limit=400] - Maximum number of characters kept before
 *   the ellipsis.
 * @returns {string} The original text when it fits, otherwise the first
 *   `limit` characters followed by "...".
 */
function truncate(text, limit = 400) {
    if (!text)
        return "";
    const value = typeof text === "string" ? text : String(text);
    return value.length <= limit ? value : `${value.slice(0, limit)}...`;
}
|
|
64
|
+
/**
 * Converts arbitrary text into a lowercase, hyphen-separated slug of at most
 * 48 characters.
 *
 * Runs of characters outside [a-z0-9] collapse to a single "-", and leading
 * and trailing hyphens are removed. The hyphen trim is applied again after
 * the length cut, because cutting at 48 characters can end right after a
 * separator and would otherwise leave a dangling "-".
 *
 * @param {*} value - Text to slugify; null/undefined are treated as "".
 * @returns {string} The slug (possibly empty).
 */
function slugify(value) {
    return String(value ?? "")
        .toLowerCase()
        .replace(/[^a-z0-9]+/g, "-")
        .replace(/^-+|-+$/g, "")
        .slice(0, 48)
        // The cut may have exposed a separator at the end; strip it.
        .replace(/-+$/, "");
}
|
|
71
|
+
/**
 * Determines the delivery phase of an implementation command.
 *
 * An explicit truthy `command.phase` is returned unchanged. Otherwise the
 * lowercased `"<label> <command>"` text is scanned for keywords; the first
 * matching rule (in the order listed below) decides the phase, and
 * "implement" is the fallback.
 *
 * @param {{phase?: string, label: string, command: string}} command
 * @returns {string} The explicit or inferred phase name.
 */
function normalizePhase(command) {
    if (command.phase) {
        return command.phase;
    }
    const text = `${command.label} ${command.command}`.toLowerCase();
    // Order matters: earlier rules win when several keywords are present.
    const rules = [
        ["lint", ["lint"]],
        ["compile", ["compile", "build", "tsc"]],
        ["self_debug", ["debug"]],
        ["failure_path_test", ["failure", "negative"]],
        ["happy_path_test", ["happy", "smoke"]],
        ["test", ["test", "vitest", "jest", "pytest"]],
    ];
    for (const [phase, keywords] of rules) {
        if (keywords.some((keyword) => text.includes(keyword))) {
            return phase;
        }
    }
    return "implement";
}
|
|
93
|
+
/**
 * Condenses a research summary into at most six short highlight strings.
 *
 * For every category in `findingsByCategory`, the `summary` of the first
 * finding becomes `"<category>: <summary>"`. Up to three entries of
 * `prioritizedActions` are then added as `"action: <action>"`. Each text is
 * truncated to 140 characters.
 *
 * @param {object|null|undefined} researchSummary - May carry
 *   `findingsByCategory` (object of arrays) and `prioritizedActions` (array).
 * @returns {string[]} Highlights; empty when `researchSummary` is falsy.
 */
function summarizeResearch(researchSummary) {
    if (!researchSummary)
        return [];
    const highlights = [];
    const categories = researchSummary.findingsByCategory ?? {};
    for (const [category, findings] of Object.entries(categories)) {
        // A category may map to null/undefined; skip it instead of throwing.
        const first = findings?.[0]?.summary;
        if (first) {
            // Coerce to string, matching how actions are handled below.
            highlights.push(`${category}: ${truncate(String(first), 140)}`);
        }
    }
    if (Array.isArray(researchSummary.prioritizedActions)) {
        for (const action of researchSummary.prioritizedActions.slice(0, 3)) {
            if (action?.action)
                highlights.push(`action: ${truncate(String(action.action), 140)}`);
        }
    }
    return highlights.slice(0, 6);
}
|
|
111
|
+
/**
 * Assembles the plan object for an autonomous delivery run.
 *
 * @param {object} args
 * @param {string} args.goal - Goal text copied into the plan.
 * @param {string} [args.researchTarget] - Copied through, or null when nullish.
 * @param {object} [args.researchSummary] - Passed to summarizeResearch().
 * @param {Array<{label: string, command: string, phase?: string}>} args.implementationCommands
 *   Commands listed in order; each gets a 1-based `order` and a phase from
 *   normalizePhase().
 * @param {{scenarioId: *, prompt: *, toolName?: *}} [args.dogfood] - Optional
 *   dogfood scenario; the plan carries null when it is absent.
 * @param {boolean} [args.saveSessionNote] - Selects the delivery-note text.
 * @returns {object} Plan with goal, research highlights, implementation
 *   sequence, dogfood scenario, a fixed verification checklist and a delivery
 *   note.
 */
function buildPlan(args) {
    const researchHighlights = summarizeResearch(args.researchSummary);
    const implementationSequence = args.implementationCommands.map((command, index) => {
        return {
            order: index + 1,
            label: command.label,
            phase: normalizePhase(command),
            command: command.command,
        };
    });
    let dogfoodScenario = null;
    if (args.dogfood) {
        dogfoodScenario = {
            scenarioId: args.dogfood.scenarioId,
            prompt: args.dogfood.prompt,
            toolName: args.dogfood.toolName ?? null,
        };
    }
    let deliveryNote;
    if (args.saveSessionNote) {
        deliveryNote = "A session note will be persisted to filesystem at the end of the run.";
    }
    else {
        deliveryNote = "Session note persistence is disabled for this run.";
    }
    return {
        goal: args.goal,
        researchTarget: args.researchTarget ?? null,
        researchHighlights,
        implementationSequence,
        dogfoodScenario,
        verificationChecklist: [
            "Compile, lint, and test commands must all pass.",
            "Dogfood output must be judged before completion.",
            "Mandatory flywheel must have explicit static, happy-path, failure-path, gap-analysis, fix-and-reverify, and documentation evidence.",
            "At least one learning or durable note should be banked at the end of the run.",
        ],
        deliveryNote,
    };
}
|
|
140
|
+
/**
 * Groups command results into the four closed-loop steps
 * (compile, lint, test, self_debug), always returned in that order.
 *
 * Results whose phase is not one of those four are ignored. A step with no
 * results is reported as failed with a "No <phase> step was recorded."
 * output. Otherwise the step passes only if every result passed, and the
 * output lists `"<label>: passed|failed (<exitCode>)"` joined by "; ".
 *
 * @param {Array<{phase: string, label: string, passed: boolean, exitCode: *}>} commands
 * @returns {Array<{step: string, passed: boolean, output: string}>}
 */
function mapImplementationToClosedLoopSteps(commands) {
    const trackedPhases = ["compile", "lint", "test", "self_debug"];
    const grouped = new Map();
    for (const entry of commands) {
        if (!trackedPhases.includes(entry.phase)) {
            continue;
        }
        const bucket = grouped.get(entry.phase) ?? [];
        bucket.push(entry);
        grouped.set(entry.phase, bucket);
    }
    return trackedPhases.map((phase) => {
        const bucket = grouped.get(phase);
        if (!bucket || bucket.length === 0) {
            return {
                step: phase,
                passed: false,
                output: `No ${phase} step was recorded.`,
            };
        }
        const passed = bucket.every((item) => item.passed);
        const lines = bucket.map((item) => {
            const verdict = item.passed ? "passed" : "failed";
            return `${item.label}: ${verdict} (${item.exitCode})`;
        });
        return { step: phase, passed, output: lines.join("; ") };
    });
}
|
|
168
|
+
/**
 * Derives the six flywheel steps from the outcome of a delivery run.
 *
 * @param {object} args
 * @param {Array<{phase: string, label: string, passed: boolean}>} args.commandResults
 *   Results of the implementation commands.
 * @param {boolean} args.dogfoodCompleted - Whether the dogfood scenario completed.
 * @param {boolean} args.researchCompleted - Whether research/gap logging completed.
 * @param {boolean} args.documentationPlanned - Whether a documentation/note step is enabled.
 * @param {boolean} args.judgePassed - Whether the judge passed.
 * @returns {Array<{stepName: string, passed: *, output: string}>} Steps in
 *   the order static_analysis, happy_path_test, failure_path_test,
 *   gap_analysis, fix_and_reverify, deploy_and_document.
 */
function deriveFlywheelSteps(args) {
    const compileAndLint = args.commandResults.filter((result) => result.phase === "compile" || result.phase === "lint");
    const failurePath = args.commandResults.filter((result) => result.phase === "failure_path_test");
    const happyPathPasses = args.commandResults.filter((result) => result.phase === "happy_path_test" && result.passed);
    const anyFailures = args.commandResults.some((result) => !result.passed);
    // The happy-path step can pass via dogfood OR via a passing
    // happy_path_test command; the output must say which one applied, so it
    // never claims "no step passed" while `passed` is true.
    let happyPathOutput;
    if (args.dogfoodCompleted) {
        happyPathOutput = "Dogfood scenario completed.";
    }
    else if (happyPathPasses.length > 0) {
        happyPathOutput = `Happy-path test command passed: ${happyPathPasses.map((result) => result.label).join(", ")}`;
    }
    else {
        happyPathOutput = "No explicit happy-path dogfood or happy_path_test step passed.";
    }
    return [
        {
            stepName: "static_analysis",
            // Requires at least one compile/lint command, all passing.
            passed: compileAndLint.length > 0 && compileAndLint.every((result) => result.passed),
            output: compileAndLint.length > 0
                ? compileAndLint.map((result) => `${result.label}:${result.passed ? "pass" : "fail"}`).join(", ")
                : "No compile/lint command was supplied.",
        },
        {
            stepName: "happy_path_test",
            passed: args.dogfoodCompleted || happyPathPasses.length > 0,
            output: happyPathOutput,
        },
        {
            stepName: "failure_path_test",
            // Requires at least one failure-path command, all passing.
            passed: failurePath.length > 0 && failurePath.every((result) => result.passed),
            output: failurePath.length > 0
                ? failurePath.map((result) => `${result.label}:${result.passed ? "pass" : "fail"}`).join(", ")
                : "No failure-path command was supplied.",
        },
        {
            stepName: "gap_analysis",
            passed: args.researchCompleted,
            output: args.researchCompleted
                ? "Recon and gap logging completed."
                : "Research/gap analysis was skipped or failed.",
        },
        {
            stepName: "fix_and_reverify",
            passed: !anyFailures,
            output: anyFailures
                ? "At least one implementation command failed; fix and re-run are still required."
                : "All implementation commands passed on the current loop.",
        },
        {
            stepName: "deploy_and_document",
            passed: args.documentationPlanned && args.judgePassed,
            output: args.documentationPlanned && args.judgePassed
                ? "Documentation/note step is enabled and judge passed."
                : "Documentation is missing or the judge did not pass.",
        },
    ];
}
|
|
218
|
+
/**
 * Maps a judge verdict's named pass/fail criteria onto six numeric scores.
 *
 * Each score is 5 when the criterion with the matching name was reported as
 * passing and 2 otherwise (failed, missing, or no verdict at all). When a
 * criterion name appears more than once, the last occurrence wins.
 *
 * @param {{criteria?: Iterable<{criterion?: *, pass?: *}>}|null|undefined} verdict
 * @returns {{truthQuality: number, compressionQuality: number,
 *   anticipationQuality: number, outputQuality: number,
 *   delegationQuality: number, trustQuality: number}}
 */
function deriveDogfoodJudgeScores(verdict) {
    const passedByName = new Map();
    for (const item of verdict?.criteria ?? []) {
        const label = item?.criterion;
        if (!label) {
            continue;
        }
        passedByName.set(String(label), Boolean(item.pass));
    }
    // Score field -> name of the judge criterion that drives it.
    const scoreSources = [
        ["truthQuality", "Removed repeated cognition"],
        ["compressionQuality", "Suppressed noise"],
        ["anticipationQuality", "Surfaced right contradiction"],
        ["outputQuality", "Produced downstream artifact"],
        ["delegationQuality", "Returned usable packet"],
        ["trustQuality", "Trustworthy and reusable"],
    ];
    return Object.fromEntries(scoreSources.map(([field, criterionName]) => [
        field,
        passedByName.get(criterionName) === true ? 5 : 2,
    ]));
}
|
|
234
|
+
/**
 * Registers a new autonomous delivery run.
 *
 * Inserts a 'running' row into autonomous_delivery_runs, upserts durable
 * objects for the run and for the shared "self_directed_delivery" workflow,
 * links workflow -> run with an "executes" edge, and records a "run_created"
 * execution receipt.
 *
 * @param {string} goal - Goal text stored on the run and used as its label.
 * @returns {string} The generated run id.
 */
function createRun(goal) {
    ensureSchema();
    const database = getDb();
    const runId = genId("auto");
    const timestamp = new Date().toISOString();
    const insertRun = database.prepare(`
    INSERT INTO autonomous_delivery_runs (run_id, goal, status, created_at, updated_at)
    VALUES (?, ?, 'running', ?, ?)
  `);
    // created_at and updated_at start out identical.
    insertRun.run(runId, goal, timestamp, timestamp);
    const runObjectId = `run:${runId}`;
    const workflowObjectId = "workflow:self_directed_delivery";
    upsertDurableObject({
        id: runObjectId,
        kind: "run",
        label: goal,
        source: "autonomous_delivery",
        metadata: { runId, workflowName: "self_directed_delivery" },
    });
    upsertDurableObject({
        id: workflowObjectId,
        kind: "workflow",
        label: "Self-Directed Delivery Loop",
        source: "autonomous_delivery",
        metadata: { methodology: "ai_flywheel" },
    });
    linkDurableObjects({
        fromId: workflowObjectId,
        toId: runObjectId,
        edgeType: "executes",
        metadata: { runId },
    });
    recordExecutionReceipt({
        runId,
        objectId: runObjectId,
        actionType: "run_created",
        summary: `Autonomous delivery run created for goal: ${goal}`,
        status: "recorded",
        metadata: { workflowName: "self_directed_delivery" },
    });
    return runId;
}
|
|
273
|
+
/**
 * Opens a new stage (step) within an existing run.
 *
 * Inserts a 'running' row into autonomous_delivery_steps, bumps the parent
 * run's updated_at, upserts an "action" durable object for the step, links
 * run -> action with a "contains_stage" edge, and records a
 * "<stage>_started" execution receipt.
 *
 * @param {string} runId - Id of the run the stage belongs to.
 * @param {string} stage - Stage name.
 * @param {string} summary - Summary stored on the step row and the receipt.
 * @returns {string} The generated step id.
 */
function startStage(runId, stage, summary) {
    ensureSchema();
    const database = getDb();
    const stepId = genId("auto_step");
    const timestamp = new Date().toISOString();
    const insertStep = database.prepare(`
    INSERT INTO autonomous_delivery_steps (step_id, run_id, stage, status, summary, started_at)
    VALUES (?, ?, ?, 'running', ?, ?)
  `);
    insertStep.run(stepId, runId, stage, summary, timestamp);
    const touchRun = database.prepare(`UPDATE autonomous_delivery_runs SET updated_at = ? WHERE run_id = ?`);
    touchRun.run(timestamp, runId);
    const actionObjectId = `action:${stepId}`;
    upsertDurableObject({
        id: actionObjectId,
        kind: "action",
        label: `${stage} stage`,
        source: "autonomous_delivery",
        metadata: { runId, stepId, stage },
    });
    linkDurableObjects({
        fromId: `run:${runId}`,
        toId: actionObjectId,
        edgeType: "contains_stage",
        metadata: { stage },
    });
    recordExecutionReceipt({
        runId,
        stepId,
        objectId: actionObjectId,
        actionType: `${stage}_started`,
        summary,
        status: "running",
        metadata: { stage },
    });
    return stepId;
}
|
|
307
|
+
/**
 * Close a stage: persist its final status, summary and details, then record a
 * "stage_completed" execution receipt.
 *
 * The receipt carries the owning run id (looked up from the step row) so it
 * can be tied to its run like the receipts written when a stage starts or a
 * run is created/completed. If the step row cannot be found, no `runId` key is
 * added to the receipt.
 *
 * @param {string} stepId - Id of the step to close.
 * @param {string} status - Final stage status (callers use "completed", "failed", "skipped").
 * @param {string} summary - Summary stored on the step row and the receipt.
 * @param {*} details - Optional payload; serialized to JSON into `details_json`, or NULL when falsy.
 * @returns {void}
 */
function finishStage(stepId, status, summary, details) {
    ensureSchema();
    const db = getDb();
    const now = new Date().toISOString();
    db.prepare(`
        UPDATE autonomous_delivery_steps
        SET status = ?, summary = ?, details_json = ?, completed_at = ?
        WHERE step_id = ?
    `).run(status, summary, details ? JSON.stringify(details) : null, now, stepId);

    // Resolve the parent run so the completion receipt is attributable to it.
    const stepRow = db
        .prepare(`SELECT run_id FROM autonomous_delivery_steps WHERE step_id = ?`)
        .get(stepId);
    const runLink = stepRow?.run_id ? { runId: stepRow.run_id } : {};

    recordExecutionReceipt({
        ...runLink,
        stepId,
        objectId: `action:${stepId}`,
        actionType: "stage_completed",
        summary,
        status,
        metadata: { details },
    });
}
|
|
325
|
+
/**
 * Apply a partial update to a row of `autonomous_delivery_runs`.
 *
 * Each key of `patch` is treated as a column name and each value is stored as:
 *  - NULL when the value is null/undefined,
 *  - the string itself when it is a string,
 *  - its JSON serialization otherwise.
 * `updated_at` is always set to the current time. A missing or empty patch is
 * a no-op.
 *
 * Column names are interpolated into the SQL text (identifiers cannot be bound
 * as parameters), so every key must be a plain identifier; anything else is
 * rejected before the statement is prepared.
 *
 * @param {string} runId - Id of the run to update.
 * @param {Object<string, *>} patch - Column name -> new value.
 * @returns {void}
 * @throws {Error} If a patch key is not a plain SQL identifier.
 */
function updateRun(runId, patch) {
    ensureSchema();
    const db = getDb();
    const now = new Date().toISOString();
    const fields = Object.keys(patch ?? {});
    if (fields.length === 0)
        return;
    // Guard the only non-parameterized part of the statement.
    const identifierPattern = /^[A-Za-z_][A-Za-z0-9_]*$/;
    for (const field of fields) {
        if (!identifierPattern.test(field)) {
            throw new Error(`Invalid autonomous_delivery_runs column name: ${field}`);
        }
    }
    const assignments = fields.map((field) => `${field} = ?`);
    assignments.push("updated_at = ?");
    const values = fields.map((field) => {
        const value = patch[field];
        if (value == null)
            return null;
        return typeof value === "string" ? value : JSON.stringify(value);
    });
    // Bind order: patch values, then updated_at, then the WHERE run_id.
    values.push(now, runId);
    db.prepare(`
        UPDATE autonomous_delivery_runs
        SET ${assignments.join(", ")}
        WHERE run_id = ?
    `).run(...values);
}
|
|
347
|
+
/**
 * Finalize an autonomous delivery run.
 *
 * Writes the final status and summary to `autonomous_delivery_runs` (setting
 * both `updated_at` and `completed_at` to the same timestamp), records a
 * "run_completed" execution receipt, and records a local outcome whose
 * evidence points back at the run.
 *
 * @param {string} runId - Id of the run being completed.
 * @param {string} status - Final run status (callers use "completed", "needs_attention", "failed").
 * @param {string} summary - Summary stored on the run, the receipt and the outcome headline.
 * @returns {void}
 */
function completeRun(runId, status, summary) {
    ensureSchema();
    const database = getDb();
    const completedAt = new Date().toISOString();
    const runObjectId = `run:${runId}`;

    const finalizeRun = database.prepare(`
        UPDATE autonomous_delivery_runs
        SET status = ?, summary = ?, updated_at = ?, completed_at = ?
        WHERE run_id = ?
    `);
    finalizeRun.run(status, summary, completedAt, completedAt, runId);

    recordExecutionReceipt({
        runId: runId,
        objectId: runObjectId,
        actionType: "run_completed",
        summary: summary,
        status: status,
        metadata: { completedAt: completedAt },
    });

    recordLocalOutcome({
        id: `outcome:${runId}`,
        runId: runId,
        objectId: runObjectId,
        outcomeType: "autonomous_delivery",
        headline: summary,
        userValue: "One durable delivery run with explicit research, implementation, dogfood, and verification receipts.",
        stakeholderValue: "Replayable proof of work, verification state, and next actions tied to a single run.",
        status: status,
        evidence: [{ type: "run", id: runId }],
    });
}
|
|
376
|
+
/**
 * Look up a tool definition by its `name` across every registered toolset.
 *
 * Dynamically imports the toolset registry, asks it to load all domain
 * toolsets, then scans the flattened values of `TOOLSET_MAP` for the first
 * entry whose `name` matches.
 *
 * @param {string} toolName - Name of the tool to resolve.
 * @returns {Promise<object>} The first matching tool entry.
 * @throws {Error} If no loaded toolset contains a tool with that name.
 */
async function resolveToolByName(toolName) {
    const registry = await import("../toolsetRegistry.js");
    // Capture the exports up front, mirroring a destructured import.
    const loadToolsets = registry.loadToolsets;
    const domainKeys = registry.ALL_DOMAIN_KEYS;
    const toolsetMap = registry.TOOLSET_MAP;

    await loadToolsets(domainKeys);

    const candidates = Object.values(toolsetMap).flat();
    for (const candidate of candidates) {
        if (candidate.name === toolName) {
            return candidate;
        }
    }
    throw new Error(`Unable to resolve dogfood tool: ${toolName}`);
}
|
|
386
|
+
export const autonomousDeliveryTools = [
|
|
387
|
+
{
|
|
388
|
+
name: "run_self_directed_delivery_loop",
|
|
389
|
+
description: "Run a local-first autonomous delivery loop across exploratory research, planning, implementation commands, dogfood, verification, and judge. Persists one durable run in SQLite and emits per-stage receipts so the whole loop can be resumed, audited, and synced later.",
|
|
390
|
+
inputSchema: {
|
|
391
|
+
type: "object",
|
|
392
|
+
properties: {
|
|
393
|
+
goal: {
|
|
394
|
+
type: "string",
|
|
395
|
+
description: "The outcome this autonomous loop is trying to deliver.",
|
|
396
|
+
},
|
|
397
|
+
research: {
|
|
398
|
+
type: "object",
|
|
399
|
+
description: "Optional structured recon input for the research stage.",
|
|
400
|
+
properties: {
|
|
401
|
+
target: { type: "string" },
|
|
402
|
+
description: { type: "string" },
|
|
403
|
+
webEnrich: { type: "boolean" },
|
|
404
|
+
projectContext: { type: "object", additionalProperties: true },
|
|
405
|
+
findings: {
|
|
406
|
+
type: "array",
|
|
407
|
+
items: {
|
|
408
|
+
type: "object",
|
|
409
|
+
properties: {
|
|
410
|
+
category: {
|
|
411
|
+
type: "string",
|
|
412
|
+
enum: [
|
|
413
|
+
"breaking_change",
|
|
414
|
+
"new_feature",
|
|
415
|
+
"deprecation",
|
|
416
|
+
"best_practice",
|
|
417
|
+
"dataset",
|
|
418
|
+
"benchmark",
|
|
419
|
+
"codebase_pattern",
|
|
420
|
+
"existing_implementation",
|
|
421
|
+
],
|
|
422
|
+
},
|
|
423
|
+
summary: { type: "string" },
|
|
424
|
+
sourceUrl: { type: "string" },
|
|
425
|
+
relevance: { type: "string" },
|
|
426
|
+
actionItems: { type: "string" },
|
|
427
|
+
},
|
|
428
|
+
required: ["category", "summary"],
|
|
429
|
+
},
|
|
430
|
+
},
|
|
431
|
+
},
|
|
432
|
+
},
|
|
433
|
+
implementation: {
|
|
434
|
+
type: "object",
|
|
435
|
+
description: "Shell commands to execute as the implementation and verification loop.",
|
|
436
|
+
properties: {
|
|
437
|
+
stopOnFailure: { type: "boolean" },
|
|
438
|
+
commands: {
|
|
439
|
+
type: "array",
|
|
440
|
+
items: {
|
|
441
|
+
type: "object",
|
|
442
|
+
properties: {
|
|
443
|
+
label: { type: "string" },
|
|
444
|
+
command: { type: "string" },
|
|
445
|
+
cwd: { type: "string" },
|
|
446
|
+
timeoutMs: { type: "number" },
|
|
447
|
+
phase: {
|
|
448
|
+
type: "string",
|
|
449
|
+
enum: [
|
|
450
|
+
"implement",
|
|
451
|
+
"compile",
|
|
452
|
+
"lint",
|
|
453
|
+
"test",
|
|
454
|
+
"self_debug",
|
|
455
|
+
"happy_path_test",
|
|
456
|
+
"failure_path_test",
|
|
457
|
+
],
|
|
458
|
+
},
|
|
459
|
+
},
|
|
460
|
+
required: ["label", "command"],
|
|
461
|
+
},
|
|
462
|
+
},
|
|
463
|
+
},
|
|
464
|
+
},
|
|
465
|
+
dogfood: {
|
|
466
|
+
type: "object",
|
|
467
|
+
description: "Optional dogfood scenario or precomputed output to judge.",
|
|
468
|
+
properties: {
|
|
469
|
+
loopType: {
|
|
470
|
+
type: "string",
|
|
471
|
+
enum: ["weekly_reset", "pre_delegation", "company_search"],
|
|
472
|
+
},
|
|
473
|
+
scenarioId: { type: "string" },
|
|
474
|
+
prompt: { type: "string" },
|
|
475
|
+
toolName: { type: "string" },
|
|
476
|
+
toolArgs: { type: "object", additionalProperties: true },
|
|
477
|
+
output: { type: "object", additionalProperties: true },
|
|
478
|
+
userRole: {
|
|
479
|
+
type: "string",
|
|
480
|
+
enum: ["founder", "banker", "ceo", "operator", "researcher", "student"],
|
|
481
|
+
},
|
|
482
|
+
packetVersionUsed: { type: "string" },
|
|
483
|
+
packetType: { type: "string" },
|
|
484
|
+
attachedInputs: {
|
|
485
|
+
type: "array",
|
|
486
|
+
items: { type: "string" },
|
|
487
|
+
},
|
|
488
|
+
},
|
|
489
|
+
required: ["scenarioId", "prompt"],
|
|
490
|
+
},
|
|
491
|
+
autoRecordLearning: {
|
|
492
|
+
type: "boolean",
|
|
493
|
+
description: "Persist an automatic learning summary at the end of the run. Default: true.",
|
|
494
|
+
},
|
|
495
|
+
saveSessionNote: {
|
|
496
|
+
type: "boolean",
|
|
497
|
+
description: "Persist a filesystem session note at the end of the run. Default: true.",
|
|
498
|
+
},
|
|
499
|
+
learnings: {
|
|
500
|
+
type: "array",
|
|
501
|
+
description: "Optional explicit learnings to persist in addition to the automatic summary.",
|
|
502
|
+
items: {
|
|
503
|
+
type: "object",
|
|
504
|
+
properties: {
|
|
505
|
+
key: { type: "string" },
|
|
506
|
+
content: { type: "string" },
|
|
507
|
+
category: {
|
|
508
|
+
type: "string",
|
|
509
|
+
enum: ["edge_case", "gotcha", "pattern", "regression", "convention"],
|
|
510
|
+
},
|
|
511
|
+
tags: {
|
|
512
|
+
type: "array",
|
|
513
|
+
items: { type: "string" },
|
|
514
|
+
},
|
|
515
|
+
},
|
|
516
|
+
required: ["key", "content", "category"],
|
|
517
|
+
},
|
|
518
|
+
},
|
|
519
|
+
},
|
|
520
|
+
required: ["goal"],
|
|
521
|
+
},
|
|
522
|
+
handler: async (args) => {
|
|
523
|
+
ensureSchema();
|
|
524
|
+
const runId = createRun(args.goal);
|
|
525
|
+
const autoRecordLearning = args.autoRecordLearning !== false;
|
|
526
|
+
const saveSessionNote = args.saveSessionNote !== false;
|
|
527
|
+
const implementationCommands = args.implementation?.commands ?? [];
|
|
528
|
+
const stopOnFailure = args.implementation?.stopOnFailure !== false;
|
|
529
|
+
const researchTool = requireTool(reconTools, "run_recon");
|
|
530
|
+
const logReconFindingTool = requireTool(reconTools, "log_recon_finding");
|
|
531
|
+
const getReconSummaryTool = requireTool(reconTools, "get_recon_summary");
|
|
532
|
+
const startVerificationCycleTool = requireTool(verificationTools, "start_verification_cycle");
|
|
533
|
+
const logPhaseFindingsTool = requireTool(verificationTools, "log_phase_findings");
|
|
534
|
+
const logGapTool = requireTool(verificationTools, "log_gap");
|
|
535
|
+
const logTestResultTool = requireTool(verificationTools, "log_test_result");
|
|
536
|
+
const runClosedLoopTool = requireTool(qualityGateTools, "run_closed_loop");
|
|
537
|
+
const runMandatoryFlywheelTool = requireTool(flywheelTools, "run_mandatory_flywheel");
|
|
538
|
+
const startDogfoodSessionTool = requireTool(dogfoodJudgeTools, "start_dogfood_session");
|
|
539
|
+
const endDogfoodSessionTool = requireTool(dogfoodJudgeTools, "end_dogfood_session");
|
|
540
|
+
const recordDogfoodTelemetryTool = requireTool(dogfoodJudgeTools, "record_dogfood_telemetry");
|
|
541
|
+
const judgeSessionTool = requireTool(dogfoodJudgeTools, "judge_session");
|
|
542
|
+
const judgeToolOutputTool = requireTool(llmJudgeLoopTools, "judge_tool_output");
|
|
543
|
+
const recordLearningTool = requireTool(learningTools, "record_learning");
|
|
544
|
+
const saveSessionNoteTool = requireTool(sessionMemoryTools, "save_session_note");
|
|
545
|
+
let researchSummary = null;
|
|
546
|
+
let plan = null;
|
|
547
|
+
let commandResults = [];
|
|
548
|
+
let dogfoodSummary = null;
|
|
549
|
+
let judgeSummary = null;
|
|
550
|
+
let verificationSummary = null;
|
|
551
|
+
let learningSummary = null;
|
|
552
|
+
let verificationCycleId = null;
|
|
553
|
+
const recommendations = [];
|
|
554
|
+
try {
|
|
555
|
+
const researchStepId = startStage(runId, "research", "Starting recon and context gathering");
|
|
556
|
+
try {
|
|
557
|
+
if (args.research?.target) {
|
|
558
|
+
const reconRun = (await researchTool.handler({
|
|
559
|
+
target: args.research.target,
|
|
560
|
+
description: args.research.description,
|
|
561
|
+
projectContext: args.research.projectContext,
|
|
562
|
+
webEnrich: args.research.webEnrich ?? false,
|
|
563
|
+
}));
|
|
564
|
+
for (const finding of args.research.findings ?? []) {
|
|
565
|
+
await logReconFindingTool.handler({
|
|
566
|
+
sessionId: reconRun.sessionId,
|
|
567
|
+
sourceUrl: finding.sourceUrl,
|
|
568
|
+
category: finding.category,
|
|
569
|
+
summary: finding.summary,
|
|
570
|
+
relevance: finding.relevance,
|
|
571
|
+
actionItems: finding.actionItems,
|
|
572
|
+
});
|
|
573
|
+
}
|
|
574
|
+
researchSummary = await getReconSummaryTool.handler({
|
|
575
|
+
sessionId: reconRun.sessionId,
|
|
576
|
+
completeSession: true,
|
|
577
|
+
});
|
|
578
|
+
updateRun(runId, { research_summary: researchSummary });
|
|
579
|
+
finishStage(researchStepId, "completed", `Research captured for ${args.research.target}.`, researchSummary);
|
|
580
|
+
}
|
|
581
|
+
else {
|
|
582
|
+
finishStage(researchStepId, "skipped", "No research target provided.", null);
|
|
583
|
+
}
|
|
584
|
+
}
|
|
585
|
+
catch (error) {
|
|
586
|
+
finishStage(researchStepId, "failed", truncate(error?.message ?? "Research failed"), {
|
|
587
|
+
error: error?.message ?? String(error),
|
|
588
|
+
});
|
|
589
|
+
throw error;
|
|
590
|
+
}
|
|
591
|
+
const planStepId = startStage(runId, "plan", "Building autonomous execution plan");
|
|
592
|
+
plan = buildPlan({
|
|
593
|
+
goal: args.goal,
|
|
594
|
+
researchTarget: args.research?.target,
|
|
595
|
+
researchSummary,
|
|
596
|
+
implementationCommands,
|
|
597
|
+
dogfood: args.dogfood
|
|
598
|
+
? {
|
|
599
|
+
scenarioId: args.dogfood.scenarioId,
|
|
600
|
+
prompt: args.dogfood.prompt,
|
|
601
|
+
toolName: args.dogfood.toolName,
|
|
602
|
+
}
|
|
603
|
+
: null,
|
|
604
|
+
saveSessionNote,
|
|
605
|
+
});
|
|
606
|
+
updateRun(runId, { plan_json: plan });
|
|
607
|
+
finishStage(planStepId, "completed", "Execution plan generated.", plan);
|
|
608
|
+
const implementStepId = startStage(runId, "implement", "Running implementation commands");
|
|
609
|
+
try {
|
|
610
|
+
if (implementationCommands.length > 0) {
|
|
611
|
+
for (const command of implementationCommands) {
|
|
612
|
+
const phase = normalizePhase(command);
|
|
613
|
+
const result = safeExec(command.command, {
|
|
614
|
+
cwd: command.cwd,
|
|
615
|
+
timeout: command.timeoutMs,
|
|
616
|
+
});
|
|
617
|
+
commandResults.push({
|
|
618
|
+
label: command.label,
|
|
619
|
+
command: command.command,
|
|
620
|
+
cwd: command.cwd ?? process.cwd(),
|
|
621
|
+
phase,
|
|
622
|
+
passed: result.exitCode === 0,
|
|
623
|
+
exitCode: result.exitCode,
|
|
624
|
+
timedOut: result.timedOut,
|
|
625
|
+
durationMs: result.durationMs,
|
|
626
|
+
stdout: truncate(result.stdout, 1200),
|
|
627
|
+
stderr: truncate(result.stderr, 1200),
|
|
628
|
+
});
|
|
629
|
+
if (stopOnFailure && result.exitCode !== 0)
|
|
630
|
+
break;
|
|
631
|
+
}
|
|
632
|
+
const failedCommands = commandResults.filter((entry) => !entry.passed);
|
|
633
|
+
const implementationSummary = {
|
|
634
|
+
commandCount: commandResults.length,
|
|
635
|
+
failedCount: failedCommands.length,
|
|
636
|
+
commands: commandResults,
|
|
637
|
+
};
|
|
638
|
+
updateRun(runId, { implementation_summary: implementationSummary });
|
|
639
|
+
finishStage(implementStepId, failedCommands.length === 0 ? "completed" : "failed", failedCommands.length === 0
|
|
640
|
+
? `All ${commandResults.length} implementation commands passed.`
|
|
641
|
+
: `${failedCommands.length} implementation command(s) failed.`, implementationSummary);
|
|
642
|
+
if (failedCommands.length > 0) {
|
|
643
|
+
recommendations.push(`Fix failing command: ${failedCommands[0].label} (${failedCommands[0].command})`);
|
|
644
|
+
}
|
|
645
|
+
}
|
|
646
|
+
else {
|
|
647
|
+
finishStage(implementStepId, "skipped", "No implementation commands were supplied.", null);
|
|
648
|
+
}
|
|
649
|
+
}
|
|
650
|
+
catch (error) {
|
|
651
|
+
finishStage(implementStepId, "failed", truncate(error?.message ?? "Implementation failed"), {
|
|
652
|
+
error: error?.message ?? String(error),
|
|
653
|
+
});
|
|
654
|
+
throw error;
|
|
655
|
+
}
|
|
656
|
+
const dogfoodStepId = startStage(runId, "dogfood", "Running dogfood scenario");
|
|
657
|
+
let dogfoodSessionId = null;
|
|
658
|
+
try {
|
|
659
|
+
if (args.dogfood) {
|
|
660
|
+
const dogfoodSession = (await startDogfoodSessionTool.handler({
|
|
661
|
+
loopType: args.dogfood.loopType ?? "company_search",
|
|
662
|
+
packetVersionUsed: args.dogfood.packetVersionUsed,
|
|
663
|
+
}));
|
|
664
|
+
dogfoodSessionId = dogfoodSession.sessionId;
|
|
665
|
+
const dogfoodOutput = args.dogfood.toolName
|
|
666
|
+
? await (await resolveToolByName(args.dogfood.toolName)).handler(args.dogfood.toolArgs ?? {})
|
|
667
|
+
: args.dogfood.output ?? {
|
|
668
|
+
summary: "Dogfood stage was started without a toolName or output payload.",
|
|
669
|
+
goal: args.goal,
|
|
670
|
+
};
|
|
671
|
+
dogfoodSummary = {
|
|
672
|
+
sessionId: dogfoodSessionId,
|
|
673
|
+
scenarioId: args.dogfood.scenarioId,
|
|
674
|
+
toolName: args.dogfood.toolName ?? "dogfood_output",
|
|
675
|
+
output: dogfoodOutput,
|
|
676
|
+
};
|
|
677
|
+
updateRun(runId, { dogfood_summary: dogfoodSummary });
|
|
678
|
+
await recordDogfoodTelemetryTool.handler({
|
|
679
|
+
scenarioId: args.dogfood.scenarioId,
|
|
680
|
+
userRole: args.dogfood.userRole ?? "founder",
|
|
681
|
+
primaryPrompt: args.dogfood.prompt,
|
|
682
|
+
packetType: args.dogfood.packetType,
|
|
683
|
+
attachedInputs: args.dogfood.attachedInputs,
|
|
684
|
+
toolsInvoked: args.dogfood.toolName ? [args.dogfood.toolName] : [],
|
|
685
|
+
toolCallCount: args.dogfood.toolName ? 1 : 0,
|
|
686
|
+
artifactsProduced: Object.keys(dogfoodOutput ?? {}),
|
|
687
|
+
totalLatencyMs: 0,
|
|
688
|
+
});
|
|
689
|
+
finishStage(dogfoodStepId, "completed", `Dogfood scenario ${args.dogfood.scenarioId} completed.`, dogfoodSummary);
|
|
690
|
+
}
|
|
691
|
+
else {
|
|
692
|
+
finishStage(dogfoodStepId, "skipped", "No dogfood scenario was supplied.", null);
|
|
693
|
+
}
|
|
694
|
+
}
|
|
695
|
+
catch (error) {
|
|
696
|
+
finishStage(dogfoodStepId, "failed", truncate(error?.message ?? "Dogfood failed"), {
|
|
697
|
+
error: error?.message ?? String(error),
|
|
698
|
+
});
|
|
699
|
+
if (dogfoodSessionId) {
|
|
700
|
+
await endDogfoodSessionTool.handler({
|
|
701
|
+
sessionId: dogfoodSessionId,
|
|
702
|
+
notes: truncate(error?.message ?? "Dogfood failed"),
|
|
703
|
+
});
|
|
704
|
+
}
|
|
705
|
+
throw error;
|
|
706
|
+
}
|
|
707
|
+
const judgeStepId = startStage(runId, "judge", "Judging dogfood output");
|
|
708
|
+
try {
|
|
709
|
+
if (args.dogfood && dogfoodSummary?.output) {
|
|
710
|
+
judgeSummary = await judgeToolOutputTool.handler({
|
|
711
|
+
scenarioId: args.dogfood.scenarioId,
|
|
712
|
+
prompt: args.dogfood.prompt,
|
|
713
|
+
toolName: dogfoodSummary.toolName,
|
|
714
|
+
result: dogfoodSummary.output,
|
|
715
|
+
});
|
|
716
|
+
if (dogfoodSessionId) {
|
|
717
|
+
await judgeSessionTool.handler({
|
|
718
|
+
sessionId: dogfoodSessionId,
|
|
719
|
+
...deriveDogfoodJudgeScores(judgeSummary),
|
|
720
|
+
notes: judgeSummary.criteria
|
|
721
|
+
?.map((criterion) => `${criterion.pass ? "PASS" : "FAIL"} ${criterion.criterion}: ${criterion.reasoning}`)
|
|
722
|
+
.join("\n"),
|
|
723
|
+
failureClasses: judgeSummary.criteria
|
|
724
|
+
?.filter((criterion) => !criterion.pass)
|
|
725
|
+
.map((criterion) => criterion.criterion),
|
|
726
|
+
});
|
|
727
|
+
await endDogfoodSessionTool.handler({
|
|
728
|
+
sessionId: dogfoodSessionId,
|
|
729
|
+
notes: judgeSummary.verdict,
|
|
730
|
+
timeToFirstUsefulOutput: 0,
|
|
731
|
+
delegationSucceeded: judgeSummary.verdict === "PASS",
|
|
732
|
+
packetExported: judgeSummary.verdict === "PASS",
|
|
733
|
+
});
|
|
734
|
+
}
|
|
735
|
+
updateRun(runId, { judge_summary: judgeSummary });
|
|
736
|
+
finishStage(judgeStepId, judgeSummary.verdict === "PASS" ? "completed" : "failed", judgeSummary.verdict === "PASS"
|
|
737
|
+
? `Judge passed with score ${judgeSummary.score}.`
|
|
738
|
+
: `Judge failed with score ${judgeSummary.score}.`, judgeSummary);
|
|
739
|
+
if (judgeSummary.verdict !== "PASS") {
|
|
740
|
+
recommendations.push(...(judgeSummary.fixSuggestions ?? []));
|
|
741
|
+
}
|
|
742
|
+
}
|
|
743
|
+
else {
|
|
744
|
+
finishStage(judgeStepId, "skipped", "No dogfood output was available to judge.", null);
|
|
745
|
+
}
|
|
746
|
+
}
|
|
747
|
+
catch (error) {
|
|
748
|
+
finishStage(judgeStepId, "failed", truncate(error?.message ?? "Judge failed"), {
|
|
749
|
+
error: error?.message ?? String(error),
|
|
750
|
+
});
|
|
751
|
+
throw error;
|
|
752
|
+
}
|
|
753
|
+
const verifyStepId = startStage(runId, "verify", "Running verification cycle and mandatory flywheel");
|
|
754
|
+
try {
|
|
755
|
+
const verificationCycle = (await startVerificationCycleTool.handler({
|
|
756
|
+
title: `Autonomous delivery: ${args.goal}`,
|
|
757
|
+
description: plan?.deliveryNote,
|
|
758
|
+
}));
|
|
759
|
+
verificationCycleId = verificationCycle.cycleId;
|
|
760
|
+
await logPhaseFindingsTool.handler({
|
|
761
|
+
cycleId: verificationCycleId,
|
|
762
|
+
phaseNumber: 1,
|
|
763
|
+
status: "passed",
|
|
764
|
+
findings: {
|
|
765
|
+
goal: args.goal,
|
|
766
|
+
researchSummary: researchSummary ?? "No structured research was provided.",
|
|
767
|
+
},
|
|
768
|
+
});
|
|
769
|
+
const phase2Gaps = [];
|
|
770
|
+
for (const failedCommand of commandResults.filter((entry) => !entry.passed)) {
|
|
771
|
+
phase2Gaps.push({
|
|
772
|
+
severity: failedCommand.phase === "compile" ||
|
|
773
|
+
failedCommand.phase === "lint" ||
|
|
774
|
+
failedCommand.phase === "test"
|
|
775
|
+
? "HIGH"
|
|
776
|
+
: "MEDIUM",
|
|
777
|
+
title: `Command failed: ${failedCommand.label}`,
|
|
778
|
+
description: failedCommand.stderr || failedCommand.stdout || "Command failed without output.",
|
|
779
|
+
rootCause: `Autonomous implementation command exited with code ${failedCommand.exitCode}.`,
|
|
780
|
+
fixStrategy: "Fix the command or underlying code, then rerun the autonomous loop.",
|
|
781
|
+
});
|
|
782
|
+
}
|
|
783
|
+
for (const fixSuggestion of judgeSummary?.fixSuggestions ?? []) {
|
|
784
|
+
phase2Gaps.push({
|
|
785
|
+
severity: "MEDIUM",
|
|
786
|
+
title: `Judge follow-up: ${truncate(String(fixSuggestion), 90)}`,
|
|
787
|
+
description: String(fixSuggestion),
|
|
788
|
+
rootCause: "The dogfood judge detected missing quality or structure in the output.",
|
|
789
|
+
fixStrategy: "Apply the suggested fix, rerun dogfood, and re-judge the output.",
|
|
790
|
+
});
|
|
791
|
+
}
|
|
792
|
+
for (const gap of phase2Gaps) {
|
|
793
|
+
await logGapTool.handler({
|
|
794
|
+
cycleId: verificationCycleId,
|
|
795
|
+
severity: gap.severity,
|
|
796
|
+
title: gap.title,
|
|
797
|
+
description: gap.description,
|
|
798
|
+
rootCause: gap.rootCause,
|
|
799
|
+
fixStrategy: gap.fixStrategy,
|
|
800
|
+
});
|
|
801
|
+
}
|
|
802
|
+
await logPhaseFindingsTool.handler({
|
|
803
|
+
cycleId: verificationCycleId,
|
|
804
|
+
phaseNumber: 2,
|
|
805
|
+
status: "passed",
|
|
806
|
+
findings: {
|
|
807
|
+
gapCount: phase2Gaps.length,
|
|
808
|
+
gaps: phase2Gaps,
|
|
809
|
+
},
|
|
810
|
+
});
|
|
811
|
+
const implementationPassed = commandResults.every((entry) => entry.passed);
|
|
812
|
+
await logPhaseFindingsTool.handler({
|
|
813
|
+
cycleId: verificationCycleId,
|
|
814
|
+
phaseNumber: 3,
|
|
815
|
+
status: implementationPassed ? "passed" : "failed",
|
|
816
|
+
findings: {
|
|
817
|
+
commandCount: commandResults.length,
|
|
818
|
+
failedCount: commandResults.filter((entry) => !entry.passed).length,
|
|
819
|
+
},
|
|
820
|
+
});
|
|
821
|
+
const closedLoopSteps = mapImplementationToClosedLoopSteps(commandResults);
|
|
822
|
+
for (const step of closedLoopSteps) {
|
|
823
|
+
await logTestResultTool.handler({
|
|
824
|
+
cycleId: verificationCycleId,
|
|
825
|
+
layer: step.step === "compile" || step.step === "lint"
|
|
826
|
+
? "static"
|
|
827
|
+
: step.step === "test"
|
|
828
|
+
? "unit"
|
|
829
|
+
: "manual",
|
|
830
|
+
label: step.step,
|
|
831
|
+
passed: step.passed,
|
|
832
|
+
output: step.output,
|
|
833
|
+
});
|
|
834
|
+
}
|
|
835
|
+
const closedLoopResult = (await runClosedLoopTool.handler({
|
|
836
|
+
cycleId: verificationCycleId,
|
|
837
|
+
steps: closedLoopSteps,
|
|
838
|
+
}));
|
|
839
|
+
const flywheelResult = (await runMandatoryFlywheelTool.handler({
|
|
840
|
+
target: args.goal,
|
|
841
|
+
cycleId: verificationCycleId,
|
|
842
|
+
steps: deriveFlywheelSteps({
|
|
843
|
+
commandResults,
|
|
844
|
+
dogfoodCompleted: Boolean(args.dogfood && dogfoodSummary?.output),
|
|
845
|
+
judgePassed: judgeSummary?.verdict === "PASS",
|
|
846
|
+
researchCompleted: Boolean(researchSummary || args.research?.target),
|
|
847
|
+
documentationPlanned: autoRecordLearning || saveSessionNote,
|
|
848
|
+
}),
|
|
849
|
+
}));
|
|
850
|
+
await logPhaseFindingsTool.handler({
|
|
851
|
+
cycleId: verificationCycleId,
|
|
852
|
+
phaseNumber: 4,
|
|
853
|
+
status: closedLoopResult.allPassed ? "passed" : "failed",
|
|
854
|
+
findings: closedLoopResult,
|
|
855
|
+
});
|
|
856
|
+
if (closedLoopResult.allPassed) {
|
|
857
|
+
await logPhaseFindingsTool.handler({
|
|
858
|
+
cycleId: verificationCycleId,
|
|
859
|
+
phaseNumber: 5,
|
|
860
|
+
status: flywheelResult.passed && judgeSummary?.verdict === "PASS" ? "passed" : "failed",
|
|
861
|
+
findings: {
|
|
862
|
+
flywheel: flywheelResult,
|
|
863
|
+
judge: judgeSummary,
|
|
864
|
+
},
|
|
865
|
+
});
|
|
866
|
+
}
|
|
867
|
+
verificationSummary = {
|
|
868
|
+
cycleId: verificationCycleId,
|
|
869
|
+
closedLoop: closedLoopResult,
|
|
870
|
+
flywheel: flywheelResult,
|
|
871
|
+
};
|
|
872
|
+
updateRun(runId, { verification_summary: verificationSummary });
|
|
873
|
+
finishStage(verifyStepId, closedLoopResult.allPassed && flywheelResult.passed ? "completed" : "failed", closedLoopResult.allPassed && flywheelResult.passed
|
|
874
|
+
? "Verification cycle and flywheel passed."
|
|
875
|
+
: "Verification cycle or flywheel failed.", verificationSummary);
|
|
876
|
+
if (!closedLoopResult.allPassed)
|
|
877
|
+
recommendations.push(closedLoopResult.guidance);
|
|
878
|
+
if (!flywheelResult.passed)
|
|
879
|
+
recommendations.push(flywheelResult.guidance);
|
|
880
|
+
}
|
|
881
|
+
catch (error) {
|
|
882
|
+
finishStage(verifyStepId, "failed", truncate(error?.message ?? "Verification failed"), {
|
|
883
|
+
error: error?.message ?? String(error),
|
|
884
|
+
});
|
|
885
|
+
throw error;
|
|
886
|
+
}
|
|
887
|
+
const learnStepId = startStage(runId, "learn", "Persisting learning artifacts");
|
|
888
|
+
const recordedLearnings = [];
|
|
889
|
+
const persistedNotePaths = [];
|
|
890
|
+
try {
|
|
891
|
+
if (autoRecordLearning) {
|
|
892
|
+
const autoKey = `autonomous-loop:${slugify(args.goal)}`;
|
|
893
|
+
await recordLearningTool.handler({
|
|
894
|
+
key: autoKey,
|
|
895
|
+
category: judgeSummary?.verdict === "PASS" ? "pattern" : "regression",
|
|
896
|
+
content: [
|
|
897
|
+
`Goal: ${args.goal}`,
|
|
898
|
+
`Run ID: ${runId}`,
|
|
899
|
+
`Judge: ${judgeSummary?.verdict ?? "not_run"} ${judgeSummary?.score ?? ""}`.trim(),
|
|
900
|
+
`Verification cycle: ${verificationCycleId ?? "not_started"}`,
|
|
901
|
+
`Recommendations: ${(recommendations.length > 0 ? recommendations : ["none"]).join(" | ")}`,
|
|
902
|
+
].join("\n"),
|
|
903
|
+
tags: ["autonomous-loop", "delivery"],
|
|
904
|
+
sourceCycle: verificationCycleId ?? undefined,
|
|
905
|
+
});
|
|
906
|
+
recordedLearnings.push(autoKey);
|
|
907
|
+
}
|
|
908
|
+
for (const learning of args.learnings ?? []) {
|
|
909
|
+
await recordLearningTool.handler({
|
|
910
|
+
key: learning.key,
|
|
911
|
+
content: learning.content,
|
|
912
|
+
category: learning.category,
|
|
913
|
+
tags: learning.tags,
|
|
914
|
+
sourceCycle: verificationCycleId ?? undefined,
|
|
915
|
+
});
|
|
916
|
+
recordedLearnings.push(learning.key);
|
|
917
|
+
}
|
|
918
|
+
if (saveSessionNote) {
|
|
919
|
+
const savedNote = (await saveSessionNoteTool.handler({
|
|
920
|
+
title: `Autonomous delivery loop: ${args.goal}`,
|
|
921
|
+
category: "progress",
|
|
922
|
+
content: [
|
|
923
|
+
`Run ID: ${runId}`,
|
|
924
|
+
`Goal: ${args.goal}`,
|
|
925
|
+
`Judge verdict: ${judgeSummary?.verdict ?? "not_run"} ${judgeSummary?.score ?? ""}`.trim(),
|
|
926
|
+
`Verification cycle: ${verificationCycleId ?? "not_started"}`,
|
|
927
|
+
`Top recommendations:`,
|
|
928
|
+
...(recommendations.length > 0 ? recommendations : ["- none"]),
|
|
929
|
+
].join("\n"),
|
|
930
|
+
tags: ["autonomous-loop", "delivery"],
|
|
931
|
+
citedFrom: args.goal,
|
|
932
|
+
}));
|
|
933
|
+
if (savedNote?.filePath)
|
|
934
|
+
persistedNotePaths.push(savedNote.filePath);
|
|
935
|
+
}
|
|
936
|
+
if (verificationCycleId && verificationSummary?.closedLoop?.allPassed && verificationSummary?.flywheel?.passed) {
|
|
937
|
+
await logPhaseFindingsTool.handler({
|
|
938
|
+
cycleId: verificationCycleId,
|
|
939
|
+
phaseNumber: 6,
|
|
940
|
+
status: "passed",
|
|
941
|
+
findings: {
|
|
942
|
+
recordedLearnings,
|
|
943
|
+
persistedNotePaths,
|
|
944
|
+
},
|
|
945
|
+
});
|
|
946
|
+
}
|
|
947
|
+
learningSummary = { recordedLearnings, persistedNotePaths };
|
|
948
|
+
updateRun(runId, { learning_summary: learningSummary });
|
|
949
|
+
finishStage(learnStepId, "completed", "Learning artifacts persisted.", learningSummary);
|
|
950
|
+
}
|
|
951
|
+
catch (error) {
|
|
952
|
+
finishStage(learnStepId, "failed", truncate(error?.message ?? "Learning persistence failed"), {
|
|
953
|
+
error: error?.message ?? String(error),
|
|
954
|
+
});
|
|
955
|
+
throw error;
|
|
956
|
+
}
|
|
957
|
+
const finalStatus = verificationSummary?.closedLoop?.allPassed &&
|
|
958
|
+
verificationSummary?.flywheel?.passed &&
|
|
959
|
+
judgeSummary?.verdict === "PASS"
|
|
960
|
+
? "completed"
|
|
961
|
+
: judgeSummary?.verdict === "PASS" && commandResults.every((entry) => entry.passed)
|
|
962
|
+
? "needs_attention"
|
|
963
|
+
: "failed";
|
|
964
|
+
const summary = finalStatus === "completed"
|
|
965
|
+
? `Autonomous delivery loop completed for "${args.goal}".`
|
|
966
|
+
: `Autonomous delivery loop finished with status "${finalStatus}" for "${args.goal}".`;
|
|
967
|
+
completeRun(runId, finalStatus, summary);
|
|
968
|
+
return {
|
|
969
|
+
runId,
|
|
970
|
+
status: finalStatus,
|
|
971
|
+
summary,
|
|
972
|
+
researchSummary,
|
|
973
|
+
plan,
|
|
974
|
+
implementation: {
|
|
975
|
+
commandCount: commandResults.length,
|
|
976
|
+
failedCount: commandResults.filter((entry) => !entry.passed).length,
|
|
977
|
+
commands: commandResults,
|
|
978
|
+
},
|
|
979
|
+
dogfood: dogfoodSummary,
|
|
980
|
+
judge: judgeSummary,
|
|
981
|
+
verification: verificationSummary,
|
|
982
|
+
learning: learningSummary,
|
|
983
|
+
recommendations,
|
|
984
|
+
};
|
|
985
|
+
}
|
|
986
|
+
catch (error) {
|
|
987
|
+
const summary = `Autonomous delivery loop failed for "${args.goal}": ${error?.message ?? String(error)}`;
|
|
988
|
+
completeRun(runId, "failed", summary);
|
|
989
|
+
return {
|
|
990
|
+
runId,
|
|
991
|
+
status: "failed",
|
|
992
|
+
summary,
|
|
993
|
+
recommendations,
|
|
994
|
+
};
|
|
995
|
+
}
|
|
996
|
+
},
|
|
997
|
+
},
|
|
998
|
+
{
|
|
999
|
+
name: "get_self_directed_delivery_run",
|
|
1000
|
+
description: "Load a previously recorded autonomous delivery run with all stage receipts, summaries, and final recommendations.",
|
|
1001
|
+
inputSchema: {
|
|
1002
|
+
type: "object",
|
|
1003
|
+
properties: {
|
|
1004
|
+
runId: {
|
|
1005
|
+
type: "string",
|
|
1006
|
+
description: "Autonomous delivery run ID.",
|
|
1007
|
+
},
|
|
1008
|
+
},
|
|
1009
|
+
required: ["runId"],
|
|
1010
|
+
},
|
|
1011
|
+
annotations: { readOnlyHint: true },
|
|
1012
|
+
handler: async (args) => {
|
|
1013
|
+
ensureSchema();
|
|
1014
|
+
const db = getDb();
|
|
1015
|
+
const run = db.prepare(`SELECT * FROM autonomous_delivery_runs WHERE run_id = ?`).get(args.runId);
|
|
1016
|
+
if (!run) {
|
|
1017
|
+
return {
|
|
1018
|
+
error: true,
|
|
1019
|
+
message: `Autonomous delivery run not found: ${args.runId}`,
|
|
1020
|
+
};
|
|
1021
|
+
}
|
|
1022
|
+
const steps = db
|
|
1023
|
+
.prepare(`
|
|
1024
|
+
SELECT * FROM autonomous_delivery_steps
|
|
1025
|
+
WHERE run_id = ?
|
|
1026
|
+
ORDER BY started_at ASC
|
|
1027
|
+
`)
|
|
1028
|
+
.all(args.runId);
|
|
1029
|
+
const parse = (value) => {
|
|
1030
|
+
if (!value)
|
|
1031
|
+
return null;
|
|
1032
|
+
try {
|
|
1033
|
+
return JSON.parse(value);
|
|
1034
|
+
}
|
|
1035
|
+
catch {
|
|
1036
|
+
return value;
|
|
1037
|
+
}
|
|
1038
|
+
};
|
|
1039
|
+
return {
|
|
1040
|
+
runId: run.run_id,
|
|
1041
|
+
goal: run.goal,
|
|
1042
|
+
status: run.status,
|
|
1043
|
+
summary: run.summary,
|
|
1044
|
+
createdAt: run.created_at,
|
|
1045
|
+
completedAt: run.completed_at,
|
|
1046
|
+
researchSummary: parse(run.research_summary),
|
|
1047
|
+
plan: parse(run.plan_json),
|
|
1048
|
+
implementationSummary: parse(run.implementation_summary),
|
|
1049
|
+
dogfoodSummary: parse(run.dogfood_summary),
|
|
1050
|
+
judgeSummary: parse(run.judge_summary),
|
|
1051
|
+
verificationSummary: parse(run.verification_summary),
|
|
1052
|
+
learningSummary: parse(run.learning_summary),
|
|
1053
|
+
steps: steps.map((step) => ({
|
|
1054
|
+
stepId: step.step_id,
|
|
1055
|
+
stage: step.stage,
|
|
1056
|
+
status: step.status,
|
|
1057
|
+
summary: step.summary,
|
|
1058
|
+
startedAt: step.started_at,
|
|
1059
|
+
completedAt: step.completed_at,
|
|
1060
|
+
details: parse(step.details_json),
|
|
1061
|
+
})),
|
|
1062
|
+
};
|
|
1063
|
+
},
|
|
1064
|
+
},
|
|
1065
|
+
{
|
|
1066
|
+
name: "list_self_directed_delivery_runs",
|
|
1067
|
+
description: "List recent autonomous delivery runs so operators can reopen or compare them.",
|
|
1068
|
+
inputSchema: {
|
|
1069
|
+
type: "object",
|
|
1070
|
+
properties: {
|
|
1071
|
+
limit: {
|
|
1072
|
+
type: "number",
|
|
1073
|
+
description: "Maximum number of runs to return. Default: 20.",
|
|
1074
|
+
},
|
|
1075
|
+
},
|
|
1076
|
+
},
|
|
1077
|
+
annotations: { readOnlyHint: true },
|
|
1078
|
+
handler: async (args) => {
|
|
1079
|
+
ensureSchema();
|
|
1080
|
+
const db = getDb();
|
|
1081
|
+
const limit = Math.max(1, Math.min(100, Math.floor(args.limit ?? 20)));
|
|
1082
|
+
const runs = db
|
|
1083
|
+
.prepare(`
|
|
1084
|
+
SELECT run_id, goal, status, summary, created_at, completed_at
|
|
1085
|
+
FROM autonomous_delivery_runs
|
|
1086
|
+
ORDER BY created_at DESC
|
|
1087
|
+
LIMIT ?
|
|
1088
|
+
`)
|
|
1089
|
+
.all(limit);
|
|
1090
|
+
return {
|
|
1091
|
+
count: runs.length,
|
|
1092
|
+
runs: runs.map((run) => ({
|
|
1093
|
+
runId: run.run_id,
|
|
1094
|
+
goal: run.goal,
|
|
1095
|
+
status: run.status,
|
|
1096
|
+
summary: run.summary,
|
|
1097
|
+
createdAt: run.created_at,
|
|
1098
|
+
completedAt: run.completed_at,
|
|
1099
|
+
})),
|
|
1100
|
+
};
|
|
1101
|
+
},
|
|
1102
|
+
},
|
|
1103
|
+
];
|
|
1104
|
+
//# sourceMappingURL=autonomousDeliveryTools.js.map
|