nodebench-mcp 2.70.0 → 3.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (361) hide show
  1. package/README.md +95 -41
  2. package/dist/agents/alertRouter.d.ts +38 -0
  3. package/dist/agents/alertRouter.js +151 -0
  4. package/dist/agents/alertRouter.js.map +1 -0
  5. package/dist/agents/entityMemory.d.ts +40 -0
  6. package/dist/agents/entityMemory.js +64 -0
  7. package/dist/agents/entityMemory.js.map +1 -0
  8. package/dist/agents/subAgents.d.ts +35 -0
  9. package/dist/agents/subAgents.js +62 -0
  10. package/dist/agents/subAgents.js.map +1 -0
  11. package/dist/benchmarks/benchmarkRunner.js +14 -0
  12. package/dist/benchmarks/benchmarkRunner.js.map +1 -1
  13. package/dist/benchmarks/chainEval.js +107 -0
  14. package/dist/benchmarks/chainEval.js.map +1 -1
  15. package/dist/benchmarks/llmJudgeEval.js +85 -0
  16. package/dist/benchmarks/llmJudgeEval.js.map +1 -1
  17. package/dist/benchmarks/searchQualityEval.js +118 -5
  18. package/dist/benchmarks/searchQualityEval.js.map +1 -1
  19. package/dist/cli/search.d.ts +13 -0
  20. package/dist/cli/search.js +130 -0
  21. package/dist/cli/search.js.map +1 -0
  22. package/dist/dashboard/operatingDashboardHtml.js +2 -1
  23. package/dist/dashboard/operatingDashboardHtml.js.map +1 -1
  24. package/dist/dashboard/operatingServer.js +3 -2
  25. package/dist/dashboard/operatingServer.js.map +1 -1
  26. package/dist/db.d.ts +6 -2
  27. package/dist/db.js +521 -6
  28. package/dist/db.js.map +1 -1
  29. package/dist/index.js +349 -67
  30. package/dist/index.js.map +1 -1
  31. package/dist/packageInfo.d.ts +3 -0
  32. package/dist/packageInfo.js +32 -0
  33. package/dist/packageInfo.js.map +1 -0
  34. package/dist/profiler/behaviorStore.d.ts +97 -0
  35. package/dist/profiler/behaviorStore.js +276 -0
  36. package/dist/profiler/behaviorStore.js.map +1 -0
  37. package/dist/profiler/eventCollector.d.ts +119 -0
  38. package/dist/profiler/eventCollector.js +267 -0
  39. package/dist/profiler/eventCollector.js.map +1 -0
  40. package/dist/profiler/index.d.ts +15 -0
  41. package/dist/profiler/index.js +16 -0
  42. package/dist/profiler/index.js.map +1 -0
  43. package/dist/profiler/mcpProxy.d.ts +49 -0
  44. package/dist/profiler/mcpProxy.js +123 -0
  45. package/dist/profiler/mcpProxy.js.map +1 -0
  46. package/dist/profiler/modelRouter.d.ts +30 -0
  47. package/dist/profiler/modelRouter.js +99 -0
  48. package/dist/profiler/modelRouter.js.map +1 -0
  49. package/dist/profiler/otelReceiver.d.ts +17 -0
  50. package/dist/profiler/otelReceiver.js +62 -0
  51. package/dist/profiler/otelReceiver.js.map +1 -0
  52. package/dist/profiler/proofEngine.d.ts +41 -0
  53. package/dist/profiler/proofEngine.js +93 -0
  54. package/dist/profiler/proofEngine.js.map +1 -0
  55. package/dist/profiler/workflowTemplates.d.ts +41 -0
  56. package/dist/profiler/workflowTemplates.js +95 -0
  57. package/dist/profiler/workflowTemplates.js.map +1 -0
  58. package/dist/providers/localMemoryProvider.js +3 -2
  59. package/dist/providers/localMemoryProvider.js.map +1 -1
  60. package/dist/runtimeConfig.d.ts +11 -0
  61. package/dist/runtimeConfig.js +27 -0
  62. package/dist/runtimeConfig.js.map +1 -0
  63. package/dist/sandboxApi.js +2 -1
  64. package/dist/sandboxApi.js.map +1 -1
  65. package/dist/security/auditLog.js +8 -3
  66. package/dist/security/auditLog.js.map +1 -1
  67. package/dist/subconscious/blocks.d.ts +43 -0
  68. package/dist/subconscious/blocks.js +158 -0
  69. package/dist/subconscious/blocks.js.map +1 -0
  70. package/dist/subconscious/classifier.d.ts +22 -0
  71. package/dist/subconscious/classifier.js +118 -0
  72. package/dist/subconscious/classifier.js.map +1 -0
  73. package/dist/subconscious/graphEngine.d.ts +65 -0
  74. package/dist/subconscious/graphEngine.js +234 -0
  75. package/dist/subconscious/graphEngine.js.map +1 -0
  76. package/dist/subconscious/index.d.ts +19 -0
  77. package/dist/subconscious/index.js +20 -0
  78. package/dist/subconscious/index.js.map +1 -0
  79. package/dist/subconscious/tools.d.ts +5 -0
  80. package/dist/subconscious/tools.js +255 -0
  81. package/dist/subconscious/tools.js.map +1 -0
  82. package/dist/subconscious/whisperPolicy.d.ts +20 -0
  83. package/dist/subconscious/whisperPolicy.js +171 -0
  84. package/dist/subconscious/whisperPolicy.js.map +1 -0
  85. package/dist/sweep/engine.d.ts +27 -0
  86. package/dist/sweep/engine.js +244 -0
  87. package/dist/sweep/engine.js.map +1 -0
  88. package/dist/sweep/index.d.ts +9 -0
  89. package/dist/sweep/index.js +8 -0
  90. package/dist/sweep/index.js.map +1 -0
  91. package/dist/sweep/sources/github_trending.d.ts +6 -0
  92. package/dist/sweep/sources/github_trending.js +37 -0
  93. package/dist/sweep/sources/github_trending.js.map +1 -0
  94. package/dist/sweep/sources/hackernews.d.ts +7 -0
  95. package/dist/sweep/sources/hackernews.js +57 -0
  96. package/dist/sweep/sources/hackernews.js.map +1 -0
  97. package/dist/sweep/sources/openbb_finance.d.ts +9 -0
  98. package/dist/sweep/sources/openbb_finance.js +46 -0
  99. package/dist/sweep/sources/openbb_finance.js.map +1 -0
  100. package/dist/sweep/sources/producthunt.d.ts +6 -0
  101. package/dist/sweep/sources/producthunt.js +41 -0
  102. package/dist/sweep/sources/producthunt.js.map +1 -0
  103. package/dist/sweep/sources/web_signals.d.ts +7 -0
  104. package/dist/sweep/sources/web_signals.js +63 -0
  105. package/dist/sweep/sources/web_signals.js.map +1 -0
  106. package/dist/sweep/sources/yahoo_finance.d.ts +6 -0
  107. package/dist/sweep/sources/yahoo_finance.js +47 -0
  108. package/dist/sweep/sources/yahoo_finance.js.map +1 -0
  109. package/dist/sweep/types.d.ts +50 -0
  110. package/dist/sweep/types.js +9 -0
  111. package/dist/sweep/types.js.map +1 -0
  112. package/dist/sync/founderEpisodeStore.d.ts +98 -0
  113. package/dist/sync/founderEpisodeStore.js +230 -0
  114. package/dist/sync/founderEpisodeStore.js.map +1 -0
  115. package/dist/sync/hyperloopArchive.d.ts +51 -0
  116. package/dist/sync/hyperloopArchive.js +153 -0
  117. package/dist/sync/hyperloopArchive.js.map +1 -0
  118. package/dist/sync/hyperloopEval.d.ts +123 -0
  119. package/dist/sync/hyperloopEval.js +389 -0
  120. package/dist/sync/hyperloopEval.js.map +1 -0
  121. package/dist/sync/protocol.d.ts +172 -0
  122. package/dist/sync/protocol.js +9 -0
  123. package/dist/sync/protocol.js.map +1 -0
  124. package/dist/sync/sessionMemory.d.ts +47 -0
  125. package/dist/sync/sessionMemory.js +138 -0
  126. package/dist/sync/sessionMemory.js.map +1 -0
  127. package/dist/sync/store.d.ts +384 -0
  128. package/dist/sync/store.js +1435 -0
  129. package/dist/sync/store.js.map +1 -0
  130. package/dist/sync/syncBridgeClient.d.ts +30 -0
  131. package/dist/sync/syncBridgeClient.js +172 -0
  132. package/dist/sync/syncBridgeClient.js.map +1 -0
  133. package/dist/tools/autonomousDeliveryTools.d.ts +2 -0
  134. package/dist/tools/autonomousDeliveryTools.js +1104 -0
  135. package/dist/tools/autonomousDeliveryTools.js.map +1 -0
  136. package/dist/tools/boilerplateTools.js +10 -9
  137. package/dist/tools/boilerplateTools.js.map +1 -1
  138. package/dist/tools/claudeCodeIngestTools.d.ts +10 -0
  139. package/dist/tools/claudeCodeIngestTools.js +347 -0
  140. package/dist/tools/claudeCodeIngestTools.js.map +1 -0
  141. package/dist/tools/coreWorkflowTools.d.ts +2 -0
  142. package/dist/tools/coreWorkflowTools.js +488 -0
  143. package/dist/tools/coreWorkflowTools.js.map +1 -0
  144. package/dist/tools/deltaTools.d.ts +15 -0
  145. package/dist/tools/deltaTools.js +1522 -0
  146. package/dist/tools/deltaTools.js.map +1 -0
  147. package/dist/tools/documentationTools.js +2 -1
  148. package/dist/tools/documentationTools.js.map +1 -1
  149. package/dist/tools/entityLookupTools.d.ts +14 -0
  150. package/dist/tools/entityLookupTools.js +159 -0
  151. package/dist/tools/entityLookupTools.js.map +1 -0
  152. package/dist/tools/entityTemporalTools.d.ts +12 -0
  153. package/dist/tools/entityTemporalTools.js +330 -0
  154. package/dist/tools/entityTemporalTools.js.map +1 -0
  155. package/dist/tools/founderLocalPipeline.d.ts +215 -0
  156. package/dist/tools/founderLocalPipeline.js +1516 -2
  157. package/dist/tools/founderLocalPipeline.js.map +1 -1
  158. package/dist/tools/founderOperatingModel.d.ts +120 -0
  159. package/dist/tools/founderOperatingModel.js +469 -0
  160. package/dist/tools/founderOperatingModel.js.map +1 -0
  161. package/dist/tools/founderOperatingModelTools.d.ts +2 -0
  162. package/dist/tools/founderOperatingModelTools.js +169 -0
  163. package/dist/tools/founderOperatingModelTools.js.map +1 -0
  164. package/dist/tools/founderStrategicOpsTools.d.ts +2 -0
  165. package/dist/tools/founderStrategicOpsTools.js +1310 -0
  166. package/dist/tools/founderStrategicOpsTools.js.map +1 -0
  167. package/dist/tools/graphifyTools.d.ts +19 -0
  168. package/dist/tools/graphifyTools.js +375 -0
  169. package/dist/tools/graphifyTools.js.map +1 -0
  170. package/dist/tools/index.d.ts +3 -0
  171. package/dist/tools/index.js +4 -0
  172. package/dist/tools/index.js.map +1 -1
  173. package/dist/tools/monteCarloTools.d.ts +16 -0
  174. package/dist/tools/monteCarloTools.js +225 -0
  175. package/dist/tools/monteCarloTools.js.map +1 -0
  176. package/dist/tools/packetCompilerTools.d.ts +12 -0
  177. package/dist/tools/packetCompilerTools.js +322 -0
  178. package/dist/tools/packetCompilerTools.js.map +1 -0
  179. package/dist/tools/planSynthesisTools.d.ts +15 -0
  180. package/dist/tools/planSynthesisTools.js +455 -0
  181. package/dist/tools/planSynthesisTools.js.map +1 -0
  182. package/dist/tools/profilerTools.d.ts +20 -0
  183. package/dist/tools/profilerTools.js +364 -0
  184. package/dist/tools/profilerTools.js.map +1 -0
  185. package/dist/tools/progressiveDiscoveryTools.js +2 -1
  186. package/dist/tools/progressiveDiscoveryTools.js.map +1 -1
  187. package/dist/tools/savingsTools.d.ts +11 -0
  188. package/dist/tools/savingsTools.js +155 -0
  189. package/dist/tools/savingsTools.js.map +1 -0
  190. package/dist/tools/scenarioCompilerTools.d.ts +14 -0
  191. package/dist/tools/scenarioCompilerTools.js +290 -0
  192. package/dist/tools/scenarioCompilerTools.js.map +1 -0
  193. package/dist/tools/sharedContextTools.d.ts +2 -0
  194. package/dist/tools/sharedContextTools.js +423 -0
  195. package/dist/tools/sharedContextTools.js.map +1 -0
  196. package/dist/tools/sitemapTools.d.ts +15 -0
  197. package/dist/tools/sitemapTools.js +560 -0
  198. package/dist/tools/sitemapTools.js.map +1 -0
  199. package/dist/tools/sweepTools.d.ts +9 -0
  200. package/dist/tools/sweepTools.js +112 -0
  201. package/dist/tools/sweepTools.js.map +1 -0
  202. package/dist/tools/syncBridgeTools.d.ts +2 -0
  203. package/dist/tools/syncBridgeTools.js +258 -0
  204. package/dist/tools/syncBridgeTools.js.map +1 -0
  205. package/dist/tools/toolRegistry.js +1223 -45
  206. package/dist/tools/toolRegistry.js.map +1 -1
  207. package/dist/tools/workspaceTools.d.ts +19 -0
  208. package/dist/tools/workspaceTools.js +762 -0
  209. package/dist/tools/workspaceTools.js.map +1 -0
  210. package/dist/toolsetRegistry.js +162 -3
  211. package/dist/toolsetRegistry.js.map +1 -1
  212. package/package.json +39 -38
  213. package/rules/nodebench-agentic-reliability.md +32 -0
  214. package/rules/nodebench-analyst-diagnostic.md +25 -0
  215. package/rules/nodebench-auto-qa.md +31 -0
  216. package/rules/nodebench-completion-traceability.md +22 -0
  217. package/rules/nodebench-flywheel-continuous.md +25 -0
  218. package/rules/nodebench-pre-release-review.md +24 -0
  219. package/rules/nodebench-qa-dogfood.md +26 -0
  220. package/rules/nodebench-scenario-testing.md +30 -0
  221. package/rules/nodebench-self-direction.md +23 -0
  222. package/rules/nodebench-self-judge-loop.md +24 -0
  223. package/scripts/install.sh +215 -0
  224. package/dist/__tests__/analytics.test.d.ts +0 -11
  225. package/dist/__tests__/analytics.test.js +0 -546
  226. package/dist/__tests__/analytics.test.js.map +0 -1
  227. package/dist/__tests__/architectComplex.test.d.ts +0 -1
  228. package/dist/__tests__/architectComplex.test.js +0 -373
  229. package/dist/__tests__/architectComplex.test.js.map +0 -1
  230. package/dist/__tests__/architectSmoke.test.d.ts +0 -1
  231. package/dist/__tests__/architectSmoke.test.js +0 -92
  232. package/dist/__tests__/architectSmoke.test.js.map +0 -1
  233. package/dist/__tests__/audit-registry.d.ts +0 -1
  234. package/dist/__tests__/audit-registry.js +0 -60
  235. package/dist/__tests__/audit-registry.js.map +0 -1
  236. package/dist/__tests__/batchAutopilot.test.d.ts +0 -8
  237. package/dist/__tests__/batchAutopilot.test.js +0 -218
  238. package/dist/__tests__/batchAutopilot.test.js.map +0 -1
  239. package/dist/__tests__/cliSubcommands.test.d.ts +0 -1
  240. package/dist/__tests__/cliSubcommands.test.js +0 -138
  241. package/dist/__tests__/cliSubcommands.test.js.map +0 -1
  242. package/dist/__tests__/comparativeBench.test.d.ts +0 -1
  243. package/dist/__tests__/comparativeBench.test.js +0 -722
  244. package/dist/__tests__/comparativeBench.test.js.map +0 -1
  245. package/dist/__tests__/critterCalibrationEval.d.ts +0 -8
  246. package/dist/__tests__/critterCalibrationEval.js +0 -370
  247. package/dist/__tests__/critterCalibrationEval.js.map +0 -1
  248. package/dist/__tests__/dynamicLoading.test.d.ts +0 -1
  249. package/dist/__tests__/dynamicLoading.test.js +0 -280
  250. package/dist/__tests__/dynamicLoading.test.js.map +0 -1
  251. package/dist/__tests__/embeddingProvider.test.d.ts +0 -1
  252. package/dist/__tests__/embeddingProvider.test.js +0 -86
  253. package/dist/__tests__/embeddingProvider.test.js.map +0 -1
  254. package/dist/__tests__/evalDatasetBench.test.d.ts +0 -1
  255. package/dist/__tests__/evalDatasetBench.test.js +0 -738
  256. package/dist/__tests__/evalDatasetBench.test.js.map +0 -1
  257. package/dist/__tests__/evalHarness.test.d.ts +0 -1
  258. package/dist/__tests__/evalHarness.test.js +0 -1107
  259. package/dist/__tests__/evalHarness.test.js.map +0 -1
  260. package/dist/__tests__/fixtures/bfcl_v3_long_context.sample.json +0 -264
  261. package/dist/__tests__/fixtures/generateBfclLongContextFixture.d.ts +0 -10
  262. package/dist/__tests__/fixtures/generateBfclLongContextFixture.js +0 -135
  263. package/dist/__tests__/fixtures/generateBfclLongContextFixture.js.map +0 -1
  264. package/dist/__tests__/fixtures/generateSwebenchVerifiedFixture.d.ts +0 -14
  265. package/dist/__tests__/fixtures/generateSwebenchVerifiedFixture.js +0 -189
  266. package/dist/__tests__/fixtures/generateSwebenchVerifiedFixture.js.map +0 -1
  267. package/dist/__tests__/fixtures/generateToolbenchInstructionFixture.d.ts +0 -16
  268. package/dist/__tests__/fixtures/generateToolbenchInstructionFixture.js +0 -154
  269. package/dist/__tests__/fixtures/generateToolbenchInstructionFixture.js.map +0 -1
  270. package/dist/__tests__/fixtures/swebench_verified.sample.json +0 -162
  271. package/dist/__tests__/fixtures/toolbench_instruction.sample.json +0 -109
  272. package/dist/__tests__/forecastingDogfood.test.d.ts +0 -9
  273. package/dist/__tests__/forecastingDogfood.test.js +0 -284
  274. package/dist/__tests__/forecastingDogfood.test.js.map +0 -1
  275. package/dist/__tests__/forecastingScoring.test.d.ts +0 -9
  276. package/dist/__tests__/forecastingScoring.test.js +0 -202
  277. package/dist/__tests__/forecastingScoring.test.js.map +0 -1
  278. package/dist/__tests__/gaiaCapabilityAudioEval.test.d.ts +0 -15
  279. package/dist/__tests__/gaiaCapabilityAudioEval.test.js +0 -265
  280. package/dist/__tests__/gaiaCapabilityAudioEval.test.js.map +0 -1
  281. package/dist/__tests__/gaiaCapabilityEval.test.d.ts +0 -14
  282. package/dist/__tests__/gaiaCapabilityEval.test.js +0 -1259
  283. package/dist/__tests__/gaiaCapabilityEval.test.js.map +0 -1
  284. package/dist/__tests__/gaiaCapabilityFilesEval.test.d.ts +0 -15
  285. package/dist/__tests__/gaiaCapabilityFilesEval.test.js +0 -914
  286. package/dist/__tests__/gaiaCapabilityFilesEval.test.js.map +0 -1
  287. package/dist/__tests__/gaiaCapabilityMediaEval.test.d.ts +0 -15
  288. package/dist/__tests__/gaiaCapabilityMediaEval.test.js +0 -1101
  289. package/dist/__tests__/gaiaCapabilityMediaEval.test.js.map +0 -1
  290. package/dist/__tests__/helpers/answerMatch.d.ts +0 -41
  291. package/dist/__tests__/helpers/answerMatch.js +0 -267
  292. package/dist/__tests__/helpers/answerMatch.js.map +0 -1
  293. package/dist/__tests__/helpers/textLlm.d.ts +0 -25
  294. package/dist/__tests__/helpers/textLlm.js +0 -214
  295. package/dist/__tests__/helpers/textLlm.js.map +0 -1
  296. package/dist/__tests__/localDashboard.test.d.ts +0 -1
  297. package/dist/__tests__/localDashboard.test.js +0 -226
  298. package/dist/__tests__/localDashboard.test.js.map +0 -1
  299. package/dist/__tests__/multiHopDogfood.test.d.ts +0 -12
  300. package/dist/__tests__/multiHopDogfood.test.js +0 -303
  301. package/dist/__tests__/multiHopDogfood.test.js.map +0 -1
  302. package/dist/__tests__/openDatasetParallelEval.test.d.ts +0 -7
  303. package/dist/__tests__/openDatasetParallelEval.test.js +0 -209
  304. package/dist/__tests__/openDatasetParallelEval.test.js.map +0 -1
  305. package/dist/__tests__/openDatasetParallelEvalGaia.test.d.ts +0 -7
  306. package/dist/__tests__/openDatasetParallelEvalGaia.test.js +0 -279
  307. package/dist/__tests__/openDatasetParallelEvalGaia.test.js.map +0 -1
  308. package/dist/__tests__/openDatasetParallelEvalSwebench.test.d.ts +0 -7
  309. package/dist/__tests__/openDatasetParallelEvalSwebench.test.js +0 -220
  310. package/dist/__tests__/openDatasetParallelEvalSwebench.test.js.map +0 -1
  311. package/dist/__tests__/openDatasetParallelEvalToolbench.test.d.ts +0 -7
  312. package/dist/__tests__/openDatasetParallelEvalToolbench.test.js +0 -218
  313. package/dist/__tests__/openDatasetParallelEvalToolbench.test.js.map +0 -1
  314. package/dist/__tests__/openDatasetPerfComparison.test.d.ts +0 -10
  315. package/dist/__tests__/openDatasetPerfComparison.test.js +0 -318
  316. package/dist/__tests__/openDatasetPerfComparison.test.js.map +0 -1
  317. package/dist/__tests__/openclawDogfood.test.d.ts +0 -23
  318. package/dist/__tests__/openclawDogfood.test.js +0 -535
  319. package/dist/__tests__/openclawDogfood.test.js.map +0 -1
  320. package/dist/__tests__/openclawMessaging.test.d.ts +0 -14
  321. package/dist/__tests__/openclawMessaging.test.js +0 -232
  322. package/dist/__tests__/openclawMessaging.test.js.map +0 -1
  323. package/dist/__tests__/presetRealWorldBench.test.d.ts +0 -1
  324. package/dist/__tests__/presetRealWorldBench.test.js +0 -859
  325. package/dist/__tests__/presetRealWorldBench.test.js.map +0 -1
  326. package/dist/__tests__/tools.test.d.ts +0 -1
  327. package/dist/__tests__/tools.test.js +0 -3201
  328. package/dist/__tests__/tools.test.js.map +0 -1
  329. package/dist/__tests__/toolsetGatingEval.test.d.ts +0 -1
  330. package/dist/__tests__/toolsetGatingEval.test.js +0 -1099
  331. package/dist/__tests__/toolsetGatingEval.test.js.map +0 -1
  332. package/dist/__tests__/traceabilityDogfood.test.d.ts +0 -12
  333. package/dist/__tests__/traceabilityDogfood.test.js +0 -241
  334. package/dist/__tests__/traceabilityDogfood.test.js.map +0 -1
  335. package/dist/__tests__/webmcpTools.test.d.ts +0 -7
  336. package/dist/__tests__/webmcpTools.test.js +0 -195
  337. package/dist/__tests__/webmcpTools.test.js.map +0 -1
  338. package/dist/benchmarks/testProviderBus.d.ts +0 -7
  339. package/dist/benchmarks/testProviderBus.js +0 -272
  340. package/dist/benchmarks/testProviderBus.js.map +0 -1
  341. package/dist/hooks/postCompaction.d.ts +0 -14
  342. package/dist/hooks/postCompaction.js +0 -51
  343. package/dist/hooks/postCompaction.js.map +0 -1
  344. package/dist/security/__tests__/security.test.d.ts +0 -8
  345. package/dist/security/__tests__/security.test.js +0 -295
  346. package/dist/security/__tests__/security.test.js.map +0 -1
  347. package/dist/tools/documentTools.d.ts +0 -5
  348. package/dist/tools/documentTools.js +0 -524
  349. package/dist/tools/documentTools.js.map +0 -1
  350. package/dist/tools/financialTools.d.ts +0 -10
  351. package/dist/tools/financialTools.js +0 -403
  352. package/dist/tools/financialTools.js.map +0 -1
  353. package/dist/tools/memoryTools.d.ts +0 -5
  354. package/dist/tools/memoryTools.js +0 -137
  355. package/dist/tools/memoryTools.js.map +0 -1
  356. package/dist/tools/planningTools.d.ts +0 -5
  357. package/dist/tools/planningTools.js +0 -147
  358. package/dist/tools/planningTools.js.map +0 -1
  359. package/dist/tools/searchTools.d.ts +0 -5
  360. package/dist/tools/searchTools.js +0 -145
  361. package/dist/tools/searchTools.js.map +0 -1
@@ -0,0 +1,1104 @@
1
+ import { getDb, genId } from "../db.js";
2
+ import { safeExec } from "../security/index.js";
3
+ import { reconTools } from "./reconTools.js";
4
+ import { verificationTools } from "./verificationTools.js";
5
+ import { qualityGateTools } from "./qualityGateTools.js";
6
+ import { flywheelTools } from "./flywheelTools.js";
7
+ import { dogfoodJudgeTools } from "./dogfoodJudgeTools.js";
8
+ import { llmJudgeLoopTools } from "./llmJudgeLoop.js";
9
+ import { learningTools } from "./learningTools.js";
10
+ import { sessionMemoryTools } from "./sessionMemoryTools.js";
11
+ import { linkDurableObjects, recordExecutionReceipt, recordLocalOutcome, upsertDurableObject, } from "../sync/store.js";
12
/** Process-wide flag: set once the autonomous-delivery tables have been created. */
let schemaReady = false;
/**
 * Lazily creates the `autonomous_delivery_runs` and `autonomous_delivery_steps`
 * tables plus their indexes. Subsequent calls are no-ops once the first
 * call has completed without throwing.
 */
function ensureSchema() {
    if (!schemaReady) {
        const database = getDb();
        // Every statement is IF NOT EXISTS, so re-running against an existing database is harmless.
        database.exec(`
    CREATE TABLE IF NOT EXISTS autonomous_delivery_runs (
      run_id TEXT PRIMARY KEY,
      goal TEXT NOT NULL,
      status TEXT NOT NULL DEFAULT 'running',
      summary TEXT,
      research_summary TEXT,
      plan_json TEXT,
      implementation_summary TEXT,
      dogfood_summary TEXT,
      judge_summary TEXT,
      verification_summary TEXT,
      learning_summary TEXT,
      created_at TEXT NOT NULL,
      updated_at TEXT NOT NULL,
      completed_at TEXT
    );

    CREATE TABLE IF NOT EXISTS autonomous_delivery_steps (
      step_id TEXT PRIMARY KEY,
      run_id TEXT NOT NULL REFERENCES autonomous_delivery_runs(run_id) ON DELETE CASCADE,
      stage TEXT NOT NULL,
      status TEXT NOT NULL DEFAULT 'running',
      summary TEXT,
      details_json TEXT,
      started_at TEXT NOT NULL,
      completed_at TEXT
    );

    CREATE INDEX IF NOT EXISTS idx_autonomous_delivery_runs_created
      ON autonomous_delivery_runs(created_at DESC);
    CREATE INDEX IF NOT EXISTS idx_autonomous_delivery_steps_run
      ON autonomous_delivery_steps(run_id, started_at);
  `);
        // Only flipped after exec() returns, so a failed attempt is retried on the next call.
        schemaReady = true;
    }
}
53
/**
 * Looks up a tool definition by its `name` property.
 *
 * @param {Array<{name: string}>} tools - Candidate tool entries.
 * @param {string} name - Name of the tool to locate.
 * @returns {object} The first entry whose `name` equals `name`.
 * @throws {Error} When no entry matches.
 */
function requireTool(tools, name) {
    const position = tools.findIndex((candidate) => candidate.name === name);
    const match = position === -1 ? undefined : tools[position];
    if (match) {
        return match;
    }
    throw new Error(`Tool not found: ${name}`);
}
59
/**
 * Shortens a string for display.
 *
 * @param {string} text - Text to shorten; any falsy value yields "".
 * @param {number} [limit=400] - Maximum number of characters kept before the ellipsis.
 * @returns {string} `text` unchanged when it fits, otherwise its first
 *   `limit` characters followed by "...".
 */
function truncate(text, limit = 400) {
    if (!text) {
        return "";
    }
    if (text.length <= limit) {
        return text;
    }
    const head = text.slice(0, limit);
    return `${head}...`;
}
64
/**
 * Converts arbitrary text into a lowercase, hyphen-separated slug of at most
 * 48 characters.
 *
 * @param {string} value - Text to slugify.
 * @returns {string} Slug containing only `a-z`, `0-9` and single hyphens,
 *   with no leading or trailing hyphen.
 */
function slugify(value) {
    return value
        .toLowerCase()
        .replace(/[^a-z0-9]+/g, "-")
        .replace(/^-+|-+$/g, "")
        .slice(0, 48)
        // The cut at 48 characters can land right after a separator; trim again
        // so the slug never ends with "-".
        .replace(/-+$/, "");
}
71
/**
 * Determines the delivery phase of an implementation command.
 *
 * An explicit `command.phase` always wins. Otherwise the phase is inferred by
 * searching the lowercased "label command" text for keywords; the first rule
 * with a matching keyword decides, and "implement" is the fallback.
 *
 * @param {{phase?: string, label: string, command: string}} command
 * @returns {string} The explicit or inferred phase name.
 */
function normalizePhase(command) {
    if (command.phase) {
        return command.phase;
    }
    const searchable = `${command.label} ${command.command}`.toLowerCase();
    // Order matters: earlier rules take precedence over later ones.
    const keywordRules = [
        ["lint", ["lint"]],
        ["compile", ["compile", "build", "tsc"]],
        ["self_debug", ["debug"]],
        ["failure_path_test", ["failure", "negative"]],
        ["happy_path_test", ["happy", "smoke"]],
        ["test", ["test", "vitest", "jest", "pytest"]],
    ];
    for (const [phase, keywords] of keywordRules) {
        if (keywords.some((keyword) => searchable.includes(keyword))) {
            return phase;
        }
    }
    return "implement";
}
93
/**
 * Condenses a research summary into at most six short highlight strings.
 *
 * One highlight is taken from the first finding of each category in
 * `findingsByCategory`, followed by up to three entries from
 * `prioritizedActions`. Category values that are not arrays, and findings
 * without a summary, are skipped instead of throwing.
 *
 * @param {object|null|undefined} researchSummary - Summary object; falsy yields [].
 * @returns {string[]} Up to six highlights, each text truncated to 140 characters.
 */
function summarizeResearch(researchSummary) {
    if (!researchSummary)
        return [];
    const highlights = [];
    const categories = researchSummary.findingsByCategory ?? {};
    for (const [category, findings] of Object.entries(categories)) {
        // A null or non-array category value would throw on `findings[0]`; skip it.
        const first = Array.isArray(findings) ? findings[0]?.summary : undefined;
        if (first) {
            // Coerce to string, as the action branch below does, so a
            // non-string summary cannot crash truncate().
            highlights.push(`${category}: ${truncate(String(first), 140)}`);
        }
    }
    if (Array.isArray(researchSummary.prioritizedActions)) {
        for (const action of researchSummary.prioritizedActions.slice(0, 3)) {
            if (action?.action)
                highlights.push(`action: ${truncate(String(action.action), 140)}`);
        }
    }
    return highlights.slice(0, 6);
}
111
/**
 * Assembles the plan document for an autonomous delivery run.
 *
 * @param {object} args
 * @param {string} args.goal - Goal of the run.
 * @param {*} [args.researchTarget] - Stored as-is, or null when absent.
 * @param {object} [args.researchSummary] - Passed to summarizeResearch().
 * @param {Array<{label: string, command: string, phase?: string}>} args.implementationCommands
 * @param {{scenarioId: *, prompt: *, toolName?: *}} [args.dogfood] - Optional dogfood scenario.
 * @param {boolean} [args.saveSessionNote] - Selects which delivery note text is used.
 * @returns {object} Plan with goal, research highlights, ordered implementation
 *   sequence, dogfood scenario (or null), verification checklist and delivery note.
 */
function buildPlan(args) {
    const researchHighlights = summarizeResearch(args.researchSummary);
    const implementationSequence = args.implementationCommands.map((command, index) => {
        return {
            order: index + 1, // 1-based position in the sequence
            label: command.label,
            phase: normalizePhase(command),
            command: command.command,
        };
    });
    let dogfoodScenario = null;
    if (args.dogfood) {
        dogfoodScenario = {
            scenarioId: args.dogfood.scenarioId,
            prompt: args.dogfood.prompt,
            toolName: args.dogfood.toolName ?? null,
        };
    }
    let deliveryNote;
    if (args.saveSessionNote) {
        deliveryNote = "A session note will be persisted to filesystem at the end of the run.";
    }
    else {
        deliveryNote = "Session note persistence is disabled for this run.";
    }
    return {
        goal: args.goal,
        researchTarget: args.researchTarget ?? null,
        researchHighlights,
        implementationSequence,
        dogfoodScenario,
        verificationChecklist: [
            "Compile, lint, and test commands must all pass.",
            "Dogfood output must be judged before completion.",
            "Mandatory flywheel must have explicit static, happy-path, failure-path, gap-analysis, fix-and-reverify, and documentation evidence.",
            "At least one learning or durable note should be banked at the end of the run.",
        ],
        deliveryNote,
    };
}
140
/**
 * Collapses implementation command results into the four closed-loop steps
 * (compile, lint, test, self_debug), always returned in that order.
 *
 * A step passes only when at least one result was recorded for its phase and
 * every such result passed. Results in any other phase are ignored.
 *
 * @param {Iterable<{phase: string, label: string, passed: boolean, exitCode: *}>} commands
 * @returns {Array<{step: string, passed: boolean, output: string}>}
 */
function mapImplementationToClosedLoopSteps(commands) {
    const trackedPhases = ["compile", "lint", "test", "self_debug"];
    // Pre-seed a bucket per tracked phase; results for other phases find no bucket.
    const buckets = new Map(trackedPhases.map((phase) => [phase, []]));
    for (const result of commands) {
        buckets.get(result.phase)?.push(result);
    }
    return trackedPhases.map((phase) => {
        const recorded = buckets.get(phase);
        if (recorded.length === 0) {
            return {
                step: phase,
                passed: false,
                output: `No ${phase} step was recorded.`,
            };
        }
        const lines = [];
        for (const entry of recorded) {
            const verdict = entry.passed ? "passed" : "failed";
            lines.push(`${entry.label}: ${verdict} (${entry.exitCode})`);
        }
        return {
            step: phase,
            passed: recorded.every((entry) => entry.passed),
            output: lines.join("; "),
        };
    });
}
168
/**
 * Derives the six flywheel steps (static analysis, happy path, failure path,
 * gap analysis, fix-and-reverify, deploy-and-document) from the outcome of a
 * delivery run.
 *
 * @param {object} args
 * @param {Array<{phase: string, label: string, passed: boolean}>} args.commandResults
 * @param {boolean} args.dogfoodCompleted - Whether the dogfood scenario completed.
 * @param {boolean} args.researchCompleted - Whether recon/gap logging completed.
 * @param {boolean} args.documentationPlanned - Whether a documentation/note step is enabled.
 * @param {boolean} args.judgePassed - Whether the judge passed.
 * @returns {Array<{stepName: string, passed: boolean, output: string}>} Six
 *   entries in fixed order; `passed` is always a boolean.
 */
function deriveFlywheelSteps(args) {
    // Shared "label:pass|fail" rendering for a list of command results.
    const describe = (results) => results.map((result) => `${result.label}:${result.passed ? "pass" : "fail"}`).join(", ");
    const compileAndLint = args.commandResults.filter((result) => result.phase === "compile" || result.phase === "lint");
    const happyPath = args.commandResults.filter((result) => result.phase === "happy_path_test");
    const failurePath = args.commandResults.filter((result) => result.phase === "failure_path_test");
    const anyFailures = args.commandResults.some((result) => !result.passed);
    const happyPathCommandPassed = happyPath.some((result) => result.passed);
    const dogfoodCompleted = Boolean(args.dogfoodCompleted);
    const documented = Boolean(args.documentationPlanned && args.judgePassed);
    // The message must agree with `passed`: when a happy_path_test command
    // passed without dogfood, report those commands instead of claiming that
    // nothing passed.
    let happyPathOutput;
    if (dogfoodCompleted) {
        happyPathOutput = "Dogfood scenario completed.";
    }
    else if (happyPathCommandPassed) {
        happyPathOutput = describe(happyPath);
    }
    else {
        happyPathOutput = "No explicit happy-path dogfood or happy_path_test step passed.";
    }
    return [
        {
            stepName: "static_analysis",
            passed: compileAndLint.length > 0 && compileAndLint.every((result) => result.passed),
            output: compileAndLint.length > 0
                ? describe(compileAndLint)
                : "No compile/lint command was supplied.",
        },
        {
            stepName: "happy_path_test",
            passed: dogfoodCompleted || happyPathCommandPassed,
            output: happyPathOutput,
        },
        {
            stepName: "failure_path_test",
            passed: failurePath.length > 0 && failurePath.every((result) => result.passed),
            output: failurePath.length > 0
                ? describe(failurePath)
                : "No failure-path command was supplied.",
        },
        {
            stepName: "gap_analysis",
            passed: Boolean(args.researchCompleted),
            output: args.researchCompleted
                ? "Recon and gap logging completed."
                : "Research/gap analysis was skipped or failed.",
        },
        {
            stepName: "fix_and_reverify",
            passed: !anyFailures,
            output: anyFailures
                ? "At least one implementation command failed; fix and re-run are still required."
                : "All implementation commands passed on the current loop.",
        },
        {
            stepName: "deploy_and_document",
            passed: documented,
            output: documented
                ? "Documentation/note step is enabled and judge passed."
                : "Documentation is missing or the judge did not pass.",
        },
    ];
}
218
/**
 * Maps a judge verdict's named criteria onto six quality scores.
 *
 * Each score is 5 when the matching criterion is present and passed, else 2.
 * When a criterion name appears more than once, the last occurrence wins.
 *
 * @param {{criteria?: Iterable<{criterion: *, pass: *}>}|null|undefined} verdict
 * @returns {{truthQuality: number, compressionQuality: number,
 *   anticipationQuality: number, outputQuality: number,
 *   delegationQuality: number, trustQuality: number}}
 */
function deriveDogfoodJudgeScores(verdict) {
    // Score dimension -> criterion name that decides it.
    const dimensionToCriterion = [
        ["truthQuality", "Removed repeated cognition"],
        ["compressionQuality", "Suppressed noise"],
        ["anticipationQuality", "Surfaced right contradiction"],
        ["outputQuality", "Produced downstream artifact"],
        ["delegationQuality", "Returned usable packet"],
        ["trustQuality", "Trustworthy and reusable"],
    ];
    const outcomeByName = new Map();
    const entries = verdict?.criteria ?? [];
    for (const entry of entries) {
        if (!entry?.criterion) {
            continue;
        }
        outcomeByName.set(String(entry.criterion), Boolean(entry.pass));
    }
    const scores = {};
    for (const [dimension, criterionName] of dimensionToCriterion) {
        scores[dimension] = outcomeByName.get(criterionName) ? 5 : 2;
    }
    return scores;
}
234
/**
 * Starts a new autonomous delivery run.
 *
 * Inserts a 'running' row into `autonomous_delivery_runs`, upserts durable
 * objects for the run and for the shared workflow, links the workflow to the
 * run, and records a "run_created" execution receipt.
 *
 * @param {string} goal - Goal text; stored on the row and used as the run object's label.
 * @returns {string} The generated run id.
 */
function createRun(goal) {
    ensureSchema();
    const db = getDb();
    const runId = genId("auto");
    const timestamp = new Date().toISOString();
    const insertRun = db.prepare(`
    INSERT INTO autonomous_delivery_runs (run_id, goal, status, created_at, updated_at)
    VALUES (?, ?, 'running', ?, ?)
  `);
    // created_at and updated_at start out identical.
    insertRun.run(runId, goal, timestamp, timestamp);
    const runObjectId = `run:${runId}`;
    const workflowObjectId = "workflow:self_directed_delivery";
    const source = "autonomous_delivery";
    upsertDurableObject({
        id: runObjectId,
        kind: "run",
        label: goal,
        source,
        metadata: { runId, workflowName: "self_directed_delivery" },
    });
    upsertDurableObject({
        id: workflowObjectId,
        kind: "workflow",
        label: "Self-Directed Delivery Loop",
        source,
        metadata: { methodology: "ai_flywheel" },
    });
    linkDurableObjects({
        fromId: workflowObjectId,
        toId: runObjectId,
        edgeType: "executes",
        metadata: { runId },
    });
    recordExecutionReceipt({
        runId,
        objectId: runObjectId,
        actionType: "run_created",
        summary: `Autonomous delivery run created for goal: ${goal}`,
        status: "recorded",
        metadata: { workflowName: "self_directed_delivery" },
    });
    return runId;
}
273
/**
 * Opens a new stage (step) within an existing run.
 *
 * Inserts a 'running' row into `autonomous_delivery_steps`, bumps the run's
 * `updated_at`, upserts an action object for the step, links it to the run,
 * and records a "<stage>_started" execution receipt.
 *
 * @param {string} runId - Id of the owning run.
 * @param {string} stage - Stage name; also used in the object label and receipt action type.
 * @param {string} summary - Summary stored on the step row and the receipt.
 * @returns {string} The generated step id.
 */
function startStage(runId, stage, summary) {
    ensureSchema();
    const db = getDb();
    const stepId = genId("auto_step");
    const startedAt = new Date().toISOString();
    const insertStep = db.prepare(`
    INSERT INTO autonomous_delivery_steps (step_id, run_id, stage, status, summary, started_at)
    VALUES (?, ?, ?, 'running', ?, ?)
  `);
    insertStep.run(stepId, runId, stage, summary, startedAt);
    const touchRun = db.prepare(`UPDATE autonomous_delivery_runs SET updated_at = ? WHERE run_id = ?`);
    touchRun.run(startedAt, runId);
    const actionObjectId = `action:${stepId}`;
    upsertDurableObject({
        id: actionObjectId,
        kind: "action",
        label: `${stage} stage`,
        source: "autonomous_delivery",
        metadata: { runId, stepId, stage },
    });
    linkDurableObjects({
        fromId: `run:${runId}`,
        toId: actionObjectId,
        edgeType: "contains_stage",
        metadata: { stage },
    });
    recordExecutionReceipt({
        runId,
        stepId,
        objectId: actionObjectId,
        actionType: `${stage}_started`,
        summary,
        status: "running",
        metadata: { stage },
    });
    return stepId;
}
307
/**
 * Close a stage with its final status.
 *
 * Updates the step row (status, summary, serialized details, completion time)
 * and records a "stage_completed" execution receipt carrying the same status.
 *
 * @param {string} stepId - ID returned by `startStage`.
 * @param {string} status - Final status of the stage (callers in this file
 *   pass "completed", "failed", or "skipped").
 * @param {string} summary - Human-readable outcome of the stage.
 * @param {*} details - Optional structured payload; stored as JSON when
 *   truthy, otherwise stored as NULL.
 * @returns {void}
 */
function finishStage(stepId, status, summary, details) {
    ensureSchema();
    const database = getDb();
    const completedAt = new Date().toISOString();

    const updateStep = database.prepare(`
        UPDATE autonomous_delivery_steps
        SET status = ?, summary = ?, details_json = ?, completed_at = ?
        WHERE step_id = ?
    `);
    let detailsJson = null;
    if (details) {
        detailsJson = JSON.stringify(details);
    }
    updateStep.run(status, summary, detailsJson, completedAt, stepId);

    const completionReceipt = {
        stepId,
        objectId: `action:${stepId}`,
        actionType: "stage_completed",
        summary,
        status,
        metadata: { details },
    };
    recordExecutionReceipt(completionReceipt);
}
325
/**
 * Apply a partial update to a row of `autonomous_delivery_runs`.
 *
 * Each key of `patch` is used as a column name and its value is bound as a
 * parameter: null/undefined becomes NULL, strings are stored as-is, and every
 * other value is stored as JSON. `updated_at` is always refreshed. An empty
 * (or missing) patch is a no-op.
 *
 * @param {string} runId - ID of the run to update.
 * @param {Object<string, *>} patch - Column name → new value.
 * @returns {void}
 * @throws {Error} If a key of `patch` is not a plain SQL identifier.
 */
function updateRun(runId, patch) {
    ensureSchema();
    const db = getDb();
    const now = new Date().toISOString();
    const fields = Object.keys(patch ?? {});
    if (fields.length === 0)
        return;
    // Column names are spliced into the SQL text and cannot be bound as
    // parameters, so refuse anything that is not a bare identifier.
    const invalidField = fields.find((field) => !/^[A-Za-z_][A-Za-z0-9_]*$/.test(field));
    if (invalidField !== undefined) {
        throw new Error(`Invalid column name for autonomous_delivery_runs update: ${invalidField}`);
    }
    const assignments = [...fields.map((field) => `${field} = ?`), "updated_at = ?"];
    const values = fields.map((field) => {
        const value = patch[field];
        if (value == null)
            return null;
        return typeof value === "string" ? value : JSON.stringify(value);
    });
    db.prepare(`
        UPDATE autonomous_delivery_runs
        SET ${assignments.join(", ")}
        WHERE run_id = ?
    `).run(...values, now, runId);
}
347
/**
 * Finalize an autonomous delivery run.
 *
 * Stamps the run row with its final status, summary and completion time, then
 * records a "run_completed" execution receipt and a local outcome entry that
 * points back at the run as evidence.
 *
 * @param {string} runId - ID of the run being finalized.
 * @param {string} status - Final run status.
 * @param {string} summary - One-line description of how the run ended; also
 *   used as the outcome headline.
 * @returns {void}
 */
function completeRun(runId, status, summary) {
    ensureSchema();
    const database = getDb();
    const completedAt = new Date().toISOString();

    const finalizeRun = database.prepare(`
        UPDATE autonomous_delivery_runs
        SET status = ?, summary = ?, updated_at = ?, completed_at = ?
        WHERE run_id = ?
    `);
    // updated_at and completed_at share the same timestamp.
    finalizeRun.run(status, summary, completedAt, completedAt, runId);

    const runObjectId = `run:${runId}`;

    const completionReceipt = {
        runId,
        objectId: runObjectId,
        actionType: "run_completed",
        summary,
        status,
        metadata: { completedAt },
    };
    recordExecutionReceipt(completionReceipt);

    const outcome = {
        id: `outcome:${runId}`,
        runId,
        objectId: runObjectId,
        outcomeType: "autonomous_delivery",
        headline: summary,
        userValue: "One durable delivery run with explicit research, implementation, dogfood, and verification receipts.",
        stakeholderValue: "Replayable proof of work, verification state, and next actions tied to a single run.",
        status,
        evidence: [{ type: "run", id: runId }],
    };
    recordLocalOutcome(outcome);
}
376
/**
 * Look up a tool definition by name across every registered toolset.
 *
 * The toolset registry is imported lazily and all domains are loaded before
 * the search, so any tool known to the registry can be used for dogfooding.
 *
 * @param {string} toolName - Exact `name` of the tool to find.
 * @returns {Promise<object>} The first tool entry whose `name` matches.
 * @throws {Error} If no loaded toolset contains a tool with that name.
 */
async function resolveToolByName(toolName) {
    const { loadToolsets, ALL_DOMAIN_KEYS, TOOLSET_MAP } = await import("../toolsetRegistry.js");
    await loadToolsets(ALL_DOMAIN_KEYS);
    const candidates = Object.values(TOOLSET_MAP).flat();
    for (const candidate of candidates) {
        if (candidate.name === toolName) {
            return candidate;
        }
    }
    throw new Error(`Unable to resolve dogfood tool: ${toolName}`);
}
386
/**
 * Tool definitions for the self-directed delivery loop.
 *
 * - `run_self_directed_delivery_loop` drives one run through the stages
 *   research → plan → implement → dogfood → judge → verify → learn, recording
 *   a step row and receipt for every stage.
 * - `get_self_directed_delivery_run` loads one recorded run with its steps.
 * - `list_self_directed_delivery_runs` lists the most recent runs.
 *
 * Each entry exposes `name`, `description`, a JSON-schema `inputSchema`, and an
 * async `handler(args)`.
 */
export const autonomousDeliveryTools = [
    {
        name: "run_self_directed_delivery_loop",
        description: "Run a local-first autonomous delivery loop across exploratory research, planning, implementation commands, dogfood, verification, and judge. Persists one durable run in SQLite and emits per-stage receipts so the whole loop can be resumed, audited, and synced later.",
        inputSchema: {
            type: "object",
            properties: {
                goal: {
                    type: "string",
                    description: "The outcome this autonomous loop is trying to deliver.",
                },
                research: {
                    type: "object",
                    description: "Optional structured recon input for the research stage.",
                    properties: {
                        target: { type: "string" },
                        description: { type: "string" },
                        webEnrich: { type: "boolean" },
                        projectContext: { type: "object", additionalProperties: true },
                        findings: {
                            type: "array",
                            items: {
                                type: "object",
                                properties: {
                                    category: {
                                        type: "string",
                                        enum: [
                                            "breaking_change",
                                            "new_feature",
                                            "deprecation",
                                            "best_practice",
                                            "dataset",
                                            "benchmark",
                                            "codebase_pattern",
                                            "existing_implementation",
                                        ],
                                    },
                                    summary: { type: "string" },
                                    sourceUrl: { type: "string" },
                                    relevance: { type: "string" },
                                    actionItems: { type: "string" },
                                },
                                required: ["category", "summary"],
                            },
                        },
                    },
                },
                implementation: {
                    type: "object",
                    description: "Shell commands to execute as the implementation and verification loop.",
                    properties: {
                        stopOnFailure: { type: "boolean" },
                        commands: {
                            type: "array",
                            items: {
                                type: "object",
                                properties: {
                                    label: { type: "string" },
                                    command: { type: "string" },
                                    cwd: { type: "string" },
                                    timeoutMs: { type: "number" },
                                    phase: {
                                        type: "string",
                                        enum: [
                                            "implement",
                                            "compile",
                                            "lint",
                                            "test",
                                            "self_debug",
                                            "happy_path_test",
                                            "failure_path_test",
                                        ],
                                    },
                                },
                                required: ["label", "command"],
                            },
                        },
                    },
                },
                dogfood: {
                    type: "object",
                    description: "Optional dogfood scenario or precomputed output to judge.",
                    properties: {
                        loopType: {
                            type: "string",
                            enum: ["weekly_reset", "pre_delegation", "company_search"],
                        },
                        scenarioId: { type: "string" },
                        prompt: { type: "string" },
                        toolName: { type: "string" },
                        toolArgs: { type: "object", additionalProperties: true },
                        output: { type: "object", additionalProperties: true },
                        userRole: {
                            type: "string",
                            enum: ["founder", "banker", "ceo", "operator", "researcher", "student"],
                        },
                        packetVersionUsed: { type: "string" },
                        packetType: { type: "string" },
                        attachedInputs: {
                            type: "array",
                            items: { type: "string" },
                        },
                    },
                    required: ["scenarioId", "prompt"],
                },
                autoRecordLearning: {
                    type: "boolean",
                    description: "Persist an automatic learning summary at the end of the run. Default: true.",
                },
                saveSessionNote: {
                    type: "boolean",
                    description: "Persist a filesystem session note at the end of the run. Default: true.",
                },
                learnings: {
                    type: "array",
                    description: "Optional explicit learnings to persist in addition to the automatic summary.",
                    items: {
                        type: "object",
                        properties: {
                            key: { type: "string" },
                            content: { type: "string" },
                            category: {
                                type: "string",
                                enum: ["edge_case", "gotcha", "pattern", "regression", "convention"],
                            },
                            tags: {
                                type: "array",
                                items: { type: "string" },
                            },
                        },
                        required: ["key", "content", "category"],
                    },
                },
            },
            required: ["goal"],
        },
        /**
         * Execute the full delivery loop for `args.goal`.
         *
         * A failure inside any stage marks that stage "failed", rethrows, and
         * is converted by the outer catch into a `{ status: "failed" }` result
         * after the run row has been finalized.
         *
         * @param {object} args - Input matching `inputSchema`.
         * @returns {Promise<object>} Run ID, final status, summary, per-stage
         *   summaries and recommendations (only runId/status/summary/
         *   recommendations when a stage threw).
         */
        handler: async (args) => {
            ensureSchema();
            const runId = createRun(args.goal);
            // Both persistence flags default to true; only an explicit `false` disables them.
            const autoRecordLearning = args.autoRecordLearning !== false;
            const saveSessionNote = args.saveSessionNote !== false;
            const implementationCommands = args.implementation?.commands ?? [];
            const stopOnFailure = args.implementation?.stopOnFailure !== false;
            const researchTool = requireTool(reconTools, "run_recon");
            const logReconFindingTool = requireTool(reconTools, "log_recon_finding");
            const getReconSummaryTool = requireTool(reconTools, "get_recon_summary");
            const startVerificationCycleTool = requireTool(verificationTools, "start_verification_cycle");
            const logPhaseFindingsTool = requireTool(verificationTools, "log_phase_findings");
            const logGapTool = requireTool(verificationTools, "log_gap");
            const logTestResultTool = requireTool(verificationTools, "log_test_result");
            const runClosedLoopTool = requireTool(qualityGateTools, "run_closed_loop");
            const runMandatoryFlywheelTool = requireTool(flywheelTools, "run_mandatory_flywheel");
            const startDogfoodSessionTool = requireTool(dogfoodJudgeTools, "start_dogfood_session");
            const endDogfoodSessionTool = requireTool(dogfoodJudgeTools, "end_dogfood_session");
            const recordDogfoodTelemetryTool = requireTool(dogfoodJudgeTools, "record_dogfood_telemetry");
            const judgeSessionTool = requireTool(dogfoodJudgeTools, "judge_session");
            const judgeToolOutputTool = requireTool(llmJudgeLoopTools, "judge_tool_output");
            const recordLearningTool = requireTool(learningTools, "record_learning");
            const saveSessionNoteTool = requireTool(sessionMemoryTools, "save_session_note");
            let researchSummary = null;
            let plan = null;
            let commandResults = [];
            let dogfoodSummary = null;
            let judgeSummary = null;
            let verificationSummary = null;
            let learningSummary = null;
            let verificationCycleId = null;
            const recommendations = [];
            try {
                // ── Stage 1: research ─────────────────────────────────────
                const researchStepId = startStage(runId, "research", "Starting recon and context gathering");
                try {
                    if (args.research?.target) {
                        const reconRun = (await researchTool.handler({
                            target: args.research.target,
                            description: args.research.description,
                            projectContext: args.research.projectContext,
                            webEnrich: args.research.webEnrich ?? false,
                        }));
                        for (const finding of args.research.findings ?? []) {
                            await logReconFindingTool.handler({
                                sessionId: reconRun.sessionId,
                                sourceUrl: finding.sourceUrl,
                                category: finding.category,
                                summary: finding.summary,
                                relevance: finding.relevance,
                                actionItems: finding.actionItems,
                            });
                        }
                        researchSummary = await getReconSummaryTool.handler({
                            sessionId: reconRun.sessionId,
                            completeSession: true,
                        });
                        updateRun(runId, { research_summary: researchSummary });
                        finishStage(researchStepId, "completed", `Research captured for ${args.research.target}.`, researchSummary);
                    }
                    else {
                        finishStage(researchStepId, "skipped", "No research target provided.", null);
                    }
                }
                catch (error) {
                    finishStage(researchStepId, "failed", truncate(error?.message ?? "Research failed"), {
                        error: error?.message ?? String(error),
                    });
                    throw error;
                }
                // ── Stage 2: plan ─────────────────────────────────────────
                const planStepId = startStage(runId, "plan", "Building autonomous execution plan");
                plan = buildPlan({
                    goal: args.goal,
                    researchTarget: args.research?.target,
                    researchSummary,
                    implementationCommands,
                    dogfood: args.dogfood
                        ? {
                            scenarioId: args.dogfood.scenarioId,
                            prompt: args.dogfood.prompt,
                            toolName: args.dogfood.toolName,
                        }
                        : null,
                    saveSessionNote,
                });
                updateRun(runId, { plan_json: plan });
                finishStage(planStepId, "completed", "Execution plan generated.", plan);
                // ── Stage 3: implement ────────────────────────────────────
                const implementStepId = startStage(runId, "implement", "Running implementation commands");
                try {
                    if (implementationCommands.length > 0) {
                        for (const command of implementationCommands) {
                            const phase = normalizePhase(command);
                            const result = safeExec(command.command, {
                                cwd: command.cwd,
                                timeout: command.timeoutMs,
                            });
                            commandResults.push({
                                label: command.label,
                                command: command.command,
                                cwd: command.cwd ?? process.cwd(),
                                phase,
                                passed: result.exitCode === 0,
                                exitCode: result.exitCode,
                                timedOut: result.timedOut,
                                durationMs: result.durationMs,
                                stdout: truncate(result.stdout, 1200),
                                stderr: truncate(result.stderr, 1200),
                            });
                            if (stopOnFailure && result.exitCode !== 0)
                                break;
                        }
                        const failedCommands = commandResults.filter((entry) => !entry.passed);
                        const implementationSummary = {
                            commandCount: commandResults.length,
                            failedCount: failedCommands.length,
                            commands: commandResults,
                        };
                        updateRun(runId, { implementation_summary: implementationSummary });
                        finishStage(implementStepId, failedCommands.length === 0 ? "completed" : "failed", failedCommands.length === 0
                            ? `All ${commandResults.length} implementation commands passed.`
                            : `${failedCommands.length} implementation command(s) failed.`, implementationSummary);
                        if (failedCommands.length > 0) {
                            recommendations.push(`Fix failing command: ${failedCommands[0].label} (${failedCommands[0].command})`);
                        }
                    }
                    else {
                        finishStage(implementStepId, "skipped", "No implementation commands were supplied.", null);
                    }
                }
                catch (error) {
                    finishStage(implementStepId, "failed", truncate(error?.message ?? "Implementation failed"), {
                        error: error?.message ?? String(error),
                    });
                    throw error;
                }
                // ── Stage 4: dogfood ──────────────────────────────────────
                const dogfoodStepId = startStage(runId, "dogfood", "Running dogfood scenario");
                // Non-null only while a dogfood session is open and still needs to be ended.
                let dogfoodSessionId = null;
                // Ends the open dogfood session at most once; a no-op when none is open.
                const endDogfoodSession = async (payload) => {
                    if (!dogfoodSessionId)
                        return;
                    const sessionId = dogfoodSessionId;
                    dogfoodSessionId = null;
                    await endDogfoodSessionTool.handler({ sessionId, ...payload });
                };
                // Failure-path cleanup: the stage error about to be rethrown must not be
                // replaced by an error raised while closing the session.
                const abandonDogfoodSession = async (notes) => {
                    try {
                        await endDogfoodSession({ notes });
                    }
                    catch {
                        // Deliberately ignored; the original stage error is rethrown by the caller.
                    }
                };
                try {
                    if (args.dogfood) {
                        const dogfoodSession = (await startDogfoodSessionTool.handler({
                            loopType: args.dogfood.loopType ?? "company_search",
                            packetVersionUsed: args.dogfood.packetVersionUsed,
                        }));
                        dogfoodSessionId = dogfoodSession.sessionId;
                        // Measure how long producing the dogfood output actually took so the
                        // telemetry below carries a real latency instead of a constant 0.
                        const dogfoodStartedAt = Date.now();
                        const dogfoodOutput = args.dogfood.toolName
                            ? await (await resolveToolByName(args.dogfood.toolName)).handler(args.dogfood.toolArgs ?? {})
                            : args.dogfood.output ?? {
                                summary: "Dogfood stage was started without a toolName or output payload.",
                                goal: args.goal,
                            };
                        const dogfoodLatencyMs = Date.now() - dogfoodStartedAt;
                        dogfoodSummary = {
                            sessionId: dogfoodSessionId,
                            scenarioId: args.dogfood.scenarioId,
                            toolName: args.dogfood.toolName ?? "dogfood_output",
                            output: dogfoodOutput,
                        };
                        updateRun(runId, { dogfood_summary: dogfoodSummary });
                        await recordDogfoodTelemetryTool.handler({
                            scenarioId: args.dogfood.scenarioId,
                            userRole: args.dogfood.userRole ?? "founder",
                            primaryPrompt: args.dogfood.prompt,
                            packetType: args.dogfood.packetType,
                            attachedInputs: args.dogfood.attachedInputs,
                            toolsInvoked: args.dogfood.toolName ? [args.dogfood.toolName] : [],
                            toolCallCount: args.dogfood.toolName ? 1 : 0,
                            artifactsProduced: Object.keys(dogfoodOutput ?? {}),
                            totalLatencyMs: dogfoodLatencyMs,
                        });
                        finishStage(dogfoodStepId, "completed", `Dogfood scenario ${args.dogfood.scenarioId} completed.`, dogfoodSummary);
                    }
                    else {
                        finishStage(dogfoodStepId, "skipped", "No dogfood scenario was supplied.", null);
                    }
                }
                catch (error) {
                    finishStage(dogfoodStepId, "failed", truncate(error?.message ?? "Dogfood failed"), {
                        error: error?.message ?? String(error),
                    });
                    await abandonDogfoodSession(truncate(error?.message ?? "Dogfood failed"));
                    throw error;
                }
                // ── Stage 5: judge ────────────────────────────────────────
                const judgeStepId = startStage(runId, "judge", "Judging dogfood output");
                try {
                    if (args.dogfood && dogfoodSummary?.output) {
                        judgeSummary = await judgeToolOutputTool.handler({
                            scenarioId: args.dogfood.scenarioId,
                            prompt: args.dogfood.prompt,
                            toolName: dogfoodSummary.toolName,
                            result: dogfoodSummary.output,
                        });
                        if (dogfoodSessionId) {
                            await judgeSessionTool.handler({
                                sessionId: dogfoodSessionId,
                                ...deriveDogfoodJudgeScores(judgeSummary),
                                notes: judgeSummary.criteria
                                    ?.map((criterion) => `${criterion.pass ? "PASS" : "FAIL"} ${criterion.criterion}: ${criterion.reasoning}`)
                                    .join("\n"),
                                failureClasses: judgeSummary.criteria
                                    ?.filter((criterion) => !criterion.pass)
                                    .map((criterion) => criterion.criterion),
                            });
                            await endDogfoodSession({
                                notes: judgeSummary.verdict,
                                timeToFirstUsefulOutput: 0,
                                delegationSucceeded: judgeSummary.verdict === "PASS",
                                packetExported: judgeSummary.verdict === "PASS",
                            });
                        }
                        updateRun(runId, { judge_summary: judgeSummary });
                        finishStage(judgeStepId, judgeSummary.verdict === "PASS" ? "completed" : "failed", judgeSummary.verdict === "PASS"
                            ? `Judge passed with score ${judgeSummary.score}.`
                            : `Judge failed with score ${judgeSummary.score}.`, judgeSummary);
                        if (judgeSummary.verdict !== "PASS") {
                            recommendations.push(...(judgeSummary.fixSuggestions ?? []));
                        }
                    }
                    else {
                        // A session may have been opened even though there is nothing to
                        // judge (falsy dogfood output); close it rather than leave it open.
                        await endDogfoodSession({ notes: "No dogfood output was available to judge." });
                        finishStage(judgeStepId, "skipped", "No dogfood output was available to judge.", null);
                    }
                }
                catch (error) {
                    finishStage(judgeStepId, "failed", truncate(error?.message ?? "Judge failed"), {
                        error: error?.message ?? String(error),
                    });
                    // The judge stage is the last place the session can be ended.
                    await abandonDogfoodSession(truncate(error?.message ?? "Judge failed"));
                    throw error;
                }
                // ── Stage 6: verify ───────────────────────────────────────
                const verifyStepId = startStage(runId, "verify", "Running verification cycle and mandatory flywheel");
                try {
                    const verificationCycle = (await startVerificationCycleTool.handler({
                        title: `Autonomous delivery: ${args.goal}`,
                        description: plan?.deliveryNote,
                    }));
                    verificationCycleId = verificationCycle.cycleId;
                    await logPhaseFindingsTool.handler({
                        cycleId: verificationCycleId,
                        phaseNumber: 1,
                        status: "passed",
                        findings: {
                            goal: args.goal,
                            researchSummary: researchSummary ?? "No structured research was provided.",
                        },
                    });
                    // Gaps come from failed commands and from the judge's fix suggestions.
                    const phase2Gaps = [];
                    for (const failedCommand of commandResults.filter((entry) => !entry.passed)) {
                        phase2Gaps.push({
                            severity: failedCommand.phase === "compile" ||
                                failedCommand.phase === "lint" ||
                                failedCommand.phase === "test"
                                ? "HIGH"
                                : "MEDIUM",
                            title: `Command failed: ${failedCommand.label}`,
                            description: failedCommand.stderr || failedCommand.stdout || "Command failed without output.",
                            rootCause: `Autonomous implementation command exited with code ${failedCommand.exitCode}.`,
                            fixStrategy: "Fix the command or underlying code, then rerun the autonomous loop.",
                        });
                    }
                    for (const fixSuggestion of judgeSummary?.fixSuggestions ?? []) {
                        phase2Gaps.push({
                            severity: "MEDIUM",
                            title: `Judge follow-up: ${truncate(String(fixSuggestion), 90)}`,
                            description: String(fixSuggestion),
                            rootCause: "The dogfood judge detected missing quality or structure in the output.",
                            fixStrategy: "Apply the suggested fix, rerun dogfood, and re-judge the output.",
                        });
                    }
                    for (const gap of phase2Gaps) {
                        await logGapTool.handler({
                            cycleId: verificationCycleId,
                            severity: gap.severity,
                            title: gap.title,
                            description: gap.description,
                            rootCause: gap.rootCause,
                            fixStrategy: gap.fixStrategy,
                        });
                    }
                    await logPhaseFindingsTool.handler({
                        cycleId: verificationCycleId,
                        phaseNumber: 2,
                        status: "passed",
                        findings: {
                            gapCount: phase2Gaps.length,
                            gaps: phase2Gaps,
                        },
                    });
                    const implementationPassed = commandResults.every((entry) => entry.passed);
                    await logPhaseFindingsTool.handler({
                        cycleId: verificationCycleId,
                        phaseNumber: 3,
                        status: implementationPassed ? "passed" : "failed",
                        findings: {
                            commandCount: commandResults.length,
                            failedCount: commandResults.filter((entry) => !entry.passed).length,
                        },
                    });
                    const closedLoopSteps = mapImplementationToClosedLoopSteps(commandResults);
                    for (const step of closedLoopSteps) {
                        await logTestResultTool.handler({
                            cycleId: verificationCycleId,
                            layer: step.step === "compile" || step.step === "lint"
                                ? "static"
                                : step.step === "test"
                                    ? "unit"
                                    : "manual",
                            label: step.step,
                            passed: step.passed,
                            output: step.output,
                        });
                    }
                    const closedLoopResult = (await runClosedLoopTool.handler({
                        cycleId: verificationCycleId,
                        steps: closedLoopSteps,
                    }));
                    const flywheelResult = (await runMandatoryFlywheelTool.handler({
                        target: args.goal,
                        cycleId: verificationCycleId,
                        steps: deriveFlywheelSteps({
                            commandResults,
                            dogfoodCompleted: Boolean(args.dogfood && dogfoodSummary?.output),
                            judgePassed: judgeSummary?.verdict === "PASS",
                            researchCompleted: Boolean(researchSummary || args.research?.target),
                            documentationPlanned: autoRecordLearning || saveSessionNote,
                        }),
                    }));
                    await logPhaseFindingsTool.handler({
                        cycleId: verificationCycleId,
                        phaseNumber: 4,
                        status: closedLoopResult.allPassed ? "passed" : "failed",
                        findings: closedLoopResult,
                    });
                    // Phase 5 is only logged once the closed loop (phase 4) has passed.
                    if (closedLoopResult.allPassed) {
                        await logPhaseFindingsTool.handler({
                            cycleId: verificationCycleId,
                            phaseNumber: 5,
                            status: flywheelResult.passed && judgeSummary?.verdict === "PASS" ? "passed" : "failed",
                            findings: {
                                flywheel: flywheelResult,
                                judge: judgeSummary,
                            },
                        });
                    }
                    verificationSummary = {
                        cycleId: verificationCycleId,
                        closedLoop: closedLoopResult,
                        flywheel: flywheelResult,
                    };
                    updateRun(runId, { verification_summary: verificationSummary });
                    finishStage(verifyStepId, closedLoopResult.allPassed && flywheelResult.passed ? "completed" : "failed", closedLoopResult.allPassed && flywheelResult.passed
                        ? "Verification cycle and flywheel passed."
                        : "Verification cycle or flywheel failed.", verificationSummary);
                    if (!closedLoopResult.allPassed)
                        recommendations.push(closedLoopResult.guidance);
                    if (!flywheelResult.passed)
                        recommendations.push(flywheelResult.guidance);
                }
                catch (error) {
                    finishStage(verifyStepId, "failed", truncate(error?.message ?? "Verification failed"), {
                        error: error?.message ?? String(error),
                    });
                    throw error;
                }
                // ── Stage 7: learn ────────────────────────────────────────
                const learnStepId = startStage(runId, "learn", "Persisting learning artifacts");
                const recordedLearnings = [];
                const persistedNotePaths = [];
                try {
                    if (autoRecordLearning) {
                        const autoKey = `autonomous-loop:${slugify(args.goal)}`;
                        await recordLearningTool.handler({
                            key: autoKey,
                            category: judgeSummary?.verdict === "PASS" ? "pattern" : "regression",
                            content: [
                                `Goal: ${args.goal}`,
                                `Run ID: ${runId}`,
                                `Judge: ${judgeSummary?.verdict ?? "not_run"} ${judgeSummary?.score ?? ""}`.trim(),
                                `Verification cycle: ${verificationCycleId ?? "not_started"}`,
                                `Recommendations: ${(recommendations.length > 0 ? recommendations : ["none"]).join(" | ")}`,
                            ].join("\n"),
                            tags: ["autonomous-loop", "delivery"],
                            sourceCycle: verificationCycleId ?? undefined,
                        });
                        recordedLearnings.push(autoKey);
                    }
                    for (const learning of args.learnings ?? []) {
                        await recordLearningTool.handler({
                            key: learning.key,
                            content: learning.content,
                            category: learning.category,
                            tags: learning.tags,
                            sourceCycle: verificationCycleId ?? undefined,
                        });
                        recordedLearnings.push(learning.key);
                    }
                    if (saveSessionNote) {
                        const savedNote = (await saveSessionNoteTool.handler({
                            title: `Autonomous delivery loop: ${args.goal}`,
                            category: "progress",
                            content: [
                                `Run ID: ${runId}`,
                                `Goal: ${args.goal}`,
                                `Judge verdict: ${judgeSummary?.verdict ?? "not_run"} ${judgeSummary?.score ?? ""}`.trim(),
                                `Verification cycle: ${verificationCycleId ?? "not_started"}`,
                                `Top recommendations:`,
                                ...(recommendations.length > 0 ? recommendations : ["- none"]),
                            ].join("\n"),
                            tags: ["autonomous-loop", "delivery"],
                            citedFrom: args.goal,
                        }));
                        if (savedNote?.filePath)
                            persistedNotePaths.push(savedNote.filePath);
                    }
                    // Phase 6 is only logged when both the closed loop and the flywheel passed.
                    if (verificationCycleId && verificationSummary?.closedLoop?.allPassed && verificationSummary?.flywheel?.passed) {
                        await logPhaseFindingsTool.handler({
                            cycleId: verificationCycleId,
                            phaseNumber: 6,
                            status: "passed",
                            findings: {
                                recordedLearnings,
                                persistedNotePaths,
                            },
                        });
                    }
                    learningSummary = { recordedLearnings, persistedNotePaths };
                    updateRun(runId, { learning_summary: learningSummary });
                    finishStage(learnStepId, "completed", "Learning artifacts persisted.", learningSummary);
                }
                catch (error) {
                    finishStage(learnStepId, "failed", truncate(error?.message ?? "Learning persistence failed"), {
                        error: error?.message ?? String(error),
                    });
                    throw error;
                }
                // "completed" needs closed loop + flywheel + judge to pass; "needs_attention"
                // means judge and all commands passed but verification did not.
                const finalStatus = verificationSummary?.closedLoop?.allPassed &&
                    verificationSummary?.flywheel?.passed &&
                    judgeSummary?.verdict === "PASS"
                    ? "completed"
                    : judgeSummary?.verdict === "PASS" && commandResults.every((entry) => entry.passed)
                        ? "needs_attention"
                        : "failed";
                const summary = finalStatus === "completed"
                    ? `Autonomous delivery loop completed for "${args.goal}".`
                    : `Autonomous delivery loop finished with status "${finalStatus}" for "${args.goal}".`;
                completeRun(runId, finalStatus, summary);
                return {
                    runId,
                    status: finalStatus,
                    summary,
                    researchSummary,
                    plan,
                    implementation: {
                        commandCount: commandResults.length,
                        failedCount: commandResults.filter((entry) => !entry.passed).length,
                        commands: commandResults,
                    },
                    dogfood: dogfoodSummary,
                    judge: judgeSummary,
                    verification: verificationSummary,
                    learning: learningSummary,
                    recommendations,
                };
            }
            catch (error) {
                // Any stage that threw lands here: finalize the run as failed and
                // report the error instead of propagating it to the caller.
                const summary = `Autonomous delivery loop failed for "${args.goal}": ${error?.message ?? String(error)}`;
                completeRun(runId, "failed", summary);
                return {
                    runId,
                    status: "failed",
                    summary,
                    recommendations,
                };
            }
        },
    },
    {
        name: "get_self_directed_delivery_run",
        description: "Load a previously recorded autonomous delivery run with all stage receipts, summaries, and final recommendations.",
        inputSchema: {
            type: "object",
            properties: {
                runId: {
                    type: "string",
                    description: "Autonomous delivery run ID.",
                },
            },
            required: ["runId"],
        },
        annotations: { readOnlyHint: true },
        /**
         * Load one run and its steps (ordered by start time).
         *
         * @param {{ runId: string }} args
         * @returns {Promise<object>} The run with parsed summaries and steps, or
         *   `{ error: true, message }` when no run has that ID.
         */
        handler: async (args) => {
            ensureSchema();
            const db = getDb();
            const run = db.prepare(`SELECT * FROM autonomous_delivery_runs WHERE run_id = ?`).get(args.runId);
            if (!run) {
                return {
                    error: true,
                    message: `Autonomous delivery run not found: ${args.runId}`,
                };
            }
            const steps = db
                .prepare(`
                    SELECT * FROM autonomous_delivery_steps
                    WHERE run_id = ?
                    ORDER BY started_at ASC
                `)
                .all(args.runId);
            // Stored columns hold either JSON or a plain string (see updateRun's
            // callers); fall back to the raw value when it is not valid JSON.
            const parse = (value) => {
                if (!value)
                    return null;
                try {
                    return JSON.parse(value);
                }
                catch {
                    return value;
                }
            };
            return {
                runId: run.run_id,
                goal: run.goal,
                status: run.status,
                summary: run.summary,
                createdAt: run.created_at,
                completedAt: run.completed_at,
                researchSummary: parse(run.research_summary),
                plan: parse(run.plan_json),
                implementationSummary: parse(run.implementation_summary),
                dogfoodSummary: parse(run.dogfood_summary),
                judgeSummary: parse(run.judge_summary),
                verificationSummary: parse(run.verification_summary),
                learningSummary: parse(run.learning_summary),
                steps: steps.map((step) => ({
                    stepId: step.step_id,
                    stage: step.stage,
                    status: step.status,
                    summary: step.summary,
                    startedAt: step.started_at,
                    completedAt: step.completed_at,
                    details: parse(step.details_json),
                })),
            };
        },
    },
    {
        name: "list_self_directed_delivery_runs",
        description: "List recent autonomous delivery runs so operators can reopen or compare them.",
        inputSchema: {
            type: "object",
            properties: {
                limit: {
                    type: "number",
                    description: "Maximum number of runs to return. Default: 20.",
                },
            },
        },
        annotations: { readOnlyHint: true },
        /**
         * List the most recently created runs, newest first.
         *
         * @param {{ limit?: number }} [args] - `limit` is floored and clamped to
         *   1..100; a missing or non-numeric limit falls back to 20.
         * @returns {Promise<{ count: number, runs: object[] }>}
         */
        handler: async (args) => {
            ensureSchema();
            const db = getDb();
            // No field is required by the schema, so tolerate a missing args object,
            // and never let NaN reach the LIMIT binding.
            const requestedLimit = Number(args?.limit ?? 20);
            const limit = Number.isNaN(requestedLimit)
                ? 20
                : Math.max(1, Math.min(100, Math.floor(requestedLimit)));
            const runs = db
                .prepare(`
                    SELECT run_id, goal, status, summary, created_at, completed_at
                    FROM autonomous_delivery_runs
                    ORDER BY created_at DESC
                    LIMIT ?
                `)
                .all(limit);
            return {
                count: runs.length,
                runs: runs.map((run) => ({
                    runId: run.run_id,
                    goal: run.goal,
                    status: run.status,
                    summary: run.summary,
                    createdAt: run.created_at,
                    completedAt: run.completed_at,
                })),
            };
        },
    },
];
1104
+ //# sourceMappingURL=autonomousDeliveryTools.js.map