@build-astron-co/nimbus 0.4.1 → 0.4.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (435) hide show
  1. package/CHANGELOG.md +268 -89
  2. package/README.md +26 -567
  3. package/dist/src/agent/compaction-agent.js +24 -12
  4. package/dist/src/agent/context-manager.js +2 -1
  5. package/dist/src/agent/expand-files.js +2 -1
  6. package/dist/src/agent/loop.js +71 -33
  7. package/dist/src/agent/permissions.js +4 -2
  8. package/dist/src/agent/system-prompt.js +34 -17
  9. package/dist/src/app.js +1 -1
  10. package/dist/src/auth/keychain.js +8 -4
  11. package/dist/src/auth/store.js +70 -107
  12. package/dist/src/cli/init.js +35 -19
  13. package/dist/src/cli/run.js +18 -10
  14. package/dist/src/cli/serve.js +4 -2
  15. package/dist/src/cli.js +52 -11
  16. package/dist/src/commands/alias.js +5 -3
  17. package/dist/src/commands/audit/index.js +2 -1
  18. package/dist/src/commands/aws-terraform.js +36 -18
  19. package/dist/src/commands/completions.js +1 -1
  20. package/dist/src/commands/config.js +3 -2
  21. package/dist/src/commands/connect-github.js +92 -0
  22. package/dist/src/commands/cost/index.js +3 -2
  23. package/dist/src/commands/deploy.js +15 -10
  24. package/dist/src/commands/doctor.js +9 -6
  25. package/dist/src/commands/drift/index.js +2 -1
  26. package/dist/src/commands/export.js +5 -3
  27. package/dist/src/commands/generate-terraform.js +110 -2
  28. package/dist/src/commands/import.js +3 -3
  29. package/dist/src/commands/incident.js +10 -5
  30. package/dist/src/commands/login.js +8 -93
  31. package/dist/src/commands/logs.js +16 -8
  32. package/dist/src/commands/onboarding.js +6 -4
  33. package/dist/src/commands/pipeline.js +6 -3
  34. package/dist/src/commands/plugin.js +3 -2
  35. package/dist/src/commands/profile.js +27 -14
  36. package/dist/src/commands/questionnaire.js +1 -1
  37. package/dist/src/commands/rollback.js +3 -2
  38. package/dist/src/commands/rollout.js +5 -3
  39. package/dist/src/commands/runbook.js +17 -10
  40. package/dist/src/commands/schedule.js +10 -5
  41. package/dist/src/commands/status.js +2 -1
  42. package/dist/src/commands/team-context.js +12 -7
  43. package/dist/src/commands/template.js +1 -1
  44. package/dist/src/commands/tf/index.js +6 -3
  45. package/dist/src/commands/upgrade.js +5 -3
  46. package/dist/src/commands/version.js +6 -3
  47. package/dist/src/commands/watch.js +6 -3
  48. package/dist/src/compat/sqlite.js +5 -3
  49. package/dist/src/config/mode-store.js +2 -1
  50. package/dist/src/config/profiles.js +4 -2
  51. package/dist/src/config/types.js +2 -1
  52. package/dist/src/engine/executor.js +8 -4
  53. package/dist/src/engine/planner.js +9 -5
  54. package/dist/src/llm/providers/anthropic.js +6 -3
  55. package/dist/src/llm/providers/ollama.js +1 -1
  56. package/dist/src/llm/router.js +22 -7
  57. package/dist/src/nimbus.js +1 -0
  58. package/dist/src/sessions/manager.js +6 -3
  59. package/dist/src/sharing/viewer.js +2 -1
  60. package/dist/src/tools/file-ops.js +1 -2
  61. package/dist/src/tools/schemas/devops.js +197 -108
  62. package/dist/src/tools/schemas/standard.js +1 -1
  63. package/dist/src/ui/App.js +25 -13
  64. package/dist/src/ui/FileDiffModal.js +22 -11
  65. package/dist/src/ui/HelpModal.js +2 -1
  66. package/dist/src/ui/InputBox.js +6 -3
  67. package/dist/src/ui/MessageList.js +40 -20
  68. package/dist/src/ui/TerminalPane.js +2 -1
  69. package/dist/src/ui/ToolCallDisplay.js +12 -6
  70. package/dist/src/ui/TreePane.js +2 -1
  71. package/dist/src/ui/ink/index.js +37 -21
  72. package/dist/src/version.js +1 -1
  73. package/dist/src/watcher/index.js +8 -4
  74. package/package.json +3 -5
  75. package/src/__tests__/alias.test.ts +0 -133
  76. package/src/__tests__/app.test.ts +0 -76
  77. package/src/__tests__/audit.test.ts +0 -877
  78. package/src/__tests__/circuit-breaker.test.ts +0 -116
  79. package/src/__tests__/cli-run.test.ts +0 -351
  80. package/src/__tests__/compat-sqlite.test.ts +0 -68
  81. package/src/__tests__/context-manager.test.ts +0 -632
  82. package/src/__tests__/context.test.ts +0 -242
  83. package/src/__tests__/devops-terminal-gaps.test.ts +0 -718
  84. package/src/__tests__/doctor.test.ts +0 -48
  85. package/src/__tests__/enterprise.test.ts +0 -401
  86. package/src/__tests__/export.test.ts +0 -236
  87. package/src/__tests__/gap-11-18-20.test.ts +0 -958
  88. package/src/__tests__/generator.test.ts +0 -433
  89. package/src/__tests__/helm-streaming.test.ts +0 -127
  90. package/src/__tests__/hooks.test.ts +0 -582
  91. package/src/__tests__/incident.test.ts +0 -179
  92. package/src/__tests__/init.test.ts +0 -487
  93. package/src/__tests__/intent-parser.test.ts +0 -229
  94. package/src/__tests__/llm-router.test.ts +0 -209
  95. package/src/__tests__/logs.test.ts +0 -107
  96. package/src/__tests__/loop-errors.test.ts +0 -244
  97. package/src/__tests__/lsp.test.ts +0 -293
  98. package/src/__tests__/modes.test.ts +0 -336
  99. package/src/__tests__/perf-optimizations.test.ts +0 -847
  100. package/src/__tests__/permissions.test.ts +0 -338
  101. package/src/__tests__/pipeline.test.ts +0 -50
  102. package/src/__tests__/polish-phase3.test.ts +0 -340
  103. package/src/__tests__/profile.test.ts +0 -237
  104. package/src/__tests__/rollback.test.ts +0 -83
  105. package/src/__tests__/runbook.test.ts +0 -219
  106. package/src/__tests__/schedule.test.ts +0 -206
  107. package/src/__tests__/serve.test.ts +0 -275
  108. package/src/__tests__/sessions.test.ts +0 -322
  109. package/src/__tests__/sharing.test.ts +0 -340
  110. package/src/__tests__/snapshots.test.ts +0 -581
  111. package/src/__tests__/standalone-migration.test.ts +0 -199
  112. package/src/__tests__/state-db.test.ts +0 -334
  113. package/src/__tests__/status.test.ts +0 -158
  114. package/src/__tests__/stream-with-tools.test.ts +0 -778
  115. package/src/__tests__/subagents.test.ts +0 -176
  116. package/src/__tests__/system-prompt.test.ts +0 -248
  117. package/src/__tests__/terminal-gap-v2.test.ts +0 -395
  118. package/src/__tests__/terminal-parity.test.ts +0 -393
  119. package/src/__tests__/tf-apply.test.ts +0 -187
  120. package/src/__tests__/tool-converter.test.ts +0 -256
  121. package/src/__tests__/tool-schemas.test.ts +0 -602
  122. package/src/__tests__/tools.test.ts +0 -144
  123. package/src/__tests__/version-json.test.ts +0 -184
  124. package/src/__tests__/version.test.ts +0 -49
  125. package/src/__tests__/watch.test.ts +0 -129
  126. package/src/agent/compaction-agent.ts +0 -266
  127. package/src/agent/context-manager.ts +0 -499
  128. package/src/agent/context.ts +0 -427
  129. package/src/agent/deploy-preview.ts +0 -487
  130. package/src/agent/expand-files.ts +0 -108
  131. package/src/agent/index.ts +0 -68
  132. package/src/agent/loop.ts +0 -1998
  133. package/src/agent/modes.ts +0 -429
  134. package/src/agent/permissions.ts +0 -513
  135. package/src/agent/subagents/base.ts +0 -116
  136. package/src/agent/subagents/cost.ts +0 -51
  137. package/src/agent/subagents/explore.ts +0 -42
  138. package/src/agent/subagents/general.ts +0 -54
  139. package/src/agent/subagents/index.ts +0 -102
  140. package/src/agent/subagents/infra.ts +0 -59
  141. package/src/agent/subagents/security.ts +0 -69
  142. package/src/agent/system-prompt.ts +0 -990
  143. package/src/app.ts +0 -180
  144. package/src/audit/activity-log.ts +0 -290
  145. package/src/audit/compliance-checker.ts +0 -540
  146. package/src/audit/cost-tracker.ts +0 -318
  147. package/src/audit/index.ts +0 -23
  148. package/src/audit/security-scanner.ts +0 -641
  149. package/src/auth/guard.ts +0 -75
  150. package/src/auth/index.ts +0 -56
  151. package/src/auth/keychain.ts +0 -82
  152. package/src/auth/oauth.ts +0 -465
  153. package/src/auth/providers.ts +0 -470
  154. package/src/auth/sso.ts +0 -113
  155. package/src/auth/store.ts +0 -505
  156. package/src/auth/types.ts +0 -187
  157. package/src/build.ts +0 -141
  158. package/src/cli/index.ts +0 -16
  159. package/src/cli/init.ts +0 -1227
  160. package/src/cli/openapi-spec.ts +0 -356
  161. package/src/cli/run.ts +0 -628
  162. package/src/cli/serve-auth.ts +0 -80
  163. package/src/cli/serve.ts +0 -539
  164. package/src/cli/web.ts +0 -71
  165. package/src/cli.ts +0 -1728
  166. package/src/clients/core-engine-client.ts +0 -227
  167. package/src/clients/enterprise-client.ts +0 -334
  168. package/src/clients/generator-client.ts +0 -351
  169. package/src/clients/git-client.ts +0 -627
  170. package/src/clients/github-client.ts +0 -410
  171. package/src/clients/helm-client.ts +0 -504
  172. package/src/clients/index.ts +0 -80
  173. package/src/clients/k8s-client.ts +0 -497
  174. package/src/clients/llm-client.ts +0 -161
  175. package/src/clients/rest-client.ts +0 -130
  176. package/src/clients/service-discovery.ts +0 -38
  177. package/src/clients/terraform-client.ts +0 -482
  178. package/src/clients/tools-client.ts +0 -1843
  179. package/src/clients/ws-client.ts +0 -115
  180. package/src/commands/alias.ts +0 -100
  181. package/src/commands/analyze/index.ts +0 -352
  182. package/src/commands/apply/helm.ts +0 -473
  183. package/src/commands/apply/index.ts +0 -213
  184. package/src/commands/apply/k8s.ts +0 -454
  185. package/src/commands/apply/terraform.ts +0 -582
  186. package/src/commands/ask.ts +0 -167
  187. package/src/commands/audit/index.ts +0 -357
  188. package/src/commands/auth-cloud.ts +0 -407
  189. package/src/commands/auth-list.ts +0 -134
  190. package/src/commands/auth-profile.ts +0 -121
  191. package/src/commands/auth-refresh.ts +0 -187
  192. package/src/commands/auth-status.ts +0 -141
  193. package/src/commands/aws/ec2.ts +0 -501
  194. package/src/commands/aws/iam.ts +0 -397
  195. package/src/commands/aws/index.ts +0 -133
  196. package/src/commands/aws/lambda.ts +0 -396
  197. package/src/commands/aws/rds.ts +0 -439
  198. package/src/commands/aws/s3.ts +0 -439
  199. package/src/commands/aws/vpc.ts +0 -393
  200. package/src/commands/aws-discover.ts +0 -542
  201. package/src/commands/aws-terraform.ts +0 -755
  202. package/src/commands/azure/aks.ts +0 -376
  203. package/src/commands/azure/functions.ts +0 -253
  204. package/src/commands/azure/index.ts +0 -116
  205. package/src/commands/azure/storage.ts +0 -478
  206. package/src/commands/azure/vm.ts +0 -355
  207. package/src/commands/billing/index.ts +0 -256
  208. package/src/commands/chat.ts +0 -320
  209. package/src/commands/completions.ts +0 -268
  210. package/src/commands/config.ts +0 -372
  211. package/src/commands/cost/cloud-cost-estimator.ts +0 -266
  212. package/src/commands/cost/estimator.ts +0 -79
  213. package/src/commands/cost/index.ts +0 -810
  214. package/src/commands/cost/parsers/terraform.ts +0 -273
  215. package/src/commands/cost/parsers/types.ts +0 -25
  216. package/src/commands/cost/pricing/aws.ts +0 -544
  217. package/src/commands/cost/pricing/azure.ts +0 -499
  218. package/src/commands/cost/pricing/gcp.ts +0 -396
  219. package/src/commands/cost/pricing/index.ts +0 -40
  220. package/src/commands/demo.ts +0 -250
  221. package/src/commands/deploy.ts +0 -260
  222. package/src/commands/doctor.ts +0 -1386
  223. package/src/commands/drift/index.ts +0 -787
  224. package/src/commands/explain.ts +0 -277
  225. package/src/commands/export.ts +0 -146
  226. package/src/commands/feedback.ts +0 -389
  227. package/src/commands/fix.ts +0 -324
  228. package/src/commands/fs/index.ts +0 -402
  229. package/src/commands/gcp/compute.ts +0 -325
  230. package/src/commands/gcp/functions.ts +0 -271
  231. package/src/commands/gcp/gke.ts +0 -438
  232. package/src/commands/gcp/iam.ts +0 -344
  233. package/src/commands/gcp/index.ts +0 -129
  234. package/src/commands/gcp/storage.ts +0 -284
  235. package/src/commands/generate-helm.ts +0 -1249
  236. package/src/commands/generate-k8s.ts +0 -1508
  237. package/src/commands/generate-terraform.ts +0 -1202
  238. package/src/commands/gh/index.ts +0 -863
  239. package/src/commands/git/index.ts +0 -1343
  240. package/src/commands/helm/index.ts +0 -1126
  241. package/src/commands/help.ts +0 -715
  242. package/src/commands/history.ts +0 -149
  243. package/src/commands/import.ts +0 -868
  244. package/src/commands/incident.ts +0 -166
  245. package/src/commands/index.ts +0 -367
  246. package/src/commands/init.ts +0 -1051
  247. package/src/commands/k8s/index.ts +0 -1137
  248. package/src/commands/login.ts +0 -716
  249. package/src/commands/logout.ts +0 -83
  250. package/src/commands/logs.ts +0 -167
  251. package/src/commands/onboarding.ts +0 -405
  252. package/src/commands/pipeline.ts +0 -186
  253. package/src/commands/plan/display.ts +0 -279
  254. package/src/commands/plan/index.ts +0 -599
  255. package/src/commands/plugin.ts +0 -398
  256. package/src/commands/preview.ts +0 -452
  257. package/src/commands/profile.ts +0 -342
  258. package/src/commands/questionnaire.ts +0 -1172
  259. package/src/commands/resume.ts +0 -47
  260. package/src/commands/rollback.ts +0 -315
  261. package/src/commands/rollout.ts +0 -88
  262. package/src/commands/runbook.ts +0 -346
  263. package/src/commands/schedule.ts +0 -236
  264. package/src/commands/status.ts +0 -252
  265. package/src/commands/team/index.ts +0 -346
  266. package/src/commands/team-context.ts +0 -220
  267. package/src/commands/template.ts +0 -233
  268. package/src/commands/tf/index.ts +0 -1093
  269. package/src/commands/upgrade.ts +0 -607
  270. package/src/commands/usage/index.ts +0 -134
  271. package/src/commands/version.ts +0 -174
  272. package/src/commands/watch.ts +0 -153
  273. package/src/compat/index.ts +0 -2
  274. package/src/compat/runtime.ts +0 -12
  275. package/src/compat/sqlite.ts +0 -177
  276. package/src/config/index.ts +0 -17
  277. package/src/config/manager.ts +0 -530
  278. package/src/config/mode-store.ts +0 -62
  279. package/src/config/profiles.ts +0 -84
  280. package/src/config/safety-policy.ts +0 -358
  281. package/src/config/schema.ts +0 -125
  282. package/src/config/types.ts +0 -609
  283. package/src/config/workspace-state.ts +0 -53
  284. package/src/context/context-db.ts +0 -199
  285. package/src/demo/index.ts +0 -349
  286. package/src/demo/scenarios/full-journey.ts +0 -229
  287. package/src/demo/scenarios/getting-started.ts +0 -127
  288. package/src/demo/scenarios/helm-release.ts +0 -341
  289. package/src/demo/scenarios/k8s-deployment.ts +0 -194
  290. package/src/demo/scenarios/terraform-vpc.ts +0 -170
  291. package/src/demo/types.ts +0 -92
  292. package/src/engine/cost-estimator.ts +0 -480
  293. package/src/engine/diagram-generator.ts +0 -256
  294. package/src/engine/drift-detector.ts +0 -902
  295. package/src/engine/executor.ts +0 -1066
  296. package/src/engine/index.ts +0 -76
  297. package/src/engine/orchestrator.ts +0 -636
  298. package/src/engine/planner.ts +0 -787
  299. package/src/engine/safety.ts +0 -743
  300. package/src/engine/verifier.ts +0 -770
  301. package/src/enterprise/audit.ts +0 -348
  302. package/src/enterprise/auth.ts +0 -270
  303. package/src/enterprise/billing.ts +0 -822
  304. package/src/enterprise/index.ts +0 -17
  305. package/src/enterprise/teams.ts +0 -443
  306. package/src/generator/best-practices.ts +0 -1608
  307. package/src/generator/helm.ts +0 -630
  308. package/src/generator/index.ts +0 -37
  309. package/src/generator/intent-parser.ts +0 -514
  310. package/src/generator/kubernetes.ts +0 -976
  311. package/src/generator/terraform.ts +0 -1875
  312. package/src/history/index.ts +0 -8
  313. package/src/history/manager.ts +0 -250
  314. package/src/history/types.ts +0 -34
  315. package/src/hooks/config.ts +0 -432
  316. package/src/hooks/engine.ts +0 -392
  317. package/src/hooks/index.ts +0 -4
  318. package/src/llm/auth-bridge.ts +0 -198
  319. package/src/llm/circuit-breaker.ts +0 -140
  320. package/src/llm/config-loader.ts +0 -201
  321. package/src/llm/cost-calculator.ts +0 -171
  322. package/src/llm/index.ts +0 -8
  323. package/src/llm/model-aliases.ts +0 -115
  324. package/src/llm/provider-registry.ts +0 -63
  325. package/src/llm/providers/anthropic.ts +0 -462
  326. package/src/llm/providers/bedrock.ts +0 -477
  327. package/src/llm/providers/google.ts +0 -405
  328. package/src/llm/providers/ollama.ts +0 -767
  329. package/src/llm/providers/openai-compatible.ts +0 -340
  330. package/src/llm/providers/openai.ts +0 -328
  331. package/src/llm/providers/openrouter.ts +0 -338
  332. package/src/llm/router.ts +0 -1104
  333. package/src/llm/types.ts +0 -232
  334. package/src/lsp/client.ts +0 -298
  335. package/src/lsp/languages.ts +0 -119
  336. package/src/lsp/manager.ts +0 -294
  337. package/src/mcp/client.ts +0 -402
  338. package/src/mcp/index.ts +0 -5
  339. package/src/mcp/manager.ts +0 -133
  340. package/src/nimbus.ts +0 -233
  341. package/src/plugins/index.ts +0 -27
  342. package/src/plugins/loader.ts +0 -334
  343. package/src/plugins/manager.ts +0 -376
  344. package/src/plugins/types.ts +0 -284
  345. package/src/scanners/cicd-scanner.ts +0 -258
  346. package/src/scanners/cloud-scanner.ts +0 -466
  347. package/src/scanners/framework-scanner.ts +0 -469
  348. package/src/scanners/iac-scanner.ts +0 -388
  349. package/src/scanners/index.ts +0 -539
  350. package/src/scanners/language-scanner.ts +0 -276
  351. package/src/scanners/package-manager-scanner.ts +0 -277
  352. package/src/scanners/types.ts +0 -172
  353. package/src/sessions/manager.ts +0 -472
  354. package/src/sessions/types.ts +0 -44
  355. package/src/sharing/sync.ts +0 -300
  356. package/src/sharing/viewer.ts +0 -163
  357. package/src/snapshots/index.ts +0 -2
  358. package/src/snapshots/manager.ts +0 -530
  359. package/src/state/artifacts.ts +0 -147
  360. package/src/state/audit.ts +0 -137
  361. package/src/state/billing.ts +0 -240
  362. package/src/state/checkpoints.ts +0 -117
  363. package/src/state/config.ts +0 -67
  364. package/src/state/conversations.ts +0 -14
  365. package/src/state/credentials.ts +0 -154
  366. package/src/state/db.ts +0 -58
  367. package/src/state/index.ts +0 -26
  368. package/src/state/messages.ts +0 -115
  369. package/src/state/projects.ts +0 -123
  370. package/src/state/schema.ts +0 -236
  371. package/src/state/sessions.ts +0 -147
  372. package/src/state/teams.ts +0 -200
  373. package/src/telemetry.ts +0 -108
  374. package/src/tools/aws-ops.ts +0 -952
  375. package/src/tools/azure-ops.ts +0 -579
  376. package/src/tools/file-ops.ts +0 -615
  377. package/src/tools/gcp-ops.ts +0 -625
  378. package/src/tools/git-ops.ts +0 -773
  379. package/src/tools/github-ops.ts +0 -799
  380. package/src/tools/helm-ops.ts +0 -943
  381. package/src/tools/index.ts +0 -17
  382. package/src/tools/k8s-ops.ts +0 -819
  383. package/src/tools/schemas/converter.ts +0 -184
  384. package/src/tools/schemas/devops.ts +0 -3502
  385. package/src/tools/schemas/index.ts +0 -73
  386. package/src/tools/schemas/standard.ts +0 -1148
  387. package/src/tools/schemas/types.ts +0 -735
  388. package/src/tools/spawn-exec.ts +0 -148
  389. package/src/tools/terraform-ops.ts +0 -862
  390. package/src/types/ambient.d.ts +0 -193
  391. package/src/types/config.ts +0 -83
  392. package/src/types/drift.ts +0 -116
  393. package/src/types/enterprise.ts +0 -335
  394. package/src/types/index.ts +0 -20
  395. package/src/types/plan.ts +0 -44
  396. package/src/types/request.ts +0 -65
  397. package/src/types/response.ts +0 -54
  398. package/src/types/service.ts +0 -51
  399. package/src/ui/App.tsx +0 -2114
  400. package/src/ui/DeployPreview.tsx +0 -174
  401. package/src/ui/FileDiffModal.tsx +0 -162
  402. package/src/ui/Header.tsx +0 -131
  403. package/src/ui/HelpModal.tsx +0 -57
  404. package/src/ui/InputBox.tsx +0 -503
  405. package/src/ui/MessageList.tsx +0 -1032
  406. package/src/ui/PermissionPrompt.tsx +0 -163
  407. package/src/ui/StatusBar.tsx +0 -277
  408. package/src/ui/TerminalPane.tsx +0 -84
  409. package/src/ui/ToolCallDisplay.tsx +0 -643
  410. package/src/ui/TreePane.tsx +0 -132
  411. package/src/ui/chat-ui.ts +0 -850
  412. package/src/ui/index.ts +0 -33
  413. package/src/ui/ink/index.ts +0 -1444
  414. package/src/ui/streaming.ts +0 -176
  415. package/src/ui/theme.ts +0 -104
  416. package/src/ui/types.ts +0 -75
  417. package/src/utils/analytics.ts +0 -72
  418. package/src/utils/cost-warning.ts +0 -27
  419. package/src/utils/env.ts +0 -46
  420. package/src/utils/errors.ts +0 -69
  421. package/src/utils/event-bus.ts +0 -38
  422. package/src/utils/index.ts +0 -24
  423. package/src/utils/logger.ts +0 -171
  424. package/src/utils/rate-limiter.ts +0 -121
  425. package/src/utils/service-auth.ts +0 -49
  426. package/src/utils/validation.ts +0 -53
  427. package/src/version.ts +0 -4
  428. package/src/watcher/index.ts +0 -214
  429. package/src/wizard/approval.ts +0 -383
  430. package/src/wizard/index.ts +0 -25
  431. package/src/wizard/prompts.ts +0 -338
  432. package/src/wizard/types.ts +0 -172
  433. package/src/wizard/ui.ts +0 -556
  434. package/src/wizard/wizard.ts +0 -304
  435. package/tsconfig.json +0 -24
package/src/agent/loop.ts DELETED
@@ -1,1998 +0,0 @@
1
- /**
2
- * Core Agentic Loop
3
- *
4
- * Implements the autonomous agent loop:
5
- * 1. Build context (system prompt + history + tools)
6
- * 2. Send to LLM with tools enabled
7
- * 3. Stream text response
8
- * 4. If tool_use: check permissions → execute → collect results
9
- * 5. Append messages → loop back to LLM
10
- * 6. Exit when LLM returns end_turn (no more tool calls)
11
- *
12
- * This is the heart of the Nimbus agent. Every user message enters
13
- * {@link runAgentLoop}, which orchestrates a multi-turn conversation with
14
- * the LLM, executing tools on its behalf until it signals completion by
15
- * returning a response with no further tool calls.
16
- *
17
- * @module agent/loop
18
- */
19
-
20
- import { join } from 'node:path';
21
- import type { LLMRouter } from '../llm/router';
22
- import type {
23
- LLMMessage,
24
- ToolCall,
25
- ToolCompletionRequest,
26
- ToolDefinition as LLMToolDefinition,
27
- } from '../llm/types';
28
- import {
29
- toOpenAITool,
30
- type ToolDefinition,
31
- type ToolExecuteContext,
32
- type ToolResult,
33
- type ToolRegistry,
34
- } from '../tools/schemas/types';
35
- import { buildSystemPrompt, type AgentMode } from './system-prompt';
36
- import type { ContextManager, CompactionResult } from './context-manager';
37
- import { runCompaction } from './compaction-agent';
38
- import type { LSPManager } from '../lsp/manager';
39
- import { SnapshotManager } from '../snapshots/manager';
40
- import { calculateCost } from '../llm/cost-calculator';
41
- import {
42
- HookEngine,
43
- runPreToolHooks,
44
- runPostToolHooks,
45
- type HookContext,
46
- } from '../hooks/engine';
47
- import { maskSecrets } from '../audit/security-scanner';
48
- import { classifyTaskComplexity, routeModel } from '../llm/router';
49
- import { mkdirSync as _cpMkdirSync, writeFileSync as _cpWriteFileSync } from 'node:fs';
50
- import { homedir as _cpHomedir } from 'node:os';
51
-
52
- // ---------------------------------------------------------------------------
53
- // C2: Infra state checkpoint helper
54
- // ---------------------------------------------------------------------------
55
-
56
- /**
57
- * Write a checkpoint JSON file to ~/.nimbus/infra-checkpoints/<timestamp>.json
58
- * before a mutating terraform or helm operation. Non-blocking — errors are swallowed.
59
- */
60
- function writeInfraCheckpoint(tool: string, action: string, input: Record<string, unknown>): void {
61
- try {
62
- const checkpointsDir = join(_cpHomedir(), '.nimbus', 'infra-checkpoints');
63
- _cpMkdirSync(checkpointsDir, { recursive: true });
64
- // Sanitize: remove any field that looks like a secret
65
- const sanitized: Record<string, unknown> = {};
66
- for (const [k, v] of Object.entries(input)) {
67
- const lower = k.toLowerCase();
68
- if (lower.includes('secret') || lower.includes('password') || lower.includes('token') || lower.includes('key')) {
69
- sanitized[k] = '[redacted]';
70
- } else {
71
- sanitized[k] = v;
72
- }
73
- }
74
- const timestamp = new Date().toISOString();
75
- const checkpoint = {
76
- timestamp,
77
- tool,
78
- action,
79
- input: sanitized,
80
- cwd: process.cwd(),
81
- workdir: (input.workdir as string | undefined) ?? undefined,
82
- };
83
- const fileName = timestamp.replace(/[:.]/g, '-') + '.json';
84
- _cpWriteFileSync(
85
- join(checkpointsDir, fileName),
86
- JSON.stringify(checkpoint, null, 2),
87
- 'utf-8'
88
- );
89
- } catch { /* non-critical */ }
90
- }
91
-
92
- // ---------------------------------------------------------------------------
93
- // Helpers
94
- // ---------------------------------------------------------------------------
95
-
96
- // ---------------------------------------------------------------------------
97
- // Module-level compiled regex constants for classifyDevOpsError (PERF-1d).
98
- // Hoisted here so they compile once at module load rather than per-call.
99
- // ---------------------------------------------------------------------------
100
-
101
- const _RE_CREDENTIAL_EXPIRY_AWS = /ExpiredTokenException|TokenExpiredException|token.*has.*expired/i;
102
- const _RE_CREDENTIAL_EXPIRY_GCP = /credentials.*expired|Application Default Credentials.*expired|re-authenticate/i;
103
- const _RE_CREDENTIAL_EXPIRY_AZURE = /AADSTS70008|InteractionRequired|credential.*expired/i;
104
- const _RE_CMD_NOT_FOUND = /command not found|not found|no such file or directory/i;
105
-
106
- /**
107
- * Classify a DevOps tool error and return an actionable hint for the LLM.
108
- * Returns null for unrecognized errors so we don't pollute the context.
109
- */
110
- function classifyDevOpsError(toolName: string, errorOutput: string, nimbusInstructions?: string): string | null {
111
- const e = errorOutput.toLowerCase();
112
-
113
- // GAP-13: Credential expiry patterns — must come first for fast matching
114
- const CREDENTIAL_EXPIRY = [
115
- { re: _RE_CREDENTIAL_EXPIRY_AWS, provider: 'aws' },
116
- { re: _RE_CREDENTIAL_EXPIRY_GCP, provider: 'gcp' },
117
- { re: _RE_CREDENTIAL_EXPIRY_AZURE, provider: 'azure' },
118
- ];
119
- for (const { re, provider } of CREDENTIAL_EXPIRY) {
120
- if (re.test(errorOutput)) {
121
- return `Your ${provider.toUpperCase()} credentials have expired.\n\nRun: \`nimbus auth-refresh --provider ${provider}\` to refresh them.`;
122
- }
123
- }
124
-
125
- // G3: "command not found" — provide installation hints for DevOps CLIs
126
- const INSTALL_HINTS: Record<string, string> = {
127
- terraform: 'brew install terraform OR https://developer.hashicorp.com/terraform/install',
128
- kubectl: 'brew install kubectl OR https://kubernetes.io/docs/tasks/tools/',
129
- helm: 'brew install helm OR https://helm.sh/docs/intro/install/',
130
- docker: 'brew install --cask docker OR https://docs.docker.com/get-docker/',
131
- aws: 'brew install awscli OR pip install awscli',
132
- gcloud: 'brew install --cask google-cloud-sdk',
133
- az: 'brew install azure-cli',
134
- };
135
- if (_RE_CMD_NOT_FOUND.test(errorOutput)) {
136
- for (const [cmd, hint] of Object.entries(INSTALL_HINTS)) {
137
- if (toolName.includes(cmd) || e.includes(`'${cmd}'`) || e.includes(`"${cmd}"`)) {
138
- return `\`${cmd}\` is not installed.\n\nInstall: ${hint}`;
139
- }
140
- }
141
- }
142
-
143
- // Terraform errors
144
- if (toolName === 'terraform' || e.includes('terraform')) {
145
- if (e.includes('no such file or directory') && e.includes('.terraform')) {
146
- return 'HINT: Run `terraform init` first — the .terraform directory is missing.';
147
- }
148
- if (e.includes('provider') && e.includes('required') && e.includes('terraform')) {
149
- return 'HINT: Run `terraform init -upgrade` to download or upgrade required providers.';
150
- }
151
- if (e.includes('no valid credential') || e.includes('no credentials')) {
152
- return 'HINT: AWS/cloud credentials are missing. Check `aws configure` or environment variables.';
153
- }
154
- if (e.includes('state lock') || e.includes('lock file')) {
155
- return 'HINT: Terraform state is locked. If no other operation is running, use `terraform force-unlock <lock-id>`.';
156
- }
157
- if (e.includes('module not installed') || e.includes('module source')) {
158
- return 'HINT: Run `terraform init` to install required modules.';
159
- }
160
- if (e.includes('quota') || e.includes('limit exceeded') || e.includes('vcpu')) {
161
- return 'HINT: Cloud resource quota exceeded. Request a limit increase in the cloud console.';
162
- }
163
- }
164
-
165
- // Kubernetes errors
166
- if (toolName === 'kubectl' || toolName === 'kubectl_context') {
167
- if (e.includes('connection refused') || e.includes('unable to connect')) {
168
- return 'HINT: Cannot reach the Kubernetes API server. Check `kubectl config current-context` and ensure the cluster is accessible.';
169
- }
170
- if (e.includes('unauthorized') || e.includes('forbidden')) {
171
- return 'HINT: Insufficient permissions. Check your kubeconfig credentials or RBAC roles.';
172
- }
173
- if (e.includes('not found') && e.includes('namespace')) {
174
- return 'HINT: The namespace does not exist. Create it with `kubectl create namespace <name>` first.';
175
- }
176
- if (e.includes('image') && (e.includes('not found') || e.includes('pull'))) {
177
- return 'HINT: Container image pull failed. Verify the image name, tag, and registry credentials (imagePullSecret).';
178
- }
179
- }
180
-
181
- // Helm errors
182
- if (toolName === 'helm' || toolName === 'helm_values') {
183
- if (e.includes('chart not found') || e.includes('no such chart')) {
184
- return 'HINT: Chart not found. Run `helm repo update` and verify the chart name.';
185
- }
186
- if (e.includes('release not found')) {
187
- return 'HINT: Helm release not found. Use `helm list -A` to see all releases across namespaces.';
188
- }
189
- if (e.includes('unable to build kubernetes objects') || e.includes('manifest')) {
190
- return 'HINT: Helm template rendering failed. Run `helm template <release> <chart>` to debug the manifests.';
191
- }
192
- }
193
-
194
- // Cloud CLI errors
195
- if (toolName === 'cloud_discover' || toolName === 'cloud_action') {
196
- if (e.includes('not authorized') || e.includes('access denied') || e.includes('unauthorized')) {
197
- return 'HINT: Cloud credentials lack required permissions. Check IAM policies/roles for the operation.';
198
- }
199
- if (e.includes('region') && e.includes('not found')) {
200
- return 'HINT: Invalid region. Check `aws configure get region` or pass --region explicitly.';
201
- }
202
- }
203
-
204
- // Docker errors
205
- if (toolName === 'docker') {
206
- if (e.includes('cannot connect to the docker daemon') || e.includes('docker daemon') || e.includes('docker.sock')) {
207
- return 'HINT: Docker daemon is not running. Start it with `colima start` (macOS) or `sudo systemctl start docker` (Linux).';
208
- }
209
- if (e.includes('manifest unknown') || e.includes('manifest not found') || e.includes('not found')) {
210
- return 'HINT: Image not found. Verify the image name and tag. Check registry credentials with `docker login`.';
211
- }
212
- if (e.includes('no space left on device') || e.includes('no space left')) {
213
- return 'HINT: Docker disk space exhausted. Run `docker system prune -f` to reclaim space.';
214
- }
215
- if (e.includes('permission denied') && e.includes('docker')) {
216
- return 'HINT: Docker permission denied. Add your user to the docker group: `sudo usermod -aG docker $USER`.';
217
- }
218
- }
219
-
220
- // Secrets errors
221
- if (toolName === 'secrets') {
222
- if (e.includes('permission denied') || e.includes('403') || e.includes('accessdenied')) {
223
- return 'HINT: Secrets access denied. Check Vault policy with `vault policy read <policy>` or IAM role permissions.';
224
- }
225
- if (e.includes('secret not found') || e.includes('no such secret') || e.includes('resourcenotfoundexception')) {
226
- return 'HINT: Secret not found. Verify the secret path/name and namespace. Use `vault kv list <mount>` to browse.';
227
- }
228
- if (e.includes('invalid token') || e.includes('token expired')) {
229
- return 'HINT: Vault/cloud token expired. Run `vault login` or refresh cloud credentials with `nimbus auth-refresh`.';
230
- }
231
- }
232
-
233
- // CI/CD errors
234
- if (toolName === 'cicd') {
235
- if (e.includes('workflow not found') || e.includes('could not find workflow')) {
236
- return 'HINT: Workflow not found. Check the workflow filename in .github/workflows/ and the branch name.';
237
- }
238
- if (e.includes('rate limit') || e.includes('429') || e.includes('too many requests')) {
239
- return 'HINT: API rate limited. Wait 60 seconds and retry. Check rate limit headers for reset time.';
240
- }
241
- if (e.includes('unauthorized') || e.includes('401') || e.includes('bad credentials')) {
242
- return 'HINT: CI/CD authentication failed. Check GITHUB_TOKEN, GITLAB_TOKEN, or CIRCLECI_TOKEN environment variables.';
243
- }
244
- }
245
-
246
- // GitOps errors
247
- if (toolName === 'gitops') {
248
- if (e.includes('not found') || e.includes('not logged in') || e.includes('unauthenticated')) {
249
- return 'HINT: ArgoCD/Flux not accessible. Check ARGOCD_SERVER and ARGOCD_TOKEN env vars, or run `argocd login`.';
250
- }
251
- if (e.includes('comparisonerror') || e.includes('sync error')) {
252
- return 'HINT: GitOps sync error. Validate manifests: `kubectl apply --dry-run=client -f <manifest>` to find issues.';
253
- }
254
- if (e.includes('health') && e.includes('degraded')) {
255
- return 'HINT: Application is degraded. Check pod logs with `kubectl logs -n <ns>` and events with `kubectl get events -n <ns>`.';
256
- }
257
- }
258
-
259
- // Monitoring errors
260
- if (toolName === 'monitor') {
261
- if (e.includes('connection refused') || e.includes('could not connect')) {
262
- return 'HINT: Cannot connect to monitoring endpoint. Check PROMETHEUS_URL, GRAFANA_URL, or cloud region configuration.';
263
- }
264
- if (e.includes('unauthorized') || e.includes('403')) {
265
- return 'HINT: Monitoring authentication failed. Check DD_API_KEY, GRAFANA_TOKEN, or NEW_RELIC_API_KEY environment variables.';
266
- }
267
- }
268
-
269
- // L3: Parse NIMBUS.md custom error hints section
270
- if (nimbusInstructions) {
271
- const hintsMatch = nimbusInstructions.match(/##\s*Custom Error Hints\s*\n([\s\S]*?)(?=\n##|\n$|$)/i);
272
- if (hintsMatch) {
273
- const hintsSection = hintsMatch[1];
274
- const hintLines = hintsSection.split('\n').filter(l => l.trim().startsWith('-'));
275
- for (const line of hintLines) {
276
- // Format: "- pattern: hint message"
277
- const colonIdx = line.indexOf(':');
278
- if (colonIdx > 0) {
279
- const pattern = line.slice(1, colonIdx).trim();
280
- const hint = line.slice(colonIdx + 1).trim();
281
- if (pattern && hint && errorOutput.toLowerCase().includes(pattern.toLowerCase())) {
282
- return `HINT: ${hint}`;
283
- }
284
- }
285
- }
286
- }
287
- }
288
-
289
- return null;
290
- }
291
-
292
/**
 * Names of the DevOps tools that receive a self-diagnosis hint when one of
 * their errors is not recognized by a more specific rule.
 */
const DEVOPS_TOOL_NAMES = new Set([
  'terraform',
  'kubectl',
  'kubectl_context',
  'helm',
  'helm_values',
  'bash',
  'cloud_discover',
  'drift_detect',
  'deploy_preview',
  'docker',
  'secrets',
  'cicd',
  'monitor',
  'gitops',
  'cloud_action',
  'logs',
  'certs',
  'mesh',
  'cfn',
  'k8s_rbac',
]);
299
-
300
/**
 * Format a Zod (or generic) tool-input validation error into a human-readable
 * message that tells the LLM exactly which fields are wrong and how to fix them.
 *
 * An error is treated as Zod-like only when it carries a non-empty `issues`
 * array. Anything else (including objects whose `issues` property is not an
 * array) falls back to the generic "failed" message, so this formatter never
 * throws while reporting an error.
 *
 * @param toolName - Name of the tool whose input was rejected.
 * @param err - The thrown value; may be a ZodError, an Error, or anything else.
 * @returns A message suitable for returning to the LLM as the tool result.
 */
function formatToolInputError(toolName: string, err: unknown): string {
  if (err && typeof err === 'object' && 'issues' in err) {
    const issues: unknown = (err as { issues: unknown }).issues;
    if (Array.isArray(issues) && issues.length > 0) {
      const details = issues
        .map((issue: unknown) => {
          const { path, message } = (issue ?? {}) as { path?: unknown; message?: unknown };
          // An empty or missing path means the problem is with the input as a whole.
          const location = (Array.isArray(path) ? path.join('.') : '') || '(root)';
          const reason = typeof message === 'string' ? message : 'invalid value';
          return `  - ${location}: ${reason}`;
        })
        .join('\n');
      return `Tool "${toolName}" received invalid input:\n${details}\n\nPlease correct the arguments and retry.`;
    }
  }
  return `Tool "${toolName}" failed: ${err instanceof Error ? err.message : String(err)}`;
}
315
-
316
/**
 * Decide whether a streaming failure is transient and therefore worth retrying.
 *
 * Only object-shaped errors are inspected. A numeric `status` (or, failing
 * that, `statusCode`) of 429 or any 5xx is retryable; otherwise the `message`
 * text is scanned for rate-limit / overload wording.
 */
function isRetryableStreamError(err: unknown): boolean {
  if (!err || typeof err !== 'object') {
    return false;
  }
  const fields = err as Record<string, unknown>;

  // Prefer `status`; fall back to `statusCode` only when `status` is not numeric.
  let httpStatus: number | undefined;
  if (typeof fields.status === 'number') {
    httpStatus = fields.status;
  } else if (typeof fields.statusCode === 'number') {
    httpStatus = fields.statusCode;
  }

  if (httpStatus !== undefined) {
    const isServerError = httpStatus >= 500 && httpStatus < 600;
    if (httpStatus === 429 || isServerError) {
      return true;
    }
  }

  const text = typeof fields.message === 'string' ? fields.message : '';
  return /rate.?limit|429|too many requests|overloaded|503/i.test(text);
}
329
-
330
- // ---------------------------------------------------------------------------
331
- // G3: Runaway protection helpers
332
- // ---------------------------------------------------------------------------
333
-
334
/** Whole-word verbs that mark a tool invocation as destructive (case-insensitive). */
const DESTRUCTIVE_PATTERNS = /\b(apply|destroy|delete|terminate|stop|remove|drop|truncate|purge)\b/i;

/** Tools whose destructive invocations count toward the session-level limit. */
const DESTRUCTIVE_TOOL_NAMES = new Set([
  'terraform',
  'kubectl',
  'docker',
  'aws',
  'gcloud',
  'az',
  'cloud_action',
  'cfn',
]);

/**
 * Report whether a tool call looks like a destructive infrastructure operation.
 * Feeds the session-level destructive-ops counter (G3).
 *
 * @param toolName - Registry name of the tool being invoked.
 * @param inputStr - The tool arguments rendered as a string.
 */
function isDestructiveOp(toolName: string, inputStr: string): boolean {
  if (!DESTRUCTIVE_TOOL_NAMES.has(toolName)) {
    return false;
  }
  return DESTRUCTIVE_PATTERNS.test(inputStr);
}
349
-
350
- // ---------------------------------------------------------------------------
351
- // Public Types
352
- // ---------------------------------------------------------------------------
353
-
354
/** Configuration accepted by the agent loop. */
export interface AgentLoopOptions {
  /** LLM router used to stream completions. */
  router: LLMRouter;

  /** Registry from which the loop takes its available tools. */
  toolRegistry: ToolRegistry;

  /** Agent mode (plan/build/deploy); determines which tools are offered. */
  mode: AgentMode;

  /** Upper bound on LLM turns before the loop stops (default: 50). */
  maxTurns?: number;

  /**
   * G3: cap on tool calls within a single LLM turn, guarding against
   * runaway tool-call loops. Default: 20.
   */
  maxToolCallsPerTurn?: number;

  /**
   * G3: cap on destructive operations within a single session. Reaching it
   * adds a warning to the tool result. Default: 5.
   */
  maxDestructiveOpsPerSession?: number;

  /** Model identifier, e.g. `'anthropic/claude-sonnet-4-20250514'`. */
  model?: string;

  /**
   * Gap 18: pick the model automatically from task complexity
   * (simple → haiku, moderate → sonnet, complex → opus).
   * Ignored when `model` is set explicitly.
   */
  autoRouteModel?: boolean;

  /** Working directory for the session. */
  cwd?: string;

  /** Contents of NIMBUS.md, injected into the system prompt. */
  nimbusInstructions?: string;

  /**
   * Gaps 7 & 10: live infrastructure context (terraform workspace, kubectl
   * context, …) injected into the system prompt. When omitted and `cwd` is
   * set, the loop attempts a best-effort discovery itself.
   */
  infraContext?: {
    terraformWorkspace?: string;
    kubectlContext?: string;
    helmReleases?: string[];
    awsAccount?: string;
    awsRegion?: string;
    gcpProject?: string;
  };

  /** Receives streamed text as it arrives. */
  onText?: (text: string) => void;

  /** Invoked when a tool call begins. */
  onToolCallStart?: (toolCall: ToolCallInfo) => void;

  /** Invoked when a tool call finishes, with its result. */
  onToolCallEnd?: (toolCall: ToolCallInfo, result: ToolResult) => void;

  /**
   * Gap 1 (live streaming): invoked for every chunk of streamed tool output,
   * with the tool call ID and the chunk text.
   */
  onToolOutputChunk?: (toolId: string, chunk: string) => void;

  /**
   * Permission gate consulted before a tool executes.
   * When absent, every tool runs without prompting.
   */
  checkPermission?: (tool: ToolDefinition, input: unknown) => Promise<PermissionDecision>;

  /** Cancellation signal (Ctrl+C). */
  signal?: AbortSignal;

  /** Session ID for persistence (reserved for future use). */
  sessionId?: string;

  /**
   * Context manager enabling auto-compact: after each tool-call turn the
   * loop checks context usage and compacts when the threshold is exceeded.
   */
  contextManager?: ContextManager;

  /** Invoked when auto-compact runs; receives the result including token savings. */
  onCompact?: (result: CompactionResult) => void;

  /**
   * LSP manager for post-edit diagnostics. After file-editing tools the loop
   * queries the language server and appends diagnostics to the tool result
   * so the LLM can correct type errors and similar issues itself.
   */
  lspManager?: LSPManager;

  /**
   * Snapshot manager; a snapshot is captured before each file-modifying
   * tool call so the user can undo/redo changes.
   */
  snapshotManager?: SnapshotManager;

  /**
   * Hook engine for PreToolUse/PostToolUse/PermissionRequest hooks; matching
   * hook scripts run around each tool call.
   */
  hookEngine?: HookEngine;

  /**
   * Invoked after every LLM turn with the accumulated usage and cost, so the
   * TUI can refresh its cost/token display during multi-turn runs rather
   * than only at the end.
   */
  onUsage?: (usage: AgentLoopUsage, costUSD: number) => void;

  /**
   * Diff-preview hook called before edit_file/multi_edit/write_file.
   * A 'reject' result skips the tool call; 'apply-all' suppresses further prompts.
   */
  requestFileDiff?: (
    path: string,
    toolName: string,
    diff: string
  ) => Promise<FileDiffDecision>;

  /**
   * Internal flag tied to the 'apply-all' diff decision: remaining diff
   * prompts for the current turn are skipped. Set externally by the TUI launcher.
   */
  skipRemainingDiffPrompts?: boolean;

  /**
   * Internal flag tied to the 'reject-all' diff decision: remaining diff
   * prompts for the current turn are auto-rejected. Set externally by the TUI launcher.
   */
  rejectRemainingDiffPrompts?: boolean;

  /**
   * M1: dry-run mode. When true, plan mode is forced and the system prompt
   * gains a hard constraint telling the agent not to run mutating operations.
   */
  dryRun?: boolean;

  /**
   * G16: session cost ceiling in USD. Once cumulative LLM cost reaches it,
   * the loop stops with a budget-exceeded message.
   */
  costBudgetUSD?: number;

  /**
   * G21: stream silence timeout in milliseconds. The loop itself falls back
   * to 60_000 when this is omitted.
   * NOTE(review): a config-derived default (agentTurnTimeoutSeconds * 1000)
   * is presumably applied by the caller — confirm.
   */
  streamSilenceTimeoutMs?: number;

  /**
   * GAP-20: per-tool timeout overrides from the NIMBUS.md Tool Timeouts
   * section, keyed by tool name with values in milliseconds. The value is
   * threaded into the tool's ToolExecuteContext to override the built-in default.
   */
  toolTimeouts?: Record<string, number>;
}
514
-
515
/** Describes a tool call that is currently in flight. */
export interface ToolCallInfo {
  /** Unique ID assigned to this call by the provider. */
  id: string;

  /** Name of the tool, as registered in the tool registry. */
  name: string;

  /** Input arguments after parsing. */
  input: unknown;

  /** Start time of the call as a Unix timestamp from `Date.now()`. */
  startTime: number;
}
529
-
530
/**
 * Outcome of a permission check.
 *
 * - `allow` -- run the tool.
 * - `deny`  -- skip this invocation and tell the LLM it was denied.
 * - `block` -- skip it and tell the LLM the tool is permanently blocked.
 */
export type PermissionDecision =
  | 'allow'
  | 'deny'
  | 'block';
538
-
539
/**
 * Result of a per-file diff approval request.
 *
 * - `apply`      -- apply this change.
 * - `reject`     -- skip this change.
 * - `apply-all`  -- apply this and all remaining changes without further prompts.
 * - `reject-all` -- reject this change and auto-reject the remaining diff prompts.
 */
export type FileDiffDecision = 'apply' | 'reject' | 'apply-all' | 'reject-all';
547
-
548
/** Token usage summed over every LLM turn of a loop run. */
export interface AgentLoopUsage {
  /** Prompt (input) tokens consumed in total. */
  promptTokens: number;

  /** Completion (output) tokens consumed in total. */
  completionTokens: number;

  /** Prompt and completion tokens combined. */
  totalTokens: number;
}
559
-
560
/** What the agent loop hands back once it finishes. */
export interface AgentLoopResult {
  /** Conversation messages as they stand when the loop ends. */
  messages: LLMMessage[];

  /** How many LLM turns were taken. */
  turns: number;

  /** True when the loop was cut short by the AbortSignal. */
  interrupted: boolean;

  /** Token usage accumulated over all turns. */
  usage: AgentLoopUsage;

  /** Estimated total cost in USD. */
  totalCost: number;
}
577
-
578
- // ---------------------------------------------------------------------------
579
- // Constants
580
- // ---------------------------------------------------------------------------
581
-
582
/** Model used when the caller does not specify one. */
const DEFAULT_MODEL = 'anthropic/claude-sonnet-4-20250514';
584
-
585
- // ---------------------------------------------------------------------------
586
- // H5: Cost delta hint after terraform apply / helm upgrade
587
- // ---------------------------------------------------------------------------
588
-
589
/**
 * Extract a lightweight cost hint from tool output for display after
 * infrastructure operations (terraform apply, helm install/upgrade).
 *
 * @param toolName - Name of the tool that produced the output.
 * @param input - Tool arguments; `action` selects the operation, and
 *   `releaseName` / `release` name the Helm release.
 * @param output - Raw text output of the tool.
 * @returns A one-line hint, or `null` when the output does not describe a
 *   completed change (no summary line, nothing changed, or a failed Helm run).
 */
function extractCostHintFromToolOutput(toolName: string, input: Record<string, unknown>, output: string): string | null {
  // terraform apply: parse "Apply complete! Resources: N added, M changed, K destroyed."
  // Terraform prefixes the summary with "N imported," when import blocks were
  // applied, so that segment is accepted but not reported.
  if (toolName === 'terraform' && String(input.action) === 'apply') {
    const summary = output.match(
      /Resources:\s*(?:\d+ imported,\s*)?(\d+) added,\s*(\d+) changed,\s*(\d+) destroyed/
    );
    if (summary) {
      const added = Number(summary[1]);
      const changed = Number(summary[2]);
      const destroyed = Number(summary[3]);
      const parts: string[] = [];
      if (added > 0) parts.push(`+${added} resources created`);
      if (changed > 0) parts.push(`${changed} updated`);
      if (destroyed > 0) parts.push(`${destroyed} destroyed`);
      return parts.length > 0
        ? `${parts.join(', ')} — run "nimbus cost" for monthly cost estimate`
        : null;
    }
  }

  // helm install/upgrade: any output without an error marker counts as success.
  if (toolName === 'helm' && ['install', 'upgrade'].includes(String(input.action))) {
    if (!output.includes('Error') && !output.includes('FAILED')) {
      const releaseName = String(input.releaseName ?? input.release ?? '');
      // Avoid rendering an empty pair of quotes when the release name is unknown.
      const label = releaseName ? `Helm release "${releaseName}"` : 'Helm release';
      return `${label} deployed — run "nimbus cost" for estimated cost impact`;
    }
  }
  return null;
}
619
-
620
- // ---------------------------------------------------------------------------
621
- // M4: Session-scoped error tracking for NIMBUS.md persistence
622
- // ---------------------------------------------------------------------------
623
-
624
/** Occurrence count per `tool:hint-prefix` key; lives for the lifetime of the module. */
const sessionErrorCounts = new Map<string, number>();

/**
 * Insert `entry` directly beneath the first "## Observed Issues" heading.
 *
 * Works when the heading is the last line of the file (no trailing newline)
 * or is followed by CRLF, and treats `entry` as literal text — no
 * `String.prototype.replace` substitution patterns such as `$$` or `$&`.
 *
 * @returns The updated document, or `null` when the heading is absent.
 */
function insertObservedIssueEntry(existing: string, entry: string): string | null {
  const heading = '## Observed Issues';
  const headingIdx = existing.indexOf(heading);
  if (headingIdx === -1) return null;

  const lineEnd = existing.indexOf('\n', headingIdx + heading.length);
  if (lineEnd === -1) {
    // Heading is the final line and has no newline of its own.
    return `${existing}\n${entry}`;
  }
  return existing.slice(0, lineEnd + 1) + entry + existing.slice(lineEnd + 1);
}

/**
 * Count an error hint for a tool and, on its third occurrence, record it in
 * the project's NIMBUS.md under "## Observed Issues".
 *
 * Persistence is best-effort: nothing is written when NIMBUS.md does not
 * exist or already contains the start of the hint, and any file-system
 * failure is ignored.
 *
 * @param toolName - Tool that produced the error.
 * @param errorHint - Hint text associated with the error.
 * @param cwd - Directory expected to contain NIMBUS.md.
 */
function trackAndPersistError(toolName: string, errorHint: string, cwd: string): void {
  const key = `${toolName}:${errorHint.slice(0, 60)}`;
  const count = (sessionErrorCounts.get(key) ?? 0) + 1;
  sessionErrorCounts.set(key, count);

  // Persist exactly once, when the same error has been seen three times.
  if (count !== 3) return;

  try {
    const { existsSync, readFileSync, writeFileSync, appendFileSync } = require('node:fs') as typeof import('node:fs');
    const { join } = require('node:path') as typeof import('node:path');
    const nimbusPath = join(cwd, 'NIMBUS.md');
    if (!existsSync(nimbusPath)) return;
    const existing = readFileSync(nimbusPath, 'utf-8');
    if (existing.includes(errorHint.slice(0, 40))) return; // already recorded
    const entry = `- ${toolName}: ${errorHint}\n`;
    const updated = insertObservedIssueEntry(existing, entry);
    if (updated !== null) {
      writeFileSync(nimbusPath, updated);
    } else {
      appendFileSync(nimbusPath, `\n## Observed Issues\n${entry}`);
    }
  } catch { /* non-critical */ }
}
648
-
649
- // ---------------------------------------------------------------------------
650
- // M6: Destructive action guard — force confirmation before terraform destroy / kubectl delete
651
- // ---------------------------------------------------------------------------
652
-
653
/**
 * Return a confirmation warning when a tool call is one of the guarded
 * destructive actions (terraform destroy, kubectl delete, helm uninstall),
 * or `null` for anything else.
 *
 * The action is read from `input.action`, falling back to `input.command`.
 */
function isDestructiveAction(toolName: string, input: Record<string, unknown>): string | null {
  const requested = String(input.action ?? input.command ?? '');

  switch (toolName) {
    case 'terraform':
      if (requested === 'destroy') {
        return 'terraform destroy will PERMANENTLY DELETE all managed infrastructure. Explicitly confirm with the user before proceeding.';
      }
      break;
    case 'kubectl':
      if (requested === 'delete') {
        const target = String(input.resource ?? '');
        return `kubectl delete ${target} is IRREVERSIBLE. Explicitly confirm with the user before proceeding.`;
      }
      break;
    case 'helm':
      if (requested === 'uninstall') {
        return 'helm uninstall will remove the release and its resources. Explicitly confirm with the user before proceeding.';
      }
      break;
    default:
      break;
  }
  return null;
}
667
-
668
/**
 * One cached terraform plan result.
 *
 * The cache is keyed by workdir so that, within an agent session, the output
 * of a plan can be reused for the following apply instead of running
 * `terraform plan` again. Entries are valid for 10 minutes.
 */
interface TerraformPlanCacheEntry {
  output: string;
  workdir: string;
  timestamp: number;
}

/** Lifetime of a cached plan: 10 minutes, in milliseconds. */
const PLAN_CACHE_TTL_MS = 10 * 60_000;

/** workdir → most recent plan recorded for it. */
const terraformPlanCache = new Map<string, TerraformPlanCacheEntry>();

/** Remember `output` as the latest plan for `workdir`, stamped with the current time. */
function cacheTerraformPlan(workdir: string, output: string): void {
  const entry: TerraformPlanCacheEntry = {
    output,
    workdir,
    timestamp: Date.now(),
  };
  terraformPlanCache.set(workdir, entry);
}

/**
 * Look up the cached plan for `workdir`.
 *
 * @returns The plan output, or `null` when nothing is cached or the entry has
 *   outlived the TTL (an expired entry is removed as a side effect).
 */
function getCachedTerraformPlan(workdir: string): string | null {
  const cached = terraformPlanCache.get(workdir);
  if (cached === undefined) {
    return null;
  }

  const ageMs = Date.now() - cached.timestamp;
  if (ageMs > PLAN_CACHE_TTL_MS) {
    terraformPlanCache.delete(workdir);
    return null;
  }
  return cached.output;
}
698
-
699
/**
 * Timer that sweeps the terraform plan cache every 60 seconds and drops
 * entries older than the TTL. The timer is `.unref()`-ed so it does not keep
 * the process alive. Exported so tests can clear it during teardown.
 */
export const _planCacheCleanupInterval: ReturnType<typeof setInterval> = setInterval(
  function evictExpiredPlans(): void {
    const sweepTime = Date.now();
    terraformPlanCache.forEach((cached, workdir) => {
      if (sweepTime - cached.timestamp > PLAN_CACHE_TTL_MS) {
        terraformPlanCache.delete(workdir);
      }
    });
  },
  60_000
).unref();
712
-
713
/** Output-token ceiling applied to each LLM call by default. */
const DEFAULT_MAX_TOKENS = 8192;

/** Number of agent turns allowed when the caller sets no limit. */
const DEFAULT_MAX_TURNS = 50;

/**
 * Character cap on tool output kept in the conversation history; anything
 * longer is truncated so the context window does not overflow.
 */
const MAX_TOOL_OUTPUT_CHARS = 100_000;
722
-
723
- // ---------------------------------------------------------------------------
724
- // Main Entry Point
725
- // ---------------------------------------------------------------------------
726
-
727
- /**
728
- * Run the agentic loop.
729
- *
730
- * Takes a user message and existing conversation history, then runs
731
- * the LLM in a loop until it stops requesting tool calls.
732
- *
733
- * The loop terminates when any of the following conditions are met:
734
- * - The LLM returns a response with no tool calls (natural end).
735
- * - The maximum number of turns is reached.
736
- * - The AbortSignal fires (e.g. user presses Ctrl+C).
737
- * - An unrecoverable LLM API error occurs.
738
- *
739
- * @param userMessage - The new user message to process.
740
- * @param history - Prior conversation messages (may be empty for a fresh session).
741
- * @param options - Configuration for the loop.
742
- * @returns The final conversation state, turn count, usage, and cost.
743
- */
744
- export async function runAgentLoop(
745
- userMessage: string,
746
- history: LLMMessage[],
747
- options: AgentLoopOptions
748
- ): Promise<AgentLoopResult> {
749
- const {
750
- router,
751
- toolRegistry,
752
- mode,
753
- maxTurns = DEFAULT_MAX_TURNS,
754
- model,
755
- cwd,
756
- nimbusInstructions,
757
- onText,
758
- onToolCallStart,
759
- onToolCallEnd,
760
- onToolOutputChunk,
761
- checkPermission,
762
- signal,
763
- } = options;
764
-
765
- // -----------------------------------------------------------------------
766
- // 1. Prepare tools and system prompt
767
- // -----------------------------------------------------------------------
768
-
769
- const tools = getToolsForMode(toolRegistry.getAll(), mode);
770
-
771
- // H3: Auto-discover infra context if not provided and cwd is set (best-effort, cached per cwd)
772
- let resolvedInfraContext = options.infraContext;
773
- if (!resolvedInfraContext && cwd) {
774
- try {
775
- const { discoverInfraContext } = await import('../cli/init');
776
- resolvedInfraContext = await Promise.race([
777
- discoverInfraContext(cwd),
778
- new Promise<undefined>(r => setTimeout(() => r(undefined), 5000)),
779
- ]);
780
- } catch { /* best-effort */ }
781
- }
782
-
783
- const systemPrompt = buildSystemPrompt({
784
- mode,
785
- tools,
786
- nimbusInstructions,
787
- cwd,
788
- infraContext: resolvedInfraContext,
789
- dryRun: options.dryRun,
790
- });
791
-
792
- // Convert agentic ToolDefinitions to the LLM-level format expected by
793
- // the router's routeWithTools() method (OpenAI function-calling shape).
794
- const llmTools: LLMToolDefinition[] = tools.map(toOpenAITool);
795
-
796
- // -----------------------------------------------------------------------
797
- // 2. Initialize conversation state
798
- // -----------------------------------------------------------------------
799
-
800
- // PERF-4a: Capacity-hinted pre-allocation avoids repeated V8 array reallocation
801
- // as messages accumulate during a long conversation.
802
- const messages: LLMMessage[] = new Array(Math.max(history.length + 1, 10));
803
- messages.length = 0;
804
- messages.push(...history, { role: 'user', content: userMessage });
805
-
806
- let turns = 0;
807
- let interrupted = false;
808
- const totalUsage: AgentLoopUsage = {
809
- promptTokens: 0,
810
- completionTokens: 0,
811
- totalTokens: 0,
812
- };
813
- let totalCost = 0;
814
-
815
- // G3: Session-level destructive operation counter and per-turn tool call counter
816
- let sessionDestructiveOps = 0;
817
- const MAX_TOOL_CALLS_PER_TURN = options.maxToolCallsPerTurn ?? 20;
818
- const MAX_DESTRUCTIVE_OPS_PER_SESSION = options.maxDestructiveOpsPerSession ?? 5;
819
-
820
- // M2/M5: Track tool calls that have already received a credential-error retry message
821
- // to avoid spamming the auth-refresh hint on repeated failures.
822
- const credentialRetried = new Set<string>();
823
-
824
- // G8: Track which terraform workdirs have had a plan run in this session.
825
- // Used to warn when apply is run without a prior plan.
826
- const terraformPlannedWorkdirs = new Set<string>();
827
-
828
- // G10: One-time kubectl RBAC pre-flight check state.
829
- // kubectlRbacChecked: ensures we only run `kubectl auth can-i --list` once per session.
830
- // rbacPreamble: stores the RBAC output to inject into the first kubectl tool result.
831
- let kubectlRbacChecked = false;
832
- let rbacPreamble = '';
833
-
834
- // G10: Pre-import async exec utilities so they're available inside the loop.
835
- // Using async execFile avoids blocking the Node.js event loop for kubectl/terraform calls.
836
- const { execFile: _execFile, exec: _exec } = await import('node:child_process');
837
- const { promisify: _promisify } = await import('node:util');
838
- const _execFileAsync = _promisify(_execFile);
839
- const _execAsync = _promisify(_exec);
840
-
841
- // PERF-4a: Pre-build the system message once so it can be reused every turn
842
- // without allocating a new object on each loop iteration.
843
- const _systemMessageObj: LLMMessage = { role: 'system', content: systemPrompt };
844
-
845
- // Shared mutable ref: set to true by 'apply-all' diff decision to skip further prompts
846
- const skipRemainingDiffPrompts = { value: options.skipRemainingDiffPrompts ?? false };
847
- // Shared mutable ref: set to true by 'reject-all' diff decision to auto-reject further prompts
848
- const rejectRemainingDiffPrompts = { value: options.rejectRemainingDiffPrompts ?? false };
849
-
850
- // -----------------------------------------------------------------------
851
- // 3. Main agent loop
852
- // -----------------------------------------------------------------------
853
-
854
- while (turns < maxTurns) {
855
- // Check for cancellation before each turn
856
- if (signal?.aborted) {
857
- interrupted = true;
858
- break;
859
- }
860
-
861
- turns++;
862
-
863
- try {
864
- // Gap 18: Auto-route model based on task complexity when no explicit model set
865
- let effectiveModel = model ?? DEFAULT_MODEL;
866
- if (!model && options.autoRouteModel) {
867
- const lastUserMsg = [...messages].reverse().find(m => m.role === 'user');
868
- const lastMsgText = lastUserMsg
869
- ? typeof lastUserMsg.content === 'string'
870
- ? lastUserMsg.content
871
- : JSON.stringify(lastUserMsg.content)
872
- : '';
873
- const complexity = classifyTaskComplexity(lastMsgText);
874
- effectiveModel = routeModel(complexity);
875
- if (onText && turns === 1) {
876
- onText(`\n[auto: ${effectiveModel.split('/').pop()?.replace('anthropic/', '') ?? effectiveModel}]\n`);
877
- }
878
- }
879
-
880
- // Build the completion request with tool definitions.
881
- // The systemMessageObj is pre-built before the loop (PERF-4a) — reuse it.
882
- const allMessages: LLMMessage[] = new Array(messages.length + 1);
883
- allMessages.length = 0;
884
- allMessages.push(_systemMessageObj, ...messages);
885
- const request: ToolCompletionRequest = {
886
- messages: allMessages,
887
- model: effectiveModel,
888
- tools: llmTools,
889
- maxTokens: DEFAULT_MAX_TOKENS,
890
- };
891
-
892
- // Stream text tokens incrementally via routeStreamWithTools.
893
- // Tokens are forwarded to onText as they arrive; tool calls
894
- // are accumulated from the final chunk.
895
- let responseContent = '';
896
- let responseToolCalls: ToolCall[] | undefined;
897
- let responseUsage = { promptTokens: 0, completionTokens: 0, totalTokens: 0 };
898
-
899
- // A1: Retry on transient errors (rate-limit / 5xx) with exponential backoff
900
- const MAX_STREAM_RETRIES = 2;
901
- let streamAttempt = 0;
902
- while (true) {
903
- // A2: Silence timeout — abort if no chunk arrives (G21: configurable)
904
- const STREAM_SILENCE_MS = options.streamSilenceTimeoutMs ?? 60_000;
905
- const silenceAbort = new AbortController();
906
- let silenceTimer: ReturnType<typeof setTimeout> | undefined;
907
- const resetSilence = () => {
908
- clearTimeout(silenceTimer);
909
- silenceTimer = setTimeout(() => silenceAbort.abort('Stream timeout'), STREAM_SILENCE_MS);
910
- };
911
- resetSilence();
912
-
913
- try {
914
- // Pass silence abort signal via request cast (non-standard but supported by most providers)
915
- const requestWithSignal = { ...request, signal: silenceAbort.signal } as typeof request;
916
- for await (const chunk of router.routeStreamWithTools(requestWithSignal)) {
917
- resetSilence(); // reset on every chunk
918
- if (chunk.content) {
919
- responseContent += chunk.content;
920
- if (onText) {
921
- onText(chunk.content);
922
- }
923
- }
924
- if (chunk.toolCallStart && onText) {
925
- // Show early feedback when the LLM starts composing a tool call
926
- onText(`\n[Preparing tool: ${chunk.toolCallStart.name}...]\n`);
927
- }
928
- if (chunk.toolCalls) {
929
- responseToolCalls = chunk.toolCalls;
930
- }
931
- if (chunk.usage) {
932
- responseUsage = chunk.usage;
933
- }
934
- }
935
- clearTimeout(silenceTimer);
936
- break; // success — exit retry loop
937
- } catch (streamErr) {
938
- clearTimeout(silenceTimer);
939
- if (streamAttempt < MAX_STREAM_RETRIES && isRetryableStreamError(streamErr)) {
940
- const delay = 1000 * Math.pow(2, streamAttempt);
941
- if (onText) {
942
- onText(`\n[Retrying after error (attempt ${streamAttempt + 1})...]\n`);
943
- }
944
- await new Promise(r => setTimeout(r, delay));
945
- streamAttempt++;
946
- // Reset partial accumulation before retry
947
- responseContent = '';
948
- responseToolCalls = undefined;
949
- responseUsage = { promptTokens: 0, completionTokens: 0, totalTokens: 0 };
950
- continue;
951
- }
952
- // G24: Graceful network error message instead of raw Node.js error
953
- const streamErrObj = streamErr as Error | null;
954
- const isNetworkError = /ECONNREFUSED|ETIMEDOUT|ENOTFOUND|fetch failed|network/i.test(streamErrObj?.message ?? '');
955
- if (isNetworkError) {
956
- const netMsg = '\n[!!] Network unreachable — cannot reach the LLM API.\nCheck your internet connection and API key validity, then try again.\n';
957
- if (onText) onText(netMsg);
958
- // Re-throw a specially-marked error so the outer turn catch block can handle it
959
- const netErr = new Error(netMsg);
960
- (netErr as Error & { _nimbusNetworkError?: boolean })._nimbusNetworkError = true;
961
- throw netErr;
962
- }
963
- throw streamErr; // non-retryable — propagate to outer catch
964
- }
965
- }
966
-
967
- // Accumulate usage and cost
968
- totalUsage.promptTokens += responseUsage.promptTokens;
969
- totalUsage.completionTokens += responseUsage.completionTokens;
970
- totalUsage.totalTokens += responseUsage.totalTokens;
971
-
972
- // Estimate cost for this turn
973
- const resolvedModel = effectiveModel;
974
- const providerName = resolvedModel.includes('/') ? resolvedModel.split('/')[0] : 'anthropic';
975
- const modelName = resolvedModel.includes('/')
976
- ? resolvedModel.split('/').slice(1).join('/')
977
- : resolvedModel;
978
- const turnCost = calculateCost(
979
- providerName,
980
- modelName,
981
- responseUsage.promptTokens,
982
- responseUsage.completionTokens
983
- );
984
- totalCost += turnCost.costUSD;
985
-
986
- // Notify caller of accumulated usage/cost after each turn
987
- if (options.onUsage) {
988
- options.onUsage(totalUsage, totalCost);
989
- }
990
-
991
- // M2: Emit per-turn token/cost stats as a dim system message in the TUI.
992
- // Only emit when there was actual token usage (skip turns with 0 tokens).
993
- if (onText && (responseUsage.promptTokens > 0 || responseUsage.completionTokens > 0)) {
994
- const statsLine = `\n[${responseUsage.promptTokens} in / ${responseUsage.completionTokens} out — $${turnCost.costUSD.toFixed(4)}]\n`;
995
- onText(statsLine);
996
- }
997
-
998
- // G16: Cost budget enforcement — stop if cumulative cost exceeds the limit
999
- if (options.costBudgetUSD !== undefined && totalCost >= options.costBudgetUSD) {
1000
- const budgetMsg = `\n\n[!!] Cost budget of $${options.costBudgetUSD.toFixed(2)} reached (used: $${totalCost.toFixed(3)}). Stopping to prevent overspend.\n`;
1001
- if (onText) onText(budgetMsg);
1002
- messages.push({ role: 'assistant', content: budgetMsg });
1003
- break;
1004
- }
1005
-
1006
- // -----------------------------------------------------------------
1007
- // No tool calls → the LLM is done
1008
- // -----------------------------------------------------------------
1009
- if (!responseToolCalls || responseToolCalls.length === 0) {
1010
- messages.push({
1011
- role: 'assistant',
1012
- content: responseContent,
1013
- });
1014
- break;
1015
- }
1016
-
1017
- // -----------------------------------------------------------------
1018
- // Tool calls present → execute each one
1019
- // -----------------------------------------------------------------
1020
-
1021
- // Append the assistant message that contains the tool calls
1022
- messages.push({
1023
- role: 'assistant',
1024
- content: responseContent,
1025
- toolCalls: responseToolCalls,
1026
- });
1027
-
1028
- // G3: Per-turn tool call counter — reset at the start of each tool-call batch
1029
- let turnToolCallCount = 0;
1030
-
1031
- // H2: Parallel dispatch for read-only tools (safe to run concurrently)
1032
- const READ_ONLY_TOOLS = new Set([
1033
- 'read_file', 'glob', 'grep', 'cloud_discover', 'terraform_plan_analyze',
1034
- 'kubectl_context', 'helm_values', 'cost_estimate', 'drift_detect',
1035
- ]);
1036
- const canRunInParallel = (tc: ToolCall): boolean => READ_ONLY_TOOLS.has(tc.function.name);
1037
- const allReadOnly = responseToolCalls.every(canRunInParallel);
1038
-
1039
- if (allReadOnly && responseToolCalls.length > 1) {
1040
- // All tools are read-only — dispatch in parallel
1041
- const parallelChunkCallback = onToolOutputChunk
1042
- ? (id: string) => (chunk: string) => onToolOutputChunk(id, chunk)
1043
- : undefined;
1044
-
1045
- const parallelResults = await Promise.allSettled(
1046
- responseToolCalls.map(tc =>
1047
- executeToolCall(
1048
- tc,
1049
- toolRegistry,
1050
- onToolCallStart,
1051
- onToolCallEnd,
1052
- checkPermission,
1053
- options.lspManager,
1054
- options.snapshotManager,
1055
- options.sessionId,
1056
- signal,
1057
- options.hookEngine,
1058
- mode,
1059
- options.requestFileDiff,
1060
- skipRemainingDiffPrompts,
1061
- rejectRemainingDiffPrompts,
1062
- parallelChunkCallback ? parallelChunkCallback(tc.id) : undefined,
1063
- options.toolTimeouts,
1064
- options.infraContext
1065
- )
1066
- )
1067
- );
1068
-
1069
- for (let pi = 0; pi < responseToolCalls.length; pi++) {
1070
- const tc = responseToolCalls[pi];
1071
- const pResult = parallelResults[pi];
1072
- const pContent = pResult.status === 'fulfilled'
1073
- ? (pResult.value.isError ? `Error: ${pResult.value.error}` : pResult.value.output)
1074
- : `Error: ${pResult.reason}`;
1075
- messages.push({ role: 'tool', toolCallId: tc.id, name: tc.function.name, content: pContent });
1076
- }
1077
- // Skip sequential processing — jump directly to next LLM turn
1078
- continue;
1079
- }
1080
-
1081
- // Process tool calls sequentially (order may matter for side effects)
1082
- for (const toolCall of responseToolCalls) {
1083
- // Check for cancellation between tool calls
1084
- if (signal?.aborted) {
1085
- interrupted = true;
1086
- break;
1087
- }
1088
-
1089
- // G3: Enforce per-turn tool call limit to prevent runaway loops
1090
- turnToolCallCount++;
1091
- if (turnToolCallCount > MAX_TOOL_CALLS_PER_TURN) {
1092
- messages.push({
1093
- role: 'tool',
1094
- toolCallId: toolCall.id,
1095
- name: toolCall.function.name,
1096
- content: `[Tool limit reached: ${MAX_TOOL_CALLS_PER_TURN} tool calls in this turn. Summarizing progress and stopping to avoid runaway execution.]`,
1097
- });
1098
- break;
1099
- }
1100
-
1101
- // G3: Count destructive operations at the session level
1102
- if (isDestructiveOp(toolCall.function.name, toolCall.function.arguments)) {
1103
- sessionDestructiveOps++;
1104
- }
1105
-
1106
- // G10: One-time kubectl RBAC pre-flight check — runs before the first kubectl call
1107
- // in this session. Stores the RBAC permissions summary in rbacPreamble so it can
1108
- // be injected into the first kubectl tool result (keeps conversation structure valid).
1109
- // Uses async execFile to avoid blocking the Node.js event loop (up to 5s call).
1110
- if (!kubectlRbacChecked && toolCall.function.name === 'kubectl') {
1111
- kubectlRbacChecked = true;
1112
- try {
1113
- const { stdout: rbacOut } = await _execFileAsync('kubectl', ['auth', 'can-i', '--list'], {
1114
- encoding: 'utf-8', timeout: 5000,
1115
- });
1116
- const truncated = rbacOut.length > 1500
1117
- ? `${rbacOut.slice(0, 1500)}\n...[truncated]`
1118
- : rbacOut;
1119
- rbacPreamble = `[kubectl RBAC context: permissions available in current context]\n${truncated}\n\n`;
1120
- } catch { /* non-critical — RBAC check failure does not block kubectl */ }
1121
- }
1122
-
1123
- // M6: Destructive action guard — inject warning into LLM context before executing
1124
- try {
1125
- const m6Input = JSON.parse(toolCall.function.arguments) as Record<string, unknown>;
1126
- const destructiveWarning = isDestructiveAction(toolCall.function.name, m6Input);
1127
- if (destructiveWarning) {
1128
- messages.push({
1129
- role: 'tool',
1130
- toolCallId: toolCall.id + '-guard',
1131
- name: toolCall.function.name,
1132
- content: `[SAFETY] ${destructiveWarning}`,
1133
- });
1134
- }
1135
- } catch { /* ignore parse errors */ }
1136
-
1137
- // Build chunk callback that forwards tool output to the TUI in real-time
1138
- const chunkCallback = onToolOutputChunk
1139
- ? (chunk: string) => onToolOutputChunk(toolCall.id, chunk)
1140
- : undefined;
1141
-
1142
- const result = await executeToolCall(
1143
- toolCall,
1144
- toolRegistry,
1145
- onToolCallStart,
1146
- onToolCallEnd,
1147
- checkPermission,
1148
- options.lspManager,
1149
- options.snapshotManager,
1150
- options.sessionId,
1151
- signal,
1152
- options.hookEngine,
1153
- mode,
1154
- options.requestFileDiff,
1155
- skipRemainingDiffPrompts,
1156
- rejectRemainingDiffPrompts,
1157
- chunkCallback,
1158
- options.toolTimeouts,
1159
- options.infraContext
1160
- );
1161
-
1162
- // Append each tool result as a separate message so the LLM can
1163
- // match it to the corresponding tool_use block by toolCallId.
1164
- let toolContent = result.isError ? `Error: ${result.error}` : result.output;
1165
-
1166
- // G10: Inject RBAC context preamble into the first kubectl result
1167
- if (rbacPreamble && toolCall.function.name === 'kubectl') {
1168
- toolContent = rbacPreamble + toolContent;
1169
- rbacPreamble = ''; // consume once — only injected into the first kubectl result
1170
- }
1171
-
1172
- // Inject DevOps error classification hints to guide self-correction
1173
- if (result.isError && result.error) {
1174
- const hint = classifyDevOpsError(toolCall.function.name, result.error, options.nimbusInstructions);
1175
- if (hint) {
1176
- toolContent += `\n\n${hint}`;
1177
- // C4: Also show hint in TUI error output (not just LLM context)
1178
- result.output += `\n\n${hint}`;
1179
-
1180
- // M2/M5: Auto-retry signal on credential expiry errors
1181
- // If the classified hint indicates a credential/auth problem, append
1182
- // a structured prompt so the agent knows to run auth-refresh, and
1183
- // set provider-specific env hints for the auth-refresh command.
1184
- const isCredentialError =
1185
- hint.toLowerCase().includes('credential') ||
1186
- hint.toLowerCase().includes('expired') ||
1187
- hint.toLowerCase().includes('auth') ||
1188
- hint.toLowerCase().includes('login required');
1189
-
1190
- if (isCredentialError && !credentialRetried.has(toolCall.id ?? toolCall.function.name)) {
1191
- credentialRetried.add(toolCall.id ?? toolCall.function.name);
1192
-
1193
- // M5: Set provider-specific refresh hint env vars so auth-refresh
1194
- // can surface targeted guidance when invoked by the user.
1195
- const errorLower = (result.error ?? '').toLowerCase();
1196
- if (errorLower.includes('aws')) {
1197
- process.env.NIMBUS_AWS_REFRESH_HINT = '1';
1198
- }
1199
- if (errorLower.includes('gcp') || errorLower.includes('google')) {
1200
- process.env.NIMBUS_GCP_REFRESH_HINT = '1';
1201
- }
1202
- if (errorLower.includes('azure')) {
1203
- process.env.NIMBUS_AZURE_REFRESH_HINT = '1';
1204
- }
1205
-
1206
- const refreshMsg = [
1207
- '[!!] Credential expired. Run: nimbus auth-refresh',
1208
- '[Nimbus] Credential error detected on tool: ' + toolCall.function.name,
1209
- 'Run "nimbus auth-refresh" to refresh cloud credentials, then retry.',
1210
- ].join('\n');
1211
- toolContent += '\n\n' + refreshMsg;
1212
- result.output += '\n\n' + refreshMsg;
1213
- }
1214
- } else if (DEVOPS_TOOL_NAMES.has(toolCall.function.name)) {
1215
- // Unknown DevOps error — provide structured self-diagnosis steps
1216
- toolContent += [
1217
- '\n\n--- Self-Diagnosis Steps ---',
1218
- '1. Check tool is installed: `which terraform` / `kubectl version` / `helm version`',
1219
- '2. Check credentials: `aws sts get-caller-identity` / `gcloud auth list` / `az account show`',
1220
- '3. Check network connectivity to the cluster/cloud provider',
1221
- '4. Retry with verbose flag if available (e.g., TF_LOG=DEBUG, kubectl --v=6)',
1222
- '5. If the error persists, report the exact error message and the command that caused it.',
1223
- ].join('\n');
1224
- }
1225
- // M4: Track recurring errors and persist to NIMBUS.md after 3 occurrences
1226
- const m4Hint = classifyDevOpsError(toolCall.function.name, result.error ?? '', options.nimbusInstructions);
1227
- if (m4Hint) {
1228
- trackAndPersistError(toolCall.function.name, m4Hint, options.cwd ?? process.cwd());
1229
- }
1230
- }
1231
-
1232
- // H5: Inject cost delta hint after successful infra operations
1233
- if (!result.isError) {
1234
- try {
1235
- const h5Input = JSON.parse(toolCall.function.arguments) as Record<string, unknown>;
1236
- const costHint = extractCostHintFromToolOutput(toolCall.function.name, h5Input, result.output);
1237
- if (costHint) {
1238
- onText?.(`\n[cost] ${costHint}\n`);
1239
- }
1240
- } catch { /* ignore parse errors */ }
1241
- }
1242
-
1243
- // L6: Auto-generate runbook after terraform apply success
1244
- if (!result.isError && toolCall.function.name === 'terraform') {
1245
- try {
1246
- const l6Input = JSON.parse(toolCall.function.arguments) as Record<string, unknown>;
1247
- if (String(l6Input.action) === 'apply') {
1248
- const l6Match = result.output.match(/Resources:\s*(\d+) added/);
1249
- if (l6Match && parseInt(l6Match[1] ?? '0', 10) > 0) {
1250
- const { join: _l6Join } = require('node:path') as typeof import('node:path');
1251
- const { homedir: _l6Homedir } = require('node:os') as typeof import('node:os');
1252
- const { mkdirSync: _l6MkdirSync, writeFileSync: _l6WriteFileSync } = require('node:fs') as typeof import('node:fs');
1253
- const runbookDir = _l6Join(_l6Homedir(), '.nimbus', 'runbooks');
1254
- _l6MkdirSync(runbookDir, { recursive: true });
1255
- const ts = new Date().toISOString().replace(/[:.]/g, '-');
1256
- const runbookPath = _l6Join(runbookDir, `terraform-apply-${ts}.md`);
1257
- const runbookContent = [
1258
- '# Terraform Apply Runbook',
1259
- '',
1260
- `Date: ${new Date().toLocaleString()}`,
1261
- '',
1262
- 'Apply output:',
1263
- '```',
1264
- result.output.slice(0, 2000),
1265
- '```',
1266
- '',
1267
- '## Rollback',
1268
- '',
1269
- 'To rollback, run `terraform destroy` or restore from a previous state.',
1270
- ].join('\n');
1271
- _l6WriteFileSync(runbookPath, runbookContent, 'utf-8');
1272
- options.onText?.(`\n[runbook] Saved to ${runbookPath}\n`);
1273
- }
1274
- }
1275
- } catch { /* non-critical */ }
1276
- }
1277
-
1278
- // GAP-25: Structured audit trail for destructive operations
1279
- if (!result.isError && isDestructiveOp(toolCall.function.name, toolCall.function.arguments)) {
1280
- try {
1281
- const { appendFileSync, mkdirSync } = await import('node:fs');
1282
- const { homedir } = await import('node:os');
1283
- const { join } = await import('node:path');
1284
- const auditDir = join(homedir(), '.nimbus');
1285
- mkdirSync(auditDir, { recursive: true });
1286
- const event = JSON.stringify({
1287
- type: 'infra-change',
1288
- tool: toolCall.function.name,
1289
- action: (JSON.parse(toolCall.function.arguments) as Record<string, unknown>).action,
1290
- sessionId: options.sessionId ?? 'unknown',
1291
- cwd: options.cwd ?? process.cwd(),
1292
- timestamp: new Date().toISOString(),
1293
- });
1294
- appendFileSync(join(auditDir, 'audit.jsonl'), event + '\n', 'utf-8');
1295
- } catch { /* audit logging is non-critical */ }
1296
- }
1297
-
1298
- // G3: Append a warning when session-level destructive op threshold is reached
1299
- if (sessionDestructiveOps >= MAX_DESTRUCTIVE_OPS_PER_SESSION) {
1300
- toolContent += `\n\n[Warning: ${sessionDestructiveOps} destructive operations executed in this session. Review changes carefully.]`;
1301
- }
1302
-
1303
- // Cache terraform plan output so a subsequent apply can reference it.
1304
- // Also track planned workdirs (G8) and warn on unplanned applies.
1305
- if (toolCall.function.name === 'terraform' && !result.isError) {
1306
- try {
1307
- const tfArgs = JSON.parse(toolCall.function.arguments) as Record<string, unknown>;
1308
- if (tfArgs.action === 'plan' && tfArgs.workdir) {
1309
- cacheTerraformPlan(String(tfArgs.workdir), result.output);
1310
- // G8: Track that a plan was run for this workdir in this session
1311
- terraformPlannedWorkdirs.add(String(tfArgs.workdir));
1312
- }
1313
- // G8: Warn if apply ran without a prior plan in this session
1314
- if (tfArgs.action === 'apply' && tfArgs.workdir && !terraformPlannedWorkdirs.has(String(tfArgs.workdir))) {
1315
- toolContent = `[Note: terraform apply ran without a prior terraform plan in this session for ${String(tfArgs.workdir)}. Always run terraform plan first to review changes before applying.]\n\n${toolContent}`;
1316
- }
1317
- // Inject cached plan into apply context for the LLM
1318
- if (tfArgs.action === 'apply' && tfArgs.workdir) {
1319
- const cached = getCachedTerraformPlan(String(tfArgs.workdir));
1320
- if (cached) {
1321
- toolContent = `[Apply succeeded. This was the plan that was applied:]\n${cached.slice(0, 3000)}\n\n[Apply output:]\n${toolContent}`;
1322
- }
1323
- }
1324
- } catch { /* ignore parse errors */ }
1325
- }
1326
-
1327
- // GAP-11: trigger FileDiff UI after terraform plan shows resource changes
1328
- if (toolCall.function.name === 'terraform' && !result.isError && options.requestFileDiff) {
1329
- try {
1330
- const tfArgs11 = JSON.parse(toolCall.function.arguments) as Record<string, unknown>;
1331
- if (tfArgs11.action === 'plan') {
1332
- const { parseTerraformPlanOutput, buildFileDiffBatchFromPlan } = await import('./deploy-preview');
1333
- const changes = parseTerraformPlanOutput(toolContent);
1334
- if (changes.length > 0) {
1335
- const batchFiles = buildFileDiffBatchFromPlan({ changes } as import('./deploy-preview').DeployPreview);
1336
- for (const file of batchFiles) {
1337
- const decision = await options.requestFileDiff(file.filePath, file.toolName ?? 'terraform', file.diff ?? '');
1338
- if (decision === 'reject-all') break;
1339
- }
1340
- }
1341
- }
1342
- } catch { /* non-critical — FileDiff UI not always available */ }
1343
- }
1344
-
1345
- // GAP-18: auto-validate terraform files after write/edit tool calls
1346
- if (['write_file', 'edit_file', 'multi_edit'].includes(toolCall.function.name) && !result.isError) {
1347
- const gap18Input = JSON.parse(toolCall.function.arguments) as { path?: string; file_path?: string };
1348
- const gap18FilePath = gap18Input.path ?? gap18Input.file_path ?? '';
1349
- if (gap18FilePath.endsWith('.tf')) {
1350
- try {
1351
- // Use async exec to avoid blocking the event loop (up to 10s for terraform validate)
1352
- const { stdout: validateOut } = await _execAsync('terraform validate -json 2>/dev/null', {
1353
- cwd: options.cwd ?? process.cwd(),
1354
- encoding: 'utf-8',
1355
- timeout: 10_000,
1356
- });
1357
- const parsed = JSON.parse(validateOut) as { valid: boolean; diagnostics?: Array<{ severity: string; summary: string; detail: string }> };
1358
- if (!parsed.valid && parsed.diagnostics && parsed.diagnostics.length > 0) {
1359
- const errors = parsed.diagnostics
1360
- .filter(d => d.severity === 'error')
1361
- .map(d => ` ${d.summary}: ${d.detail}`)
1362
- .join('\n');
1363
- toolContent += `\n\nTerraform validation errors (please fix):\n${errors}`;
1364
- }
1365
- } catch { /* terraform not available or not in tf project — ignore */ }
1366
- }
1367
- }
1368
-
1369
- // Truncate excessively large tool outputs to prevent context overflow
1370
- if (toolContent.length > MAX_TOOL_OUTPUT_CHARS) {
1371
- let head: string;
1372
- let tail: string;
1373
- let omitted: number;
1374
- const lines = toolContent.split('\n');
1375
-
1376
- // C3: Smart truncation for terraform plan — preserve all diff lines
1377
- const isTerraformPlan = toolCall.function.name === 'terraform' && (() => {
1378
- try {
1379
- const tfArgs = JSON.parse(toolCall.function.arguments) as Record<string, unknown>;
1380
- return tfArgs.action === 'plan';
1381
- } catch { return false; }
1382
- })();
1383
-
1384
- if (isTerraformPlan) {
1385
- // Keep all diff lines (create/update/destroy/replace) and the plan summary
1386
- const diffLines: string[] = [];
1387
- const contextLines: string[] = [];
1388
- for (const line of lines) {
1389
- const trimmed = line.trimStart();
1390
- const isDiffLine = trimmed.startsWith('+') || trimmed.startsWith('-') ||
1391
- trimmed.startsWith('~') || trimmed.startsWith('!') ||
1392
- line.includes('will be created') || line.includes('will be destroyed') ||
1393
- line.includes('will be updated') || line.includes('will be replaced') ||
1394
- line.includes('Plan:') || line.includes('No changes') ||
1395
- line.includes('Error:') || line.includes('Warning:');
1396
- if (isDiffLine) {
1397
- diffLines.push(line);
1398
- } else {
1399
- contextLines.push(line);
1400
- }
1401
- }
1402
- // Allow up to 500 diff lines + first 50 context lines
1403
- const keptDiff = diffLines.slice(0, 500);
1404
- const keptCtx = contextLines.slice(0, 50);
1405
- omitted = Math.max(0, lines.length - keptDiff.length - keptCtx.length);
1406
- head = [...keptCtx, ...keptDiff].join('\n');
1407
- tail = '';
1408
- } else {
1409
- const headLines = 100, tailLines = 20;
1410
- head = lines.slice(0, headLines).join('\n');
1411
- tail = lines.slice(-tailLines).join('\n');
1412
- omitted = Math.max(0, lines.length - headLines - tailLines);
1413
- }
1414
-
1415
- // Save full output to disk for reference
1416
- try {
1417
- const { mkdirSync: _mkdirSync, writeFileSync: _writeFileSync } = await import('node:fs');
1418
- const { homedir: _homedir } = await import('node:os');
1419
- const outDir = join(_homedir(), '.nimbus', 'tool-outputs');
1420
- _mkdirSync(outDir, { recursive: true });
1421
- const outFile = join(outDir, `${Date.now()}-${toolCall.function.name}.log`);
1422
- _writeFileSync(outFile, toolContent, 'utf-8');
1423
- toolContent = omitted > 0
1424
- ? `${head}${tail ? '\n\n... [' + omitted + ' lines omitted — full output saved to ' + outFile + '] ...\n\n' + tail : '\n\n... [full output saved to ' + outFile + ']'}`
1425
- : `${head}${tail ? '\n\n' + tail : ''}`;
1426
- } catch {
1427
- toolContent = omitted > 0
1428
- ? `${head}${tail ? '\n\n... [' + omitted + ' lines omitted — output too large for context] ...\n\n' + tail : '\n\n... [' + omitted + ' lines omitted]'}`
1429
- : `${head}${tail ? '\n\n' + tail : ''}`;
1430
- }
1431
- }
1432
-
1433
- messages.push({
1434
- role: 'tool',
1435
- toolCallId: toolCall.id,
1436
- name: toolCall.function.name,
1437
- content: toolContent,
1438
- });
1439
- }
1440
-
1441
- // If we broke out of the tool-call loop due to cancellation, exit
1442
- // the main loop as well.
1443
- if (interrupted) {
1444
- break;
1445
- }
1446
-
1447
- // -----------------------------------------------------------------
1448
- // Auto-compact check
1449
- // -----------------------------------------------------------------
1450
- // After tool results are appended, check whether the conversation
1451
- // has grown past the context window threshold. If so, summarize
1452
- // older messages to free up space for future turns.
1453
- if (options.contextManager) {
1454
- const toolTokens = llmTools.reduce(
1455
- (sum, t) => sum + Math.ceil(JSON.stringify(t).length / 4),
1456
- 0
1457
- );
1458
- if (options.contextManager.shouldCompact(systemPrompt, messages, toolTokens)) {
1459
- try {
1460
- const compactResult = await runCompaction(messages, options.contextManager, {
1461
- router,
1462
- ...(options.infraContext ? { infraContext: options.infraContext } : {}),
1463
- });
1464
- // Replace messages with the compacted version
1465
- messages.length = 0;
1466
- messages.push(...compactResult.messages);
1467
- // Clear the token cache after compaction — old message entries are no longer valid
1468
- options.contextManager.clearTokenCache();
1469
- if (options.onCompact) {
1470
- options.onCompact(compactResult.result);
1471
- }
1472
- } catch (compactErr) {
1473
- // Compaction failed — notify user visibly and continue with original messages
1474
- const compactErrMsg =
1475
- compactErr instanceof Error ? compactErr.message : String(compactErr);
1476
- if (onText) {
1477
- onText(
1478
- `\n[Warning: Auto-compaction failed: ${compactErrMsg}. Context may exceed budget on the next turn.]\n`
1479
- );
1480
- }
1481
- }
1482
- }
1483
- }
1484
- } catch (error: unknown) {
1485
- // LLM API error — report to the caller and break
1486
- const msg = error instanceof Error ? error.message : String(error);
1487
- // G24: Network errors already printed via onText above — skip duplicate output
1488
- const isNetworkErr = (error instanceof Error) && (error as Error & { _nimbusNetworkError?: boolean })._nimbusNetworkError;
1489
- if (!isNetworkErr && onText) {
1490
- onText(`\n[Error: ${msg}]\n`);
1491
- }
1492
- messages.push({
1493
- role: 'assistant',
1494
- content: isNetworkErr ? msg : `I encountered an error: ${msg}`,
1495
- });
1496
- break;
1497
- }
1498
- }
1499
-
1500
- // -----------------------------------------------------------------------
1501
- // 4. Post-loop bookkeeping
1502
- // -----------------------------------------------------------------------
1503
-
1504
- if (turns >= maxTurns && !interrupted) {
1505
- if (onText) {
1506
- onText(`\n[Agent reached maximum turns limit (${maxTurns}). Stopping.]\n`);
1507
- }
1508
- }
1509
-
1510
- // GAP-19: Session summary after multi-step deploy
1511
- if (options.mode === 'deploy' && options.onText) {
1512
- // Collect tool calls from messages
1513
- const allToolCalls: Array<{ name: string; input: Record<string, unknown> }> = [];
1514
- for (const msg of messages) {
1515
- if (msg.role === 'assistant' && Array.isArray((msg as {toolCalls?: unknown[]}).toolCalls)) {
1516
- for (const tc of (msg as {toolCalls: Array<{function: {name: string; arguments: string}}>}).toolCalls) {
1517
- try {
1518
- allToolCalls.push({ name: tc.function.name, input: JSON.parse(tc.function.arguments) as Record<string, unknown> });
1519
- } catch { /* ignore */ }
1520
- }
1521
- }
1522
- }
1523
- if (allToolCalls.length > 3) {
1524
- const terraform = allToolCalls.filter(c => c.name === 'terraform');
1525
- const kubectl = allToolCalls.filter(c => c.name === 'kubectl');
1526
- const helm = allToolCalls.filter(c => c.name === 'helm');
1527
- const summaryLines: string[] = ['---', '**Session Summary**'];
1528
- if (terraform.length) summaryLines.push(`• Terraform: ${terraform.map(c => String(c.input.action ?? '')).join(', ')}`);
1529
- if (kubectl.length) summaryLines.push(`• Kubectl: ${kubectl.map(c => String(c.input.action ?? '')).join(', ')}`);
1530
- if (helm.length) summaryLines.push(`• Helm: ${helm.map(c => String(c.input.action ?? '')).join(', ')}`);
1531
- if (summaryLines.length > 2) {
1532
- options.onText('\n\n' + summaryLines.join('\n'));
1533
- }
1534
- }
1535
- }
1536
-
1537
- return {
1538
- messages,
1539
- turns,
1540
- interrupted,
1541
- usage: totalUsage,
1542
- totalCost,
1543
- };
1544
- }
1545
-
1546
- // ---------------------------------------------------------------------------
1547
- // Tool Execution
1548
- // ---------------------------------------------------------------------------
1549
-
1550
/**
 * Names of tools that write to files. After one of these runs, the edited
 * file is a candidate for LSP diagnostics.
 */
const FILE_EDITING_TOOLS = new Set<string>([
  'edit_file',
  'multi_edit',
  'write_file',
]);

/**
 * Names of tools whose file changes may be shown to the user as a diff
 * for approval before anything is written.
 */
const FILE_MUTATING_TOOLS = new Set<string>([
  'edit_file',
  'multi_edit',
  'write_file',
]);
1555
-
1556
/**
 * Generate a simple unified diff between two strings.
 *
 * Suitable for display only: it uses a greedy line-by-line scan rather than a
 * minimal (LCS) diff, and emits no context lines. When two lines differ, lines
 * are removed from `before` until one equals the current `after` line (or
 * `before` is exhausted); then lines are added from `after` until it
 * re-aligns with `before`.
 *
 * @param filename - Name used in the `--- a/` and `+++ b/` header lines.
 * @param before - Original text. An empty string is treated as zero lines.
 * @param after - Proposed text. An empty string is treated as zero lines.
 * @returns The diff text: the two header lines, followed by one
 *   `@@ -start,count +start,count @@` hunk per differing region.
 */
function generateUnifiedDiff(filename: string, before: string, after: string): string {
  // ''.split('\n') yields [''], which would render an empty (or new) file as a
  // spurious removed blank line. Treat the empty string as having no lines.
  const toLines = (text: string): string[] => (text === '' ? [] : text.split('\n'));
  // Unified format: a non-empty range starts at its first line (1-based); an
  // empty range is reported at the line that precedes it.
  const formatRange = (startIndex: number, count: number): string =>
    `${count === 0 ? startIndex : startIndex + 1},${count}`;

  const beforeLines = toLines(before);
  const afterLines = toLines(after);
  const lines: string[] = [`--- a/${filename}`, `+++ b/${filename}`];

  let i = 0;
  let j = 0;
  while (i < beforeLines.length || j < afterLines.length) {
    // Only compare while both cursors are in range.
    if (i < beforeLines.length && j < afterLines.length && beforeLines[i] === afterLines[j]) {
      i++;
      j++;
      continue;
    }

    // Each side keeps its own start index: earlier hunks may have shifted
    // the "after" line numbers relative to "before".
    const startBefore = i;
    const startAfter = j;

    // Removed region: advance until `before` lines up with the current
    // `after` line. If `after` is exhausted, afterLines[j] is undefined and
    // the rest of `before` is consumed.
    while (i < beforeLines.length && beforeLines[i] !== afterLines[j]) {
      i++;
    }
    // Added region: advance until `after` lines up with `before` again.
    while (
      j < afterLines.length &&
      (i >= beforeLines.length || beforeLines[i] !== afterLines[j])
    ) {
      j++;
    }

    const hunkBefore = beforeLines.slice(startBefore, i);
    const hunkAfter = afterLines.slice(startAfter, j);
    lines.push(
      `@@ -${formatRange(startBefore, hunkBefore.length)} +${formatRange(startAfter, hunkAfter.length)} @@`
    );
    for (const removed of hunkBefore) lines.push(`-${removed}`);
    for (const added of hunkAfter) lines.push(`+${added}`);
  }
  return lines.join('\n');
}
1592
-
1593
/**
 * Compute a proposed diff for a file-mutating tool call without writing to disk.
 *
 * Reads the current content of `args.path` (an unreadable or missing file is
 * treated as empty), applies the requested edit in memory, and renders the
 * result with `generateUnifiedDiff`.
 *
 * Handled tools:
 * - `edit_file`: replaces the first occurrence of `args.old_string` with `args.new_string`.
 * - `multi_edit`: applies each `{ old_string, new_string }` entry of `args.edits` in order,
 *   first occurrence each; malformed entries are skipped.
 * - `write_file`: the proposed content is `args.content`.
 * Any other tool name leaves the content unchanged.
 *
 * @param toolName - Name of the tool being invoked.
 * @param args - Parsed tool arguments. Values are validated at runtime because
 *   they originate from LLM-produced JSON.
 * @returns The unified diff string, or null when the arguments are unusable,
 *   there is no change, or anything throws.
 */
async function computeProposedDiff(
  toolName: string,
  args: Record<string, unknown>
): Promise<string | null> {
  // Replace the first literal occurrence. A replacer function is used so that
  // `$&`, `$$`, `$1`, etc. in the replacement text are inserted verbatim
  // instead of being interpreted as substitution patterns.
  const replaceFirstLiteral = (text: string, oldString: string, newString: string): string =>
    text.replace(oldString, () => newString);

  try {
    const { readFile } = await import('node:fs/promises');
    const path = args.path;
    // A non-string path must not reach readFile (a number would be read as a file descriptor).
    if (typeof path !== 'string' || !path) return null;
    const currentContent = await readFile(path, 'utf-8').catch(() => '');
    let proposed = currentContent;
    if (toolName === 'edit_file') {
      const oldString = args.old_string;
      const newString = args.new_string;
      if (typeof oldString !== 'string' || typeof newString !== 'string') return null;
      proposed = replaceFirstLiteral(currentContent, oldString, newString);
    } else if (toolName === 'multi_edit') {
      const edits = args.edits;
      if (Array.isArray(edits)) {
        for (const entry of edits as unknown[]) {
          if (entry === null || typeof entry !== 'object') continue;
          const { old_string: oldString, new_string: newString } = entry as {
            old_string?: unknown;
            new_string?: unknown;
          };
          if (typeof oldString !== 'string' || typeof newString !== 'string') continue;
          proposed = replaceFirstLiteral(proposed, oldString, newString);
        }
      }
    } else if (toolName === 'write_file') {
      if (typeof args.content !== 'string') return null;
      proposed = args.content;
    }
    if (proposed === currentContent) return null; // no change
    return generateUnifiedDiff(path, currentContent, proposed);
  } catch {
    // Best-effort preview: any failure means "no diff available".
    return null;
  }
}
1625
-
1626
/**
 * Extract the target file path from a tool call's parsed arguments.
 *
 * Only tools listed in `FILE_EDITING_TOOLS` are considered; for those, the
 * `path` property of the input object is returned.
 *
 * @param toolName - Name of the tool that was called.
 * @param input - Parsed tool arguments (untrusted; originates from LLM JSON).
 * @returns The `path` value when the tool is a file-editing tool and `path`
 *   is a string; otherwise `null`.
 */
function extractFilePath(toolName: string, input: unknown): string | null {
  if (!FILE_EDITING_TOOLS.has(toolName)) {
    return null;
  }
  if (input === null || typeof input !== 'object') {
    return null;
  }
  // Check the runtime type instead of casting: the declared return type
  // promises a string, and the input is not schema-validated at this point.
  const candidate = (input as { path?: unknown }).path;
  return typeof candidate === 'string' ? candidate : null;
}
1641
-
1642
- /**
1643
- * Execute a single tool call.
1644
- *
1645
- * Handles:
1646
- * - Looking up the tool in the registry.
1647
- * - Parsing the JSON arguments string from the LLM response.
1648
- * - Validating input against the Zod schema.
1649
- * - Checking permissions via the caller-supplied callback.
1650
- * - Invoking the tool and returning the result.
1651
- * - Notifying start/end callbacks.
1652
- * - Querying the LSP for diagnostics after file edits.
1653
- *
1654
- * @param toolCall - The raw tool call from the LLM response.
1655
- * @param registry - The tool registry to look up the tool definition.
1656
- * @param onStart - Optional callback fired before execution.
1657
- * @param onEnd - Optional callback fired after execution (or error).
1658
- * @param checkPermission - Optional permission gate.
1659
- * @param lspManager - Optional LSP manager for post-edit diagnostics.
1660
- * @returns The tool result (always succeeds; errors are captured inside the result).
1661
- */
1662
async function executeToolCall(
  toolCall: ToolCall,
  registry: ToolRegistry,
  onStart?: (info: ToolCallInfo) => void,
  onEnd?: (info: ToolCallInfo, result: ToolResult) => void,
  checkPermission?: (tool: ToolDefinition, input: unknown) => Promise<PermissionDecision>,
  lspManager?: LSPManager,
  snapshotManager?: SnapshotManager,
  sessionId?: string,
  signal?: AbortSignal,
  hookEngine?: HookEngine,
  mode?: AgentMode,
  requestFileDiff?: (path: string, toolName: string, diff: string) => Promise<FileDiffDecision>,
  skipRemainingDiffPrompts?: { value: boolean },
  rejectRemainingDiffPrompts?: { value: boolean },
  onChunk?: (chunk: string) => void,
  toolTimeouts?: Record<string, number>,
  infraContext?: import('../sessions/manager').SessionInfraContext
): Promise<ToolResult> {
  // Pipeline, in order: parse arguments -> look up tool -> onStart ->
  // PreToolUse hooks -> permission check -> pre-approval diff -> snapshot ->
  // schema validation + execute -> LSP diagnostics -> secret masking ->
  // PostToolUse hooks -> onEnd.
  const toolName = toolCall.function.name;

  // Parse the JSON arguments string from the LLM. On malformed JSON we return
  // immediately; neither onStart nor onEnd fires for this path.
  let parsedArgs: unknown;
  try {
    parsedArgs = JSON.parse(toolCall.function.arguments);
  } catch {
    return {
      output: '',
      error: `Tool '${toolName}' received malformed JSON arguments — please retry the tool call with valid JSON. Received: ${toolCall.function.arguments.slice(0, 200)}`,
      isError: true,
    };
  }

  // Object view of the arguments for code that runs BEFORE schema validation
  // (hooks, diff preview, snapshot, checkpoint). JSON such as `null`, `42` or
  // `"x"` parses successfully, so fall back to an empty record rather than
  // dereferencing a non-object and throwing out of this function.
  const argsRecord: Record<string, unknown> =
    parsedArgs && typeof parsedArgs === 'object' ? (parsedArgs as Record<string, unknown>) : {};

  const callInfo: ToolCallInfo = {
    id: toolCall.id,
    name: toolName,
    input: parsedArgs,
    startTime: Date.now(),
  };

  // Report a result to the onEnd callback (when provided) and hand it back,
  // so every exit path below notifies the caller the same way.
  const finish = (outcome: ToolResult): ToolResult => {
    onEnd?.(callInfo, outcome);
    return outcome;
  };

  // Look up the tool definition. An unknown tool is reported through onEnd
  // even though onStart has not fired yet.
  const tool = registry.get(toolName);
  if (!tool) {
    return finish({
      output: '',
      error: `Unknown tool: ${toolName}`,
      isError: true,
    });
  }

  // Notify start
  onStart?.(callInfo);

  // Build shared hook context for PreToolUse and PostToolUse
  const hookContext: HookContext = {
    tool: toolName,
    input: argsRecord,
    sessionId: sessionId ?? 'default',
    agent: mode ?? 'build',
    timestamp: new Date().toISOString(),
  };

  // PreToolUse hooks — may block the tool call
  if (hookEngine) {
    const preResult = await runPreToolHooks(hookEngine, hookContext);
    if (!preResult.allowed) {
      return finish({
        output: '',
        error: `Tool '${toolName}' blocked by hook: ${preResult.message ?? 'no reason given'}`,
        isError: true,
      });
    }
  }

  // Permission check
  if (checkPermission) {
    const decision = await checkPermission(tool, parsedArgs);
    if (decision === 'deny' || decision === 'block') {
      return finish({
        output: '',
        error:
          decision === 'block'
            ? `Tool '${toolName}' is blocked by permission policy.`
            : `User denied permission for tool '${toolName}'.`,
        isError: true,
      });
    }
  }

  // B1: Pre-approval diff — show proposed change before writing files.
  // Skipped entirely once the user has chosen 'apply-all'.
  if (FILE_MUTATING_TOOLS.has(toolName) && requestFileDiff && !skipRemainingDiffPrompts?.value) {
    // Auto-reject if 'reject-all' was previously chosen. A rejection is a
    // non-error result so the model sees it as a normal tool outcome.
    if (rejectRemainingDiffPrompts?.value) {
      return finish({
        output: 'User rejected this change (reject-all).',
        error: undefined,
        isError: false,
      });
    }

    const diff = await computeProposedDiff(toolName, argsRecord);
    if (diff) {
      // Arguments are not schema-validated yet, so only trust `path` when it
      // really is a string.
      const targetPath = typeof argsRecord.path === 'string' ? argsRecord.path : '(file)';
      const decision = await requestFileDiff(targetPath, toolName, diff);
      if (decision === 'reject') {
        return finish({
          output: 'User rejected this change.',
          error: undefined,
          isError: false,
        });
      }
      if (decision === 'reject-all') {
        // Remember the choice in the shared flag so later calls auto-reject.
        if (rejectRemainingDiffPrompts) {
          rejectRemainingDiffPrompts.value = true;
        }
        return finish({
          output: 'User rejected this change (reject-all).',
          error: undefined,
          isError: false,
        });
      }
      if (decision === 'apply-all' && skipRemainingDiffPrompts) {
        skipRemainingDiffPrompts.value = true;
      }
    }
  }

  // Capture snapshot before file-modifying tools for undo/redo support
  if (snapshotManager && SnapshotManager.shouldSnapshot(toolName, argsRecord)) {
    try {
      await snapshotManager.captureSnapshot({
        sessionId: sessionId || 'default',
        messageId: toolCall.id,
        toolCallId: toolCall.id,
        description: `${toolName}: ${extractFilePath(toolName, parsedArgs) || '(bash command)'}`,
      });
    } catch {
      // Snapshot failure should never block the tool call
    }
  }

  // Validate input against the tool's Zod schema and execute. Anything thrown
  // here (validation, checkpoint write, or the tool itself) becomes an error
  // result instead of propagating.
  let result: ToolResult;
  try {
    const validatedInput = tool.inputSchema.parse(parsedArgs);

    // Thread AbortSignal into bash tool for Ctrl+C child process killing
    if (signal && toolName === 'bash' && validatedInput && typeof validatedInput === 'object') {
      (validatedInput as Record<string, unknown>)._signal = signal;
    }

    // GAP-20: Build tool execute context, including per-tool timeout from toolTimeouts map
    // C2: Also pass infraContext from session so tools can use it as fallback
    // The timeout is tested with `!== undefined` in both places so that an
    // explicitly configured 0 is forwarded even when it is the only field.
    const timeoutForTool = toolTimeouts?.[toolName];
    const toolCtx: ToolExecuteContext | undefined =
      onChunk || timeoutForTool !== undefined || infraContext
        ? {
            ...(onChunk ? { onProgress: onChunk } : {}),
            ...(timeoutForTool !== undefined ? { timeout: timeoutForTool } : {}),
            ...(infraContext ? { infraContext } : {}),
          }
        : undefined;

    // C2: Write infra checkpoint before mutating terraform/helm operations
    if (toolName === 'terraform' || toolName === 'helm') {
      const checkpointAction = String(argsRecord.action ?? '');
      const needsCheckpoint =
        (toolName === 'terraform' && checkpointAction === 'apply') ||
        (toolName === 'helm' && ['install', 'upgrade', 'rollback'].includes(checkpointAction));
      if (needsCheckpoint) {
        writeInfraCheckpoint(toolName, checkpointAction, argsRecord);
      }
    }

    result = await tool.execute(validatedInput, toolCtx);
  } catch (error: unknown) {
    result = {
      output: '',
      error: formatToolInputError(toolName, error),
      isError: true,
    };
  }

  // -----------------------------------------------------------------------
  // LSP diagnostics injection
  // -----------------------------------------------------------------------
  // After a successful file edit, notify the language server and collect
  // any diagnostics (type errors, lint issues). If errors exist they are
  // appended to the tool output so the LLM sees them on its next turn
  // and can self-correct.
  if (lspManager && !result.isError) {
    const filePath = extractFilePath(toolName, parsedArgs);
    if (filePath) {
      try {
        await lspManager.touchFile(filePath);
        const diagnostics = await lspManager.getDiagnostics(filePath);
        if (diagnostics.length > 0) {
          const formatted = lspManager.formatDiagnosticsForAgent(diagnostics);
          if (formatted) {
            result = {
              ...result,
              output: result.output ? `${result.output}\n\n${formatted}` : formatted,
            };
          }
        }
      } catch (lspErr) {
        // LSP errors should never block the agent loop.
        // Append a note to the tool result so the LLM (and user) can see it.
        const lspErrMsg = lspErr instanceof Error ? lspErr.message : String(lspErr);
        result = {
          ...result,
          output: result.output
            ? `${result.output}\n\n[Note: LSP diagnostics unavailable: ${lspErrMsg}]`
            : `[Note: LSP diagnostics unavailable: ${lspErrMsg}]`,
        };
      }
    }
  }

  // Gap 12: Mask secrets before forwarding to callbacks/history. Failed calls
  // are masked too: both the error text and any partial output of a failing
  // tool can contain credentials.
  if (result.output) {
    result = { ...result, output: maskSecrets(result.output) };
  }
  if (result.error) {
    result = { ...result, error: maskSecrets(result.error) };
  }

  // PostToolUse hooks (audit, auto-format, etc.). These are awaited, so they
  // complete before onEnd fires; they receive the already-masked text.
  if (hookEngine) {
    await runPostToolHooks(hookEngine, {
      ...hookContext,
      result: {
        output: result.isError ? (result.error ?? '') : result.output,
        isError: result.isError,
      },
    });
  }

  // Notify end
  return finish(result);
}
1932
-
1933
- // ---------------------------------------------------------------------------
1934
- // Mode-Based Tool Filtering
1935
- // ---------------------------------------------------------------------------
1936
-
1937
/**
 * Allow-list of tool names the agent may use while in `plan` mode.
 *
 * Plan mode is intended for inspection and analysis: reading files,
 * searching the codebase, working with tasks, estimating costs, and
 * detecting drift -- not for writing files, running commands, or mutating
 * infrastructure.
 *
 * NOTE(review): the list contains `todo_write` and `task`, so "read-only"
 * presumably applies to files and infrastructure rather than to the agent's
 * own task state -- confirm against those tools' implementations.
 */
const PLAN_MODE_TOOLS = new Set<string>([
  // Files and codebase search
  'read_file',
  'glob',
  'grep',
  'list_dir',
  'webfetch',
  // Task tracking
  'todo_read',
  'todo_write',
  'task',
  // Cost / drift / cloud analysis
  'cost_estimate',
  'drift_detect',
  'cloud_discover',
]);
1957
-
1958
/**
 * Deny-list of tool names withheld from the agent while in `build` mode.
 *
 * Build mode permits reads and writes (file edits, code generation); only
 * the infrastructure tools named here are removed, since they could affect
 * live environments. This is a coarse filter -- the permission engine
 * layers fine-grained control on top of it.
 */
const BUILD_MODE_BLOCKED_TOOLS = new Set<string>([
  'terraform',
  'kubectl',
  'helm',
]);
1967
-
1968
/**
 * Select the tools the agent is allowed to see in a given mode.
 *
 * - **plan**: only tools whose name is in `PLAN_MODE_TOOLS`.
 * - **build**: every tool whose name is not in `BUILD_MODE_BLOCKED_TOOLS`.
 * - **deploy**: every tool; the input array itself is returned, not a copy.
 *
 * At runtime, a value outside these three modes also yields the full list.
 *
 * @param allTools - Every tool registered in the system.
 * @param mode - The active agent mode.
 * @returns The subset of tools available in the given mode.
 */
export function getToolsForMode(allTools: ToolDefinition[], mode: AgentMode): ToolDefinition[] {
  if (mode === 'plan') {
    return allTools.filter(candidate => PLAN_MODE_TOOLS.has(candidate.name));
  }

  if (mode === 'build') {
    return allTools.filter(candidate => !BUILD_MODE_BLOCKED_TOOLS.has(candidate.name));
  }

  if (mode === 'deploy') {
    // All tools available
    return allTools;
  }

  // Exhaustive check -- every known mode returned above, so `mode` has
  // narrowed to `never`. Adding a new member to AgentMode turns this
  // assignment into a compile error (assuming AgentMode is a union type).
  const _exhaustive: never = mode;
  return allTools;
}