@build-astron-co/nimbus 0.4.2 → 0.4.3

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
Files changed (430)
  1. package/dist/src/agent/compaction-agent.js +24 -12
  2. package/dist/src/agent/context-manager.js +2 -1
  3. package/dist/src/agent/expand-files.js +2 -1
  4. package/dist/src/agent/loop.js +71 -33
  5. package/dist/src/agent/permissions.js +4 -2
  6. package/dist/src/agent/system-prompt.js +34 -17
  7. package/dist/src/app.js +1 -1
  8. package/dist/src/auth/keychain.js +8 -4
  9. package/dist/src/auth/store.js +70 -107
  10. package/dist/src/cli/init.js +35 -19
  11. package/dist/src/cli/run.js +18 -10
  12. package/dist/src/cli/serve.js +4 -2
  13. package/dist/src/cli.js +52 -11
  14. package/dist/src/commands/alias.js +5 -3
  15. package/dist/src/commands/audit/index.js +2 -1
  16. package/dist/src/commands/aws-terraform.js +36 -18
  17. package/dist/src/commands/completions.js +1 -1
  18. package/dist/src/commands/config.js +3 -2
  19. package/dist/src/commands/connect-github.js +92 -0
  20. package/dist/src/commands/cost/index.js +3 -2
  21. package/dist/src/commands/deploy.js +15 -10
  22. package/dist/src/commands/doctor.js +6 -3
  23. package/dist/src/commands/drift/index.js +2 -1
  24. package/dist/src/commands/export.js +5 -3
  25. package/dist/src/commands/generate-terraform.js +110 -2
  26. package/dist/src/commands/import.js +3 -3
  27. package/dist/src/commands/incident.js +10 -5
  28. package/dist/src/commands/login.js +8 -93
  29. package/dist/src/commands/logs.js +16 -8
  30. package/dist/src/commands/onboarding.js +6 -4
  31. package/dist/src/commands/pipeline.js +6 -3
  32. package/dist/src/commands/plugin.js +3 -2
  33. package/dist/src/commands/profile.js +27 -14
  34. package/dist/src/commands/questionnaire.js +1 -1
  35. package/dist/src/commands/rollback.js +3 -2
  36. package/dist/src/commands/rollout.js +5 -3
  37. package/dist/src/commands/runbook.js +17 -10
  38. package/dist/src/commands/schedule.js +10 -5
  39. package/dist/src/commands/status.js +2 -1
  40. package/dist/src/commands/team-context.js +12 -7
  41. package/dist/src/commands/template.js +1 -1
  42. package/dist/src/commands/tf/index.js +6 -3
  43. package/dist/src/commands/version.js +6 -3
  44. package/dist/src/commands/watch.js +6 -3
  45. package/dist/src/compat/sqlite.js +5 -3
  46. package/dist/src/config/mode-store.js +2 -1
  47. package/dist/src/config/profiles.js +4 -2
  48. package/dist/src/config/types.js +2 -1
  49. package/dist/src/engine/executor.js +8 -4
  50. package/dist/src/engine/planner.js +9 -5
  51. package/dist/src/llm/providers/anthropic.js +6 -3
  52. package/dist/src/llm/providers/ollama.js +1 -1
  53. package/dist/src/llm/router.js +22 -7
  54. package/dist/src/sessions/manager.js +6 -3
  55. package/dist/src/sharing/viewer.js +2 -1
  56. package/dist/src/tools/file-ops.js +1 -2
  57. package/dist/src/tools/schemas/devops.js +197 -108
  58. package/dist/src/tools/schemas/standard.js +1 -1
  59. package/dist/src/ui/App.js +25 -13
  60. package/dist/src/ui/FileDiffModal.js +22 -11
  61. package/dist/src/ui/HelpModal.js +2 -1
  62. package/dist/src/ui/InputBox.js +6 -3
  63. package/dist/src/ui/MessageList.js +40 -20
  64. package/dist/src/ui/TerminalPane.js +2 -1
  65. package/dist/src/ui/ToolCallDisplay.js +12 -6
  66. package/dist/src/ui/TreePane.js +2 -1
  67. package/dist/src/ui/ink/index.js +37 -21
  68. package/dist/src/watcher/index.js +8 -4
  69. package/package.json +3 -5
  70. package/src/__tests__/alias.test.ts +0 -133
  71. package/src/__tests__/app.test.ts +0 -76
  72. package/src/__tests__/audit.test.ts +0 -877
  73. package/src/__tests__/circuit-breaker.test.ts +0 -116
  74. package/src/__tests__/cli-run.test.ts +0 -351
  75. package/src/__tests__/compat-sqlite.test.ts +0 -68
  76. package/src/__tests__/context-manager.test.ts +0 -632
  77. package/src/__tests__/context.test.ts +0 -242
  78. package/src/__tests__/devops-terminal-gaps.test.ts +0 -718
  79. package/src/__tests__/doctor.test.ts +0 -48
  80. package/src/__tests__/enterprise.test.ts +0 -401
  81. package/src/__tests__/export.test.ts +0 -236
  82. package/src/__tests__/gap-11-18-20.test.ts +0 -958
  83. package/src/__tests__/generator.test.ts +0 -433
  84. package/src/__tests__/helm-streaming.test.ts +0 -127
  85. package/src/__tests__/hooks.test.ts +0 -582
  86. package/src/__tests__/incident.test.ts +0 -179
  87. package/src/__tests__/init.test.ts +0 -487
  88. package/src/__tests__/intent-parser.test.ts +0 -229
  89. package/src/__tests__/llm-router.test.ts +0 -209
  90. package/src/__tests__/logs.test.ts +0 -107
  91. package/src/__tests__/loop-errors.test.ts +0 -244
  92. package/src/__tests__/lsp.test.ts +0 -293
  93. package/src/__tests__/modes.test.ts +0 -336
  94. package/src/__tests__/perf-optimizations.test.ts +0 -847
  95. package/src/__tests__/permissions.test.ts +0 -338
  96. package/src/__tests__/pipeline.test.ts +0 -50
  97. package/src/__tests__/polish-phase3.test.ts +0 -340
  98. package/src/__tests__/profile.test.ts +0 -237
  99. package/src/__tests__/rollback.test.ts +0 -83
  100. package/src/__tests__/runbook.test.ts +0 -219
  101. package/src/__tests__/schedule.test.ts +0 -206
  102. package/src/__tests__/serve.test.ts +0 -275
  103. package/src/__tests__/sessions.test.ts +0 -322
  104. package/src/__tests__/sharing.test.ts +0 -340
  105. package/src/__tests__/snapshots.test.ts +0 -581
  106. package/src/__tests__/standalone-migration.test.ts +0 -199
  107. package/src/__tests__/state-db.test.ts +0 -334
  108. package/src/__tests__/status.test.ts +0 -158
  109. package/src/__tests__/stream-with-tools.test.ts +0 -778
  110. package/src/__tests__/subagents.test.ts +0 -176
  111. package/src/__tests__/system-prompt.test.ts +0 -248
  112. package/src/__tests__/terminal-gap-v2.test.ts +0 -395
  113. package/src/__tests__/terminal-parity.test.ts +0 -393
  114. package/src/__tests__/tf-apply.test.ts +0 -187
  115. package/src/__tests__/tool-converter.test.ts +0 -256
  116. package/src/__tests__/tool-schemas.test.ts +0 -602
  117. package/src/__tests__/tools.test.ts +0 -144
  118. package/src/__tests__/version-json.test.ts +0 -184
  119. package/src/__tests__/version.test.ts +0 -49
  120. package/src/__tests__/watch.test.ts +0 -129
  121. package/src/agent/compaction-agent.ts +0 -266
  122. package/src/agent/context-manager.ts +0 -499
  123. package/src/agent/context.ts +0 -427
  124. package/src/agent/deploy-preview.ts +0 -487
  125. package/src/agent/expand-files.ts +0 -108
  126. package/src/agent/index.ts +0 -68
  127. package/src/agent/loop.ts +0 -1998
  128. package/src/agent/modes.ts +0 -429
  129. package/src/agent/permissions.ts +0 -513
  130. package/src/agent/subagents/base.ts +0 -116
  131. package/src/agent/subagents/cost.ts +0 -51
  132. package/src/agent/subagents/explore.ts +0 -42
  133. package/src/agent/subagents/general.ts +0 -54
  134. package/src/agent/subagents/index.ts +0 -102
  135. package/src/agent/subagents/infra.ts +0 -59
  136. package/src/agent/subagents/security.ts +0 -69
  137. package/src/agent/system-prompt.ts +0 -990
  138. package/src/app.ts +0 -180
  139. package/src/audit/activity-log.ts +0 -290
  140. package/src/audit/compliance-checker.ts +0 -540
  141. package/src/audit/cost-tracker.ts +0 -318
  142. package/src/audit/index.ts +0 -23
  143. package/src/audit/security-scanner.ts +0 -641
  144. package/src/auth/guard.ts +0 -75
  145. package/src/auth/index.ts +0 -56
  146. package/src/auth/keychain.ts +0 -82
  147. package/src/auth/oauth.ts +0 -465
  148. package/src/auth/providers.ts +0 -470
  149. package/src/auth/sso.ts +0 -113
  150. package/src/auth/store.ts +0 -505
  151. package/src/auth/types.ts +0 -187
  152. package/src/build.ts +0 -141
  153. package/src/cli/index.ts +0 -16
  154. package/src/cli/init.ts +0 -1227
  155. package/src/cli/openapi-spec.ts +0 -356
  156. package/src/cli/run.ts +0 -628
  157. package/src/cli/serve-auth.ts +0 -80
  158. package/src/cli/serve.ts +0 -539
  159. package/src/cli/web.ts +0 -71
  160. package/src/cli.ts +0 -1728
  161. package/src/clients/core-engine-client.ts +0 -227
  162. package/src/clients/enterprise-client.ts +0 -334
  163. package/src/clients/generator-client.ts +0 -351
  164. package/src/clients/git-client.ts +0 -627
  165. package/src/clients/github-client.ts +0 -410
  166. package/src/clients/helm-client.ts +0 -504
  167. package/src/clients/index.ts +0 -80
  168. package/src/clients/k8s-client.ts +0 -497
  169. package/src/clients/llm-client.ts +0 -161
  170. package/src/clients/rest-client.ts +0 -130
  171. package/src/clients/service-discovery.ts +0 -38
  172. package/src/clients/terraform-client.ts +0 -482
  173. package/src/clients/tools-client.ts +0 -1843
  174. package/src/clients/ws-client.ts +0 -115
  175. package/src/commands/alias.ts +0 -100
  176. package/src/commands/analyze/index.ts +0 -352
  177. package/src/commands/apply/helm.ts +0 -473
  178. package/src/commands/apply/index.ts +0 -213
  179. package/src/commands/apply/k8s.ts +0 -454
  180. package/src/commands/apply/terraform.ts +0 -582
  181. package/src/commands/ask.ts +0 -167
  182. package/src/commands/audit/index.ts +0 -357
  183. package/src/commands/auth-cloud.ts +0 -407
  184. package/src/commands/auth-list.ts +0 -134
  185. package/src/commands/auth-profile.ts +0 -121
  186. package/src/commands/auth-refresh.ts +0 -187
  187. package/src/commands/auth-status.ts +0 -141
  188. package/src/commands/aws/ec2.ts +0 -501
  189. package/src/commands/aws/iam.ts +0 -397
  190. package/src/commands/aws/index.ts +0 -133
  191. package/src/commands/aws/lambda.ts +0 -396
  192. package/src/commands/aws/rds.ts +0 -439
  193. package/src/commands/aws/s3.ts +0 -439
  194. package/src/commands/aws/vpc.ts +0 -393
  195. package/src/commands/aws-discover.ts +0 -542
  196. package/src/commands/aws-terraform.ts +0 -755
  197. package/src/commands/azure/aks.ts +0 -376
  198. package/src/commands/azure/functions.ts +0 -253
  199. package/src/commands/azure/index.ts +0 -116
  200. package/src/commands/azure/storage.ts +0 -478
  201. package/src/commands/azure/vm.ts +0 -355
  202. package/src/commands/billing/index.ts +0 -256
  203. package/src/commands/chat.ts +0 -320
  204. package/src/commands/completions.ts +0 -268
  205. package/src/commands/config.ts +0 -372
  206. package/src/commands/cost/cloud-cost-estimator.ts +0 -266
  207. package/src/commands/cost/estimator.ts +0 -79
  208. package/src/commands/cost/index.ts +0 -810
  209. package/src/commands/cost/parsers/terraform.ts +0 -273
  210. package/src/commands/cost/parsers/types.ts +0 -25
  211. package/src/commands/cost/pricing/aws.ts +0 -544
  212. package/src/commands/cost/pricing/azure.ts +0 -499
  213. package/src/commands/cost/pricing/gcp.ts +0 -396
  214. package/src/commands/cost/pricing/index.ts +0 -40
  215. package/src/commands/demo.ts +0 -250
  216. package/src/commands/deploy.ts +0 -260
  217. package/src/commands/doctor.ts +0 -1386
  218. package/src/commands/drift/index.ts +0 -787
  219. package/src/commands/explain.ts +0 -277
  220. package/src/commands/export.ts +0 -146
  221. package/src/commands/feedback.ts +0 -389
  222. package/src/commands/fix.ts +0 -324
  223. package/src/commands/fs/index.ts +0 -402
  224. package/src/commands/gcp/compute.ts +0 -325
  225. package/src/commands/gcp/functions.ts +0 -271
  226. package/src/commands/gcp/gke.ts +0 -438
  227. package/src/commands/gcp/iam.ts +0 -344
  228. package/src/commands/gcp/index.ts +0 -129
  229. package/src/commands/gcp/storage.ts +0 -284
  230. package/src/commands/generate-helm.ts +0 -1249
  231. package/src/commands/generate-k8s.ts +0 -1508
  232. package/src/commands/generate-terraform.ts +0 -1202
  233. package/src/commands/gh/index.ts +0 -863
  234. package/src/commands/git/index.ts +0 -1343
  235. package/src/commands/helm/index.ts +0 -1126
  236. package/src/commands/help.ts +0 -715
  237. package/src/commands/history.ts +0 -149
  238. package/src/commands/import.ts +0 -868
  239. package/src/commands/incident.ts +0 -166
  240. package/src/commands/index.ts +0 -367
  241. package/src/commands/init.ts +0 -1051
  242. package/src/commands/k8s/index.ts +0 -1137
  243. package/src/commands/login.ts +0 -716
  244. package/src/commands/logout.ts +0 -83
  245. package/src/commands/logs.ts +0 -167
  246. package/src/commands/onboarding.ts +0 -405
  247. package/src/commands/pipeline.ts +0 -186
  248. package/src/commands/plan/display.ts +0 -279
  249. package/src/commands/plan/index.ts +0 -599
  250. package/src/commands/plugin.ts +0 -398
  251. package/src/commands/preview.ts +0 -452
  252. package/src/commands/profile.ts +0 -342
  253. package/src/commands/questionnaire.ts +0 -1172
  254. package/src/commands/resume.ts +0 -47
  255. package/src/commands/rollback.ts +0 -315
  256. package/src/commands/rollout.ts +0 -88
  257. package/src/commands/runbook.ts +0 -346
  258. package/src/commands/schedule.ts +0 -236
  259. package/src/commands/status.ts +0 -252
  260. package/src/commands/team/index.ts +0 -346
  261. package/src/commands/team-context.ts +0 -220
  262. package/src/commands/template.ts +0 -233
  263. package/src/commands/tf/index.ts +0 -1093
  264. package/src/commands/upgrade.ts +0 -609
  265. package/src/commands/usage/index.ts +0 -134
  266. package/src/commands/version.ts +0 -174
  267. package/src/commands/watch.ts +0 -153
  268. package/src/compat/index.ts +0 -2
  269. package/src/compat/runtime.ts +0 -12
  270. package/src/compat/sqlite.ts +0 -177
  271. package/src/config/index.ts +0 -17
  272. package/src/config/manager.ts +0 -530
  273. package/src/config/mode-store.ts +0 -62
  274. package/src/config/profiles.ts +0 -84
  275. package/src/config/safety-policy.ts +0 -358
  276. package/src/config/schema.ts +0 -125
  277. package/src/config/types.ts +0 -609
  278. package/src/config/workspace-state.ts +0 -53
  279. package/src/context/context-db.ts +0 -199
  280. package/src/demo/index.ts +0 -349
  281. package/src/demo/scenarios/full-journey.ts +0 -229
  282. package/src/demo/scenarios/getting-started.ts +0 -127
  283. package/src/demo/scenarios/helm-release.ts +0 -341
  284. package/src/demo/scenarios/k8s-deployment.ts +0 -194
  285. package/src/demo/scenarios/terraform-vpc.ts +0 -170
  286. package/src/demo/types.ts +0 -92
  287. package/src/engine/cost-estimator.ts +0 -480
  288. package/src/engine/diagram-generator.ts +0 -256
  289. package/src/engine/drift-detector.ts +0 -902
  290. package/src/engine/executor.ts +0 -1066
  291. package/src/engine/index.ts +0 -76
  292. package/src/engine/orchestrator.ts +0 -636
  293. package/src/engine/planner.ts +0 -787
  294. package/src/engine/safety.ts +0 -743
  295. package/src/engine/verifier.ts +0 -770
  296. package/src/enterprise/audit.ts +0 -348
  297. package/src/enterprise/auth.ts +0 -270
  298. package/src/enterprise/billing.ts +0 -822
  299. package/src/enterprise/index.ts +0 -17
  300. package/src/enterprise/teams.ts +0 -443
  301. package/src/generator/best-practices.ts +0 -1608
  302. package/src/generator/helm.ts +0 -630
  303. package/src/generator/index.ts +0 -37
  304. package/src/generator/intent-parser.ts +0 -514
  305. package/src/generator/kubernetes.ts +0 -976
  306. package/src/generator/terraform.ts +0 -1875
  307. package/src/history/index.ts +0 -8
  308. package/src/history/manager.ts +0 -250
  309. package/src/history/types.ts +0 -34
  310. package/src/hooks/config.ts +0 -432
  311. package/src/hooks/engine.ts +0 -392
  312. package/src/hooks/index.ts +0 -4
  313. package/src/llm/auth-bridge.ts +0 -198
  314. package/src/llm/circuit-breaker.ts +0 -140
  315. package/src/llm/config-loader.ts +0 -201
  316. package/src/llm/cost-calculator.ts +0 -171
  317. package/src/llm/index.ts +0 -8
  318. package/src/llm/model-aliases.ts +0 -115
  319. package/src/llm/provider-registry.ts +0 -63
  320. package/src/llm/providers/anthropic.ts +0 -462
  321. package/src/llm/providers/bedrock.ts +0 -477
  322. package/src/llm/providers/google.ts +0 -405
  323. package/src/llm/providers/ollama.ts +0 -767
  324. package/src/llm/providers/openai-compatible.ts +0 -340
  325. package/src/llm/providers/openai.ts +0 -328
  326. package/src/llm/providers/openrouter.ts +0 -338
  327. package/src/llm/router.ts +0 -1104
  328. package/src/llm/types.ts +0 -232
  329. package/src/lsp/client.ts +0 -298
  330. package/src/lsp/languages.ts +0 -119
  331. package/src/lsp/manager.ts +0 -294
  332. package/src/mcp/client.ts +0 -402
  333. package/src/mcp/index.ts +0 -5
  334. package/src/mcp/manager.ts +0 -133
  335. package/src/nimbus.ts +0 -234
  336. package/src/plugins/index.ts +0 -27
  337. package/src/plugins/loader.ts +0 -334
  338. package/src/plugins/manager.ts +0 -376
  339. package/src/plugins/types.ts +0 -284
  340. package/src/scanners/cicd-scanner.ts +0 -258
  341. package/src/scanners/cloud-scanner.ts +0 -466
  342. package/src/scanners/framework-scanner.ts +0 -469
  343. package/src/scanners/iac-scanner.ts +0 -388
  344. package/src/scanners/index.ts +0 -539
  345. package/src/scanners/language-scanner.ts +0 -276
  346. package/src/scanners/package-manager-scanner.ts +0 -277
  347. package/src/scanners/types.ts +0 -172
  348. package/src/sessions/manager.ts +0 -472
  349. package/src/sessions/types.ts +0 -44
  350. package/src/sharing/sync.ts +0 -300
  351. package/src/sharing/viewer.ts +0 -163
  352. package/src/snapshots/index.ts +0 -2
  353. package/src/snapshots/manager.ts +0 -530
  354. package/src/state/artifacts.ts +0 -147
  355. package/src/state/audit.ts +0 -137
  356. package/src/state/billing.ts +0 -240
  357. package/src/state/checkpoints.ts +0 -117
  358. package/src/state/config.ts +0 -67
  359. package/src/state/conversations.ts +0 -14
  360. package/src/state/credentials.ts +0 -154
  361. package/src/state/db.ts +0 -58
  362. package/src/state/index.ts +0 -26
  363. package/src/state/messages.ts +0 -115
  364. package/src/state/projects.ts +0 -123
  365. package/src/state/schema.ts +0 -236
  366. package/src/state/sessions.ts +0 -147
  367. package/src/state/teams.ts +0 -200
  368. package/src/telemetry.ts +0 -108
  369. package/src/tools/aws-ops.ts +0 -952
  370. package/src/tools/azure-ops.ts +0 -579
  371. package/src/tools/file-ops.ts +0 -615
  372. package/src/tools/gcp-ops.ts +0 -625
  373. package/src/tools/git-ops.ts +0 -773
  374. package/src/tools/github-ops.ts +0 -799
  375. package/src/tools/helm-ops.ts +0 -943
  376. package/src/tools/index.ts +0 -17
  377. package/src/tools/k8s-ops.ts +0 -819
  378. package/src/tools/schemas/converter.ts +0 -184
  379. package/src/tools/schemas/devops.ts +0 -3502
  380. package/src/tools/schemas/index.ts +0 -73
  381. package/src/tools/schemas/standard.ts +0 -1148
  382. package/src/tools/schemas/types.ts +0 -735
  383. package/src/tools/spawn-exec.ts +0 -148
  384. package/src/tools/terraform-ops.ts +0 -862
  385. package/src/types/ambient.d.ts +0 -193
  386. package/src/types/config.ts +0 -83
  387. package/src/types/drift.ts +0 -116
  388. package/src/types/enterprise.ts +0 -335
  389. package/src/types/index.ts +0 -20
  390. package/src/types/plan.ts +0 -44
  391. package/src/types/request.ts +0 -65
  392. package/src/types/response.ts +0 -54
  393. package/src/types/service.ts +0 -51
  394. package/src/ui/App.tsx +0 -2114
  395. package/src/ui/DeployPreview.tsx +0 -174
  396. package/src/ui/FileDiffModal.tsx +0 -162
  397. package/src/ui/Header.tsx +0 -131
  398. package/src/ui/HelpModal.tsx +0 -57
  399. package/src/ui/InputBox.tsx +0 -503
  400. package/src/ui/MessageList.tsx +0 -1032
  401. package/src/ui/PermissionPrompt.tsx +0 -163
  402. package/src/ui/StatusBar.tsx +0 -277
  403. package/src/ui/TerminalPane.tsx +0 -84
  404. package/src/ui/ToolCallDisplay.tsx +0 -643
  405. package/src/ui/TreePane.tsx +0 -132
  406. package/src/ui/chat-ui.ts +0 -850
  407. package/src/ui/index.ts +0 -33
  408. package/src/ui/ink/index.ts +0 -1444
  409. package/src/ui/streaming.ts +0 -176
  410. package/src/ui/theme.ts +0 -104
  411. package/src/ui/types.ts +0 -75
  412. package/src/utils/analytics.ts +0 -72
  413. package/src/utils/cost-warning.ts +0 -27
  414. package/src/utils/env.ts +0 -46
  415. package/src/utils/errors.ts +0 -69
  416. package/src/utils/event-bus.ts +0 -38
  417. package/src/utils/index.ts +0 -24
  418. package/src/utils/logger.ts +0 -171
  419. package/src/utils/rate-limiter.ts +0 -121
  420. package/src/utils/service-auth.ts +0 -49
  421. package/src/utils/validation.ts +0 -53
  422. package/src/version.ts +0 -4
  423. package/src/watcher/index.ts +0 -214
  424. package/src/wizard/approval.ts +0 -383
  425. package/src/wizard/index.ts +0 -25
  426. package/src/wizard/prompts.ts +0 -338
  427. package/src/wizard/types.ts +0 -172
  428. package/src/wizard/ui.ts +0 -556
  429. package/src/wizard/wizard.ts +0 -304
  430. package/tsconfig.json +0 -24
package/src/agent/loop.ts DELETED
@@ -1,1998 +0,0 @@
1
- /**
2
- * Core Agentic Loop
3
- *
4
- * Implements the autonomous agent loop:
5
- * 1. Build context (system prompt + history + tools)
6
- * 2. Send to LLM with tools enabled
7
- * 3. Stream text response
8
- * 4. If tool_use: check permissions → execute → collect results
9
- * 5. Append messages → loop back to LLM
10
- * 6. Exit when LLM returns end_turn (no more tool calls)
11
- *
12
- * This is the heart of the Nimbus agent. Every user message enters
13
- * {@link runAgentLoop}, which orchestrates a multi-turn conversation with
14
- * the LLM, executing tools on its behalf until it signals completion by
15
- * returning a response with no further tool calls.
16
- *
17
- * @module agent/loop
18
- */
19
-
20
- import { join } from 'node:path';
21
- import type { LLMRouter } from '../llm/router';
22
- import type {
23
- LLMMessage,
24
- ToolCall,
25
- ToolCompletionRequest,
26
- ToolDefinition as LLMToolDefinition,
27
- } from '../llm/types';
28
- import {
29
- toOpenAITool,
30
- type ToolDefinition,
31
- type ToolExecuteContext,
32
- type ToolResult,
33
- type ToolRegistry,
34
- } from '../tools/schemas/types';
35
- import { buildSystemPrompt, type AgentMode } from './system-prompt';
36
- import type { ContextManager, CompactionResult } from './context-manager';
37
- import { runCompaction } from './compaction-agent';
38
- import type { LSPManager } from '../lsp/manager';
39
- import { SnapshotManager } from '../snapshots/manager';
40
- import { calculateCost } from '../llm/cost-calculator';
41
- import {
42
- HookEngine,
43
- runPreToolHooks,
44
- runPostToolHooks,
45
- type HookContext,
46
- } from '../hooks/engine';
47
- import { maskSecrets } from '../audit/security-scanner';
48
- import { classifyTaskComplexity, routeModel } from '../llm/router';
49
- import { mkdirSync as _cpMkdirSync, writeFileSync as _cpWriteFileSync } from 'node:fs';
50
- import { homedir as _cpHomedir } from 'node:os';
51
-
52
- // ---------------------------------------------------------------------------
53
- // C2: Infra state checkpoint helper
54
- // ---------------------------------------------------------------------------
55
-
56
- /**
57
- * Write a checkpoint JSON file to ~/.nimbus/infra-checkpoints/<timestamp>.json
58
- * before a mutating terraform or helm operation. Non-blocking — errors are swallowed.
59
- */
60
- function writeInfraCheckpoint(tool: string, action: string, input: Record<string, unknown>): void {
61
- try {
62
- const checkpointsDir = join(_cpHomedir(), '.nimbus', 'infra-checkpoints');
63
- _cpMkdirSync(checkpointsDir, { recursive: true });
64
- // Sanitize: remove any field that looks like a secret
65
- const sanitized: Record<string, unknown> = {};
66
- for (const [k, v] of Object.entries(input)) {
67
- const lower = k.toLowerCase();
68
- if (lower.includes('secret') || lower.includes('password') || lower.includes('token') || lower.includes('key')) {
69
- sanitized[k] = '[redacted]';
70
- } else {
71
- sanitized[k] = v;
72
- }
73
- }
74
- const timestamp = new Date().toISOString();
75
- const checkpoint = {
76
- timestamp,
77
- tool,
78
- action,
79
- input: sanitized,
80
- cwd: process.cwd(),
81
- workdir: (input.workdir as string | undefined) ?? undefined,
82
- };
83
- const fileName = timestamp.replace(/[:.]/g, '-') + '.json';
84
- _cpWriteFileSync(
85
- join(checkpointsDir, fileName),
86
- JSON.stringify(checkpoint, null, 2),
87
- 'utf-8'
88
- );
89
- } catch { /* non-critical */ }
90
- }
91
-
92
- // ---------------------------------------------------------------------------
93
- // Helpers
94
- // ---------------------------------------------------------------------------
95
-
96
- // ---------------------------------------------------------------------------
97
- // Module-level compiled regex constants for classifyDevOpsError (PERF-1d).
98
- // Hoisted here so they compile once at module load rather than per-call.
99
- // ---------------------------------------------------------------------------
100
-
101
- const _RE_CREDENTIAL_EXPIRY_AWS = /ExpiredTokenException|TokenExpiredException|token.*has.*expired/i;
102
- const _RE_CREDENTIAL_EXPIRY_GCP = /credentials.*expired|Application Default Credentials.*expired|re-authenticate/i;
103
- const _RE_CREDENTIAL_EXPIRY_AZURE = /AADSTS70008|InteractionRequired|credential.*expired/i;
104
- const _RE_CMD_NOT_FOUND = /command not found|not found|no such file or directory/i;
105
-
106
- /**
107
- * Classify a DevOps tool error and return an actionable hint for the LLM.
108
- * Returns null for unrecognized errors so we don't pollute the context.
109
- */
110
- function classifyDevOpsError(toolName: string, errorOutput: string, nimbusInstructions?: string): string | null {
111
- const e = errorOutput.toLowerCase();
112
-
113
- // GAP-13: Credential expiry patterns — must come first for fast matching
114
- const CREDENTIAL_EXPIRY = [
115
- { re: _RE_CREDENTIAL_EXPIRY_AWS, provider: 'aws' },
116
- { re: _RE_CREDENTIAL_EXPIRY_GCP, provider: 'gcp' },
117
- { re: _RE_CREDENTIAL_EXPIRY_AZURE, provider: 'azure' },
118
- ];
119
- for (const { re, provider } of CREDENTIAL_EXPIRY) {
120
- if (re.test(errorOutput)) {
121
- return `Your ${provider.toUpperCase()} credentials have expired.\n\nRun: \`nimbus auth-refresh --provider ${provider}\` to refresh them.`;
122
- }
123
- }
124
-
125
- // G3: "command not found" — provide installation hints for DevOps CLIs
126
- const INSTALL_HINTS: Record<string, string> = {
127
- terraform: 'brew install terraform OR https://developer.hashicorp.com/terraform/install',
128
- kubectl: 'brew install kubectl OR https://kubernetes.io/docs/tasks/tools/',
129
- helm: 'brew install helm OR https://helm.sh/docs/intro/install/',
130
- docker: 'brew install --cask docker OR https://docs.docker.com/get-docker/',
131
- aws: 'brew install awscli OR pip install awscli',
132
- gcloud: 'brew install --cask google-cloud-sdk',
133
- az: 'brew install azure-cli',
134
- };
135
- if (_RE_CMD_NOT_FOUND.test(errorOutput)) {
136
- for (const [cmd, hint] of Object.entries(INSTALL_HINTS)) {
137
- if (toolName.includes(cmd) || e.includes(`'${cmd}'`) || e.includes(`"${cmd}"`)) {
138
- return `\`${cmd}\` is not installed.\n\nInstall: ${hint}`;
139
- }
140
- }
141
- }
142
-
143
- // Terraform errors
144
- if (toolName === 'terraform' || e.includes('terraform')) {
145
- if (e.includes('no such file or directory') && e.includes('.terraform')) {
146
- return 'HINT: Run `terraform init` first — the .terraform directory is missing.';
147
- }
148
- if (e.includes('provider') && e.includes('required') && e.includes('terraform')) {
149
- return 'HINT: Run `terraform init -upgrade` to download or upgrade required providers.';
150
- }
151
- if (e.includes('no valid credential') || e.includes('no credentials')) {
152
- return 'HINT: AWS/cloud credentials are missing. Check `aws configure` or environment variables.';
153
- }
154
- if (e.includes('state lock') || e.includes('lock file')) {
155
- return 'HINT: Terraform state is locked. If no other operation is running, use `terraform force-unlock <lock-id>`.';
156
- }
157
- if (e.includes('module not installed') || e.includes('module source')) {
158
- return 'HINT: Run `terraform init` to install required modules.';
159
- }
160
- if (e.includes('quota') || e.includes('limit exceeded') || e.includes('vcpu')) {
161
- return 'HINT: Cloud resource quota exceeded. Request a limit increase in the cloud console.';
162
- }
163
- }
164
-
165
- // Kubernetes errors
166
- if (toolName === 'kubectl' || toolName === 'kubectl_context') {
167
- if (e.includes('connection refused') || e.includes('unable to connect')) {
168
- return 'HINT: Cannot reach the Kubernetes API server. Check `kubectl config current-context` and ensure the cluster is accessible.';
169
- }
170
- if (e.includes('unauthorized') || e.includes('forbidden')) {
171
- return 'HINT: Insufficient permissions. Check your kubeconfig credentials or RBAC roles.';
172
- }
173
- if (e.includes('not found') && e.includes('namespace')) {
174
- return 'HINT: The namespace does not exist. Create it with `kubectl create namespace <name>` first.';
175
- }
176
- if (e.includes('image') && (e.includes('not found') || e.includes('pull'))) {
177
- return 'HINT: Container image pull failed. Verify the image name, tag, and registry credentials (imagePullSecret).';
178
- }
179
- }
180
-
181
- // Helm errors
182
- if (toolName === 'helm' || toolName === 'helm_values') {
183
- if (e.includes('chart not found') || e.includes('no such chart')) {
184
- return 'HINT: Chart not found. Run `helm repo update` and verify the chart name.';
185
- }
186
- if (e.includes('release not found')) {
187
- return 'HINT: Helm release not found. Use `helm list -A` to see all releases across namespaces.';
188
- }
189
- if (e.includes('unable to build kubernetes objects') || e.includes('manifest')) {
190
- return 'HINT: Helm template rendering failed. Run `helm template <release> <chart>` to debug the manifests.';
191
- }
192
- }
193
-
194
- // Cloud CLI errors
195
- if (toolName === 'cloud_discover' || toolName === 'cloud_action') {
196
- if (e.includes('not authorized') || e.includes('access denied') || e.includes('unauthorized')) {
197
- return 'HINT: Cloud credentials lack required permissions. Check IAM policies/roles for the operation.';
198
- }
199
- if (e.includes('region') && e.includes('not found')) {
200
- return 'HINT: Invalid region. Check `aws configure get region` or pass --region explicitly.';
201
- }
202
- }
203
-
204
- // Docker errors
205
- if (toolName === 'docker') {
206
- if (e.includes('cannot connect to the docker daemon') || e.includes('docker daemon') || e.includes('docker.sock')) {
207
- return 'HINT: Docker daemon is not running. Start it with `colima start` (macOS) or `sudo systemctl start docker` (Linux).';
208
- }
209
- if (e.includes('manifest unknown') || e.includes('manifest not found') || e.includes('not found')) {
210
- return 'HINT: Image not found. Verify the image name and tag. Check registry credentials with `docker login`.';
211
- }
212
- if (e.includes('no space left on device') || e.includes('no space left')) {
213
- return 'HINT: Docker disk space exhausted. Run `docker system prune -f` to reclaim space.';
214
- }
215
- if (e.includes('permission denied') && e.includes('docker')) {
216
- return 'HINT: Docker permission denied. Add your user to the docker group: `sudo usermod -aG docker $USER`.';
217
- }
218
- }
219
-
220
- // Secrets errors
221
- if (toolName === 'secrets') {
222
- if (e.includes('permission denied') || e.includes('403') || e.includes('accessdenied')) {
223
- return 'HINT: Secrets access denied. Check Vault policy with `vault policy read <policy>` or IAM role permissions.';
224
- }
225
- if (e.includes('secret not found') || e.includes('no such secret') || e.includes('resourcenotfoundexception')) {
226
- return 'HINT: Secret not found. Verify the secret path/name and namespace. Use `vault kv list <mount>` to browse.';
227
- }
228
- if (e.includes('invalid token') || e.includes('token expired')) {
229
- return 'HINT: Vault/cloud token expired. Run `vault login` or refresh cloud credentials with `nimbus auth-refresh`.';
230
- }
231
- }
232
-
233
- // CI/CD errors
234
- if (toolName === 'cicd') {
235
- if (e.includes('workflow not found') || e.includes('could not find workflow')) {
236
- return 'HINT: Workflow not found. Check the workflow filename in .github/workflows/ and the branch name.';
237
- }
238
- if (e.includes('rate limit') || e.includes('429') || e.includes('too many requests')) {
239
- return 'HINT: API rate limited. Wait 60 seconds and retry. Check rate limit headers for reset time.';
240
- }
241
- if (e.includes('unauthorized') || e.includes('401') || e.includes('bad credentials')) {
242
- return 'HINT: CI/CD authentication failed. Check GITHUB_TOKEN, GITLAB_TOKEN, or CIRCLECI_TOKEN environment variables.';
243
- }
244
- }
245
-
246
- // GitOps errors
247
- if (toolName === 'gitops') {
248
- if (e.includes('not found') || e.includes('not logged in') || e.includes('unauthenticated')) {
249
- return 'HINT: ArgoCD/Flux not accessible. Check ARGOCD_SERVER and ARGOCD_TOKEN env vars, or run `argocd login`.';
250
- }
251
- if (e.includes('comparisonerror') || e.includes('sync error')) {
252
- return 'HINT: GitOps sync error. Validate manifests: `kubectl apply --dry-run=client -f <manifest>` to find issues.';
253
- }
254
- if (e.includes('health') && e.includes('degraded')) {
255
- return 'HINT: Application is degraded. Check pod logs with `kubectl logs -n <ns>` and events with `kubectl get events -n <ns>`.';
256
- }
257
- }
258
-
259
- // Monitoring errors
260
- if (toolName === 'monitor') {
261
- if (e.includes('connection refused') || e.includes('could not connect')) {
262
- return 'HINT: Cannot connect to monitoring endpoint. Check PROMETHEUS_URL, GRAFANA_URL, or cloud region configuration.';
263
- }
264
- if (e.includes('unauthorized') || e.includes('403')) {
265
- return 'HINT: Monitoring authentication failed. Check DD_API_KEY, GRAFANA_TOKEN, or NEW_RELIC_API_KEY environment variables.';
266
- }
267
- }
268
-
269
- // L3: Parse NIMBUS.md custom error hints section
270
- if (nimbusInstructions) {
271
- const hintsMatch = nimbusInstructions.match(/##\s*Custom Error Hints\s*\n([\s\S]*?)(?=\n##|\n$|$)/i);
272
- if (hintsMatch) {
273
- const hintsSection = hintsMatch[1];
274
- const hintLines = hintsSection.split('\n').filter(l => l.trim().startsWith('-'));
275
- for (const line of hintLines) {
276
- // Format: "- pattern: hint message"
277
- const colonIdx = line.indexOf(':');
278
- if (colonIdx > 0) {
279
- const pattern = line.slice(1, colonIdx).trim();
280
- const hint = line.slice(colonIdx + 1).trim();
281
- if (pattern && hint && errorOutput.toLowerCase().includes(pattern.toLowerCase())) {
282
- return `HINT: ${hint}`;
283
- }
284
- }
285
- }
286
- }
287
- }
288
-
289
- return null;
290
- }
291
-
292
/**
 * Names of the DevOps-oriented tools. Per the original note, a tool listed
 * here gets a self-diagnosis hint when its error is not otherwise recognized.
 */
const DEVOPS_TOOL_NAMES = new Set([
  'terraform',
  'kubectl',
  'kubectl_context',
  'helm',
  'helm_values',
  'bash',
  'cloud_discover',
  'drift_detect',
  'deploy_preview',
  'docker',
  'secrets',
  'cicd',
  'monitor',
  'gitops',
  'cloud_action',
  'logs',
  'certs',
  'mesh',
  'cfn',
  'k8s_rbac',
]);
299
-
300
/**
 * Turn a tool-input validation failure into a message the LLM can act on.
 *
 * Zod-style errors (objects exposing an `issues` array) are rendered as one
 * bullet per issue, naming the offending field path and the problem. Any
 * other thrown value falls back to a single-line failure message.
 *
 * @param toolName - Name of the tool whose input was rejected.
 * @param err - The thrown value: a ZodError-like object, an Error, or anything else.
 * @returns A human-readable description of what went wrong.
 */
function formatToolInputError(toolName: string, err: unknown): string {
  if (err && typeof err === 'object' && 'issues' in err) {
    const issues = (err as { issues: unknown }).issues;
    // Only treat the value as a ZodError when `issues` really is a list; an
    // unrelated error object may carry an `issues` field of another shape, and
    // this formatter must never throw while reporting a failure.
    if (Array.isArray(issues)) {
      const details = issues
        .map(issue => {
          const { path, message } = (issue ?? {}) as { path?: unknown; message?: unknown };
          const location = Array.isArray(path) ? path.map(String).join('.') : '';
          return `  - ${location || '(root)'}: ${String(message)}`;
        })
        .join('\n');
      return `Tool "${toolName}" received invalid input:\n${details}\n\nPlease correct the arguments and retry.`;
    }
  }
  return `Tool "${toolName}" failed: ${err instanceof Error ? err.message : String(err)}`;
}
315
-
316
/**
 * Decide whether a streaming failure looks transient enough to retry.
 *
 * An error qualifies when it is an object that either carries a numeric
 * `status`/`statusCode` of 429 or 5xx, or has a string `message` mentioning
 * rate limiting, "too many requests", "overloaded", 429 or 503.
 * Non-object values are never considered retryable.
 */
function isRetryableStreamError(err: unknown): boolean {
  if (typeof err !== 'object' || err === null) {
    return false;
  }
  const fields = err as Record<string, unknown>;

  // `status` wins over `statusCode` when both are numeric.
  let httpStatus: number | undefined;
  if (typeof fields.status === 'number') {
    httpStatus = fields.status;
  } else if (typeof fields.statusCode === 'number') {
    httpStatus = fields.statusCode;
  }
  if (httpStatus !== undefined) {
    const isServerError = httpStatus >= 500 && httpStatus < 600;
    if (httpStatus === 429 || isServerError) {
      return true;
    }
  }

  const text = typeof fields.message === 'string' ? fields.message : '';
  return /rate.?limit|429|too many requests|overloaded|503/i.test(text);
}
329
-
330
- // ---------------------------------------------------------------------------
331
- // G3: Runaway protection helpers
332
- // ---------------------------------------------------------------------------
333
-
334
/** Whole-word verbs (case-insensitive) that mark tool arguments as destructive. */
const DESTRUCTIVE_PATTERNS = /\b(apply|destroy|delete|terminate|stop|remove|drop|truncate|purge)\b/i;

/** Tools whose destructive operations count toward the session-level total. */
const DESTRUCTIVE_TOOL_NAMES = new Set([
  'terraform',
  'kubectl',
  'docker',
  'aws',
  'gcloud',
  'az',
  'cloud_action',
  'cfn',
]);

/**
 * Report whether a tool call looks like a destructive infrastructure
 * operation: the tool must be one of the counted tools AND its serialized
 * input must contain one of the destructive verbs. Feeds the session-level
 * destructive-ops counter (G3).
 */
function isDestructiveOp(toolName: string, inputStr: string): boolean {
  if (!DESTRUCTIVE_TOOL_NAMES.has(toolName)) {
    return false;
  }
  return DESTRUCTIVE_PATTERNS.test(inputStr);
}
349
-
350
- // ---------------------------------------------------------------------------
351
- // Public Types
352
- // ---------------------------------------------------------------------------
353
-
354
/** Configuration accepted by the agent loop. */
export interface AgentLoopOptions {
  /** LLM router used to issue completion requests. */
  router: LLMRouter;

  /** Registry holding the tools that may be offered to the model. */
  toolRegistry: ToolRegistry;

  /** Operating mode of the agent (plan / build / deploy). */
  mode: AgentMode;

  /** Cap on the number of LLM turns before the loop stops. Default: 50. */
  maxTurns?: number;

  /**
   * G3: cap on tool calls within a single LLM turn, guarding against
   * runaway tool-call loops. Default: 20.
   */
  maxToolCallsPerTurn?: number;

  /**
   * G3: cap on destructive operations within a single session. Reaching the
   * threshold triggers a warning in the tool result. Default: 5.
   */
  maxDestructiveOpsPerSession?: number;

  /** Model identifier, e.g. `'anthropic/claude-sonnet-4-20250514'`. */
  model?: string;

  /**
   * Gap 18: when true, the model is chosen automatically from task
   * complexity (simple → haiku, moderate → sonnet, complex → opus).
   * Has no effect when `model` is set explicitly.
   */
  autoRouteModel?: boolean;

  /** Working directory the agent operates in. */
  cwd?: string;

  /** NIMBUS.md content to inject into the system prompt. */
  nimbusInstructions?: string;

  /**
   * Gaps 7 & 10: live infrastructure context (terraform workspace, kubectl
   * context, etc.) discovered at startup and injected into the system prompt.
   */
  infraContext?: {
    terraformWorkspace?: string;
    kubectlContext?: string;
    helmReleases?: string[];
    awsAccount?: string;
    awsRegion?: string;
    gcpProject?: string;
  };

  /** Receives streamed text output. */
  onText?: (text: string) => void;

  /** Invoked when a tool call begins. */
  onToolCallStart?: (toolCall: ToolCallInfo) => void;

  /** Invoked when a tool call finishes, together with its result. */
  onToolCallEnd?: (toolCall: ToolCallInfo, result: ToolResult) => void;

  /**
   * Gap 1 (live streaming): invoked for every chunk of streamed tool output,
   * with the tool call ID and the chunk text.
   */
  onToolOutputChunk?: (toolId: string, chunk: string) => void;

  /**
   * Permission gate consulted before a tool executes.
   * When omitted, every tool runs without prompting.
   */
  checkPermission?: (tool: ToolDefinition, input: unknown) => Promise<PermissionDecision>;

  /** AbortSignal used to cancel the loop (Ctrl+C). */
  signal?: AbortSignal;

  /** Session ID for persistence (reserved for future use). */
  sessionId?: string;

  /**
   * Context manager enabling auto-compact. When supplied, the loop checks
   * context usage after each tool-call turn and compacts once the threshold
   * is exceeded.
   */
  contextManager?: ContextManager;

  /**
   * Invoked when auto-compact runs; receives the compaction result,
   * including token-savings information.
   */
  onCompact?: (result: CompactionResult) => void;

  /**
   * LSP manager for post-edit diagnostics. When supplied, the language
   * server is queried after file-editing tools and any diagnostics are
   * appended to the tool result so the LLM can self-correct.
   */
  lspManager?: LSPManager;

  /**
   * Snapshot manager for auto-capture. When supplied, a snapshot is taken
   * before each file-modifying tool call so changes can be undone/redone.
   */
  snapshotManager?: SnapshotManager;

  /**
   * Hook engine for PreToolUse / PostToolUse / PermissionRequest hooks.
   * When supplied, matching hook scripts run around each tool call.
   */
  hookEngine?: HookEngine;

  /**
   * Invoked after each LLM turn with the accumulated usage and cost, so the
   * TUI can refresh its cost/token display during multi-turn loops rather
   * than only at the end.
   */
  onUsage?: (usage: AgentLoopUsage, costUSD: number) => void;

  /**
   * Diff-preview hook for file-mutating tools. When supplied, the loop calls
   * it before edit_file / multi_edit / write_file. Returning 'reject' skips
   * the tool call; 'apply-all' disables further prompts.
   */
  requestFileDiff?: (
    path: string,
    toolName: string,
    diff: string
  ) => Promise<FileDiffDecision>;

  /**
   * Internal flag tied to the 'apply-all' diff decision: skips the remaining
   * diff prompts for the current turn. Set externally by the TUI launcher.
   */
  skipRemainingDiffPrompts?: boolean;

  /**
   * Internal flag tied to the 'reject-all' diff decision: auto-rejects the
   * remaining diff prompts for the current turn. Set externally by the TUI
   * launcher.
   */
  rejectRemainingDiffPrompts?: boolean;

  /**
   * M1: dry-run mode. When true, plan mode is forced and the system prompt
   * is prefixed with a hard constraint telling the agent not to execute any
   * mutating operations.
   */
  dryRun?: boolean;

  /**
   * G16: spending ceiling in USD for the session. Once the cumulative LLM
   * cost exceeds it, the loop stops and returns a budget-exceeded message.
   */
  costBudgetUSD?: number;

  /**
   * G21: stream silence timeout override, in milliseconds.
   * Defaults to config.agentTurnTimeoutSeconds * 1000, or 60_000 if not set.
   */
  streamSilenceTimeoutMs?: number;

  /**
   * GAP-20: per-tool timeout overrides from the NIMBUS.md Tool Timeouts
   * section, keyed by tool name with values in milliseconds. A configured
   * value is threaded into the tool's ToolExecuteContext so it can override
   * the built-in default.
   */
  toolTimeouts?: Record<string, number>;
}
514
-
515
/** Describes a tool call that is currently in flight. */
export interface ToolCallInfo {
  /** Unique ID the provider assigned to this tool call. */
  id: string;

  /** Name of the tool, as registered in the tool registry. */
  name: string;

  /** Input arguments after parsing. */
  input: unknown;

  /** Start time of the call as a Unix timestamp from `Date.now()`. */
  startTime: number;
}
529
-
530
/**
 * Outcome of a permission check:
 *
 * - `allow` -- go ahead and execute the tool.
 * - `deny`  -- skip this invocation and tell the LLM it was denied.
 * - `block` -- skip it and tell the LLM the tool is permanently blocked.
 */
export type PermissionDecision =
  | 'allow'
  | 'deny'
  | 'block';
538
-
539
/**
 * Outcome of a per-file diff approval request:
 *
 * - `apply`      -- apply this change.
 * - `reject`     -- skip this change.
 * - `apply-all`  -- apply this change and all remaining ones without further prompts.
 * - `reject-all` -- reject this change and auto-reject the remaining diff prompts
 *                   (see `AgentLoopOptions.rejectRemainingDiffPrompts`).
 */
export type FileDiffDecision =
  | 'apply'
  | 'reject'
  | 'apply-all'
  | 'reject-all';
547
-
548
/** Token usage summed over every LLM turn of a loop run. */
export interface AgentLoopUsage {
  /** Prompt (input) tokens consumed in total. */
  promptTokens: number;

  /** Completion (output) tokens consumed in total. */
  completionTokens: number;

  /** Prompt and completion tokens combined. */
  totalTokens: number;
}
559
-
560
/** What the agent loop hands back once it has finished. */
export interface AgentLoopResult {
  /** Conversation messages as they stand when the loop completes. */
  messages: LLMMessage[];

  /** How many LLM turns were taken. */
  turns: number;

  /** True when the loop was cut short via the AbortSignal. */
  interrupted: boolean;

  /** Token usage totalled across all turns. */
  usage: AgentLoopUsage;

  /** Estimated total cost in USD. */
  totalCost: number;
}
577
-
578
- // ---------------------------------------------------------------------------
579
- // Constants
580
- // ---------------------------------------------------------------------------
581
-
582
/** Model identifier the loop falls back to when the caller does not name one. */
const DEFAULT_MODEL = 'anthropic/claude-sonnet-4-20250514';
584
-
585
- // ---------------------------------------------------------------------------
586
- // H5: Cost delta hint after terraform apply / helm upgrade
587
- // ---------------------------------------------------------------------------
588
-
589
/**
 * Derive a short cost-related hint from the output of an infrastructure
 * operation, for display after `terraform apply` or `helm install/upgrade`.
 *
 * @param toolName - Tool that produced the output (`'terraform'` or `'helm'` are handled).
 * @param input - Tool input; `action` selects the operation, and for helm
 *   `releaseName` (or `release`) names the release.
 * @param output - Raw text output of the tool.
 * @returns A one-line hint, or `null` when the operation is not one of the
 *   handled ones, nothing changed, or the output signals a failure.
 */
function extractCostHintFromToolOutput(toolName: string, input: Record<string, unknown>, output: string): string | null {
  const action = String(input.action);

  // terraform apply: parse "Apply complete! Resources: N added, M changed, K destroyed."
  if (toolName === 'terraform' && action === 'apply') {
    // Terraform >= 1.5 prefixes the summary with "N imported, " when import
    // blocks were applied; tolerate that prefix so the hint is not lost.
    const summary = output.match(
      /Resources:\s*(?:\d+ imported,\s*)?(\d+) added,\s*(\d+) changed,\s*(\d+) destroyed/
    );
    if (summary) {
      const added = Number(summary[1]);
      const changed = Number(summary[2]);
      const destroyed = Number(summary[3]);
      const parts: string[] = [];
      if (added > 0) parts.push(`+${added} resources created`);
      if (changed > 0) parts.push(`${changed} updated`);
      if (destroyed > 0) parts.push(`${destroyed} destroyed`);
      // An apply that touched nothing has no cost impact worth mentioning.
      return parts.length > 0
        ? `${parts.join(', ')} — run "nimbus cost" for monthly cost estimate`
        : null;
    }
  }

  // helm install/upgrade: only hint when the output carries no failure marker.
  if (toolName === 'helm' && (action === 'install' || action === 'upgrade')) {
    const releaseName = String(input.releaseName ?? input.release ?? '');
    if (!output.includes('Error') && !output.includes('FAILED')) {
      return `Helm release "${releaseName}" deployed — run "nimbus cost" for estimated cost impact`;
    }
  }

  return null;
}
619
-
620
- // ---------------------------------------------------------------------------
621
- // M4: Session-scoped error tracking for NIMBUS.md persistence
622
- // ---------------------------------------------------------------------------
623
-
624
/** Occurrence count per `tool:hint-prefix` key for the lifetime of this module instance. */
const sessionErrorCounts = new Map<string, number>();

/**
 * Count an error hint for a tool and, on its third occurrence, record it in
 * the project's NIMBUS.md under an "## Observed Issues" section.
 *
 * Nothing is written when NIMBUS.md does not exist in `cwd` or when the file
 * already contains the first 40 characters of the hint. Any filesystem
 * failure is swallowed: persistence is best-effort and must not disturb the
 * caller.
 *
 * @param toolName - Tool that produced the error.
 * @param errorHint - Hint text to track; its first 60 characters form the counting key.
 * @param cwd - Directory expected to contain NIMBUS.md.
 */
function trackAndPersistError(toolName: string, errorHint: string, cwd: string): void {
  const key = `${toolName}:${errorHint.slice(0, 60)}`;
  const count = (sessionErrorCounts.get(key) ?? 0) + 1;
  sessionErrorCounts.set(key, count);

  // Persist exactly once: on the third occurrence, not on every later one.
  if (count !== 3) return;

  try {
    const { existsSync, readFileSync, writeFileSync, appendFileSync } = require('node:fs') as typeof import('node:fs');
    const { join } = require('node:path') as typeof import('node:path');
    const nimbusPath = join(cwd, 'NIMBUS.md');
    if (!existsSync(nimbusPath)) return;

    const existing = readFileSync(nimbusPath, 'utf-8');
    if (existing.includes(errorHint.slice(0, 40))) return; // already recorded

    const heading = '## Observed Issues';
    const entry = `- ${toolName}: ${errorHint}\n`;
    const headingIdx = existing.indexOf(heading);
    if (headingIdx === -1) {
      appendFileSync(nimbusPath, `\n${heading}\n${entry}`);
      return;
    }

    // Insert right after the heading line by slicing rather than
    // String.replace: this works when the heading is the last line without a
    // trailing newline or ends in CRLF/trailing spaces, and it keeps `$&`-style
    // sequences in the hint from being expanded as replacement patterns.
    const lineEnd = existing.indexOf('\n', headingIdx);
    const updated =
      lineEnd === -1
        ? `${existing}\n${entry}`
        : existing.slice(0, lineEnd + 1) + entry + existing.slice(lineEnd + 1);
    writeFileSync(nimbusPath, updated);
  } catch { /* non-critical */ }
}
648
-
649
- // ---------------------------------------------------------------------------
650
- // M6: Destructive action guard — force confirmation before terraform destroy / kubectl delete
651
- // ---------------------------------------------------------------------------
652
-
653
/**
 * M6 destructive-action guard: return a confirmation warning for
 * `terraform destroy`, `kubectl delete` and `helm uninstall`, or `null` for
 * every other tool/action pair. The action is read from `input.action`,
 * falling back to `input.command`.
 */
function isDestructiveAction(toolName: string, input: Record<string, unknown>): string | null {
  const requested = String(input.action ?? input.command ?? '');

  switch (toolName) {
    case 'terraform':
      return requested === 'destroy'
        ? 'terraform destroy will PERMANENTLY DELETE all managed infrastructure. Explicitly confirm with the user before proceeding.'
        : null;

    case 'kubectl': {
      if (requested !== 'delete') return null;
      const target = String(input.resource ?? '');
      return `kubectl delete ${target} is IRREVERSIBLE. Explicitly confirm with the user before proceeding.`;
    }

    case 'helm':
      return requested === 'uninstall'
        ? 'helm uninstall will remove the release and its resources. Explicitly confirm with the user before proceeding.'
        : null;

    default:
      return null;
  }
}
667
-
668
/**
 * One cached terraform plan: the plan output, the workdir it belongs to and
 * the `Date.now()` value at which it was stored.
 */
interface TerraformPlanCacheEntry {
  output: string;
  workdir: string;
  timestamp: number;
}

/** How long a cached plan stays valid: 10 minutes, in milliseconds. */
const PLAN_CACHE_TTL_MS = 10 * 60 * 1_000;

/**
 * Module-level terraform plan cache keyed by workdir, intended to let a plan
 * result be reused for a later apply without re-running the plan.
 */
const terraformPlanCache = new Map<string, TerraformPlanCacheEntry>();

/** Remember `output` as the latest plan for `workdir`, replacing any earlier entry. */
function cacheTerraformPlan(workdir: string, output: string): void {
  const entry: TerraformPlanCacheEntry = {
    output,
    workdir,
    timestamp: Date.now(),
  };
  terraformPlanCache.set(workdir, entry);
}

/**
 * Look up the cached plan for `workdir`.
 *
 * @returns The plan output, or `null` when nothing is cached or the entry is
 *   older than the TTL (an expired entry is evicted as a side effect).
 */
function getCachedTerraformPlan(workdir: string): string | null {
  const cached = terraformPlanCache.get(workdir);
  if (cached === undefined) {
    return null;
  }

  const expired = Date.now() - cached.timestamp > PLAN_CACHE_TTL_MS;
  if (expired) {
    terraformPlanCache.delete(workdir);
    return null;
  }
  return cached.output;
}
698
-
699
/**
 * Timer that sweeps the terraform plan cache every 60 seconds and drops
 * entries older than the TTL. The timer is `.unref()`-ed so it does not keep
 * the process alive on its own. Exported so tests can clear it on teardown.
 */
export const _planCacheCleanupInterval: ReturnType<typeof setInterval> = setInterval(() => {
  // Entries stamped before this instant have outlived the TTL.
  const cutoff = Date.now() - PLAN_CACHE_TTL_MS;
  terraformPlanCache.forEach((entry, workdir) => {
    if (entry.timestamp < cutoff) {
      terraformPlanCache.delete(workdir);
    }
  });
}, 60_000).unref();
712
-
713
/** Output-token ceiling requested for each LLM call by default. */
const DEFAULT_MAX_TOKENS = 8_192;

/** Number of agent turns allowed when the caller sets no limit. */
const DEFAULT_MAX_TURNS = 50;

/**
 * Character limit for tool output kept in the conversation history; output
 * beyond it is truncated to prevent context window overflow.
 */
const MAX_TOOL_OUTPUT_CHARS = 100_000;
722
-
723
- // ---------------------------------------------------------------------------
724
- // Main Entry Point
725
- // ---------------------------------------------------------------------------
726
-
727
- /**
728
- * Run the agentic loop.
729
- *
730
- * Takes a user message and existing conversation history, then runs
731
- * the LLM in a loop until it stops requesting tool calls.
732
- *
733
- * The loop terminates when any of the following conditions are met:
734
- * - The LLM returns a response with no tool calls (natural end).
735
- * - The maximum number of turns is reached.
736
- * - The AbortSignal fires (e.g. user presses Ctrl+C).
737
- * - An unrecoverable LLM API error occurs.
738
- *
739
- * @param userMessage - The new user message to process.
740
- * @param history - Prior conversation messages (may be empty for a fresh session).
741
- * @param options - Configuration for the loop.
742
- * @returns The final conversation state, turn count, usage, and cost.
743
- */
744
- export async function runAgentLoop(
745
- userMessage: string,
746
- history: LLMMessage[],
747
- options: AgentLoopOptions
748
- ): Promise<AgentLoopResult> {
749
- const {
750
- router,
751
- toolRegistry,
752
- mode,
753
- maxTurns = DEFAULT_MAX_TURNS,
754
- model,
755
- cwd,
756
- nimbusInstructions,
757
- onText,
758
- onToolCallStart,
759
- onToolCallEnd,
760
- onToolOutputChunk,
761
- checkPermission,
762
- signal,
763
- } = options;
764
-
765
- // -----------------------------------------------------------------------
766
- // 1. Prepare tools and system prompt
767
- // -----------------------------------------------------------------------
768
-
769
- const tools = getToolsForMode(toolRegistry.getAll(), mode);
770
-
771
- // H3: Auto-discover infra context if not provided and cwd is set (best-effort, cached per cwd)
772
- let resolvedInfraContext = options.infraContext;
773
- if (!resolvedInfraContext && cwd) {
774
- try {
775
- const { discoverInfraContext } = await import('../cli/init');
776
- resolvedInfraContext = await Promise.race([
777
- discoverInfraContext(cwd),
778
- new Promise<undefined>(r => setTimeout(() => r(undefined), 5000)),
779
- ]);
780
- } catch { /* best-effort */ }
781
- }
782
-
783
- const systemPrompt = buildSystemPrompt({
784
- mode,
785
- tools,
786
- nimbusInstructions,
787
- cwd,
788
- infraContext: resolvedInfraContext,
789
- dryRun: options.dryRun,
790
- });
791
-
792
- // Convert agentic ToolDefinitions to the LLM-level format expected by
793
- // the router's routeWithTools() method (OpenAI function-calling shape).
794
- const llmTools: LLMToolDefinition[] = tools.map(toOpenAITool);
795
-
796
- // -----------------------------------------------------------------------
797
- // 2. Initialize conversation state
798
- // -----------------------------------------------------------------------
799
-
800
- // PERF-4a: Capacity-hinted pre-allocation avoids repeated V8 array reallocation
801
- // as messages accumulate during a long conversation.
802
- const messages: LLMMessage[] = new Array(Math.max(history.length + 1, 10));
803
- messages.length = 0;
804
- messages.push(...history, { role: 'user', content: userMessage });
805
-
806
- let turns = 0;
807
- let interrupted = false;
808
- const totalUsage: AgentLoopUsage = {
809
- promptTokens: 0,
810
- completionTokens: 0,
811
- totalTokens: 0,
812
- };
813
- let totalCost = 0;
814
-
815
- // G3: Session-level destructive operation counter and per-turn tool call counter
816
- let sessionDestructiveOps = 0;
817
- const MAX_TOOL_CALLS_PER_TURN = options.maxToolCallsPerTurn ?? 20;
818
- const MAX_DESTRUCTIVE_OPS_PER_SESSION = options.maxDestructiveOpsPerSession ?? 5;
819
-
820
- // M2/M5: Track tool calls that have already received a credential-error retry message
821
- // to avoid spamming the auth-refresh hint on repeated failures.
822
- const credentialRetried = new Set<string>();
823
-
824
- // G8: Track which terraform workdirs have had a plan run in this session.
825
- // Used to warn when apply is run without a prior plan.
826
- const terraformPlannedWorkdirs = new Set<string>();
827
-
828
- // G10: One-time kubectl RBAC pre-flight check state.
829
- // kubectlRbacChecked: ensures we only run `kubectl auth can-i --list` once per session.
830
- // rbacPreamble: stores the RBAC output to inject into the first kubectl tool result.
831
- let kubectlRbacChecked = false;
832
- let rbacPreamble = '';
833
-
834
- // G10: Pre-import async exec utilities so they're available inside the loop.
835
- // Using async execFile avoids blocking the Node.js event loop for kubectl/terraform calls.
836
- const { execFile: _execFile, exec: _exec } = await import('node:child_process');
837
- const { promisify: _promisify } = await import('node:util');
838
- const _execFileAsync = _promisify(_execFile);
839
- const _execAsync = _promisify(_exec);
840
-
841
- // PERF-4a: Pre-build the system message once so it can be reused every turn
842
- // without allocating a new object on each loop iteration.
843
- const _systemMessageObj: LLMMessage = { role: 'system', content: systemPrompt };
844
-
845
- // Shared mutable ref: set to true by 'apply-all' diff decision to skip further prompts
846
- const skipRemainingDiffPrompts = { value: options.skipRemainingDiffPrompts ?? false };
847
- // Shared mutable ref: set to true by 'reject-all' diff decision to auto-reject further prompts
848
- const rejectRemainingDiffPrompts = { value: options.rejectRemainingDiffPrompts ?? false };
849
-
850
- // -----------------------------------------------------------------------
851
- // 3. Main agent loop
852
- // -----------------------------------------------------------------------
853
-
854
- while (turns < maxTurns) {
855
- // Check for cancellation before each turn
856
- if (signal?.aborted) {
857
- interrupted = true;
858
- break;
859
- }
860
-
861
- turns++;
862
-
863
- try {
864
- // Gap 18: Auto-route model based on task complexity when no explicit model set
865
- let effectiveModel = model ?? DEFAULT_MODEL;
866
- if (!model && options.autoRouteModel) {
867
- const lastUserMsg = [...messages].reverse().find(m => m.role === 'user');
868
- const lastMsgText = lastUserMsg
869
- ? typeof lastUserMsg.content === 'string'
870
- ? lastUserMsg.content
871
- : JSON.stringify(lastUserMsg.content)
872
- : '';
873
- const complexity = classifyTaskComplexity(lastMsgText);
874
- effectiveModel = routeModel(complexity);
875
- if (onText && turns === 1) {
876
- onText(`\n[auto: ${effectiveModel.split('/').pop()?.replace('anthropic/', '') ?? effectiveModel}]\n`);
877
- }
878
- }
879
-
880
- // Build the completion request with tool definitions.
881
- // The systemMessageObj is pre-built before the loop (PERF-4a) — reuse it.
882
- const allMessages: LLMMessage[] = new Array(messages.length + 1);
883
- allMessages.length = 0;
884
- allMessages.push(_systemMessageObj, ...messages);
885
- const request: ToolCompletionRequest = {
886
- messages: allMessages,
887
- model: effectiveModel,
888
- tools: llmTools,
889
- maxTokens: DEFAULT_MAX_TOKENS,
890
- };
891
-
892
- // Stream text tokens incrementally via routeStreamWithTools.
893
- // Tokens are forwarded to onText as they arrive; tool calls
894
- // are accumulated from the final chunk.
895
- let responseContent = '';
896
- let responseToolCalls: ToolCall[] | undefined;
897
- let responseUsage = { promptTokens: 0, completionTokens: 0, totalTokens: 0 };
898
-
899
- // A1: Retry on transient errors (rate-limit / 5xx) with exponential backoff
900
- const MAX_STREAM_RETRIES = 2;
901
- let streamAttempt = 0;
902
- while (true) {
903
- // A2: Silence timeout — abort if no chunk arrives (G21: configurable)
904
- const STREAM_SILENCE_MS = options.streamSilenceTimeoutMs ?? 60_000;
905
- const silenceAbort = new AbortController();
906
- let silenceTimer: ReturnType<typeof setTimeout> | undefined;
907
- const resetSilence = () => {
908
- clearTimeout(silenceTimer);
909
- silenceTimer = setTimeout(() => silenceAbort.abort('Stream timeout'), STREAM_SILENCE_MS);
910
- };
911
- resetSilence();
912
-
913
- try {
914
- // Pass silence abort signal via request cast (non-standard but supported by most providers)
915
- const requestWithSignal = { ...request, signal: silenceAbort.signal } as typeof request;
916
- for await (const chunk of router.routeStreamWithTools(requestWithSignal)) {
917
- resetSilence(); // reset on every chunk
918
- if (chunk.content) {
919
- responseContent += chunk.content;
920
- if (onText) {
921
- onText(chunk.content);
922
- }
923
- }
924
- if (chunk.toolCallStart && onText) {
925
- // Show early feedback when the LLM starts composing a tool call
926
- onText(`\n[Preparing tool: ${chunk.toolCallStart.name}...]\n`);
927
- }
928
- if (chunk.toolCalls) {
929
- responseToolCalls = chunk.toolCalls;
930
- }
931
- if (chunk.usage) {
932
- responseUsage = chunk.usage;
933
- }
934
- }
935
- clearTimeout(silenceTimer);
936
- break; // success — exit retry loop
937
- } catch (streamErr) {
938
- clearTimeout(silenceTimer);
939
- if (streamAttempt < MAX_STREAM_RETRIES && isRetryableStreamError(streamErr)) {
940
- const delay = 1000 * Math.pow(2, streamAttempt);
941
- if (onText) {
942
- onText(`\n[Retrying after error (attempt ${streamAttempt + 1})...]\n`);
943
- }
944
- await new Promise(r => setTimeout(r, delay));
945
- streamAttempt++;
946
- // Reset partial accumulation before retry
947
- responseContent = '';
948
- responseToolCalls = undefined;
949
- responseUsage = { promptTokens: 0, completionTokens: 0, totalTokens: 0 };
950
- continue;
951
- }
952
- // G24: Graceful network error message instead of raw Node.js error
953
- const streamErrObj = streamErr as Error | null;
954
- const isNetworkError = /ECONNREFUSED|ETIMEDOUT|ENOTFOUND|fetch failed|network/i.test(streamErrObj?.message ?? '');
955
- if (isNetworkError) {
956
- const netMsg = '\n[!!] Network unreachable — cannot reach the LLM API.\nCheck your internet connection and API key validity, then try again.\n';
957
- if (onText) onText(netMsg);
958
- // Re-throw a specially-marked error so the outer turn catch block can handle it
959
- const netErr = new Error(netMsg);
960
- (netErr as Error & { _nimbusNetworkError?: boolean })._nimbusNetworkError = true;
961
- throw netErr;
962
- }
963
- throw streamErr; // non-retryable — propagate to outer catch
964
- }
965
- }
966
-
967
- // Accumulate usage and cost
968
- totalUsage.promptTokens += responseUsage.promptTokens;
969
- totalUsage.completionTokens += responseUsage.completionTokens;
970
- totalUsage.totalTokens += responseUsage.totalTokens;
971
-
972
- // Estimate cost for this turn
973
- const resolvedModel = effectiveModel;
974
- const providerName = resolvedModel.includes('/') ? resolvedModel.split('/')[0] : 'anthropic';
975
- const modelName = resolvedModel.includes('/')
976
- ? resolvedModel.split('/').slice(1).join('/')
977
- : resolvedModel;
978
- const turnCost = calculateCost(
979
- providerName,
980
- modelName,
981
- responseUsage.promptTokens,
982
- responseUsage.completionTokens
983
- );
984
- totalCost += turnCost.costUSD;
985
-
986
- // Notify caller of accumulated usage/cost after each turn
987
- if (options.onUsage) {
988
- options.onUsage(totalUsage, totalCost);
989
- }
990
-
991
- // M2: Emit per-turn token/cost stats as a dim system message in the TUI.
992
- // Only emit when there was actual token usage (skip turns with 0 tokens).
993
- if (onText && (responseUsage.promptTokens > 0 || responseUsage.completionTokens > 0)) {
994
- const statsLine = `\n[${responseUsage.promptTokens} in / ${responseUsage.completionTokens} out — $${turnCost.costUSD.toFixed(4)}]\n`;
995
- onText(statsLine);
996
- }
997
-
998
- // G16: Cost budget enforcement — stop if cumulative cost exceeds the limit
999
- if (options.costBudgetUSD !== undefined && totalCost >= options.costBudgetUSD) {
1000
- const budgetMsg = `\n\n[!!] Cost budget of $${options.costBudgetUSD.toFixed(2)} reached (used: $${totalCost.toFixed(3)}). Stopping to prevent overspend.\n`;
1001
- if (onText) onText(budgetMsg);
1002
- messages.push({ role: 'assistant', content: budgetMsg });
1003
- break;
1004
- }
1005
-
1006
- // -----------------------------------------------------------------
1007
- // No tool calls → the LLM is done
1008
- // -----------------------------------------------------------------
1009
- if (!responseToolCalls || responseToolCalls.length === 0) {
1010
- messages.push({
1011
- role: 'assistant',
1012
- content: responseContent,
1013
- });
1014
- break;
1015
- }
1016
-
1017
- // -----------------------------------------------------------------
1018
- // Tool calls present → execute each one
1019
- // -----------------------------------------------------------------
1020
-
1021
- // Append the assistant message that contains the tool calls
1022
- messages.push({
1023
- role: 'assistant',
1024
- content: responseContent,
1025
- toolCalls: responseToolCalls,
1026
- });
1027
-
1028
- // G3: Per-turn tool call counter — reset at the start of each tool-call batch
1029
- let turnToolCallCount = 0;
1030
-
1031
- // H2: Parallel dispatch for read-only tools (safe to run concurrently)
1032
- const READ_ONLY_TOOLS = new Set([
1033
- 'read_file', 'glob', 'grep', 'cloud_discover', 'terraform_plan_analyze',
1034
- 'kubectl_context', 'helm_values', 'cost_estimate', 'drift_detect',
1035
- ]);
1036
- const canRunInParallel = (tc: ToolCall): boolean => READ_ONLY_TOOLS.has(tc.function.name);
1037
- const allReadOnly = responseToolCalls.every(canRunInParallel);
1038
-
1039
- if (allReadOnly && responseToolCalls.length > 1) {
1040
- // All tools are read-only — dispatch in parallel
1041
- const parallelChunkCallback = onToolOutputChunk
1042
- ? (id: string) => (chunk: string) => onToolOutputChunk(id, chunk)
1043
- : undefined;
1044
-
1045
- const parallelResults = await Promise.allSettled(
1046
- responseToolCalls.map(tc =>
1047
- executeToolCall(
1048
- tc,
1049
- toolRegistry,
1050
- onToolCallStart,
1051
- onToolCallEnd,
1052
- checkPermission,
1053
- options.lspManager,
1054
- options.snapshotManager,
1055
- options.sessionId,
1056
- signal,
1057
- options.hookEngine,
1058
- mode,
1059
- options.requestFileDiff,
1060
- skipRemainingDiffPrompts,
1061
- rejectRemainingDiffPrompts,
1062
- parallelChunkCallback ? parallelChunkCallback(tc.id) : undefined,
1063
- options.toolTimeouts,
1064
- options.infraContext
1065
- )
1066
- )
1067
- );
1068
-
1069
- for (let pi = 0; pi < responseToolCalls.length; pi++) {
1070
- const tc = responseToolCalls[pi];
1071
- const pResult = parallelResults[pi];
1072
- const pContent = pResult.status === 'fulfilled'
1073
- ? (pResult.value.isError ? `Error: ${pResult.value.error}` : pResult.value.output)
1074
- : `Error: ${pResult.reason}`;
1075
- messages.push({ role: 'tool', toolCallId: tc.id, name: tc.function.name, content: pContent });
1076
- }
1077
- // Skip sequential processing — jump directly to next LLM turn
1078
- continue;
1079
- }
1080
-
1081
- // Process tool calls sequentially (order may matter for side effects)
1082
- for (const toolCall of responseToolCalls) {
1083
- // Check for cancellation between tool calls
1084
- if (signal?.aborted) {
1085
- interrupted = true;
1086
- break;
1087
- }
1088
-
1089
- // G3: Enforce per-turn tool call limit to prevent runaway loops
1090
- turnToolCallCount++;
1091
- if (turnToolCallCount > MAX_TOOL_CALLS_PER_TURN) {
1092
- messages.push({
1093
- role: 'tool',
1094
- toolCallId: toolCall.id,
1095
- name: toolCall.function.name,
1096
- content: `[Tool limit reached: ${MAX_TOOL_CALLS_PER_TURN} tool calls in this turn. Summarizing progress and stopping to avoid runaway execution.]`,
1097
- });
1098
- break;
1099
- }
1100
-
1101
- // G3: Count destructive operations at the session level
1102
- if (isDestructiveOp(toolCall.function.name, toolCall.function.arguments)) {
1103
- sessionDestructiveOps++;
1104
- }
1105
-
1106
- // G10: One-time kubectl RBAC pre-flight check — runs before the first kubectl call
1107
- // in this session. Stores the RBAC permissions summary in rbacPreamble so it can
1108
- // be injected into the first kubectl tool result (keeps conversation structure valid).
1109
- // Uses async execFile to avoid blocking the Node.js event loop (up to 5s call).
1110
- if (!kubectlRbacChecked && toolCall.function.name === 'kubectl') {
1111
- kubectlRbacChecked = true;
1112
- try {
1113
- const { stdout: rbacOut } = await _execFileAsync('kubectl', ['auth', 'can-i', '--list'], {
1114
- encoding: 'utf-8', timeout: 5000,
1115
- });
1116
- const truncated = rbacOut.length > 1500
1117
- ? `${rbacOut.slice(0, 1500)}\n...[truncated]`
1118
- : rbacOut;
1119
- rbacPreamble = `[kubectl RBAC context: permissions available in current context]\n${truncated}\n\n`;
1120
- } catch { /* non-critical — RBAC check failure does not block kubectl */ }
1121
- }
1122
-
1123
- // M6: Destructive action guard — inject warning into LLM context before executing
1124
- try {
1125
- const m6Input = JSON.parse(toolCall.function.arguments) as Record<string, unknown>;
1126
- const destructiveWarning = isDestructiveAction(toolCall.function.name, m6Input);
1127
- if (destructiveWarning) {
1128
- messages.push({
1129
- role: 'tool',
1130
- toolCallId: toolCall.id + '-guard',
1131
- name: toolCall.function.name,
1132
- content: `[SAFETY] ${destructiveWarning}`,
1133
- });
1134
- }
1135
- } catch { /* ignore parse errors */ }
1136
-
1137
- // Build chunk callback that forwards tool output to the TUI in real-time
1138
- const chunkCallback = onToolOutputChunk
1139
- ? (chunk: string) => onToolOutputChunk(toolCall.id, chunk)
1140
- : undefined;
1141
-
1142
- const result = await executeToolCall(
1143
- toolCall,
1144
- toolRegistry,
1145
- onToolCallStart,
1146
- onToolCallEnd,
1147
- checkPermission,
1148
- options.lspManager,
1149
- options.snapshotManager,
1150
- options.sessionId,
1151
- signal,
1152
- options.hookEngine,
1153
- mode,
1154
- options.requestFileDiff,
1155
- skipRemainingDiffPrompts,
1156
- rejectRemainingDiffPrompts,
1157
- chunkCallback,
1158
- options.toolTimeouts,
1159
- options.infraContext
1160
- );
1161
-
1162
- // Append each tool result as a separate message so the LLM can
1163
- // match it to the corresponding tool_use block by toolCallId.
1164
- let toolContent = result.isError ? `Error: ${result.error}` : result.output;
1165
-
1166
- // G10: Inject RBAC context preamble into the first kubectl result
1167
- if (rbacPreamble && toolCall.function.name === 'kubectl') {
1168
- toolContent = rbacPreamble + toolContent;
1169
- rbacPreamble = ''; // consume once — only injected into the first kubectl result
1170
- }
1171
-
1172
- // Inject DevOps error classification hints to guide self-correction
1173
- if (result.isError && result.error) {
1174
- const hint = classifyDevOpsError(toolCall.function.name, result.error, options.nimbusInstructions);
1175
- if (hint) {
1176
- toolContent += `\n\n${hint}`;
1177
- // C4: Also show hint in TUI error output (not just LLM context)
1178
- result.output += `\n\n${hint}`;
1179
-
1180
- // M2/M5: Auto-retry signal on credential expiry errors
1181
- // If the classified hint indicates a credential/auth problem, append
1182
- // a structured prompt so the agent knows to run auth-refresh, and
1183
- // set provider-specific env hints for the auth-refresh command.
1184
- const isCredentialError =
1185
- hint.toLowerCase().includes('credential') ||
1186
- hint.toLowerCase().includes('expired') ||
1187
- hint.toLowerCase().includes('auth') ||
1188
- hint.toLowerCase().includes('login required');
1189
-
1190
- if (isCredentialError && !credentialRetried.has(toolCall.id ?? toolCall.function.name)) {
1191
- credentialRetried.add(toolCall.id ?? toolCall.function.name);
1192
-
1193
- // M5: Set provider-specific refresh hint env vars so auth-refresh
1194
- // can surface targeted guidance when invoked by the user.
1195
- const errorLower = (result.error ?? '').toLowerCase();
1196
- if (errorLower.includes('aws')) {
1197
- process.env.NIMBUS_AWS_REFRESH_HINT = '1';
1198
- }
1199
- if (errorLower.includes('gcp') || errorLower.includes('google')) {
1200
- process.env.NIMBUS_GCP_REFRESH_HINT = '1';
1201
- }
1202
- if (errorLower.includes('azure')) {
1203
- process.env.NIMBUS_AZURE_REFRESH_HINT = '1';
1204
- }
1205
-
1206
- const refreshMsg = [
1207
- '[!!] Credential expired. Run: nimbus auth-refresh',
1208
- '[Nimbus] Credential error detected on tool: ' + toolCall.function.name,
1209
- 'Run "nimbus auth-refresh" to refresh cloud credentials, then retry.',
1210
- ].join('\n');
1211
- toolContent += '\n\n' + refreshMsg;
1212
- result.output += '\n\n' + refreshMsg;
1213
- }
1214
- } else if (DEVOPS_TOOL_NAMES.has(toolCall.function.name)) {
1215
- // Unknown DevOps error — provide structured self-diagnosis steps
1216
- toolContent += [
1217
- '\n\n--- Self-Diagnosis Steps ---',
1218
- '1. Check tool is installed: `which terraform` / `kubectl version` / `helm version`',
1219
- '2. Check credentials: `aws sts get-caller-identity` / `gcloud auth list` / `az account show`',
1220
- '3. Check network connectivity to the cluster/cloud provider',
1221
- '4. Retry with verbose flag if available (e.g., TF_LOG=DEBUG, kubectl --v=6)',
1222
- '5. If the error persists, report the exact error message and the command that caused it.',
1223
- ].join('\n');
1224
- }
1225
- // M4: Track recurring errors and persist to NIMBUS.md after 3 occurrences
1226
- const m4Hint = classifyDevOpsError(toolCall.function.name, result.error ?? '', options.nimbusInstructions);
1227
- if (m4Hint) {
1228
- trackAndPersistError(toolCall.function.name, m4Hint, options.cwd ?? process.cwd());
1229
- }
1230
- }
1231
-
1232
- // H5: Inject cost delta hint after successful infra operations
1233
- if (!result.isError) {
1234
- try {
1235
- const h5Input = JSON.parse(toolCall.function.arguments) as Record<string, unknown>;
1236
- const costHint = extractCostHintFromToolOutput(toolCall.function.name, h5Input, result.output);
1237
- if (costHint) {
1238
- onText?.(`\n[cost] ${costHint}\n`);
1239
- }
1240
- } catch { /* ignore parse errors */ }
1241
- }
1242
-
1243
- // L6: Auto-generate runbook after terraform apply success
1244
- if (!result.isError && toolCall.function.name === 'terraform') {
1245
- try {
1246
- const l6Input = JSON.parse(toolCall.function.arguments) as Record<string, unknown>;
1247
- if (String(l6Input.action) === 'apply') {
1248
- const l6Match = result.output.match(/Resources:\s*(\d+) added/);
1249
- if (l6Match && parseInt(l6Match[1] ?? '0', 10) > 0) {
1250
- const { join: _l6Join } = require('node:path') as typeof import('node:path');
1251
- const { homedir: _l6Homedir } = require('node:os') as typeof import('node:os');
1252
- const { mkdirSync: _l6MkdirSync, writeFileSync: _l6WriteFileSync } = require('node:fs') as typeof import('node:fs');
1253
- const runbookDir = _l6Join(_l6Homedir(), '.nimbus', 'runbooks');
1254
- _l6MkdirSync(runbookDir, { recursive: true });
1255
- const ts = new Date().toISOString().replace(/[:.]/g, '-');
1256
- const runbookPath = _l6Join(runbookDir, `terraform-apply-${ts}.md`);
1257
- const runbookContent = [
1258
- '# Terraform Apply Runbook',
1259
- '',
1260
- `Date: ${new Date().toLocaleString()}`,
1261
- '',
1262
- 'Apply output:',
1263
- '```',
1264
- result.output.slice(0, 2000),
1265
- '```',
1266
- '',
1267
- '## Rollback',
1268
- '',
1269
- 'To rollback, run `terraform destroy` or restore from a previous state.',
1270
- ].join('\n');
1271
- _l6WriteFileSync(runbookPath, runbookContent, 'utf-8');
1272
- options.onText?.(`\n[runbook] Saved to ${runbookPath}\n`);
1273
- }
1274
- }
1275
- } catch { /* non-critical */ }
1276
- }
1277
-
1278
- // GAP-25: Structured audit trail for destructive operations
1279
- if (!result.isError && isDestructiveOp(toolCall.function.name, toolCall.function.arguments)) {
1280
- try {
1281
- const { appendFileSync, mkdirSync } = await import('node:fs');
1282
- const { homedir } = await import('node:os');
1283
- const { join } = await import('node:path');
1284
- const auditDir = join(homedir(), '.nimbus');
1285
- mkdirSync(auditDir, { recursive: true });
1286
- const event = JSON.stringify({
1287
- type: 'infra-change',
1288
- tool: toolCall.function.name,
1289
- action: (JSON.parse(toolCall.function.arguments) as Record<string, unknown>).action,
1290
- sessionId: options.sessionId ?? 'unknown',
1291
- cwd: options.cwd ?? process.cwd(),
1292
- timestamp: new Date().toISOString(),
1293
- });
1294
- appendFileSync(join(auditDir, 'audit.jsonl'), event + '\n', 'utf-8');
1295
- } catch { /* audit logging is non-critical */ }
1296
- }
1297
-
1298
- // G3: Append a warning when session-level destructive op threshold is reached
1299
- if (sessionDestructiveOps >= MAX_DESTRUCTIVE_OPS_PER_SESSION) {
1300
- toolContent += `\n\n[Warning: ${sessionDestructiveOps} destructive operations executed in this session. Review changes carefully.]`;
1301
- }
1302
-
1303
- // Cache terraform plan output so a subsequent apply can reference it.
1304
- // Also track planned workdirs (G8) and warn on unplanned applies.
1305
- if (toolCall.function.name === 'terraform' && !result.isError) {
1306
- try {
1307
- const tfArgs = JSON.parse(toolCall.function.arguments) as Record<string, unknown>;
1308
- if (tfArgs.action === 'plan' && tfArgs.workdir) {
1309
- cacheTerraformPlan(String(tfArgs.workdir), result.output);
1310
- // G8: Track that a plan was run for this workdir in this session
1311
- terraformPlannedWorkdirs.add(String(tfArgs.workdir));
1312
- }
1313
- // G8: Warn if apply ran without a prior plan in this session
1314
- if (tfArgs.action === 'apply' && tfArgs.workdir && !terraformPlannedWorkdirs.has(String(tfArgs.workdir))) {
1315
- toolContent = `[Note: terraform apply ran without a prior terraform plan in this session for ${String(tfArgs.workdir)}. Always run terraform plan first to review changes before applying.]\n\n${toolContent}`;
1316
- }
1317
- // Inject cached plan into apply context for the LLM
1318
- if (tfArgs.action === 'apply' && tfArgs.workdir) {
1319
- const cached = getCachedTerraformPlan(String(tfArgs.workdir));
1320
- if (cached) {
1321
- toolContent = `[Apply succeeded. This was the plan that was applied:]\n${cached.slice(0, 3000)}\n\n[Apply output:]\n${toolContent}`;
1322
- }
1323
- }
1324
- } catch { /* ignore parse errors */ }
1325
- }
1326
-
1327
- // GAP-11: trigger FileDiff UI after terraform plan shows resource changes
1328
- if (toolCall.function.name === 'terraform' && !result.isError && options.requestFileDiff) {
1329
- try {
1330
- const tfArgs11 = JSON.parse(toolCall.function.arguments) as Record<string, unknown>;
1331
- if (tfArgs11.action === 'plan') {
1332
- const { parseTerraformPlanOutput, buildFileDiffBatchFromPlan } = await import('./deploy-preview');
1333
- const changes = parseTerraformPlanOutput(toolContent);
1334
- if (changes.length > 0) {
1335
- const batchFiles = buildFileDiffBatchFromPlan({ changes } as import('./deploy-preview').DeployPreview);
1336
- for (const file of batchFiles) {
1337
- const decision = await options.requestFileDiff(file.filePath, file.toolName ?? 'terraform', file.diff ?? '');
1338
- if (decision === 'reject-all') break;
1339
- }
1340
- }
1341
- }
1342
- } catch { /* non-critical — FileDiff UI not always available */ }
1343
- }
1344
-
1345
- // GAP-18: auto-validate terraform files after write/edit tool calls
1346
- if (['write_file', 'edit_file', 'multi_edit'].includes(toolCall.function.name) && !result.isError) {
1347
- const gap18Input = JSON.parse(toolCall.function.arguments) as { path?: string; file_path?: string };
1348
- const gap18FilePath = gap18Input.path ?? gap18Input.file_path ?? '';
1349
- if (gap18FilePath.endsWith('.tf')) {
1350
- try {
1351
- // Use async exec to avoid blocking the event loop (up to 10s for terraform validate)
1352
- const { stdout: validateOut } = await _execAsync('terraform validate -json 2>/dev/null', {
1353
- cwd: options.cwd ?? process.cwd(),
1354
- encoding: 'utf-8',
1355
- timeout: 10_000,
1356
- });
1357
- const parsed = JSON.parse(validateOut) as { valid: boolean; diagnostics?: Array<{ severity: string; summary: string; detail: string }> };
1358
- if (!parsed.valid && parsed.diagnostics && parsed.diagnostics.length > 0) {
1359
- const errors = parsed.diagnostics
1360
- .filter(d => d.severity === 'error')
1361
- .map(d => ` ${d.summary}: ${d.detail}`)
1362
- .join('\n');
1363
- toolContent += `\n\nTerraform validation errors (please fix):\n${errors}`;
1364
- }
1365
- } catch { /* terraform not available or not in tf project — ignore */ }
1366
- }
1367
- }
1368
-
1369
- // Truncate excessively large tool outputs to prevent context overflow
1370
- if (toolContent.length > MAX_TOOL_OUTPUT_CHARS) {
1371
- let head: string;
1372
- let tail: string;
1373
- let omitted: number;
1374
- const lines = toolContent.split('\n');
1375
-
1376
- // C3: Smart truncation for terraform plan — preserve all diff lines
1377
- const isTerraformPlan = toolCall.function.name === 'terraform' && (() => {
1378
- try {
1379
- const tfArgs = JSON.parse(toolCall.function.arguments) as Record<string, unknown>;
1380
- return tfArgs.action === 'plan';
1381
- } catch { return false; }
1382
- })();
1383
-
1384
- if (isTerraformPlan) {
1385
- // Keep all diff lines (create/update/destroy/replace) and the plan summary
1386
- const diffLines: string[] = [];
1387
- const contextLines: string[] = [];
1388
- for (const line of lines) {
1389
- const trimmed = line.trimStart();
1390
- const isDiffLine = trimmed.startsWith('+') || trimmed.startsWith('-') ||
1391
- trimmed.startsWith('~') || trimmed.startsWith('!') ||
1392
- line.includes('will be created') || line.includes('will be destroyed') ||
1393
- line.includes('will be updated') || line.includes('will be replaced') ||
1394
- line.includes('Plan:') || line.includes('No changes') ||
1395
- line.includes('Error:') || line.includes('Warning:');
1396
- if (isDiffLine) {
1397
- diffLines.push(line);
1398
- } else {
1399
- contextLines.push(line);
1400
- }
1401
- }
1402
- // Allow up to 500 diff lines + first 50 context lines
1403
- const keptDiff = diffLines.slice(0, 500);
1404
- const keptCtx = contextLines.slice(0, 50);
1405
- omitted = Math.max(0, lines.length - keptDiff.length - keptCtx.length);
1406
- head = [...keptCtx, ...keptDiff].join('\n');
1407
- tail = '';
1408
- } else {
1409
- const headLines = 100, tailLines = 20;
1410
- head = lines.slice(0, headLines).join('\n');
1411
- tail = lines.slice(-tailLines).join('\n');
1412
- omitted = Math.max(0, lines.length - headLines - tailLines);
1413
- }
1414
-
1415
- // Save full output to disk for reference
1416
- try {
1417
- const { mkdirSync: _mkdirSync, writeFileSync: _writeFileSync } = await import('node:fs');
1418
- const { homedir: _homedir } = await import('node:os');
1419
- const outDir = join(_homedir(), '.nimbus', 'tool-outputs');
1420
- _mkdirSync(outDir, { recursive: true });
1421
- const outFile = join(outDir, `${Date.now()}-${toolCall.function.name}.log`);
1422
- _writeFileSync(outFile, toolContent, 'utf-8');
1423
- toolContent = omitted > 0
1424
- ? `${head}${tail ? '\n\n... [' + omitted + ' lines omitted — full output saved to ' + outFile + '] ...\n\n' + tail : '\n\n... [full output saved to ' + outFile + ']'}`
1425
- : `${head}${tail ? '\n\n' + tail : ''}`;
1426
- } catch {
1427
- toolContent = omitted > 0
1428
- ? `${head}${tail ? '\n\n... [' + omitted + ' lines omitted — output too large for context] ...\n\n' + tail : '\n\n... [' + omitted + ' lines omitted]'}`
1429
- : `${head}${tail ? '\n\n' + tail : ''}`;
1430
- }
1431
- }
1432
-
1433
- messages.push({
1434
- role: 'tool',
1435
- toolCallId: toolCall.id,
1436
- name: toolCall.function.name,
1437
- content: toolContent,
1438
- });
1439
- }
1440
-
1441
- // If we broke out of the tool-call loop due to cancellation, exit
1442
- // the main loop as well.
1443
- if (interrupted) {
1444
- break;
1445
- }
1446
-
1447
- // -----------------------------------------------------------------
1448
- // Auto-compact check
1449
- // -----------------------------------------------------------------
1450
- // After tool results are appended, check whether the conversation
1451
- // has grown past the context window threshold. If so, summarize
1452
- // older messages to free up space for future turns.
1453
- if (options.contextManager) {
1454
- const toolTokens = llmTools.reduce(
1455
- (sum, t) => sum + Math.ceil(JSON.stringify(t).length / 4),
1456
- 0
1457
- );
1458
- if (options.contextManager.shouldCompact(systemPrompt, messages, toolTokens)) {
1459
- try {
1460
- const compactResult = await runCompaction(messages, options.contextManager, {
1461
- router,
1462
- ...(options.infraContext ? { infraContext: options.infraContext } : {}),
1463
- });
1464
- // Replace messages with the compacted version
1465
- messages.length = 0;
1466
- messages.push(...compactResult.messages);
1467
- // Clear the token cache after compaction — old message entries are no longer valid
1468
- options.contextManager.clearTokenCache();
1469
- if (options.onCompact) {
1470
- options.onCompact(compactResult.result);
1471
- }
1472
- } catch (compactErr) {
1473
- // Compaction failed — notify user visibly and continue with original messages
1474
- const compactErrMsg =
1475
- compactErr instanceof Error ? compactErr.message : String(compactErr);
1476
- if (onText) {
1477
- onText(
1478
- `\n[Warning: Auto-compaction failed: ${compactErrMsg}. Context may exceed budget on the next turn.]\n`
1479
- );
1480
- }
1481
- }
1482
- }
1483
- }
1484
- } catch (error: unknown) {
1485
- // LLM API error — report to the caller and break
1486
- const msg = error instanceof Error ? error.message : String(error);
1487
- // G24: Network errors already printed via onText above — skip duplicate output
1488
- const isNetworkErr = (error instanceof Error) && (error as Error & { _nimbusNetworkError?: boolean })._nimbusNetworkError;
1489
- if (!isNetworkErr && onText) {
1490
- onText(`\n[Error: ${msg}]\n`);
1491
- }
1492
- messages.push({
1493
- role: 'assistant',
1494
- content: isNetworkErr ? msg : `I encountered an error: ${msg}`,
1495
- });
1496
- break;
1497
- }
1498
- }
1499
-
1500
- // -----------------------------------------------------------------------
1501
- // 4. Post-loop bookkeeping
1502
- // -----------------------------------------------------------------------
1503
-
1504
- if (turns >= maxTurns && !interrupted) {
1505
- if (onText) {
1506
- onText(`\n[Agent reached maximum turns limit (${maxTurns}). Stopping.]\n`);
1507
- }
1508
- }
1509
-
1510
- // GAP-19: Session summary after multi-step deploy
1511
- if (options.mode === 'deploy' && options.onText) {
1512
- // Collect tool calls from messages
1513
- const allToolCalls: Array<{ name: string; input: Record<string, unknown> }> = [];
1514
- for (const msg of messages) {
1515
- if (msg.role === 'assistant' && Array.isArray((msg as {toolCalls?: unknown[]}).toolCalls)) {
1516
- for (const tc of (msg as {toolCalls: Array<{function: {name: string; arguments: string}}>}).toolCalls) {
1517
- try {
1518
- allToolCalls.push({ name: tc.function.name, input: JSON.parse(tc.function.arguments) as Record<string, unknown> });
1519
- } catch { /* ignore */ }
1520
- }
1521
- }
1522
- }
1523
- if (allToolCalls.length > 3) {
1524
- const terraform = allToolCalls.filter(c => c.name === 'terraform');
1525
- const kubectl = allToolCalls.filter(c => c.name === 'kubectl');
1526
- const helm = allToolCalls.filter(c => c.name === 'helm');
1527
- const summaryLines: string[] = ['---', '**Session Summary**'];
1528
- if (terraform.length) summaryLines.push(`• Terraform: ${terraform.map(c => String(c.input.action ?? '')).join(', ')}`);
1529
- if (kubectl.length) summaryLines.push(`• Kubectl: ${kubectl.map(c => String(c.input.action ?? '')).join(', ')}`);
1530
- if (helm.length) summaryLines.push(`• Helm: ${helm.map(c => String(c.input.action ?? '')).join(', ')}`);
1531
- if (summaryLines.length > 2) {
1532
- options.onText('\n\n' + summaryLines.join('\n'));
1533
- }
1534
- }
1535
- }
1536
-
1537
- return {
1538
- messages,
1539
- turns,
1540
- interrupted,
1541
- usage: totalUsage,
1542
- totalCost,
1543
- };
1544
- }
1545
-
1546
- // ---------------------------------------------------------------------------
1547
- // Tool Execution
1548
- // ---------------------------------------------------------------------------
1549
-
1550
/**
 * Names of the tools that edit files on disk. After one of these runs, the
 * target file is eligible for an LSP diagnostics pass.
 */
const FILE_EDITING_TOOLS = new Set([
  'edit_file',
  'multi_edit',
  'write_file',
]);

/**
 * Names of the tools that mutate files and may therefore need a proposed diff
 * to be approved before they are executed.
 */
const FILE_MUTATING_TOOLS = new Set([
  'edit_file',
  'multi_edit',
  'write_file',
]);
1555
-
1556
/**
 * Generate a simple unified-style diff between two strings.
 *
 * Intended for display only. Lines are compared with a greedy, line-by-line
 * scan (not an LCS), so hunks are not guaranteed to be minimal: on a mismatch,
 * "before" lines are consumed until one equals the current "after" line, then
 * "after" lines are consumed until one equals the current "before" line.
 *
 * @param filename - Name used in the `--- a/...` and `+++ b/...` header lines.
 * @param before - Original text.
 * @param after - Proposed text.
 * @returns The two header lines followed by one `@@ -s,n +s,n @@` hunk per
 *   differing region, joined with `\n`. When the inputs are identical only the
 *   two header lines are returned.
 */
function generateUnifiedDiff(filename: string, before: string, after: string): string {
  const beforeLines = before.split('\n');
  const afterLines = after.split('\n');
  const lines: string[] = [`--- a/${filename}`, `+++ b/${filename}`];
  let i = 0;
  let j = 0;
  while (i < beforeLines.length || j < afterLines.length) {
    // Inside the loop at least one index is in range, so an out-of-range
    // (undefined) entry can never compare equal to the other side.
    if (beforeLines[i] === afterLines[j]) {
      i++;
      j++;
      continue;
    }
    const hunkBefore: string[] = [];
    const hunkAfter: string[] = [];
    // Each side of the hunk header needs its own start line: once an earlier
    // hunk has added or removed lines, the two indices are no longer equal.
    const beforeStart = i;
    const afterStart = j;
    while (i < beforeLines.length && beforeLines[i] !== afterLines[j]) {
      hunkBefore.push(beforeLines[i++]);
    }
    while (
      j < afterLines.length &&
      (i >= beforeLines.length || beforeLines[i] !== afterLines[j])
    ) {
      hunkAfter.push(afterLines[j++]);
    }
    lines.push(
      `@@ -${beforeStart + 1},${hunkBefore.length} +${afterStart + 1},${hunkAfter.length} @@`
    );
    for (const removed of hunkBefore) lines.push(`-${removed}`);
    for (const added of hunkAfter) lines.push(`+${added}`);
  }
  return lines.join('\n');
}
1592
-
1593
/**
 * Compute a proposed diff for a file-mutating tool call without writing to disk.
 *
 * The current file content is read from `args.path` (a file that cannot be
 * read is treated as empty), the edit described by the tool arguments is
 * applied in memory, and the result is rendered with `generateUnifiedDiff`.
 *
 * - `edit_file`: replaces the first occurrence of `old_string` with `new_string`.
 * - `multi_edit`: applies each `{ old_string, new_string }` entry of `edits` in order.
 * - `write_file`: the proposed content is `content`.
 *
 * @param toolName - Name of the tool being invoked.
 * @param args - Parsed tool arguments.
 * @returns The unified diff string, or `null` when there is no path, the
 *   arguments are not usable, the edit changes nothing, or any step throws.
 */
async function computeProposedDiff(
  toolName: string,
  args: Record<string, unknown>
): Promise<string | null> {
  try {
    const { readFile } = await import('node:fs/promises');
    const path = args.path;
    // A non-string path must not reach readFile (a number would be treated as
    // a file descriptor), so reject it together with the empty case.
    if (typeof path !== 'string' || !path) return null;
    const currentContent = await readFile(path, 'utf-8').catch(() => '');

    // Replace the first literal occurrence. A replacer function is used so
    // that `$$`, `$&`, "$`" and `$'` in the new text are inserted verbatim
    // instead of being expanded as replacement patterns.
    const applyEdit = (text: string, oldString: unknown, newString: unknown): string =>
      typeof oldString === 'string' && typeof newString === 'string'
        ? text.replace(oldString, () => newString)
        : text;

    let proposed = currentContent;
    if (toolName === 'edit_file') {
      proposed = applyEdit(proposed, args.old_string, args.new_string);
    } else if (toolName === 'multi_edit') {
      const edits = args.edits;
      if (Array.isArray(edits)) {
        for (const edit of edits) {
          if (edit && typeof edit === 'object') {
            const { old_string: oldString, new_string: newString } = edit as {
              old_string?: unknown;
              new_string?: unknown;
            };
            proposed = applyEdit(proposed, oldString, newString);
          }
        }
      }
    } else if (toolName === 'write_file') {
      if (typeof args.content !== 'string') return null;
      proposed = args.content;
    }
    if (proposed === currentContent) return null; // no change
    return generateUnifiedDiff(path, currentContent, proposed);
  } catch {
    // Best effort: the diff is only a preview, so any failure yields null.
    return null;
  }
}
1625
-
1626
/**
 * Extract the target file path from a tool call's parsed arguments.
 *
 * Only tools listed in `FILE_EDITING_TOOLS` are considered; for those the
 * `path` property of the input is returned when it is a string.
 *
 * @param toolName - Name of the tool that was called.
 * @param input - Parsed tool arguments (shape not yet validated).
 * @returns The `path` string, or `null` when the tool is not a file-editing
 *   tool, the input is not an object, or `path` is missing or not a string.
 */
function extractFilePath(toolName: string, input: unknown): string | null {
  if (!FILE_EDITING_TOOLS.has(toolName)) {
    return null;
  }
  if (typeof input !== 'object' || input === null) {
    return null;
  }
  // The input has not been schema-validated here, so check the runtime type
  // rather than trusting a cast; this keeps the declared return type honest.
  const candidate = (input as { path?: unknown }).path;
  return typeof candidate === 'string' ? candidate : null;
}
1641
-
1642
- /**
1643
- * Execute a single tool call.
1644
- *
1645
- * Handles:
1646
- * - Looking up the tool in the registry.
1647
- * - Parsing the JSON arguments string from the LLM response.
1648
- * - Validating input against the Zod schema.
1649
- * - Checking permissions via the caller-supplied callback.
1650
- * - Invoking the tool and returning the result.
1651
- * - Notifying start/end callbacks.
1652
- * - Querying the LSP for diagnostics after file edits.
1653
- *
1654
- * @param toolCall - The raw tool call from the LLM response.
1655
- * @param registry - The tool registry to look up the tool definition.
1656
- * @param onStart - Optional callback fired before execution.
1657
- * @param onEnd - Optional callback fired after execution (or error).
1658
- * @param checkPermission - Optional permission gate.
1659
- * @param lspManager - Optional LSP manager for post-edit diagnostics.
1660
- * @returns The tool result (always succeeds; errors are captured inside the result).
1661
- */
1662
- async function executeToolCall(
1663
- toolCall: ToolCall,
1664
- registry: ToolRegistry,
1665
- onStart?: (info: ToolCallInfo) => void,
1666
- onEnd?: (info: ToolCallInfo, result: ToolResult) => void,
1667
- checkPermission?: (tool: ToolDefinition, input: unknown) => Promise<PermissionDecision>,
1668
- lspManager?: LSPManager,
1669
- snapshotManager?: SnapshotManager,
1670
- sessionId?: string,
1671
- signal?: AbortSignal,
1672
- hookEngine?: HookEngine,
1673
- mode?: AgentMode,
1674
- requestFileDiff?: (path: string, toolName: string, diff: string) => Promise<FileDiffDecision>,
1675
- skipRemainingDiffPrompts?: { value: boolean },
1676
- rejectRemainingDiffPrompts?: { value: boolean },
1677
- onChunk?: (chunk: string) => void,
1678
- toolTimeouts?: Record<string, number>,
1679
- infraContext?: import('../sessions/manager').SessionInfraContext
1680
- ): Promise<ToolResult> {
1681
- const toolName = toolCall.function.name;
1682
-
1683
- // Parse the JSON arguments string from the LLM
1684
- let parsedArgs: unknown;
1685
- try {
1686
- parsedArgs = JSON.parse(toolCall.function.arguments);
1687
- } catch {
1688
- const result: ToolResult = {
1689
- output: '',
1690
- error: `Tool '${toolName}' received malformed JSON arguments — please retry the tool call with valid JSON. Received: ${toolCall.function.arguments.slice(0, 200)}`,
1691
- isError: true,
1692
- };
1693
- return result;
1694
- }
1695
-
1696
- const callInfo: ToolCallInfo = {
1697
- id: toolCall.id,
1698
- name: toolName,
1699
- input: parsedArgs,
1700
- startTime: Date.now(),
1701
- };
1702
-
1703
- // Look up the tool definition
1704
- const tool = registry.get(toolName);
1705
- if (!tool) {
1706
- const result: ToolResult = {
1707
- output: '',
1708
- error: `Unknown tool: ${toolName}`,
1709
- isError: true,
1710
- };
1711
- if (onEnd) {
1712
- onEnd(callInfo, result);
1713
- }
1714
- return result;
1715
- }
1716
-
1717
- // Notify start
1718
- if (onStart) {
1719
- onStart(callInfo);
1720
- }
1721
-
1722
- // Build shared hook context for PreToolUse and PostToolUse
1723
- const hookContext: HookContext = {
1724
- tool: toolName,
1725
- input: parsedArgs && typeof parsedArgs === 'object' ? (parsedArgs as Record<string, unknown>) : {},
1726
- sessionId: sessionId ?? 'default',
1727
- agent: mode ?? 'build',
1728
- timestamp: new Date().toISOString(),
1729
- };
1730
-
1731
- // PreToolUse hooks — may block the tool call
1732
- if (hookEngine) {
1733
- const preResult = await runPreToolHooks(hookEngine, hookContext);
1734
- if (!preResult.allowed) {
1735
- const result: ToolResult = {
1736
- output: '',
1737
- error: `Tool '${toolName}' blocked by hook: ${preResult.message ?? 'no reason given'}`,
1738
- isError: true,
1739
- };
1740
- if (onEnd) {
1741
- onEnd(callInfo, result);
1742
- }
1743
- return result;
1744
- }
1745
- }
1746
-
1747
- // Permission check
1748
- if (checkPermission) {
1749
- const decision = await checkPermission(tool, parsedArgs);
1750
- if (decision === 'deny' || decision === 'block') {
1751
- const result: ToolResult = {
1752
- output: '',
1753
- error:
1754
- decision === 'block'
1755
- ? `Tool '${toolName}' is blocked by permission policy.`
1756
- : `User denied permission for tool '${toolName}'.`,
1757
- isError: true,
1758
- };
1759
- if (onEnd) {
1760
- onEnd(callInfo, result);
1761
- }
1762
- return result;
1763
- }
1764
- }
1765
-
1766
- // B1: Pre-approval diff — show proposed change before writing files
1767
- if (
1768
- FILE_MUTATING_TOOLS.has(toolName) &&
1769
- requestFileDiff &&
1770
- !(skipRemainingDiffPrompts?.value)
1771
- ) {
1772
- // Auto-reject if 'reject-all' was previously chosen
1773
- if (rejectRemainingDiffPrompts?.value) {
1774
- const rejResult: ToolResult = {
1775
- output: 'User rejected this change (reject-all).',
1776
- error: undefined,
1777
- isError: false,
1778
- };
1779
- if (onEnd) onEnd(callInfo, rejResult);
1780
- return rejResult;
1781
- }
1782
-
1783
- const diff = await computeProposedDiff(toolName, parsedArgs as Record<string, unknown>);
1784
- if (diff) {
1785
- const targetPath =
1786
- (parsedArgs as Record<string, unknown>).path as string | undefined ?? '(file)';
1787
- const decision = await requestFileDiff(targetPath, toolName, diff);
1788
- if (decision === 'reject') {
1789
- const rejResult: ToolResult = {
1790
- output: 'User rejected this change.',
1791
- error: undefined,
1792
- isError: false,
1793
- };
1794
- if (onEnd) onEnd(callInfo, rejResult);
1795
- return rejResult;
1796
- }
1797
- if (decision === 'reject-all') {
1798
- if (rejectRemainingDiffPrompts) {
1799
- rejectRemainingDiffPrompts.value = true;
1800
- }
1801
- const rejResult: ToolResult = {
1802
- output: 'User rejected this change (reject-all).',
1803
- error: undefined,
1804
- isError: false,
1805
- };
1806
- if (onEnd) onEnd(callInfo, rejResult);
1807
- return rejResult;
1808
- }
1809
- if (decision === 'apply-all' && skipRemainingDiffPrompts) {
1810
- skipRemainingDiffPrompts.value = true;
1811
- }
1812
- }
1813
- }
1814
-
1815
- // Capture snapshot before file-modifying tools for undo/redo support
1816
- if (
1817
- snapshotManager &&
1818
- SnapshotManager.shouldSnapshot(toolName, parsedArgs as Record<string, unknown>)
1819
- ) {
1820
- try {
1821
- await snapshotManager.captureSnapshot({
1822
- sessionId: sessionId || 'default',
1823
- messageId: toolCall.id,
1824
- toolCallId: toolCall.id,
1825
- description: `${toolName}: ${extractFilePath(toolName, parsedArgs) || '(bash command)'}`,
1826
- });
1827
- } catch {
1828
- // Snapshot failure should never block the tool call
1829
- }
1830
- }
1831
-
1832
- // Validate input against the tool's Zod schema and execute
1833
- let result: ToolResult;
1834
- try {
1835
- const validatedInput = tool.inputSchema.parse(parsedArgs);
1836
-
1837
- // Thread AbortSignal into bash tool for Ctrl+C child process killing
1838
- if (signal && toolName === 'bash' && validatedInput && typeof validatedInput === 'object') {
1839
- (validatedInput as Record<string, unknown>)._signal = signal;
1840
- }
1841
-
1842
- // GAP-20: Build tool execute context, including per-tool timeout from toolTimeouts map
1843
- // C2: Also pass infraContext from session so tools can use it as fallback
1844
- const toolCtx: ToolExecuteContext | undefined = onChunk || toolTimeouts?.[toolName] || infraContext
1845
- ? {
1846
- ...(onChunk ? { onProgress: onChunk } : {}),
1847
- ...(toolTimeouts?.[toolName] !== undefined ? { timeout: toolTimeouts[toolName] } : {}),
1848
- ...(infraContext ? { infraContext } : {}),
1849
- }
1850
- : undefined;
1851
- // C2: Write infra checkpoint before mutating terraform/helm operations
1852
- if (toolName === 'terraform' || toolName === 'helm') {
1853
- const _cpArgs = parsedArgs && typeof parsedArgs === 'object'
1854
- ? (parsedArgs as Record<string, unknown>)
1855
- : {};
1856
- const _cpAction = String(_cpArgs.action ?? '');
1857
- const _cpNeedCheckpoint =
1858
- (toolName === 'terraform' && _cpAction === 'apply') ||
1859
- (toolName === 'helm' && ['install', 'upgrade', 'rollback'].includes(_cpAction));
1860
- if (_cpNeedCheckpoint) {
1861
- writeInfraCheckpoint(toolName, _cpAction, _cpArgs);
1862
- }
1863
- }
1864
- result = await tool.execute(validatedInput, toolCtx);
1865
- } catch (error: unknown) {
1866
- result = {
1867
- output: '',
1868
- error: formatToolInputError(toolName, error),
1869
- isError: true,
1870
- };
1871
- }
1872
-
1873
- // -----------------------------------------------------------------------
1874
- // LSP diagnostics injection
1875
- // -----------------------------------------------------------------------
1876
- // After a successful file edit, notify the language server and collect
1877
- // any diagnostics (type errors, lint issues). If errors exist they are
1878
- // appended to the tool output so the LLM sees them on its next turn
1879
- // and can self-correct.
1880
- if (lspManager && !result.isError) {
1881
- const filePath = extractFilePath(toolName, parsedArgs);
1882
- if (filePath) {
1883
- try {
1884
- await lspManager.touchFile(filePath);
1885
- const diagnostics = await lspManager.getDiagnostics(filePath);
1886
- if (diagnostics.length > 0) {
1887
- const formatted = lspManager.formatDiagnosticsForAgent(diagnostics);
1888
- if (formatted) {
1889
- result = {
1890
- ...result,
1891
- output: result.output ? `${result.output}\n\n${formatted}` : formatted,
1892
- };
1893
- }
1894
- }
1895
- } catch (lspErr) {
1896
- // LSP errors should never block the agent loop.
1897
- // Append a note to the tool result so the LLM (and user) can see it.
1898
- const lspErrMsg = lspErr instanceof Error ? lspErr.message : String(lspErr);
1899
- result = {
1900
- ...result,
1901
- output: result.output
1902
- ? `${result.output}\n\n[Note: LSP diagnostics unavailable: ${lspErrMsg}]`
1903
- : `[Note: LSP diagnostics unavailable: ${lspErrMsg}]`,
1904
- };
1905
- }
1906
- }
1907
- }
1908
-
1909
- // Gap 12: Mask secrets in tool output before forwarding to callbacks/history
1910
- if (!result.isError && result.output) {
1911
- result = { ...result, output: maskSecrets(result.output) };
1912
- }
1913
-
1914
- // PostToolUse hooks — fire-and-forget (audit, auto-format, etc.)
1915
- if (hookEngine) {
1916
- await runPostToolHooks(hookEngine, {
1917
- ...hookContext,
1918
- result: {
1919
- output: result.isError ? (result.error ?? '') : result.output,
1920
- isError: result.isError,
1921
- },
1922
- });
1923
- }
1924
-
1925
- // Notify end
1926
- if (onEnd) {
1927
- onEnd(callInfo, result);
1928
- }
1929
-
1930
- return result;
1931
- }
1932
-
1933
- // ---------------------------------------------------------------------------
1934
- // Mode-Based Tool Filtering
1935
- // ---------------------------------------------------------------------------
1936
-
1937
/**
 * Names of the tools exposed to the agent in `plan` mode.
 *
 * Plan mode is meant for inspection and analysis: reading files, searching
 * the codebase, reading tasks, estimating costs, and detecting drift. Tools
 * that write files, run commands, or mutate infrastructure are not listed
 * here and are therefore filtered out by {@link getToolsForMode}.
 *
 * NOTE(review): `todo_write` and `task` are on the allow-list even though
 * they are not obviously read-only; presumably they only affect the agent's
 * own task tracking rather than the workspace -- confirm against the tool
 * implementations.
 */
const PLAN_MODE_TOOLS: ReadonlySet<string> = new Set([
  'read_file',
  'glob',
  'grep',
  'list_dir',
  'webfetch',
  'todo_read',
  'todo_write',
  'task',
  'cost_estimate',
  'drift_detect',
  'cloud_discover',
]);

/**
 * Names of the tools hidden from the agent in `build` mode.
 *
 * Build mode allows reads and writes (file edits, code generation) but
 * withholds the infrastructure CLIs listed here, which could affect live
 * environments. This is a coarse filter; finer-grained control is expected
 * to come from the permission engine layered on top of it.
 */
const BUILD_MODE_BLOCKED_TOOLS: ReadonlySet<string> = new Set(['terraform', 'kubectl', 'helm']);

/**
 * Filter tools based on the current agent mode.
 *
 * - **plan**: only tools named in `PLAN_MODE_TOOLS`.
 * - **build**: every tool except those named in `BUILD_MODE_BLOCKED_TOOLS`.
 * - **deploy**: every tool; the input array itself is returned (not a copy).
 * - **anything else** (only reachable when an unchecked value is passed at
 *   runtime): treated like `plan`, so an unrecognised mode can never widen
 *   the tool set.
 *
 * @param allTools - Every tool registered in the system.
 * @param mode - The active agent mode.
 * @returns The subset of tools available in the given mode, in input order.
 */
export function getToolsForMode(allTools: ToolDefinition[], mode: AgentMode): ToolDefinition[] {
  switch (mode) {
    case 'plan':
      return allTools.filter(tool => PLAN_MODE_TOOLS.has(tool.name));

    case 'build':
      return allTools.filter(tool => !BUILD_MODE_BLOCKED_TOOLS.has(tool.name));

    case 'deploy':
      // All tools available
      return allTools;

    default: {
      // Compile-time exhaustiveness check: adding a member to AgentMode
      // without handling it above makes this assignment a type error.
      const unhandledMode: never = mode;
      void unhandledMode;
      // Types are erased at runtime, so a bad value can still land here.
      // Fail closed with the most restrictive tool set instead of granting
      // every tool.
      return allTools.filter(tool => PLAN_MODE_TOOLS.has(tool.name));
    }
  }
}