@build-astron-co/nimbus 0.4.2 → 0.4.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/src/agent/compaction-agent.js +24 -12
- package/dist/src/agent/context-manager.js +2 -1
- package/dist/src/agent/expand-files.js +2 -1
- package/dist/src/agent/loop.js +71 -33
- package/dist/src/agent/permissions.js +4 -2
- package/dist/src/agent/system-prompt.js +34 -17
- package/dist/src/app.js +1 -1
- package/dist/src/auth/keychain.js +8 -4
- package/dist/src/auth/store.js +70 -107
- package/dist/src/cli/init.js +35 -19
- package/dist/src/cli/run.js +18 -10
- package/dist/src/cli/serve.js +4 -2
- package/dist/src/cli.js +52 -11
- package/dist/src/commands/alias.js +5 -3
- package/dist/src/commands/audit/index.js +2 -1
- package/dist/src/commands/aws-terraform.js +36 -18
- package/dist/src/commands/completions.js +1 -1
- package/dist/src/commands/config.js +3 -2
- package/dist/src/commands/connect-github.js +92 -0
- package/dist/src/commands/cost/index.js +3 -2
- package/dist/src/commands/deploy.js +15 -10
- package/dist/src/commands/doctor.js +6 -3
- package/dist/src/commands/drift/index.js +2 -1
- package/dist/src/commands/export.js +5 -3
- package/dist/src/commands/generate-terraform.js +110 -2
- package/dist/src/commands/import.js +3 -3
- package/dist/src/commands/incident.js +10 -5
- package/dist/src/commands/login.js +8 -93
- package/dist/src/commands/logs.js +16 -8
- package/dist/src/commands/onboarding.js +6 -4
- package/dist/src/commands/pipeline.js +6 -3
- package/dist/src/commands/plugin.js +3 -2
- package/dist/src/commands/profile.js +27 -14
- package/dist/src/commands/questionnaire.js +1 -1
- package/dist/src/commands/rollback.js +3 -2
- package/dist/src/commands/rollout.js +5 -3
- package/dist/src/commands/runbook.js +17 -10
- package/dist/src/commands/schedule.js +10 -5
- package/dist/src/commands/status.js +2 -1
- package/dist/src/commands/team-context.js +12 -7
- package/dist/src/commands/template.js +1 -1
- package/dist/src/commands/tf/index.js +6 -3
- package/dist/src/commands/version.js +6 -3
- package/dist/src/commands/watch.js +6 -3
- package/dist/src/compat/sqlite.js +5 -3
- package/dist/src/config/mode-store.js +2 -1
- package/dist/src/config/profiles.js +4 -2
- package/dist/src/config/types.js +2 -1
- package/dist/src/engine/executor.js +8 -4
- package/dist/src/engine/planner.js +9 -5
- package/dist/src/llm/providers/anthropic.js +6 -3
- package/dist/src/llm/providers/ollama.js +1 -1
- package/dist/src/llm/router.js +22 -7
- package/dist/src/sessions/manager.js +6 -3
- package/dist/src/sharing/viewer.js +2 -1
- package/dist/src/tools/file-ops.js +1 -2
- package/dist/src/tools/schemas/devops.js +197 -108
- package/dist/src/tools/schemas/standard.js +1 -1
- package/dist/src/ui/App.js +25 -13
- package/dist/src/ui/FileDiffModal.js +22 -11
- package/dist/src/ui/HelpModal.js +2 -1
- package/dist/src/ui/InputBox.js +6 -3
- package/dist/src/ui/MessageList.js +40 -20
- package/dist/src/ui/TerminalPane.js +2 -1
- package/dist/src/ui/ToolCallDisplay.js +12 -6
- package/dist/src/ui/TreePane.js +2 -1
- package/dist/src/ui/ink/index.js +37 -21
- package/dist/src/watcher/index.js +8 -4
- package/package.json +3 -5
- package/src/__tests__/alias.test.ts +0 -133
- package/src/__tests__/app.test.ts +0 -76
- package/src/__tests__/audit.test.ts +0 -877
- package/src/__tests__/circuit-breaker.test.ts +0 -116
- package/src/__tests__/cli-run.test.ts +0 -351
- package/src/__tests__/compat-sqlite.test.ts +0 -68
- package/src/__tests__/context-manager.test.ts +0 -632
- package/src/__tests__/context.test.ts +0 -242
- package/src/__tests__/devops-terminal-gaps.test.ts +0 -718
- package/src/__tests__/doctor.test.ts +0 -48
- package/src/__tests__/enterprise.test.ts +0 -401
- package/src/__tests__/export.test.ts +0 -236
- package/src/__tests__/gap-11-18-20.test.ts +0 -958
- package/src/__tests__/generator.test.ts +0 -433
- package/src/__tests__/helm-streaming.test.ts +0 -127
- package/src/__tests__/hooks.test.ts +0 -582
- package/src/__tests__/incident.test.ts +0 -179
- package/src/__tests__/init.test.ts +0 -487
- package/src/__tests__/intent-parser.test.ts +0 -229
- package/src/__tests__/llm-router.test.ts +0 -209
- package/src/__tests__/logs.test.ts +0 -107
- package/src/__tests__/loop-errors.test.ts +0 -244
- package/src/__tests__/lsp.test.ts +0 -293
- package/src/__tests__/modes.test.ts +0 -336
- package/src/__tests__/perf-optimizations.test.ts +0 -847
- package/src/__tests__/permissions.test.ts +0 -338
- package/src/__tests__/pipeline.test.ts +0 -50
- package/src/__tests__/polish-phase3.test.ts +0 -340
- package/src/__tests__/profile.test.ts +0 -237
- package/src/__tests__/rollback.test.ts +0 -83
- package/src/__tests__/runbook.test.ts +0 -219
- package/src/__tests__/schedule.test.ts +0 -206
- package/src/__tests__/serve.test.ts +0 -275
- package/src/__tests__/sessions.test.ts +0 -322
- package/src/__tests__/sharing.test.ts +0 -340
- package/src/__tests__/snapshots.test.ts +0 -581
- package/src/__tests__/standalone-migration.test.ts +0 -199
- package/src/__tests__/state-db.test.ts +0 -334
- package/src/__tests__/status.test.ts +0 -158
- package/src/__tests__/stream-with-tools.test.ts +0 -778
- package/src/__tests__/subagents.test.ts +0 -176
- package/src/__tests__/system-prompt.test.ts +0 -248
- package/src/__tests__/terminal-gap-v2.test.ts +0 -395
- package/src/__tests__/terminal-parity.test.ts +0 -393
- package/src/__tests__/tf-apply.test.ts +0 -187
- package/src/__tests__/tool-converter.test.ts +0 -256
- package/src/__tests__/tool-schemas.test.ts +0 -602
- package/src/__tests__/tools.test.ts +0 -144
- package/src/__tests__/version-json.test.ts +0 -184
- package/src/__tests__/version.test.ts +0 -49
- package/src/__tests__/watch.test.ts +0 -129
- package/src/agent/compaction-agent.ts +0 -266
- package/src/agent/context-manager.ts +0 -499
- package/src/agent/context.ts +0 -427
- package/src/agent/deploy-preview.ts +0 -487
- package/src/agent/expand-files.ts +0 -108
- package/src/agent/index.ts +0 -68
- package/src/agent/loop.ts +0 -1998
- package/src/agent/modes.ts +0 -429
- package/src/agent/permissions.ts +0 -513
- package/src/agent/subagents/base.ts +0 -116
- package/src/agent/subagents/cost.ts +0 -51
- package/src/agent/subagents/explore.ts +0 -42
- package/src/agent/subagents/general.ts +0 -54
- package/src/agent/subagents/index.ts +0 -102
- package/src/agent/subagents/infra.ts +0 -59
- package/src/agent/subagents/security.ts +0 -69
- package/src/agent/system-prompt.ts +0 -990
- package/src/app.ts +0 -180
- package/src/audit/activity-log.ts +0 -290
- package/src/audit/compliance-checker.ts +0 -540
- package/src/audit/cost-tracker.ts +0 -318
- package/src/audit/index.ts +0 -23
- package/src/audit/security-scanner.ts +0 -641
- package/src/auth/guard.ts +0 -75
- package/src/auth/index.ts +0 -56
- package/src/auth/keychain.ts +0 -82
- package/src/auth/oauth.ts +0 -465
- package/src/auth/providers.ts +0 -470
- package/src/auth/sso.ts +0 -113
- package/src/auth/store.ts +0 -505
- package/src/auth/types.ts +0 -187
- package/src/build.ts +0 -141
- package/src/cli/index.ts +0 -16
- package/src/cli/init.ts +0 -1227
- package/src/cli/openapi-spec.ts +0 -356
- package/src/cli/run.ts +0 -628
- package/src/cli/serve-auth.ts +0 -80
- package/src/cli/serve.ts +0 -539
- package/src/cli/web.ts +0 -71
- package/src/cli.ts +0 -1728
- package/src/clients/core-engine-client.ts +0 -227
- package/src/clients/enterprise-client.ts +0 -334
- package/src/clients/generator-client.ts +0 -351
- package/src/clients/git-client.ts +0 -627
- package/src/clients/github-client.ts +0 -410
- package/src/clients/helm-client.ts +0 -504
- package/src/clients/index.ts +0 -80
- package/src/clients/k8s-client.ts +0 -497
- package/src/clients/llm-client.ts +0 -161
- package/src/clients/rest-client.ts +0 -130
- package/src/clients/service-discovery.ts +0 -38
- package/src/clients/terraform-client.ts +0 -482
- package/src/clients/tools-client.ts +0 -1843
- package/src/clients/ws-client.ts +0 -115
- package/src/commands/alias.ts +0 -100
- package/src/commands/analyze/index.ts +0 -352
- package/src/commands/apply/helm.ts +0 -473
- package/src/commands/apply/index.ts +0 -213
- package/src/commands/apply/k8s.ts +0 -454
- package/src/commands/apply/terraform.ts +0 -582
- package/src/commands/ask.ts +0 -167
- package/src/commands/audit/index.ts +0 -357
- package/src/commands/auth-cloud.ts +0 -407
- package/src/commands/auth-list.ts +0 -134
- package/src/commands/auth-profile.ts +0 -121
- package/src/commands/auth-refresh.ts +0 -187
- package/src/commands/auth-status.ts +0 -141
- package/src/commands/aws/ec2.ts +0 -501
- package/src/commands/aws/iam.ts +0 -397
- package/src/commands/aws/index.ts +0 -133
- package/src/commands/aws/lambda.ts +0 -396
- package/src/commands/aws/rds.ts +0 -439
- package/src/commands/aws/s3.ts +0 -439
- package/src/commands/aws/vpc.ts +0 -393
- package/src/commands/aws-discover.ts +0 -542
- package/src/commands/aws-terraform.ts +0 -755
- package/src/commands/azure/aks.ts +0 -376
- package/src/commands/azure/functions.ts +0 -253
- package/src/commands/azure/index.ts +0 -116
- package/src/commands/azure/storage.ts +0 -478
- package/src/commands/azure/vm.ts +0 -355
- package/src/commands/billing/index.ts +0 -256
- package/src/commands/chat.ts +0 -320
- package/src/commands/completions.ts +0 -268
- package/src/commands/config.ts +0 -372
- package/src/commands/cost/cloud-cost-estimator.ts +0 -266
- package/src/commands/cost/estimator.ts +0 -79
- package/src/commands/cost/index.ts +0 -810
- package/src/commands/cost/parsers/terraform.ts +0 -273
- package/src/commands/cost/parsers/types.ts +0 -25
- package/src/commands/cost/pricing/aws.ts +0 -544
- package/src/commands/cost/pricing/azure.ts +0 -499
- package/src/commands/cost/pricing/gcp.ts +0 -396
- package/src/commands/cost/pricing/index.ts +0 -40
- package/src/commands/demo.ts +0 -250
- package/src/commands/deploy.ts +0 -260
- package/src/commands/doctor.ts +0 -1386
- package/src/commands/drift/index.ts +0 -787
- package/src/commands/explain.ts +0 -277
- package/src/commands/export.ts +0 -146
- package/src/commands/feedback.ts +0 -389
- package/src/commands/fix.ts +0 -324
- package/src/commands/fs/index.ts +0 -402
- package/src/commands/gcp/compute.ts +0 -325
- package/src/commands/gcp/functions.ts +0 -271
- package/src/commands/gcp/gke.ts +0 -438
- package/src/commands/gcp/iam.ts +0 -344
- package/src/commands/gcp/index.ts +0 -129
- package/src/commands/gcp/storage.ts +0 -284
- package/src/commands/generate-helm.ts +0 -1249
- package/src/commands/generate-k8s.ts +0 -1508
- package/src/commands/generate-terraform.ts +0 -1202
- package/src/commands/gh/index.ts +0 -863
- package/src/commands/git/index.ts +0 -1343
- package/src/commands/helm/index.ts +0 -1126
- package/src/commands/help.ts +0 -715
- package/src/commands/history.ts +0 -149
- package/src/commands/import.ts +0 -868
- package/src/commands/incident.ts +0 -166
- package/src/commands/index.ts +0 -367
- package/src/commands/init.ts +0 -1051
- package/src/commands/k8s/index.ts +0 -1137
- package/src/commands/login.ts +0 -716
- package/src/commands/logout.ts +0 -83
- package/src/commands/logs.ts +0 -167
- package/src/commands/onboarding.ts +0 -405
- package/src/commands/pipeline.ts +0 -186
- package/src/commands/plan/display.ts +0 -279
- package/src/commands/plan/index.ts +0 -599
- package/src/commands/plugin.ts +0 -398
- package/src/commands/preview.ts +0 -452
- package/src/commands/profile.ts +0 -342
- package/src/commands/questionnaire.ts +0 -1172
- package/src/commands/resume.ts +0 -47
- package/src/commands/rollback.ts +0 -315
- package/src/commands/rollout.ts +0 -88
- package/src/commands/runbook.ts +0 -346
- package/src/commands/schedule.ts +0 -236
- package/src/commands/status.ts +0 -252
- package/src/commands/team/index.ts +0 -346
- package/src/commands/team-context.ts +0 -220
- package/src/commands/template.ts +0 -233
- package/src/commands/tf/index.ts +0 -1093
- package/src/commands/upgrade.ts +0 -609
- package/src/commands/usage/index.ts +0 -134
- package/src/commands/version.ts +0 -174
- package/src/commands/watch.ts +0 -153
- package/src/compat/index.ts +0 -2
- package/src/compat/runtime.ts +0 -12
- package/src/compat/sqlite.ts +0 -177
- package/src/config/index.ts +0 -17
- package/src/config/manager.ts +0 -530
- package/src/config/mode-store.ts +0 -62
- package/src/config/profiles.ts +0 -84
- package/src/config/safety-policy.ts +0 -358
- package/src/config/schema.ts +0 -125
- package/src/config/types.ts +0 -609
- package/src/config/workspace-state.ts +0 -53
- package/src/context/context-db.ts +0 -199
- package/src/demo/index.ts +0 -349
- package/src/demo/scenarios/full-journey.ts +0 -229
- package/src/demo/scenarios/getting-started.ts +0 -127
- package/src/demo/scenarios/helm-release.ts +0 -341
- package/src/demo/scenarios/k8s-deployment.ts +0 -194
- package/src/demo/scenarios/terraform-vpc.ts +0 -170
- package/src/demo/types.ts +0 -92
- package/src/engine/cost-estimator.ts +0 -480
- package/src/engine/diagram-generator.ts +0 -256
- package/src/engine/drift-detector.ts +0 -902
- package/src/engine/executor.ts +0 -1066
- package/src/engine/index.ts +0 -76
- package/src/engine/orchestrator.ts +0 -636
- package/src/engine/planner.ts +0 -787
- package/src/engine/safety.ts +0 -743
- package/src/engine/verifier.ts +0 -770
- package/src/enterprise/audit.ts +0 -348
- package/src/enterprise/auth.ts +0 -270
- package/src/enterprise/billing.ts +0 -822
- package/src/enterprise/index.ts +0 -17
- package/src/enterprise/teams.ts +0 -443
- package/src/generator/best-practices.ts +0 -1608
- package/src/generator/helm.ts +0 -630
- package/src/generator/index.ts +0 -37
- package/src/generator/intent-parser.ts +0 -514
- package/src/generator/kubernetes.ts +0 -976
- package/src/generator/terraform.ts +0 -1875
- package/src/history/index.ts +0 -8
- package/src/history/manager.ts +0 -250
- package/src/history/types.ts +0 -34
- package/src/hooks/config.ts +0 -432
- package/src/hooks/engine.ts +0 -392
- package/src/hooks/index.ts +0 -4
- package/src/llm/auth-bridge.ts +0 -198
- package/src/llm/circuit-breaker.ts +0 -140
- package/src/llm/config-loader.ts +0 -201
- package/src/llm/cost-calculator.ts +0 -171
- package/src/llm/index.ts +0 -8
- package/src/llm/model-aliases.ts +0 -115
- package/src/llm/provider-registry.ts +0 -63
- package/src/llm/providers/anthropic.ts +0 -462
- package/src/llm/providers/bedrock.ts +0 -477
- package/src/llm/providers/google.ts +0 -405
- package/src/llm/providers/ollama.ts +0 -767
- package/src/llm/providers/openai-compatible.ts +0 -340
- package/src/llm/providers/openai.ts +0 -328
- package/src/llm/providers/openrouter.ts +0 -338
- package/src/llm/router.ts +0 -1104
- package/src/llm/types.ts +0 -232
- package/src/lsp/client.ts +0 -298
- package/src/lsp/languages.ts +0 -119
- package/src/lsp/manager.ts +0 -294
- package/src/mcp/client.ts +0 -402
- package/src/mcp/index.ts +0 -5
- package/src/mcp/manager.ts +0 -133
- package/src/nimbus.ts +0 -234
- package/src/plugins/index.ts +0 -27
- package/src/plugins/loader.ts +0 -334
- package/src/plugins/manager.ts +0 -376
- package/src/plugins/types.ts +0 -284
- package/src/scanners/cicd-scanner.ts +0 -258
- package/src/scanners/cloud-scanner.ts +0 -466
- package/src/scanners/framework-scanner.ts +0 -469
- package/src/scanners/iac-scanner.ts +0 -388
- package/src/scanners/index.ts +0 -539
- package/src/scanners/language-scanner.ts +0 -276
- package/src/scanners/package-manager-scanner.ts +0 -277
- package/src/scanners/types.ts +0 -172
- package/src/sessions/manager.ts +0 -472
- package/src/sessions/types.ts +0 -44
- package/src/sharing/sync.ts +0 -300
- package/src/sharing/viewer.ts +0 -163
- package/src/snapshots/index.ts +0 -2
- package/src/snapshots/manager.ts +0 -530
- package/src/state/artifacts.ts +0 -147
- package/src/state/audit.ts +0 -137
- package/src/state/billing.ts +0 -240
- package/src/state/checkpoints.ts +0 -117
- package/src/state/config.ts +0 -67
- package/src/state/conversations.ts +0 -14
- package/src/state/credentials.ts +0 -154
- package/src/state/db.ts +0 -58
- package/src/state/index.ts +0 -26
- package/src/state/messages.ts +0 -115
- package/src/state/projects.ts +0 -123
- package/src/state/schema.ts +0 -236
- package/src/state/sessions.ts +0 -147
- package/src/state/teams.ts +0 -200
- package/src/telemetry.ts +0 -108
- package/src/tools/aws-ops.ts +0 -952
- package/src/tools/azure-ops.ts +0 -579
- package/src/tools/file-ops.ts +0 -615
- package/src/tools/gcp-ops.ts +0 -625
- package/src/tools/git-ops.ts +0 -773
- package/src/tools/github-ops.ts +0 -799
- package/src/tools/helm-ops.ts +0 -943
- package/src/tools/index.ts +0 -17
- package/src/tools/k8s-ops.ts +0 -819
- package/src/tools/schemas/converter.ts +0 -184
- package/src/tools/schemas/devops.ts +0 -3502
- package/src/tools/schemas/index.ts +0 -73
- package/src/tools/schemas/standard.ts +0 -1148
- package/src/tools/schemas/types.ts +0 -735
- package/src/tools/spawn-exec.ts +0 -148
- package/src/tools/terraform-ops.ts +0 -862
- package/src/types/ambient.d.ts +0 -193
- package/src/types/config.ts +0 -83
- package/src/types/drift.ts +0 -116
- package/src/types/enterprise.ts +0 -335
- package/src/types/index.ts +0 -20
- package/src/types/plan.ts +0 -44
- package/src/types/request.ts +0 -65
- package/src/types/response.ts +0 -54
- package/src/types/service.ts +0 -51
- package/src/ui/App.tsx +0 -2114
- package/src/ui/DeployPreview.tsx +0 -174
- package/src/ui/FileDiffModal.tsx +0 -162
- package/src/ui/Header.tsx +0 -131
- package/src/ui/HelpModal.tsx +0 -57
- package/src/ui/InputBox.tsx +0 -503
- package/src/ui/MessageList.tsx +0 -1032
- package/src/ui/PermissionPrompt.tsx +0 -163
- package/src/ui/StatusBar.tsx +0 -277
- package/src/ui/TerminalPane.tsx +0 -84
- package/src/ui/ToolCallDisplay.tsx +0 -643
- package/src/ui/TreePane.tsx +0 -132
- package/src/ui/chat-ui.ts +0 -850
- package/src/ui/index.ts +0 -33
- package/src/ui/ink/index.ts +0 -1444
- package/src/ui/streaming.ts +0 -176
- package/src/ui/theme.ts +0 -104
- package/src/ui/types.ts +0 -75
- package/src/utils/analytics.ts +0 -72
- package/src/utils/cost-warning.ts +0 -27
- package/src/utils/env.ts +0 -46
- package/src/utils/errors.ts +0 -69
- package/src/utils/event-bus.ts +0 -38
- package/src/utils/index.ts +0 -24
- package/src/utils/logger.ts +0 -171
- package/src/utils/rate-limiter.ts +0 -121
- package/src/utils/service-auth.ts +0 -49
- package/src/utils/validation.ts +0 -53
- package/src/version.ts +0 -4
- package/src/watcher/index.ts +0 -214
- package/src/wizard/approval.ts +0 -383
- package/src/wizard/index.ts +0 -25
- package/src/wizard/prompts.ts +0 -338
- package/src/wizard/types.ts +0 -172
- package/src/wizard/ui.ts +0 -556
- package/src/wizard/wizard.ts +0 -304
- package/tsconfig.json +0 -24
package/src/agent/loop.ts
DELETED
|
@@ -1,1998 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Core Agentic Loop
|
|
3
|
-
*
|
|
4
|
-
* Implements the autonomous agent loop:
|
|
5
|
-
* 1. Build context (system prompt + history + tools)
|
|
6
|
-
* 2. Send to LLM with tools enabled
|
|
7
|
-
* 3. Stream text response
|
|
8
|
-
* 4. If tool_use: check permissions → execute → collect results
|
|
9
|
-
* 5. Append messages → loop back to LLM
|
|
10
|
-
* 6. Exit when LLM returns end_turn (no more tool calls)
|
|
11
|
-
*
|
|
12
|
-
* This is the heart of the Nimbus agent. Every user message enters
|
|
13
|
-
* {@link runAgentLoop}, which orchestrates a multi-turn conversation with
|
|
14
|
-
* the LLM, executing tools on its behalf until it signals completion by
|
|
15
|
-
* returning a response with no further tool calls.
|
|
16
|
-
*
|
|
17
|
-
* @module agent/loop
|
|
18
|
-
*/
|
|
19
|
-
|
|
20
|
-
import { join } from 'node:path';
|
|
21
|
-
import type { LLMRouter } from '../llm/router';
|
|
22
|
-
import type {
|
|
23
|
-
LLMMessage,
|
|
24
|
-
ToolCall,
|
|
25
|
-
ToolCompletionRequest,
|
|
26
|
-
ToolDefinition as LLMToolDefinition,
|
|
27
|
-
} from '../llm/types';
|
|
28
|
-
import {
|
|
29
|
-
toOpenAITool,
|
|
30
|
-
type ToolDefinition,
|
|
31
|
-
type ToolExecuteContext,
|
|
32
|
-
type ToolResult,
|
|
33
|
-
type ToolRegistry,
|
|
34
|
-
} from '../tools/schemas/types';
|
|
35
|
-
import { buildSystemPrompt, type AgentMode } from './system-prompt';
|
|
36
|
-
import type { ContextManager, CompactionResult } from './context-manager';
|
|
37
|
-
import { runCompaction } from './compaction-agent';
|
|
38
|
-
import type { LSPManager } from '../lsp/manager';
|
|
39
|
-
import { SnapshotManager } from '../snapshots/manager';
|
|
40
|
-
import { calculateCost } from '../llm/cost-calculator';
|
|
41
|
-
import {
|
|
42
|
-
HookEngine,
|
|
43
|
-
runPreToolHooks,
|
|
44
|
-
runPostToolHooks,
|
|
45
|
-
type HookContext,
|
|
46
|
-
} from '../hooks/engine';
|
|
47
|
-
import { maskSecrets } from '../audit/security-scanner';
|
|
48
|
-
import { classifyTaskComplexity, routeModel } from '../llm/router';
|
|
49
|
-
import { mkdirSync as _cpMkdirSync, writeFileSync as _cpWriteFileSync } from 'node:fs';
|
|
50
|
-
import { homedir as _cpHomedir } from 'node:os';
|
|
51
|
-
|
|
52
|
-
// ---------------------------------------------------------------------------
|
|
53
|
-
// C2: Infra state checkpoint helper
|
|
54
|
-
// ---------------------------------------------------------------------------
|
|
55
|
-
|
|
56
|
-
/**
|
|
57
|
-
* Write a checkpoint JSON file to ~/.nimbus/infra-checkpoints/<timestamp>.json
|
|
58
|
-
* before a mutating terraform or helm operation. Non-blocking — errors are swallowed.
|
|
59
|
-
*/
|
|
60
|
-
function writeInfraCheckpoint(tool: string, action: string, input: Record<string, unknown>): void {
|
|
61
|
-
try {
|
|
62
|
-
const checkpointsDir = join(_cpHomedir(), '.nimbus', 'infra-checkpoints');
|
|
63
|
-
_cpMkdirSync(checkpointsDir, { recursive: true });
|
|
64
|
-
// Sanitize: remove any field that looks like a secret
|
|
65
|
-
const sanitized: Record<string, unknown> = {};
|
|
66
|
-
for (const [k, v] of Object.entries(input)) {
|
|
67
|
-
const lower = k.toLowerCase();
|
|
68
|
-
if (lower.includes('secret') || lower.includes('password') || lower.includes('token') || lower.includes('key')) {
|
|
69
|
-
sanitized[k] = '[redacted]';
|
|
70
|
-
} else {
|
|
71
|
-
sanitized[k] = v;
|
|
72
|
-
}
|
|
73
|
-
}
|
|
74
|
-
const timestamp = new Date().toISOString();
|
|
75
|
-
const checkpoint = {
|
|
76
|
-
timestamp,
|
|
77
|
-
tool,
|
|
78
|
-
action,
|
|
79
|
-
input: sanitized,
|
|
80
|
-
cwd: process.cwd(),
|
|
81
|
-
workdir: (input.workdir as string | undefined) ?? undefined,
|
|
82
|
-
};
|
|
83
|
-
const fileName = timestamp.replace(/[:.]/g, '-') + '.json';
|
|
84
|
-
_cpWriteFileSync(
|
|
85
|
-
join(checkpointsDir, fileName),
|
|
86
|
-
JSON.stringify(checkpoint, null, 2),
|
|
87
|
-
'utf-8'
|
|
88
|
-
);
|
|
89
|
-
} catch { /* non-critical */ }
|
|
90
|
-
}
|
|
91
|
-
|
|
92
|
-
// ---------------------------------------------------------------------------
|
|
93
|
-
// Helpers
|
|
94
|
-
// ---------------------------------------------------------------------------
|
|
95
|
-
|
|
96
|
-
// ---------------------------------------------------------------------------
|
|
97
|
-
// Module-level compiled regex constants for classifyDevOpsError (PERF-1d).
|
|
98
|
-
// Hoisted here so they compile once at module load rather than per-call.
|
|
99
|
-
// ---------------------------------------------------------------------------
|
|
100
|
-
|
|
101
|
-
const _RE_CREDENTIAL_EXPIRY_AWS = /ExpiredTokenException|TokenExpiredException|token.*has.*expired/i;
|
|
102
|
-
const _RE_CREDENTIAL_EXPIRY_GCP = /credentials.*expired|Application Default Credentials.*expired|re-authenticate/i;
|
|
103
|
-
const _RE_CREDENTIAL_EXPIRY_AZURE = /AADSTS70008|InteractionRequired|credential.*expired/i;
|
|
104
|
-
const _RE_CMD_NOT_FOUND = /command not found|not found|no such file or directory/i;
|
|
105
|
-
|
|
106
|
-
/**
|
|
107
|
-
* Classify a DevOps tool error and return an actionable hint for the LLM.
|
|
108
|
-
* Returns null for unrecognized errors so we don't pollute the context.
|
|
109
|
-
*/
|
|
110
|
-
function classifyDevOpsError(toolName: string, errorOutput: string, nimbusInstructions?: string): string | null {
|
|
111
|
-
const e = errorOutput.toLowerCase();
|
|
112
|
-
|
|
113
|
-
// GAP-13: Credential expiry patterns — must come first for fast matching
|
|
114
|
-
const CREDENTIAL_EXPIRY = [
|
|
115
|
-
{ re: _RE_CREDENTIAL_EXPIRY_AWS, provider: 'aws' },
|
|
116
|
-
{ re: _RE_CREDENTIAL_EXPIRY_GCP, provider: 'gcp' },
|
|
117
|
-
{ re: _RE_CREDENTIAL_EXPIRY_AZURE, provider: 'azure' },
|
|
118
|
-
];
|
|
119
|
-
for (const { re, provider } of CREDENTIAL_EXPIRY) {
|
|
120
|
-
if (re.test(errorOutput)) {
|
|
121
|
-
return `Your ${provider.toUpperCase()} credentials have expired.\n\nRun: \`nimbus auth-refresh --provider ${provider}\` to refresh them.`;
|
|
122
|
-
}
|
|
123
|
-
}
|
|
124
|
-
|
|
125
|
-
// G3: "command not found" — provide installation hints for DevOps CLIs
|
|
126
|
-
const INSTALL_HINTS: Record<string, string> = {
|
|
127
|
-
terraform: 'brew install terraform OR https://developer.hashicorp.com/terraform/install',
|
|
128
|
-
kubectl: 'brew install kubectl OR https://kubernetes.io/docs/tasks/tools/',
|
|
129
|
-
helm: 'brew install helm OR https://helm.sh/docs/intro/install/',
|
|
130
|
-
docker: 'brew install --cask docker OR https://docs.docker.com/get-docker/',
|
|
131
|
-
aws: 'brew install awscli OR pip install awscli',
|
|
132
|
-
gcloud: 'brew install --cask google-cloud-sdk',
|
|
133
|
-
az: 'brew install azure-cli',
|
|
134
|
-
};
|
|
135
|
-
if (_RE_CMD_NOT_FOUND.test(errorOutput)) {
|
|
136
|
-
for (const [cmd, hint] of Object.entries(INSTALL_HINTS)) {
|
|
137
|
-
if (toolName.includes(cmd) || e.includes(`'${cmd}'`) || e.includes(`"${cmd}"`)) {
|
|
138
|
-
return `\`${cmd}\` is not installed.\n\nInstall: ${hint}`;
|
|
139
|
-
}
|
|
140
|
-
}
|
|
141
|
-
}
|
|
142
|
-
|
|
143
|
-
// Terraform errors
|
|
144
|
-
if (toolName === 'terraform' || e.includes('terraform')) {
|
|
145
|
-
if (e.includes('no such file or directory') && e.includes('.terraform')) {
|
|
146
|
-
return 'HINT: Run `terraform init` first — the .terraform directory is missing.';
|
|
147
|
-
}
|
|
148
|
-
if (e.includes('provider') && e.includes('required') && e.includes('terraform')) {
|
|
149
|
-
return 'HINT: Run `terraform init -upgrade` to download or upgrade required providers.';
|
|
150
|
-
}
|
|
151
|
-
if (e.includes('no valid credential') || e.includes('no credentials')) {
|
|
152
|
-
return 'HINT: AWS/cloud credentials are missing. Check `aws configure` or environment variables.';
|
|
153
|
-
}
|
|
154
|
-
if (e.includes('state lock') || e.includes('lock file')) {
|
|
155
|
-
return 'HINT: Terraform state is locked. If no other operation is running, use `terraform force-unlock <lock-id>`.';
|
|
156
|
-
}
|
|
157
|
-
if (e.includes('module not installed') || e.includes('module source')) {
|
|
158
|
-
return 'HINT: Run `terraform init` to install required modules.';
|
|
159
|
-
}
|
|
160
|
-
if (e.includes('quota') || e.includes('limit exceeded') || e.includes('vcpu')) {
|
|
161
|
-
return 'HINT: Cloud resource quota exceeded. Request a limit increase in the cloud console.';
|
|
162
|
-
}
|
|
163
|
-
}
|
|
164
|
-
|
|
165
|
-
// Kubernetes errors
|
|
166
|
-
if (toolName === 'kubectl' || toolName === 'kubectl_context') {
|
|
167
|
-
if (e.includes('connection refused') || e.includes('unable to connect')) {
|
|
168
|
-
return 'HINT: Cannot reach the Kubernetes API server. Check `kubectl config current-context` and ensure the cluster is accessible.';
|
|
169
|
-
}
|
|
170
|
-
if (e.includes('unauthorized') || e.includes('forbidden')) {
|
|
171
|
-
return 'HINT: Insufficient permissions. Check your kubeconfig credentials or RBAC roles.';
|
|
172
|
-
}
|
|
173
|
-
if (e.includes('not found') && e.includes('namespace')) {
|
|
174
|
-
return 'HINT: The namespace does not exist. Create it with `kubectl create namespace <name>` first.';
|
|
175
|
-
}
|
|
176
|
-
if (e.includes('image') && (e.includes('not found') || e.includes('pull'))) {
|
|
177
|
-
return 'HINT: Container image pull failed. Verify the image name, tag, and registry credentials (imagePullSecret).';
|
|
178
|
-
}
|
|
179
|
-
}
|
|
180
|
-
|
|
181
|
-
// Helm errors
|
|
182
|
-
if (toolName === 'helm' || toolName === 'helm_values') {
|
|
183
|
-
if (e.includes('chart not found') || e.includes('no such chart')) {
|
|
184
|
-
return 'HINT: Chart not found. Run `helm repo update` and verify the chart name.';
|
|
185
|
-
}
|
|
186
|
-
if (e.includes('release not found')) {
|
|
187
|
-
return 'HINT: Helm release not found. Use `helm list -A` to see all releases across namespaces.';
|
|
188
|
-
}
|
|
189
|
-
if (e.includes('unable to build kubernetes objects') || e.includes('manifest')) {
|
|
190
|
-
return 'HINT: Helm template rendering failed. Run `helm template <release> <chart>` to debug the manifests.';
|
|
191
|
-
}
|
|
192
|
-
}
|
|
193
|
-
|
|
194
|
-
// Cloud CLI errors
|
|
195
|
-
if (toolName === 'cloud_discover' || toolName === 'cloud_action') {
|
|
196
|
-
if (e.includes('not authorized') || e.includes('access denied') || e.includes('unauthorized')) {
|
|
197
|
-
return 'HINT: Cloud credentials lack required permissions. Check IAM policies/roles for the operation.';
|
|
198
|
-
}
|
|
199
|
-
if (e.includes('region') && e.includes('not found')) {
|
|
200
|
-
return 'HINT: Invalid region. Check `aws configure get region` or pass --region explicitly.';
|
|
201
|
-
}
|
|
202
|
-
}
|
|
203
|
-
|
|
204
|
-
// Docker errors
|
|
205
|
-
if (toolName === 'docker') {
|
|
206
|
-
if (e.includes('cannot connect to the docker daemon') || e.includes('docker daemon') || e.includes('docker.sock')) {
|
|
207
|
-
return 'HINT: Docker daemon is not running. Start it with `colima start` (macOS) or `sudo systemctl start docker` (Linux).';
|
|
208
|
-
}
|
|
209
|
-
if (e.includes('manifest unknown') || e.includes('manifest not found') || e.includes('not found')) {
|
|
210
|
-
return 'HINT: Image not found. Verify the image name and tag. Check registry credentials with `docker login`.';
|
|
211
|
-
}
|
|
212
|
-
if (e.includes('no space left on device') || e.includes('no space left')) {
|
|
213
|
-
return 'HINT: Docker disk space exhausted. Run `docker system prune -f` to reclaim space.';
|
|
214
|
-
}
|
|
215
|
-
if (e.includes('permission denied') && e.includes('docker')) {
|
|
216
|
-
return 'HINT: Docker permission denied. Add your user to the docker group: `sudo usermod -aG docker $USER`.';
|
|
217
|
-
}
|
|
218
|
-
}
|
|
219
|
-
|
|
220
|
-
// Secrets errors
|
|
221
|
-
if (toolName === 'secrets') {
|
|
222
|
-
if (e.includes('permission denied') || e.includes('403') || e.includes('accessdenied')) {
|
|
223
|
-
return 'HINT: Secrets access denied. Check Vault policy with `vault policy read <policy>` or IAM role permissions.';
|
|
224
|
-
}
|
|
225
|
-
if (e.includes('secret not found') || e.includes('no such secret') || e.includes('resourcenotfoundexception')) {
|
|
226
|
-
return 'HINT: Secret not found. Verify the secret path/name and namespace. Use `vault kv list <mount>` to browse.';
|
|
227
|
-
}
|
|
228
|
-
if (e.includes('invalid token') || e.includes('token expired')) {
|
|
229
|
-
return 'HINT: Vault/cloud token expired. Run `vault login` or refresh cloud credentials with `nimbus auth-refresh`.';
|
|
230
|
-
}
|
|
231
|
-
}
|
|
232
|
-
|
|
233
|
-
// CI/CD errors
|
|
234
|
-
if (toolName === 'cicd') {
|
|
235
|
-
if (e.includes('workflow not found') || e.includes('could not find workflow')) {
|
|
236
|
-
return 'HINT: Workflow not found. Check the workflow filename in .github/workflows/ and the branch name.';
|
|
237
|
-
}
|
|
238
|
-
if (e.includes('rate limit') || e.includes('429') || e.includes('too many requests')) {
|
|
239
|
-
return 'HINT: API rate limited. Wait 60 seconds and retry. Check rate limit headers for reset time.';
|
|
240
|
-
}
|
|
241
|
-
if (e.includes('unauthorized') || e.includes('401') || e.includes('bad credentials')) {
|
|
242
|
-
return 'HINT: CI/CD authentication failed. Check GITHUB_TOKEN, GITLAB_TOKEN, or CIRCLECI_TOKEN environment variables.';
|
|
243
|
-
}
|
|
244
|
-
}
|
|
245
|
-
|
|
246
|
-
// GitOps errors
|
|
247
|
-
if (toolName === 'gitops') {
|
|
248
|
-
if (e.includes('not found') || e.includes('not logged in') || e.includes('unauthenticated')) {
|
|
249
|
-
return 'HINT: ArgoCD/Flux not accessible. Check ARGOCD_SERVER and ARGOCD_TOKEN env vars, or run `argocd login`.';
|
|
250
|
-
}
|
|
251
|
-
if (e.includes('comparisonerror') || e.includes('sync error')) {
|
|
252
|
-
return 'HINT: GitOps sync error. Validate manifests: `kubectl apply --dry-run=client -f <manifest>` to find issues.';
|
|
253
|
-
}
|
|
254
|
-
if (e.includes('health') && e.includes('degraded')) {
|
|
255
|
-
return 'HINT: Application is degraded. Check pod logs with `kubectl logs -n <ns>` and events with `kubectl get events -n <ns>`.';
|
|
256
|
-
}
|
|
257
|
-
}
|
|
258
|
-
|
|
259
|
-
// Monitoring errors
|
|
260
|
-
if (toolName === 'monitor') {
|
|
261
|
-
if (e.includes('connection refused') || e.includes('could not connect')) {
|
|
262
|
-
return 'HINT: Cannot connect to monitoring endpoint. Check PROMETHEUS_URL, GRAFANA_URL, or cloud region configuration.';
|
|
263
|
-
}
|
|
264
|
-
if (e.includes('unauthorized') || e.includes('403')) {
|
|
265
|
-
return 'HINT: Monitoring authentication failed. Check DD_API_KEY, GRAFANA_TOKEN, or NEW_RELIC_API_KEY environment variables.';
|
|
266
|
-
}
|
|
267
|
-
}
|
|
268
|
-
|
|
269
|
-
// L3: Parse NIMBUS.md custom error hints section
|
|
270
|
-
if (nimbusInstructions) {
|
|
271
|
-
const hintsMatch = nimbusInstructions.match(/##\s*Custom Error Hints\s*\n([\s\S]*?)(?=\n##|\n$|$)/i);
|
|
272
|
-
if (hintsMatch) {
|
|
273
|
-
const hintsSection = hintsMatch[1];
|
|
274
|
-
const hintLines = hintsSection.split('\n').filter(l => l.trim().startsWith('-'));
|
|
275
|
-
for (const line of hintLines) {
|
|
276
|
-
// Format: "- pattern: hint message"
|
|
277
|
-
const colonIdx = line.indexOf(':');
|
|
278
|
-
if (colonIdx > 0) {
|
|
279
|
-
const pattern = line.slice(1, colonIdx).trim();
|
|
280
|
-
const hint = line.slice(colonIdx + 1).trim();
|
|
281
|
-
if (pattern && hint && errorOutput.toLowerCase().includes(pattern.toLowerCase())) {
|
|
282
|
-
return `HINT: ${hint}`;
|
|
283
|
-
}
|
|
284
|
-
}
|
|
285
|
-
}
|
|
286
|
-
}
|
|
287
|
-
}
|
|
288
|
-
|
|
289
|
-
return null;
|
|
290
|
-
}
|
|
291
|
-
|
|
292
|
-
/**
 * Registry names of the DevOps-oriented tools.
 *
 * Membership in this set marks a tool as eligible for a self-diagnosis hint
 * when it fails with an error that no specific hint rule recognizes.
 */
const DEVOPS_TOOL_NAMES = new Set([
  // Infrastructure-as-code and Kubernetes tooling
  'terraform',
  'kubectl',
  'kubectl_context',
  'helm',
  'helm_values',
  // Shell and discovery / preview helpers
  'bash',
  'cloud_discover',
  'drift_detect',
  'deploy_preview',
  // Platform integrations
  'docker',
  'secrets',
  'cicd',
  'monitor',
  'gitops',
  'cloud_action',
  // Operational tooling
  'logs',
  'certs',
  'mesh',
  'cfn',
  'k8s_rbac',
]);
|
|
299
|
-
|
|
300
|
-
/**
 * Turn a tool-input validation failure into a message the LLM can act on.
 *
 * Zod-style errors (objects carrying an `issues` array) are rendered as one
 * bullet per issue in the form `path.to.field: message`; an empty or missing
 * path is shown as `(root)`. Every other value falls back to a generic
 * "failed" line built from the error's message.
 *
 * @param toolName - Name of the tool whose input was rejected.
 * @param err - The thrown value: a ZodError-like object, an Error, or anything else.
 * @returns A human-readable description of what was wrong with the input.
 */
function formatToolInputError(toolName: string, err: unknown): string {
  if (err && typeof err === 'object' && 'issues' in err) {
    const { issues } = err as { issues: unknown };
    // Only take the ZodError path when `issues` really is an array; an
    // unrelated object that happens to own an `issues` property must not make
    // the error formatter itself throw.
    if (Array.isArray(issues)) {
      const details = issues
        .map(issue => {
          const { path, message } = (issue ?? {}) as { path?: unknown; message?: unknown };
          const location = (Array.isArray(path) ? path.join('.') : '') || '(root)';
          return ` - ${location}: ${String(message)}`;
        })
        .join('\n');
      return `Tool "${toolName}" received invalid input:\n${details}\n\nPlease correct the arguments and retry.`;
    }
  }

  // Prefer a real message over "[object Object]" for error-like plain objects.
  let reason: string;
  if (err instanceof Error) {
    reason = err.message;
  } else if (err && typeof err === 'object' && typeof (err as { message?: unknown }).message === 'string') {
    reason = (err as { message: string }).message;
  } else {
    reason = String(err);
  }
  return `Tool "${toolName}" failed: ${reason}`;
}
|
|
315
|
-
|
|
316
|
-
/**
 * Decide whether a streaming failure looks transient and is worth retrying.
 *
 * An error is retryable when either:
 * - it carries a numeric `status` / `statusCode` of 429 or any 5xx, or
 * - its message mentions rate limiting, "too many requests", "overloaded",
 *   or the status codes 429 / 503.
 *
 * Status codes in the message are only recognized when they are not part of
 * a longer digit run, so text such as "142950 tokens" is not mistaken for an
 * HTTP 429. Errors thrown as plain strings are inspected the same way as an
 * Error's message.
 *
 * @param err - The value thrown by the streaming call.
 * @returns `true` when the caller should retry, `false` otherwise.
 */
function isRetryableStreamError(err: unknown): boolean {
  let message = '';

  if (typeof err === 'string') {
    message = err;
  } else if (err && typeof err === 'object') {
    const e = err as Record<string, unknown>;
    const status =
      (typeof e.status === 'number' ? e.status : undefined) ??
      (typeof e.statusCode === 'number' ? e.statusCode : undefined);
    if (status === 429 || (status !== undefined && status >= 500 && status < 600)) return true;
    if (typeof e.message === 'string') message = e.message;
  }

  // (?<!\d) / (?!\d) keep 429 and 503 from matching inside larger numbers.
  return /rate.?limit|(?<!\d)(?:429|503)(?!\d)|too many requests|overloaded/i.test(message);
}
|
|
329
|
-
|
|
330
|
-
// ---------------------------------------------------------------------------
|
|
331
|
-
// G3: Runaway protection helpers
|
|
332
|
-
// ---------------------------------------------------------------------------
|
|
333
|
-
|
|
334
|
-
/** Whole-word, case-insensitive verbs that mark tool arguments as destructive. */
const DESTRUCTIVE_PATTERNS = /\b(apply|destroy|delete|terminate|stop|remove|drop|truncate|purge)\b/i;

/** Tools whose destructive invocations count toward the session-level limit. */
const DESTRUCTIVE_TOOL_NAMES = new Set([
  'terraform',
  'kubectl',
  'docker',
  'aws',
  'gcloud',
  'az',
  'cloud_action',
  'cfn',
]);

/**
 * Report whether a tool call should be counted as a destructive
 * infrastructure operation for the session-level counter (G3).
 *
 * @param toolName - Registry name of the tool being invoked.
 * @param inputStr - The tool arguments rendered as a string.
 * @returns `true` only when the tool is in the tracked set AND its arguments
 *          contain one of the destructive verbs.
 */
function isDestructiveOp(toolName: string, inputStr: string): boolean {
  // Untracked tools never count, whatever their arguments say.
  if (!DESTRUCTIVE_TOOL_NAMES.has(toolName)) {
    return false;
  }
  return DESTRUCTIVE_PATTERNS.test(inputStr);
}
|
|
349
|
-
|
|
350
|
-
// ---------------------------------------------------------------------------
|
|
351
|
-
// Public Types
|
|
352
|
-
// ---------------------------------------------------------------------------
|
|
353
|
-
|
|
354
|
-
/**
 * Everything a caller can configure when running the agent loop.
 *
 * Only `router`, `toolRegistry` and `mode` are required; every other member
 * is an optional limit, callback, or integration hook.
 */
export interface AgentLoopOptions {
  /** LLM router used to stream completions with tool definitions attached. */
  router: LLMRouter;

  /** Registry from which the tools available to the agent are taken. */
  toolRegistry: ToolRegistry;

  /** Agent mode (plan/build/deploy); used to select tools and build the system prompt. */
  mode: AgentMode;

  /** Upper bound on LLM turns before the loop stops. Defaults to 50. */
  maxTurns?: number;

  /**
   * Cap on tool calls within a single LLM turn (G3), guarding against
   * runaway tool-call loops. Defaults to 20.
   */
  maxToolCallsPerTurn?: number;

  /**
   * Cap on destructive operations within one session (G3). Reaching it
   * adds a warning to the tool result. Defaults to 5.
   */
  maxDestructiveOpsPerSession?: number;

  /** Explicit model identifier, e.g. `'anthropic/claude-sonnet-4-20250514'`. */
  model?: string;

  /**
   * Enable automatic model routing by task complexity (Gap 18):
   * simple queries → haiku, moderate → sonnet, complex → opus.
   * Ignored when `model` is set explicitly.
   */
  autoRouteModel?: boolean;

  /** Working directory the agent operates in. */
  cwd?: string;

  /** Contents of the project's NIMBUS.md, injected into the system prompt. */
  nimbusInstructions?: string;

  /**
   * Live infrastructure context (terraform workspace, kubectl context, etc.)
   * injected into the system prompt (Gaps 7 & 10). When omitted and `cwd` is
   * set, the loop attempts a best-effort auto-discovery instead.
   */
  infraContext?: {
    terraformWorkspace?: string;
    kubectlContext?: string;
    helmReleases?: string[];
    awsAccount?: string;
    awsRegion?: string;
    gcpProject?: string;
  };

  /** Receives each piece of streamed text output as it arrives. */
  onText?: (text: string) => void;

  /** Invoked when a tool call begins. */
  onToolCallStart?: (toolCall: ToolCallInfo) => void;

  /** Invoked when a tool call finishes, together with its result. */
  onToolCallEnd?: (toolCall: ToolCallInfo, result: ToolResult) => void;

  /**
   * Invoked for every chunk of streamed tool output (Gap 1 — live streaming).
   * Receives the tool call ID and the chunk text.
   */
  onToolOutputChunk?: (toolId: string, chunk: string) => void;

  /**
   * Permission gate consulted before a tool executes.
   * When omitted, every tool runs without prompting.
   */
  checkPermission?: (tool: ToolDefinition, input: unknown) => Promise<PermissionDecision>;

  /** AbortSignal used for cancellation (Ctrl+C). */
  signal?: AbortSignal;

  /** Session ID for persistence (reserved for future use). */
  sessionId?: string;

  /**
   * Optional context manager for auto-compact. When supplied, the loop
   * checks context usage after each tool-call turn and triggers compaction
   * once the threshold is exceeded.
   */
  contextManager?: ContextManager;

  /**
   * Invoked when auto-compact runs; receives the compaction result,
   * including token-savings information.
   */
  onCompact?: (result: CompactionResult) => void;

  /**
   * Optional LSP manager for post-edit diagnostics. When supplied, the loop
   * queries the language server after file-editing tools and appends any
   * diagnostics to the tool result so the LLM can self-correct.
   */
  lspManager?: LSPManager;

  /**
   * Optional snapshot manager. When supplied, a snapshot is captured before
   * each file-modifying tool call so users can undo/redo changes.
   */
  snapshotManager?: SnapshotManager;

  /**
   * Optional hook engine for PreToolUse/PostToolUse/PermissionRequest hooks.
   * When supplied, matching hook scripts run around each tool call.
   */
  hookEngine?: HookEngine;

  /**
   * Invoked after each LLM turn with the accumulated usage and cost, so a
   * UI can refresh its cost/token display during multi-turn runs rather
   * than only at the end.
   */
  onUsage?: (usage: AgentLoopUsage, costUSD: number) => void;

  /**
   * Optional diff preview shown before file-mutating tools
   * (edit_file/multi_edit/write_file). The returned decision controls what
   * happens next: 'reject' skips the tool call, 'apply-all' disables further
   * prompts, and 'reject-all' auto-rejects the remaining prompts.
   */
  requestFileDiff?: (
    path: string,
    toolName: string,
    diff: string
  ) => Promise<FileDiffDecision>;

  /**
   * Internal flag associated with the 'apply-all' diff decision — skips the
   * remaining diff prompts for the current turn. Set externally by the TUI
   * launcher; the loop uses it as the initial value of its own flag.
   */
  skipRemainingDiffPrompts?: boolean;

  /**
   * Internal flag associated with the 'reject-all' diff decision —
   * auto-rejects the remaining diff prompts for the current turn. Set
   * externally by the TUI launcher; the loop uses it as the initial value of
   * its own flag.
   */
  rejectRemainingDiffPrompts?: boolean;

  /**
   * M1: Dry-run mode. When true, forces plan mode and prepends a hard
   * constraint to the system prompt telling the agent not to execute any
   * mutating operations.
   */
  dryRun?: boolean;

  /**
   * G16: Maximum cost in USD per session. Once the cumulative LLM cost
   * exceeds this value the loop stops and returns a budget-exceeded message.
   */
  costBudgetUSD?: number;

  /**
   * G21: Stream silence timeout in milliseconds. When omitted, the loop
   * itself falls back to 60_000.
   * NOTE(review): an earlier note says the default derives from
   * config.agentTurnTimeoutSeconds * 1000 — presumably resolved by the
   * caller before the options reach the loop; confirm.
   */
  streamSilenceTimeoutMs?: number;

  /**
   * GAP-20: Per-tool timeout overrides taken from the NIMBUS.md Tool
   * Timeouts section. Maps tool name → timeout in milliseconds; the value is
   * threaded into the tool's ToolExecuteContext to override its built-in
   * default.
   */
  toolTimeouts?: Record<string, number>;
}
|
|
514
|
-
|
|
515
|
-
/** Describes one tool call while it is in flight. */
export interface ToolCallInfo {
  /** Unique identifier for this call, as assigned by the provider. */
  id: string;

  /** Name of the tool, matching its entry in the registry. */
  name: string;

  /** Input arguments after parsing. */
  input: unknown;

  /** Start time of the call as a Unix timestamp from `Date.now()`. */
  startTime: number;
}
|
|
529
|
-
|
|
530
|
-
/**
 * Outcome of a permission check performed before a tool runs.
 *
 * - `allow` -- go ahead and execute the tool.
 * - `deny`  -- skip this invocation and tell the LLM it was denied.
 * - `block` -- skip it and tell the LLM the tool is permanently blocked.
 */
export type PermissionDecision =
  | 'allow'
  | 'deny'
  | 'block';
|
|
538
|
-
|
|
539
|
-
/**
 * Result of a per-file diff approval request.
 *
 * - `apply`      -- apply this change.
 * - `reject`     -- skip this change.
 * - `apply-all`  -- apply this and all remaining changes without further prompts.
 * - `reject-all` -- reject this and auto-reject all remaining changes without
 *                   further prompts.
 */
export type FileDiffDecision = 'apply' | 'reject' | 'apply-all' | 'reject-all';
|
|
547
|
-
|
|
548
|
-
/** Token usage summed over every LLM turn of a run. */
export interface AgentLoopUsage {
  /** Prompt (input) tokens consumed in total. */
  promptTokens: number;

  /** Completion (output) tokens consumed in total. */
  completionTokens: number;

  /** Prompt and completion tokens added together. */
  totalTokens: number;
}
|
|
559
|
-
|
|
560
|
-
/** What the agent loop hands back once it has finished. */
export interface AgentLoopResult {
  /** Conversation messages as they stand when the loop ends. */
  messages: LLMMessage[];

  /** How many LLM turns were taken. */
  turns: number;

  /** True when the loop was cut short through the AbortSignal. */
  interrupted: boolean;

  /** Token usage summed across all turns. */
  usage: AgentLoopUsage;

  /** Estimated total cost in USD. */
  totalCost: number;
}
|
|
577
|
-
|
|
578
|
-
// ---------------------------------------------------------------------------
|
|
579
|
-
// Constants
|
|
580
|
-
// ---------------------------------------------------------------------------
|
|
581
|
-
|
|
582
|
-
/** Model identifier the loop falls back to when the caller does not name one. */
const DEFAULT_MODEL = 'anthropic/claude-sonnet-4-20250514';
|
|
584
|
-
|
|
585
|
-
// ---------------------------------------------------------------------------
|
|
586
|
-
// H5: Cost delta hint after terraform apply / helm upgrade
|
|
587
|
-
// ---------------------------------------------------------------------------
|
|
588
|
-
|
|
589
|
-
/**
 * Derive a short cost-related hint from the output of an infrastructure
 * operation, for display after `terraform apply` or `helm install/upgrade`.
 *
 * - terraform apply: parses the "Resources: N added, M changed, K destroyed"
 *   summary (optionally preceded by "X imported,") and lists the non-zero
 *   counts. Returns `null` when nothing changed or no summary is found.
 * - helm install/upgrade: returns a deployment note unless the output
 *   contains `Error` or `FAILED`.
 *
 * @param toolName - Registry name of the tool that ran.
 * @param input - Parsed tool arguments; `action` selects the operation and
 *                `releaseName` / `release` names the helm release.
 * @param output - Raw text output of the tool.
 * @returns The hint text, or `null` when no hint applies.
 */
function extractCostHintFromToolOutput(toolName: string, input: Record<string, unknown>, output: string): string | null {
  const action = String(input.action);

  if (toolName === 'terraform' && action === 'apply') {
    // Applies that include imports print "Resources: X imported, N added, ...".
    // The optional group keeps the summary recognizable in that case; imports
    // themselves are not reported because they create no new resources.
    const summary = output.match(
      /Resources:\s*(?:\d+ imported,\s*)?(\d+) added,\s*(\d+) changed,\s*(\d+) destroyed/
    );
    if (summary) {
      const added = Number(summary[1]);
      const changed = Number(summary[2]);
      const destroyed = Number(summary[3]);
      const parts: string[] = [];
      if (added > 0) parts.push(`+${added} resources created`);
      if (changed > 0) parts.push(`${changed} updated`);
      if (destroyed > 0) parts.push(`${destroyed} destroyed`);
      return parts.length > 0
        ? `${parts.join(', ')} — run "nimbus cost" for monthly cost estimate`
        : null;
    }
  }

  if (toolName === 'helm' && (action === 'install' || action === 'upgrade')) {
    const releaseName = String(input.releaseName ?? input.release ?? '');
    // Plain substring check: any "Error"/"FAILED" in the output suppresses the hint.
    if (!output.includes('Error') && !output.includes('FAILED')) {
      return `Helm release "${releaseName}" deployed — run "nimbus cost" for estimated cost impact`;
    }
  }

  return null;
}
|
|
619
|
-
|
|
620
|
-
// ---------------------------------------------------------------------------
|
|
621
|
-
// M4: Session-scoped error tracking for NIMBUS.md persistence
|
|
622
|
-
// ---------------------------------------------------------------------------
|
|
623
|
-
|
|
624
|
-
/** Occurrence counts per `tool:hint-prefix` key, kept for the life of the module. */
const sessionErrorCounts = new Map<string, number>();

/**
 * Count occurrences of an error hint and, on exactly the third occurrence,
 * record it in the project's NIMBUS.md under a "## Observed Issues" heading.
 *
 * Nothing is written when NIMBUS.md does not exist in `cwd`, or when the
 * first 40 characters of the hint already appear in the file. New entries go
 * directly under the heading line; when the heading is missing it is
 * appended to the end of the file together with the entry. All file-system
 * failures are swallowed because persistence is best-effort.
 *
 * @param toolName - Registry name of the tool that produced the error.
 * @param errorHint - Hint text associated with the error.
 * @param cwd - Directory expected to contain NIMBUS.md.
 */
function trackAndPersistError(toolName: string, errorHint: string, cwd: string): void {
  // The key uses a 60-character prefix so long hints still group together.
  const key = `${toolName}:${errorHint.slice(0, 60)}`;
  const count = (sessionErrorCounts.get(key) ?? 0) + 1;
  sessionErrorCounts.set(key, count);

  // Persist once, on the third occurrence only.
  if (count !== 3) return;

  try {
    // NOTE(review): relies on a global `require`; if this module is ever
    // emitted as native ESM the call throws and is swallowed by the catch
    // below, silently disabling persistence — confirm the build target.
    const { existsSync, readFileSync, writeFileSync, appendFileSync } = require('node:fs') as typeof import('node:fs');
    const { join } = require('node:path') as typeof import('node:path');
    const nimbusPath = join(cwd, 'NIMBUS.md');
    if (!existsSync(nimbusPath)) return;
    const existing = readFileSync(nimbusPath, 'utf-8');
    if (existing.includes(errorHint.slice(0, 40))) return; // already recorded

    const heading = '## Observed Issues';
    const entry = `- ${toolName}: ${errorHint}\n`;
    const headingIdx = existing.indexOf(heading);
    if (headingIdx === -1) {
      appendFileSync(nimbusPath, `\n${heading}\n${entry}`);
      return;
    }

    // Insert after the END of the heading line rather than replacing the
    // literal "heading + \n". This also covers a heading that is the last
    // line without a trailing newline, or one followed by spaces / CRLF, and
    // plain slicing avoids `$`-pattern expansion in the hint text.
    const lineEnd = existing.indexOf('\n', headingIdx);
    const updated =
      lineEnd === -1
        ? `${existing}\n${entry}`
        : existing.slice(0, lineEnd + 1) + entry + existing.slice(lineEnd + 1);
    writeFileSync(nimbusPath, updated);
  } catch { /* non-critical */ }
}
|
|
648
|
-
|
|
649
|
-
// ---------------------------------------------------------------------------
|
|
650
|
-
// M6: Destructive action guard — force confirmation before terraform destroy / kubectl delete
|
|
651
|
-
// ---------------------------------------------------------------------------
|
|
652
|
-
|
|
653
|
-
/**
 * Identify tool calls that must be explicitly confirmed with the user.
 *
 * The action is read from `input.action`, falling back to `input.command`.
 * Three combinations are guarded: `terraform destroy`, `kubectl delete`
 * and `helm uninstall`.
 *
 * @param toolName - Registry name of the tool being invoked.
 * @param input - Parsed tool arguments.
 * @returns A warning message for a guarded action, otherwise `null`.
 */
function isDestructiveAction(toolName: string, input: Record<string, unknown>): string | null {
  const requested = String(input.action ?? input.command ?? '');

  switch (toolName) {
    case 'terraform':
      return requested === 'destroy'
        ? 'terraform destroy will PERMANENTLY DELETE all managed infrastructure. Explicitly confirm with the user before proceeding.'
        : null;

    case 'kubectl': {
      if (requested !== 'delete') return null;
      const target = String(input.resource ?? '');
      return `kubectl delete ${target} is IRREVERSIBLE. Explicitly confirm with the user before proceeding.`;
    }

    case 'helm':
      return requested === 'uninstall'
        ? 'helm uninstall will remove the release and its resources. Explicitly confirm with the user before proceeding.'
        : null;

    default:
      return null;
  }
}
|
|
667
|
-
|
|
668
|
-
/**
 * One cached terraform plan.
 *
 * Entries live in a module-level map keyed by workdir so that a plan result
 * can be looked up again later (e.g. for the apply call) without re-running
 * the plan. An entry is considered stale once it is older than the TTL below.
 */
interface TerraformPlanCacheEntry {
  output: string;
  workdir: string;
  timestamp: number;
}

/** Lifetime of a cached plan: 10 minutes, in milliseconds. */
const PLAN_CACHE_TTL_MS = 10 * 60 * 1000;

/** workdir → most recent cached plan for that workdir. */
const terraformPlanCache = new Map<string, TerraformPlanCacheEntry>();

/**
 * Remember the plan output for a workdir, replacing any earlier entry and
 * stamping it with the current time.
 */
function cacheTerraformPlan(workdir: string, output: string): void {
  const fresh: TerraformPlanCacheEntry = {
    output,
    workdir,
    timestamp: Date.now(),
  };
  terraformPlanCache.set(workdir, fresh);
}

/**
 * Look up the cached plan output for a workdir.
 *
 * @returns The stored output, or `null` when nothing is cached or the entry
 *          has outlived the TTL (a stale entry is evicted on the way out).
 */
function getCachedTerraformPlan(workdir: string): string | null {
  const cached = terraformPlanCache.get(workdir);
  if (!cached) {
    return null;
  }

  const ageMs = Date.now() - cached.timestamp;
  if (ageMs > PLAN_CACHE_TTL_MS) {
    terraformPlanCache.delete(workdir);
    return null;
  }

  return cached.output;
}
|
|
698
|
-
|
|
699
|
-
/**
 * Timer that sweeps the terraform plan cache every 60 seconds and drops
 * entries older than the TTL.
 *
 * The timer is `.unref()`-ed so that it never keeps the process alive on its
 * own. It is exported so tests can clear it during teardown.
 */
export const _planCacheCleanupInterval: ReturnType<typeof setInterval> = setInterval(() => {
  // Anything stamped before this instant has exceeded its TTL.
  const cutoff = Date.now() - PLAN_CACHE_TTL_MS;
  terraformPlanCache.forEach((cached, workdir) => {
    if (cached.timestamp < cutoff) {
      terraformPlanCache.delete(workdir);
    }
  });
}, 60_000).unref();
|
|
712
|
-
|
|
713
|
-
/** Output-token ceiling requested for each LLM call unless overridden. */
const DEFAULT_MAX_TOKENS = 8192;

/** Number of agent turns allowed when the caller sets no limit. */
const DEFAULT_MAX_TURNS = 50;

/**
 * Character limit for tool output kept in the conversation history.
 * Output beyond this length is truncated to protect the context window.
 */
const MAX_TOOL_OUTPUT_CHARS = 100_000;
|
|
722
|
-
|
|
723
|
-
// ---------------------------------------------------------------------------
|
|
724
|
-
// Main Entry Point
|
|
725
|
-
// ---------------------------------------------------------------------------
|
|
726
|
-
|
|
727
|
-
/**
|
|
728
|
-
* Run the agentic loop.
|
|
729
|
-
*
|
|
730
|
-
* Takes a user message and existing conversation history, then runs
|
|
731
|
-
* the LLM in a loop until it stops requesting tool calls.
|
|
732
|
-
*
|
|
733
|
-
* The loop terminates when any of the following conditions are met:
|
|
734
|
-
* - The LLM returns a response with no tool calls (natural end).
|
|
735
|
-
* - The maximum number of turns is reached.
|
|
736
|
-
* - The AbortSignal fires (e.g. user presses Ctrl+C).
|
|
737
|
-
* - An unrecoverable LLM API error occurs.
|
|
738
|
-
*
|
|
739
|
-
* @param userMessage - The new user message to process.
|
|
740
|
-
* @param history - Prior conversation messages (may be empty for a fresh session).
|
|
741
|
-
* @param options - Configuration for the loop.
|
|
742
|
-
* @returns The final conversation state, turn count, usage, and cost.
|
|
743
|
-
*/
|
|
744
|
-
export async function runAgentLoop(
|
|
745
|
-
userMessage: string,
|
|
746
|
-
history: LLMMessage[],
|
|
747
|
-
options: AgentLoopOptions
|
|
748
|
-
): Promise<AgentLoopResult> {
|
|
749
|
-
const {
|
|
750
|
-
router,
|
|
751
|
-
toolRegistry,
|
|
752
|
-
mode,
|
|
753
|
-
maxTurns = DEFAULT_MAX_TURNS,
|
|
754
|
-
model,
|
|
755
|
-
cwd,
|
|
756
|
-
nimbusInstructions,
|
|
757
|
-
onText,
|
|
758
|
-
onToolCallStart,
|
|
759
|
-
onToolCallEnd,
|
|
760
|
-
onToolOutputChunk,
|
|
761
|
-
checkPermission,
|
|
762
|
-
signal,
|
|
763
|
-
} = options;
|
|
764
|
-
|
|
765
|
-
// -----------------------------------------------------------------------
|
|
766
|
-
// 1. Prepare tools and system prompt
|
|
767
|
-
// -----------------------------------------------------------------------
|
|
768
|
-
|
|
769
|
-
const tools = getToolsForMode(toolRegistry.getAll(), mode);
|
|
770
|
-
|
|
771
|
-
// H3: Auto-discover infra context if not provided and cwd is set (best-effort, cached per cwd)
|
|
772
|
-
let resolvedInfraContext = options.infraContext;
|
|
773
|
-
if (!resolvedInfraContext && cwd) {
|
|
774
|
-
try {
|
|
775
|
-
const { discoverInfraContext } = await import('../cli/init');
|
|
776
|
-
resolvedInfraContext = await Promise.race([
|
|
777
|
-
discoverInfraContext(cwd),
|
|
778
|
-
new Promise<undefined>(r => setTimeout(() => r(undefined), 5000)),
|
|
779
|
-
]);
|
|
780
|
-
} catch { /* best-effort */ }
|
|
781
|
-
}
|
|
782
|
-
|
|
783
|
-
const systemPrompt = buildSystemPrompt({
|
|
784
|
-
mode,
|
|
785
|
-
tools,
|
|
786
|
-
nimbusInstructions,
|
|
787
|
-
cwd,
|
|
788
|
-
infraContext: resolvedInfraContext,
|
|
789
|
-
dryRun: options.dryRun,
|
|
790
|
-
});
|
|
791
|
-
|
|
792
|
-
// Convert agentic ToolDefinitions to the LLM-level format expected by
|
|
793
|
-
// the router's routeWithTools() method (OpenAI function-calling shape).
|
|
794
|
-
const llmTools: LLMToolDefinition[] = tools.map(toOpenAITool);
|
|
795
|
-
|
|
796
|
-
// -----------------------------------------------------------------------
|
|
797
|
-
// 2. Initialize conversation state
|
|
798
|
-
// -----------------------------------------------------------------------
|
|
799
|
-
|
|
800
|
-
// PERF-4a: Capacity-hinted pre-allocation avoids repeated V8 array reallocation
|
|
801
|
-
// as messages accumulate during a long conversation.
|
|
802
|
-
const messages: LLMMessage[] = new Array(Math.max(history.length + 1, 10));
|
|
803
|
-
messages.length = 0;
|
|
804
|
-
messages.push(...history, { role: 'user', content: userMessage });
|
|
805
|
-
|
|
806
|
-
let turns = 0;
|
|
807
|
-
let interrupted = false;
|
|
808
|
-
const totalUsage: AgentLoopUsage = {
|
|
809
|
-
promptTokens: 0,
|
|
810
|
-
completionTokens: 0,
|
|
811
|
-
totalTokens: 0,
|
|
812
|
-
};
|
|
813
|
-
let totalCost = 0;
|
|
814
|
-
|
|
815
|
-
// G3: Session-level destructive operation counter and per-turn tool call counter
|
|
816
|
-
let sessionDestructiveOps = 0;
|
|
817
|
-
const MAX_TOOL_CALLS_PER_TURN = options.maxToolCallsPerTurn ?? 20;
|
|
818
|
-
const MAX_DESTRUCTIVE_OPS_PER_SESSION = options.maxDestructiveOpsPerSession ?? 5;
|
|
819
|
-
|
|
820
|
-
// M2/M5: Track tool calls that have already received a credential-error retry message
|
|
821
|
-
// to avoid spamming the auth-refresh hint on repeated failures.
|
|
822
|
-
const credentialRetried = new Set<string>();
|
|
823
|
-
|
|
824
|
-
// G8: Track which terraform workdirs have had a plan run in this session.
|
|
825
|
-
// Used to warn when apply is run without a prior plan.
|
|
826
|
-
const terraformPlannedWorkdirs = new Set<string>();
|
|
827
|
-
|
|
828
|
-
// G10: One-time kubectl RBAC pre-flight check state.
|
|
829
|
-
// kubectlRbacChecked: ensures we only run `kubectl auth can-i --list` once per session.
|
|
830
|
-
// rbacPreamble: stores the RBAC output to inject into the first kubectl tool result.
|
|
831
|
-
let kubectlRbacChecked = false;
|
|
832
|
-
let rbacPreamble = '';
|
|
833
|
-
|
|
834
|
-
// G10: Pre-import async exec utilities so they're available inside the loop.
|
|
835
|
-
// Using async execFile avoids blocking the Node.js event loop for kubectl/terraform calls.
|
|
836
|
-
const { execFile: _execFile, exec: _exec } = await import('node:child_process');
|
|
837
|
-
const { promisify: _promisify } = await import('node:util');
|
|
838
|
-
const _execFileAsync = _promisify(_execFile);
|
|
839
|
-
const _execAsync = _promisify(_exec);
|
|
840
|
-
|
|
841
|
-
// PERF-4a: Pre-build the system message once so it can be reused every turn
|
|
842
|
-
// without allocating a new object on each loop iteration.
|
|
843
|
-
const _systemMessageObj: LLMMessage = { role: 'system', content: systemPrompt };
|
|
844
|
-
|
|
845
|
-
// Shared mutable ref: set to true by 'apply-all' diff decision to skip further prompts
|
|
846
|
-
const skipRemainingDiffPrompts = { value: options.skipRemainingDiffPrompts ?? false };
|
|
847
|
-
// Shared mutable ref: set to true by 'reject-all' diff decision to auto-reject further prompts
|
|
848
|
-
const rejectRemainingDiffPrompts = { value: options.rejectRemainingDiffPrompts ?? false };
|
|
849
|
-
|
|
850
|
-
// -----------------------------------------------------------------------
|
|
851
|
-
// 3. Main agent loop
|
|
852
|
-
// -----------------------------------------------------------------------
|
|
853
|
-
|
|
854
|
-
while (turns < maxTurns) {
|
|
855
|
-
// Check for cancellation before each turn
|
|
856
|
-
if (signal?.aborted) {
|
|
857
|
-
interrupted = true;
|
|
858
|
-
break;
|
|
859
|
-
}
|
|
860
|
-
|
|
861
|
-
turns++;
|
|
862
|
-
|
|
863
|
-
try {
|
|
864
|
-
// Gap 18: Auto-route model based on task complexity when no explicit model set
|
|
865
|
-
let effectiveModel = model ?? DEFAULT_MODEL;
|
|
866
|
-
if (!model && options.autoRouteModel) {
|
|
867
|
-
const lastUserMsg = [...messages].reverse().find(m => m.role === 'user');
|
|
868
|
-
const lastMsgText = lastUserMsg
|
|
869
|
-
? typeof lastUserMsg.content === 'string'
|
|
870
|
-
? lastUserMsg.content
|
|
871
|
-
: JSON.stringify(lastUserMsg.content)
|
|
872
|
-
: '';
|
|
873
|
-
const complexity = classifyTaskComplexity(lastMsgText);
|
|
874
|
-
effectiveModel = routeModel(complexity);
|
|
875
|
-
if (onText && turns === 1) {
|
|
876
|
-
onText(`\n[auto: ${effectiveModel.split('/').pop()?.replace('anthropic/', '') ?? effectiveModel}]\n`);
|
|
877
|
-
}
|
|
878
|
-
}
|
|
879
|
-
|
|
880
|
-
// Build the completion request with tool definitions.
|
|
881
|
-
// The systemMessageObj is pre-built before the loop (PERF-4a) — reuse it.
|
|
882
|
-
const allMessages: LLMMessage[] = new Array(messages.length + 1);
|
|
883
|
-
allMessages.length = 0;
|
|
884
|
-
allMessages.push(_systemMessageObj, ...messages);
|
|
885
|
-
const request: ToolCompletionRequest = {
|
|
886
|
-
messages: allMessages,
|
|
887
|
-
model: effectiveModel,
|
|
888
|
-
tools: llmTools,
|
|
889
|
-
maxTokens: DEFAULT_MAX_TOKENS,
|
|
890
|
-
};
|
|
891
|
-
|
|
892
|
-
// Stream text tokens incrementally via routeStreamWithTools.
|
|
893
|
-
// Tokens are forwarded to onText as they arrive; tool calls
|
|
894
|
-
// are accumulated from the final chunk.
|
|
895
|
-
let responseContent = '';
|
|
896
|
-
let responseToolCalls: ToolCall[] | undefined;
|
|
897
|
-
let responseUsage = { promptTokens: 0, completionTokens: 0, totalTokens: 0 };
|
|
898
|
-
|
|
899
|
-
// A1: Retry on transient errors (rate-limit / 5xx) with exponential backoff
|
|
900
|
-
const MAX_STREAM_RETRIES = 2;
|
|
901
|
-
let streamAttempt = 0;
|
|
902
|
-
while (true) {
|
|
903
|
-
// A2: Silence timeout — abort if no chunk arrives (G21: configurable)
|
|
904
|
-
const STREAM_SILENCE_MS = options.streamSilenceTimeoutMs ?? 60_000;
|
|
905
|
-
const silenceAbort = new AbortController();
|
|
906
|
-
let silenceTimer: ReturnType<typeof setTimeout> | undefined;
|
|
907
|
-
const resetSilence = () => {
|
|
908
|
-
clearTimeout(silenceTimer);
|
|
909
|
-
silenceTimer = setTimeout(() => silenceAbort.abort('Stream timeout'), STREAM_SILENCE_MS);
|
|
910
|
-
};
|
|
911
|
-
resetSilence();
|
|
912
|
-
|
|
913
|
-
try {
|
|
914
|
-
// Pass silence abort signal via request cast (non-standard but supported by most providers)
|
|
915
|
-
const requestWithSignal = { ...request, signal: silenceAbort.signal } as typeof request;
|
|
916
|
-
for await (const chunk of router.routeStreamWithTools(requestWithSignal)) {
|
|
917
|
-
resetSilence(); // reset on every chunk
|
|
918
|
-
if (chunk.content) {
|
|
919
|
-
responseContent += chunk.content;
|
|
920
|
-
if (onText) {
|
|
921
|
-
onText(chunk.content);
|
|
922
|
-
}
|
|
923
|
-
}
|
|
924
|
-
if (chunk.toolCallStart && onText) {
|
|
925
|
-
// Show early feedback when the LLM starts composing a tool call
|
|
926
|
-
onText(`\n[Preparing tool: ${chunk.toolCallStart.name}...]\n`);
|
|
927
|
-
}
|
|
928
|
-
if (chunk.toolCalls) {
|
|
929
|
-
responseToolCalls = chunk.toolCalls;
|
|
930
|
-
}
|
|
931
|
-
if (chunk.usage) {
|
|
932
|
-
responseUsage = chunk.usage;
|
|
933
|
-
}
|
|
934
|
-
}
|
|
935
|
-
clearTimeout(silenceTimer);
|
|
936
|
-
break; // success — exit retry loop
|
|
937
|
-
} catch (streamErr) {
|
|
938
|
-
clearTimeout(silenceTimer);
|
|
939
|
-
if (streamAttempt < MAX_STREAM_RETRIES && isRetryableStreamError(streamErr)) {
|
|
940
|
-
const delay = 1000 * Math.pow(2, streamAttempt);
|
|
941
|
-
if (onText) {
|
|
942
|
-
onText(`\n[Retrying after error (attempt ${streamAttempt + 1})...]\n`);
|
|
943
|
-
}
|
|
944
|
-
await new Promise(r => setTimeout(r, delay));
|
|
945
|
-
streamAttempt++;
|
|
946
|
-
// Reset partial accumulation before retry
|
|
947
|
-
responseContent = '';
|
|
948
|
-
responseToolCalls = undefined;
|
|
949
|
-
responseUsage = { promptTokens: 0, completionTokens: 0, totalTokens: 0 };
|
|
950
|
-
continue;
|
|
951
|
-
}
|
|
952
|
-
// G24: Graceful network error message instead of raw Node.js error
|
|
953
|
-
const streamErrObj = streamErr as Error | null;
|
|
954
|
-
const isNetworkError = /ECONNREFUSED|ETIMEDOUT|ENOTFOUND|fetch failed|network/i.test(streamErrObj?.message ?? '');
|
|
955
|
-
if (isNetworkError) {
|
|
956
|
-
const netMsg = '\n[!!] Network unreachable — cannot reach the LLM API.\nCheck your internet connection and API key validity, then try again.\n';
|
|
957
|
-
if (onText) onText(netMsg);
|
|
958
|
-
// Re-throw a specially-marked error so the outer turn catch block can handle it
|
|
959
|
-
const netErr = new Error(netMsg);
|
|
960
|
-
(netErr as Error & { _nimbusNetworkError?: boolean })._nimbusNetworkError = true;
|
|
961
|
-
throw netErr;
|
|
962
|
-
}
|
|
963
|
-
throw streamErr; // non-retryable — propagate to outer catch
|
|
964
|
-
}
|
|
965
|
-
}
|
|
966
|
-
|
|
967
|
-
// Accumulate usage and cost
|
|
968
|
-
totalUsage.promptTokens += responseUsage.promptTokens;
|
|
969
|
-
totalUsage.completionTokens += responseUsage.completionTokens;
|
|
970
|
-
totalUsage.totalTokens += responseUsage.totalTokens;
|
|
971
|
-
|
|
972
|
-
// Estimate cost for this turn
|
|
973
|
-
const resolvedModel = effectiveModel;
|
|
974
|
-
const providerName = resolvedModel.includes('/') ? resolvedModel.split('/')[0] : 'anthropic';
|
|
975
|
-
const modelName = resolvedModel.includes('/')
|
|
976
|
-
? resolvedModel.split('/').slice(1).join('/')
|
|
977
|
-
: resolvedModel;
|
|
978
|
-
const turnCost = calculateCost(
|
|
979
|
-
providerName,
|
|
980
|
-
modelName,
|
|
981
|
-
responseUsage.promptTokens,
|
|
982
|
-
responseUsage.completionTokens
|
|
983
|
-
);
|
|
984
|
-
totalCost += turnCost.costUSD;
|
|
985
|
-
|
|
986
|
-
// Notify caller of accumulated usage/cost after each turn
|
|
987
|
-
if (options.onUsage) {
|
|
988
|
-
options.onUsage(totalUsage, totalCost);
|
|
989
|
-
}
|
|
990
|
-
|
|
991
|
-
// M2: Emit per-turn token/cost stats as a dim system message in the TUI.
|
|
992
|
-
// Only emit when there was actual token usage (skip turns with 0 tokens).
|
|
993
|
-
if (onText && (responseUsage.promptTokens > 0 || responseUsage.completionTokens > 0)) {
|
|
994
|
-
const statsLine = `\n[${responseUsage.promptTokens} in / ${responseUsage.completionTokens} out — $${turnCost.costUSD.toFixed(4)}]\n`;
|
|
995
|
-
onText(statsLine);
|
|
996
|
-
}
|
|
997
|
-
|
|
998
|
-
// G16: Cost budget enforcement — stop if cumulative cost exceeds the limit
|
|
999
|
-
if (options.costBudgetUSD !== undefined && totalCost >= options.costBudgetUSD) {
|
|
1000
|
-
const budgetMsg = `\n\n[!!] Cost budget of $${options.costBudgetUSD.toFixed(2)} reached (used: $${totalCost.toFixed(3)}). Stopping to prevent overspend.\n`;
|
|
1001
|
-
if (onText) onText(budgetMsg);
|
|
1002
|
-
messages.push({ role: 'assistant', content: budgetMsg });
|
|
1003
|
-
break;
|
|
1004
|
-
}
|
|
1005
|
-
|
|
1006
|
-
// -----------------------------------------------------------------
|
|
1007
|
-
// No tool calls → the LLM is done
|
|
1008
|
-
// -----------------------------------------------------------------
|
|
1009
|
-
if (!responseToolCalls || responseToolCalls.length === 0) {
|
|
1010
|
-
messages.push({
|
|
1011
|
-
role: 'assistant',
|
|
1012
|
-
content: responseContent,
|
|
1013
|
-
});
|
|
1014
|
-
break;
|
|
1015
|
-
}
|
|
1016
|
-
|
|
1017
|
-
// -----------------------------------------------------------------
|
|
1018
|
-
// Tool calls present → execute each one
|
|
1019
|
-
// -----------------------------------------------------------------
|
|
1020
|
-
|
|
1021
|
-
// Append the assistant message that contains the tool calls
|
|
1022
|
-
messages.push({
|
|
1023
|
-
role: 'assistant',
|
|
1024
|
-
content: responseContent,
|
|
1025
|
-
toolCalls: responseToolCalls,
|
|
1026
|
-
});
|
|
1027
|
-
|
|
1028
|
-
// G3: Per-turn tool call counter — reset at the start of each tool-call batch
|
|
1029
|
-
let turnToolCallCount = 0;
|
|
1030
|
-
|
|
1031
|
-
// H2: Parallel dispatch for read-only tools (safe to run concurrently)
|
|
1032
|
-
const READ_ONLY_TOOLS = new Set([
|
|
1033
|
-
'read_file', 'glob', 'grep', 'cloud_discover', 'terraform_plan_analyze',
|
|
1034
|
-
'kubectl_context', 'helm_values', 'cost_estimate', 'drift_detect',
|
|
1035
|
-
]);
|
|
1036
|
-
const canRunInParallel = (tc: ToolCall): boolean => READ_ONLY_TOOLS.has(tc.function.name);
|
|
1037
|
-
const allReadOnly = responseToolCalls.every(canRunInParallel);
|
|
1038
|
-
|
|
1039
|
-
if (allReadOnly && responseToolCalls.length > 1) {
|
|
1040
|
-
// All tools are read-only — dispatch in parallel
|
|
1041
|
-
const parallelChunkCallback = onToolOutputChunk
|
|
1042
|
-
? (id: string) => (chunk: string) => onToolOutputChunk(id, chunk)
|
|
1043
|
-
: undefined;
|
|
1044
|
-
|
|
1045
|
-
const parallelResults = await Promise.allSettled(
|
|
1046
|
-
responseToolCalls.map(tc =>
|
|
1047
|
-
executeToolCall(
|
|
1048
|
-
tc,
|
|
1049
|
-
toolRegistry,
|
|
1050
|
-
onToolCallStart,
|
|
1051
|
-
onToolCallEnd,
|
|
1052
|
-
checkPermission,
|
|
1053
|
-
options.lspManager,
|
|
1054
|
-
options.snapshotManager,
|
|
1055
|
-
options.sessionId,
|
|
1056
|
-
signal,
|
|
1057
|
-
options.hookEngine,
|
|
1058
|
-
mode,
|
|
1059
|
-
options.requestFileDiff,
|
|
1060
|
-
skipRemainingDiffPrompts,
|
|
1061
|
-
rejectRemainingDiffPrompts,
|
|
1062
|
-
parallelChunkCallback ? parallelChunkCallback(tc.id) : undefined,
|
|
1063
|
-
options.toolTimeouts,
|
|
1064
|
-
options.infraContext
|
|
1065
|
-
)
|
|
1066
|
-
)
|
|
1067
|
-
);
|
|
1068
|
-
|
|
1069
|
-
for (let pi = 0; pi < responseToolCalls.length; pi++) {
|
|
1070
|
-
const tc = responseToolCalls[pi];
|
|
1071
|
-
const pResult = parallelResults[pi];
|
|
1072
|
-
const pContent = pResult.status === 'fulfilled'
|
|
1073
|
-
? (pResult.value.isError ? `Error: ${pResult.value.error}` : pResult.value.output)
|
|
1074
|
-
: `Error: ${pResult.reason}`;
|
|
1075
|
-
messages.push({ role: 'tool', toolCallId: tc.id, name: tc.function.name, content: pContent });
|
|
1076
|
-
}
|
|
1077
|
-
// Skip sequential processing — jump directly to next LLM turn
|
|
1078
|
-
continue;
|
|
1079
|
-
}
|
|
1080
|
-
|
|
1081
|
-
// Process tool calls sequentially (order may matter for side effects)
|
|
1082
|
-
for (const toolCall of responseToolCalls) {
|
|
1083
|
-
// Check for cancellation between tool calls
|
|
1084
|
-
if (signal?.aborted) {
|
|
1085
|
-
interrupted = true;
|
|
1086
|
-
break;
|
|
1087
|
-
}
|
|
1088
|
-
|
|
1089
|
-
// G3: Enforce per-turn tool call limit to prevent runaway loops
|
|
1090
|
-
turnToolCallCount++;
|
|
1091
|
-
if (turnToolCallCount > MAX_TOOL_CALLS_PER_TURN) {
|
|
1092
|
-
messages.push({
|
|
1093
|
-
role: 'tool',
|
|
1094
|
-
toolCallId: toolCall.id,
|
|
1095
|
-
name: toolCall.function.name,
|
|
1096
|
-
content: `[Tool limit reached: ${MAX_TOOL_CALLS_PER_TURN} tool calls in this turn. Summarizing progress and stopping to avoid runaway execution.]`,
|
|
1097
|
-
});
|
|
1098
|
-
break;
|
|
1099
|
-
}
|
|
1100
|
-
|
|
1101
|
-
// G3: Count destructive operations at the session level
|
|
1102
|
-
if (isDestructiveOp(toolCall.function.name, toolCall.function.arguments)) {
|
|
1103
|
-
sessionDestructiveOps++;
|
|
1104
|
-
}
|
|
1105
|
-
|
|
1106
|
-
// G10: One-time kubectl RBAC pre-flight check — runs before the first kubectl call
|
|
1107
|
-
// in this session. Stores the RBAC permissions summary in rbacPreamble so it can
|
|
1108
|
-
// be injected into the first kubectl tool result (keeps conversation structure valid).
|
|
1109
|
-
// Uses async execFile to avoid blocking the Node.js event loop (up to 5s call).
|
|
1110
|
-
if (!kubectlRbacChecked && toolCall.function.name === 'kubectl') {
|
|
1111
|
-
kubectlRbacChecked = true;
|
|
1112
|
-
try {
|
|
1113
|
-
const { stdout: rbacOut } = await _execFileAsync('kubectl', ['auth', 'can-i', '--list'], {
|
|
1114
|
-
encoding: 'utf-8', timeout: 5000,
|
|
1115
|
-
});
|
|
1116
|
-
const truncated = rbacOut.length > 1500
|
|
1117
|
-
? `${rbacOut.slice(0, 1500)}\n...[truncated]`
|
|
1118
|
-
: rbacOut;
|
|
1119
|
-
rbacPreamble = `[kubectl RBAC context: permissions available in current context]\n${truncated}\n\n`;
|
|
1120
|
-
} catch { /* non-critical — RBAC check failure does not block kubectl */ }
|
|
1121
|
-
}
|
|
1122
|
-
|
|
1123
|
-
// M6: Destructive action guard — inject warning into LLM context before executing
|
|
1124
|
-
try {
|
|
1125
|
-
const m6Input = JSON.parse(toolCall.function.arguments) as Record<string, unknown>;
|
|
1126
|
-
const destructiveWarning = isDestructiveAction(toolCall.function.name, m6Input);
|
|
1127
|
-
if (destructiveWarning) {
|
|
1128
|
-
messages.push({
|
|
1129
|
-
role: 'tool',
|
|
1130
|
-
toolCallId: toolCall.id + '-guard',
|
|
1131
|
-
name: toolCall.function.name,
|
|
1132
|
-
content: `[SAFETY] ${destructiveWarning}`,
|
|
1133
|
-
});
|
|
1134
|
-
}
|
|
1135
|
-
} catch { /* ignore parse errors */ }
|
|
1136
|
-
|
|
1137
|
-
// Build chunk callback that forwards tool output to the TUI in real-time
|
|
1138
|
-
const chunkCallback = onToolOutputChunk
|
|
1139
|
-
? (chunk: string) => onToolOutputChunk(toolCall.id, chunk)
|
|
1140
|
-
: undefined;
|
|
1141
|
-
|
|
1142
|
-
const result = await executeToolCall(
|
|
1143
|
-
toolCall,
|
|
1144
|
-
toolRegistry,
|
|
1145
|
-
onToolCallStart,
|
|
1146
|
-
onToolCallEnd,
|
|
1147
|
-
checkPermission,
|
|
1148
|
-
options.lspManager,
|
|
1149
|
-
options.snapshotManager,
|
|
1150
|
-
options.sessionId,
|
|
1151
|
-
signal,
|
|
1152
|
-
options.hookEngine,
|
|
1153
|
-
mode,
|
|
1154
|
-
options.requestFileDiff,
|
|
1155
|
-
skipRemainingDiffPrompts,
|
|
1156
|
-
rejectRemainingDiffPrompts,
|
|
1157
|
-
chunkCallback,
|
|
1158
|
-
options.toolTimeouts,
|
|
1159
|
-
options.infraContext
|
|
1160
|
-
);
|
|
1161
|
-
|
|
1162
|
-
// Append each tool result as a separate message so the LLM can
|
|
1163
|
-
// match it to the corresponding tool_use block by toolCallId.
|
|
1164
|
-
let toolContent = result.isError ? `Error: ${result.error}` : result.output;
|
|
1165
|
-
|
|
1166
|
-
// G10: Inject RBAC context preamble into the first kubectl result
|
|
1167
|
-
if (rbacPreamble && toolCall.function.name === 'kubectl') {
|
|
1168
|
-
toolContent = rbacPreamble + toolContent;
|
|
1169
|
-
rbacPreamble = ''; // consume once — only injected into the first kubectl result
|
|
1170
|
-
}
|
|
1171
|
-
|
|
1172
|
-
// Inject DevOps error classification hints to guide self-correction
|
|
1173
|
-
if (result.isError && result.error) {
|
|
1174
|
-
const hint = classifyDevOpsError(toolCall.function.name, result.error, options.nimbusInstructions);
|
|
1175
|
-
if (hint) {
|
|
1176
|
-
toolContent += `\n\n${hint}`;
|
|
1177
|
-
// C4: Also show hint in TUI error output (not just LLM context)
|
|
1178
|
-
result.output += `\n\n${hint}`;
|
|
1179
|
-
|
|
1180
|
-
// M2/M5: Auto-retry signal on credential expiry errors
|
|
1181
|
-
// If the classified hint indicates a credential/auth problem, append
|
|
1182
|
-
// a structured prompt so the agent knows to run auth-refresh, and
|
|
1183
|
-
// set provider-specific env hints for the auth-refresh command.
|
|
1184
|
-
const isCredentialError =
|
|
1185
|
-
hint.toLowerCase().includes('credential') ||
|
|
1186
|
-
hint.toLowerCase().includes('expired') ||
|
|
1187
|
-
hint.toLowerCase().includes('auth') ||
|
|
1188
|
-
hint.toLowerCase().includes('login required');
|
|
1189
|
-
|
|
1190
|
-
if (isCredentialError && !credentialRetried.has(toolCall.id ?? toolCall.function.name)) {
|
|
1191
|
-
credentialRetried.add(toolCall.id ?? toolCall.function.name);
|
|
1192
|
-
|
|
1193
|
-
// M5: Set provider-specific refresh hint env vars so auth-refresh
|
|
1194
|
-
// can surface targeted guidance when invoked by the user.
|
|
1195
|
-
const errorLower = (result.error ?? '').toLowerCase();
|
|
1196
|
-
if (errorLower.includes('aws')) {
|
|
1197
|
-
process.env.NIMBUS_AWS_REFRESH_HINT = '1';
|
|
1198
|
-
}
|
|
1199
|
-
if (errorLower.includes('gcp') || errorLower.includes('google')) {
|
|
1200
|
-
process.env.NIMBUS_GCP_REFRESH_HINT = '1';
|
|
1201
|
-
}
|
|
1202
|
-
if (errorLower.includes('azure')) {
|
|
1203
|
-
process.env.NIMBUS_AZURE_REFRESH_HINT = '1';
|
|
1204
|
-
}
|
|
1205
|
-
|
|
1206
|
-
const refreshMsg = [
|
|
1207
|
-
'[!!] Credential expired. Run: nimbus auth-refresh',
|
|
1208
|
-
'[Nimbus] Credential error detected on tool: ' + toolCall.function.name,
|
|
1209
|
-
'Run "nimbus auth-refresh" to refresh cloud credentials, then retry.',
|
|
1210
|
-
].join('\n');
|
|
1211
|
-
toolContent += '\n\n' + refreshMsg;
|
|
1212
|
-
result.output += '\n\n' + refreshMsg;
|
|
1213
|
-
}
|
|
1214
|
-
} else if (DEVOPS_TOOL_NAMES.has(toolCall.function.name)) {
|
|
1215
|
-
// Unknown DevOps error — provide structured self-diagnosis steps
|
|
1216
|
-
toolContent += [
|
|
1217
|
-
'\n\n--- Self-Diagnosis Steps ---',
|
|
1218
|
-
'1. Check tool is installed: `which terraform` / `kubectl version` / `helm version`',
|
|
1219
|
-
'2. Check credentials: `aws sts get-caller-identity` / `gcloud auth list` / `az account show`',
|
|
1220
|
-
'3. Check network connectivity to the cluster/cloud provider',
|
|
1221
|
-
'4. Retry with verbose flag if available (e.g., TF_LOG=DEBUG, kubectl --v=6)',
|
|
1222
|
-
'5. If the error persists, report the exact error message and the command that caused it.',
|
|
1223
|
-
].join('\n');
|
|
1224
|
-
}
|
|
1225
|
-
// M4: Track recurring errors and persist to NIMBUS.md after 3 occurrences
|
|
1226
|
-
const m4Hint = classifyDevOpsError(toolCall.function.name, result.error ?? '', options.nimbusInstructions);
|
|
1227
|
-
if (m4Hint) {
|
|
1228
|
-
trackAndPersistError(toolCall.function.name, m4Hint, options.cwd ?? process.cwd());
|
|
1229
|
-
}
|
|
1230
|
-
}
|
|
1231
|
-
|
|
1232
|
-
// H5: Inject cost delta hint after successful infra operations
|
|
1233
|
-
if (!result.isError) {
|
|
1234
|
-
try {
|
|
1235
|
-
const h5Input = JSON.parse(toolCall.function.arguments) as Record<string, unknown>;
|
|
1236
|
-
const costHint = extractCostHintFromToolOutput(toolCall.function.name, h5Input, result.output);
|
|
1237
|
-
if (costHint) {
|
|
1238
|
-
onText?.(`\n[cost] ${costHint}\n`);
|
|
1239
|
-
}
|
|
1240
|
-
} catch { /* ignore parse errors */ }
|
|
1241
|
-
}
|
|
1242
|
-
|
|
1243
|
-
// L6: Auto-generate runbook after terraform apply success
|
|
1244
|
-
if (!result.isError && toolCall.function.name === 'terraform') {
|
|
1245
|
-
try {
|
|
1246
|
-
const l6Input = JSON.parse(toolCall.function.arguments) as Record<string, unknown>;
|
|
1247
|
-
if (String(l6Input.action) === 'apply') {
|
|
1248
|
-
const l6Match = result.output.match(/Resources:\s*(\d+) added/);
|
|
1249
|
-
if (l6Match && parseInt(l6Match[1] ?? '0', 10) > 0) {
|
|
1250
|
-
const { join: _l6Join } = require('node:path') as typeof import('node:path');
|
|
1251
|
-
const { homedir: _l6Homedir } = require('node:os') as typeof import('node:os');
|
|
1252
|
-
const { mkdirSync: _l6MkdirSync, writeFileSync: _l6WriteFileSync } = require('node:fs') as typeof import('node:fs');
|
|
1253
|
-
const runbookDir = _l6Join(_l6Homedir(), '.nimbus', 'runbooks');
|
|
1254
|
-
_l6MkdirSync(runbookDir, { recursive: true });
|
|
1255
|
-
const ts = new Date().toISOString().replace(/[:.]/g, '-');
|
|
1256
|
-
const runbookPath = _l6Join(runbookDir, `terraform-apply-${ts}.md`);
|
|
1257
|
-
const runbookContent = [
|
|
1258
|
-
'# Terraform Apply Runbook',
|
|
1259
|
-
'',
|
|
1260
|
-
`Date: ${new Date().toLocaleString()}`,
|
|
1261
|
-
'',
|
|
1262
|
-
'Apply output:',
|
|
1263
|
-
'```',
|
|
1264
|
-
result.output.slice(0, 2000),
|
|
1265
|
-
'```',
|
|
1266
|
-
'',
|
|
1267
|
-
'## Rollback',
|
|
1268
|
-
'',
|
|
1269
|
-
'To rollback, run `terraform destroy` or restore from a previous state.',
|
|
1270
|
-
].join('\n');
|
|
1271
|
-
_l6WriteFileSync(runbookPath, runbookContent, 'utf-8');
|
|
1272
|
-
options.onText?.(`\n[runbook] Saved to ${runbookPath}\n`);
|
|
1273
|
-
}
|
|
1274
|
-
}
|
|
1275
|
-
} catch { /* non-critical */ }
|
|
1276
|
-
}
|
|
1277
|
-
|
|
1278
|
-
// GAP-25: Structured audit trail for destructive operations
|
|
1279
|
-
if (!result.isError && isDestructiveOp(toolCall.function.name, toolCall.function.arguments)) {
|
|
1280
|
-
try {
|
|
1281
|
-
const { appendFileSync, mkdirSync } = await import('node:fs');
|
|
1282
|
-
const { homedir } = await import('node:os');
|
|
1283
|
-
const { join } = await import('node:path');
|
|
1284
|
-
const auditDir = join(homedir(), '.nimbus');
|
|
1285
|
-
mkdirSync(auditDir, { recursive: true });
|
|
1286
|
-
const event = JSON.stringify({
|
|
1287
|
-
type: 'infra-change',
|
|
1288
|
-
tool: toolCall.function.name,
|
|
1289
|
-
action: (JSON.parse(toolCall.function.arguments) as Record<string, unknown>).action,
|
|
1290
|
-
sessionId: options.sessionId ?? 'unknown',
|
|
1291
|
-
cwd: options.cwd ?? process.cwd(),
|
|
1292
|
-
timestamp: new Date().toISOString(),
|
|
1293
|
-
});
|
|
1294
|
-
appendFileSync(join(auditDir, 'audit.jsonl'), event + '\n', 'utf-8');
|
|
1295
|
-
} catch { /* audit logging is non-critical */ }
|
|
1296
|
-
}
|
|
1297
|
-
|
|
1298
|
-
// G3: Append a warning when session-level destructive op threshold is reached
|
|
1299
|
-
if (sessionDestructiveOps >= MAX_DESTRUCTIVE_OPS_PER_SESSION) {
|
|
1300
|
-
toolContent += `\n\n[Warning: ${sessionDestructiveOps} destructive operations executed in this session. Review changes carefully.]`;
|
|
1301
|
-
}
|
|
1302
|
-
|
|
1303
|
-
// Cache terraform plan output so a subsequent apply can reference it.
|
|
1304
|
-
// Also track planned workdirs (G8) and warn on unplanned applies.
|
|
1305
|
-
if (toolCall.function.name === 'terraform' && !result.isError) {
|
|
1306
|
-
try {
|
|
1307
|
-
const tfArgs = JSON.parse(toolCall.function.arguments) as Record<string, unknown>;
|
|
1308
|
-
if (tfArgs.action === 'plan' && tfArgs.workdir) {
|
|
1309
|
-
cacheTerraformPlan(String(tfArgs.workdir), result.output);
|
|
1310
|
-
// G8: Track that a plan was run for this workdir in this session
|
|
1311
|
-
terraformPlannedWorkdirs.add(String(tfArgs.workdir));
|
|
1312
|
-
}
|
|
1313
|
-
// G8: Warn if apply ran without a prior plan in this session
|
|
1314
|
-
if (tfArgs.action === 'apply' && tfArgs.workdir && !terraformPlannedWorkdirs.has(String(tfArgs.workdir))) {
|
|
1315
|
-
toolContent = `[Note: terraform apply ran without a prior terraform plan in this session for ${String(tfArgs.workdir)}. Always run terraform plan first to review changes before applying.]\n\n${toolContent}`;
|
|
1316
|
-
}
|
|
1317
|
-
// Inject cached plan into apply context for the LLM
|
|
1318
|
-
if (tfArgs.action === 'apply' && tfArgs.workdir) {
|
|
1319
|
-
const cached = getCachedTerraformPlan(String(tfArgs.workdir));
|
|
1320
|
-
if (cached) {
|
|
1321
|
-
toolContent = `[Apply succeeded. This was the plan that was applied:]\n${cached.slice(0, 3000)}\n\n[Apply output:]\n${toolContent}`;
|
|
1322
|
-
}
|
|
1323
|
-
}
|
|
1324
|
-
} catch { /* ignore parse errors */ }
|
|
1325
|
-
}
|
|
1326
|
-
|
|
1327
|
-
// GAP-11: trigger FileDiff UI after terraform plan shows resource changes
|
|
1328
|
-
if (toolCall.function.name === 'terraform' && !result.isError && options.requestFileDiff) {
|
|
1329
|
-
try {
|
|
1330
|
-
const tfArgs11 = JSON.parse(toolCall.function.arguments) as Record<string, unknown>;
|
|
1331
|
-
if (tfArgs11.action === 'plan') {
|
|
1332
|
-
const { parseTerraformPlanOutput, buildFileDiffBatchFromPlan } = await import('./deploy-preview');
|
|
1333
|
-
const changes = parseTerraformPlanOutput(toolContent);
|
|
1334
|
-
if (changes.length > 0) {
|
|
1335
|
-
const batchFiles = buildFileDiffBatchFromPlan({ changes } as import('./deploy-preview').DeployPreview);
|
|
1336
|
-
for (const file of batchFiles) {
|
|
1337
|
-
const decision = await options.requestFileDiff(file.filePath, file.toolName ?? 'terraform', file.diff ?? '');
|
|
1338
|
-
if (decision === 'reject-all') break;
|
|
1339
|
-
}
|
|
1340
|
-
}
|
|
1341
|
-
}
|
|
1342
|
-
} catch { /* non-critical — FileDiff UI not always available */ }
|
|
1343
|
-
}
|
|
1344
|
-
|
|
1345
|
-
// GAP-18: auto-validate terraform files after write/edit tool calls
|
|
1346
|
-
if (['write_file', 'edit_file', 'multi_edit'].includes(toolCall.function.name) && !result.isError) {
|
|
1347
|
-
const gap18Input = JSON.parse(toolCall.function.arguments) as { path?: string; file_path?: string };
|
|
1348
|
-
const gap18FilePath = gap18Input.path ?? gap18Input.file_path ?? '';
|
|
1349
|
-
if (gap18FilePath.endsWith('.tf')) {
|
|
1350
|
-
try {
|
|
1351
|
-
// Use async exec to avoid blocking the event loop (up to 10s for terraform validate)
|
|
1352
|
-
const { stdout: validateOut } = await _execAsync('terraform validate -json 2>/dev/null', {
|
|
1353
|
-
cwd: options.cwd ?? process.cwd(),
|
|
1354
|
-
encoding: 'utf-8',
|
|
1355
|
-
timeout: 10_000,
|
|
1356
|
-
});
|
|
1357
|
-
const parsed = JSON.parse(validateOut) as { valid: boolean; diagnostics?: Array<{ severity: string; summary: string; detail: string }> };
|
|
1358
|
-
if (!parsed.valid && parsed.diagnostics && parsed.diagnostics.length > 0) {
|
|
1359
|
-
const errors = parsed.diagnostics
|
|
1360
|
-
.filter(d => d.severity === 'error')
|
|
1361
|
-
.map(d => ` ${d.summary}: ${d.detail}`)
|
|
1362
|
-
.join('\n');
|
|
1363
|
-
toolContent += `\n\nTerraform validation errors (please fix):\n${errors}`;
|
|
1364
|
-
}
|
|
1365
|
-
} catch { /* terraform not available or not in tf project — ignore */ }
|
|
1366
|
-
}
|
|
1367
|
-
}
|
|
1368
|
-
|
|
1369
|
-
// Truncate excessively large tool outputs to prevent context overflow
|
|
1370
|
-
if (toolContent.length > MAX_TOOL_OUTPUT_CHARS) {
|
|
1371
|
-
let head: string;
|
|
1372
|
-
let tail: string;
|
|
1373
|
-
let omitted: number;
|
|
1374
|
-
const lines = toolContent.split('\n');
|
|
1375
|
-
|
|
1376
|
-
// C3: Smart truncation for terraform plan — preserve all diff lines
|
|
1377
|
-
const isTerraformPlan = toolCall.function.name === 'terraform' && (() => {
|
|
1378
|
-
try {
|
|
1379
|
-
const tfArgs = JSON.parse(toolCall.function.arguments) as Record<string, unknown>;
|
|
1380
|
-
return tfArgs.action === 'plan';
|
|
1381
|
-
} catch { return false; }
|
|
1382
|
-
})();
|
|
1383
|
-
|
|
1384
|
-
if (isTerraformPlan) {
|
|
1385
|
-
// Keep all diff lines (create/update/destroy/replace) and the plan summary
|
|
1386
|
-
const diffLines: string[] = [];
|
|
1387
|
-
const contextLines: string[] = [];
|
|
1388
|
-
for (const line of lines) {
|
|
1389
|
-
const trimmed = line.trimStart();
|
|
1390
|
-
const isDiffLine = trimmed.startsWith('+') || trimmed.startsWith('-') ||
|
|
1391
|
-
trimmed.startsWith('~') || trimmed.startsWith('!') ||
|
|
1392
|
-
line.includes('will be created') || line.includes('will be destroyed') ||
|
|
1393
|
-
line.includes('will be updated') || line.includes('will be replaced') ||
|
|
1394
|
-
line.includes('Plan:') || line.includes('No changes') ||
|
|
1395
|
-
line.includes('Error:') || line.includes('Warning:');
|
|
1396
|
-
if (isDiffLine) {
|
|
1397
|
-
diffLines.push(line);
|
|
1398
|
-
} else {
|
|
1399
|
-
contextLines.push(line);
|
|
1400
|
-
}
|
|
1401
|
-
}
|
|
1402
|
-
// Allow up to 500 diff lines + first 50 context lines
|
|
1403
|
-
const keptDiff = diffLines.slice(0, 500);
|
|
1404
|
-
const keptCtx = contextLines.slice(0, 50);
|
|
1405
|
-
omitted = Math.max(0, lines.length - keptDiff.length - keptCtx.length);
|
|
1406
|
-
head = [...keptCtx, ...keptDiff].join('\n');
|
|
1407
|
-
tail = '';
|
|
1408
|
-
} else {
|
|
1409
|
-
const headLines = 100, tailLines = 20;
|
|
1410
|
-
head = lines.slice(0, headLines).join('\n');
|
|
1411
|
-
tail = lines.slice(-tailLines).join('\n');
|
|
1412
|
-
omitted = Math.max(0, lines.length - headLines - tailLines);
|
|
1413
|
-
}
|
|
1414
|
-
|
|
1415
|
-
// Save full output to disk for reference
|
|
1416
|
-
try {
|
|
1417
|
-
const { mkdirSync: _mkdirSync, writeFileSync: _writeFileSync } = await import('node:fs');
|
|
1418
|
-
const { homedir: _homedir } = await import('node:os');
|
|
1419
|
-
const outDir = join(_homedir(), '.nimbus', 'tool-outputs');
|
|
1420
|
-
_mkdirSync(outDir, { recursive: true });
|
|
1421
|
-
const outFile = join(outDir, `${Date.now()}-${toolCall.function.name}.log`);
|
|
1422
|
-
_writeFileSync(outFile, toolContent, 'utf-8');
|
|
1423
|
-
toolContent = omitted > 0
|
|
1424
|
-
? `${head}${tail ? '\n\n... [' + omitted + ' lines omitted — full output saved to ' + outFile + '] ...\n\n' + tail : '\n\n... [full output saved to ' + outFile + ']'}`
|
|
1425
|
-
: `${head}${tail ? '\n\n' + tail : ''}`;
|
|
1426
|
-
} catch {
|
|
1427
|
-
toolContent = omitted > 0
|
|
1428
|
-
? `${head}${tail ? '\n\n... [' + omitted + ' lines omitted — output too large for context] ...\n\n' + tail : '\n\n... [' + omitted + ' lines omitted]'}`
|
|
1429
|
-
: `${head}${tail ? '\n\n' + tail : ''}`;
|
|
1430
|
-
}
|
|
1431
|
-
}
|
|
1432
|
-
|
|
1433
|
-
messages.push({
|
|
1434
|
-
role: 'tool',
|
|
1435
|
-
toolCallId: toolCall.id,
|
|
1436
|
-
name: toolCall.function.name,
|
|
1437
|
-
content: toolContent,
|
|
1438
|
-
});
|
|
1439
|
-
}
|
|
1440
|
-
|
|
1441
|
-
// If we broke out of the tool-call loop due to cancellation, exit
|
|
1442
|
-
// the main loop as well.
|
|
1443
|
-
if (interrupted) {
|
|
1444
|
-
break;
|
|
1445
|
-
}
|
|
1446
|
-
|
|
1447
|
-
// -----------------------------------------------------------------
|
|
1448
|
-
// Auto-compact check
|
|
1449
|
-
// -----------------------------------------------------------------
|
|
1450
|
-
// After tool results are appended, check whether the conversation
|
|
1451
|
-
// has grown past the context window threshold. If so, summarize
|
|
1452
|
-
// older messages to free up space for future turns.
|
|
1453
|
-
if (options.contextManager) {
|
|
1454
|
-
const toolTokens = llmTools.reduce(
|
|
1455
|
-
(sum, t) => sum + Math.ceil(JSON.stringify(t).length / 4),
|
|
1456
|
-
0
|
|
1457
|
-
);
|
|
1458
|
-
if (options.contextManager.shouldCompact(systemPrompt, messages, toolTokens)) {
|
|
1459
|
-
try {
|
|
1460
|
-
const compactResult = await runCompaction(messages, options.contextManager, {
|
|
1461
|
-
router,
|
|
1462
|
-
...(options.infraContext ? { infraContext: options.infraContext } : {}),
|
|
1463
|
-
});
|
|
1464
|
-
// Replace messages with the compacted version
|
|
1465
|
-
messages.length = 0;
|
|
1466
|
-
messages.push(...compactResult.messages);
|
|
1467
|
-
// Clear the token cache after compaction — old message entries are no longer valid
|
|
1468
|
-
options.contextManager.clearTokenCache();
|
|
1469
|
-
if (options.onCompact) {
|
|
1470
|
-
options.onCompact(compactResult.result);
|
|
1471
|
-
}
|
|
1472
|
-
} catch (compactErr) {
|
|
1473
|
-
// Compaction failed — notify user visibly and continue with original messages
|
|
1474
|
-
const compactErrMsg =
|
|
1475
|
-
compactErr instanceof Error ? compactErr.message : String(compactErr);
|
|
1476
|
-
if (onText) {
|
|
1477
|
-
onText(
|
|
1478
|
-
`\n[Warning: Auto-compaction failed: ${compactErrMsg}. Context may exceed budget on the next turn.]\n`
|
|
1479
|
-
);
|
|
1480
|
-
}
|
|
1481
|
-
}
|
|
1482
|
-
}
|
|
1483
|
-
}
|
|
1484
|
-
} catch (error: unknown) {
|
|
1485
|
-
// LLM API error — report to the caller and break
|
|
1486
|
-
const msg = error instanceof Error ? error.message : String(error);
|
|
1487
|
-
// G24: Network errors already printed via onText above — skip duplicate output
|
|
1488
|
-
const isNetworkErr = (error instanceof Error) && (error as Error & { _nimbusNetworkError?: boolean })._nimbusNetworkError;
|
|
1489
|
-
if (!isNetworkErr && onText) {
|
|
1490
|
-
onText(`\n[Error: ${msg}]\n`);
|
|
1491
|
-
}
|
|
1492
|
-
messages.push({
|
|
1493
|
-
role: 'assistant',
|
|
1494
|
-
content: isNetworkErr ? msg : `I encountered an error: ${msg}`,
|
|
1495
|
-
});
|
|
1496
|
-
break;
|
|
1497
|
-
}
|
|
1498
|
-
}
|
|
1499
|
-
|
|
1500
|
-
// -----------------------------------------------------------------------
|
|
1501
|
-
// 4. Post-loop bookkeeping
|
|
1502
|
-
// -----------------------------------------------------------------------
|
|
1503
|
-
|
|
1504
|
-
if (turns >= maxTurns && !interrupted) {
|
|
1505
|
-
if (onText) {
|
|
1506
|
-
onText(`\n[Agent reached maximum turns limit (${maxTurns}). Stopping.]\n`);
|
|
1507
|
-
}
|
|
1508
|
-
}
|
|
1509
|
-
|
|
1510
|
-
// GAP-19: Session summary after multi-step deploy
|
|
1511
|
-
if (options.mode === 'deploy' && options.onText) {
|
|
1512
|
-
// Collect tool calls from messages
|
|
1513
|
-
const allToolCalls: Array<{ name: string; input: Record<string, unknown> }> = [];
|
|
1514
|
-
for (const msg of messages) {
|
|
1515
|
-
if (msg.role === 'assistant' && Array.isArray((msg as {toolCalls?: unknown[]}).toolCalls)) {
|
|
1516
|
-
for (const tc of (msg as {toolCalls: Array<{function: {name: string; arguments: string}}>}).toolCalls) {
|
|
1517
|
-
try {
|
|
1518
|
-
allToolCalls.push({ name: tc.function.name, input: JSON.parse(tc.function.arguments) as Record<string, unknown> });
|
|
1519
|
-
} catch { /* ignore */ }
|
|
1520
|
-
}
|
|
1521
|
-
}
|
|
1522
|
-
}
|
|
1523
|
-
if (allToolCalls.length > 3) {
|
|
1524
|
-
const terraform = allToolCalls.filter(c => c.name === 'terraform');
|
|
1525
|
-
const kubectl = allToolCalls.filter(c => c.name === 'kubectl');
|
|
1526
|
-
const helm = allToolCalls.filter(c => c.name === 'helm');
|
|
1527
|
-
const summaryLines: string[] = ['---', '**Session Summary**'];
|
|
1528
|
-
if (terraform.length) summaryLines.push(`• Terraform: ${terraform.map(c => String(c.input.action ?? '')).join(', ')}`);
|
|
1529
|
-
if (kubectl.length) summaryLines.push(`• Kubectl: ${kubectl.map(c => String(c.input.action ?? '')).join(', ')}`);
|
|
1530
|
-
if (helm.length) summaryLines.push(`• Helm: ${helm.map(c => String(c.input.action ?? '')).join(', ')}`);
|
|
1531
|
-
if (summaryLines.length > 2) {
|
|
1532
|
-
options.onText('\n\n' + summaryLines.join('\n'));
|
|
1533
|
-
}
|
|
1534
|
-
}
|
|
1535
|
-
}
|
|
1536
|
-
|
|
1537
|
-
return {
|
|
1538
|
-
messages,
|
|
1539
|
-
turns,
|
|
1540
|
-
interrupted,
|
|
1541
|
-
usage: totalUsage,
|
|
1542
|
-
totalCost,
|
|
1543
|
-
};
|
|
1544
|
-
}
|
|
1545
|
-
|
|
1546
|
-
// ---------------------------------------------------------------------------
|
|
1547
|
-
// Tool Execution
|
|
1548
|
-
// ---------------------------------------------------------------------------
|
|
1549
|
-
|
|
1550
|
-
/**
 * Names of the tools that modify files on disk.
 *
 * A successful call to one of these should be followed by an LSP
 * diagnostics pass on the edited file.
 */
const FILE_EDITING_TOOLS = new Set<string>([
  'edit_file',
  'multi_edit',
  'write_file',
]);
|
|
1552
|
-
|
|
1553
|
-
/**
 * Names of the tools that mutate files and may therefore need the user to
 * approve a proposed diff before the change is written.
 */
const FILE_MUTATING_TOOLS = new Set<string>([
  'edit_file',
  'multi_edit',
  'write_file',
]);
|
|
1555
|
-
|
|
1556
|
-
/**
 * Generate a simple unified-style diff between two strings.
 *
 * Intended for display only. The comparison is a greedy, line-by-line scan:
 * when the two sides disagree, lines are removed from `before` until one
 * matches the current `after` line, then lines are added from `after` until
 * the sides agree again. It does not search for a minimal edit script, so an
 * insertion whose line never occurs later in `before` is rendered as
 * "remove the rest, add the rest".
 *
 * Each hunk header reports 1-based start lines for both sides; the old-file
 * and new-file positions are tracked independently so the `+` start stays
 * correct after earlier hunks have changed the line count.
 *
 * @param filename - Name shown in the `--- a/` and `+++ b/` header lines.
 * @param before - Current content.
 * @param after - Proposed content.
 * @returns The diff text; only the two header lines when the inputs are equal.
 */
function generateUnifiedDiff(filename: string, before: string, after: string): string {
  const beforeLines = before.split('\n');
  const afterLines = after.split('\n');
  const lines: string[] = [`--- a/${filename}`, `+++ b/${filename}`];
  let i = 0;
  let j = 0;
  while (i < beforeLines.length || j < afterLines.length) {
    if (beforeLines[i] === afterLines[j]) {
      i++;
      j++;
      continue;
    }
    const hunkBefore: string[] = [];
    const hunkAfter: string[] = [];
    // Remember where the hunk begins on each side separately: once an earlier
    // hunk has added or removed lines, the two indices are no longer equal.
    const startBefore = i;
    const startAfter = j;
    // Drop old lines until one lines up with the current new line
    // (or the old side is exhausted).
    while (i < beforeLines.length && beforeLines[i] !== afterLines[j]) {
      hunkBefore.push(beforeLines[i++]);
    }
    // Then take new lines until the two sides agree again
    // (or the new side is exhausted).
    while (
      j < afterLines.length &&
      (i >= beforeLines.length || beforeLines[i] !== afterLines[j])
    ) {
      hunkAfter.push(afterLines[j++]);
    }
    lines.push(
      `@@ -${startBefore + 1},${hunkBefore.length} +${startAfter + 1},${hunkAfter.length} @@`
    );
    for (const removed of hunkBefore) {
      lines.push(`-${removed}`);
    }
    for (const added of hunkAfter) {
      lines.push(`+${added}`);
    }
  }
  return lines.join('\n');
}
|
|
1592
|
-
|
|
1593
|
-
/**
 * Compute a proposed diff for a file-mutating tool call without writing to disk.
 *
 * The file at `args.path` is read (a file that cannot be read is treated as
 * empty), the tool's edit is applied to the in-memory text, and the result is
 * diffed against the current content:
 *
 * - `edit_file`: replaces the first occurrence of `args.old_string` with
 *   `args.new_string`.
 * - `multi_edit`: applies each `{ old_string, new_string }` entry of
 *   `args.edits` in order, first occurrence each.
 * - `write_file`: the proposed content is `args.content`.
 *
 * Replacement text is inserted literally. A replacer function is used instead
 * of a replacement string so that `$&`, `$1`, `$$` and similar sequences in
 * the new text are not expanded by `String.prototype.replace`.
 *
 * @param toolName - Name of the tool being called.
 * @param args - Parsed (not yet schema-validated) tool arguments.
 * @returns The unified diff string, or `null` when there is no path, no
 *          change, or the diff cannot be computed.
 */
async function computeProposedDiff(
  toolName: string,
  args: Record<string, unknown>
): Promise<string | null> {
  try {
    const { readFile } = await import('node:fs/promises');
    const path = args.path as string;
    if (!path) return null;

    // Any read failure (e.g. the file does not exist yet) means "start from empty".
    let currentContent = '';
    try {
      currentContent = await readFile(path, 'utf-8');
    } catch {
      currentContent = '';
    }

    let proposed = currentContent;
    if (toolName === 'edit_file') {
      const replacement = args.new_string as string;
      // Function replacer => literal insertion, no `$` pattern expansion.
      proposed = currentContent.replace(args.old_string as string, () => replacement);
    } else if (toolName === 'multi_edit') {
      const edits = args.edits as Array<{ old_string: string; new_string: string }>;
      if (Array.isArray(edits)) {
        for (const edit of edits) {
          proposed = proposed.replace(edit.old_string, () => edit.new_string);
        }
      }
    } else if (toolName === 'write_file') {
      proposed = args.content as string;
    }

    if (proposed === currentContent) return null; // no change
    return generateUnifiedDiff(path, currentContent, proposed);
  } catch {
    // Best effort: a preview that cannot be built must not fail the tool call.
    return null;
  }
}
|
|
1625
|
-
|
|
1626
|
-
/**
 * Extract the file path from a tool call's parsed arguments.
 *
 * Only tools listed in `FILE_EDITING_TOOLS` are considered; for those, the
 * target file is taken from the `path` property of the input object.
 *
 * The input may be unvalidated JSON, so `path` is only returned when it is
 * actually a string; any other shape yields `null` rather than a value that
 * contradicts the declared return type.
 *
 * @param toolName - Name of the tool being called.
 * @param input - Parsed tool arguments (possibly not yet schema-validated).
 * @returns The `path` string, or `null` for non-file tools or when no string
 *          `path` is present.
 */
function extractFilePath(toolName: string, input: unknown): string | null {
  if (!FILE_EDITING_TOOLS.has(toolName)) {
    return null;
  }
  if (typeof input !== 'object' || input === null || !('path' in input)) {
    return null;
  }
  const candidate = (input as { path: unknown }).path;
  return typeof candidate === 'string' ? candidate : null;
}
|
|
1641
|
-
|
|
1642
|
-
/**
 * Execute a single tool call.
 *
 * Steps, in order:
 * 1. Parse the JSON arguments string from the LLM response.
 * 2. Look up the tool in the registry.
 * 3. Fire `onStart`, then run PreToolUse hooks (which may block the call).
 * 4. Check permissions via the caller-supplied callback.
 * 5. For file-mutating tools, optionally show a proposed diff and honour the
 *    user's apply / reject / apply-all / reject-all decision.
 * 6. Capture an undo snapshot when the snapshot manager asks for one.
 * 7. Validate the input against the tool's schema and invoke the tool.
 * 8. After a successful file edit, append LSP diagnostics to the output.
 * 9. Mask secrets, run PostToolUse hooks, and fire `onEnd`.
 *
 * Malformed JSON returns an error result immediately, without firing
 * `onStart` or `onEnd`. An unknown tool fires `onEnd` only.
 *
 * @param toolCall - The raw tool call from the LLM response.
 * @param registry - The tool registry to look up the tool definition.
 * @param onStart - Optional callback fired before execution.
 * @param onEnd - Optional callback fired after execution (or error).
 * @param checkPermission - Optional permission gate; `'deny'` and `'block'` stop the call.
 * @param lspManager - Optional LSP manager for post-edit diagnostics.
 * @param snapshotManager - Optional snapshot manager; a snapshot is captured
 *        before the tool runs when `SnapshotManager.shouldSnapshot` says so.
 * @param sessionId - Session identifier for hooks and snapshots (falls back to `'default'`).
 * @param signal - Optional abort signal, attached to the validated input of the `bash` tool as `_signal`.
 * @param hookEngine - Optional hook engine for PreToolUse / PostToolUse hooks.
 * @param mode - Agent mode reported to hooks (falls back to `'build'`).
 * @param requestFileDiff - Optional callback that shows a proposed diff and returns the user's decision.
 * @param skipRemainingDiffPrompts - Shared flag; set to `true` on `'apply-all'`, after which diff prompts are skipped.
 * @param rejectRemainingDiffPrompts - Shared flag; set to `true` on `'reject-all'`, after which file mutations are auto-rejected.
 * @param onChunk - Optional progress callback, passed to the tool as `onProgress`.
 * @param toolTimeouts - Optional per-tool timeouts keyed by tool name, passed to the tool as `timeout`.
 * @param infraContext - Optional session infra context, passed through to the tool.
 * @returns The tool result. Argument-parsing, validation and tool-execution
 *          failures are captured inside the result; rejections from the
 *          caller-supplied callbacks or the hook runners are not caught here.
 */
async function executeToolCall(
  toolCall: ToolCall,
  registry: ToolRegistry,
  onStart?: (info: ToolCallInfo) => void,
  onEnd?: (info: ToolCallInfo, result: ToolResult) => void,
  checkPermission?: (tool: ToolDefinition, input: unknown) => Promise<PermissionDecision>,
  lspManager?: LSPManager,
  snapshotManager?: SnapshotManager,
  sessionId?: string,
  signal?: AbortSignal,
  hookEngine?: HookEngine,
  mode?: AgentMode,
  requestFileDiff?: (path: string, toolName: string, diff: string) => Promise<FileDiffDecision>,
  skipRemainingDiffPrompts?: { value: boolean },
  rejectRemainingDiffPrompts?: { value: boolean },
  onChunk?: (chunk: string) => void,
  toolTimeouts?: Record<string, number>,
  infraContext?: import('../sessions/manager').SessionInfraContext
): Promise<ToolResult> {
  const toolName = toolCall.function.name;

  // Parse the JSON arguments string from the LLM
  let parsedArgs: unknown;
  try {
    parsedArgs = JSON.parse(toolCall.function.arguments);
  } catch {
    const result: ToolResult = {
      output: '',
      error: `Tool '${toolName}' received malformed JSON arguments — please retry the tool call with valid JSON. Received: ${toolCall.function.arguments.slice(0, 200)}`,
      isError: true,
    };
    return result;
  }

  // Valid JSON is not necessarily an object (`null`, `42`, `"x"` all parse).
  // Helpers that inspect individual arguments get an object view; the raw
  // value still goes to the permission gate and to schema validation.
  const argsRecord: Record<string, unknown> =
    parsedArgs && typeof parsedArgs === 'object' ? (parsedArgs as Record<string, unknown>) : {};

  const callInfo: ToolCallInfo = {
    id: toolCall.id,
    name: toolName,
    input: parsedArgs,
    startTime: Date.now(),
  };

  /** Notify `onEnd` (when supplied) and hand the result back for returning. */
  const finish = (outcome: ToolResult): ToolResult => {
    if (onEnd) {
      onEnd(callInfo, outcome);
    }
    return outcome;
  };

  // Look up the tool definition
  const tool = registry.get(toolName);
  if (!tool) {
    return finish({
      output: '',
      error: `Unknown tool: ${toolName}`,
      isError: true,
    });
  }

  // Notify start
  if (onStart) {
    onStart(callInfo);
  }

  // Build shared hook context for PreToolUse and PostToolUse
  const hookContext: HookContext = {
    tool: toolName,
    input: argsRecord,
    sessionId: sessionId ?? 'default',
    agent: mode ?? 'build',
    timestamp: new Date().toISOString(),
  };

  // PreToolUse hooks — may block the tool call
  if (hookEngine) {
    const preResult = await runPreToolHooks(hookEngine, hookContext);
    if (!preResult.allowed) {
      return finish({
        output: '',
        error: `Tool '${toolName}' blocked by hook: ${preResult.message ?? 'no reason given'}`,
        isError: true,
      });
    }
  }

  // Permission check
  if (checkPermission) {
    const decision = await checkPermission(tool, parsedArgs);
    if (decision === 'deny' || decision === 'block') {
      return finish({
        output: '',
        error:
          decision === 'block'
            ? `Tool '${toolName}' is blocked by permission policy.`
            : `User denied permission for tool '${toolName}'.`,
        isError: true,
      });
    }
  }

  // B1: Pre-approval diff — show proposed change before writing files
  if (
    FILE_MUTATING_TOOLS.has(toolName) &&
    requestFileDiff &&
    !(skipRemainingDiffPrompts?.value)
  ) {
    // Auto-reject if 'reject-all' was previously chosen
    if (rejectRemainingDiffPrompts?.value) {
      return finish({
        output: 'User rejected this change (reject-all).',
        error: undefined,
        isError: false,
      });
    }

    const diff = await computeProposedDiff(toolName, argsRecord);
    // No diff (no change, or preview unavailable) means no prompt: the call proceeds.
    if (diff) {
      const targetPath = (argsRecord.path as string | undefined) ?? '(file)';
      const decision = await requestFileDiff(targetPath, toolName, diff);
      if (decision === 'reject') {
        return finish({
          output: 'User rejected this change.',
          error: undefined,
          isError: false,
        });
      }
      if (decision === 'reject-all') {
        if (rejectRemainingDiffPrompts) {
          rejectRemainingDiffPrompts.value = true;
        }
        return finish({
          output: 'User rejected this change (reject-all).',
          error: undefined,
          isError: false,
        });
      }
      if (decision === 'apply-all' && skipRemainingDiffPrompts) {
        skipRemainingDiffPrompts.value = true;
      }
    }
  }

  // Capture snapshot before file-modifying tools for undo/redo support
  if (snapshotManager && SnapshotManager.shouldSnapshot(toolName, argsRecord)) {
    try {
      await snapshotManager.captureSnapshot({
        sessionId: sessionId || 'default',
        messageId: toolCall.id,
        toolCallId: toolCall.id,
        description: `${toolName}: ${extractFilePath(toolName, parsedArgs) || '(bash command)'}`,
      });
    } catch {
      // Snapshot failure should never block the tool call
    }
  }

  // Validate input against the tool's Zod schema and execute
  let result: ToolResult;
  try {
    const validatedInput = tool.inputSchema.parse(parsedArgs);

    // Thread AbortSignal into bash tool for Ctrl+C child process killing
    if (signal && toolName === 'bash' && validatedInput && typeof validatedInput === 'object') {
      (validatedInput as Record<string, unknown>)._signal = signal;
    }

    // GAP-20: Build tool execute context, including per-tool timeout from toolTimeouts map
    // C2: Also pass infraContext from session so tools can use it as fallback
    // The presence test and the spread below use the same `!== undefined`
    // check so a configured timeout of 0 is forwarded consistently.
    const timeout = toolTimeouts?.[toolName];
    const toolCtx: ToolExecuteContext | undefined =
      onChunk || timeout !== undefined || infraContext
        ? {
            ...(onChunk ? { onProgress: onChunk } : {}),
            ...(timeout !== undefined ? { timeout } : {}),
            ...(infraContext ? { infraContext } : {}),
          }
        : undefined;

    // C2: Write infra checkpoint before mutating terraform/helm operations
    if (toolName === 'terraform' || toolName === 'helm') {
      const checkpointAction = String(argsRecord.action ?? '');
      const needsCheckpoint =
        (toolName === 'terraform' && checkpointAction === 'apply') ||
        (toolName === 'helm' && ['install', 'upgrade', 'rollback'].includes(checkpointAction));
      if (needsCheckpoint) {
        writeInfraCheckpoint(toolName, checkpointAction, argsRecord);
      }
    }

    result = await tool.execute(validatedInput, toolCtx);
  } catch (error: unknown) {
    // Covers schema validation failures as well as anything thrown while
    // preparing or running the tool.
    result = {
      output: '',
      error: formatToolInputError(toolName, error),
      isError: true,
    };
  }

  // -----------------------------------------------------------------------
  // LSP diagnostics injection
  // -----------------------------------------------------------------------
  // After a successful file edit, notify the language server and collect
  // any diagnostics (type errors, lint issues). If errors exist they are
  // appended to the tool output so the LLM sees them on its next turn
  // and can self-correct.
  if (lspManager && !result.isError) {
    const filePath = extractFilePath(toolName, parsedArgs);
    if (filePath) {
      try {
        await lspManager.touchFile(filePath);
        const diagnostics = await lspManager.getDiagnostics(filePath);
        if (diagnostics.length > 0) {
          const formatted = lspManager.formatDiagnosticsForAgent(diagnostics);
          if (formatted) {
            result = {
              ...result,
              output: result.output ? `${result.output}\n\n${formatted}` : formatted,
            };
          }
        }
      } catch (lspErr) {
        // LSP errors should never block the agent loop.
        // Append a note to the tool result so the LLM (and user) can see it.
        const lspErrMsg = lspErr instanceof Error ? lspErr.message : String(lspErr);
        const note = `[Note: LSP diagnostics unavailable: ${lspErrMsg}]`;
        result = {
          ...result,
          output: result.output ? `${result.output}\n\n${note}` : note,
        };
      }
    }
  }

  // Gap 12: Mask secrets before forwarding to callbacks/history.
  // Error text reaches the same consumers (onEnd, PostToolUse hooks) as
  // regular output, so both fields are masked.
  if (result.output || result.error) {
    result = {
      ...result,
      output: result.output ? maskSecrets(result.output) : result.output,
      error: result.error ? maskSecrets(result.error) : result.error,
    };
  }

  // PostToolUse hooks (audit, auto-format, etc.). These are awaited, so they
  // complete before `onEnd` fires and before the result is returned.
  if (hookEngine) {
    await runPostToolHooks(hookEngine, {
      ...hookContext,
      result: {
        output: result.isError ? (result.error ?? '') : result.output,
        isError: result.isError,
      },
    });
  }

  // Notify end
  return finish(result);
}
|
|
1932
|
-
|
|
1933
|
-
// ---------------------------------------------------------------------------
|
|
1934
|
-
// Mode-Based Tool Filtering
|
|
1935
|
-
// ---------------------------------------------------------------------------
|
|
1936
|
-
|
|
1937
|
-
/**
 * Allow-list of tool names exposed to the agent in `plan` mode.
 *
 * Plan mode is meant for inspection and analysis: reading files, searching
 * the codebase, estimating costs and detecting drift, without editing files,
 * running commands or mutating infrastructure. Any tool not named here is
 * filtered out in that mode. Note that `todo_write` and `task` are on the
 * list alongside the read-only tools.
 */
const PLAN_MODE_TOOLS = new Set<string>([
  // Files and search
  'read_file', 'glob', 'grep', 'list_dir',
  // Web
  'webfetch',
  // Task tracking and sub-tasks
  'todo_read', 'todo_write', 'task',
  // Infrastructure analysis
  'cost_estimate', 'drift_detect', 'cloud_discover',
]);
|
|
1957
|
-
|
|
1958
|
-
/**
 * Deny-list of tool names withheld from the agent in `build` mode.
 *
 * Build mode permits reads and writes (file edits, code generation) but
 * keeps out the infrastructure-mutating tools that could touch live
 * environments. This is only the coarse filter; the permission engine
 * applies finer-grained control on top of it.
 */
const BUILD_MODE_BLOCKED_TOOLS = new Set<string>([
  'terraform',
  'kubectl',
  'helm',
]);
|
|
1967
|
-
|
|
1968
|
-
/**
 * Select the tools the agent may use in a given mode.
 *
 * - **plan**: only the tools named in `PLAN_MODE_TOOLS`.
 * - **build**: everything except the tools named in `BUILD_MODE_BLOCKED_TOOLS`.
 * - **deploy**: every tool, returned as the same array that was passed in.
 *
 * @param allTools - Every tool registered in the system.
 * @param mode - The active agent mode.
 * @returns The subset of tools available in the given mode.
 */
export function getToolsForMode(allTools: ToolDefinition[], mode: AgentMode): ToolDefinition[] {
  if (mode === 'plan') {
    return allTools.filter(candidate => PLAN_MODE_TOOLS.has(candidate.name));
  }

  if (mode === 'build') {
    return allTools.filter(candidate => !BUILD_MODE_BLOCKED_TOOLS.has(candidate.name));
  }

  if (mode === 'deploy') {
    // All tools available
    return allTools;
  }

  // Exhaustiveness guard: once every member of the AgentMode union has been
  // handled above, `mode` narrows to `never`, so adding a new mode without a
  // branch becomes a compile error (assuming AgentMode is a union type).
  // At runtime an unrecognised value still receives the full tool list.
  const _unhandledMode: never = mode;
  return allTools;
}
|