@build-astron-co/nimbus 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (313) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +628 -0
  3. package/bin/nimbus +38 -0
  4. package/package.json +80 -0
  5. package/src/__tests__/app.test.ts +76 -0
  6. package/src/__tests__/audit.test.ts +877 -0
  7. package/src/__tests__/circuit-breaker.test.ts +116 -0
  8. package/src/__tests__/cli-run.test.ts +115 -0
  9. package/src/__tests__/context-manager.test.ts +502 -0
  10. package/src/__tests__/context.test.ts +242 -0
  11. package/src/__tests__/enterprise.test.ts +401 -0
  12. package/src/__tests__/generator.test.ts +433 -0
  13. package/src/__tests__/hooks.test.ts +582 -0
  14. package/src/__tests__/init.test.ts +436 -0
  15. package/src/__tests__/intent-parser.test.ts +229 -0
  16. package/src/__tests__/llm-router.test.ts +209 -0
  17. package/src/__tests__/lsp.test.ts +293 -0
  18. package/src/__tests__/modes.test.ts +336 -0
  19. package/src/__tests__/permissions.test.ts +338 -0
  20. package/src/__tests__/serve.test.ts +275 -0
  21. package/src/__tests__/sessions.test.ts +227 -0
  22. package/src/__tests__/sharing.test.ts +288 -0
  23. package/src/__tests__/snapshots.test.ts +581 -0
  24. package/src/__tests__/state-db.test.ts +334 -0
  25. package/src/__tests__/stream-with-tools.test.ts +732 -0
  26. package/src/__tests__/subagents.test.ts +176 -0
  27. package/src/__tests__/system-prompt.test.ts +169 -0
  28. package/src/__tests__/tool-converter.test.ts +256 -0
  29. package/src/__tests__/tool-schemas.test.ts +397 -0
  30. package/src/__tests__/tools.test.ts +143 -0
  31. package/src/__tests__/version.test.ts +49 -0
  32. package/src/agent/compaction-agent.ts +227 -0
  33. package/src/agent/context-manager.ts +435 -0
  34. package/src/agent/context.ts +427 -0
  35. package/src/agent/deploy-preview.ts +426 -0
  36. package/src/agent/index.ts +68 -0
  37. package/src/agent/loop.ts +717 -0
  38. package/src/agent/modes.ts +429 -0
  39. package/src/agent/permissions.ts +466 -0
  40. package/src/agent/subagents/base.ts +116 -0
  41. package/src/agent/subagents/cost.ts +51 -0
  42. package/src/agent/subagents/explore.ts +42 -0
  43. package/src/agent/subagents/general.ts +54 -0
  44. package/src/agent/subagents/index.ts +102 -0
  45. package/src/agent/subagents/infra.ts +59 -0
  46. package/src/agent/subagents/security.ts +69 -0
  47. package/src/agent/system-prompt.ts +436 -0
  48. package/src/app.ts +122 -0
  49. package/src/audit/activity-log.ts +290 -0
  50. package/src/audit/compliance-checker.ts +540 -0
  51. package/src/audit/cost-tracker.ts +318 -0
  52. package/src/audit/index.ts +23 -0
  53. package/src/audit/security-scanner.ts +596 -0
  54. package/src/auth/guard.ts +75 -0
  55. package/src/auth/index.ts +56 -0
  56. package/src/auth/oauth.ts +455 -0
  57. package/src/auth/providers.ts +470 -0
  58. package/src/auth/sso.ts +113 -0
  59. package/src/auth/store.ts +505 -0
  60. package/src/auth/types.ts +187 -0
  61. package/src/build.ts +141 -0
  62. package/src/cli/index.ts +16 -0
  63. package/src/cli/init.ts +854 -0
  64. package/src/cli/openapi-spec.ts +356 -0
  65. package/src/cli/run.ts +237 -0
  66. package/src/cli/serve-auth.ts +80 -0
  67. package/src/cli/serve.ts +462 -0
  68. package/src/cli/web.ts +67 -0
  69. package/src/cli.ts +1417 -0
  70. package/src/clients/core-engine-client.ts +227 -0
  71. package/src/clients/enterprise-client.ts +334 -0
  72. package/src/clients/generator-client.ts +351 -0
  73. package/src/clients/git-client.ts +627 -0
  74. package/src/clients/github-client.ts +410 -0
  75. package/src/clients/helm-client.ts +504 -0
  76. package/src/clients/index.ts +80 -0
  77. package/src/clients/k8s-client.ts +497 -0
  78. package/src/clients/llm-client.ts +161 -0
  79. package/src/clients/rest-client.ts +130 -0
  80. package/src/clients/service-discovery.ts +33 -0
  81. package/src/clients/terraform-client.ts +482 -0
  82. package/src/clients/tools-client.ts +1843 -0
  83. package/src/clients/ws-client.ts +115 -0
  84. package/src/commands/analyze/index.ts +352 -0
  85. package/src/commands/apply/helm.ts +473 -0
  86. package/src/commands/apply/index.ts +213 -0
  87. package/src/commands/apply/k8s.ts +454 -0
  88. package/src/commands/apply/terraform.ts +582 -0
  89. package/src/commands/ask.ts +167 -0
  90. package/src/commands/audit/index.ts +238 -0
  91. package/src/commands/auth-cloud.ts +294 -0
  92. package/src/commands/auth-list.ts +134 -0
  93. package/src/commands/auth-profile.ts +121 -0
  94. package/src/commands/auth-status.ts +141 -0
  95. package/src/commands/aws/ec2.ts +501 -0
  96. package/src/commands/aws/iam.ts +397 -0
  97. package/src/commands/aws/index.ts +133 -0
  98. package/src/commands/aws/lambda.ts +396 -0
  99. package/src/commands/aws/rds.ts +439 -0
  100. package/src/commands/aws/s3.ts +439 -0
  101. package/src/commands/aws/vpc.ts +393 -0
  102. package/src/commands/aws-discover.ts +649 -0
  103. package/src/commands/aws-terraform.ts +805 -0
  104. package/src/commands/azure/aks.ts +376 -0
  105. package/src/commands/azure/functions.ts +253 -0
  106. package/src/commands/azure/index.ts +116 -0
  107. package/src/commands/azure/storage.ts +478 -0
  108. package/src/commands/azure/vm.ts +355 -0
  109. package/src/commands/billing/index.ts +256 -0
  110. package/src/commands/chat.ts +314 -0
  111. package/src/commands/config.ts +346 -0
  112. package/src/commands/cost/cloud-cost-estimator.ts +266 -0
  113. package/src/commands/cost/estimator.ts +79 -0
  114. package/src/commands/cost/index.ts +594 -0
  115. package/src/commands/cost/parsers/terraform.ts +273 -0
  116. package/src/commands/cost/parsers/types.ts +25 -0
  117. package/src/commands/cost/pricing/aws.ts +544 -0
  118. package/src/commands/cost/pricing/azure.ts +499 -0
  119. package/src/commands/cost/pricing/gcp.ts +396 -0
  120. package/src/commands/cost/pricing/index.ts +40 -0
  121. package/src/commands/demo.ts +250 -0
  122. package/src/commands/doctor.ts +794 -0
  123. package/src/commands/drift/index.ts +439 -0
  124. package/src/commands/explain.ts +277 -0
  125. package/src/commands/feedback.ts +389 -0
  126. package/src/commands/fix.ts +324 -0
  127. package/src/commands/fs/index.ts +402 -0
  128. package/src/commands/gcp/compute.ts +325 -0
  129. package/src/commands/gcp/functions.ts +271 -0
  130. package/src/commands/gcp/gke.ts +438 -0
  131. package/src/commands/gcp/iam.ts +344 -0
  132. package/src/commands/gcp/index.ts +129 -0
  133. package/src/commands/gcp/storage.ts +284 -0
  134. package/src/commands/generate-helm.ts +1249 -0
  135. package/src/commands/generate-k8s.ts +1560 -0
  136. package/src/commands/generate-terraform.ts +1460 -0
  137. package/src/commands/gh/index.ts +863 -0
  138. package/src/commands/git/index.ts +1343 -0
  139. package/src/commands/helm/index.ts +1126 -0
  140. package/src/commands/help.ts +539 -0
  141. package/src/commands/history.ts +142 -0
  142. package/src/commands/import.ts +868 -0
  143. package/src/commands/index.ts +367 -0
  144. package/src/commands/init.ts +1046 -0
  145. package/src/commands/k8s/index.ts +1137 -0
  146. package/src/commands/login.ts +631 -0
  147. package/src/commands/logout.ts +83 -0
  148. package/src/commands/onboarding.ts +228 -0
  149. package/src/commands/plan/display.ts +279 -0
  150. package/src/commands/plan/index.ts +599 -0
  151. package/src/commands/preview.ts +452 -0
  152. package/src/commands/questionnaire.ts +1270 -0
  153. package/src/commands/resume.ts +55 -0
  154. package/src/commands/team/index.ts +346 -0
  155. package/src/commands/template.ts +232 -0
  156. package/src/commands/tf/index.ts +1034 -0
  157. package/src/commands/upgrade.ts +550 -0
  158. package/src/commands/usage/index.ts +134 -0
  159. package/src/commands/version.ts +170 -0
  160. package/src/compat/index.ts +2 -0
  161. package/src/compat/runtime.ts +12 -0
  162. package/src/compat/sqlite.ts +107 -0
  163. package/src/config/index.ts +17 -0
  164. package/src/config/manager.ts +530 -0
  165. package/src/config/safety-policy.ts +358 -0
  166. package/src/config/schema.ts +125 -0
  167. package/src/config/types.ts +527 -0
  168. package/src/context/context-db.ts +199 -0
  169. package/src/demo/index.ts +349 -0
  170. package/src/demo/scenarios/full-journey.ts +229 -0
  171. package/src/demo/scenarios/getting-started.ts +127 -0
  172. package/src/demo/scenarios/helm-release.ts +341 -0
  173. package/src/demo/scenarios/k8s-deployment.ts +194 -0
  174. package/src/demo/scenarios/terraform-vpc.ts +170 -0
  175. package/src/demo/types.ts +92 -0
  176. package/src/engine/cost-estimator.ts +438 -0
  177. package/src/engine/diagram-generator.ts +256 -0
  178. package/src/engine/drift-detector.ts +902 -0
  179. package/src/engine/executor.ts +1035 -0
  180. package/src/engine/index.ts +76 -0
  181. package/src/engine/orchestrator.ts +636 -0
  182. package/src/engine/planner.ts +720 -0
  183. package/src/engine/safety.ts +743 -0
  184. package/src/engine/verifier.ts +770 -0
  185. package/src/enterprise/audit.ts +348 -0
  186. package/src/enterprise/auth.ts +270 -0
  187. package/src/enterprise/billing.ts +822 -0
  188. package/src/enterprise/index.ts +17 -0
  189. package/src/enterprise/teams.ts +443 -0
  190. package/src/generator/best-practices.ts +1608 -0
  191. package/src/generator/helm.ts +630 -0
  192. package/src/generator/index.ts +37 -0
  193. package/src/generator/intent-parser.ts +514 -0
  194. package/src/generator/kubernetes.ts +976 -0
  195. package/src/generator/terraform.ts +1867 -0
  196. package/src/history/index.ts +8 -0
  197. package/src/history/manager.ts +322 -0
  198. package/src/history/types.ts +34 -0
  199. package/src/hooks/config.ts +432 -0
  200. package/src/hooks/engine.ts +391 -0
  201. package/src/hooks/index.ts +4 -0
  202. package/src/llm/auth-bridge.ts +198 -0
  203. package/src/llm/circuit-breaker.ts +140 -0
  204. package/src/llm/config-loader.ts +201 -0
  205. package/src/llm/cost-calculator.ts +171 -0
  206. package/src/llm/index.ts +8 -0
  207. package/src/llm/model-aliases.ts +115 -0
  208. package/src/llm/provider-registry.ts +63 -0
  209. package/src/llm/providers/anthropic.ts +433 -0
  210. package/src/llm/providers/bedrock.ts +477 -0
  211. package/src/llm/providers/google.ts +405 -0
  212. package/src/llm/providers/ollama.ts +767 -0
  213. package/src/llm/providers/openai-compatible.ts +340 -0
  214. package/src/llm/providers/openai.ts +328 -0
  215. package/src/llm/providers/openrouter.ts +338 -0
  216. package/src/llm/router.ts +1035 -0
  217. package/src/llm/types.ts +232 -0
  218. package/src/lsp/client.ts +298 -0
  219. package/src/lsp/languages.ts +116 -0
  220. package/src/lsp/manager.ts +278 -0
  221. package/src/mcp/client.ts +402 -0
  222. package/src/mcp/index.ts +5 -0
  223. package/src/mcp/manager.ts +133 -0
  224. package/src/nimbus.ts +214 -0
  225. package/src/plugins/index.ts +27 -0
  226. package/src/plugins/loader.ts +334 -0
  227. package/src/plugins/manager.ts +376 -0
  228. package/src/plugins/types.ts +284 -0
  229. package/src/scanners/cicd-scanner.ts +258 -0
  230. package/src/scanners/cloud-scanner.ts +466 -0
  231. package/src/scanners/framework-scanner.ts +469 -0
  232. package/src/scanners/iac-scanner.ts +388 -0
  233. package/src/scanners/index.ts +539 -0
  234. package/src/scanners/language-scanner.ts +276 -0
  235. package/src/scanners/package-manager-scanner.ts +277 -0
  236. package/src/scanners/types.ts +172 -0
  237. package/src/sessions/manager.ts +365 -0
  238. package/src/sessions/types.ts +44 -0
  239. package/src/sharing/sync.ts +296 -0
  240. package/src/sharing/viewer.ts +97 -0
  241. package/src/snapshots/index.ts +2 -0
  242. package/src/snapshots/manager.ts +530 -0
  243. package/src/state/artifacts.ts +147 -0
  244. package/src/state/audit.ts +137 -0
  245. package/src/state/billing.ts +240 -0
  246. package/src/state/checkpoints.ts +117 -0
  247. package/src/state/config.ts +67 -0
  248. package/src/state/conversations.ts +14 -0
  249. package/src/state/credentials.ts +154 -0
  250. package/src/state/db.ts +58 -0
  251. package/src/state/index.ts +26 -0
  252. package/src/state/messages.ts +115 -0
  253. package/src/state/projects.ts +123 -0
  254. package/src/state/schema.ts +236 -0
  255. package/src/state/sessions.ts +147 -0
  256. package/src/state/teams.ts +200 -0
  257. package/src/telemetry.ts +108 -0
  258. package/src/tools/aws-ops.ts +952 -0
  259. package/src/tools/azure-ops.ts +579 -0
  260. package/src/tools/file-ops.ts +593 -0
  261. package/src/tools/gcp-ops.ts +625 -0
  262. package/src/tools/git-ops.ts +773 -0
  263. package/src/tools/github-ops.ts +799 -0
  264. package/src/tools/helm-ops.ts +943 -0
  265. package/src/tools/index.ts +17 -0
  266. package/src/tools/k8s-ops.ts +819 -0
  267. package/src/tools/schemas/converter.ts +184 -0
  268. package/src/tools/schemas/devops.ts +612 -0
  269. package/src/tools/schemas/index.ts +73 -0
  270. package/src/tools/schemas/standard.ts +1144 -0
  271. package/src/tools/schemas/types.ts +705 -0
  272. package/src/tools/terraform-ops.ts +862 -0
  273. package/src/types/ambient.d.ts +193 -0
  274. package/src/types/config.ts +83 -0
  275. package/src/types/drift.ts +116 -0
  276. package/src/types/enterprise.ts +335 -0
  277. package/src/types/index.ts +20 -0
  278. package/src/types/plan.ts +44 -0
  279. package/src/types/request.ts +65 -0
  280. package/src/types/response.ts +54 -0
  281. package/src/types/service.ts +51 -0
  282. package/src/ui/App.tsx +997 -0
  283. package/src/ui/DeployPreview.tsx +169 -0
  284. package/src/ui/Header.tsx +68 -0
  285. package/src/ui/InputBox.tsx +350 -0
  286. package/src/ui/MessageList.tsx +585 -0
  287. package/src/ui/PermissionPrompt.tsx +151 -0
  288. package/src/ui/StatusBar.tsx +158 -0
  289. package/src/ui/ToolCallDisplay.tsx +409 -0
  290. package/src/ui/chat-ui.ts +853 -0
  291. package/src/ui/index.ts +33 -0
  292. package/src/ui/ink/index.ts +711 -0
  293. package/src/ui/streaming.ts +176 -0
  294. package/src/ui/types.ts +57 -0
  295. package/src/utils/analytics.ts +72 -0
  296. package/src/utils/cost-warning.ts +27 -0
  297. package/src/utils/env.ts +46 -0
  298. package/src/utils/errors.ts +69 -0
  299. package/src/utils/event-bus.ts +38 -0
  300. package/src/utils/index.ts +24 -0
  301. package/src/utils/logger.ts +171 -0
  302. package/src/utils/rate-limiter.ts +121 -0
  303. package/src/utils/service-auth.ts +49 -0
  304. package/src/utils/validation.ts +53 -0
  305. package/src/version.ts +4 -0
  306. package/src/watcher/index.ts +163 -0
  307. package/src/wizard/approval.ts +383 -0
  308. package/src/wizard/index.ts +25 -0
  309. package/src/wizard/prompts.ts +338 -0
  310. package/src/wizard/types.ts +171 -0
  311. package/src/wizard/ui.ts +556 -0
  312. package/src/wizard/wizard.ts +304 -0
  313. package/tsconfig.json +24 -0
@@ -0,0 +1,227 @@
1
+ /**
2
+ * Compaction Agent
3
+ *
4
+ * Uses a fast LLM model (haiku) to summarize earlier conversation context
5
+ * while preserving key information needed for continuity.
6
+ *
7
+ * The compaction agent is invoked automatically by the context manager
8
+ * when the conversation exceeds the configured threshold, or manually
9
+ * by the user via a `/compact` command in the TUI.
10
+ *
11
+ * Key design decisions:
12
+ * - Uses the cheapest available model (haiku alias) to minimize cost.
13
+ * - Truncates very long tool outputs before sending to the summarizer.
14
+ * - Falls back to a simple extractive summary if the LLM call fails.
15
+ * - Preserves all technical details, file paths, and decisions.
16
+ *
17
+ * @module agent/compaction-agent
18
+ */
19
+
20
+ import type { LLMRouter } from '../llm/router';
21
+ import { getTextContent, type LLMMessage } from '../llm/types';
22
+ import { ContextManager, estimateTokens, type CompactionResult } from './context-manager';
23
+
24
+ // ---------------------------------------------------------------------------
25
+ // Public Types
26
+ // ---------------------------------------------------------------------------
27
+
28
/**
 * Options for running compaction.
 *
 * Supplied by the caller (context manager or `/compact` command handler)
 * to control which router/model performs the summarization.
 */
export interface CompactionOptions {
  /** LLM Router instance for making summary calls. */
  router: LLMRouter;
  /** Optional focus area for the summary (e.g. "terraform changes"). */
  focusArea?: string;
  /** Model to use for compaction (default: haiku). */
  model?: string;
}
37
+
38
+ // ---------------------------------------------------------------------------
39
+ // Constants
40
+ // ---------------------------------------------------------------------------
41
+
42
/**
 * The system prompt given to the compaction model.
 *
 * Instructs the summarizer to retain technical detail (file paths, resource
 * names, errors, decisions) and to emit a fixed five-section structure.
 * The "under 2000 tokens" rule pairs with the `maxTokens: 2048` budget used
 * by the summarization call in `runCompaction`.
 */
const COMPACTION_SYSTEM_PROMPT = `You are a conversation summarizer for the Nimbus CLI agent. Your job is to create a concise summary of a conversation between a user and an AI assistant that helps with cloud infrastructure and DevOps tasks.

Rules:
1. Preserve ALL important technical details: file paths, resource names, configuration values, error messages, decisions made.
2. Preserve the user's original intent and any requirements they specified.
3. Preserve the current state of any ongoing work (what was done, what remains).
4. Remove conversational filler, repeated information, and verbose tool outputs.
5. Use bullet points for clarity.
6. Keep the summary under 2000 tokens.
7. Structure the summary as:
- **User's Goal**: What the user is trying to accomplish
- **Key Decisions**: Important choices that were made
- **Work Completed**: What actions were taken and their results
- **Current State**: Where things stand now
- **Pending Items**: What still needs to be done (if any)`;
58
+
59
+ // ---------------------------------------------------------------------------
60
+ // Public API
61
+ // ---------------------------------------------------------------------------
62
+
63
+ /**
64
+ * Run the compaction agent to summarize a set of messages.
65
+ *
66
+ * Splits the conversation into messages to preserve and messages to
67
+ * summarize (using the context manager's selection logic), sends the
68
+ * latter to a fast LLM for summarization, then reassembles a compacted
69
+ * message array.
70
+ *
71
+ * @param messages - The full conversation message array.
72
+ * @param contextManager - The context manager instance (provides selection logic).
73
+ * @param options - Compaction options (router, model, focus area).
74
+ * @returns The compacted messages and a result summary.
75
+ */
76
+ export async function runCompaction(
77
+ messages: LLMMessage[],
78
+ contextManager: ContextManager,
79
+ options: CompactionOptions
80
+ ): Promise<{ messages: LLMMessage[]; result: CompactionResult }> {
81
+ const { preserved, toSummarize } = contextManager.selectPreservedMessages(messages);
82
+
83
+ // Nothing to summarize -- return early
84
+ if (toSummarize.length === 0) {
85
+ const totalTokens = messages.reduce(
86
+ (sum, m) => sum + estimateTokens(getTextContent(m.content)),
87
+ 0
88
+ );
89
+ return {
90
+ messages,
91
+ result: {
92
+ originalTokens: totalTokens,
93
+ compactedTokens: totalTokens,
94
+ savedTokens: 0,
95
+ summaryPreserved: false,
96
+ },
97
+ };
98
+ }
99
+
100
+ // Format messages for the summarizer
101
+ const conversationText = formatMessagesForSummary(toSummarize);
102
+ const originalTokens = estimateTokens(conversationText);
103
+
104
+ // Build the user prompt for the summarizer
105
+ let userPrompt = `Please summarize the following conversation between a user and the Nimbus AI assistant:\n\n${conversationText}`;
106
+ if (options.focusArea) {
107
+ userPrompt += `\n\nPay special attention to: ${options.focusArea}`;
108
+ }
109
+
110
+ // Call the LLM for summarization using a fast, cheap model
111
+ const model = options.model ?? 'haiku';
112
+ let summary: string;
113
+
114
+ try {
115
+ const response = await options.router.route({
116
+ messages: [
117
+ { role: 'system', content: COMPACTION_SYSTEM_PROMPT },
118
+ { role: 'user', content: userPrompt },
119
+ ],
120
+ model,
121
+ maxTokens: 2048,
122
+ });
123
+ summary = response.content;
124
+ } catch {
125
+ // If LLM call fails, fall back to a simple extractive summary
126
+ summary = fallbackSummary(toSummarize);
127
+ }
128
+
129
+ // Reassemble the compacted message array
130
+ const compactedMessages = contextManager.buildCompactedMessages(preserved, summary);
131
+ const compactedTokens = compactedMessages.reduce(
132
+ (sum, m) => sum + estimateTokens(getTextContent(m.content)),
133
+ 0
134
+ );
135
+
136
+ return {
137
+ messages: compactedMessages,
138
+ result: {
139
+ originalTokens,
140
+ compactedTokens,
141
+ savedTokens: originalTokens - estimateTokens(summary),
142
+ summaryPreserved: true,
143
+ },
144
+ };
145
+ }
146
+
147
+ /**
148
+ * Run manual compaction from a `/compact` command.
149
+ *
150
+ * Creates a temporary context manager with default settings and
151
+ * delegates to {@link runCompaction}.
152
+ *
153
+ * @param messages - The full conversation message array.
154
+ * @param options - Compaction options plus an optional max token override.
155
+ * @returns The compacted messages and a result summary.
156
+ */
157
+ export async function runManualCompaction(
158
+ messages: LLMMessage[],
159
+ options: CompactionOptions & { maxContextTokens?: number }
160
+ ): Promise<{ messages: LLMMessage[]; result: CompactionResult }> {
161
+ const contextManager = new ContextManager({
162
+ maxContextTokens: options.maxContextTokens,
163
+ preserveRecentMessages: 5,
164
+ });
165
+ return runCompaction(messages, contextManager, options);
166
+ }
167
+
168
+ // ---------------------------------------------------------------------------
169
+ // Internal Helpers
170
+ // ---------------------------------------------------------------------------
171
+
172
+ /**
173
+ * Format messages into a readable conversation transcript.
174
+ *
175
+ * Each message is labelled with its role. Very long tool outputs are
176
+ * truncated to avoid overwhelming the summarizer model. Tool call
177
+ * metadata is included inline for context.
178
+ */
179
+ function formatMessagesForSummary(messages: LLMMessage[]): string {
180
+ const parts: string[] = [];
181
+
182
+ for (const msg of messages) {
183
+ const role = msg.role === 'user' ? 'User' : msg.role === 'assistant' ? 'Assistant' : 'Tool';
184
+ const content = getTextContent(msg.content);
185
+
186
+ // Truncate very long tool outputs to keep summarizer input manageable
187
+ const truncated = content.length > 2000 ? `${content.slice(0, 2000)}... [truncated]` : content;
188
+
189
+ parts.push(`[${role}]: ${truncated}`);
190
+
191
+ // Include tool call info if present
192
+ if (msg.toolCalls) {
193
+ for (const tc of msg.toolCalls) {
194
+ parts.push(` [Tool Call: ${tc.function.name}(${tc.function.arguments.slice(0, 200)})]`);
195
+ }
196
+ }
197
+ }
198
+
199
+ return parts.join('\n\n');
200
+ }
201
+
202
+ /**
203
+ * Fallback summary when the LLM is unavailable.
204
+ *
205
+ * Produces a simple extractive summary by listing message counts
206
+ * and the first few user messages. This is better than nothing when
207
+ * the compaction model cannot be reached.
208
+ */
209
+ function fallbackSummary(messages: LLMMessage[]): string {
210
+ const userMessages = messages.filter(m => m.role === 'user');
211
+ const assistantMessages = messages.filter(m => m.role === 'assistant');
212
+
213
+ const parts: string[] = ['**Conversation Summary (auto-generated)**\n'];
214
+ parts.push(
215
+ `- ${userMessages.length} user messages and ${assistantMessages.length} assistant responses`
216
+ );
217
+
218
+ // Extract key topics from user messages
219
+ for (const msg of userMessages.slice(0, 5)) {
220
+ const content = getTextContent(msg.content);
221
+ if (content.length > 0) {
222
+ parts.push(`- User asked: "${content.slice(0, 150)}${content.length > 150 ? '...' : ''}"`);
223
+ }
224
+ }
225
+
226
+ return parts.join('\n');
227
+ }
@@ -0,0 +1,435 @@
1
+ /**
2
+ * Context Manager — Token Tracking & Auto-Compact
3
+ *
4
+ * Tracks cumulative token usage across the agent loop and triggers
5
+ * automatic context compaction when usage exceeds a configurable
6
+ * threshold (default 85% of the model's context window).
7
+ *
8
+ * The manager provides:
9
+ * - Token estimation for messages, system prompts, and tool definitions.
10
+ * - A breakdown of how the context budget is being consumed.
11
+ * - Message selection logic for deciding what to preserve vs. summarize.
12
+ * - A builder for reassembling messages after compaction.
13
+ *
14
+ * Configuration can be supplied via constructor options or read from the
15
+ * Nimbus config database (keys: `context.auto_compact_threshold`,
16
+ * `context.max_file_injection`).
17
+ *
18
+ * @module agent/context-manager
19
+ */
20
+
21
+ import { getTextContent, type LLMMessage } from '../llm/types';
22
+ import { getConfig } from '../state/config';
23
+
24
+ // ---------------------------------------------------------------------------
25
+ // Public Types
26
+ // ---------------------------------------------------------------------------
27
+
28
+ /** Detailed breakdown of how the context window budget is being used. */
29
+ export interface ContextBreakdown {
30
+ /** Tokens consumed by the base system prompt (excluding NIMBUS.md). */
31
+ systemPrompt: number;
32
+ /** Tokens consumed by NIMBUS.md instructions within the system prompt. */
33
+ nimbusInstructions: number;
34
+ /** Tokens consumed by all conversation messages. */
35
+ messages: number;
36
+ /** Tokens consumed by tool definition schemas. */
37
+ toolDefinitions: number;
38
+ /** Sum of all token categories. */
39
+ total: number;
40
+ /** Total available budget (model context window size). */
41
+ budget: number;
42
+ /** Percentage of budget currently in use (0-100). */
43
+ usagePercent: number;
44
+ }
45
+
46
+ /** Result of a compaction operation. */
47
+ export interface CompactionResult {
48
+ /** Token count of the messages that were summarized. */
49
+ originalTokens: number;
50
+ /** Token count of the compacted message array. */
51
+ compactedTokens: number;
52
+ /** Tokens saved by compaction (originalTokens - summary tokens). */
53
+ savedTokens: number;
54
+ /** Whether a proper LLM summary was produced (false = fallback used). */
55
+ summaryPreserved: boolean;
56
+ }
57
+
58
+ /** Configuration options for the context manager. */
59
+ export interface ContextManagerOptions {
60
+ /** Max context window tokens (default: auto-detected from model, fallback 200000). */
61
+ maxContextTokens?: number;
62
+ /** Model identifier — used to auto-detect context window size. */
63
+ model?: string;
64
+ /** Threshold percentage to trigger auto-compact (0.0 - 1.0, default: 0.85). */
65
+ autoCompactThreshold?: number;
66
+ /** Number of recent messages to always preserve during compaction (default: 5). */
67
+ preserveRecentMessages?: number;
68
+ /** NIMBUS.md section keys that should always remain in context. */
69
+ alwaysInContext?: string[];
70
+ }
71
+
72
+ // ---------------------------------------------------------------------------
73
+ // Per-Model Context Window Sizes
74
+ // ---------------------------------------------------------------------------
75
+
76
+ /**
77
+ * Known context window sizes (in tokens) for popular models.
78
+ *
79
+ * When a model is not listed here, the manager falls back to the
80
+ * `maxContextTokens` option (default: 200 000).
81
+ */
82
+ const MODEL_CONTEXT_WINDOWS: Record<string, number> = {
83
+ // Anthropic
84
+ 'claude-opus-4-20250514': 200_000,
85
+ 'claude-sonnet-4-20250514': 200_000,
86
+ 'claude-haiku-4-20250514': 200_000,
87
+ 'claude-3-5-sonnet-20241022': 200_000,
88
+ 'claude-3-5-haiku-20241022': 200_000,
89
+ 'claude-3-opus-20240229': 200_000,
90
+ 'claude-3-sonnet-20240229': 200_000,
91
+ 'claude-3-haiku-20240307': 200_000,
92
+
93
+ // OpenAI
94
+ 'gpt-4o': 128_000,
95
+ 'gpt-4o-mini': 128_000,
96
+ 'gpt-4-turbo': 128_000,
97
+ 'gpt-4': 8_192,
98
+ 'gpt-3.5-turbo': 16_385,
99
+ o1: 200_000,
100
+ 'o1-mini': 128_000,
101
+ 'o1-preview': 128_000,
102
+ 'o3-mini': 200_000,
103
+
104
+ // Google
105
+ 'gemini-2.0-flash-exp': 1_048_576,
106
+ 'gemini-1.5-pro': 2_097_152,
107
+ 'gemini-1.5-flash': 1_048_576,
108
+
109
+ // Groq (Llama)
110
+ 'llama-3.1-70b-versatile': 131_072,
111
+ 'llama-3.1-8b-instant': 131_072,
112
+ 'llama-3.3-70b-versatile': 131_072,
113
+
114
+ // DeepSeek
115
+ 'deepseek-chat': 64_000,
116
+ 'deepseek-coder': 64_000,
117
+ 'deepseek-reasoner': 64_000,
118
+
119
+ // Local (Ollama defaults — dynamic lookup can override)
120
+ 'llama3.2': 128_000,
121
+ mistral: 32_768,
122
+ codellama: 16_384,
123
+ };
124
+
125
+ /**
126
+ * Look up the context window size for a model identifier.
127
+ *
128
+ * Tries exact match first, then prefix match (for versioned model IDs
129
+ * like `claude-sonnet-4-20250514`), then returns `null` if unknown.
130
+ */
131
+ export function getModelContextWindow(model: string): number | null {
132
+ // Exact match
133
+ if (MODEL_CONTEXT_WINDOWS[model] !== undefined) {
134
+ return MODEL_CONTEXT_WINDOWS[model];
135
+ }
136
+
137
+ // Prefix match: e.g., "gpt-4o-2024-08-06" should match "gpt-4o"
138
+ for (const [key, value] of Object.entries(MODEL_CONTEXT_WINDOWS)) {
139
+ if (model.startsWith(key)) {
140
+ return value;
141
+ }
142
+ }
143
+
144
+ return null;
145
+ }
146
+
147
+ // ---------------------------------------------------------------------------
148
+ // Token Estimation Utilities
149
+ // ---------------------------------------------------------------------------
150
+
151
+ /**
152
+ * Rough token estimate based on character count.
153
+ *
154
+ * Uses the common heuristic of ~4 characters per token, which is a
155
+ * reasonable average across English text and source code.
156
+ *
157
+ * @param text - The text to estimate.
158
+ * @returns Approximate token count (rounded up).
159
+ */
160
+ export function estimateTokens(text: string): number {
161
+ return Math.ceil(text.length / 4);
162
+ }
163
+
164
+ /**
165
+ * Estimate token count for a single LLM message.
166
+ *
167
+ * Accounts for the message content, structural overhead (role, framing),
168
+ * and any tool calls embedded in the message.
169
+ *
170
+ * @param message - The LLM message to estimate.
171
+ * @returns Approximate token count.
172
+ */
173
+ export function estimateMessageTokens(message: LLMMessage): number {
174
+ let tokens = 0;
175
+
176
+ tokens += estimateTokens(getTextContent(message.content));
177
+
178
+ // Add overhead for role and message structure
179
+ tokens += 4;
180
+
181
+ // Tool calls add extra tokens for name, arguments, and JSON structure
182
+ if (message.toolCalls) {
183
+ for (const tc of message.toolCalls) {
184
+ tokens += estimateTokens(tc.function.name);
185
+ tokens += estimateTokens(tc.function.arguments);
186
+ tokens += 10; // structural overhead per tool call
187
+ }
188
+ }
189
+
190
+ return tokens;
191
+ }
192
+
193
+ // ---------------------------------------------------------------------------
194
+ // ContextManager Class
195
+ // ---------------------------------------------------------------------------
196
+
197
+ /**
198
+ * Manages context window budget and auto-compaction decisions.
199
+ *
200
+ * Create one instance per agent session. The manager does not hold
201
+ * conversation state itself -- it operates on message arrays passed in
202
+ * by the caller.
203
+ */
204
+ export class ContextManager {
205
+ private maxContextTokens: number;
206
+ private autoCompactThreshold: number;
207
+ private preserveRecentMessages: number;
208
+ private alwaysInContext: string[];
209
+
210
+ constructor(options?: ContextManagerOptions) {
211
+ // Try loading from config DB, fall back to options/defaults
212
+ const configThreshold = getConfigSafe('context.auto_compact_threshold');
213
+
214
+ // Auto-detect context window from model if provided, then options, then default
215
+ const modelWindow = options?.model ? getModelContextWindow(options.model) : null;
216
+ this.maxContextTokens = options?.maxContextTokens ?? modelWindow ?? 200_000;
217
+ this.autoCompactThreshold = configThreshold ?? options?.autoCompactThreshold ?? 0.85;
218
+ this.preserveRecentMessages = options?.preserveRecentMessages ?? 5;
219
+ this.alwaysInContext = options?.alwaysInContext ?? [];
220
+ }
221
+
222
+ /**
223
+ * Check whether auto-compaction should be triggered.
224
+ *
225
+ * Returns `true` if the estimated token usage is at or above the
226
+ * configured threshold percentage of the context window.
227
+ *
228
+ * @param systemPrompt - The full system prompt string.
229
+ * @param messages - Current conversation messages.
230
+ * @param toolDefinitionsTokens - Pre-computed token count for tool schemas.
231
+ * @returns `true` if compaction should run.
232
+ */
233
+ shouldCompact(
234
+ systemPrompt: string,
235
+ messages: LLMMessage[],
236
+ toolDefinitionsTokens: number
237
+ ): boolean {
238
+ const usage = this.calculateUsage(systemPrompt, messages, toolDefinitionsTokens);
239
+ return usage.usagePercent >= this.autoCompactThreshold * 100;
240
+ }
241
+
242
+ /**
243
+ * Calculate a detailed context usage breakdown.
244
+ *
245
+ * Separates the system prompt into base instructions and NIMBUS.md
246
+ * content (if present), and sums up messages and tool definitions
247
+ * to produce a full picture of context window consumption.
248
+ *
249
+ * @param systemPrompt - The full system prompt string.
250
+ * @param messages - Current conversation messages.
251
+ * @param toolDefinitionsTokens - Pre-computed token count for tool schemas.
252
+ * @returns A {@link ContextBreakdown} with per-category token counts.
253
+ */
254
+ calculateUsage(
255
+ systemPrompt: string,
256
+ messages: LLMMessage[],
257
+ toolDefinitionsTokens: number
258
+ ): ContextBreakdown {
259
+ const systemPromptTokens = estimateTokens(systemPrompt);
260
+
261
+ // Separate NIMBUS.md instructions if they appear in system prompt
262
+ const nimbusMarker = '# NIMBUS.md';
263
+ const nimbusIdx = systemPrompt.indexOf(nimbusMarker);
264
+ let nimbusInstructionsTokens = 0;
265
+ let baseSystemTokens = systemPromptTokens;
266
+
267
+ if (nimbusIdx >= 0) {
268
+ const nimbusContent = systemPrompt.slice(nimbusIdx);
269
+ nimbusInstructionsTokens = estimateTokens(nimbusContent);
270
+ baseSystemTokens = systemPromptTokens - nimbusInstructionsTokens;
271
+ }
272
+
273
+ const messagesTokens = messages.reduce((sum, msg) => sum + estimateMessageTokens(msg), 0);
274
+
275
+ const total = systemPromptTokens + messagesTokens + toolDefinitionsTokens;
276
+ const usagePercent =
277
+ this.maxContextTokens > 0 ? Math.round((total / this.maxContextTokens) * 100) : 0;
278
+
279
+ return {
280
+ systemPrompt: baseSystemTokens,
281
+ nimbusInstructions: nimbusInstructionsTokens,
282
+ messages: messagesTokens,
283
+ toolDefinitions: toolDefinitionsTokens,
284
+ total,
285
+ budget: this.maxContextTokens,
286
+ usagePercent,
287
+ };
288
+ }
289
+
290
+ /**
291
+ * Select which messages to preserve during compaction.
292
+ *
293
+ * Preservation rules:
294
+ * - The first message is always kept (initial user context).
295
+ * - The last N messages are always kept (recent conversation).
296
+ * - Tool messages near the recent window are kept (active tool state).
297
+ * - Previous compaction summary blocks are always kept.
298
+ * - Everything else is marked for summarization.
299
+ *
300
+ * @param messages - The full conversation message array.
301
+ * @returns An object with `preserved` and `toSummarize` arrays.
302
+ */
303
+ selectPreservedMessages(messages: LLMMessage[]): {
304
+ preserved: LLMMessage[];
305
+ toSummarize: LLMMessage[];
306
+ } {
307
+ if (messages.length <= this.preserveRecentMessages + 1) {
308
+ return { preserved: [...messages], toSummarize: [] };
309
+ }
310
+
311
+ const preserved: LLMMessage[] = [];
312
+ const toSummarize: LLMMessage[] = [];
313
+
314
+ for (let i = 0; i < messages.length; i++) {
315
+ const msg = messages[i];
316
+ const isFirst = i === 0;
317
+ const isRecent = i >= messages.length - this.preserveRecentMessages;
318
+ const hasActiveTools =
319
+ msg.role === 'tool' && i >= messages.length - this.preserveRecentMessages - 2;
320
+
321
+ // Always preserve summary blocks (from previous compactions)
322
+ const isSummary = getTextContent(msg.content).startsWith('[Context Summary]');
323
+
324
+ if (isFirst || isRecent || hasActiveTools || isSummary) {
325
+ preserved.push(msg);
326
+ } else {
327
+ toSummarize.push(msg);
328
+ }
329
+ }
330
+
331
+ return { preserved, toSummarize };
332
+ }
333
+
334
+ /**
335
+ * Build the compacted message array by inserting a summary.
336
+ *
337
+ * Places the summary as a user message immediately after the first
338
+ * preserved message, then appends all remaining preserved messages.
339
+ * The summary is wrapped with `[Context Summary]` markers so future
340
+ * compaction passes can identify and preserve it.
341
+ *
342
+ * @param preserved - Messages to keep verbatim.
343
+ * @param summary - The LLM-generated (or fallback) summary text.
344
+ * @returns A new message array ready to replace the original.
345
+ */
346
+ buildCompactedMessages(preserved: LLMMessage[], summary: string): LLMMessage[] {
347
+ const result: LLMMessage[] = [];
348
+
349
+ // Keep the first preserved message (typically the first user message)
350
+ if (preserved.length > 0) {
351
+ result.push(preserved[0]);
352
+ }
353
+
354
+ // Insert the summary as a user message with a clear marker
355
+ result.push({
356
+ role: 'user' as const,
357
+ content: `[Context Summary] The following is a summary of the earlier conversation:\n\n${summary}\n\n---\nThe conversation continues below.`,
358
+ });
359
+
360
+ // Append remaining preserved messages
361
+ for (let i = 1; i < preserved.length; i++) {
362
+ result.push(preserved[i]);
363
+ }
364
+
365
+ return result;
366
+ }
367
+
368
+ /**
369
+ * Get the current configuration values.
370
+ *
371
+ * Useful for displaying context status in the TUI.
372
+ */
373
+ getConfig(): {
374
+ maxContextTokens: number;
375
+ autoCompactThreshold: number;
376
+ preserveRecentMessages: number;
377
+ } {
378
+ return {
379
+ maxContextTokens: this.maxContextTokens,
380
+ autoCompactThreshold: this.autoCompactThreshold,
381
+ preserveRecentMessages: this.preserveRecentMessages,
382
+ };
383
+ }
384
+
385
+ /**
386
+ * Update the max context tokens.
387
+ *
388
+ * Call this when the model changes mid-session so the compaction
389
+ * threshold adjusts to the new model's context window.
390
+ *
391
+ * @param tokens - The new maximum context window size.
392
+ */
393
+ setMaxContextTokens(tokens: number): void {
394
+ this.maxContextTokens = tokens;
395
+ }
396
+
397
+ /**
398
+ * Update the context window based on a model identifier.
399
+ *
400
+ * Looks up the model's known context window size. If the model is
401
+ * not in the built-in map, the current budget is left unchanged.
402
+ *
403
+ * @param model - The model identifier (e.g., "gpt-4o", "claude-sonnet-4-20250514").
404
+ * @returns `true` if the budget was updated, `false` if model is unknown.
405
+ */
406
+ setModel(model: string): boolean {
407
+ // Strip provider prefix (e.g., "openai/gpt-4o" → "gpt-4o")
408
+ const stripped = model.includes('/') ? model.split('/').slice(1).join('/') : model;
409
+ const window = getModelContextWindow(stripped);
410
+ if (window !== null) {
411
+ this.maxContextTokens = window;
412
+ return true;
413
+ }
414
+ return false;
415
+ }
416
+ }
417
+
418
+ // ---------------------------------------------------------------------------
419
+ // Internal Helpers
420
+ // ---------------------------------------------------------------------------
421
+
422
+ /**
423
+ * Safely read a config value without crashing if the DB is not ready.
424
+ *
425
+ * During early initialization the SQLite database may not yet be open.
426
+ * This wrapper catches any error and returns `null` so the constructor
427
+ * can fall back to provided options or built-in defaults.
428
+ */
429
+ function getConfigSafe(key: string): any | null {
430
+ try {
431
+ return getConfig(key);
432
+ } catch {
433
+ return null;
434
+ }
435
+ }