@build-astron-co/nimbus 0.4.2 → 0.4.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (430) hide show
  1. package/dist/src/agent/compaction-agent.js +24 -12
  2. package/dist/src/agent/context-manager.js +2 -1
  3. package/dist/src/agent/expand-files.js +2 -1
  4. package/dist/src/agent/loop.js +71 -33
  5. package/dist/src/agent/permissions.js +4 -2
  6. package/dist/src/agent/system-prompt.js +34 -17
  7. package/dist/src/app.js +1 -1
  8. package/dist/src/auth/keychain.js +8 -4
  9. package/dist/src/auth/store.js +70 -107
  10. package/dist/src/cli/init.js +35 -19
  11. package/dist/src/cli/run.js +18 -10
  12. package/dist/src/cli/serve.js +4 -2
  13. package/dist/src/cli.js +52 -11
  14. package/dist/src/commands/alias.js +5 -3
  15. package/dist/src/commands/audit/index.js +2 -1
  16. package/dist/src/commands/aws-terraform.js +36 -18
  17. package/dist/src/commands/completions.js +1 -1
  18. package/dist/src/commands/config.js +3 -2
  19. package/dist/src/commands/connect-github.js +92 -0
  20. package/dist/src/commands/cost/index.js +3 -2
  21. package/dist/src/commands/deploy.js +15 -10
  22. package/dist/src/commands/doctor.js +6 -3
  23. package/dist/src/commands/drift/index.js +2 -1
  24. package/dist/src/commands/export.js +5 -3
  25. package/dist/src/commands/generate-terraform.js +110 -2
  26. package/dist/src/commands/import.js +3 -3
  27. package/dist/src/commands/incident.js +10 -5
  28. package/dist/src/commands/login.js +8 -93
  29. package/dist/src/commands/logs.js +16 -8
  30. package/dist/src/commands/onboarding.js +6 -4
  31. package/dist/src/commands/pipeline.js +6 -3
  32. package/dist/src/commands/plugin.js +3 -2
  33. package/dist/src/commands/profile.js +27 -14
  34. package/dist/src/commands/questionnaire.js +1 -1
  35. package/dist/src/commands/rollback.js +3 -2
  36. package/dist/src/commands/rollout.js +5 -3
  37. package/dist/src/commands/runbook.js +17 -10
  38. package/dist/src/commands/schedule.js +10 -5
  39. package/dist/src/commands/status.js +2 -1
  40. package/dist/src/commands/team-context.js +12 -7
  41. package/dist/src/commands/template.js +1 -1
  42. package/dist/src/commands/tf/index.js +6 -3
  43. package/dist/src/commands/version.js +6 -3
  44. package/dist/src/commands/watch.js +6 -3
  45. package/dist/src/compat/sqlite.js +5 -3
  46. package/dist/src/config/mode-store.js +2 -1
  47. package/dist/src/config/profiles.js +4 -2
  48. package/dist/src/config/types.js +2 -1
  49. package/dist/src/engine/executor.js +8 -4
  50. package/dist/src/engine/planner.js +9 -5
  51. package/dist/src/llm/providers/anthropic.js +6 -3
  52. package/dist/src/llm/providers/ollama.js +1 -1
  53. package/dist/src/llm/router.js +22 -7
  54. package/dist/src/sessions/manager.js +6 -3
  55. package/dist/src/sharing/viewer.js +2 -1
  56. package/dist/src/tools/file-ops.js +1 -2
  57. package/dist/src/tools/schemas/devops.js +197 -108
  58. package/dist/src/tools/schemas/standard.js +1 -1
  59. package/dist/src/ui/App.js +25 -13
  60. package/dist/src/ui/FileDiffModal.js +22 -11
  61. package/dist/src/ui/HelpModal.js +2 -1
  62. package/dist/src/ui/InputBox.js +6 -3
  63. package/dist/src/ui/MessageList.js +40 -20
  64. package/dist/src/ui/TerminalPane.js +2 -1
  65. package/dist/src/ui/ToolCallDisplay.js +12 -6
  66. package/dist/src/ui/TreePane.js +2 -1
  67. package/dist/src/ui/ink/index.js +37 -21
  68. package/dist/src/watcher/index.js +8 -4
  69. package/package.json +3 -5
  70. package/src/__tests__/alias.test.ts +0 -133
  71. package/src/__tests__/app.test.ts +0 -76
  72. package/src/__tests__/audit.test.ts +0 -877
  73. package/src/__tests__/circuit-breaker.test.ts +0 -116
  74. package/src/__tests__/cli-run.test.ts +0 -351
  75. package/src/__tests__/compat-sqlite.test.ts +0 -68
  76. package/src/__tests__/context-manager.test.ts +0 -632
  77. package/src/__tests__/context.test.ts +0 -242
  78. package/src/__tests__/devops-terminal-gaps.test.ts +0 -718
  79. package/src/__tests__/doctor.test.ts +0 -48
  80. package/src/__tests__/enterprise.test.ts +0 -401
  81. package/src/__tests__/export.test.ts +0 -236
  82. package/src/__tests__/gap-11-18-20.test.ts +0 -958
  83. package/src/__tests__/generator.test.ts +0 -433
  84. package/src/__tests__/helm-streaming.test.ts +0 -127
  85. package/src/__tests__/hooks.test.ts +0 -582
  86. package/src/__tests__/incident.test.ts +0 -179
  87. package/src/__tests__/init.test.ts +0 -487
  88. package/src/__tests__/intent-parser.test.ts +0 -229
  89. package/src/__tests__/llm-router.test.ts +0 -209
  90. package/src/__tests__/logs.test.ts +0 -107
  91. package/src/__tests__/loop-errors.test.ts +0 -244
  92. package/src/__tests__/lsp.test.ts +0 -293
  93. package/src/__tests__/modes.test.ts +0 -336
  94. package/src/__tests__/perf-optimizations.test.ts +0 -847
  95. package/src/__tests__/permissions.test.ts +0 -338
  96. package/src/__tests__/pipeline.test.ts +0 -50
  97. package/src/__tests__/polish-phase3.test.ts +0 -340
  98. package/src/__tests__/profile.test.ts +0 -237
  99. package/src/__tests__/rollback.test.ts +0 -83
  100. package/src/__tests__/runbook.test.ts +0 -219
  101. package/src/__tests__/schedule.test.ts +0 -206
  102. package/src/__tests__/serve.test.ts +0 -275
  103. package/src/__tests__/sessions.test.ts +0 -322
  104. package/src/__tests__/sharing.test.ts +0 -340
  105. package/src/__tests__/snapshots.test.ts +0 -581
  106. package/src/__tests__/standalone-migration.test.ts +0 -199
  107. package/src/__tests__/state-db.test.ts +0 -334
  108. package/src/__tests__/status.test.ts +0 -158
  109. package/src/__tests__/stream-with-tools.test.ts +0 -778
  110. package/src/__tests__/subagents.test.ts +0 -176
  111. package/src/__tests__/system-prompt.test.ts +0 -248
  112. package/src/__tests__/terminal-gap-v2.test.ts +0 -395
  113. package/src/__tests__/terminal-parity.test.ts +0 -393
  114. package/src/__tests__/tf-apply.test.ts +0 -187
  115. package/src/__tests__/tool-converter.test.ts +0 -256
  116. package/src/__tests__/tool-schemas.test.ts +0 -602
  117. package/src/__tests__/tools.test.ts +0 -144
  118. package/src/__tests__/version-json.test.ts +0 -184
  119. package/src/__tests__/version.test.ts +0 -49
  120. package/src/__tests__/watch.test.ts +0 -129
  121. package/src/agent/compaction-agent.ts +0 -266
  122. package/src/agent/context-manager.ts +0 -499
  123. package/src/agent/context.ts +0 -427
  124. package/src/agent/deploy-preview.ts +0 -487
  125. package/src/agent/expand-files.ts +0 -108
  126. package/src/agent/index.ts +0 -68
  127. package/src/agent/loop.ts +0 -1998
  128. package/src/agent/modes.ts +0 -429
  129. package/src/agent/permissions.ts +0 -513
  130. package/src/agent/subagents/base.ts +0 -116
  131. package/src/agent/subagents/cost.ts +0 -51
  132. package/src/agent/subagents/explore.ts +0 -42
  133. package/src/agent/subagents/general.ts +0 -54
  134. package/src/agent/subagents/index.ts +0 -102
  135. package/src/agent/subagents/infra.ts +0 -59
  136. package/src/agent/subagents/security.ts +0 -69
  137. package/src/agent/system-prompt.ts +0 -990
  138. package/src/app.ts +0 -180
  139. package/src/audit/activity-log.ts +0 -290
  140. package/src/audit/compliance-checker.ts +0 -540
  141. package/src/audit/cost-tracker.ts +0 -318
  142. package/src/audit/index.ts +0 -23
  143. package/src/audit/security-scanner.ts +0 -641
  144. package/src/auth/guard.ts +0 -75
  145. package/src/auth/index.ts +0 -56
  146. package/src/auth/keychain.ts +0 -82
  147. package/src/auth/oauth.ts +0 -465
  148. package/src/auth/providers.ts +0 -470
  149. package/src/auth/sso.ts +0 -113
  150. package/src/auth/store.ts +0 -505
  151. package/src/auth/types.ts +0 -187
  152. package/src/build.ts +0 -141
  153. package/src/cli/index.ts +0 -16
  154. package/src/cli/init.ts +0 -1227
  155. package/src/cli/openapi-spec.ts +0 -356
  156. package/src/cli/run.ts +0 -628
  157. package/src/cli/serve-auth.ts +0 -80
  158. package/src/cli/serve.ts +0 -539
  159. package/src/cli/web.ts +0 -71
  160. package/src/cli.ts +0 -1728
  161. package/src/clients/core-engine-client.ts +0 -227
  162. package/src/clients/enterprise-client.ts +0 -334
  163. package/src/clients/generator-client.ts +0 -351
  164. package/src/clients/git-client.ts +0 -627
  165. package/src/clients/github-client.ts +0 -410
  166. package/src/clients/helm-client.ts +0 -504
  167. package/src/clients/index.ts +0 -80
  168. package/src/clients/k8s-client.ts +0 -497
  169. package/src/clients/llm-client.ts +0 -161
  170. package/src/clients/rest-client.ts +0 -130
  171. package/src/clients/service-discovery.ts +0 -38
  172. package/src/clients/terraform-client.ts +0 -482
  173. package/src/clients/tools-client.ts +0 -1843
  174. package/src/clients/ws-client.ts +0 -115
  175. package/src/commands/alias.ts +0 -100
  176. package/src/commands/analyze/index.ts +0 -352
  177. package/src/commands/apply/helm.ts +0 -473
  178. package/src/commands/apply/index.ts +0 -213
  179. package/src/commands/apply/k8s.ts +0 -454
  180. package/src/commands/apply/terraform.ts +0 -582
  181. package/src/commands/ask.ts +0 -167
  182. package/src/commands/audit/index.ts +0 -357
  183. package/src/commands/auth-cloud.ts +0 -407
  184. package/src/commands/auth-list.ts +0 -134
  185. package/src/commands/auth-profile.ts +0 -121
  186. package/src/commands/auth-refresh.ts +0 -187
  187. package/src/commands/auth-status.ts +0 -141
  188. package/src/commands/aws/ec2.ts +0 -501
  189. package/src/commands/aws/iam.ts +0 -397
  190. package/src/commands/aws/index.ts +0 -133
  191. package/src/commands/aws/lambda.ts +0 -396
  192. package/src/commands/aws/rds.ts +0 -439
  193. package/src/commands/aws/s3.ts +0 -439
  194. package/src/commands/aws/vpc.ts +0 -393
  195. package/src/commands/aws-discover.ts +0 -542
  196. package/src/commands/aws-terraform.ts +0 -755
  197. package/src/commands/azure/aks.ts +0 -376
  198. package/src/commands/azure/functions.ts +0 -253
  199. package/src/commands/azure/index.ts +0 -116
  200. package/src/commands/azure/storage.ts +0 -478
  201. package/src/commands/azure/vm.ts +0 -355
  202. package/src/commands/billing/index.ts +0 -256
  203. package/src/commands/chat.ts +0 -320
  204. package/src/commands/completions.ts +0 -268
  205. package/src/commands/config.ts +0 -372
  206. package/src/commands/cost/cloud-cost-estimator.ts +0 -266
  207. package/src/commands/cost/estimator.ts +0 -79
  208. package/src/commands/cost/index.ts +0 -810
  209. package/src/commands/cost/parsers/terraform.ts +0 -273
  210. package/src/commands/cost/parsers/types.ts +0 -25
  211. package/src/commands/cost/pricing/aws.ts +0 -544
  212. package/src/commands/cost/pricing/azure.ts +0 -499
  213. package/src/commands/cost/pricing/gcp.ts +0 -396
  214. package/src/commands/cost/pricing/index.ts +0 -40
  215. package/src/commands/demo.ts +0 -250
  216. package/src/commands/deploy.ts +0 -260
  217. package/src/commands/doctor.ts +0 -1386
  218. package/src/commands/drift/index.ts +0 -787
  219. package/src/commands/explain.ts +0 -277
  220. package/src/commands/export.ts +0 -146
  221. package/src/commands/feedback.ts +0 -389
  222. package/src/commands/fix.ts +0 -324
  223. package/src/commands/fs/index.ts +0 -402
  224. package/src/commands/gcp/compute.ts +0 -325
  225. package/src/commands/gcp/functions.ts +0 -271
  226. package/src/commands/gcp/gke.ts +0 -438
  227. package/src/commands/gcp/iam.ts +0 -344
  228. package/src/commands/gcp/index.ts +0 -129
  229. package/src/commands/gcp/storage.ts +0 -284
  230. package/src/commands/generate-helm.ts +0 -1249
  231. package/src/commands/generate-k8s.ts +0 -1508
  232. package/src/commands/generate-terraform.ts +0 -1202
  233. package/src/commands/gh/index.ts +0 -863
  234. package/src/commands/git/index.ts +0 -1343
  235. package/src/commands/helm/index.ts +0 -1126
  236. package/src/commands/help.ts +0 -715
  237. package/src/commands/history.ts +0 -149
  238. package/src/commands/import.ts +0 -868
  239. package/src/commands/incident.ts +0 -166
  240. package/src/commands/index.ts +0 -367
  241. package/src/commands/init.ts +0 -1051
  242. package/src/commands/k8s/index.ts +0 -1137
  243. package/src/commands/login.ts +0 -716
  244. package/src/commands/logout.ts +0 -83
  245. package/src/commands/logs.ts +0 -167
  246. package/src/commands/onboarding.ts +0 -405
  247. package/src/commands/pipeline.ts +0 -186
  248. package/src/commands/plan/display.ts +0 -279
  249. package/src/commands/plan/index.ts +0 -599
  250. package/src/commands/plugin.ts +0 -398
  251. package/src/commands/preview.ts +0 -452
  252. package/src/commands/profile.ts +0 -342
  253. package/src/commands/questionnaire.ts +0 -1172
  254. package/src/commands/resume.ts +0 -47
  255. package/src/commands/rollback.ts +0 -315
  256. package/src/commands/rollout.ts +0 -88
  257. package/src/commands/runbook.ts +0 -346
  258. package/src/commands/schedule.ts +0 -236
  259. package/src/commands/status.ts +0 -252
  260. package/src/commands/team/index.ts +0 -346
  261. package/src/commands/team-context.ts +0 -220
  262. package/src/commands/template.ts +0 -233
  263. package/src/commands/tf/index.ts +0 -1093
  264. package/src/commands/upgrade.ts +0 -609
  265. package/src/commands/usage/index.ts +0 -134
  266. package/src/commands/version.ts +0 -174
  267. package/src/commands/watch.ts +0 -153
  268. package/src/compat/index.ts +0 -2
  269. package/src/compat/runtime.ts +0 -12
  270. package/src/compat/sqlite.ts +0 -177
  271. package/src/config/index.ts +0 -17
  272. package/src/config/manager.ts +0 -530
  273. package/src/config/mode-store.ts +0 -62
  274. package/src/config/profiles.ts +0 -84
  275. package/src/config/safety-policy.ts +0 -358
  276. package/src/config/schema.ts +0 -125
  277. package/src/config/types.ts +0 -609
  278. package/src/config/workspace-state.ts +0 -53
  279. package/src/context/context-db.ts +0 -199
  280. package/src/demo/index.ts +0 -349
  281. package/src/demo/scenarios/full-journey.ts +0 -229
  282. package/src/demo/scenarios/getting-started.ts +0 -127
  283. package/src/demo/scenarios/helm-release.ts +0 -341
  284. package/src/demo/scenarios/k8s-deployment.ts +0 -194
  285. package/src/demo/scenarios/terraform-vpc.ts +0 -170
  286. package/src/demo/types.ts +0 -92
  287. package/src/engine/cost-estimator.ts +0 -480
  288. package/src/engine/diagram-generator.ts +0 -256
  289. package/src/engine/drift-detector.ts +0 -902
  290. package/src/engine/executor.ts +0 -1066
  291. package/src/engine/index.ts +0 -76
  292. package/src/engine/orchestrator.ts +0 -636
  293. package/src/engine/planner.ts +0 -787
  294. package/src/engine/safety.ts +0 -743
  295. package/src/engine/verifier.ts +0 -770
  296. package/src/enterprise/audit.ts +0 -348
  297. package/src/enterprise/auth.ts +0 -270
  298. package/src/enterprise/billing.ts +0 -822
  299. package/src/enterprise/index.ts +0 -17
  300. package/src/enterprise/teams.ts +0 -443
  301. package/src/generator/best-practices.ts +0 -1608
  302. package/src/generator/helm.ts +0 -630
  303. package/src/generator/index.ts +0 -37
  304. package/src/generator/intent-parser.ts +0 -514
  305. package/src/generator/kubernetes.ts +0 -976
  306. package/src/generator/terraform.ts +0 -1875
  307. package/src/history/index.ts +0 -8
  308. package/src/history/manager.ts +0 -250
  309. package/src/history/types.ts +0 -34
  310. package/src/hooks/config.ts +0 -432
  311. package/src/hooks/engine.ts +0 -392
  312. package/src/hooks/index.ts +0 -4
  313. package/src/llm/auth-bridge.ts +0 -198
  314. package/src/llm/circuit-breaker.ts +0 -140
  315. package/src/llm/config-loader.ts +0 -201
  316. package/src/llm/cost-calculator.ts +0 -171
  317. package/src/llm/index.ts +0 -8
  318. package/src/llm/model-aliases.ts +0 -115
  319. package/src/llm/provider-registry.ts +0 -63
  320. package/src/llm/providers/anthropic.ts +0 -462
  321. package/src/llm/providers/bedrock.ts +0 -477
  322. package/src/llm/providers/google.ts +0 -405
  323. package/src/llm/providers/ollama.ts +0 -767
  324. package/src/llm/providers/openai-compatible.ts +0 -340
  325. package/src/llm/providers/openai.ts +0 -328
  326. package/src/llm/providers/openrouter.ts +0 -338
  327. package/src/llm/router.ts +0 -1104
  328. package/src/llm/types.ts +0 -232
  329. package/src/lsp/client.ts +0 -298
  330. package/src/lsp/languages.ts +0 -119
  331. package/src/lsp/manager.ts +0 -294
  332. package/src/mcp/client.ts +0 -402
  333. package/src/mcp/index.ts +0 -5
  334. package/src/mcp/manager.ts +0 -133
  335. package/src/nimbus.ts +0 -234
  336. package/src/plugins/index.ts +0 -27
  337. package/src/plugins/loader.ts +0 -334
  338. package/src/plugins/manager.ts +0 -376
  339. package/src/plugins/types.ts +0 -284
  340. package/src/scanners/cicd-scanner.ts +0 -258
  341. package/src/scanners/cloud-scanner.ts +0 -466
  342. package/src/scanners/framework-scanner.ts +0 -469
  343. package/src/scanners/iac-scanner.ts +0 -388
  344. package/src/scanners/index.ts +0 -539
  345. package/src/scanners/language-scanner.ts +0 -276
  346. package/src/scanners/package-manager-scanner.ts +0 -277
  347. package/src/scanners/types.ts +0 -172
  348. package/src/sessions/manager.ts +0 -472
  349. package/src/sessions/types.ts +0 -44
  350. package/src/sharing/sync.ts +0 -300
  351. package/src/sharing/viewer.ts +0 -163
  352. package/src/snapshots/index.ts +0 -2
  353. package/src/snapshots/manager.ts +0 -530
  354. package/src/state/artifacts.ts +0 -147
  355. package/src/state/audit.ts +0 -137
  356. package/src/state/billing.ts +0 -240
  357. package/src/state/checkpoints.ts +0 -117
  358. package/src/state/config.ts +0 -67
  359. package/src/state/conversations.ts +0 -14
  360. package/src/state/credentials.ts +0 -154
  361. package/src/state/db.ts +0 -58
  362. package/src/state/index.ts +0 -26
  363. package/src/state/messages.ts +0 -115
  364. package/src/state/projects.ts +0 -123
  365. package/src/state/schema.ts +0 -236
  366. package/src/state/sessions.ts +0 -147
  367. package/src/state/teams.ts +0 -200
  368. package/src/telemetry.ts +0 -108
  369. package/src/tools/aws-ops.ts +0 -952
  370. package/src/tools/azure-ops.ts +0 -579
  371. package/src/tools/file-ops.ts +0 -615
  372. package/src/tools/gcp-ops.ts +0 -625
  373. package/src/tools/git-ops.ts +0 -773
  374. package/src/tools/github-ops.ts +0 -799
  375. package/src/tools/helm-ops.ts +0 -943
  376. package/src/tools/index.ts +0 -17
  377. package/src/tools/k8s-ops.ts +0 -819
  378. package/src/tools/schemas/converter.ts +0 -184
  379. package/src/tools/schemas/devops.ts +0 -3502
  380. package/src/tools/schemas/index.ts +0 -73
  381. package/src/tools/schemas/standard.ts +0 -1148
  382. package/src/tools/schemas/types.ts +0 -735
  383. package/src/tools/spawn-exec.ts +0 -148
  384. package/src/tools/terraform-ops.ts +0 -862
  385. package/src/types/ambient.d.ts +0 -193
  386. package/src/types/config.ts +0 -83
  387. package/src/types/drift.ts +0 -116
  388. package/src/types/enterprise.ts +0 -335
  389. package/src/types/index.ts +0 -20
  390. package/src/types/plan.ts +0 -44
  391. package/src/types/request.ts +0 -65
  392. package/src/types/response.ts +0 -54
  393. package/src/types/service.ts +0 -51
  394. package/src/ui/App.tsx +0 -2114
  395. package/src/ui/DeployPreview.tsx +0 -174
  396. package/src/ui/FileDiffModal.tsx +0 -162
  397. package/src/ui/Header.tsx +0 -131
  398. package/src/ui/HelpModal.tsx +0 -57
  399. package/src/ui/InputBox.tsx +0 -503
  400. package/src/ui/MessageList.tsx +0 -1032
  401. package/src/ui/PermissionPrompt.tsx +0 -163
  402. package/src/ui/StatusBar.tsx +0 -277
  403. package/src/ui/TerminalPane.tsx +0 -84
  404. package/src/ui/ToolCallDisplay.tsx +0 -643
  405. package/src/ui/TreePane.tsx +0 -132
  406. package/src/ui/chat-ui.ts +0 -850
  407. package/src/ui/index.ts +0 -33
  408. package/src/ui/ink/index.ts +0 -1444
  409. package/src/ui/streaming.ts +0 -176
  410. package/src/ui/theme.ts +0 -104
  411. package/src/ui/types.ts +0 -75
  412. package/src/utils/analytics.ts +0 -72
  413. package/src/utils/cost-warning.ts +0 -27
  414. package/src/utils/env.ts +0 -46
  415. package/src/utils/errors.ts +0 -69
  416. package/src/utils/event-bus.ts +0 -38
  417. package/src/utils/index.ts +0 -24
  418. package/src/utils/logger.ts +0 -171
  419. package/src/utils/rate-limiter.ts +0 -121
  420. package/src/utils/service-auth.ts +0 -49
  421. package/src/utils/validation.ts +0 -53
  422. package/src/version.ts +0 -4
  423. package/src/watcher/index.ts +0 -214
  424. package/src/wizard/approval.ts +0 -383
  425. package/src/wizard/index.ts +0 -25
  426. package/src/wizard/prompts.ts +0 -338
  427. package/src/wizard/types.ts +0 -172
  428. package/src/wizard/ui.ts +0 -556
  429. package/src/wizard/wizard.ts +0 -304
  430. package/tsconfig.json +0 -24
package/src/llm/router.ts DELETED
@@ -1,1104 +0,0 @@
1
- /**
2
- * LLM Router
3
- * Routes requests to the appropriate provider based on model, cost optimization, and fallback logic.
4
- *
5
- * Refactored for the embedded Nimbus architecture. Key changes from the microservice version:
6
- * - Imports providers from local ./providers/ directory
7
- * - Integrates model alias resolution via resolveModelAlias
8
- * - Integrates auto-detection via detectProvider
9
- * - Supports OpenAI-compatible and Bedrock providers via env vars
10
- * - persistUsage writes to the embedded SQLite usage table (fire-and-forget)
11
- */
12
-
13
- import { logger } from '../utils';
14
- import {
15
- getTextContent,
16
- type LLMProvider,
17
- type CompletionRequest,
18
- type LLMResponse,
19
- type StreamChunk,
20
- type ToolCompletionRequest,
21
- } from './types';
22
- import { AnthropicProvider } from './providers/anthropic';
23
- import { OpenAIProvider } from './providers/openai';
24
- import { GoogleProvider } from './providers/google';
25
- import { OllamaProvider } from './providers/ollama';
26
- import { OpenRouterProvider } from './providers/openrouter';
27
- import { OpenAICompatibleProvider } from './providers/openai-compatible';
28
- import { BedrockProvider } from './providers/bedrock';
29
- import { calculateCost, type CostResult } from './cost-calculator';
30
- import { resolveModelAlias, stripProviderPrefix } from './model-aliases';
31
- import { detectProvider } from './provider-registry';
32
- import { ProviderCircuitBreaker } from './circuit-breaker';
33
-
34
- export interface RouterConfig {
35
- defaultProvider: string;
36
- defaultModel: string;
37
- costOptimization: {
38
- enabled: boolean;
39
- cheapModelFor: string[];
40
- expensiveModelFor: string[];
41
- cheapModel: string;
42
- expensiveModel: string;
43
- };
44
- fallback: {
45
- enabled: boolean;
46
- providers: string[];
47
- };
48
- tokenBudget?: {
49
- maxTokensPerRequest?: number;
50
- };
51
- }
52
-
53
- export interface ProviderInfo {
54
- name: string;
55
- available: boolean;
56
- models: string[];
57
- }
58
-
59
- /**
60
- * Metadata emitted by the streaming fallback to indicate which provider
61
- * is actually serving the response. The WebSocket handler inspects this
62
- * to notify clients of provider switches.
63
- */
64
- export interface StreamFallbackMeta {
65
- /** The provider that is actively streaming. */
66
- activeProvider: string;
67
- /** If a fallback occurred, the provider that originally failed. */
68
- failedProvider?: string;
69
- /** True when this stream is being served by a fallback provider. */
70
- isFallback: boolean;
71
- }
72
-
73
- export class LLMRouter {
74
- private providers: Map<string, LLMProvider>;
75
- private config: RouterConfig;
76
- private circuitBreaker = new ProviderCircuitBreaker();
77
-
78
- /**
79
- * Populated during streaming with fallback so callers (e.g. WebSocket)
80
- * can inspect which provider ended up serving the stream. Reset on
81
- * every call to routeStream / executeStreamWithFallback.
82
- */
83
- lastStreamFallbackMeta: StreamFallbackMeta | null = null;
84
-
85
- constructor(config?: Partial<RouterConfig>) {
86
- this.providers = new Map();
87
- this.config = {
88
- defaultProvider: config?.defaultProvider || process.env.DEFAULT_PROVIDER || 'anthropic',
89
- defaultModel: config?.defaultModel || process.env.DEFAULT_MODEL || 'claude-sonnet-4-20250514',
90
- costOptimization: {
91
- enabled:
92
- config?.costOptimization?.enabled ?? process.env.ENABLE_COST_OPTIMIZATION === 'true',
93
- cheapModelFor: config?.costOptimization?.cheapModelFor || [
94
- 'simple_queries',
95
- 'summarization',
96
- 'classification',
97
- 'explanations',
98
- ],
99
- expensiveModelFor: config?.costOptimization?.expensiveModelFor || [
100
- 'code_generation',
101
- 'complex_reasoning',
102
- 'planning',
103
- ],
104
- cheapModel:
105
- config?.costOptimization?.cheapModel ||
106
- process.env.CHEAP_MODEL ||
107
- 'claude-haiku-4-20250514',
108
- expensiveModel:
109
- config?.costOptimization?.expensiveModel ||
110
- process.env.EXPENSIVE_MODEL ||
111
- 'claude-opus-4-20250514',
112
- },
113
- fallback: {
114
- enabled: config?.fallback?.enabled ?? process.env.DISABLE_FALLBACK !== 'true',
115
- providers:
116
- config?.fallback?.providers ||
117
- (process.env.FALLBACK_PROVIDERS?.split(',') ?? [
118
- 'anthropic',
119
- 'openai',
120
- 'openrouter',
121
- 'google',
122
- ]),
123
- },
124
- };
125
-
126
- this.initializeProviders();
127
- }
128
-
129
- /**
130
- * Initialize all available providers based on API keys, auth.json, and environment variables.
131
- *
132
- * Resolution order per provider:
133
- * 1. auth.json (~/.nimbus/auth.json) via the auth-bridge
134
- * 2. Environment variables (ANTHROPIC_API_KEY, etc.)
135
- */
136
- private initializeProviders(): void {
137
- // Lazy-import the auth-bridge to avoid circular deps at module level
138
- let isConfigured: (name: string) => boolean;
139
- let getApiKey: (name: string) => string | undefined;
140
- try {
141
- // eslint-disable-next-line @typescript-eslint/no-var-requires
142
- const bridge = require('./auth-bridge');
143
- isConfigured = bridge.isProviderConfigured;
144
- getApiKey = bridge.getProviderApiKey;
145
- } catch (err) {
146
- // Auth-bridge unavailable (open-source build) — fall back to env-only
147
- logger.debug(
148
- 'Auth-bridge unavailable, using environment variables only:',
149
- err instanceof Error ? err.message : String(err)
150
- );
151
- isConfigured = () => false;
152
- getApiKey = () => undefined;
153
- }
154
-
155
- // Anthropic
156
- if (process.env.ANTHROPIC_API_KEY || isConfigured('anthropic')) {
157
- this.providers.set('anthropic', new AnthropicProvider());
158
- logger.info('Initialized Anthropic provider');
159
- }
160
-
161
- // OpenAI
162
- if (process.env.OPENAI_API_KEY || isConfigured('openai')) {
163
- this.providers.set('openai', new OpenAIProvider());
164
- logger.info('Initialized OpenAI provider');
165
- }
166
-
167
- // Google
168
- if (process.env.GOOGLE_API_KEY || isConfigured('google')) {
169
- this.providers.set('google', new GoogleProvider());
170
- logger.info('Initialized Google provider');
171
- }
172
-
173
- // OpenRouter
174
- if (process.env.OPENROUTER_API_KEY || isConfigured('openrouter')) {
175
- this.providers.set('openrouter', new OpenRouterProvider());
176
- logger.info('Initialized OpenRouter provider');
177
- }
178
-
179
- // Ollama (only if explicitly configured via auth.json or env var)
180
- if (process.env.OLLAMA_BASE_URL || isConfigured('ollama')) {
181
- this.providers.set('ollama', new OllamaProvider());
182
- logger.info('Initialized Ollama provider');
183
- }
184
-
185
- // AWS Bedrock (uses IAM credentials from environment / instance profile)
186
- if (
187
- process.env.AWS_BEDROCK_ENABLED === 'true' ||
188
- process.env.AWS_REGION ||
189
- isConfigured('bedrock') ||
190
- (process.env.AWS_ACCESS_KEY_ID && process.env.AWS_SECRET_ACCESS_KEY)
191
- ) {
192
- this.providers.set('bedrock', new BedrockProvider());
193
- logger.info('Initialized AWS Bedrock provider');
194
- }
195
-
196
- // Groq (OpenAI-compatible)
197
- const groqKey = process.env.GROQ_API_KEY || getApiKey('groq');
198
- if (groqKey) {
199
- this.providers.set(
200
- 'groq',
201
- new OpenAICompatibleProvider({
202
- name: 'groq',
203
- apiKey: groqKey,
204
- baseURL: 'https://api.groq.com/openai/v1',
205
- defaultModel: 'llama-3.1-70b-versatile',
206
- })
207
- );
208
- logger.info('Initialized Groq provider (OpenAI-compatible)');
209
- }
210
-
211
- // Together AI (OpenAI-compatible)
212
- const togetherKey = process.env.TOGETHER_API_KEY || getApiKey('together');
213
- if (togetherKey) {
214
- this.providers.set(
215
- 'together',
216
- new OpenAICompatibleProvider({
217
- name: 'together',
218
- apiKey: togetherKey,
219
- baseURL: 'https://api.together.xyz/v1',
220
- defaultModel: 'meta-llama/Llama-3.1-70B-Instruct-Turbo',
221
- })
222
- );
223
- logger.info('Initialized Together AI provider (OpenAI-compatible)');
224
- }
225
-
226
- // DeepSeek (OpenAI-compatible)
227
- const deepseekKey = process.env.DEEPSEEK_API_KEY || getApiKey('deepseek');
228
- if (deepseekKey) {
229
- this.providers.set(
230
- 'deepseek',
231
- new OpenAICompatibleProvider({
232
- name: 'deepseek',
233
- apiKey: deepseekKey,
234
- baseURL: 'https://api.deepseek.com/v1',
235
- defaultModel: 'deepseek-chat',
236
- })
237
- );
238
- logger.info('Initialized DeepSeek provider (OpenAI-compatible)');
239
- }
240
-
241
- // Fireworks AI (OpenAI-compatible)
242
- const fireworksKey = process.env.FIREWORKS_API_KEY || getApiKey('fireworks');
243
- if (fireworksKey) {
244
- this.providers.set(
245
- 'fireworks',
246
- new OpenAICompatibleProvider({
247
- name: 'fireworks',
248
- apiKey: fireworksKey,
249
- baseURL: 'https://api.fireworks.ai/inference/v1',
250
- defaultModel: 'accounts/fireworks/models/llama-v3p1-70b-instruct',
251
- })
252
- );
253
- logger.info('Initialized Fireworks AI provider (OpenAI-compatible)');
254
- }
255
-
256
- // Perplexity (OpenAI-compatible)
257
- const perplexityKey = process.env.PERPLEXITY_API_KEY || getApiKey('perplexity');
258
- if (perplexityKey) {
259
- this.providers.set(
260
- 'perplexity',
261
- new OpenAICompatibleProvider({
262
- name: 'perplexity',
263
- apiKey: perplexityKey,
264
- baseURL: 'https://api.perplexity.ai',
265
- defaultModel: 'llama-3.1-sonar-large-128k-online',
266
- })
267
- );
268
- logger.info('Initialized Perplexity provider (OpenAI-compatible)');
269
- }
270
- }
271
-
272
- /**
273
- * Get the names of all initialized providers.
274
- */
275
- getAvailableProviders(): string[] {
276
- return [...this.providers.keys()];
277
- }
278
-
279
- /**
280
- * Get the names of providers whose circuit breakers are currently OPEN
281
- * (i.e. temporarily disabled due to consecutive failures).
282
- */
283
- getDisabledProviders(): string[] {
284
- return this.circuitBreaker.getOpenCircuits();
285
- }
286
-
287
- /**
288
- * Register a custom provider
289
- */
290
- registerProvider(provider: LLMProvider): void {
291
- this.providers.set(provider.name, provider);
292
- logger.info(`Registered custom provider: ${provider.name}`);
293
- }
294
-
295
/**
 * Send a non-streaming completion request through the router.
 *
 * The request object is mutated in place: its model alias is resolved, the
 * provider prefix is stripped after selection, and `maxTokens` is clamped
 * to the configured budget.
 *
 * NOTE(review): cost and persisted usage are attributed to the initially
 * selected provider, even when the fallback path ended up using another one.
 *
 * @param request - The completion request (mutated, see above).
 * @param taskType - Optional task label used for cost-based provider selection.
 * @returns The provider response with `cost` attached.
 * @throws Error when no provider can be selected.
 */
async route(request: CompletionRequest, taskType?: string): Promise<LLMResponse> {
  // Aliases must be expanded first: provider selection keys off the full name.
  if (request.model) {
    request.model = resolveModelAlias(request.model);
  }

  const selected = this.selectProvider(request, taskType);

  // Provider APIs expect the bare model ID, so drop the routing prefix now.
  if (request.model) {
    request.model = stripProviderPrefix(request.model);
  }

  this.enforceTokenBudget(request);

  if (!selected) {
    throw new Error(
      'No LLM provider available. Run `nimbus login` to configure a provider, or set an API key via environment variable (e.g. ANTHROPIC_API_KEY).'
    );
  }

  const response: LLMResponse = this.config.fallback.enabled
    ? await this.executeWithFallback(selected, request)
    : await selected.complete(request);

  // Attach the per-request cost to the response itself.
  const requestCost = this.computeCost(selected.name, response);
  response.cost = requestCost;

  // Fire-and-forget: persistUsage never throws into the request path.
  if (response.usage) {
    this.persistUsage(response.usage, response.model, selected.name, requestCost);
  }

  return response;
}
338
-
339
/**
 * Send a streaming completion request through the router.
 *
 * Chunks are passed to the caller as they come out of the underlying stream.
 * Once the stream is exhausted, usage is persisted (fire-and-forget): the
 * last `usage` seen on a chunk is used when present, otherwise token counts
 * are estimated at one token per four characters of text.
 *
 * The request object is mutated in place (alias resolution, prefix
 * stripping, token budget).
 *
 * @param request - The completion request (mutated, see above).
 * @param taskType - Optional task label used for cost-based provider selection.
 * @throws Error when no provider can be selected.
 */
async *routeStream(request: CompletionRequest, taskType?: string): AsyncIterable<StreamChunk> {
  if (request.model) {
    request.model = resolveModelAlias(request.model);
  }

  // Everything needed after a `yield` is captured in locals up front, because
  // `this` is not reliably typed after yield points in async generators.
  const self = this as LLMRouter;
  const defaultModel = self.config.defaultModel;
  const provider = self.selectProvider(request, taskType);

  // Provider APIs expect the model ID without the routing prefix.
  if (request.model) {
    request.model = stripProviderPrefix(request.model);
  }

  self.enforceTokenBudget(request);

  if (!provider) {
    throw new Error(
      'No LLM provider available. Run `nimbus login` to configure a provider, or set an API key via environment variable (e.g. ANTHROPIC_API_KEY).'
    );
  }

  // Cleared here; executeStreamWithFallback fills it in while we iterate.
  self.lastStreamFallbackMeta = null;

  const stream = self.config.fallback.enabled
    ? self.executeStreamWithFallback(provider, request)
    : provider.stream(request);

  let totalContent = '';
  let lastUsage: StreamChunk['usage'] | undefined;

  for await (const chunk of stream) {
    if (chunk.content) {
      totalContent += chunk.content;
    }
    if (chunk.usage) {
      lastUsage = chunk.usage;
    }
    yield chunk;
  }

  // The assertion is needed because control-flow analysis still sees the
  // `null` assigned above; the field was mutated during iteration.
  const fallbackMeta = self.lastStreamFallbackMeta as StreamFallbackMeta | null;
  const activeProviderName = fallbackMeta?.activeProvider ?? provider.name;
  const model = request.model || defaultModel;

  if (lastUsage) {
    // The provider reported real token counts.
    const reportedCost = calculateCost(
      activeProviderName,
      model,
      lastUsage.promptTokens,
      lastUsage.completionTokens
    );
    self.persistUsage(lastUsage, model, activeProviderName, reportedCost);
    return;
  }

  // No usage data: approximate from text length (4 characters per token).
  let estimatedInputTokens = 0;
  for (const message of request.messages) {
    estimatedInputTokens += Math.ceil(getTextContent(message.content).length / 4);
  }
  const estimatedOutputTokens = Math.ceil(totalContent.length / 4);

  const estimatedCost = calculateCost(
    activeProviderName,
    model,
    estimatedInputTokens,
    estimatedOutputTokens
  );
  self.persistUsage(
    {
      promptTokens: estimatedInputTokens,
      completionTokens: estimatedOutputTokens,
      totalTokens: estimatedInputTokens + estimatedOutputTokens,
    },
    model,
    activeProviderName,
    estimatedCost
  );
}
435
-
436
/**
 * Send a streaming tool-completion request through the router.
 *
 * Chunks are yielded to the caller as soon as the provider produces them.
 * When the selected provider has no `streamWithTools`, or every streaming
 * candidate failed before producing output, the request is served by the
 * non-streaming `completeWithTools` of the selected provider and replayed
 * as at most one content chunk followed by a final `done` chunk.
 *
 * Fallback rule: another provider is tried only while nothing has been
 * yielded yet. Once a chunk has reached the caller, a failure is rethrown;
 * restarting on another provider would append a second response to the
 * partial one already delivered.
 *
 * The request object is mutated in place (alias resolution, prefix
 * stripping, token budget).
 *
 * @param request - The tool completion request (mutated, see above).
 * @param taskType - Optional task label used for cost-based provider selection.
 * @throws Error when no provider can be selected.
 * @throws The provider's own error when a stream fails after output was emitted.
 */
async *routeStreamWithTools(
  request: ToolCompletionRequest,
  taskType?: string
): AsyncIterable<StreamChunk> {
  if (request.model) {
    request.model = resolveModelAlias(request.model);
  }

  // Captured before any yield point so post-yield code only touches locals.
  const self = this as LLMRouter;
  const defaultModel = self.config.defaultModel;
  const provider = self.selectProvider(request, taskType);

  // Provider APIs expect the model ID without the routing prefix.
  if (request.model) {
    request.model = stripProviderPrefix(request.model);
  }

  self.enforceTokenBudget(request);

  if (!provider) {
    throw new Error(
      'No LLM provider available. Run `nimbus login` to configure a provider, or set an API key via environment variable (e.g. ANTHROPIC_API_KEY).'
    );
  }

  // Shared accounting for both streaming paths; a no-op without usage data.
  const recordStreamUsage = (
    providerName: string,
    usage: StreamChunk['usage'] | undefined
  ): void => {
    if (!usage) {
      return;
    }
    const model = request.model || defaultModel;
    const cost = calculateCost(providerName, model, usage.promptTokens, usage.completionTokens);
    self.persistUsage(usage, model, providerName, cost);
  };

  if (provider.streamWithTools && self.config.fallback.enabled) {
    // Selected provider first, then the configured fallbacks (deduplicated
    // against the selected one).
    const fallbackProviders = self.config.fallback.providers
      .map(name => self.providers.get(name))
      .filter(Boolean) as LLMProvider[];
    const allProviders = [provider, ...fallbackProviders.filter(p => p !== provider)];

    for (const p of allProviders) {
      if (!p.streamWithTools || !self.circuitBreaker.isAvailable(p.name)) {
        continue;
      }

      let lastUsage: StreamChunk['usage'] | undefined;
      let emittedChunk = false;

      try {
        // Yield each chunk immediately (no buffering) for real-time streaming UX.
        for await (const chunk of p.streamWithTools(request)) {
          if (chunk.usage) {
            lastUsage = chunk.usage;
          }
          emittedChunk = true;
          yield chunk;
        }
      } catch (error) {
        self.circuitBreaker.recordFailure(p.name);
        if (emittedChunk) {
          // Output already reached the caller and cannot be taken back.
          logger.warn(
            `Provider ${p.name} failed for streamWithTools after emitting output; not falling back`,
            { error }
          );
          throw error;
        }
        logger.warn(`Provider ${p.name} failed for streamWithTools, trying fallback...`, {
          error,
        });
        continue;
      }

      // Bookkeeping lives outside the try so that an accounting error after
      // a complete stream can never trigger a second provider attempt.
      self.circuitBreaker.recordSuccess(p.name);
      recordStreamUsage(p.name, lastUsage);
      return;
    }
    // Every streaming candidate was skipped or failed before emitting
    // anything: fall through to the non-streaming path below.
  } else if (provider.streamWithTools) {
    // Fallback disabled: stream straight from the selected provider.
    let lastUsage: StreamChunk['usage'] | undefined;
    for await (const chunk of provider.streamWithTools(request)) {
      if (chunk.usage) {
        lastUsage = chunk.usage;
      }
      yield chunk;
    }
    recordStreamUsage(provider.name, lastUsage);
    return;
  }

  // Non-streaming path: one blocking call, replayed as chunks.
  // NOTE(review): this always targets the selected provider and bypasses the
  // circuit breaker and fallback list; confirm that is intended.
  const response = await provider.completeWithTools(request);
  const cost = self.computeCost(provider.name, response);
  response.cost = cost;
  if (response.usage) {
    self.persistUsage(response.usage, response.model, provider.name, cost);
  }

  if (response.content) {
    yield { content: response.content, done: false };
  }
  yield {
    done: true,
    toolCalls: response.toolCalls,
    usage: response.usage,
  };
}
550
-
551
/**
 * Send a non-streaming tool-completion request through the router.
 *
 * Same pipeline as `route`, but the provider call is `completeWithTools`
 * (via `executeToolsWithFallback` when fallback is enabled). The request
 * object is mutated in place (alias resolution, prefix stripping, token
 * budget).
 *
 * @param request - The tool completion request (mutated, see above).
 * @param taskType - Optional task label used for cost-based provider selection.
 * @returns The provider response with `cost` attached.
 * @throws Error when no provider can be selected.
 */
async routeWithTools(request: ToolCompletionRequest, taskType?: string): Promise<LLMResponse> {
  // Expand aliases before selection, which inspects the full model name.
  if (request.model) {
    request.model = resolveModelAlias(request.model);
  }

  const chosen = this.selectProvider(request, taskType);

  // After routing, the API only wants the model ID without its prefix.
  if (request.model) {
    request.model = stripProviderPrefix(request.model);
  }

  this.enforceTokenBudget(request);

  if (!chosen) {
    throw new Error(
      'No LLM provider available. Run `nimbus login` to configure a provider, or set an API key via environment variable (e.g. ANTHROPIC_API_KEY).'
    );
  }

  const useFallback = this.config.fallback.enabled;
  const response: LLMResponse = useFallback
    ? await this.executeToolsWithFallback(chosen, request)
    : await chosen.completeWithTools(request);

  // Per-request cost travels with the response.
  const toolCallCost = this.computeCost(chosen.name, response);
  response.cost = toolCallCost;

  // Usage persistence is fire-and-forget.
  if (response.usage) {
    this.persistUsage(response.usage, response.model, chosen.name, toolCallCost);
  }

  return response;
}
594
-
595
/**
 * Collect the model lists of all registered providers.
 *
 * Every provider's `listModels()` is queried concurrently. Providers whose
 * query rejects are left out of the result.
 *
 * @returns A map from provider name to that provider's model list.
 */
async getAvailableModels(): Promise<Record<string, string[]>> {
  const settled = await Promise.allSettled(
    Array.from(this.providers.entries()).map(async ([name, provider]) => ({
      name,
      models: await provider.listModels(),
    }))
  );

  const modelsByProvider: Record<string, string[]> = {};
  for (const outcome of settled) {
    if (outcome.status !== 'fulfilled') {
      continue;
    }
    modelsByProvider[outcome.value.name] = outcome.value.models;
  }
  return modelsByProvider;
}
617
-
618
/**
 * Describe every registered provider together with its availability.
 *
 * Each provider's `listModels()` is queried concurrently. A successful
 * query yields `available: true` with the returned models; a rejected one
 * yields `available: false` with an empty model list. The result keeps
 * the registration order of the provider map.
 *
 * @returns One `ProviderInfo` per registered provider.
 */
async getProviders(): Promise<ProviderInfo[]> {
  const entries = Array.from(this.providers.entries());
  const settled = await Promise.allSettled(
    entries.map(async ([name, provider]) => {
      const models = await provider.listModels();
      return { name, available: true, models };
    })
  );

  // `settled` is index-aligned with `entries`, so a rejected slot can still
  // be labelled with the provider name it belongs to.
  return settled.map(
    (outcome, index): ProviderInfo =>
      outcome.status === 'fulfilled'
        ? outcome.value
        : { name: entries[index][0], available: false, models: [] }
  );
}
645
-
646
/**
 * Pick the provider that should serve a request.
 *
 * Order of precedence:
 *  1. the provider resolved from an explicit `request.model`, if registered;
 *  2. the cheap or expensive provider when cost optimization is enabled and
 *     `taskType` is listed for that tier (this also fills in
 *     `request.model` when the request has none);
 *  3. the configured default provider;
 *  4. the first registered provider.
 *
 * @param request - The request being routed; `model` may be assigned here.
 * @param taskType - Optional task label matched against the cost tiers.
 * @returns The chosen provider, or `null` when none is registered.
 */
private selectProvider(request: CompletionRequest, taskType?: string): LLMProvider | null {
  // 1. An explicitly requested model decides the provider.
  if (request.model) {
    const providerName = this.getProviderForModel(request.model);
    const explicit = this.providers.get(providerName);
    if (explicit) {
      logger.info(`Selected ${providerName} provider for model ${request.model}`);
      return explicit;
    }
  }

  // 2. Cost optimization by task type.
  const { costOptimization } = this.config;
  if (costOptimization.enabled && taskType) {
    if (costOptimization.cheapModelFor.includes(taskType)) {
      const cheapModel = costOptimization.cheapModel;
      const cheapName = this.getProviderForModel(cheapModel);
      const provider =
        (cheapName && this.providers.get(cheapName)) || this.getCheapProvider();
      if (provider) {
        if (!request.model) {
          request.model = cheapModel;
        }
        logger.info(
          `Selected cheap provider ${provider.name} with model ${request.model} for task type: ${taskType}`
        );
        return provider;
      }
    }

    if (costOptimization.expensiveModelFor.includes(taskType)) {
      const expensiveModel = costOptimization.expensiveModel;
      const expensiveName = this.getProviderForModel(expensiveModel);
      const provider =
        (expensiveName && this.providers.get(expensiveName)) || this.getExpensiveProvider();
      if (provider) {
        if (!request.model) {
          request.model = expensiveModel;
        }
        logger.info(
          `Selected expensive provider ${provider.name} with model ${request.model} for task type: ${taskType}`
        );
        return provider;
      }
    }
  }

  // 3. Configured default.
  const defaultProvider = this.providers.get(this.config.defaultProvider);
  if (defaultProvider) {
    logger.info(`Using default provider: ${this.config.defaultProvider}`);
    return defaultProvider;
  }

  // 4. Anything that is registered at all.
  const [firstAvailable] = Array.from(this.providers.values());
  if (firstAvailable) {
    logger.warn(`No default provider, using first available: ${firstAvailable.name}`);
    return firstAvailable;
  }

  return null;
}
711
-
712
/**
 * Decide whether a failure is worth retrying with backoff.
 *
 * An error object qualifies when its numeric `status` (or, failing that,
 * numeric `statusCode`) is 429 or in the 5xx range, or when its `message`
 * mentions a rate limit, "429", "too many requests", "overloaded" or "503".
 *
 * @param error - The caught value; non-objects are never retryable.
 * @returns `true` when the call should be retried.
 */
private static isRetryableError(error: unknown): boolean {
  if (!error || typeof error !== 'object') {
    return false;
  }

  const fields = error as Record<string, unknown>;

  // `status` takes precedence over `statusCode` when both are numbers.
  let code: number | undefined;
  if (typeof fields.status === 'number') {
    code = fields.status;
  } else if (typeof fields.statusCode === 'number') {
    code = fields.statusCode;
  }

  if (code !== undefined && (code === 429 || (code >= 500 && code < 600))) {
    return true;
  }

  // No usable status code: fall back to sniffing the message text.
  const text = typeof fields.message === 'string' ? fields.message : '';
  return /rate.?limit|429|too many requests|overloaded|503/i.test(text);
}
732
-
733
/**
 * Run an async operation, retrying retryable failures with exponential backoff.
 *
 * The base delay doubles per attempt starting at 1s and is capped at 8s;
 * up to 500ms of random jitter is added on top. Errors that are not
 * retryable, or that occur on the last allowed attempt, are rethrown as-is.
 *
 * @param fn - The operation to run; invoked once per attempt.
 * @param maxRetries - Number of retries after the first attempt (default 3).
 * @returns The value `fn` resolves with.
 * @throws The last error raised by `fn`.
 */
private async withRetry<T>(fn: () => Promise<T>, maxRetries = 3): Promise<T> {
  let lastError: unknown;
  let attempt = 0;

  while (attempt <= maxRetries) {
    try {
      return await fn();
    } catch (error) {
      lastError = error;

      const canRetry = attempt < maxRetries && LLMRouter.isRetryableError(error);
      if (!canRetry) {
        throw error;
      }

      const backoffMs = Math.min(1000 * 2 ** attempt, 8000);
      const jitterMs = Math.random() * 500;
      logger.info(
        `Rate limited — retrying in ${Math.round(backoffMs + jitterMs)}ms (attempt ${attempt + 1}/${maxRetries})`
      );
      await new Promise(resolve => setTimeout(resolve, backoffMs + jitterMs));
    }
    attempt += 1;
  }

  // Only reachable when the loop body never ran (negative maxRetries).
  throw lastError;
}
758
-
759
/**
 * Run a completion against the primary provider, then against each
 * configured fallback in order, until one succeeds.
 *
 * Providers whose circuit is open are skipped. Each attempt goes through
 * `withRetry`, and its outcome is reported to the circuit breaker.
 *
 * @param primaryProvider - The provider tried first.
 * @param request - The completion request passed to every candidate.
 * @returns The first successful response.
 * @throws Error when every candidate was skipped or failed.
 */
private async executeWithFallback(
  primaryProvider: LLMProvider,
  request: CompletionRequest
): Promise<LLMResponse> {
  // Primary first, then every registered fallback that is not the primary.
  const candidates: LLMProvider[] = [primaryProvider];
  for (const name of this.config.fallback.providers) {
    const fallback = this.providers.get(name);
    if (fallback && fallback !== primaryProvider) {
      candidates.push(fallback);
    }
  }

  for (const candidate of candidates) {
    if (!this.circuitBreaker.isAvailable(candidate.name)) {
      logger.info(`Skipping ${candidate.name} (circuit open)`);
      continue;
    }

    try {
      logger.info(`Attempting request with ${candidate.name}`);
      const response = await this.withRetry(() => candidate.complete(request));
      this.circuitBreaker.recordSuccess(candidate.name);
      return response;
    } catch (error) {
      this.circuitBreaker.recordFailure(candidate.name);
      logger.warn(`Provider ${candidate.name} failed, trying fallback...`, { error });
    }
  }

  throw new Error(
    'All LLM providers failed. Check your API keys and network connection, or try a different provider.'
  );
}
793
-
794
/**
 * Tool-request counterpart of `executeWithFallback`: try the primary
 * provider's `completeWithTools`, then each configured fallback in order.
 *
 * Providers whose circuit is open are skipped. Each attempt goes through
 * `withRetry`, and its outcome is reported to the circuit breaker.
 *
 * @param primaryProvider - The provider tried first.
 * @param request - The tool completion request passed to every candidate.
 * @returns The first successful response.
 * @throws Error when every candidate was skipped or failed.
 */
private async executeToolsWithFallback(
  primaryProvider: LLMProvider,
  request: ToolCompletionRequest
): Promise<LLMResponse> {
  const registeredFallbacks = this.config.fallback.providers
    .map(name => this.providers.get(name))
    .filter(Boolean) as LLMProvider[];
  const others = registeredFallbacks.filter(p => p !== primaryProvider);
  const attemptOrder = [primaryProvider, ...others];

  for (const current of attemptOrder) {
    const circuitClosed = this.circuitBreaker.isAvailable(current.name);
    if (!circuitClosed) {
      logger.info(`Skipping ${current.name} for tool request (circuit open)`);
      continue;
    }

    try {
      logger.info(`Attempting tool request with ${current.name}`);
      const response = await this.withRetry(() => current.completeWithTools(request));
      this.circuitBreaker.recordSuccess(current.name);
      return response;
    } catch (error) {
      this.circuitBreaker.recordFailure(current.name);
      logger.warn(`Provider ${current.name} failed for tool request, trying fallback...`, {
        error,
      });
    }
  }

  throw new Error(
    'All LLM providers failed for tool request. Check your API keys and network connection, or try a different provider.'
  );
}
830
-
831
/**
 * Stream a completion with provider fallback.
 *
 * Each candidate's stream is read to the end into a buffer before anything
 * is yielded, so the caller only ever receives the chunks of one provider:
 *  - a failure before the first chunk moves on to the next candidate;
 *  - a failure mid-stream discards the partial buffer and moves on as well.
 *
 * The consequence is that output reaches the caller only after the serving
 * provider has finished. On success `lastStreamFallbackMeta` records which
 * provider served the response and which one (if any) failed before it.
 *
 * @param primaryProvider - The provider tried first.
 * @param request - The completion request passed to every candidate.
 * @throws Error when every candidate was skipped or failed.
 */
private async *executeStreamWithFallback(
  primaryProvider: LLMProvider,
  request: CompletionRequest
): AsyncIterable<StreamChunk> {
  // Keep a local handle on the instance for use across yield points.
  const self = this as LLMRouter;

  const registeredFallbacks = self.config.fallback.providers
    .map(name => self.providers.get(name))
    .filter(Boolean) as LLMProvider[];
  const candidates = [
    primaryProvider,
    ...registeredFallbacks.filter(p => p !== primaryProvider),
  ];

  // Name of the most recent candidate that failed, if any.
  let failedProvider: string | undefined;

  for (const candidate of candidates) {
    if (!self.circuitBreaker.isAvailable(candidate.name)) {
      logger.info(`Skipping ${candidate.name} for stream (circuit open)`);
      continue;
    }

    const buffered: StreamChunk[] = [];

    try {
      logger.info(`Attempting stream with ${candidate.name}`);

      for await (const chunk of candidate.stream(request)) {
        buffered.push(chunk);
      }

      // Exhausting the iterator without an exception counts as success,
      // whether or not a chunk carried `done: true`.
      self.circuitBreaker.recordSuccess(candidate.name);
    } catch (error) {
      self.circuitBreaker.recordFailure(candidate.name);
      const partialChunkCount = buffered.length;
      logger.warn(
        `Provider ${candidate.name} failed for stream after ${partialChunkCount} chunk(s), trying fallback...`,
        { error }
      );
      failedProvider = candidate.name;
      // The partial buffer is dropped with this iteration.
      continue;
    }

    // Let routeStream know who actually served the response.
    self.lastStreamFallbackMeta = {
      activeProvider: candidate.name,
      failedProvider,
      isFallback: !!failedProvider,
    };

    if (failedProvider) {
      logger.info(
        `Stream fallback: ${failedProvider} -> ${candidate.name} (${buffered.length} chunks)`
      );
    }

    for (const chunk of buffered) {
      yield chunk;
    }
    return;
  }

  throw new Error(
    'All LLM providers failed for streaming request. Check your API keys and network connection, or try a different provider.'
  );
}
923
-
924
/**
 * Compute the cost of a response via the cost calculator.
 *
 * Callers test `response.usage` for presence after calling this method, so
 * a response without usage data is priced as zero tokens instead of
 * throwing on the missing object.
 *
 * @param providerName - Provider the cost is attributed to.
 * @param response - The response whose model and token usage are priced.
 * @returns The result of `calculateCost` for the response.
 */
private computeCost(providerName: string, response: LLMResponse): CostResult {
  const promptTokens = response.usage?.promptTokens ?? 0;
  const completionTokens = response.usage?.completionTokens ?? 0;
  return calculateCost(providerName, response.model, promptTokens, completionTokens);
}
935
-
936
/**
 * Resolve the provider name that should handle a model.
 *
 * Resolution order: the provider reported by `detectProvider` when it is
 * registered; otherwise `openrouter` when the model name contains a `/`
 * and OpenRouter is registered; otherwise the configured default provider.
 *
 * @param model - The model identifier, possibly provider-prefixed.
 * @returns A provider name (not guaranteed to be registered in the default case).
 */
private getProviderForModel(model: string): string {
  const detected = detectProvider(model);
  if (this.providers.has(detected)) {
    return detected;
  }

  // A slash-separated name may be an OpenRouter-style model ID.
  const looksNamespaced = model.includes('/');
  return looksNamespaced && this.providers.has('openrouter')
    ? 'openrouter'
    : this.config.defaultProvider;
}
955
-
956
/**
 * Pick the preferred low-cost provider among those registered.
 *
 * Preference order: `ollama`, then `anthropic`, then `openai`.
 *
 * @returns The first registered provider in that order, or `null`.
 */
private getCheapProvider(): LLMProvider | null {
  for (const name of ['ollama', 'anthropic', 'openai']) {
    const candidate = this.providers.get(name);
    if (candidate) {
      return candidate;
    }
  }
  return null;
}
968
-
969
/**
 * Record token usage in the `usage` table of the state database
 * (fire-and-forget).
 *
 * The database module is imported dynamically and the row is written once
 * that import resolves, so this method returns before anything is stored.
 * Every failure is logged at debug level and swallowed; persistence must
 * never break the LLM request path.
 *
 * @param usage - Token counts; `totalTokens` becomes the row's quantity.
 * @param model - Model name stored in the row metadata (null when omitted).
 * @param provider - Provider name stored in the row metadata (null when omitted).
 * @param cost - Cost result; `costUSD` is stored, defaulting to 0.
 */
persistUsage(
  usage: { promptTokens: number; completionTokens: number; totalTokens: number },
  model?: string,
  provider?: string,
  cost?: CostResult
): void {
  try {
    // Imported lazily to avoid a circular dependency between llm/ and state/.
    import('../state/db')
      .then(stateDb => {
        try {
          const db = stateDb.getDb();
          const rowId = crypto.randomUUID();
          const costUsd = cost?.costUSD ?? 0;
          const metadata = JSON.stringify({
            model: model ?? null,
            provider: provider ?? null,
            prompt_tokens: usage.promptTokens,
            completion_tokens: usage.completionTokens,
          });

          db.run(
            `INSERT INTO usage (id, type, quantity, unit, cost_usd, metadata)
             VALUES (?, ?, ?, ?, ?, ?)`,
            [rowId, 'llm_call', usage.totalTokens, 'tokens', costUsd, metadata]
          );
        } catch (err) {
          logger.debug('Failed to persist LLM usage to SQLite', { error: err });
        }
      })
      .catch(err => {
        logger.debug('Failed to import state/db for usage persistence', { error: err });
      });
  } catch (err) {
    logger.debug('Unexpected error in persistUsage', { error: err });
  }
}
1012
-
1013
/**
 * Pick the preferred high-capability provider among those registered.
 *
 * Preference order: `anthropic`, then `openai`, then `google`.
 *
 * @returns The first registered provider in that order, or `null`.
 */
private getExpensiveProvider(): LLMProvider | null {
  const preferred = ['anthropic', 'openai', 'google']
    .map(name => this.providers.get(name))
    .find(Boolean);
  return preferred ?? null;
}
1025
-
1026
/**
 * Clamp `request.maxTokens` to the configured per-request budget.
 *
 * A request without `maxTokens` is treated as asking for 4096; a missing
 * (or zero) budget setting is treated as 32768. The request is mutated.
 *
 * @param request - The request whose `maxTokens` is rewritten.
 */
private enforceTokenBudget(request: CompletionRequest): void {
  const ceiling = this.config.tokenBudget?.maxTokensPerRequest || 32768;
  const requested = request.maxTokens || 4096;
  request.maxTokens = Math.min(requested, ceiling);
}
1033
- }
1034
-
1035
- // ---------------------------------------------------------------------------
1036
- // Gap 6: List authenticated providers for /model command
1037
- // ---------------------------------------------------------------------------
1038
-
1039
/**
 * Return the names of providers whose credentials are visible in the
 * process environment.
 *
 * Only environment variables are inspected; a provider is listed when at
 * least one of its variables is set to a non-empty value.
 *
 * NOTE(review): the auth store is not consulted here, although provider
 * initialisation elsewhere in this file also falls back to
 * `getApiKey(...)`. Confirm whether stored credentials should count too.
 *
 * @returns Provider names in a fixed order: anthropic, openai, google,
 *   groq, openrouter, bedrock.
 */
export function listAuthenticatedProviders(): string[] {
  const credentialEnvVars: ReadonlyArray<readonly [string, readonly string[]]> = [
    ['anthropic', ['ANTHROPIC_API_KEY']],
    ['openai', ['OPENAI_API_KEY']],
    ['google', ['GOOGLE_API_KEY', 'GOOGLE_GENERATIVE_AI_API_KEY']],
    ['groq', ['GROQ_API_KEY']],
    ['openrouter', ['OPENROUTER_API_KEY']],
    ['bedrock', ['AWS_ACCESS_KEY_ID', 'AWS_PROFILE']],
  ];

  return credentialEnvVars
    .filter(([, envVars]) => envVars.some(envVar => Boolean(process.env[envVar])))
    .map(([providerName]) => providerName);
}
1053
-
1054
- // ---------------------------------------------------------------------------
1055
- // Gap 18: Multi-model routing based on task complexity
1056
- // ---------------------------------------------------------------------------
1057
-
1058
/** Complexity tiers for automatic model selection. */
export type TaskComplexity = 'simple' | 'moderate' | 'complex';

/** Leading words of short status/list/describe style queries. */
const SIMPLE_QUERY_PREFIX =
  /^(list|show|get|check|status|what is|what are|describe|which|where|who|ping|echo)\b/;

/** Verbs that imply code generation or heavy reasoning. */
const HEAVY_TASK_KEYWORDS =
  /\b(implement|design|architect|refactor|migrate|rewrite|build|create|scaffold|generate|optimize|debug|diagnose|analyze)\b/;

/**
 * Classify a user message as simple, moderate, or complex.
 *
 * - **simple**: shorter than 200 characters, starts with a status/list/describe
 *   style word, and contains no heavy-reasoning keyword
 * - **complex**: longer than 500 characters, or contains a heavy-reasoning
 *   keyword (implement, design, refactor, debug, ...)
 * - **moderate**: everything else
 *
 * Matching is case-insensitive and ignores surrounding whitespace. The
 * keyword check takes precedence over the simple prefix, so a request such
 * as "show me how to implement X" is complex rather than simple.
 *
 * @param message - The raw user message.
 * @returns The complexity tier for the message.
 */
export function classifyTaskComplexity(message: string): TaskComplexity {
  const lower = message.toLowerCase().trim();
  const needsHeavyReasoning = HEAVY_TASK_KEYWORDS.test(lower);

  // Simple: short status/list queries that do not ask for generation work.
  if (message.length < 200 && !needsHeavyReasoning && SIMPLE_QUERY_PREFIX.test(lower)) {
    return 'simple';
  }

  // Complex: long messages or keywords that imply heavy reasoning.
  if (message.length > 500 || needsHeavyReasoning) {
    return 'complex';
  }

  return 'moderate';
}

/**
 * Select the model string for a given complexity tier.
 *
 * @param complexity - The tier produced by `classifyTaskComplexity`.
 * @param preferredModel - User override; any non-empty value is returned as-is.
 * @returns The model identifier to route to.
 */
export function routeModel(complexity: TaskComplexity, preferredModel?: string): string {
  if (preferredModel) {
    return preferredModel;
  }
  if (complexity === 'simple') {
    return 'anthropic/claude-haiku-4-5-20251001';
  }
  if (complexity === 'complex') {
    return 'anthropic/claude-opus-4-6';
  }
  return 'anthropic/claude-sonnet-4-20250514';
}