@agile-vibe-coding/avc 0.1.1 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (239) hide show
  1. package/cli/agent-loader.js +21 -0
  2. package/cli/agents/agent-selector.md +152 -0
  3. package/cli/agents/architecture-recommender.md +418 -0
  4. package/cli/agents/code-implementer.md +117 -0
  5. package/cli/agents/code-validator.md +80 -0
  6. package/cli/agents/context-reviewer-epic.md +101 -0
  7. package/cli/agents/context-reviewer-story.md +92 -0
  8. package/cli/agents/context-writer-epic.md +145 -0
  9. package/cli/agents/context-writer-story.md +111 -0
  10. package/cli/agents/database-deep-dive.md +470 -0
  11. package/cli/agents/database-recommender.md +634 -0
  12. package/cli/agents/doc-distributor.md +176 -0
  13. package/cli/agents/doc-writer-epic.md +42 -0
  14. package/cli/agents/doc-writer-story.md +43 -0
  15. package/cli/agents/documentation-updater.md +203 -0
  16. package/cli/agents/duplicate-detector.md +110 -0
  17. package/cli/agents/epic-story-decomposer.md +559 -0
  18. package/cli/agents/feature-context-generator.md +91 -0
  19. package/cli/agents/gap-checker-epic.md +52 -0
  20. package/cli/agents/impact-checker-story.md +51 -0
  21. package/cli/agents/migration-guide-generator.md +305 -0
  22. package/cli/agents/mission-scope-generator.md +143 -0
  23. package/cli/agents/mission-scope-validator.md +146 -0
  24. package/cli/agents/project-context-extractor.md +122 -0
  25. package/cli/agents/project-documentation-creator.json +226 -0
  26. package/cli/agents/project-documentation-creator.md +595 -0
  27. package/cli/agents/question-prefiller.md +269 -0
  28. package/cli/agents/refiner-epic.md +39 -0
  29. package/cli/agents/refiner-story.md +42 -0
  30. package/cli/agents/scaffolding-generator.md +99 -0
  31. package/cli/agents/seed-validator.md +71 -0
  32. package/cli/agents/story-doc-enricher.md +133 -0
  33. package/cli/agents/story-scope-reviewer.md +147 -0
  34. package/cli/agents/story-splitter.md +83 -0
  35. package/cli/agents/suggestion-business-analyst.md +88 -0
  36. package/cli/agents/suggestion-deployment-architect.md +263 -0
  37. package/cli/agents/suggestion-product-manager.md +129 -0
  38. package/cli/agents/suggestion-security-specialist.md +156 -0
  39. package/cli/agents/suggestion-technical-architect.md +269 -0
  40. package/cli/agents/suggestion-ux-researcher.md +93 -0
  41. package/cli/agents/task-subtask-decomposer.md +188 -0
  42. package/cli/agents/validator-documentation.json +183 -0
  43. package/cli/agents/validator-documentation.md +455 -0
  44. package/cli/agents/validator-selector.md +211 -0
  45. package/cli/ansi-colors.js +21 -0
  46. package/cli/api-reference-tool.js +368 -0
  47. package/cli/build-docs.js +29 -8
  48. package/cli/ceremony-history.js +369 -0
  49. package/cli/checks/catalog.json +76 -0
  50. package/cli/checks/code/quality.json +26 -0
  51. package/cli/checks/code/testing.json +14 -0
  52. package/cli/checks/code/traceability.json +26 -0
  53. package/cli/checks/cross-refs/epic.json +171 -0
  54. package/cli/checks/cross-refs/story.json +149 -0
  55. package/cli/checks/epic/api.json +114 -0
  56. package/cli/checks/epic/backend.json +126 -0
  57. package/cli/checks/epic/cloud.json +126 -0
  58. package/cli/checks/epic/data.json +102 -0
  59. package/cli/checks/epic/database.json +114 -0
  60. package/cli/checks/epic/developer.json +182 -0
  61. package/cli/checks/epic/devops.json +174 -0
  62. package/cli/checks/epic/frontend.json +162 -0
  63. package/cli/checks/epic/mobile.json +102 -0
  64. package/cli/checks/epic/qa.json +90 -0
  65. package/cli/checks/epic/security.json +184 -0
  66. package/cli/checks/epic/solution-architect.json +192 -0
  67. package/cli/checks/epic/test-architect.json +90 -0
  68. package/cli/checks/epic/ui.json +102 -0
  69. package/cli/checks/epic/ux.json +90 -0
  70. package/cli/checks/fixes/epic-fix-template.md +10 -0
  71. package/cli/checks/fixes/story-fix-template.md +10 -0
  72. package/cli/checks/story/api.json +186 -0
  73. package/cli/checks/story/backend.json +102 -0
  74. package/cli/checks/story/cloud.json +102 -0
  75. package/cli/checks/story/data.json +210 -0
  76. package/cli/checks/story/database.json +102 -0
  77. package/cli/checks/story/developer.json +168 -0
  78. package/cli/checks/story/devops.json +102 -0
  79. package/cli/checks/story/frontend.json +174 -0
  80. package/cli/checks/story/mobile.json +102 -0
  81. package/cli/checks/story/qa.json +210 -0
  82. package/cli/checks/story/security.json +198 -0
  83. package/cli/checks/story/solution-architect.json +230 -0
  84. package/cli/checks/story/test-architect.json +210 -0
  85. package/cli/checks/story/ui.json +102 -0
  86. package/cli/checks/story/ux.json +102 -0
  87. package/cli/coding-order.js +401 -0
  88. package/cli/command-logger.js +49 -12
  89. package/cli/components/static-output.js +63 -0
  90. package/cli/console-output-manager.js +94 -0
  91. package/cli/dependency-checker.js +72 -0
  92. package/cli/docs-sync.js +306 -0
  93. package/cli/epic-story-validator.js +659 -0
  94. package/cli/evaluation-prompts.js +1008 -0
  95. package/cli/execution-context.js +195 -0
  96. package/cli/generate-summary-table.js +340 -0
  97. package/cli/init-model-config.js +704 -0
  98. package/cli/init.js +1737 -278
  99. package/cli/kanban-server-manager.js +227 -0
  100. package/cli/llm-claude.js +150 -1
  101. package/cli/llm-gemini.js +109 -0
  102. package/cli/llm-local.js +493 -0
  103. package/cli/llm-mock.js +233 -0
  104. package/cli/llm-openai.js +454 -0
  105. package/cli/llm-provider.js +379 -3
  106. package/cli/llm-token-limits.js +211 -0
  107. package/cli/llm-verifier.js +662 -0
  108. package/cli/llm-xiaomi.js +143 -0
  109. package/cli/message-constants.js +49 -0
  110. package/cli/message-manager.js +334 -0
  111. package/cli/message-types.js +96 -0
  112. package/cli/messaging-api.js +291 -0
  113. package/cli/micro-check-fixer.js +335 -0
  114. package/cli/micro-check-runner.js +449 -0
  115. package/cli/micro-check-scorer.js +148 -0
  116. package/cli/micro-check-validator.js +538 -0
  117. package/cli/model-pricing.js +192 -0
  118. package/cli/model-query-engine.js +468 -0
  119. package/cli/model-recommendation-analyzer.js +495 -0
  120. package/cli/model-selector.js +270 -0
  121. package/cli/output-buffer.js +107 -0
  122. package/cli/process-manager.js +73 -2
  123. package/cli/prompt-logger.js +57 -0
  124. package/cli/repl-ink.js +4625 -1094
  125. package/cli/repl-old.js +3 -4
  126. package/cli/seed-processor.js +962 -0
  127. package/cli/sprint-planning-processor.js +4162 -0
  128. package/cli/template-processor.js +2149 -105
  129. package/cli/templates/project.md +25 -8
  130. package/cli/templates/vitepress-config.mts.template +5 -4
  131. package/cli/token-tracker.js +547 -0
  132. package/cli/tools/generate-story-validators.js +317 -0
  133. package/cli/tools/generate-validators.js +669 -0
  134. package/cli/update-checker.js +19 -17
  135. package/cli/update-notifier.js +4 -4
  136. package/cli/validation-router.js +667 -0
  137. package/cli/verification-tracker.js +563 -0
  138. package/cli/worktree-runner.js +654 -0
  139. package/kanban/README.md +386 -0
  140. package/kanban/client/README.md +205 -0
  141. package/kanban/client/components.json +20 -0
  142. package/kanban/client/dist/assets/index-D_KC5EQT.css +1 -0
  143. package/kanban/client/dist/assets/index-DjY5zqW7.js +351 -0
  144. package/kanban/client/dist/index.html +16 -0
  145. package/kanban/client/dist/vite.svg +1 -0
  146. package/kanban/client/index.html +15 -0
  147. package/kanban/client/package-lock.json +9442 -0
  148. package/kanban/client/package.json +44 -0
  149. package/kanban/client/postcss.config.js +6 -0
  150. package/kanban/client/public/vite.svg +1 -0
  151. package/kanban/client/src/App.jsx +651 -0
  152. package/kanban/client/src/components/ProjectFileEditorPopup.jsx +117 -0
  153. package/kanban/client/src/components/ceremony/AskArchPopup.jsx +420 -0
  154. package/kanban/client/src/components/ceremony/AskModelPopup.jsx +629 -0
  155. package/kanban/client/src/components/ceremony/CeremonyWorkflowModal.jsx +1133 -0
  156. package/kanban/client/src/components/ceremony/EpicStorySelectionModal.jsx +254 -0
  157. package/kanban/client/src/components/ceremony/ProviderSwitcherButton.jsx +290 -0
  158. package/kanban/client/src/components/ceremony/SponsorCallModal.jsx +686 -0
  159. package/kanban/client/src/components/ceremony/SprintPlanningModal.jsx +838 -0
  160. package/kanban/client/src/components/ceremony/steps/ArchitectureStep.jsx +150 -0
  161. package/kanban/client/src/components/ceremony/steps/CompleteStep.jsx +136 -0
  162. package/kanban/client/src/components/ceremony/steps/DatabaseStep.jsx +202 -0
  163. package/kanban/client/src/components/ceremony/steps/DeploymentStep.jsx +123 -0
  164. package/kanban/client/src/components/ceremony/steps/MissionStep.jsx +106 -0
  165. package/kanban/client/src/components/ceremony/steps/ReviewAnswersStep.jsx +329 -0
  166. package/kanban/client/src/components/ceremony/steps/RunningStep.jsx +249 -0
  167. package/kanban/client/src/components/kanban/CardDetailModal.jsx +646 -0
  168. package/kanban/client/src/components/kanban/EpicSection.jsx +146 -0
  169. package/kanban/client/src/components/kanban/FilterToolbar.jsx +222 -0
  170. package/kanban/client/src/components/kanban/GroupingSelector.jsx +63 -0
  171. package/kanban/client/src/components/kanban/KanbanBoard.jsx +211 -0
  172. package/kanban/client/src/components/kanban/KanbanCard.jsx +147 -0
  173. package/kanban/client/src/components/kanban/KanbanColumn.jsx +90 -0
  174. package/kanban/client/src/components/kanban/RefineWorkItemPopup.jsx +784 -0
  175. package/kanban/client/src/components/kanban/RunButton.jsx +162 -0
  176. package/kanban/client/src/components/kanban/SeedButton.jsx +176 -0
  177. package/kanban/client/src/components/layout/LoadingScreen.jsx +82 -0
  178. package/kanban/client/src/components/process/ProcessMonitorBar.jsx +80 -0
  179. package/kanban/client/src/components/settings/AgentEditorPopup.jsx +171 -0
  180. package/kanban/client/src/components/settings/AgentsTab.jsx +381 -0
  181. package/kanban/client/src/components/settings/ApiKeysTab.jsx +142 -0
  182. package/kanban/client/src/components/settings/CeremonyModelsTab.jsx +105 -0
  183. package/kanban/client/src/components/settings/CheckEditorPopup.jsx +507 -0
  184. package/kanban/client/src/components/settings/CostThresholdsTab.jsx +95 -0
  185. package/kanban/client/src/components/settings/ModelPricingTab.jsx +269 -0
  186. package/kanban/client/src/components/settings/OpenAIAuthSection.jsx +412 -0
  187. package/kanban/client/src/components/settings/ServersTab.jsx +121 -0
  188. package/kanban/client/src/components/settings/SettingsModal.jsx +84 -0
  189. package/kanban/client/src/components/stats/CostModal.jsx +384 -0
  190. package/kanban/client/src/components/ui/badge.jsx +27 -0
  191. package/kanban/client/src/components/ui/dialog.jsx +121 -0
  192. package/kanban/client/src/components/ui/tabs.jsx +85 -0
  193. package/kanban/client/src/hooks/__tests__/useGrouping.test.js +232 -0
  194. package/kanban/client/src/hooks/useGrouping.js +177 -0
  195. package/kanban/client/src/hooks/useWebSocket.js +120 -0
  196. package/kanban/client/src/lib/__tests__/api.test.js +196 -0
  197. package/kanban/client/src/lib/__tests__/status-grouping.test.js +94 -0
  198. package/kanban/client/src/lib/api.js +515 -0
  199. package/kanban/client/src/lib/status-grouping.js +154 -0
  200. package/kanban/client/src/lib/utils.js +11 -0
  201. package/kanban/client/src/main.jsx +10 -0
  202. package/kanban/client/src/store/__tests__/kanbanStore.test.js +164 -0
  203. package/kanban/client/src/store/ceremonyStore.js +172 -0
  204. package/kanban/client/src/store/filterStore.js +201 -0
  205. package/kanban/client/src/store/kanbanStore.js +123 -0
  206. package/kanban/client/src/store/processStore.js +65 -0
  207. package/kanban/client/src/store/sprintPlanningStore.js +33 -0
  208. package/kanban/client/src/styles/globals.css +59 -0
  209. package/kanban/client/tailwind.config.js +77 -0
  210. package/kanban/client/vite.config.js +28 -0
  211. package/kanban/client/vitest.config.js +28 -0
  212. package/kanban/dev-start.sh +47 -0
  213. package/kanban/package.json +12 -0
  214. package/kanban/server/index.js +537 -0
  215. package/kanban/server/routes/ceremony.js +454 -0
  216. package/kanban/server/routes/costs.js +163 -0
  217. package/kanban/server/routes/openai-oauth.js +366 -0
  218. package/kanban/server/routes/processes.js +50 -0
  219. package/kanban/server/routes/settings.js +736 -0
  220. package/kanban/server/routes/websocket.js +281 -0
  221. package/kanban/server/routes/work-items.js +487 -0
  222. package/kanban/server/services/CeremonyService.js +1441 -0
  223. package/kanban/server/services/FileSystemScanner.js +95 -0
  224. package/kanban/server/services/FileWatcher.js +144 -0
  225. package/kanban/server/services/HierarchyBuilder.js +196 -0
  226. package/kanban/server/services/ProcessRegistry.js +122 -0
  227. package/kanban/server/services/TaskRunnerService.js +261 -0
  228. package/kanban/server/services/WorkItemReader.js +123 -0
  229. package/kanban/server/services/WorkItemRefineService.js +510 -0
  230. package/kanban/server/start.js +49 -0
  231. package/kanban/server/utils/kanban-logger.js +132 -0
  232. package/kanban/server/utils/markdown.js +91 -0
  233. package/kanban/server/utils/status-grouping.js +107 -0
  234. package/kanban/server/workers/run-task-worker.js +121 -0
  235. package/kanban/server/workers/seed-worker.js +94 -0
  236. package/kanban/server/workers/sponsor-call-worker.js +92 -0
  237. package/kanban/server/workers/sprint-planning-worker.js +212 -0
  238. package/package.json +19 -7
  239. package/cli/agents/documentation.md +0 -302
@@ -1,12 +1,368 @@
1
1
  export class LLMProvider {
2
- constructor(providerName, model) {
2
+ constructor(providerName, model, retryConfig = {}) {
3
3
  this.providerName = providerName;
4
4
  this.model = model;
5
5
  this._client = null;
6
+ this.tokenUsage = {
7
+ inputTokens: 0,
8
+ outputTokens: 0,
9
+ cachedInputTokens: 0, // cache reads (already counted inside inputTokens)
10
+ cacheWriteTokens: 0, // cache writes (billed at 1.25× on Claude, free on OpenAI/Gemini)
11
+ totalCalls: 0
12
+ };
13
+
14
+ // Per-call token callbacks (fired synchronously after each _trackTokens call)
15
+ this._callCallbacks = [];
16
+
17
+ // Retry configuration
18
+ this.retryConfig = {
19
+ maxRetries: retryConfig.maxRetries || 5,
20
+ initialDelay: retryConfig.initialDelay || 1000, // 1 second
21
+ maxDelay: retryConfig.maxDelay || 32000, // 32 seconds
22
+ backoffMultiplier: retryConfig.backoffMultiplier || 2
23
+ };
24
+ }
25
+
26
+ /**
27
+ * Check if error is retryable (high demand, rate limit, temporary failures)
28
+ * Provider-specific error codes:
29
+ * - OpenAI: 429 (rate limit), 503 (overloaded)
30
+ * - Claude: 429 (rate_limit_error), 529 (overloaded_error)
31
+ * - Gemini: 429 (RESOURCE_EXHAUSTED), 503 (UNAVAILABLE/overloaded)
32
+ *
33
+ * @param {Error} error - The error to check
34
+ * @returns {boolean} True if error is retryable
35
+ */
36
+ _isRetryableError(error) {
37
+ const errorMessage = error.message?.toLowerCase() || '';
38
+ const errorCode = error.status || error.code;
39
+ const errorType = error.type || error.error?.type || '';
40
+
41
+ // Check for provider-specific error codes
42
+ const retryableStatusCodes = [
43
+ 429, // Rate limit (all providers)
44
+ 503, // Service unavailable (OpenAI, Gemini)
45
+ 529 // Overloaded (Claude/Anthropic)
46
+ ];
47
+
48
+ const hasRetryableCode = retryableStatusCodes.includes(errorCode);
49
+
50
+ // Check for provider-specific error types
51
+ const retryableErrorTypes = [
52
+ 'rate_limit_error', // Claude
53
+ 'overloaded_error', // Claude
54
+ 'resource_exhausted', // Gemini
55
+ 'unavailable' // Gemini
56
+ ];
57
+
58
+ const hasRetryableType = retryableErrorTypes.some(type =>
59
+ errorType.toLowerCase().includes(type)
60
+ );
61
+
62
+ // Check for high demand/overload message patterns
63
+ const highDemandPatterns = [
64
+ 'high demand',
65
+ 'experiencing high demand',
66
+ 'spikes in demand',
67
+ 'try again later',
68
+ 'temporarily unavailable',
69
+ 'service unavailable',
70
+ 'overloaded',
71
+ 'model is overloaded',
72
+ 'too many requests',
73
+ 'rate limit',
74
+ 'quota exceeded',
75
+ 'resource exhausted',
76
+ 'resource has been exhausted',
77
+ // Transient network/connection errors (e.g. WSL2 IPv6, momentary TCP failures)
78
+ 'connection error',
79
+ 'econnreset',
80
+ 'econnrefused',
81
+ 'enotfound',
82
+ 'etimedout',
83
+ 'network error',
84
+ 'fetch failed',
85
+ ];
86
+
87
+ const hasHighDemandMessage = highDemandPatterns.some(pattern =>
88
+ errorMessage.includes(pattern)
89
+ );
90
+
91
+ return hasRetryableCode || hasRetryableType || hasHighDemandMessage;
92
+ }
93
+
94
+ /**
95
+ * Sleep for specified milliseconds
96
+ * @param {number} ms - Milliseconds to sleep
97
+ * @returns {Promise<void>}
98
+ */
99
+ _sleep(ms) {
100
+ return new Promise(resolve => setTimeout(resolve, ms));
101
+ }
102
+
103
+ /**
104
+ * Extract retry delay from error headers (e.g., Claude's retry-after header)
105
+ * @param {Error} error - The error object
106
+ * @returns {number|null} Delay in milliseconds, or null if not found
107
+ */
108
+ _getRetryAfterDelay(error) {
109
+ // Check for retry-after header (Claude provides this)
110
+ const retryAfter = error.headers?.['retry-after'] || error.error?.headers?.['retry-after'];
111
+
112
+ if (retryAfter) {
113
+ // retry-after can be in seconds
114
+ const seconds = parseInt(retryAfter, 10);
115
+ if (!isNaN(seconds)) {
116
+ return seconds * 1000; // Convert to milliseconds
117
+ }
118
+ }
119
+
120
+ return null;
121
+ }
122
+
123
+ /**
124
+ * Execute an async function with exponential backoff retry strategy
125
+ * Uses retry-after header when available (Claude), otherwise exponential backoff
126
+ *
127
+ * @param {Function} fn - Async function to execute
128
+ * @param {string} operationName - Name of operation for logging
129
+ * @returns {Promise<any>} Result of the function
130
+ */
131
+ async _withRetry(fn, operationName = 'API call') {
132
+ let lastError;
133
+ let delay = this.retryConfig.initialDelay;
134
+
135
+ for (let attempt = 0; attempt <= this.retryConfig.maxRetries; attempt++) {
136
+ try {
137
+ return await fn();
138
+ } catch (error) {
139
+ lastError = error;
140
+
141
+ // Don't retry if it's not a retryable error
142
+ if (!this._isRetryableError(error)) {
143
+ throw error;
144
+ }
145
+
146
+ // Don't retry if we've exhausted all attempts
147
+ if (attempt === this.retryConfig.maxRetries) {
148
+ console.error(`\n${operationName} failed after ${this.retryConfig.maxRetries + 1} attempts`);
149
+ throw error;
150
+ }
151
+
152
+ // Check for retry-after header (Claude provides this)
153
+ const retryAfterDelay = this._getRetryAfterDelay(error);
154
+ const currentDelay = retryAfterDelay || delay;
155
+
156
+ // Log retry attempt with helpful info
157
+ const retrySource = retryAfterDelay ? 'server directive' : 'exponential backoff';
158
+ console.log(`\n⏳ ${operationName} failed (attempt ${attempt + 1}/${this.retryConfig.maxRetries + 1})`);
159
+ const causeMsg = error.cause?.message || error.cause?.code || '';
160
+ console.log(` Error: ${error.message}${causeMsg ? ` (cause: ${causeMsg})` : ''}`);
161
+ console.log(` Retrying in ${currentDelay / 1000}s (${retrySource})...`);
162
+
163
+ // Wait before retrying
164
+ await this._sleep(currentDelay);
165
+
166
+ // Exponential backoff with max cap (only if not using retry-after)
167
+ if (!retryAfterDelay) {
168
+ delay = Math.min(
169
+ delay * this.retryConfig.backoffMultiplier,
170
+ this.retryConfig.maxDelay
171
+ );
172
+ }
173
+ }
174
+ }
175
+
176
+ throw lastError;
177
+ }
178
+
179
+ /**
180
+ * Register a callback to be fired synchronously after every LLM API call.
181
+ * Receives { input, output, provider, model } delta for that single call.
182
+ * @param {Function} fn - Callback function
183
+ */
184
+ onCall(fn) {
185
+ this._callCallbacks.push(fn);
186
+ }
187
+
188
+ /**
189
+ * Attach a PromptLogger instance and set the stage label for subsequent calls.
190
+ * @param {import('./prompt-logger.js').PromptLogger} promptLogger
191
+ * @param {string} [stage]
192
+ */
193
+ setPromptLogger(promptLogger, stage) {
194
+ this._promptLogger = promptLogger;
195
+ this._promptLoggerStage = stage || 'unknown';
196
+ }
197
+
198
+ /**
199
+ * Track token usage from API response and fire per-call callbacks.
200
+ * Optionally writes a prompt/response payload to the attached PromptLogger.
201
+ * @param {Object} usage - Usage object from API response
202
+ * @param {Object|null} [promptPayload] - Optional { prompt, agentInstructions, response, elapsed }
203
+ */
204
+ _trackTokens(usage, promptPayload = null) {
205
+ if (usage) {
206
+ const deltaIn = usage.input_tokens || usage.inputTokens || usage.promptTokenCount || usage.prompt_tokens || 0;
207
+ const deltaOut = usage.output_tokens || usage.outputTokens || usage.candidatesTokenCount || usage.completion_tokens || 0;
208
+ // Cache stats — each provider uses a different field name:
209
+ // Claude: cache_read_input_tokens / cache_creation_input_tokens
210
+ // OpenAI: prompt_tokens_details.cached_tokens
211
+ // Gemini: cachedContentTokenCount
212
+ const deltaCacheRead = usage.cache_read_input_tokens
213
+ || usage.cachedContentTokenCount
214
+ || usage.prompt_tokens_details?.cached_tokens
215
+ || 0;
216
+ const deltaCacheWrite = usage.cache_creation_input_tokens || 0;
217
+ this.tokenUsage.inputTokens += deltaIn;
218
+ this.tokenUsage.outputTokens += deltaOut;
219
+ this.tokenUsage.cachedInputTokens += deltaCacheRead;
220
+ this.tokenUsage.cacheWriteTokens += deltaCacheWrite;
221
+ this.tokenUsage.totalCalls++;
222
+ if (deltaCacheRead > 0 || deltaCacheWrite > 0) {
223
+ console.log(` [cache] write=${deltaCacheWrite} read=${deltaCacheRead} tokens`);
224
+ }
225
+ if (this._callCallbacks.length > 0 && (deltaIn > 0 || deltaOut > 0)) {
226
+ const delta = { input: deltaIn, output: deltaOut, cached: deltaCacheRead, cacheWrite: deltaCacheWrite, provider: this.providerName, model: this.model };
227
+ for (const fn of this._callCallbacks) {
228
+ try { fn(delta); } catch (_) {}
229
+ }
230
+ }
231
+ if (this._promptLogger && promptPayload) {
232
+ this._promptLogger.write({
233
+ ceremony: this._promptLogger.ceremony,
234
+ stage: this._promptLoggerStage || 'unknown',
235
+ call: (this._promptLogger.callCount || 0) + 1,
236
+ timestamp: new Date().toISOString(),
237
+ elapsed_ms: promptPayload.elapsed ?? null,
238
+ provider: this.providerName,
239
+ model: this.model,
240
+ tokens: { input: deltaIn, output: deltaOut },
241
+ prompt: promptPayload.prompt ?? null,
242
+ agentInstructions: promptPayload.agentInstructions ?? null,
243
+ response: promptPayload.response ?? null,
244
+ });
245
+ }
246
+ }
247
+ }
248
+
249
+ /**
250
+ * Get token usage statistics with cost estimate
251
+ * @returns {Object} Token usage and cost information
252
+ */
253
+ getTokenUsage() {
254
+ const total = this.tokenUsage.inputTokens + this.tokenUsage.outputTokens;
255
+
256
+ // Pricing per 1M tokens (as of 2026-02)
257
+ const pricing = {
258
+ 'claude': { input: 3.00, output: 15.00 }, // Claude Sonnet 4.5
259
+ 'gemini': { input: 0.15, output: 0.60 }, // Gemini 2.0 Flash
260
+ 'openai': { input: 1.75, output: 14.00 }, // GPT-5.2
261
+ 'local': { input: 0, output: 0 }, // Local models — free
262
+ 'xiaomi': { input: 0.09, output: 0.29 } // MiMo V2 Flash (default)
263
+ };
264
+
265
+ const rates = pricing[this.providerName] || { input: 0, output: 0 };
266
+ const inputCost = (this.tokenUsage.inputTokens / 1000000) * rates.input;
267
+ const outputCost = (this.tokenUsage.outputTokens / 1000000) * rates.output;
268
+ const estimatedCost = inputCost + outputCost;
269
+
270
+ return {
271
+ inputTokens: this.tokenUsage.inputTokens,
272
+ outputTokens: this.tokenUsage.outputTokens,
273
+ cachedInputTokens: this.tokenUsage.cachedInputTokens,
274
+ cacheWriteTokens: this.tokenUsage.cacheWriteTokens,
275
+ totalTokens: total,
276
+ totalCalls: this.tokenUsage.totalCalls,
277
+ estimatedCost,
278
+ provider: this.providerName,
279
+ model: this.model
280
+ };
281
+ }
282
+
283
+ /**
284
+ * Map of cloud providers to their required env var.
285
+ */
286
+ static PROVIDER_ENV_MAP = {
287
+ 'claude': 'ANTHROPIC_API_KEY',
288
+ 'gemini': 'GEMINI_API_KEY',
289
+ 'openai': 'OPENAI_API_KEY',
290
+ 'xiaomi': 'XIAOMI_API_KEY',
291
+ };
292
+
293
+ /**
294
+ * Default model per provider (used as fallback when switching providers).
295
+ */
296
+ static PROVIDER_DEFAULT_MODELS = {
297
+ 'claude': 'claude-sonnet-4-6',
298
+ 'gemini': 'gemini-2.5-flash',
299
+ 'openai': 'gpt-4.1',
300
+ 'xiaomi': 'MiMo-V2-Flash',
301
+ };
302
+
303
+ /**
304
+ * Check whether a provider has valid credentials available.
305
+ * For cloud providers this means the env var is set.
306
+ * For local, probe the server (with a short timeout).
307
+ * @param {string} providerName
308
+ * @returns {Promise<boolean>}
309
+ */
310
+ static async hasProviderCredentials(providerName) {
311
+ if (providerName === 'local') {
312
+ try {
313
+ const { discoverLocalServers } = await import('./llm-local.js');
314
+ const servers = await Promise.race([
315
+ discoverLocalServers(),
316
+ new Promise((_, reject) => setTimeout(() => reject(new Error('timeout')), 3000))
317
+ ]);
318
+ return servers.length > 0;
319
+ } catch {
320
+ return false;
321
+ }
322
+ }
323
+ if (providerName === 'openai') {
324
+ const oauthMode = process.env.OPENAI_AUTH_MODE === 'oauth';
325
+ return !!(process.env.OPENAI_API_KEY || (oauthMode && process.env.OPENAI_OAUTH_TOKEN));
326
+ }
327
+ const envVar = LLMProvider.PROVIDER_ENV_MAP[providerName];
328
+ return envVar ? !!process.env[envVar] : false;
329
+ }
330
+
331
+ /**
332
+ * Resolve a provider+model pair, falling back to any available provider
333
+ * if the requested one has no credentials.
334
+ * @param {string} provider - Requested provider name
335
+ * @param {string} model - Requested model name
336
+ * @returns {Promise<{ provider: string, model: string, fellBack: boolean }>}
337
+ */
338
+ static async resolveAvailableProvider(provider, model) {
339
+ // Check requested provider first
340
+ if (await LLMProvider.hasProviderCredentials(provider)) {
341
+ return { provider, model, fellBack: false };
342
+ }
343
+
344
+ // Fallback priority: claude → gemini → openai → xiaomi → local
345
+ const fallbackOrder = ['claude', 'gemini', 'openai', 'xiaomi', 'local'];
346
+ for (const candidate of fallbackOrder) {
347
+ if (candidate === provider) continue; // already checked
348
+ if (await LLMProvider.hasProviderCredentials(candidate)) {
349
+ const fallbackModel = LLMProvider.PROVIDER_DEFAULT_MODELS[candidate] || candidate;
350
+ return { provider: candidate, model: fallbackModel, fellBack: true };
351
+ }
352
+ }
353
+
354
+ // Nothing available — return original and let create() throw the usual error
355
+ return { provider, model, fellBack: false };
6
356
  }
7
357
 
8
358
  // Factory — async because of dynamic import (only loads the SDK you need)
9
359
  static async create(providerName, model) {
360
+ // AVC_LLM_MOCK=1: return instant mock provider for E2E testing (no API calls)
361
+ if (process.env.AVC_LLM_MOCK) {
362
+ const { MockLLMProvider } = await import('./llm-mock.js');
363
+ return new MockLLMProvider();
364
+ }
365
+
10
366
  switch (providerName) {
11
367
  case 'claude': {
12
368
  const { ClaudeProvider } = await import('./llm-claude.js');
@@ -16,8 +372,20 @@ export class LLMProvider {
16
372
  const { GeminiProvider } = await import('./llm-gemini.js');
17
373
  return new GeminiProvider(model);
18
374
  }
375
+ case 'openai': {
376
+ const { OpenAIProvider } = await import('./llm-openai.js');
377
+ return new OpenAIProvider(model);
378
+ }
379
+ case 'local': {
380
+ const { LocalProvider } = await import('./llm-local.js');
381
+ return new LocalProvider(model);
382
+ }
383
+ case 'xiaomi': {
384
+ const { XiaomiProvider } = await import('./llm-xiaomi.js');
385
+ return new XiaomiProvider(model);
386
+ }
19
387
  default:
20
- throw new Error(`Unknown LLM provider: "${providerName}". Supported: claude, gemini`);
388
+ throw new Error(`Unknown LLM provider: "${providerName}". Supported: claude, gemini, openai, local, xiaomi`);
21
389
  }
22
390
  }
23
391
 
@@ -26,7 +394,10 @@ export class LLMProvider {
26
394
  if (!this._client) {
27
395
  this._client = this._createClient();
28
396
  }
29
- return this._callProvider(prompt, maxTokens, systemInstructions);
397
+ return this._withRetry(
398
+ () => this._callProvider(prompt, maxTokens, systemInstructions),
399
+ 'Text generation'
400
+ );
30
401
  }
31
402
 
32
403
  // Validate API key and provider connectivity with a minimal test call
@@ -57,6 +428,11 @@ export class LLMProvider {
57
428
  }
58
429
  }
59
430
 
431
+ // Generate structured JSON output
432
+ async generateJSON(prompt, agentInstructions = null) {
433
+ throw new Error(`${this.constructor.name} must implement generateJSON()`);
434
+ }
435
+
60
436
  // Subclass hooks — throw if not overridden
61
437
  _createClient() { throw new Error(`${this.constructor.name} must implement _createClient()`); }
62
438
  async _callProvider(prompt, maxTokens, systemInstructions) { throw new Error(`${this.constructor.name} must implement _callProvider()`); }
@@ -0,0 +1,211 @@
1
+ /**
2
+ * Model-specific maximum output token limits (ACTUAL API MAXIMUMS)
3
+ *
4
+ * These limits represent the ACTUAL maximum output tokens each model can generate
5
+ * according to official API documentation (verified February 2026).
6
+ *
7
+ * Critical Updates (Feb 2026):
8
+ * - Claude 4.x models: 64K-128K tokens (8x-16x previous limits)
9
+ * - Gemini 2.5 Flash: 65,535 tokens (8x previous limit)
10
+ * - GPT-4o/mini: 16,384 tokens (unchanged)
11
+ *
12
+ * Using actual model limits ensures complete documentation generation without truncation.
13
+ *
14
+ * Sources:
15
+ * - Claude: https://docs.cloud.google.com/vertex-ai/generative-ai/docs/partner-models/claude/
16
+ * - Gemini: https://docs.cloud.google.com/vertex-ai/generative-ai/docs/models/gemini/
17
+ * - OpenAI: https://community.openai.com/t/what-is-the-token-limit-of-the-new-version-gpt-4o/752528
18
+ */
19
+
20
// Suppress duplicate warnings — each model warned only once per process.
// Keys are either bare model IDs (fallback warnings in getMaxTokensForModel)
// or `clamp:<model>:<tokens>` strings (clamp warnings in clampTokensToModelLimit).
// Module-level state: persists for the life of the process, never cleared.
const _warnedModels = new Set();
22
+
23
/**
 * Maximum output tokens per model ID (exact-match keys).
 * Consumed by getMaxTokensForModel(); the special 'default' key is the
 * fallback for unrecognized models. Values come from the provider docs
 * linked in the file header and in the per-section comments below.
 */
export const MODEL_MAX_TOKENS = {
  // Claude models (Anthropic)
  // Source: https://docs.cloud.google.com/vertex-ai/generative-ai/docs/partner-models/claude/
  'claude-sonnet-4-5-20250929': 64000, // Was 8192 - ACTUAL: 64,000 tokens
  'claude-sonnet-4-5': 64000,
  'claude-sonnet-4': 64000,
  'claude-opus-4-6': 128000, // Was 16384 - ACTUAL: 128,000 tokens
  'claude-opus-4': 128000,
  'claude-haiku-4-5-20251001': 64000, // Was 8192 - ACTUAL: 64,000 tokens
  'claude-haiku-4-5': 64000,
  'claude-haiku-4': 64000,

  // OpenAI models
  // Source: https://community.openai.com/t/what-is-the-token-limit-of-the-new-version-gpt-4o/752528
  // GPT-5 family (32K output tokens assumed; add specific overrides as official limits are confirmed)
  'gpt-5': 32768,
  'gpt-5.1': 32768,
  'gpt-5.2': 32768,
  'gpt-5.4': 32768,
  'gpt-5-mini': 32768,
  'gpt-5.2-chat-latest': 16384, // Keep at 16384 — existing tests depend on this value
  'gpt-5.2-pro': 16384,
  'gpt-5.2-codex': 16384,
  'gpt-4o': 16384, // Correct - max 16,384 tokens
  'gpt-4o-2024-11-20': 16384,
  'gpt-4o-mini': 16384, // Correct - max 16,384 tokens
  'gpt-4-turbo': 4096,
  'gpt-4': 8192,
  'gpt-3.5-turbo': 4096,

  // Google Gemini models
  // Source: https://docs.cloud.google.com/vertex-ai/generative-ai/docs/models/gemini/
  'gemini-3.1-pro-preview': 65536, // ACTUAL: 65,536 tokens (verified March 2026)
  'gemini-3-flash-preview': 65536, // ACTUAL: 65,536 tokens (verified March 2026)
  'gemini-2.5-pro': 65536, // ACTUAL: 65,536 tokens
  'gemini-2.5-flash': 65535, // Was 8192 - ACTUAL: 65,535 tokens
  'gemini-2.5-flash-lite': 32768, // ACTUAL: 32,768 tokens
  'gemini-2.0-flash': 8192, // Correct - max 8,192 tokens
  'gemini-2.0-flash-exp': 8192,
  'gemini-1.5-pro': 8192,
  'gemini-1.5-flash': 8192,
  'gemini-pro': 8192,

  // Default fallback
  'default': 8192
};
69
+
70
/**
 * Model context window sizes (total input + output tokens).
 *
 * These represent the full context window each model supports.
 * Consumed by getContextWindowForModel(); the special 'local-default'
 * key is the conservative fallback for the 'local' provider, where the
 * actual model behind the endpoint is unknown.
 *
 * NOTE(review): the mimo-* (Xiaomi) entries below have no counterpart
 * in MODEL_MAX_TOKENS, so output-token lookups for them fall through to
 * the 8192 default — confirm this is intended.
 */
export const MODEL_CONTEXT_WINDOWS = {
  // Claude models
  'claude-sonnet-4-5': 200000,
  'claude-sonnet-4': 200000,
  'claude-opus-4-6': 200000,
  'claude-opus-4': 200000,
  'claude-haiku-4-5': 200000,
  'claude-haiku-4': 200000,

  // OpenAI models
  'gpt-5': 128000,
  'gpt-5.1': 128000,
  'gpt-5.2': 128000,
  'gpt-5.4': 128000,
  'gpt-5-mini': 128000,
  'gpt-4o': 128000,
  'gpt-4o-mini': 128000,
  'gpt-4-turbo': 128000,
  'gpt-4': 8192,

  // Google Gemini models
  'gemini-3.1-pro-preview': 1048576,
  'gemini-3-flash-preview': 1048576,
  'gemini-2.5-pro': 1048576,
  'gemini-2.5-flash': 1048576,
  'gemini-2.0-flash': 1048576,
  'gemini-1.5-pro': 2097152,
  'gemini-1.5-flash': 1048576,

  // Default for unknown local models - conservative estimate
  'local-default': 8192,

  // Xiaomi MiMo models
  'mimo-v2-flash': 262144,
  'mimo-v2-pro': 1048576,
  'mimo-v2-omni': 262144,
};
114
+
115
/**
 * Get maximum output tokens for a specific model.
 *
 * Resolution order: exact table match → base-name match for versioned
 * IDs (e.g. "claude-opus-4-latest" → "claude-opus-4") → GPT-5 family
 * prefix fallback → MODEL_MAX_TOKENS['default']. Fallback paths warn
 * once per model per process (tracked in _warnedModels).
 *
 * @param {string} modelId - The model identifier
 * @returns {number} Maximum output tokens supported by the model
 */
export function getMaxTokensForModel(modelId) {
  if (!modelId) {
    return MODEL_MAX_TOKENS['default'];
  }

  // Direct lookup. Object.hasOwn avoids false hits on inherited
  // Object.prototype members (e.g. modelId === 'toString' would
  // otherwise be truthy and return a function instead of a number).
  if (Object.hasOwn(MODEL_MAX_TOKENS, modelId)) {
    return MODEL_MAX_TOKENS[modelId];
  }

  // Fuzzy match for versioned models (e.g., "claude-opus-4-latest" → "claude-opus-4")
  const baseModel = modelId.split('-').slice(0, 3).join('-');
  if (Object.hasOwn(MODEL_MAX_TOKENS, baseModel)) {
    return MODEL_MAX_TOKENS[baseModel];
  }

  // Prefix fallback: catch any gpt-5.x variant not explicitly listed.
  // Read the family default from the table instead of duplicating the
  // literal 32768 here, so the two values cannot drift apart.
  if (modelId.startsWith('gpt-5')) {
    const gpt5Default = MODEL_MAX_TOKENS['gpt-5'];
    if (!_warnedModels.has(modelId)) {
      _warnedModels.add(modelId);
      console.warn(`No exact max tokens for "${modelId}", using GPT-5 family default: ${gpt5Default}`);
    }
    return gpt5Default;
  }

  // Fallback to default
  if (!_warnedModels.has(modelId)) {
    _warnedModels.add(modelId);
    console.warn(`No max tokens configured for model "${modelId}", using default: ${MODEL_MAX_TOKENS['default']}`);
  }
  return MODEL_MAX_TOKENS['default'];
}
152
+
153
/**
 * Get the context window size for a specific model.
 *
 * Resolution order: 'local' provider → LOCAL_LLM_CONTEXT_WINDOW env
 * override or the conservative 'local-default'; otherwise exact table
 * match → base-name match for versioned IDs → null (no enforcement).
 *
 * @param {string} modelId - The model identifier
 * @param {string} provider - The provider name ('claude', 'openai', 'gemini', 'local')
 * @returns {number|null} Context window size in tokens, or null if no limit enforcement
 */
export function getContextWindowForModel(modelId, provider) {
  // Local provider: the actual model behind the endpoint is unknown, so
  // use a conservative default, overridable via env var.
  if (provider === 'local') {
    const envOverride = process.env.LOCAL_LLM_CONTEXT_WINDOW;
    if (envOverride) {
      // Number.parseInt/Number.isNaN: explicit, non-coercing variants of
      // the global functions; only positive integers are accepted.
      const parsed = Number.parseInt(envOverride, 10);
      if (!Number.isNaN(parsed) && parsed > 0) {
        return parsed;
      }
      console.warn(`Invalid LOCAL_LLM_CONTEXT_WINDOW value "${envOverride}", using default: ${MODEL_CONTEXT_WINDOWS['local-default']}`);
    }
    return MODEL_CONTEXT_WINDOWS['local-default'];
  }

  if (!modelId) {
    return null;
  }

  // Direct lookup. Object.hasOwn avoids false hits on inherited
  // Object.prototype members (e.g. modelId === 'constructor').
  if (Object.hasOwn(MODEL_CONTEXT_WINDOWS, modelId)) {
    return MODEL_CONTEXT_WINDOWS[modelId];
  }

  // Fuzzy match for versioned models (e.g., "claude-opus-4-latest" → "claude-opus-4")
  const baseModel = modelId.split('-').slice(0, 3).join('-');
  if (Object.hasOwn(MODEL_CONTEXT_WINDOWS, baseModel)) {
    return MODEL_CONTEXT_WINDOWS[baseModel];
  }

  // Unknown cloud model — no limit enforcement
  return null;
}
191
+
192
/**
 * Clamp a requested output-token count to the model's documented limit.
 *
 * Looks up the limit via getMaxTokensForModel() and, when clamping is
 * needed, warns once per (model, requestedTokens) pair per process.
 *
 * @param {string} modelId - The model identifier
 * @param {number} requestedTokens - Requested output tokens
 * @returns {number} Clamped token value within model limits
 */
export function clampTokensToModelLimit(modelId, requestedTokens) {
  const limit = getMaxTokensForModel(modelId);

  // Within bounds — pass the request through untouched.
  if (requestedTokens <= limit) {
    return requestedTokens;
  }

  const clampKey = `clamp:${modelId}:${requestedTokens}`;
  if (!_warnedModels.has(clampKey)) {
    _warnedModels.add(clampKey);
    console.warn(`Requested ${requestedTokens} tokens for ${modelId}, clamping to model limit: ${limit}`);
  }
  return limit;
}