@agile-vibe-coding/avc 0.2.3 → 0.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (262)
  1. package/README.md +475 -3
  2. package/cli/agents/agent-selector.md +23 -0
  3. package/cli/agents/code-implementer.md +117 -0
  4. package/cli/agents/code-validator.md +80 -0
  5. package/cli/agents/context-reviewer-epic.md +101 -0
  6. package/cli/agents/context-reviewer-story.md +92 -0
  7. package/cli/agents/context-writer-epic.md +145 -0
  8. package/cli/agents/context-writer-story.md +111 -0
  9. package/cli/agents/doc-writer-epic.md +42 -0
  10. package/cli/agents/doc-writer-story.md +43 -0
  11. package/cli/agents/duplicate-detector.md +110 -0
  12. package/cli/agents/epic-story-decomposer.md +318 -39
  13. package/cli/agents/mission-scope-generator.md +68 -4
  14. package/cli/agents/mission-scope-validator.md +40 -6
  15. package/cli/agents/project-context-extractor.md +21 -6
  16. package/cli/agents/scaffolding-generator.md +99 -0
  17. package/cli/agents/seed-validator.md +71 -0
  18. package/cli/agents/story-scope-reviewer.md +147 -0
  19. package/cli/agents/story-splitter.md +83 -0
  20. package/cli/agents/validator-documentation.json +31 -0
  21. package/cli/agents/validator-documentation.md +3 -1
  22. package/cli/api-reference-tool.js +368 -0
  23. package/cli/checks/catalog.json +76 -0
  24. package/cli/checks/code/quality.json +26 -0
  25. package/cli/checks/code/testing.json +14 -0
  26. package/cli/checks/code/traceability.json +26 -0
  27. package/cli/checks/cross-refs/epic.json +171 -0
  28. package/cli/checks/cross-refs/story.json +149 -0
  29. package/cli/checks/epic/api.json +114 -0
  30. package/cli/checks/epic/backend.json +126 -0
  31. package/cli/checks/epic/cloud.json +126 -0
  32. package/cli/checks/epic/data.json +102 -0
  33. package/cli/checks/epic/database.json +114 -0
  34. package/cli/checks/epic/developer.json +182 -0
  35. package/cli/checks/epic/devops.json +174 -0
  36. package/cli/checks/epic/frontend.json +162 -0
  37. package/cli/checks/epic/mobile.json +102 -0
  38. package/cli/checks/epic/qa.json +90 -0
  39. package/cli/checks/epic/security.json +184 -0
  40. package/cli/checks/epic/solution-architect.json +192 -0
  41. package/cli/checks/epic/test-architect.json +90 -0
  42. package/cli/checks/epic/ui.json +102 -0
  43. package/cli/checks/epic/ux.json +90 -0
  44. package/cli/checks/fixes/epic-fix-template.md +10 -0
  45. package/cli/checks/fixes/story-fix-template.md +10 -0
  46. package/cli/checks/story/api.json +186 -0
  47. package/cli/checks/story/backend.json +102 -0
  48. package/cli/checks/story/cloud.json +102 -0
  49. package/cli/checks/story/data.json +210 -0
  50. package/cli/checks/story/database.json +102 -0
  51. package/cli/checks/story/developer.json +168 -0
  52. package/cli/checks/story/devops.json +102 -0
  53. package/cli/checks/story/frontend.json +174 -0
  54. package/cli/checks/story/mobile.json +102 -0
  55. package/cli/checks/story/qa.json +210 -0
  56. package/cli/checks/story/security.json +198 -0
  57. package/cli/checks/story/solution-architect.json +230 -0
  58. package/cli/checks/story/test-architect.json +210 -0
  59. package/cli/checks/story/ui.json +102 -0
  60. package/cli/checks/story/ux.json +102 -0
  61. package/cli/coding-order.js +401 -0
  62. package/cli/dependency-checker.js +72 -0
  63. package/cli/epic-story-validator.js +284 -799
  64. package/cli/index.js +0 -0
  65. package/cli/init-model-config.js +17 -10
  66. package/cli/init.js +514 -92
  67. package/cli/kanban-server-manager.js +1 -2
  68. package/cli/llm-claude.js +98 -31
  69. package/cli/llm-gemini.js +29 -5
  70. package/cli/llm-local.js +493 -0
  71. package/cli/llm-openai.js +262 -41
  72. package/cli/llm-provider.js +147 -8
  73. package/cli/llm-token-limits.js +113 -4
  74. package/cli/llm-verifier.js +209 -1
  75. package/cli/llm-xiaomi.js +143 -0
  76. package/cli/message-constants.js +3 -12
  77. package/cli/messaging-api.js +6 -12
  78. package/cli/micro-check-fixer.js +335 -0
  79. package/cli/micro-check-runner.js +449 -0
  80. package/cli/micro-check-scorer.js +148 -0
  81. package/cli/micro-check-validator.js +538 -0
  82. package/cli/model-pricing.js +23 -0
  83. package/cli/model-selector.js +3 -2
  84. package/cli/prompt-logger.js +57 -0
  85. package/cli/repl-ink.js +106 -346
  86. package/cli/repl-old.js +1 -2
  87. package/cli/seed-processor.js +194 -24
  88. package/cli/sprint-planning-processor.js +2638 -289
  89. package/cli/template-processor.js +50 -3
  90. package/cli/token-tracker.js +50 -23
  91. package/cli/tools/generate-story-validators.js +1 -1
  92. package/cli/validation-router.js +70 -8
  93. package/cli/worktree-runner.js +654 -0
  94. package/kanban/client/dist/assets/index-D_KC5EQT.css +1 -0
  95. package/kanban/client/dist/assets/index-DjY5zqW7.js +351 -0
  96. package/kanban/client/dist/index.html +2 -2
  97. package/kanban/client/src/App.jsx +43 -14
  98. package/kanban/client/src/components/ceremony/AskArchPopup.jsx +7 -3
  99. package/kanban/client/src/components/ceremony/AskModelPopup.jsx +23 -10
  100. package/kanban/client/src/components/ceremony/CeremonyWorkflowModal.jsx +320 -133
  101. package/kanban/client/src/components/ceremony/ProviderSwitcherButton.jsx +290 -0
  102. package/kanban/client/src/components/ceremony/SponsorCallModal.jsx +80 -13
  103. package/kanban/client/src/components/ceremony/SprintPlanningModal.jsx +156 -22
  104. package/kanban/client/src/components/ceremony/steps/ArchitectureStep.jsx +11 -11
  105. package/kanban/client/src/components/ceremony/steps/CompleteStep.jsx +3 -21
  106. package/kanban/client/src/components/ceremony/steps/ReviewAnswersStep.jsx +214 -10
  107. package/kanban/client/src/components/ceremony/steps/RunningStep.jsx +23 -2
  108. package/kanban/client/src/components/kanban/CardDetailModal.jsx +97 -10
  109. package/kanban/client/src/components/kanban/GroupingSelector.jsx +7 -1
  110. package/kanban/client/src/components/kanban/KanbanCard.jsx +23 -14
  111. package/kanban/client/src/components/kanban/RefineWorkItemPopup.jsx +9 -14
  112. package/kanban/client/src/components/kanban/RunButton.jsx +162 -0
  113. package/kanban/client/src/components/kanban/SeedButton.jsx +176 -0
  114. package/kanban/client/src/components/settings/AgentsTab.jsx +103 -75
  115. package/kanban/client/src/components/settings/ApiKeysTab.jsx +31 -2
  116. package/kanban/client/src/components/settings/CeremonyModelsTab.jsx +9 -2
  117. package/kanban/client/src/components/settings/CheckEditorPopup.jsx +507 -0
  118. package/kanban/client/src/components/settings/CostThresholdsTab.jsx +3 -2
  119. package/kanban/client/src/components/settings/ModelPricingTab.jsx +72 -7
  120. package/kanban/client/src/components/settings/OpenAIAuthSection.jsx +412 -0
  121. package/kanban/client/src/components/settings/SettingsModal.jsx +4 -4
  122. package/kanban/client/src/components/stats/CostModal.jsx +34 -3
  123. package/kanban/client/src/hooks/useGrouping.js +59 -0
  124. package/kanban/client/src/lib/api.js +118 -4
  125. package/kanban/client/src/lib/status-grouping.js +10 -0
  126. package/kanban/client/src/store/kanbanStore.js +8 -0
  127. package/kanban/server/index.js +23 -2
  128. package/kanban/server/routes/ceremony.js +153 -4
  129. package/kanban/server/routes/costs.js +9 -3
  130. package/kanban/server/routes/openai-oauth.js +366 -0
  131. package/kanban/server/routes/settings.js +447 -14
  132. package/kanban/server/routes/websocket.js +7 -2
  133. package/kanban/server/routes/work-items.js +141 -1
  134. package/kanban/server/services/CeremonyService.js +275 -24
  135. package/kanban/server/services/TaskRunnerService.js +261 -0
  136. package/kanban/server/workers/run-task-worker.js +121 -0
  137. package/kanban/server/workers/seed-worker.js +94 -0
  138. package/kanban/server/workers/sponsor-call-worker.js +14 -6
  139. package/kanban/server/workers/sprint-planning-worker.js +94 -12
  140. package/package.json +2 -3
  141. package/cli/agents/solver-epic-api.json +0 -15
  142. package/cli/agents/solver-epic-api.md +0 -39
  143. package/cli/agents/solver-epic-backend.json +0 -15
  144. package/cli/agents/solver-epic-backend.md +0 -39
  145. package/cli/agents/solver-epic-cloud.json +0 -15
  146. package/cli/agents/solver-epic-cloud.md +0 -39
  147. package/cli/agents/solver-epic-data.json +0 -15
  148. package/cli/agents/solver-epic-data.md +0 -39
  149. package/cli/agents/solver-epic-database.json +0 -15
  150. package/cli/agents/solver-epic-database.md +0 -39
  151. package/cli/agents/solver-epic-developer.json +0 -15
  152. package/cli/agents/solver-epic-developer.md +0 -39
  153. package/cli/agents/solver-epic-devops.json +0 -15
  154. package/cli/agents/solver-epic-devops.md +0 -39
  155. package/cli/agents/solver-epic-frontend.json +0 -15
  156. package/cli/agents/solver-epic-frontend.md +0 -39
  157. package/cli/agents/solver-epic-mobile.json +0 -15
  158. package/cli/agents/solver-epic-mobile.md +0 -39
  159. package/cli/agents/solver-epic-qa.json +0 -15
  160. package/cli/agents/solver-epic-qa.md +0 -39
  161. package/cli/agents/solver-epic-security.json +0 -15
  162. package/cli/agents/solver-epic-security.md +0 -39
  163. package/cli/agents/solver-epic-solution-architect.json +0 -15
  164. package/cli/agents/solver-epic-solution-architect.md +0 -39
  165. package/cli/agents/solver-epic-test-architect.json +0 -15
  166. package/cli/agents/solver-epic-test-architect.md +0 -39
  167. package/cli/agents/solver-epic-ui.json +0 -15
  168. package/cli/agents/solver-epic-ui.md +0 -39
  169. package/cli/agents/solver-epic-ux.json +0 -15
  170. package/cli/agents/solver-epic-ux.md +0 -39
  171. package/cli/agents/solver-story-api.json +0 -15
  172. package/cli/agents/solver-story-api.md +0 -39
  173. package/cli/agents/solver-story-backend.json +0 -15
  174. package/cli/agents/solver-story-backend.md +0 -39
  175. package/cli/agents/solver-story-cloud.json +0 -15
  176. package/cli/agents/solver-story-cloud.md +0 -39
  177. package/cli/agents/solver-story-data.json +0 -15
  178. package/cli/agents/solver-story-data.md +0 -39
  179. package/cli/agents/solver-story-database.json +0 -15
  180. package/cli/agents/solver-story-database.md +0 -39
  181. package/cli/agents/solver-story-developer.json +0 -15
  182. package/cli/agents/solver-story-developer.md +0 -39
  183. package/cli/agents/solver-story-devops.json +0 -15
  184. package/cli/agents/solver-story-devops.md +0 -39
  185. package/cli/agents/solver-story-frontend.json +0 -15
  186. package/cli/agents/solver-story-frontend.md +0 -39
  187. package/cli/agents/solver-story-mobile.json +0 -15
  188. package/cli/agents/solver-story-mobile.md +0 -39
  189. package/cli/agents/solver-story-qa.json +0 -15
  190. package/cli/agents/solver-story-qa.md +0 -39
  191. package/cli/agents/solver-story-security.json +0 -15
  192. package/cli/agents/solver-story-security.md +0 -39
  193. package/cli/agents/solver-story-solution-architect.json +0 -15
  194. package/cli/agents/solver-story-solution-architect.md +0 -39
  195. package/cli/agents/solver-story-test-architect.json +0 -15
  196. package/cli/agents/solver-story-test-architect.md +0 -39
  197. package/cli/agents/solver-story-ui.json +0 -15
  198. package/cli/agents/solver-story-ui.md +0 -39
  199. package/cli/agents/solver-story-ux.json +0 -15
  200. package/cli/agents/solver-story-ux.md +0 -39
  201. package/cli/agents/validator-epic-api.json +0 -93
  202. package/cli/agents/validator-epic-api.md +0 -137
  203. package/cli/agents/validator-epic-backend.json +0 -93
  204. package/cli/agents/validator-epic-backend.md +0 -130
  205. package/cli/agents/validator-epic-cloud.json +0 -93
  206. package/cli/agents/validator-epic-cloud.md +0 -137
  207. package/cli/agents/validator-epic-data.json +0 -93
  208. package/cli/agents/validator-epic-data.md +0 -130
  209. package/cli/agents/validator-epic-database.json +0 -93
  210. package/cli/agents/validator-epic-database.md +0 -137
  211. package/cli/agents/validator-epic-developer.json +0 -74
  212. package/cli/agents/validator-epic-developer.md +0 -153
  213. package/cli/agents/validator-epic-devops.json +0 -74
  214. package/cli/agents/validator-epic-devops.md +0 -153
  215. package/cli/agents/validator-epic-frontend.json +0 -74
  216. package/cli/agents/validator-epic-frontend.md +0 -153
  217. package/cli/agents/validator-epic-mobile.json +0 -93
  218. package/cli/agents/validator-epic-mobile.md +0 -130
  219. package/cli/agents/validator-epic-qa.json +0 -93
  220. package/cli/agents/validator-epic-qa.md +0 -130
  221. package/cli/agents/validator-epic-security.json +0 -74
  222. package/cli/agents/validator-epic-security.md +0 -154
  223. package/cli/agents/validator-epic-solution-architect.json +0 -74
  224. package/cli/agents/validator-epic-solution-architect.md +0 -156
  225. package/cli/agents/validator-epic-test-architect.json +0 -93
  226. package/cli/agents/validator-epic-test-architect.md +0 -130
  227. package/cli/agents/validator-epic-ui.json +0 -93
  228. package/cli/agents/validator-epic-ui.md +0 -130
  229. package/cli/agents/validator-epic-ux.json +0 -93
  230. package/cli/agents/validator-epic-ux.md +0 -130
  231. package/cli/agents/validator-story-api.json +0 -104
  232. package/cli/agents/validator-story-api.md +0 -152
  233. package/cli/agents/validator-story-backend.json +0 -104
  234. package/cli/agents/validator-story-backend.md +0 -152
  235. package/cli/agents/validator-story-cloud.json +0 -104
  236. package/cli/agents/validator-story-cloud.md +0 -152
  237. package/cli/agents/validator-story-data.json +0 -104
  238. package/cli/agents/validator-story-data.md +0 -152
  239. package/cli/agents/validator-story-database.json +0 -104
  240. package/cli/agents/validator-story-database.md +0 -152
  241. package/cli/agents/validator-story-developer.json +0 -104
  242. package/cli/agents/validator-story-developer.md +0 -152
  243. package/cli/agents/validator-story-devops.json +0 -104
  244. package/cli/agents/validator-story-devops.md +0 -152
  245. package/cli/agents/validator-story-frontend.json +0 -104
  246. package/cli/agents/validator-story-frontend.md +0 -152
  247. package/cli/agents/validator-story-mobile.json +0 -104
  248. package/cli/agents/validator-story-mobile.md +0 -152
  249. package/cli/agents/validator-story-qa.json +0 -104
  250. package/cli/agents/validator-story-qa.md +0 -152
  251. package/cli/agents/validator-story-security.json +0 -104
  252. package/cli/agents/validator-story-security.md +0 -152
  253. package/cli/agents/validator-story-solution-architect.json +0 -104
  254. package/cli/agents/validator-story-solution-architect.md +0 -152
  255. package/cli/agents/validator-story-test-architect.json +0 -104
  256. package/cli/agents/validator-story-test-architect.md +0 -152
  257. package/cli/agents/validator-story-ui.json +0 -104
  258. package/cli/agents/validator-story-ui.md +0 -152
  259. package/cli/agents/validator-story-ux.json +0 -104
  260. package/cli/agents/validator-story-ux.md +0 -152
  261. package/kanban/client/dist/assets/index-CiD8PS2e.js +0 -306
  262. package/kanban/client/dist/assets/index-nLh0m82Q.css +0 -1
package/cli/llm-openai.js CHANGED
@@ -2,6 +2,9 @@ import OpenAI from 'openai';
2
2
  import { jsonrepair } from 'jsonrepair';
3
3
  import { LLMProvider } from './llm-provider.js';
4
4
  import { getMaxTokensForModel } from './llm-token-limits.js';
5
+ import fs from 'node:fs/promises';
6
+ import { existsSync } from 'node:fs';
7
+ import path from 'node:path';
5
8
 
6
9
  export class OpenAIProvider extends LLMProvider {
7
10
  constructor(model = 'gpt-5.2-chat-latest', reasoningEffort = 'medium') {
@@ -10,11 +13,116 @@ export class OpenAIProvider extends LLMProvider {
10
13
  }
11
14
 
12
15
  _createClient() {
16
+ if (process.env.OPENAI_AUTH_MODE === 'oauth') {
17
+ const oauthPath = path.join(process.cwd(), '.avc', 'openai-oauth.json');
18
+ // Only use OAuth mode if the token file actually exists — avoids per-call ENOENT warnings
19
+ if (existsSync(oauthPath)) return { mode: 'oauth' };
20
+ }
13
21
  const apiKey = process.env.OPENAI_API_KEY;
14
22
  if (!apiKey) throw new Error('OPENAI_API_KEY not set. Add it to your .env file.');
15
23
  return new OpenAI({ apiKey });
16
24
  }
17
25
 
26
+ /**
27
+ * Load OAuth tokens from .avc/openai-oauth.json, refreshing if close to expiry.
28
+ */
29
+ async _loadOAuthTokens() {
30
+ const oauthPath = path.join(process.cwd(), '.avc', 'openai-oauth.json');
31
+ const raw = await fs.readFile(oauthPath, 'utf8');
32
+ let tokens = JSON.parse(raw);
33
+
34
+ // Refresh if within 60s of expiry
35
+ if (tokens.expires - Date.now() < 60_000) {
36
+ const body = new URLSearchParams({
37
+ grant_type: 'refresh_token',
38
+ client_id: 'app_EMoamEEZ73f0CkXaXp7hrann',
39
+ refresh_token: tokens.refresh,
40
+ });
41
+ const resp = await fetch('https://auth.openai.com/oauth/token', {
42
+ method: 'POST',
43
+ headers: { 'Content-Type': 'application/x-www-form-urlencoded' },
44
+ body: body.toString(),
45
+ });
46
+ if (!resp.ok) throw new Error(`OAuth token refresh failed: ${resp.status}`);
47
+ const refreshed = await resp.json();
48
+ tokens = {
49
+ access: refreshed.access_token,
50
+ refresh: refreshed.refresh_token || tokens.refresh,
51
+ expires: Date.now() + (refreshed.expires_in || 3600) * 1000,
52
+ accountId: tokens.accountId,
53
+ };
54
+ await fs.writeFile(oauthPath, JSON.stringify(tokens, null, 2), 'utf8');
55
+ }
56
+
57
+ return { access: tokens.access, accountId: tokens.accountId };
58
+ }
59
+
60
+ /**
61
+ * Call the ChatGPT Codex endpoint using OAuth bearer token.
62
+ */
63
+ async _callChatGPTCodex(prompt, agentInstructions) {
64
+ const { access, accountId } = await this._loadOAuthTokens();
65
+
66
+ const t0 = Date.now();
67
+ const resp = await fetch('https://chatgpt.com/backend-api/codex/responses', {
68
+ method: 'POST',
69
+ headers: {
70
+ 'Authorization': `Bearer ${access}`,
71
+ 'chatgpt-account-id': accountId,
72
+ 'Content-Type': 'application/json',
73
+ 'OpenAI-Beta': 'responses=experimental',
74
+ 'accept': 'application/json',
75
+ },
76
+ body: JSON.stringify({
77
+ model: this.model,
78
+ instructions: agentInstructions || 'You are a helpful assistant.',
79
+ input: [{ role: 'user', content: prompt }],
80
+ store: false,
81
+ stream: true,
82
+ }),
83
+ });
84
+
85
+ if (!resp.ok) {
86
+ const raw = await resp.text();
87
+ throw new Error(`ChatGPT Codex API error (${resp.status}): ${raw}`);
88
+ }
89
+
90
+ // Parse SSE stream — accumulate text from delta events; use response.done for final text + usage
91
+ const body = await resp.text();
92
+ let text = '';
93
+ let finalEvent = null;
94
+ for (const line of body.split('\n')) {
95
+ if (!line.startsWith('data: ')) continue;
96
+ const chunk = line.slice(6).trim();
97
+ if (chunk === '[DONE]') break;
98
+ try {
99
+ const event = JSON.parse(chunk);
100
+ if (event.type === 'response.output_text.delta') {
101
+ text += event.delta ?? '';
102
+ } else if (event.type === 'response.output_text.done') {
103
+ text = event.text ?? text; // prefer the complete text when available
104
+ } else if (event.type === 'response.done' || event.type === 'response.completed') {
105
+ finalEvent = event.response ?? event;
106
+ // response.done may carry output_text if delta events were absent
107
+ if (!text) {
108
+ text = finalEvent?.output_text ?? finalEvent?.output?.[0]?.content?.[0]?.text ?? '';
109
+ }
110
+ break;
111
+ }
112
+ } catch { /* skip malformed lines */ }
113
+ }
114
+ const usage = finalEvent?.usage ?? null;
115
+
116
+ this._trackTokens(usage, {
117
+ prompt,
118
+ agentInstructions: agentInstructions ?? null,
119
+ response: text,
120
+ elapsed: Date.now() - t0,
121
+ });
122
+
123
+ return text;
124
+ }
125
+
18
126
  /**
19
127
  * Determine if model uses Responses API instead of Chat Completions API
20
128
  * Models that use Responses API: gpt-5.2-pro, gpt-5.2-codex
@@ -42,11 +150,11 @@ export class OpenAIProvider extends LLMProvider {
42
150
  messages
43
151
  };
44
152
 
45
- // GPT-5+ models use max_completion_tokens, older models use max_tokens
46
- if (this.model.startsWith('gpt-5') || this.model.startsWith('o1') || this.model.startsWith('o3')) {
47
- params.max_completion_tokens = maxTokens;
48
- } else {
153
+ // max_completion_tokens is the modern unified parameter; max_tokens is only for legacy gpt-3.5-turbo
154
+ if (this.model.startsWith('gpt-3.5')) {
49
155
  params.max_tokens = maxTokens;
156
+ } else {
157
+ params.max_completion_tokens = maxTokens;
50
158
  }
51
159
 
52
160
  const response = await this._client.chat.completions.create(params);
@@ -57,8 +165,11 @@ export class OpenAIProvider extends LLMProvider {
57
165
 
58
166
  /**
59
167
  * Call using Responses API (pro/codex models)
168
+ * @param {string} prompt
169
+ * @param {string|null} systemInstructions
170
+ * @param {Object|null} [promptPayload] - Partial payload { prompt, agentInstructions } to log
60
171
  */
61
- async _callResponsesAPI(prompt, systemInstructions) {
172
+ async _callResponsesAPI(prompt, systemInstructions, promptPayload = null) {
62
173
  // Combine system instructions with prompt
63
174
  const fullInput = systemInstructions
64
175
  ? `${systemInstructions}\n\n${prompt}`
@@ -74,20 +185,33 @@ export class OpenAIProvider extends LLMProvider {
74
185
  params.reasoning = { effort: this.reasoningEffort };
75
186
  }
76
187
 
188
+ const _t0 = Date.now();
77
189
  const response = await this._withRetry(
78
190
  () => this._client.responses.create(params),
79
191
  'Responses API call'
80
192
  );
193
+ const _elapsed = Date.now() - _t0;
194
+
195
+ const text = response.output_text;
81
196
 
82
197
  // Track tokens if usage data is available
83
198
  if (response.usage) {
84
- this._trackTokens(response.usage);
199
+ const finalPayload = promptPayload ? {
200
+ ...promptPayload,
201
+ response: text,
202
+ elapsed: _elapsed,
203
+ } : null;
204
+ this._trackTokens(response.usage, finalPayload);
85
205
  }
86
206
 
87
- return response.output_text;
207
+ return text;
88
208
  }
89
209
 
90
210
  async _callProvider(prompt, maxTokens, systemInstructions) {
211
+ // OAuth mode: all calls go through the ChatGPT Codex endpoint
212
+ if (this._client?.mode === 'oauth') {
213
+ return await this._callChatGPTCodex(prompt, systemInstructions);
214
+ }
91
215
  if (this._usesResponsesAPI()) {
92
216
  return await this._callResponsesAPI(prompt, systemInstructions);
93
217
  } else {
@@ -95,17 +219,56 @@ export class OpenAIProvider extends LLMProvider {
95
219
  }
96
220
  }
97
221
 
98
- async generateJSON(prompt, agentInstructions = null) {
222
+ /** True when oauth mode is active AND fallback to api-key is enabled AND key is present */
223
+ _hasFallback() {
224
+ return process.env.OPENAI_AUTH_MODE === 'oauth'
225
+ && process.env.OPENAI_OAUTH_FALLBACK === 'true'
226
+ && !!process.env.OPENAI_API_KEY;
227
+ }
228
+
229
+ /** Create a plain OpenAI SDK client using OPENAI_API_KEY (for fallback) */
230
+ _createApiKeyClient() {
231
+ return new OpenAI({ apiKey: process.env.OPENAI_API_KEY });
232
+ }
233
+
234
+ async generateJSON(prompt, agentInstructions = null, cachedContext = null) {
99
235
  if (!this._client) {
100
236
  this._client = this._createClient();
101
237
  }
102
238
 
239
+ // OAuth path — route through ChatGPT Codex endpoint
240
+ if (this._client?.mode === 'oauth') {
241
+ try {
242
+ const jsonInstructions = (agentInstructions ? agentInstructions + '\n\n' : '')
243
+ + 'You are a helpful assistant that always returns valid JSON. Your response must be a valid JSON object or array, nothing else.';
244
+ const text = await this._callChatGPTCodex(prompt, jsonInstructions);
245
+ let jsonStr = text.trim();
246
+ if (jsonStr.startsWith('```')) {
247
+ jsonStr = jsonStr.replace(/^```(?:json)?\s*\n?/, '').replace(/\n?\s*```\s*$/, '').trim();
248
+ }
249
+ try {
250
+ return JSON.parse(jsonStr);
251
+ } catch (firstError) {
252
+ if (jsonStr.startsWith('{') || jsonStr.startsWith('[')) {
253
+ try { return JSON.parse(jsonrepair(jsonStr)); } catch { /* fall through */ }
254
+ }
255
+ throw new Error(`Failed to parse JSON response: ${firstError.message}\n\nResponse was:\n${text}`);
256
+ }
257
+ } catch (oauthErr) {
258
+ if (!this._hasFallback()) throw oauthErr;
259
+ console.warn(`[openai] OAuth call failed, falling back to API key: ${oauthErr.message}`);
260
+ this._client = this._createApiKeyClient();
261
+ // fall through to standard paths below
262
+ }
263
+ }
264
+
103
265
  const fullPrompt = agentInstructions ? `${agentInstructions}\n\n${prompt}` : prompt;
104
266
 
105
267
  if (this._usesResponsesAPI()) {
106
268
  // Responses API: Use system instructions to enforce JSON
107
269
  const systemInstructions = 'You are a helpful assistant that always returns valid JSON. Your response must be a valid JSON object or array, nothing else.';
108
- const response = await this._callResponsesAPI(fullPrompt, systemInstructions);
270
+ const _rApiPayload = this._promptLogger ? { prompt: fullPrompt, agentInstructions: agentInstructions ?? null } : null;
271
+ const response = await this._callResponsesAPI(fullPrompt, systemInstructions, _rApiPayload);
109
272
 
110
273
  // Parse and return JSON
111
274
  let jsonStr = response.trim();
@@ -126,31 +289,35 @@ export class OpenAIProvider extends LLMProvider {
126
289
  throw new Error(`Failed to parse JSON response: ${firstError.message}\n\nResponse was:\n${response}`);
127
290
  }
128
291
  } else {
129
- // Chat Completions API: Use native JSON mode
292
+ // Chat Completions API: Use native JSON mode.
293
+ // Build system message as: JSON_SYSTEM + agentInstructions + cachedContext.
294
+ // Putting agentInstructions in the system message (not the user message) makes the full
295
+ // prefix eligible for OpenAI automatic prefix caching — identical system prefixes across
296
+ // repeated calls of the same stage type get a 90% discount after the first 1024 tokens.
297
+ const JSON_SYSTEM = 'You are a helpful assistant that always returns valid JSON. Your response must be a valid JSON object or array, nothing else.';
298
+ const systemParts = [JSON_SYSTEM];
299
+ if (agentInstructions) systemParts.push(agentInstructions);
300
+ if (cachedContext) systemParts.push(`---\n\n${cachedContext}`);
301
+ const systemContent = systemParts.join('\n\n');
302
+
130
303
  const messages = [
131
- {
132
- role: 'system',
133
- content: 'You are a helpful assistant that always returns valid JSON. Your response must be a valid JSON object or array, nothing else.'
134
- },
135
- {
136
- role: 'user',
137
- content: fullPrompt
138
- }
304
+ { role: 'system', content: systemContent },
305
+ { role: 'user', content: prompt },
139
306
  ];
140
307
 
141
308
  const params = {
142
309
  model: this.model,
143
- messages
310
+ messages,
144
311
  };
145
312
 
146
313
  // Use model-specific maximum tokens
147
314
  const maxTokens = getMaxTokensForModel(this.model);
148
315
 
149
- // GPT-5+ models use max_completion_tokens, older models use max_tokens
150
- if (this.model.startsWith('gpt-5') || this.model.startsWith('o1') || this.model.startsWith('o3')) {
151
- params.max_completion_tokens = maxTokens;
152
- } else {
316
+ // max_completion_tokens is the modern unified parameter; max_tokens is only for legacy gpt-3.5-turbo
317
+ if (this.model.startsWith('gpt-3.5')) {
153
318
  params.max_tokens = maxTokens;
319
+ } else {
320
+ params.max_completion_tokens = maxTokens;
154
321
  }
155
322
 
156
323
  // Enable JSON mode if model supports it (GPT-4+)
@@ -158,13 +325,38 @@ export class OpenAIProvider extends LLMProvider {
158
325
  params.response_format = { type: 'json_object' };
159
326
  }
160
327
 
328
+ // Extended 24-hour cache retention — free on gpt-5.x and gpt-4.1+ families.
329
+ // Keeps the system-message prefix in cache across long ceremony runs (>1 hr).
330
+ if (this.model.startsWith('gpt-5') || this.model.startsWith('gpt-4.1')) {
331
+ params.prompt_cache_retention = '24h';
332
+ }
333
+
334
+ const _t0Json = Date.now();
161
335
  const response = await this._withRetry(
162
336
  () => this._client.chat.completions.create(params),
163
337
  'JSON generation (Chat Completions)'
164
338
  );
165
339
 
166
- this._trackTokens(response.usage);
167
- const content = response.choices[0].message.content;
340
+ const choice = response.choices[0];
341
+ const content = choice.message.content;
342
+
343
+ // Detect output truncation — json_object mode returns null/empty when cut off at token limit
344
+ if (choice.finish_reason === 'length' || !content) {
345
+ const maxTok = getMaxTokensForModel(this.model);
346
+ const usedOut = response.usage?.completion_tokens ?? '?';
347
+ throw new Error(
348
+ `Response truncated at token limit (finish_reason=length). ` +
349
+ `Model: ${this.model}, limit: ${maxTok}, used: ${usedOut}. ` +
350
+ `Increase max tokens for this model in llm-token-limits.js or reduce prompt size.`
351
+ );
352
+ }
353
+
354
+ this._trackTokens(response.usage, {
355
+ prompt: fullPrompt,
356
+ agentInstructions: agentInstructions ?? null,
357
+ response: content,
358
+ elapsed: Date.now() - _t0Json,
359
+ });
168
360
 
169
361
  // Strip markdown code fences if present (defense-in-depth)
170
362
  let jsonStr = content.trim();
@@ -187,47 +379,76 @@ export class OpenAIProvider extends LLMProvider {
187
379
  }
188
380
  }
189
381
 
190
- async generateText(prompt, agentInstructions = null) {
382
+ async generateText(prompt, agentInstructions = null, cachedContext = null) {
191
383
  if (!this._client) {
192
384
  this._client = this._createClient();
193
385
  }
194
386
 
387
+ // OAuth path — route through ChatGPT Codex endpoint
388
+ if (this._client?.mode === 'oauth') {
389
+ try {
390
+ return await this._callChatGPTCodex(prompt, agentInstructions);
391
+ } catch (oauthErr) {
392
+ if (!this._hasFallback()) throw oauthErr;
393
+ console.warn(`[openai] OAuth call failed, falling back to API key: ${oauthErr.message}`);
394
+ this._client = this._createApiKeyClient();
395
+ // fall through to standard paths below
396
+ }
397
+ }
398
+
195
399
  const fullPrompt = agentInstructions ? `${agentInstructions}\n\n${prompt}` : prompt;
196
400
 
197
401
  if (this._usesResponsesAPI()) {
198
402
  // Responses API
199
- return await this._callResponsesAPI(fullPrompt, null);
403
+ const _rApiPayload = this._promptLogger ? { prompt: fullPrompt, agentInstructions: agentInstructions ?? null } : null;
404
+ return await this._callResponsesAPI(fullPrompt, null, _rApiPayload);
200
405
  } else {
201
- // Chat Completions API
202
- const messages = [
203
- {
204
- role: 'user',
205
- content: fullPrompt
206
- }
207
- ];
406
+ // Chat Completions API.
407
+ // Build system message as agentInstructions + cachedContext so both are eligible for
408
+ // OpenAI automatic prefix caching (90% discount when system prefix is stable across calls).
409
+ const systemParts = [];
410
+ if (agentInstructions) systemParts.push(agentInstructions);
411
+ if (cachedContext) systemParts.push(cachedContext);
412
+ const messages = [];
413
+ if (systemParts.length > 0) {
414
+ messages.push({ role: 'system', content: systemParts.join('\n\n') });
415
+ }
416
+ messages.push({ role: 'user', content: prompt });
208
417
 
209
418
  const params = {
210
419
  model: this.model,
211
- messages
420
+ messages,
212
421
  };
213
422
 
214
423
  // Use model-specific maximum tokens
215
424
  const maxTokens = getMaxTokensForModel(this.model);
216
425
 
217
- // GPT-5+ models use max_completion_tokens, older models use max_tokens
218
- if (this.model.startsWith('gpt-5') || this.model.startsWith('o1') || this.model.startsWith('o3')) {
219
- params.max_completion_tokens = maxTokens;
220
- } else {
426
+ // max_completion_tokens is the modern unified parameter; max_tokens is only for legacy gpt-3.5-turbo
427
+ if (this.model.startsWith('gpt-3.5')) {
221
428
  params.max_tokens = maxTokens;
429
+ } else {
430
+ params.max_completion_tokens = maxTokens;
431
+ }
432
+
433
+ // Extended 24-hour cache retention — free on gpt-5.x and gpt-4.1+ families.
434
+ if (this.model.startsWith('gpt-5') || this.model.startsWith('gpt-4.1')) {
435
+ params.prompt_cache_retention = '24h';
222
436
  }
223
437
 
438
+ const _t0Text = Date.now();
224
439
  const response = await this._withRetry(
225
440
  () => this._client.chat.completions.create(params),
226
441
  'Text generation (Chat Completions)'
227
442
  );
228
443
 
229
- this._trackTokens(response.usage);
230
- return response.choices[0].message.content;
444
+ const textContent = response.choices[0].message.content;
445
+ this._trackTokens(response.usage, {
446
+ prompt: fullPrompt,
447
+ agentInstructions: agentInstructions ?? null,
448
+ response: textContent,
449
+ elapsed: Date.now() - _t0Text,
450
+ });
451
+ return textContent;
231
452
  }
232
453
  }
233
454
  }
@@ -6,6 +6,8 @@ export class LLMProvider {
6
6
  this.tokenUsage = {
7
7
  inputTokens: 0,
8
8
  outputTokens: 0,
9
+ cachedInputTokens: 0, // cache reads (already counted inside inputTokens)
10
+ cacheWriteTokens: 0, // cache writes (billed at 1.25× on Claude, free on OpenAI/Gemini)
9
11
  totalCalls: 0
10
12
  };
11
13
 
@@ -71,7 +73,15 @@ export class LLMProvider {
71
73
  'rate limit',
72
74
  'quota exceeded',
73
75
  'resource exhausted',
74
- 'resource has been exhausted'
76
+ 'resource has been exhausted',
77
+ // Transient network/connection errors (e.g. WSL2 IPv6, momentary TCP failures)
78
+ 'connection error',
79
+ 'econnreset',
80
+ 'econnrefused',
81
+ 'enotfound',
82
+ 'etimedout',
83
+ 'network error',
84
+ 'fetch failed',
75
85
  ];
76
86
 
77
87
  const hasHighDemandMessage = highDemandPatterns.some(pattern =>
@@ -146,7 +156,8 @@ export class LLMProvider {
146
156
  // Log retry attempt with helpful info
147
157
  const retrySource = retryAfterDelay ? 'server directive' : 'exponential backoff';
148
158
  console.log(`\n⏳ ${operationName} failed (attempt ${attempt + 1}/${this.retryConfig.maxRetries + 1})`);
149
- console.log(` Error: ${error.message}`);
159
+ const causeMsg = error.cause?.message || error.cause?.code || '';
160
+ console.log(` Error: ${error.message}${causeMsg ? ` (cause: ${causeMsg})` : ''}`);
150
161
  console.log(` Retrying in ${currentDelay / 1000}s (${retrySource})...`);
151
162
 
152
163
  // Wait before retrying
@@ -174,23 +185,64 @@ export class LLMProvider {
174
185
  this._callCallbacks.push(fn);
175
186
  }
176
187
 
188
+ /**
189
+ * Attach a PromptLogger instance and set the stage label for subsequent calls.
190
+ * @param {import('./prompt-logger.js').PromptLogger} promptLogger
191
+ * @param {string} [stage]
192
+ */
193
+ setPromptLogger(promptLogger, stage) {
194
+ this._promptLogger = promptLogger;
195
+ this._promptLoggerStage = stage || 'unknown';
196
+ }
197
+
177
198
  /**
178
199
  * Track token usage from API response and fire per-call callbacks.
200
+ * Optionally writes a prompt/response payload to the attached PromptLogger.
179
201
  * @param {Object} usage - Usage object from API response
202
+ * @param {Object|null} [promptPayload] - Optional { prompt, agentInstructions, response, elapsed }
180
203
  */
181
- _trackTokens(usage) {
204
+ _trackTokens(usage, promptPayload = null) {
182
205
  if (usage) {
183
206
  const deltaIn = usage.input_tokens || usage.inputTokens || usage.promptTokenCount || usage.prompt_tokens || 0;
184
207
  const deltaOut = usage.output_tokens || usage.outputTokens || usage.candidatesTokenCount || usage.completion_tokens || 0;
185
- this.tokenUsage.inputTokens += deltaIn;
186
- this.tokenUsage.outputTokens += deltaOut;
208
+ // Cache stats — each provider uses a different field name:
209
+ // Claude: cache_read_input_tokens / cache_creation_input_tokens
210
+ // OpenAI: prompt_tokens_details.cached_tokens
211
+ // Gemini: cachedContentTokenCount
212
+ const deltaCacheRead = usage.cache_read_input_tokens
213
+ || usage.cachedContentTokenCount
214
+ || usage.prompt_tokens_details?.cached_tokens
215
+ || 0;
216
+ const deltaCacheWrite = usage.cache_creation_input_tokens || 0;
217
+ this.tokenUsage.inputTokens += deltaIn;
218
+ this.tokenUsage.outputTokens += deltaOut;
219
+ this.tokenUsage.cachedInputTokens += deltaCacheRead;
220
+ this.tokenUsage.cacheWriteTokens += deltaCacheWrite;
187
221
  this.tokenUsage.totalCalls++;
222
+ if (deltaCacheRead > 0 || deltaCacheWrite > 0) {
223
+ console.log(` [cache] write=${deltaCacheWrite} read=${deltaCacheRead} tokens`);
224
+ }
188
225
  if (this._callCallbacks.length > 0 && (deltaIn > 0 || deltaOut > 0)) {
189
- const delta = { input: deltaIn, output: deltaOut, provider: this.providerName, model: this.model };
226
+ const delta = { input: deltaIn, output: deltaOut, cached: deltaCacheRead, cacheWrite: deltaCacheWrite, provider: this.providerName, model: this.model };
190
227
  for (const fn of this._callCallbacks) {
191
228
  try { fn(delta); } catch (_) {}
192
229
  }
193
230
  }
231
+ if (this._promptLogger && promptPayload) {
232
+ this._promptLogger.write({
233
+ ceremony: this._promptLogger.ceremony,
234
+ stage: this._promptLoggerStage || 'unknown',
235
+ call: (this._promptLogger.callCount || 0) + 1,
236
+ timestamp: new Date().toISOString(),
237
+ elapsed_ms: promptPayload.elapsed ?? null,
238
+ provider: this.providerName,
239
+ model: this.model,
240
+ tokens: { input: deltaIn, output: deltaOut },
241
+ prompt: promptPayload.prompt ?? null,
242
+ agentInstructions: promptPayload.agentInstructions ?? null,
243
+ response: promptPayload.response ?? null,
244
+ });
245
+ }
194
246
  }
195
247
  }
196
248
 
@@ -205,7 +257,9 @@ export class LLMProvider {
205
257
  const pricing = {
206
258
  'claude': { input: 3.00, output: 15.00 }, // Claude Sonnet 4.5
207
259
  'gemini': { input: 0.15, output: 0.60 }, // Gemini 2.0 Flash
208
- 'openai': { input: 1.75, output: 14.00 } // GPT-5.2
260
+ 'openai': { input: 1.75, output: 14.00 }, // GPT-5.2
261
+ 'local': { input: 0, output: 0 }, // Local models — free
262
+ 'xiaomi': { input: 0.09, output: 0.29 } // MiMo V2 Flash (default)
209
263
  };
210
264
 
211
265
  const rates = pricing[this.providerName] || { input: 0, output: 0 };
@@ -216,6 +270,8 @@ export class LLMProvider {
216
270
  return {
217
271
  inputTokens: this.tokenUsage.inputTokens,
218
272
  outputTokens: this.tokenUsage.outputTokens,
273
+ cachedInputTokens: this.tokenUsage.cachedInputTokens,
274
+ cacheWriteTokens: this.tokenUsage.cacheWriteTokens,
219
275
  totalTokens: total,
220
276
  totalCalls: this.tokenUsage.totalCalls,
221
277
  estimatedCost,
@@ -224,6 +280,81 @@ export class LLMProvider {
224
280
  };
225
281
  }
226
282
 
283
+ /**
284
+ * Map of cloud providers to their required env var.
285
+ */
286
+ static PROVIDER_ENV_MAP = {
287
+ 'claude': 'ANTHROPIC_API_KEY',
288
+ 'gemini': 'GEMINI_API_KEY',
289
+ 'openai': 'OPENAI_API_KEY',
290
+ 'xiaomi': 'XIAOMI_API_KEY',
291
+ };
292
+
293
+ /**
294
+ * Default model per provider (used as fallback when switching providers).
295
+ */
296
+ static PROVIDER_DEFAULT_MODELS = {
297
+ 'claude': 'claude-sonnet-4-6',
298
+ 'gemini': 'gemini-2.5-flash',
299
+ 'openai': 'gpt-4.1',
300
+ 'xiaomi': 'MiMo-V2-Flash',
301
+ };
302
+
303
+ /**
304
+ * Check whether a provider has valid credentials available.
305
+ * For cloud providers this means the env var is set.
306
+ * For local, probe the server (with a short timeout).
307
+ * @param {string} providerName
308
+ * @returns {Promise<boolean>}
309
+ */
310
+ static async hasProviderCredentials(providerName) {
311
+ if (providerName === 'local') {
312
+ try {
313
+ const { discoverLocalServers } = await import('./llm-local.js');
314
+ const servers = await Promise.race([
315
+ discoverLocalServers(),
316
+ new Promise((_, reject) => setTimeout(() => reject(new Error('timeout')), 3000))
317
+ ]);
318
+ return servers.length > 0;
319
+ } catch {
320
+ return false;
321
+ }
322
+ }
323
+ if (providerName === 'openai') {
324
+ const oauthMode = process.env.OPENAI_AUTH_MODE === 'oauth';
325
+ return !!(process.env.OPENAI_API_KEY || (oauthMode && process.env.OPENAI_OAUTH_TOKEN));
326
+ }
327
+ const envVar = LLMProvider.PROVIDER_ENV_MAP[providerName];
328
+ return envVar ? !!process.env[envVar] : false;
329
+ }
330
+
331
+ /**
332
+ * Resolve a provider+model pair, falling back to any available provider
333
+ * if the requested one has no credentials.
334
+ * @param {string} provider - Requested provider name
335
+ * @param {string} model - Requested model name
336
+ * @returns {Promise<{ provider: string, model: string, fellBack: boolean }>}
337
+ */
338
+ static async resolveAvailableProvider(provider, model) {
339
+ // Check requested provider first
340
+ if (await LLMProvider.hasProviderCredentials(provider)) {
341
+ return { provider, model, fellBack: false };
342
+ }
343
+
344
+ // Fallback priority: claude → gemini → openai → xiaomi → local
345
+ const fallbackOrder = ['claude', 'gemini', 'openai', 'xiaomi', 'local'];
346
+ for (const candidate of fallbackOrder) {
347
+ if (candidate === provider) continue; // already checked
348
+ if (await LLMProvider.hasProviderCredentials(candidate)) {
349
+ const fallbackModel = LLMProvider.PROVIDER_DEFAULT_MODELS[candidate] || candidate;
350
+ return { provider: candidate, model: fallbackModel, fellBack: true };
351
+ }
352
+ }
353
+
354
+ // Nothing available — return original and let create() throw the usual error
355
+ return { provider, model, fellBack: false };
356
+ }
357
+
227
358
  // Factory — async because of dynamic import (only loads the SDK you need)
228
359
  static async create(providerName, model) {
229
360
  // AVC_LLM_MOCK=1: return instant mock provider for E2E testing (no API calls)
@@ -245,8 +376,16 @@ export class LLMProvider {
245
376
  const { OpenAIProvider } = await import('./llm-openai.js');
246
377
  return new OpenAIProvider(model);
247
378
  }
379
+ case 'local': {
380
+ const { LocalProvider } = await import('./llm-local.js');
381
+ return new LocalProvider(model);
382
+ }
383
+ case 'xiaomi': {
384
+ const { XiaomiProvider } = await import('./llm-xiaomi.js');
385
+ return new XiaomiProvider(model);
386
+ }
248
387
  default:
249
- throw new Error(`Unknown LLM provider: "${providerName}". Supported: claude, gemini, openai`);
388
+ throw new Error(`Unknown LLM provider: "${providerName}". Supported: claude, gemini, openai, local, xiaomi`);
250
389
  }
251
390
  }
252
391