@agile-vibe-coding/avc 0.2.3 → 0.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (262)
  1. package/README.md +475 -3
  2. package/cli/agents/agent-selector.md +23 -0
  3. package/cli/agents/code-implementer.md +117 -0
  4. package/cli/agents/code-validator.md +80 -0
  5. package/cli/agents/context-reviewer-epic.md +101 -0
  6. package/cli/agents/context-reviewer-story.md +92 -0
  7. package/cli/agents/context-writer-epic.md +145 -0
  8. package/cli/agents/context-writer-story.md +111 -0
  9. package/cli/agents/doc-writer-epic.md +42 -0
  10. package/cli/agents/doc-writer-story.md +43 -0
  11. package/cli/agents/duplicate-detector.md +110 -0
  12. package/cli/agents/epic-story-decomposer.md +318 -39
  13. package/cli/agents/mission-scope-generator.md +68 -4
  14. package/cli/agents/mission-scope-validator.md +40 -6
  15. package/cli/agents/project-context-extractor.md +21 -6
  16. package/cli/agents/scaffolding-generator.md +99 -0
  17. package/cli/agents/seed-validator.md +71 -0
  18. package/cli/agents/story-scope-reviewer.md +147 -0
  19. package/cli/agents/story-splitter.md +83 -0
  20. package/cli/agents/validator-documentation.json +31 -0
  21. package/cli/agents/validator-documentation.md +3 -1
  22. package/cli/api-reference-tool.js +368 -0
  23. package/cli/checks/catalog.json +76 -0
  24. package/cli/checks/code/quality.json +26 -0
  25. package/cli/checks/code/testing.json +14 -0
  26. package/cli/checks/code/traceability.json +26 -0
  27. package/cli/checks/cross-refs/epic.json +171 -0
  28. package/cli/checks/cross-refs/story.json +149 -0
  29. package/cli/checks/epic/api.json +114 -0
  30. package/cli/checks/epic/backend.json +126 -0
  31. package/cli/checks/epic/cloud.json +126 -0
  32. package/cli/checks/epic/data.json +102 -0
  33. package/cli/checks/epic/database.json +114 -0
  34. package/cli/checks/epic/developer.json +182 -0
  35. package/cli/checks/epic/devops.json +174 -0
  36. package/cli/checks/epic/frontend.json +162 -0
  37. package/cli/checks/epic/mobile.json +102 -0
  38. package/cli/checks/epic/qa.json +90 -0
  39. package/cli/checks/epic/security.json +184 -0
  40. package/cli/checks/epic/solution-architect.json +192 -0
  41. package/cli/checks/epic/test-architect.json +90 -0
  42. package/cli/checks/epic/ui.json +102 -0
  43. package/cli/checks/epic/ux.json +90 -0
  44. package/cli/checks/fixes/epic-fix-template.md +10 -0
  45. package/cli/checks/fixes/story-fix-template.md +10 -0
  46. package/cli/checks/story/api.json +186 -0
  47. package/cli/checks/story/backend.json +102 -0
  48. package/cli/checks/story/cloud.json +102 -0
  49. package/cli/checks/story/data.json +210 -0
  50. package/cli/checks/story/database.json +102 -0
  51. package/cli/checks/story/developer.json +168 -0
  52. package/cli/checks/story/devops.json +102 -0
  53. package/cli/checks/story/frontend.json +174 -0
  54. package/cli/checks/story/mobile.json +102 -0
  55. package/cli/checks/story/qa.json +210 -0
  56. package/cli/checks/story/security.json +198 -0
  57. package/cli/checks/story/solution-architect.json +230 -0
  58. package/cli/checks/story/test-architect.json +210 -0
  59. package/cli/checks/story/ui.json +102 -0
  60. package/cli/checks/story/ux.json +102 -0
  61. package/cli/coding-order.js +401 -0
  62. package/cli/dependency-checker.js +72 -0
  63. package/cli/epic-story-validator.js +284 -799
  64. package/cli/index.js +0 -0
  65. package/cli/init-model-config.js +17 -10
  66. package/cli/init.js +514 -92
  67. package/cli/kanban-server-manager.js +1 -2
  68. package/cli/llm-claude.js +98 -31
  69. package/cli/llm-gemini.js +29 -5
  70. package/cli/llm-local.js +493 -0
  71. package/cli/llm-openai.js +262 -41
  72. package/cli/llm-provider.js +147 -8
  73. package/cli/llm-token-limits.js +113 -4
  74. package/cli/llm-verifier.js +209 -1
  75. package/cli/llm-xiaomi.js +143 -0
  76. package/cli/message-constants.js +3 -12
  77. package/cli/messaging-api.js +6 -12
  78. package/cli/micro-check-fixer.js +335 -0
  79. package/cli/micro-check-runner.js +449 -0
  80. package/cli/micro-check-scorer.js +148 -0
  81. package/cli/micro-check-validator.js +538 -0
  82. package/cli/model-pricing.js +23 -0
  83. package/cli/model-selector.js +3 -2
  84. package/cli/prompt-logger.js +57 -0
  85. package/cli/repl-ink.js +106 -346
  86. package/cli/repl-old.js +1 -2
  87. package/cli/seed-processor.js +194 -24
  88. package/cli/sprint-planning-processor.js +2638 -289
  89. package/cli/template-processor.js +50 -3
  90. package/cli/token-tracker.js +50 -23
  91. package/cli/tools/generate-story-validators.js +1 -1
  92. package/cli/validation-router.js +70 -8
  93. package/cli/worktree-runner.js +654 -0
  94. package/kanban/client/dist/assets/index-D_KC5EQT.css +1 -0
  95. package/kanban/client/dist/assets/index-DjY5zqW7.js +351 -0
  96. package/kanban/client/dist/index.html +2 -2
  97. package/kanban/client/src/App.jsx +43 -14
  98. package/kanban/client/src/components/ceremony/AskArchPopup.jsx +7 -3
  99. package/kanban/client/src/components/ceremony/AskModelPopup.jsx +23 -10
  100. package/kanban/client/src/components/ceremony/CeremonyWorkflowModal.jsx +320 -133
  101. package/kanban/client/src/components/ceremony/ProviderSwitcherButton.jsx +290 -0
  102. package/kanban/client/src/components/ceremony/SponsorCallModal.jsx +80 -13
  103. package/kanban/client/src/components/ceremony/SprintPlanningModal.jsx +156 -22
  104. package/kanban/client/src/components/ceremony/steps/ArchitectureStep.jsx +11 -11
  105. package/kanban/client/src/components/ceremony/steps/CompleteStep.jsx +3 -21
  106. package/kanban/client/src/components/ceremony/steps/ReviewAnswersStep.jsx +214 -10
  107. package/kanban/client/src/components/ceremony/steps/RunningStep.jsx +23 -2
  108. package/kanban/client/src/components/kanban/CardDetailModal.jsx +97 -10
  109. package/kanban/client/src/components/kanban/GroupingSelector.jsx +7 -1
  110. package/kanban/client/src/components/kanban/KanbanCard.jsx +23 -14
  111. package/kanban/client/src/components/kanban/RefineWorkItemPopup.jsx +9 -14
  112. package/kanban/client/src/components/kanban/RunButton.jsx +162 -0
  113. package/kanban/client/src/components/kanban/SeedButton.jsx +176 -0
  114. package/kanban/client/src/components/settings/AgentsTab.jsx +103 -75
  115. package/kanban/client/src/components/settings/ApiKeysTab.jsx +31 -2
  116. package/kanban/client/src/components/settings/CeremonyModelsTab.jsx +9 -2
  117. package/kanban/client/src/components/settings/CheckEditorPopup.jsx +507 -0
  118. package/kanban/client/src/components/settings/CostThresholdsTab.jsx +3 -2
  119. package/kanban/client/src/components/settings/ModelPricingTab.jsx +72 -7
  120. package/kanban/client/src/components/settings/OpenAIAuthSection.jsx +412 -0
  121. package/kanban/client/src/components/settings/SettingsModal.jsx +4 -4
  122. package/kanban/client/src/components/stats/CostModal.jsx +34 -3
  123. package/kanban/client/src/hooks/useGrouping.js +59 -0
  124. package/kanban/client/src/lib/api.js +118 -4
  125. package/kanban/client/src/lib/status-grouping.js +10 -0
  126. package/kanban/client/src/store/kanbanStore.js +8 -0
  127. package/kanban/server/index.js +23 -2
  128. package/kanban/server/routes/ceremony.js +153 -4
  129. package/kanban/server/routes/costs.js +9 -3
  130. package/kanban/server/routes/openai-oauth.js +366 -0
  131. package/kanban/server/routes/settings.js +447 -14
  132. package/kanban/server/routes/websocket.js +7 -2
  133. package/kanban/server/routes/work-items.js +141 -1
  134. package/kanban/server/services/CeremonyService.js +275 -24
  135. package/kanban/server/services/TaskRunnerService.js +261 -0
  136. package/kanban/server/workers/run-task-worker.js +121 -0
  137. package/kanban/server/workers/seed-worker.js +94 -0
  138. package/kanban/server/workers/sponsor-call-worker.js +14 -6
  139. package/kanban/server/workers/sprint-planning-worker.js +94 -12
  140. package/package.json +2 -3
  141. package/cli/agents/solver-epic-api.json +0 -15
  142. package/cli/agents/solver-epic-api.md +0 -39
  143. package/cli/agents/solver-epic-backend.json +0 -15
  144. package/cli/agents/solver-epic-backend.md +0 -39
  145. package/cli/agents/solver-epic-cloud.json +0 -15
  146. package/cli/agents/solver-epic-cloud.md +0 -39
  147. package/cli/agents/solver-epic-data.json +0 -15
  148. package/cli/agents/solver-epic-data.md +0 -39
  149. package/cli/agents/solver-epic-database.json +0 -15
  150. package/cli/agents/solver-epic-database.md +0 -39
  151. package/cli/agents/solver-epic-developer.json +0 -15
  152. package/cli/agents/solver-epic-developer.md +0 -39
  153. package/cli/agents/solver-epic-devops.json +0 -15
  154. package/cli/agents/solver-epic-devops.md +0 -39
  155. package/cli/agents/solver-epic-frontend.json +0 -15
  156. package/cli/agents/solver-epic-frontend.md +0 -39
  157. package/cli/agents/solver-epic-mobile.json +0 -15
  158. package/cli/agents/solver-epic-mobile.md +0 -39
  159. package/cli/agents/solver-epic-qa.json +0 -15
  160. package/cli/agents/solver-epic-qa.md +0 -39
  161. package/cli/agents/solver-epic-security.json +0 -15
  162. package/cli/agents/solver-epic-security.md +0 -39
  163. package/cli/agents/solver-epic-solution-architect.json +0 -15
  164. package/cli/agents/solver-epic-solution-architect.md +0 -39
  165. package/cli/agents/solver-epic-test-architect.json +0 -15
  166. package/cli/agents/solver-epic-test-architect.md +0 -39
  167. package/cli/agents/solver-epic-ui.json +0 -15
  168. package/cli/agents/solver-epic-ui.md +0 -39
  169. package/cli/agents/solver-epic-ux.json +0 -15
  170. package/cli/agents/solver-epic-ux.md +0 -39
  171. package/cli/agents/solver-story-api.json +0 -15
  172. package/cli/agents/solver-story-api.md +0 -39
  173. package/cli/agents/solver-story-backend.json +0 -15
  174. package/cli/agents/solver-story-backend.md +0 -39
  175. package/cli/agents/solver-story-cloud.json +0 -15
  176. package/cli/agents/solver-story-cloud.md +0 -39
  177. package/cli/agents/solver-story-data.json +0 -15
  178. package/cli/agents/solver-story-data.md +0 -39
  179. package/cli/agents/solver-story-database.json +0 -15
  180. package/cli/agents/solver-story-database.md +0 -39
  181. package/cli/agents/solver-story-developer.json +0 -15
  182. package/cli/agents/solver-story-developer.md +0 -39
  183. package/cli/agents/solver-story-devops.json +0 -15
  184. package/cli/agents/solver-story-devops.md +0 -39
  185. package/cli/agents/solver-story-frontend.json +0 -15
  186. package/cli/agents/solver-story-frontend.md +0 -39
  187. package/cli/agents/solver-story-mobile.json +0 -15
  188. package/cli/agents/solver-story-mobile.md +0 -39
  189. package/cli/agents/solver-story-qa.json +0 -15
  190. package/cli/agents/solver-story-qa.md +0 -39
  191. package/cli/agents/solver-story-security.json +0 -15
  192. package/cli/agents/solver-story-security.md +0 -39
  193. package/cli/agents/solver-story-solution-architect.json +0 -15
  194. package/cli/agents/solver-story-solution-architect.md +0 -39
  195. package/cli/agents/solver-story-test-architect.json +0 -15
  196. package/cli/agents/solver-story-test-architect.md +0 -39
  197. package/cli/agents/solver-story-ui.json +0 -15
  198. package/cli/agents/solver-story-ui.md +0 -39
  199. package/cli/agents/solver-story-ux.json +0 -15
  200. package/cli/agents/solver-story-ux.md +0 -39
  201. package/cli/agents/validator-epic-api.json +0 -93
  202. package/cli/agents/validator-epic-api.md +0 -137
  203. package/cli/agents/validator-epic-backend.json +0 -93
  204. package/cli/agents/validator-epic-backend.md +0 -130
  205. package/cli/agents/validator-epic-cloud.json +0 -93
  206. package/cli/agents/validator-epic-cloud.md +0 -137
  207. package/cli/agents/validator-epic-data.json +0 -93
  208. package/cli/agents/validator-epic-data.md +0 -130
  209. package/cli/agents/validator-epic-database.json +0 -93
  210. package/cli/agents/validator-epic-database.md +0 -137
  211. package/cli/agents/validator-epic-developer.json +0 -74
  212. package/cli/agents/validator-epic-developer.md +0 -153
  213. package/cli/agents/validator-epic-devops.json +0 -74
  214. package/cli/agents/validator-epic-devops.md +0 -153
  215. package/cli/agents/validator-epic-frontend.json +0 -74
  216. package/cli/agents/validator-epic-frontend.md +0 -153
  217. package/cli/agents/validator-epic-mobile.json +0 -93
  218. package/cli/agents/validator-epic-mobile.md +0 -130
  219. package/cli/agents/validator-epic-qa.json +0 -93
  220. package/cli/agents/validator-epic-qa.md +0 -130
  221. package/cli/agents/validator-epic-security.json +0 -74
  222. package/cli/agents/validator-epic-security.md +0 -154
  223. package/cli/agents/validator-epic-solution-architect.json +0 -74
  224. package/cli/agents/validator-epic-solution-architect.md +0 -156
  225. package/cli/agents/validator-epic-test-architect.json +0 -93
  226. package/cli/agents/validator-epic-test-architect.md +0 -130
  227. package/cli/agents/validator-epic-ui.json +0 -93
  228. package/cli/agents/validator-epic-ui.md +0 -130
  229. package/cli/agents/validator-epic-ux.json +0 -93
  230. package/cli/agents/validator-epic-ux.md +0 -130
  231. package/cli/agents/validator-story-api.json +0 -104
  232. package/cli/agents/validator-story-api.md +0 -152
  233. package/cli/agents/validator-story-backend.json +0 -104
  234. package/cli/agents/validator-story-backend.md +0 -152
  235. package/cli/agents/validator-story-cloud.json +0 -104
  236. package/cli/agents/validator-story-cloud.md +0 -152
  237. package/cli/agents/validator-story-data.json +0 -104
  238. package/cli/agents/validator-story-data.md +0 -152
  239. package/cli/agents/validator-story-database.json +0 -104
  240. package/cli/agents/validator-story-database.md +0 -152
  241. package/cli/agents/validator-story-developer.json +0 -104
  242. package/cli/agents/validator-story-developer.md +0 -152
  243. package/cli/agents/validator-story-devops.json +0 -104
  244. package/cli/agents/validator-story-devops.md +0 -152
  245. package/cli/agents/validator-story-frontend.json +0 -104
  246. package/cli/agents/validator-story-frontend.md +0 -152
  247. package/cli/agents/validator-story-mobile.json +0 -104
  248. package/cli/agents/validator-story-mobile.md +0 -152
  249. package/cli/agents/validator-story-qa.json +0 -104
  250. package/cli/agents/validator-story-qa.md +0 -152
  251. package/cli/agents/validator-story-security.json +0 -104
  252. package/cli/agents/validator-story-security.md +0 -152
  253. package/cli/agents/validator-story-solution-architect.json +0 -104
  254. package/cli/agents/validator-story-solution-architect.md +0 -152
  255. package/cli/agents/validator-story-test-architect.json +0 -104
  256. package/cli/agents/validator-story-test-architect.md +0 -152
  257. package/cli/agents/validator-story-ui.json +0 -104
  258. package/cli/agents/validator-story-ui.md +0 -152
  259. package/cli/agents/validator-story-ux.json +0 -104
  260. package/cli/agents/validator-story-ux.md +0 -152
  261. package/kanban/client/dist/assets/index-CiD8PS2e.js +0 -306
  262. package/kanban/client/dist/assets/index-nLh0m82Q.css +0 -1
package/cli/llm-openai.js CHANGED
@@ -2,6 +2,9 @@ import OpenAI from 'openai';
2
2
  import { jsonrepair } from 'jsonrepair';
3
3
  import { LLMProvider } from './llm-provider.js';
4
4
  import { getMaxTokensForModel } from './llm-token-limits.js';
5
+ import fs from 'node:fs/promises';
6
+ import { existsSync } from 'node:fs';
7
+ import path from 'node:path';
5
8
 
6
9
  export class OpenAIProvider extends LLMProvider {
7
10
  constructor(model = 'gpt-5.2-chat-latest', reasoningEffort = 'medium') {
@@ -10,11 +13,116 @@ export class OpenAIProvider extends LLMProvider {
10
13
  }
11
14
 
12
15
  _createClient() {
16
+ if (process.env.OPENAI_AUTH_MODE === 'oauth') {
17
+ const oauthPath = path.join(process.cwd(), '.avc', 'openai-oauth.json');
18
+ // Only use OAuth mode if the token file actually exists — avoids per-call ENOENT warnings
19
+ if (existsSync(oauthPath)) return { mode: 'oauth' };
20
+ }
13
21
  const apiKey = process.env.OPENAI_API_KEY;
14
22
  if (!apiKey) throw new Error('OPENAI_API_KEY not set. Add it to your .env file.');
15
23
  return new OpenAI({ apiKey });
16
24
  }
17
25
 
26
+ /**
27
+ * Load OAuth tokens from .avc/openai-oauth.json, refreshing if close to expiry.
28
+ */
29
+ async _loadOAuthTokens() {
30
+ const oauthPath = path.join(process.cwd(), '.avc', 'openai-oauth.json');
31
+ const raw = await fs.readFile(oauthPath, 'utf8');
32
+ let tokens = JSON.parse(raw);
33
+
34
+ // Refresh if within 60s of expiry
35
+ if (tokens.expires - Date.now() < 60_000) {
36
+ const body = new URLSearchParams({
37
+ grant_type: 'refresh_token',
38
+ client_id: 'app_EMoamEEZ73f0CkXaXp7hrann',
39
+ refresh_token: tokens.refresh,
40
+ });
41
+ const resp = await fetch('https://auth.openai.com/oauth/token', {
42
+ method: 'POST',
43
+ headers: { 'Content-Type': 'application/x-www-form-urlencoded' },
44
+ body: body.toString(),
45
+ });
46
+ if (!resp.ok) throw new Error(`OAuth token refresh failed: ${resp.status}`);
47
+ const refreshed = await resp.json();
48
+ tokens = {
49
+ access: refreshed.access_token,
50
+ refresh: refreshed.refresh_token || tokens.refresh,
51
+ expires: Date.now() + (refreshed.expires_in || 3600) * 1000,
52
+ accountId: tokens.accountId,
53
+ };
54
+ await fs.writeFile(oauthPath, JSON.stringify(tokens, null, 2), 'utf8');
55
+ }
56
+
57
+ return { access: tokens.access, accountId: tokens.accountId };
58
+ }
59
+
60
+ /**
61
+ * Call the ChatGPT Codex endpoint using OAuth bearer token.
62
+ */
63
+ async _callChatGPTCodex(prompt, agentInstructions) {
64
+ const { access, accountId } = await this._loadOAuthTokens();
65
+
66
+ const t0 = Date.now();
67
+ const resp = await fetch('https://chatgpt.com/backend-api/codex/responses', {
68
+ method: 'POST',
69
+ headers: {
70
+ 'Authorization': `Bearer ${access}`,
71
+ 'chatgpt-account-id': accountId,
72
+ 'Content-Type': 'application/json',
73
+ 'OpenAI-Beta': 'responses=experimental',
74
+ 'accept': 'application/json',
75
+ },
76
+ body: JSON.stringify({
77
+ model: this.model,
78
+ instructions: agentInstructions || 'You are a helpful assistant.',
79
+ input: [{ role: 'user', content: prompt }],
80
+ store: false,
81
+ stream: true,
82
+ }),
83
+ });
84
+
85
+ if (!resp.ok) {
86
+ const raw = await resp.text();
87
+ throw new Error(`ChatGPT Codex API error (${resp.status}): ${raw}`);
88
+ }
89
+
90
+ // Parse SSE stream — accumulate text from delta events; use response.done for final text + usage
91
+ const body = await resp.text();
92
+ let text = '';
93
+ let finalEvent = null;
94
+ for (const line of body.split('\n')) {
95
+ if (!line.startsWith('data: ')) continue;
96
+ const chunk = line.slice(6).trim();
97
+ if (chunk === '[DONE]') break;
98
+ try {
99
+ const event = JSON.parse(chunk);
100
+ if (event.type === 'response.output_text.delta') {
101
+ text += event.delta ?? '';
102
+ } else if (event.type === 'response.output_text.done') {
103
+ text = event.text ?? text; // prefer the complete text when available
104
+ } else if (event.type === 'response.done' || event.type === 'response.completed') {
105
+ finalEvent = event.response ?? event;
106
+ // response.done may carry output_text if delta events were absent
107
+ if (!text) {
108
+ text = finalEvent?.output_text ?? finalEvent?.output?.[0]?.content?.[0]?.text ?? '';
109
+ }
110
+ break;
111
+ }
112
+ } catch { /* skip malformed lines */ }
113
+ }
114
+ const usage = finalEvent?.usage ?? null;
115
+
116
+ this._trackTokens(usage, {
117
+ prompt,
118
+ agentInstructions: agentInstructions ?? null,
119
+ response: text,
120
+ elapsed: Date.now() - t0,
121
+ });
122
+
123
+ return text;
124
+ }
125
+
18
126
  /**
19
127
  * Determine if model uses Responses API instead of Chat Completions API
20
128
  * Models that use Responses API: gpt-5.2-pro, gpt-5.2-codex
@@ -42,11 +150,11 @@ export class OpenAIProvider extends LLMProvider {
42
150
  messages
43
151
  };
44
152
 
45
- // GPT-5+ models use max_completion_tokens, older models use max_tokens
46
- if (this.model.startsWith('gpt-5') || this.model.startsWith('o1') || this.model.startsWith('o3')) {
47
- params.max_completion_tokens = maxTokens;
48
- } else {
153
+ // max_completion_tokens is the modern unified parameter; max_tokens is only for legacy gpt-3.5-turbo
154
+ if (this.model.startsWith('gpt-3.5')) {
49
155
  params.max_tokens = maxTokens;
156
+ } else {
157
+ params.max_completion_tokens = maxTokens;
50
158
  }
51
159
 
52
160
  const response = await this._client.chat.completions.create(params);
@@ -57,8 +165,11 @@ export class OpenAIProvider extends LLMProvider {
57
165
 
58
166
  /**
59
167
  * Call using Responses API (pro/codex models)
168
+ * @param {string} prompt
169
+ * @param {string|null} systemInstructions
170
+ * @param {Object|null} [promptPayload] - Partial payload { prompt, agentInstructions } to log
60
171
  */
61
- async _callResponsesAPI(prompt, systemInstructions) {
172
+ async _callResponsesAPI(prompt, systemInstructions, promptPayload = null) {
62
173
  // Combine system instructions with prompt
63
174
  const fullInput = systemInstructions
64
175
  ? `${systemInstructions}\n\n${prompt}`
@@ -74,20 +185,33 @@ export class OpenAIProvider extends LLMProvider {
74
185
  params.reasoning = { effort: this.reasoningEffort };
75
186
  }
76
187
 
188
+ const _t0 = Date.now();
77
189
  const response = await this._withRetry(
78
190
  () => this._client.responses.create(params),
79
191
  'Responses API call'
80
192
  );
193
+ const _elapsed = Date.now() - _t0;
194
+
195
+ const text = response.output_text;
81
196
 
82
197
  // Track tokens if usage data is available
83
198
  if (response.usage) {
84
- this._trackTokens(response.usage);
199
+ const finalPayload = promptPayload ? {
200
+ ...promptPayload,
201
+ response: text,
202
+ elapsed: _elapsed,
203
+ } : null;
204
+ this._trackTokens(response.usage, finalPayload);
85
205
  }
86
206
 
87
- return response.output_text;
207
+ return text;
88
208
  }
89
209
 
90
210
  async _callProvider(prompt, maxTokens, systemInstructions) {
211
+ // OAuth mode: all calls go through the ChatGPT Codex endpoint
212
+ if (this._client?.mode === 'oauth') {
213
+ return await this._callChatGPTCodex(prompt, systemInstructions);
214
+ }
91
215
  if (this._usesResponsesAPI()) {
92
216
  return await this._callResponsesAPI(prompt, systemInstructions);
93
217
  } else {
@@ -95,17 +219,56 @@ export class OpenAIProvider extends LLMProvider {
95
219
  }
96
220
  }
97
221
 
98
- async generateJSON(prompt, agentInstructions = null) {
222
+ /** True when oauth mode is active AND fallback to api-key is enabled AND key is present */
223
+ _hasFallback() {
224
+ return process.env.OPENAI_AUTH_MODE === 'oauth'
225
+ && process.env.OPENAI_OAUTH_FALLBACK === 'true'
226
+ && !!process.env.OPENAI_API_KEY;
227
+ }
228
+
229
+ /** Create a plain OpenAI SDK client using OPENAI_API_KEY (for fallback) */
230
+ _createApiKeyClient() {
231
+ return new OpenAI({ apiKey: process.env.OPENAI_API_KEY });
232
+ }
233
+
234
+ async generateJSON(prompt, agentInstructions = null, cachedContext = null) {
99
235
  if (!this._client) {
100
236
  this._client = this._createClient();
101
237
  }
102
238
 
239
+ // OAuth path — route through ChatGPT Codex endpoint
240
+ if (this._client?.mode === 'oauth') {
241
+ try {
242
+ const jsonInstructions = (agentInstructions ? agentInstructions + '\n\n' : '')
243
+ + 'You are a helpful assistant that always returns valid JSON. Your response must be a valid JSON object or array, nothing else.';
244
+ const text = await this._callChatGPTCodex(prompt, jsonInstructions);
245
+ let jsonStr = text.trim();
246
+ if (jsonStr.startsWith('```')) {
247
+ jsonStr = jsonStr.replace(/^```(?:json)?\s*\n?/, '').replace(/\n?\s*```\s*$/, '').trim();
248
+ }
249
+ try {
250
+ return JSON.parse(jsonStr);
251
+ } catch (firstError) {
252
+ if (jsonStr.startsWith('{') || jsonStr.startsWith('[')) {
253
+ try { return JSON.parse(jsonrepair(jsonStr)); } catch { /* fall through */ }
254
+ }
255
+ throw new Error(`Failed to parse JSON response: ${firstError.message}\n\nResponse was:\n${text}`);
256
+ }
257
+ } catch (oauthErr) {
258
+ if (!this._hasFallback()) throw oauthErr;
259
+ console.warn(`[openai] OAuth call failed, falling back to API key: ${oauthErr.message}`);
260
+ this._client = this._createApiKeyClient();
261
+ // fall through to standard paths below
262
+ }
263
+ }
264
+
103
265
  const fullPrompt = agentInstructions ? `${agentInstructions}\n\n${prompt}` : prompt;
104
266
 
105
267
  if (this._usesResponsesAPI()) {
106
268
  // Responses API: Use system instructions to enforce JSON
107
269
  const systemInstructions = 'You are a helpful assistant that always returns valid JSON. Your response must be a valid JSON object or array, nothing else.';
108
- const response = await this._callResponsesAPI(fullPrompt, systemInstructions);
270
+ const _rApiPayload = this._promptLogger ? { prompt: fullPrompt, agentInstructions: agentInstructions ?? null } : null;
271
+ const response = await this._callResponsesAPI(fullPrompt, systemInstructions, _rApiPayload);
109
272
 
110
273
  // Parse and return JSON
111
274
  let jsonStr = response.trim();
@@ -126,31 +289,35 @@ export class OpenAIProvider extends LLMProvider {
126
289
  throw new Error(`Failed to parse JSON response: ${firstError.message}\n\nResponse was:\n${response}`);
127
290
  }
128
291
  } else {
129
- // Chat Completions API: Use native JSON mode
292
+ // Chat Completions API: Use native JSON mode.
293
+ // Build system message as: JSON_SYSTEM + agentInstructions + cachedContext.
294
+ // Putting agentInstructions in the system message (not the user message) makes the full
295
+ // prefix eligible for OpenAI automatic prefix caching — identical system prefixes across
296
+ // repeated calls of the same stage type get a 90% discount after the first 1024 tokens.
297
+ const JSON_SYSTEM = 'You are a helpful assistant that always returns valid JSON. Your response must be a valid JSON object or array, nothing else.';
298
+ const systemParts = [JSON_SYSTEM];
299
+ if (agentInstructions) systemParts.push(agentInstructions);
300
+ if (cachedContext) systemParts.push(`---\n\n${cachedContext}`);
301
+ const systemContent = systemParts.join('\n\n');
302
+
130
303
  const messages = [
131
- {
132
- role: 'system',
133
- content: 'You are a helpful assistant that always returns valid JSON. Your response must be a valid JSON object or array, nothing else.'
134
- },
135
- {
136
- role: 'user',
137
- content: fullPrompt
138
- }
304
+ { role: 'system', content: systemContent },
305
+ { role: 'user', content: prompt },
139
306
  ];
140
307
 
141
308
  const params = {
142
309
  model: this.model,
143
- messages
310
+ messages,
144
311
  };
145
312
 
146
313
  // Use model-specific maximum tokens
147
314
  const maxTokens = getMaxTokensForModel(this.model);
148
315
 
149
- // GPT-5+ models use max_completion_tokens, older models use max_tokens
150
- if (this.model.startsWith('gpt-5') || this.model.startsWith('o1') || this.model.startsWith('o3')) {
151
- params.max_completion_tokens = maxTokens;
152
- } else {
316
+ // max_completion_tokens is the modern unified parameter; max_tokens is only for legacy gpt-3.5-turbo
317
+ if (this.model.startsWith('gpt-3.5')) {
153
318
  params.max_tokens = maxTokens;
319
+ } else {
320
+ params.max_completion_tokens = maxTokens;
154
321
  }
155
322
 
156
323
  // Enable JSON mode if model supports it (GPT-4+)
@@ -158,13 +325,38 @@ export class OpenAIProvider extends LLMProvider {
158
325
  params.response_format = { type: 'json_object' };
159
326
  }
160
327
 
328
+ // Extended 24-hour cache retention — free on gpt-5.x and gpt-4.1+ families.
329
+ // Keeps the system-message prefix in cache across long ceremony runs (>1 hr).
330
+ if (this.model.startsWith('gpt-5') || this.model.startsWith('gpt-4.1')) {
331
+ params.prompt_cache_retention = '24h';
332
+ }
333
+
334
+ const _t0Json = Date.now();
161
335
  const response = await this._withRetry(
162
336
  () => this._client.chat.completions.create(params),
163
337
  'JSON generation (Chat Completions)'
164
338
  );
165
339
 
166
- this._trackTokens(response.usage);
167
- const content = response.choices[0].message.content;
340
+ const choice = response.choices[0];
341
+ const content = choice.message.content;
342
+
343
+ // Detect output truncation — json_object mode returns null/empty when cut off at token limit
344
+ if (choice.finish_reason === 'length' || !content) {
345
+ const maxTok = getMaxTokensForModel(this.model);
346
+ const usedOut = response.usage?.completion_tokens ?? '?';
347
+ throw new Error(
348
+ `Response truncated at token limit (finish_reason=length). ` +
349
+ `Model: ${this.model}, limit: ${maxTok}, used: ${usedOut}. ` +
350
+ `Increase max tokens for this model in llm-token-limits.js or reduce prompt size.`
351
+ );
352
+ }
353
+
354
+ this._trackTokens(response.usage, {
355
+ prompt: fullPrompt,
356
+ agentInstructions: agentInstructions ?? null,
357
+ response: content,
358
+ elapsed: Date.now() - _t0Json,
359
+ });
168
360
 
169
361
  // Strip markdown code fences if present (defense-in-depth)
170
362
  let jsonStr = content.trim();
@@ -187,47 +379,76 @@ export class OpenAIProvider extends LLMProvider {
187
379
  }
188
380
  }
189
381
 
190
- async generateText(prompt, agentInstructions = null) {
382
+ async generateText(prompt, agentInstructions = null, cachedContext = null) {
191
383
  if (!this._client) {
192
384
  this._client = this._createClient();
193
385
  }
194
386
 
387
+ // OAuth path — route through ChatGPT Codex endpoint
388
+ if (this._client?.mode === 'oauth') {
389
+ try {
390
+ return await this._callChatGPTCodex(prompt, agentInstructions);
391
+ } catch (oauthErr) {
392
+ if (!this._hasFallback()) throw oauthErr;
393
+ console.warn(`[openai] OAuth call failed, falling back to API key: ${oauthErr.message}`);
394
+ this._client = this._createApiKeyClient();
395
+ // fall through to standard paths below
396
+ }
397
+ }
398
+
195
399
  const fullPrompt = agentInstructions ? `${agentInstructions}\n\n${prompt}` : prompt;
196
400
 
197
401
  if (this._usesResponsesAPI()) {
198
402
  // Responses API
199
- return await this._callResponsesAPI(fullPrompt, null);
403
+ const _rApiPayload = this._promptLogger ? { prompt: fullPrompt, agentInstructions: agentInstructions ?? null } : null;
404
+ return await this._callResponsesAPI(fullPrompt, null, _rApiPayload);
200
405
  } else {
201
- // Chat Completions API
202
- const messages = [
203
- {
204
- role: 'user',
205
- content: fullPrompt
206
- }
207
- ];
406
+ // Chat Completions API.
407
+ // Build system message as agentInstructions + cachedContext so both are eligible for
408
+ // OpenAI automatic prefix caching (90% discount when system prefix is stable across calls).
409
+ const systemParts = [];
410
+ if (agentInstructions) systemParts.push(agentInstructions);
411
+ if (cachedContext) systemParts.push(cachedContext);
412
+ const messages = [];
413
+ if (systemParts.length > 0) {
414
+ messages.push({ role: 'system', content: systemParts.join('\n\n') });
415
+ }
416
+ messages.push({ role: 'user', content: prompt });
208
417
 
209
418
  const params = {
210
419
  model: this.model,
211
- messages
420
+ messages,
212
421
  };
213
422
 
214
423
  // Use model-specific maximum tokens
215
424
  const maxTokens = getMaxTokensForModel(this.model);
216
425
 
217
- // GPT-5+ models use max_completion_tokens, older models use max_tokens
218
- if (this.model.startsWith('gpt-5') || this.model.startsWith('o1') || this.model.startsWith('o3')) {
219
- params.max_completion_tokens = maxTokens;
220
- } else {
426
+ // max_completion_tokens is the modern unified parameter; max_tokens is only for legacy gpt-3.5-turbo
427
+ if (this.model.startsWith('gpt-3.5')) {
221
428
  params.max_tokens = maxTokens;
429
+ } else {
430
+ params.max_completion_tokens = maxTokens;
431
+ }
432
+
433
+ // Extended 24-hour cache retention — free on gpt-5.x and gpt-4.1+ families.
434
+ if (this.model.startsWith('gpt-5') || this.model.startsWith('gpt-4.1')) {
435
+ params.prompt_cache_retention = '24h';
222
436
  }
223
437
 
438
+ const _t0Text = Date.now();
224
439
  const response = await this._withRetry(
225
440
  () => this._client.chat.completions.create(params),
226
441
  'Text generation (Chat Completions)'
227
442
  );
228
443
 
229
- this._trackTokens(response.usage);
230
- return response.choices[0].message.content;
444
+ const textContent = response.choices[0].message.content;
445
+ this._trackTokens(response.usage, {
446
+ prompt: fullPrompt,
447
+ agentInstructions: agentInstructions ?? null,
448
+ response: textContent,
449
+ elapsed: Date.now() - _t0Text,
450
+ });
451
+ return textContent;
231
452
  }
232
453
  }
233
454
  }
@@ -6,6 +6,8 @@ export class LLMProvider {
6
6
  this.tokenUsage = {
7
7
  inputTokens: 0,
8
8
  outputTokens: 0,
9
+ cachedInputTokens: 0, // cache reads (already counted inside inputTokens)
10
+ cacheWriteTokens: 0, // cache writes (billed at 1.25× on Claude, free on OpenAI/Gemini)
9
11
  totalCalls: 0
10
12
  };
11
13
 
@@ -71,7 +73,15 @@ export class LLMProvider {
71
73
  'rate limit',
72
74
  'quota exceeded',
73
75
  'resource exhausted',
74
- 'resource has been exhausted'
76
+ 'resource has been exhausted',
77
+ // Transient network/connection errors (e.g. WSL2 IPv6, momentary TCP failures)
78
+ 'connection error',
79
+ 'econnreset',
80
+ 'econnrefused',
81
+ 'enotfound',
82
+ 'etimedout',
83
+ 'network error',
84
+ 'fetch failed',
75
85
  ];
76
86
 
77
87
  const hasHighDemandMessage = highDemandPatterns.some(pattern =>
@@ -146,7 +156,8 @@ export class LLMProvider {
146
156
  // Log retry attempt with helpful info
147
157
  const retrySource = retryAfterDelay ? 'server directive' : 'exponential backoff';
148
158
  console.log(`\n⏳ ${operationName} failed (attempt ${attempt + 1}/${this.retryConfig.maxRetries + 1})`);
149
- console.log(` Error: ${error.message}`);
159
+ const causeMsg = error.cause?.message || error.cause?.code || '';
160
+ console.log(` Error: ${error.message}${causeMsg ? ` (cause: ${causeMsg})` : ''}`);
150
161
  console.log(` Retrying in ${currentDelay / 1000}s (${retrySource})...`);
151
162
 
152
163
  // Wait before retrying
@@ -174,23 +185,64 @@ export class LLMProvider {
174
185
  this._callCallbacks.push(fn);
175
186
  }
176
187
 
188
+ /**
189
+ * Attach a PromptLogger instance and set the stage label for subsequent calls.
190
+ * @param {import('./prompt-logger.js').PromptLogger} promptLogger
191
+ * @param {string} [stage]
192
+ */
193
+ setPromptLogger(promptLogger, stage) {
194
+ this._promptLogger = promptLogger;
195
+ this._promptLoggerStage = stage || 'unknown';
196
+ }
197
+
177
198
  /**
178
199
  * Track token usage from API response and fire per-call callbacks.
200
+ * Optionally writes a prompt/response payload to the attached PromptLogger.
179
201
  * @param {Object} usage - Usage object from API response
202
+ * @param {Object|null} [promptPayload] - Optional { prompt, agentInstructions, response, elapsed }
180
203
  */
181
- _trackTokens(usage) {
204
+ _trackTokens(usage, promptPayload = null) {
182
205
  if (usage) {
183
206
  const deltaIn = usage.input_tokens || usage.inputTokens || usage.promptTokenCount || usage.prompt_tokens || 0;
184
207
  const deltaOut = usage.output_tokens || usage.outputTokens || usage.candidatesTokenCount || usage.completion_tokens || 0;
185
- this.tokenUsage.inputTokens += deltaIn;
186
- this.tokenUsage.outputTokens += deltaOut;
208
+ // Cache stats — each provider uses a different field name:
209
+ // Claude: cache_read_input_tokens / cache_creation_input_tokens
210
+ // OpenAI: prompt_tokens_details.cached_tokens
211
+ // Gemini: cachedContentTokenCount
212
+ const deltaCacheRead = usage.cache_read_input_tokens
213
+ || usage.cachedContentTokenCount
214
+ || usage.prompt_tokens_details?.cached_tokens
215
+ || 0;
216
+ const deltaCacheWrite = usage.cache_creation_input_tokens || 0;
217
+ this.tokenUsage.inputTokens += deltaIn;
218
+ this.tokenUsage.outputTokens += deltaOut;
219
+ this.tokenUsage.cachedInputTokens += deltaCacheRead;
220
+ this.tokenUsage.cacheWriteTokens += deltaCacheWrite;
187
221
  this.tokenUsage.totalCalls++;
222
+ if (deltaCacheRead > 0 || deltaCacheWrite > 0) {
223
+ console.log(` [cache] write=${deltaCacheWrite} read=${deltaCacheRead} tokens`);
224
+ }
188
225
  if (this._callCallbacks.length > 0 && (deltaIn > 0 || deltaOut > 0)) {
189
- const delta = { input: deltaIn, output: deltaOut, provider: this.providerName, model: this.model };
226
+ const delta = { input: deltaIn, output: deltaOut, cached: deltaCacheRead, cacheWrite: deltaCacheWrite, provider: this.providerName, model: this.model };
190
227
  for (const fn of this._callCallbacks) {
191
228
  try { fn(delta); } catch (_) {}
192
229
  }
193
230
  }
231
+ if (this._promptLogger && promptPayload) {
232
+ this._promptLogger.write({
233
+ ceremony: this._promptLogger.ceremony,
234
+ stage: this._promptLoggerStage || 'unknown',
235
+ call: (this._promptLogger.callCount || 0) + 1,
236
+ timestamp: new Date().toISOString(),
237
+ elapsed_ms: promptPayload.elapsed ?? null,
238
+ provider: this.providerName,
239
+ model: this.model,
240
+ tokens: { input: deltaIn, output: deltaOut },
241
+ prompt: promptPayload.prompt ?? null,
242
+ agentInstructions: promptPayload.agentInstructions ?? null,
243
+ response: promptPayload.response ?? null,
244
+ });
245
+ }
194
246
  }
195
247
  }
196
248
 
@@ -205,7 +257,9 @@ export class LLMProvider {
205
257
  const pricing = {
206
258
  'claude': { input: 3.00, output: 15.00 }, // Claude Sonnet 4.5
207
259
  'gemini': { input: 0.15, output: 0.60 }, // Gemini 2.0 Flash
208
- 'openai': { input: 1.75, output: 14.00 } // GPT-5.2
260
+ 'openai': { input: 1.75, output: 14.00 }, // GPT-5.2
261
+ 'local': { input: 0, output: 0 }, // Local models — free
262
+ 'xiaomi': { input: 0.09, output: 0.29 } // MiMo V2 Flash (default)
209
263
  };
210
264
 
211
265
  const rates = pricing[this.providerName] || { input: 0, output: 0 };
@@ -216,6 +270,8 @@ export class LLMProvider {
216
270
  return {
217
271
  inputTokens: this.tokenUsage.inputTokens,
218
272
  outputTokens: this.tokenUsage.outputTokens,
273
+ cachedInputTokens: this.tokenUsage.cachedInputTokens,
274
+ cacheWriteTokens: this.tokenUsage.cacheWriteTokens,
219
275
  totalTokens: total,
220
276
  totalCalls: this.tokenUsage.totalCalls,
221
277
  estimatedCost,
@@ -224,6 +280,81 @@ export class LLMProvider {
224
280
  };
225
281
  }
226
282
 
283
+ /**
284
+ * Map of cloud providers to their required env var.
285
+ */
286
+ static PROVIDER_ENV_MAP = {
287
+ 'claude': 'ANTHROPIC_API_KEY',
288
+ 'gemini': 'GEMINI_API_KEY',
289
+ 'openai': 'OPENAI_API_KEY',
290
+ 'xiaomi': 'XIAOMI_API_KEY',
291
+ };
292
+
293
+ /**
294
+ * Default model per provider (used as fallback when switching providers).
295
+ */
296
+ static PROVIDER_DEFAULT_MODELS = {
297
+ 'claude': 'claude-sonnet-4-6',
298
+ 'gemini': 'gemini-2.5-flash',
299
+ 'openai': 'gpt-4.1',
300
+ 'xiaomi': 'MiMo-V2-Flash',
301
+ };
302
+
303
+ /**
304
+ * Check whether a provider has valid credentials available.
305
+ * For cloud providers this means the env var is set.
306
+ * For local, probe the server (with a short timeout).
307
+ * @param {string} providerName
308
+ * @returns {Promise<boolean>}
309
+ */
310
+ static async hasProviderCredentials(providerName) {
311
+ if (providerName === 'local') {
312
+ try {
313
+ const { discoverLocalServers } = await import('./llm-local.js');
314
+ const servers = await Promise.race([
315
+ discoverLocalServers(),
316
+ new Promise((_, reject) => setTimeout(() => reject(new Error('timeout')), 3000))
317
+ ]);
318
+ return servers.length > 0;
319
+ } catch {
320
+ return false;
321
+ }
322
+ }
323
+ if (providerName === 'openai') {
324
+ const oauthMode = process.env.OPENAI_AUTH_MODE === 'oauth';
325
+ return !!(process.env.OPENAI_API_KEY || (oauthMode && process.env.OPENAI_OAUTH_TOKEN));
326
+ }
327
+ const envVar = LLMProvider.PROVIDER_ENV_MAP[providerName];
328
+ return envVar ? !!process.env[envVar] : false;
329
+ }
330
+
331
+ /**
332
+ * Resolve a provider+model pair, falling back to any available provider
333
+ * if the requested one has no credentials.
334
+ * @param {string} provider - Requested provider name
335
+ * @param {string} model - Requested model name
336
+ * @returns {Promise<{ provider: string, model: string, fellBack: boolean }>}
337
+ */
338
+ static async resolveAvailableProvider(provider, model) {
339
+ // Check requested provider first
340
+ if (await LLMProvider.hasProviderCredentials(provider)) {
341
+ return { provider, model, fellBack: false };
342
+ }
343
+
344
+ // Fallback priority: claude → gemini → openai → xiaomi → local
345
+ const fallbackOrder = ['claude', 'gemini', 'openai', 'xiaomi', 'local'];
346
+ for (const candidate of fallbackOrder) {
347
+ if (candidate === provider) continue; // already checked
348
+ if (await LLMProvider.hasProviderCredentials(candidate)) {
349
+ const fallbackModel = LLMProvider.PROVIDER_DEFAULT_MODELS[candidate] || candidate;
350
+ return { provider: candidate, model: fallbackModel, fellBack: true };
351
+ }
352
+ }
353
+
354
+ // Nothing available — return original and let create() throw the usual error
355
+ return { provider, model, fellBack: false };
356
+ }
357
+
227
358
  // Factory — async because of dynamic import (only loads the SDK you need)
228
359
  static async create(providerName, model) {
229
360
  // AVC_LLM_MOCK=1: return instant mock provider for E2E testing (no API calls)
@@ -245,8 +376,16 @@ export class LLMProvider {
245
376
  const { OpenAIProvider } = await import('./llm-openai.js');
246
377
  return new OpenAIProvider(model);
247
378
  }
379
+ case 'local': {
380
+ const { LocalProvider } = await import('./llm-local.js');
381
+ return new LocalProvider(model);
382
+ }
383
+ case 'xiaomi': {
384
+ const { XiaomiProvider } = await import('./llm-xiaomi.js');
385
+ return new XiaomiProvider(model);
386
+ }
248
387
  default:
249
- throw new Error(`Unknown LLM provider: "${providerName}". Supported: claude, gemini, openai`);
388
+ throw new Error(`Unknown LLM provider: "${providerName}". Supported: claude, gemini, openai, local, xiaomi`);
250
389
  }
251
390
  }
252
391