@delegance/claude-autopilot 5.0.2 → 5.0.4

@@ -3,6 +3,16 @@ import { GuardrailError } from "../../core/errors.js";
 import { classifyError } from "../review-engine/prompt-builder.js";
 const SYSTEM_PROMPT = `You are a technical advisor reviewing a software design decision. Evaluate the provided context and question critically. Be direct and specific. Surface tradeoffs, risks, and your recommendation.`;
 const MAX_OUTPUT_TOKENS = 2048;
+// Models that ONLY work via the Responses API (not chat.completions).
+// Codex variants and the o-series reasoning models all 404 on chat.completions.
+// Without this branch, putting `gpt-5.3-codex` (the typical default) in
+// council.models throws model_not_found, AND the synthesizer (also typically
+// gpt-5.3-codex) fails the same way — so the whole council returns `partial`
+// with no synthesis. That regression made the marketed multi-model differentiator
+// unusable for any user who only had OPENAI_API_KEY.
+function isResponsesOnlyModel(model) {
+  return /codex|^o[1-9]|^gpt-5\.3-/i.test(model);
+}
 export function makeOpenAICouncilAdapter(model, label) {
   return {
     label,
@@ -12,27 +22,36 @@ export function makeOpenAICouncilAdapter(model, label) {
        throw new GuardrailError('OPENAI_API_KEY not set', { code: 'auth', provider: 'openai' });
      }
      const client = new OpenAI({ apiKey });
-     let response;
+     const userInput = `## Context\n\n${context}\n\n## Question\n\n${prompt}`;
      try {
-       response = await client.chat.completions.create({
+       if (isResponsesOnlyModel(model)) {
+         const response = await client.responses.create({
+           model,
+           instructions: SYSTEM_PROMPT,
+           input: userInput,
+           max_output_tokens: MAX_OUTPUT_TOKENS,
+         });
+         return response.output_text ?? '';
+       }
+       const response = await client.chat.completions.create({
          model,
          max_tokens: MAX_OUTPUT_TOKENS,
          messages: [
            { role: 'system', content: SYSTEM_PROMPT },
-           { role: 'user', content: `## Context\n\n${context}\n\n## Question\n\n${prompt}` },
+           { role: 'user', content: userInput },
          ],
        });
+       return response.choices[0]?.message?.content ?? '';
      }
      catch (err) {
        const message = err instanceof Error ? err.message : String(err);
        const code = classifyError(message);
-       throw new GuardrailError(`OpenAI council call failed: ${message}`, {
+       throw new GuardrailError(`OpenAI council call failed (model=${model}): ${message}`, {
          code,
          provider: 'openai',
          retryable: code === 'rate_limit',
        });
      }
-     return response.choices[0]?.message?.content ?? '';
    },
  };
}
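For reference, a minimal sketch of how the new branch routes model names. The regex is copied from the diff above; the sample model names are illustrative assumptions, not a list the package ships:

// Sketch: regex copied from the diff above; sample names are illustrative.
const isResponsesOnlyModel = (model) => /codex|^o[1-9]|^gpt-5\.3-/i.test(model);

isResponsesOnlyModel('gpt-5.3-codex'); // true  -> routed to client.responses.create
isResponsesOnlyModel('o3-mini');       // true  -> matches ^o[1-9]
isResponsesOnlyModel('gpt-4o');        // false -> stays on client.chat.completions.create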

@@ -4,6 +4,12 @@ import { GuardrailError } from "../../core/errors.js";
 import { buildSystemPrompt, classifyError } from "./prompt-builder.js";
 const DEFAULT_MODEL = process.env.CODEX_MODEL ?? 'gpt-5.3-codex';
 const MAX_OUTPUT_TOKENS = 4096;
+// Per-million-token rates for gpt-5.3-codex (override via env for other models).
+// Computed client-side because the OpenAI Responses API returns token counts
+// but no $-cost field. Without this, every codex run logged costUSD=0 even
+// though tokens were tracked correctly.
+const COST_PER_M_INPUT = Number(process.env.CODEX_COST_INPUT_PER_M ?? 1.25);
+const COST_PER_M_OUTPUT = Number(process.env.CODEX_COST_OUTPUT_PER_M ?? 10.0);
 const SYSTEM_PROMPT_TEMPLATE = `You are a senior software architect providing feedback on designs, proposals, and ideas.

 The codebase context:
@@ -62,10 +68,16 @@ export const codexAdapter = {
       });
     }
     const rawOutput = response.output_text ?? '';
+    const costUSD = response.usage
+      ? (response.usage.input_tokens / 1_000_000) * COST_PER_M_INPUT +
+        (response.usage.output_tokens / 1_000_000) * COST_PER_M_OUTPUT
+      : undefined;
     return {
       findings: parseReviewOutput(rawOutput, 'codex'),
       rawOutput,
-      usage: response.usage ? { input: response.usage.input_tokens, output: response.usage.output_tokens } : undefined,
+      usage: response.usage
+        ? { input: response.usage.input_tokens, output: response.usage.output_tokens, costUSD }
+        : undefined,
     };
   },
 };
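As a worked example of the client-side rate math with the defaults above (the token counts are hypothetical):

// Hypothetical run: 12,000 input and 3,000 output tokens at the default rates.
const costUSD = (12_000 / 1_000_000) * 1.25 + (3_000 / 1_000_000) * 10.0;
// = 0.015 + 0.030 = 0.045, reported as $0.0450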

@@ -6,6 +6,7 @@ import { runReviewPhase } from "../core/pipeline/review-phase.js";
 import { detectStack } from "../core/detect/stack.js";
 import { loadIgnoreRules, parseConfigIgnore, applyIgnoreRules } from "../core/ignore/index.js";
 import { saveCachedFindings } from "../core/persist/findings-cache.js";
+import { appendCostLog } from "../core/persist/cost-log.js";
 import { detectLLMKey, LLM_KEY_HINTS } from "../core/detect/llm-key.js";
 const C = {
   reset: '\x1b[0m', bold: '\x1b[1m', dim: '\x1b[2m',
@@ -184,6 +185,17 @@ export async function runScan(options = {}) {
  }
  // Persist findings so `guardrail fix` can read them
  saveCachedFindings(cwd, findings);
+ // Persist run to cost log so `claude-autopilot costs` reflects scans, not
+ // just full pipeline runs. Previously scan never wrote to the log, so the
+ // costs report stayed frozen at whatever the last `run` invocation produced.
+ appendCostLog(cwd, {
+   timestamp: new Date().toISOString(),
+   files: files.length,
+   inputTokens: result.usage?.input ?? 0,
+   outputTokens: result.usage?.output ?? 0,
+   costUSD: result.costUSD ?? 0,
+   durationMs: result.durationMs,
+ });
  if (result.costUSD !== undefined) {
    console.log(fmt('dim', ` $${result.costUSD.toFixed(4)} · ${result.durationMs}ms`));
  }
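The cost-log writer itself is not part of this diff; a minimal JSONL-style sketch consistent with the call site above could look like this (the directory and file name are assumptions, not the package's actual paths):

// Sketch only: appendCostLog's real implementation is not shown in this diff.
import { appendFileSync, mkdirSync } from 'node:fs';
import { join } from 'node:path';

export function appendCostLog(cwd, entry) {
  const dir = join(cwd, '.claude-autopilot'); // assumed location
  mkdirSync(dir, { recursive: true });
  appendFileSync(join(dir, 'cost-log.jsonl'), JSON.stringify(entry) + '\n');
}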

package/package.json CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "@delegance/claude-autopilot",
-  "version": "5.0.2",
+  "version": "5.0.4",
   "type": "module",
   "description": "Autonomous development pipeline for Claude Code: brainstorm → spec → plan → implement → migrate → validate → PR → review → merge. Multi-model, local-first, every phase a skill you can intervene in.",
   "keywords": [

@@ -33,3 +33,15 @@ chunking:
 pipeline:
   runReviewOnStaticFail: true
   runReviewOnTestFail: false
+# Optional: multi-model council. Uncomment + set ANTHROPIC_API_KEY and/or
+# OPENAI_API_KEY. Models are dispatched in parallel; the synthesizer reads
+# their responses and writes the consensus. Both APIs supported (chat-completions
+# and Responses API for codex/o-series models — auto-detected by name).
+# Usage: claude-autopilot council --prompt "..." --context-file <path>
+# council:
+#   models:
+#     - { adapter: claude, model: claude-opus-4-7, label: opus }
+#     - { adapter: openai, model: gpt-5.3-codex, label: codex }
+#   synthesizer: { adapter: claude, model: claude-sonnet-4-6, label: synth }
+#   timeout_ms: 30000
+#   min_successful_responses: 1
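A sketch of the parallel-dispatch and threshold behavior these comments describe; this illustrates the documented semantics only, not the package's actual council code (adapter.invoke is a placeholder for the adapter calls shown earlier, and timeouts are omitted):

// Illustration of the documented semantics, not the package's actual code.
async function runCouncil(adapters, synthesizer, prompt, context, minSuccessful) {
  const settled = await Promise.allSettled(
    adapters.map((a) => a.invoke(prompt, context)) // dispatched in parallel
  );
  const responses = settled
    .filter((r) => r.status === 'fulfilled')
    .map((r) => r.value);
  if (responses.length < minSuccessful) {
    return { status: 'partial', responses }; // too few models answered
  }
  const synthesis = await synthesizer.invoke(prompt, responses.join('\n\n---\n\n'));
  return { status: 'ok', responses, synthesis };
}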