@delegance/claude-autopilot 5.0.2 → 5.0.4
@@ -3,6 +3,16 @@ import { GuardrailError } from "../../core/errors.js";
 import { classifyError } from "../review-engine/prompt-builder.js";
 const SYSTEM_PROMPT = `You are a technical advisor reviewing a software design decision. Evaluate the provided context and question critically. Be direct and specific. Surface tradeoffs, risks, and your recommendation.`;
 const MAX_OUTPUT_TOKENS = 2048;
+// Models that ONLY work via the Responses API (not chat.completions).
+// Codex variants and the o-series reasoning models all 404 on chat.completions.
+// Without this branch, putting `gpt-5.3-codex` (the typical default) in
+// council.models throws model_not_found, AND the synthesizer (also typically
+// gpt-5.3-codex) fails the same way — so the whole council returns `partial`
+// with no synthesis. That regression made the marketed multi-model differentiator
+// unusable for any user who only had OPENAI_API_KEY.
+function isResponsesOnlyModel(model) {
+  return /codex|^o[1-9]|^gpt-5\.3-/i.test(model);
+}
 export function makeOpenAICouncilAdapter(model, label) {
   return {
     label,

@@ -12,27 +22,36 @@ export function makeOpenAICouncilAdapter(model, label) {
         throw new GuardrailError('OPENAI_API_KEY not set', { code: 'auth', provider: 'openai' });
       }
       const client = new OpenAI({ apiKey });
-
+      const userInput = `## Context\n\n${context}\n\n## Question\n\n${prompt}`;
       try {
-
+        if (isResponsesOnlyModel(model)) {
+          const response = await client.responses.create({
+            model,
+            instructions: SYSTEM_PROMPT,
+            input: userInput,
+            max_output_tokens: MAX_OUTPUT_TOKENS,
+          });
+          return response.output_text ?? '';
+        }
+        const response = await client.chat.completions.create({
           model,
           max_tokens: MAX_OUTPUT_TOKENS,
           messages: [
             { role: 'system', content: SYSTEM_PROMPT },
-            { role: 'user', content:
+            { role: 'user', content: userInput },
           ],
         });
+        return response.choices[0]?.message?.content ?? '';
       }
       catch (err) {
         const message = err instanceof Error ? err.message : String(err);
         const code = classifyError(message);
-        throw new GuardrailError(`OpenAI council call failed: ${message}`, {
+        throw new GuardrailError(`OpenAI council call failed (model=${model}): ${message}`, {
           code,
           provider: 'openai',
           retryable: code === 'rate_limit',
         });
       }
-      return response.choices[0]?.message?.content ?? '';
     },
   };
 }
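
For illustration only (this sketch is not part of the published diff): the routing predicate added above can be exercised directly. gpt-5.3-codex is the model name from this diff; o3-mini and gpt-4o are stand-in examples of an o-series model and a chat-completions model.

const isResponsesOnlyModel = (model) => /codex|^o[1-9]|^gpt-5\.3-/i.test(model);

console.log(isResponsesOnlyModel('gpt-5.3-codex')); // true  -> client.responses.create
console.log(isResponsesOnlyModel('o3-mini'));       // true  -> Responses API (o-series)
console.log(isResponsesOnlyModel('gpt-4o'));        // false -> client.chat.completions.create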

@@ -4,6 +4,12 @@ import { GuardrailError } from "../../core/errors.js";
 import { buildSystemPrompt, classifyError } from "./prompt-builder.js";
 const DEFAULT_MODEL = process.env.CODEX_MODEL ?? 'gpt-5.3-codex';
 const MAX_OUTPUT_TOKENS = 4096;
+// Per-million-token rates for gpt-5.3-codex (override via env for other models).
+// Computed client-side because the OpenAI Responses API returns token counts
+// but no $-cost field. Without this, every codex run logged costUSD=0 even
+// though tokens were tracked correctly.
+const COST_PER_M_INPUT = Number(process.env.CODEX_COST_INPUT_PER_M ?? 1.25);
+const COST_PER_M_OUTPUT = Number(process.env.CODEX_COST_OUTPUT_PER_M ?? 10.0);
 const SYSTEM_PROMPT_TEMPLATE = `You are a senior software architect providing feedback on designs, proposals, and ideas.
 
 The codebase context:

@@ -62,10 +68,16 @@ export const codexAdapter = {
       });
     }
     const rawOutput = response.output_text ?? '';
+    const costUSD = response.usage
+      ? (response.usage.input_tokens / 1_000_000) * COST_PER_M_INPUT +
+        (response.usage.output_tokens / 1_000_000) * COST_PER_M_OUTPUT
+      : undefined;
     return {
       findings: parseReviewOutput(rawOutput, 'codex'),
       rawOutput,
-      usage: response.usage
+      usage: response.usage
+        ? { input: response.usage.input_tokens, output: response.usage.output_tokens, costUSD }
+        : undefined,
     };
   },
 };
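
To make the cost computation above concrete, here is a minimal standalone sketch using the default rates from the diff ($1.25 per million input tokens, $10.00 per million output tokens). The token counts are invented sample values, not real usage.

const COST_PER_M_INPUT = 1.25;   // default from the diff
const COST_PER_M_OUTPUT = 10.0;  // default from the diff

const usage = { input_tokens: 200_000, output_tokens: 8_000 }; // sample values

const costUSD =
  (usage.input_tokens / 1_000_000) * COST_PER_M_INPUT +    // 0.25
  (usage.output_tokens / 1_000_000) * COST_PER_M_OUTPUT;   // 0.08

console.log(costUSD.toFixed(4)); // "0.3300"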

package/dist/src/cli/scan.js
CHANGED

@@ -6,6 +6,7 @@ import { runReviewPhase } from "../core/pipeline/review-phase.js";
 import { detectStack } from "../core/detect/stack.js";
 import { loadIgnoreRules, parseConfigIgnore, applyIgnoreRules } from "../core/ignore/index.js";
 import { saveCachedFindings } from "../core/persist/findings-cache.js";
+import { appendCostLog } from "../core/persist/cost-log.js";
 import { detectLLMKey, LLM_KEY_HINTS } from "../core/detect/llm-key.js";
 const C = {
   reset: '\x1b[0m', bold: '\x1b[1m', dim: '\x1b[2m',

@@ -184,6 +185,17 @@ export async function runScan(options = {}) {
   }
   // Persist findings so `guardrail fix` can read them
   saveCachedFindings(cwd, findings);
+  // Persist run to cost log so `claude-autopilot costs` reflects scans, not
+  // just full pipeline runs. Previously scan never wrote to the log, so the
+  // costs report stayed frozen at whatever the last `run` invocation produced.
+  appendCostLog(cwd, {
+    timestamp: new Date().toISOString(),
+    files: files.length,
+    inputTokens: result.usage?.input ?? 0,
+    outputTokens: result.usage?.output ?? 0,
+    costUSD: result.costUSD ?? 0,
+    durationMs: result.durationMs,
+  });
   if (result.costUSD !== undefined) {
     console.log(fmt('dim', ` $${result.costUSD.toFixed(4)} · ${result.durationMs}ms`));
   }
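
A sketch of how entries written by appendCostLog could be aggregated for the costs report. The field names (inputTokens, outputTokens, costUSD, durationMs) come from the call above; the JSONL storage format and the summarize helper are assumptions for illustration, not the package's actual implementation.

import { readFileSync } from 'node:fs';

// Assumption: one JSON entry per line, which appendCostLog's name suggests
// but this diff does not confirm.
function summarize(logPath) {
  const entries = readFileSync(logPath, 'utf8')
    .split('\n')
    .filter(Boolean)
    .map((line) => JSON.parse(line));
  return entries.reduce(
    (acc, e) => ({
      runs: acc.runs + 1,
      inputTokens: acc.inputTokens + (e.inputTokens ?? 0),
      outputTokens: acc.outputTokens + (e.outputTokens ?? 0),
      costUSD: acc.costUSD + (e.costUSD ?? 0),
    }),
    { runs: 0, inputTokens: 0, outputTokens: 0, costUSD: 0 },
  );
}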

package/package.json
CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "@delegance/claude-autopilot",
-  "version": "5.0.2",
+  "version": "5.0.4",
   "type": "module",
   "description": "Autonomous development pipeline for Claude Code: brainstorm → spec → plan → implement → migrate → validate → PR → review → merge. Multi-model, local-first, every phase a skill you can intervene in.",
   "keywords": [
@@ -33,3 +33,15 @@ chunking:
 pipeline:
   runReviewOnStaticFail: true
   runReviewOnTestFail: false
+# Optional: multi-model council. Uncomment + set ANTHROPIC_API_KEY and/or
+# OPENAI_API_KEY. Models are dispatched in parallel; the synthesizer reads
+# their responses and writes the consensus. Both APIs supported (chat-completions
+# and Responses API for codex/o-series models — auto-detected by name).
+# Usage: claude-autopilot council --prompt "..." --context-file <path>
+# council:
+#   models:
+#     - { adapter: claude, model: claude-opus-4-7, label: opus }
+#     - { adapter: openai, model: gpt-5.3-codex, label: codex }
+#   synthesizer: { adapter: claude, model: claude-sonnet-4-6, label: synth }
+#   timeout_ms: 30000
+#   min_successful_responses: 1