@delegance/claude-autopilot 5.0.5 → 5.0.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -3,6 +3,9 @@ import { GuardrailError } from "../../core/errors.js";
3
3
  import { classifyError } from "../review-engine/prompt-builder.js";
4
4
  const SYSTEM_PROMPT = `You are a technical advisor reviewing a software design decision. Evaluate the provided context and question critically. Be direct and specific. Surface tradeoffs, risks, and your recommendation.`;
5
5
  const MAX_OUTPUT_TOKENS = 2048;
6
+ // Default Opus 4.7 rates — env override for other models.
7
+ const COST_PER_M_INPUT = Number(process.env.CLAUDE_COST_INPUT_PER_M ?? 15.0);
8
+ const COST_PER_M_OUTPUT = Number(process.env.CLAUDE_COST_OUTPUT_PER_M ?? 75.0);
6
9
  export function makeClaudeCouncilAdapter(model, label) {
7
10
  return {
8
11
  label,
@@ -30,10 +33,17 @@ export function makeClaudeCouncilAdapter(model, label) {
30
33
  retryable: code === 'rate_limit',
31
34
  });
32
35
  }
33
- return response.content
36
+ const text = response.content
34
37
  .filter(b => b.type === 'text')
35
38
  .map(b => b.text)
36
39
  .join('');
40
+ const usage = response.usage ? {
41
+ input: response.usage.input_tokens,
42
+ output: response.usage.output_tokens,
43
+ costUSD: (response.usage.input_tokens / 1_000_000) * COST_PER_M_INPUT +
44
+ (response.usage.output_tokens / 1_000_000) * COST_PER_M_OUTPUT,
45
+ } : undefined;
46
+ return { text, usage };
37
47
  },
38
48
  };
39
49
  }
@@ -13,6 +13,10 @@ const MAX_OUTPUT_TOKENS = 2048;
13
13
  function isResponsesOnlyModel(model) {
14
14
  return /codex|^o[1-9]|^gpt-5\.3-/i.test(model);
15
15
  }
16
+ // Per-million-token rates for gpt-5.3-codex (override via env for other models).
17
+ // Mirrors the review-engine codex adapter's pricing.
18
+ const COST_PER_M_INPUT = Number(process.env.CODEX_COST_INPUT_PER_M ?? 1.25);
19
+ const COST_PER_M_OUTPUT = Number(process.env.CODEX_COST_OUTPUT_PER_M ?? 10.0);
16
20
  export function makeOpenAICouncilAdapter(model, label) {
17
21
  return {
18
22
  label,
@@ -31,7 +35,13 @@ export function makeOpenAICouncilAdapter(model, label) {
31
35
  input: userInput,
32
36
  max_output_tokens: MAX_OUTPUT_TOKENS,
33
37
  });
34
- return response.output_text ?? '';
38
+ const usage = response.usage ? {
39
+ input: response.usage.input_tokens,
40
+ output: response.usage.output_tokens,
41
+ costUSD: (response.usage.input_tokens / 1_000_000) * COST_PER_M_INPUT +
42
+ (response.usage.output_tokens / 1_000_000) * COST_PER_M_OUTPUT,
43
+ } : undefined;
44
+ return { text: response.output_text ?? '', usage };
35
45
  }
36
46
  const response = await client.chat.completions.create({
37
47
  model,
@@ -41,7 +51,13 @@ export function makeOpenAICouncilAdapter(model, label) {
41
51
  { role: 'user', content: userInput },
42
52
  ],
43
53
  });
44
- return response.choices[0]?.message?.content ?? '';
54
+ const usage = response.usage ? {
55
+ input: response.usage.prompt_tokens,
56
+ output: response.usage.completion_tokens,
57
+ costUSD: (response.usage.prompt_tokens / 1_000_000) * COST_PER_M_INPUT +
58
+ (response.usage.completion_tokens / 1_000_000) * COST_PER_M_OUTPUT,
59
+ } : undefined;
60
+ return { text: response.choices[0]?.message?.content ?? '', usage };
45
61
  }
46
62
  catch (err) {
47
63
  const message = err instanceof Error ? err.message : String(err);
@@ -1,5 +1,14 @@
1
+ export interface CouncilUsage {
2
+ input: number;
3
+ output: number;
4
+ costUSD?: number;
5
+ }
6
+ export interface CouncilConsultResult {
7
+ text: string;
8
+ usage?: CouncilUsage;
9
+ }
1
10
  export interface CouncilAdapter {
2
11
  readonly label: string;
3
- consult(prompt: string, context: string): Promise<string>;
12
+ consult(prompt: string, context: string): Promise<CouncilConsultResult>;
4
13
  }
5
14
  //# sourceMappingURL=types.d.ts.map
@@ -22,9 +22,11 @@ const CODE_EXT = String.raw `(?:` +
22
22
  // 3
23
23
  String.raw `asm|cjs|clj|cpp|css|edn|elm|env|erl|exs|fsi|fsx|gql|hcl|hpp|htm|ini|jsx|lua|mdx|mjs|mli|nim|php|sol|sql|tsx|vue|xml|yml|zig|zsh|` +
24
24
  // 2
25
- String.raw `cc|cs|ex|fs|go|hs|jl|js|kt|md|mk|ml|mm|pl|pm|py|rb|rs|sc|sh|tf|ts|` +
26
- // 1
27
- String.raw `c|d|h|m|r|s` +
25
+ String.raw `cc|cs|ex|fs|go|hs|jl|js|kt|md|mk|ml|mm|pl|pm|py|rb|rs|sc|sh|tf|ts` +
26
+ // (single-letter code extensions like c/d/h/m/r/s are intentionally NOT in
27
+ // the bare-reference alternation: prose like "fn.r" or "lib.h" matches as
28
+ // a "file" too easily and breaks the `fix` command. They still match when
29
+ // explicitly backtick-wrapped — the LLM has to signal intent.)
28
30
  String.raw `)`;
29
31
  // Matches "path/to/file.ts:42" (bare with known ext), "`path/to/file.ts`" (any
30
32
  // ext when explicitly backtick-wrapped). Backtick-wrapped accepts any extension
@@ -7,6 +7,7 @@ import { runCouncil } from "../core/council/runner.js";
7
7
  import { makeClaudeCouncilAdapter } from "../adapters/council/claude.js";
8
8
  import { makeOpenAICouncilAdapter } from "../adapters/council/openai.js";
9
9
  import { GuardrailError } from "../core/errors.js";
10
+ import { appendCostLog } from "../core/persist/cost-log.js";
10
11
  function makeAdapter(entry) {
11
12
  switch (entry.adapter) {
12
13
  case 'claude': return makeClaudeCouncilAdapter(entry.model, entry.label);
@@ -58,14 +59,26 @@ export async function runCouncilCmd(opts) {
58
59
  }
59
60
  const adapters = councilConfig.models.map(makeAdapter);
60
61
  const synthesizer = opts.noSynthesize
61
- ? { label: 'none', consult: async () => '' }
62
+ ? { label: 'none', consult: async () => ({ text: '' }) }
62
63
  : makeAdapter(councilConfig.synthesizer);
63
- const result = await runCouncil(councilConfig, adapters, synthesizer, opts.prompt, contextDoc);
64
+ const start = Date.now();
65
+ const { result, usage } = await runCouncil(councilConfig, adapters, synthesizer, opts.prompt, contextDoc);
64
66
  // When no-synthesize, clear the empty synthesis object
65
67
  if (opts.noSynthesize && result.synthesis?.text === '') {
66
68
  delete result['synthesis'];
67
69
  }
68
70
  process.stdout.write(JSON.stringify(result, null, 2) + '\n');
71
+ // Persist to cost log so `claude-autopilot costs` reflects council runs
72
+ // (previously dropped — only scan + run pipeline were tracked, leading to
73
+ // misleadingly low lifetime totals after a council-heavy session).
74
+ appendCostLog(cwd, {
75
+ timestamp: new Date().toISOString(),
76
+ files: 0,
77
+ inputTokens: usage.inputTokens,
78
+ outputTokens: usage.outputTokens,
79
+ costUSD: usage.costUSD,
80
+ durationMs: Date.now() - start,
81
+ });
69
82
  if (result.status === 'failed')
70
83
  return 2;
71
84
  if (result.status === 'partial')
@@ -13,8 +13,14 @@ export interface PrDescOptions {
13
13
  kind: string;
14
14
  }): Promise<{
15
15
  rawOutput: string;
16
+ usage?: {
17
+ input: number;
18
+ output: number;
19
+ costUSD?: number;
20
+ };
16
21
  }>;
17
22
  };
23
+ _cwd?: string;
18
24
  }
19
25
  export interface PrDescResult {
20
26
  title: string;
@@ -1,5 +1,6 @@
1
1
  import * as fs from 'node:fs';
2
2
  import { execSync, spawnSync } from 'node:child_process';
3
+ import { appendCostLog } from "../core/persist/cost-log.js";
3
4
  export function truncateDiff(diff, charLimit = 6000) {
4
5
  if (diff.length <= charLimit)
5
6
  return diff;
@@ -52,11 +53,19 @@ function deriveTitleFromBranch(branch) {
52
53
  const cleaned = rest.replace(/[-_/]+/g, ' ').trim();
53
54
  return cleaned ? `${prefix}: ${cleaned}` : null;
54
55
  }
55
- }
56
- // Final fallback: return null (not '') when the branch normalizes to an
57
- // empty string (e.g. `_`, `---`). The caller chains via `??`, which only
58
- // short-circuits on null/undefined; an empty string would skip the rest
59
- // of the fallback chain and produce an empty PR title.
56
+ // Unknown prefix that contains a slash — treat the segment after the
57
+ // first slash as the descriptive part and default the conventional
58
+ // type to `chore:`. Example: `autopilot-test/validate-weights` →
59
+ // `chore: validate weights` rather than the prefix-less
60
+ // `autopilot test validate weights` (which fails commitlint and looks
61
+ // half-finished in PR titles).
62
+ const cleanedRest = rest.replace(/[-_/]+/g, ' ').trim();
63
+ if (cleanedRest)
64
+ return `chore: ${cleanedRest}`;
65
+ }
66
+ // No slash — return cleaned branch name, or null when it normalizes empty
67
+ // (e.g. `_`, `---`). The caller chains via `??`, which only short-circuits
68
+ // on null/undefined; an empty string would skip the rest of the fallback.
60
69
  const cleaned = branch.replace(/[-_/]+/g, ' ').trim();
61
70
  return cleaned || null;
62
71
  }
@@ -79,7 +88,8 @@ export async function runPrDesc(options) {
79
88
  const findings = options._cachedFindings ?? loadCachedFindings();
80
89
  const prompt = buildPrompt(branchName, truncateDiff(diff), summarizeFindings(findings));
81
90
  const engine = options._reviewEngine ?? await resolveEngine();
82
- const { rawOutput } = await engine.review({ content: prompt, kind: 'pr-diff' });
91
+ const start = Date.now();
92
+ const { rawOutput, usage } = await engine.review({ content: prompt, kind: 'pr-diff' });
83
93
  // Extract first non-empty bullet from the model's Summary section as a
84
94
  // last-resort title fallback when the model didn't emit `Title: ...`.
85
95
  const firstSummaryLine = rawOutput.split('\n')
@@ -93,6 +103,17 @@ export async function runPrDesc(options) {
93
103
  else {
94
104
  process.stdout.write(formatted + '\n');
95
105
  }
106
+ // Persist to cost log AFTER the output is emitted. The function itself
107
+ // swallows write errors (see core/persist/cost-log.ts) so a read-only FS
108
+ // or full disk doesn't kill commands that already succeeded.
109
+ appendCostLog(options._cwd ?? process.cwd(), {
110
+ timestamp: new Date().toISOString(),
111
+ files: 1,
112
+ inputTokens: usage?.input ?? 0,
113
+ outputTokens: usage?.output ?? 0,
114
+ costUSD: usage?.costUSD ?? 0,
115
+ durationMs: Date.now() - start,
116
+ });
96
117
  if (options.post) {
97
118
  return createPr(title, body, options.yes ?? false);
98
119
  }
@@ -133,6 +133,19 @@ export async function runScan(options = {}) {
133
133
  cwd,
134
134
  gitSummary: focusHint,
135
135
  });
136
+ // Single-file scan: every finding is about that file (or its imports).
137
+ // The LLM sometimes emits prose tokens like "n.r" or "fn.c" that the parser
138
+ // greedily matches as a file ref, producing junk paths that break `fix`.
139
+ // For single-file scan we KNOW the file — overwrite unconditionally rather
140
+ // than only filling `<unspecified>`. The 5.0.6 fallback was conditional on
141
+ // `<unspecified>` and missed the prose-noise case, leaving findings with
142
+ // bogus `n.r` paths that broke `fix --severity all` ("no fixable findings").
143
+ if (relFiles.length === 1) {
144
+ const onlyFile = relFiles[0];
145
+ for (const f of result.findings) {
146
+ f.file = onlyFile;
147
+ }
148
+ }
136
149
  // Apply ignore rules
137
150
  const ignoreRules = [...loadIgnoreRules(cwd), ...parseConfigIgnore(config.ignore)];
138
151
  const findings = applyIgnoreRules(result.findings, ignoreRules);
@@ -112,7 +112,15 @@ export async function runSetup(options = {}) {
112
112
  throw new Error(`Preset config not found for: ${detection.preset}. Looked in:\n ${presetSearchPaths(detection.preset, cwd).join('\n ')}`);
113
113
  }
114
114
  let presetContent = await fsAsync.readFile(presetConfigPath, 'utf8');
115
- presetContent = presetContent.trimEnd() + `\ntestCommand: "${detection.testCommand}"\n`;
115
+ // Only append testCommand if the preset doesn't already declare one — several
116
+ // presets (go, python, python-fastapi, rails-postgres) ship with their own
117
+ // testCommand line. Unconditionally appending produced duplicate YAML keys
118
+ // ("testCommand" twice in the same map), which yaml parsers reject. After
119
+ // 5.0.5 that broke `setup` on Python repos: every command after setup
120
+ // hard-failed until the user manually edited the file.
121
+ if (!/^testCommand\s*:/m.test(presetContent)) {
122
+ presetContent = presetContent.trimEnd() + `\ntestCommand: "${detection.testCommand}"\n`;
123
+ }
116
124
  // Apply profile overlay if specified
117
125
  if (options.profile) {
118
126
  const profile = PROFILES[options.profile];
@@ -1,4 +1,12 @@
1
1
  import type { CouncilConfig, CouncilResult } from './types.ts';
2
2
  import type { CouncilAdapter } from '../../adapters/council/types.ts';
3
- export declare function runCouncil(config: CouncilConfig, adapters: CouncilAdapter[], synthesizer: CouncilAdapter, prompt: string, contextDoc: string): Promise<CouncilResult>;
3
+ export interface CouncilRunOutput {
4
+ result: CouncilResult;
5
+ usage: {
6
+ inputTokens: number;
7
+ outputTokens: number;
8
+ costUSD: number;
9
+ };
10
+ }
11
+ export declare function runCouncil(config: CouncilConfig, adapters: CouncilAdapter[], synthesizer: CouncilAdapter, prompt: string, contextDoc: string): Promise<CouncilRunOutput>;
4
12
  //# sourceMappingURL=runner.d.ts.map
@@ -4,13 +4,19 @@ async function consultWithTimeout(adapter, prompt, context, timeoutMs) {
4
4
  const start = Date.now();
5
5
  let timer;
6
6
  try {
7
- const text = await Promise.race([
7
+ const consultResult = await Promise.race([
8
8
  adapter.consult(prompt, context),
9
9
  new Promise((_, reject) => {
10
10
  timer = setTimeout(() => reject(new Error('timeout')), timeoutMs);
11
11
  }),
12
12
  ]);
13
- return { label: adapter.label, status: 'ok', text, latencyMs: Date.now() - start };
13
+ return {
14
+ label: adapter.label,
15
+ status: 'ok',
16
+ text: consultResult.text,
17
+ latencyMs: Date.now() - start,
18
+ usage: consultResult.usage,
19
+ };
14
20
  }
15
21
  catch (err) {
16
22
  const message = err instanceof Error ? err.message : String(err);
@@ -30,9 +36,27 @@ export async function runCouncil(config, adapters, synthesizer, prompt, contextD
30
36
  const run_id = crypto.randomUUID();
31
37
  const context = windowContext(contextDoc, config.parallelInputMaxTokens);
32
38
  const responses = await Promise.all(adapters.map(a => consultWithTimeout(a, prompt, context, config.timeoutMs)));
39
+ const aggregateUsage = (entries) => {
40
+ let inputTokens = 0, outputTokens = 0, costUSD = 0;
41
+ for (const e of entries) {
42
+ if (e.usage) {
43
+ inputTokens += e.usage.input;
44
+ outputTokens += e.usage.output;
45
+ costUSD += e.usage.costUSD ?? 0;
46
+ }
47
+ }
48
+ return { inputTokens, outputTokens, costUSD };
49
+ };
50
+ // Strip internal `usage` field before serializing to the public CouncilResult
51
+ // schema — usage is summed and surfaced separately so the CLI can log it to
52
+ // the cost ledger without leaking it into the JSON wire format.
53
+ const publicResponses = responses.map(({ usage: _u, ...rest }) => rest);
33
54
  const successful = responses.filter(r => r.status === 'ok');
34
55
  if (successful.length < config.minSuccessfulResponses) {
35
- return { schema_version: 1, run_id, status: 'failed', prompt, responses };
56
+ return {
57
+ result: { schema_version: 1, run_id, status: 'failed', prompt, responses: publicResponses },
58
+ usage: aggregateUsage(responses),
59
+ };
36
60
  }
37
61
  const responseSections = successful
38
62
  .map(r => `### ${r.label}\n${r.text}`)
@@ -47,13 +71,20 @@ export async function runCouncil(config, adapters, synthesizer, prompt, contextD
47
71
  // Synthesizer shares the same per-call timeout as model calls so a hung
48
72
  // synthesizer API doesn't block the whole command indefinitely.
49
73
  const synthResponse = await consultWithTimeout(synthesizer, synthesisPrompt, synthesisCtx, config.timeoutMs);
74
+ const totalUsage = aggregateUsage([...responses, synthResponse]);
50
75
  // status:'ok' means the synthesizer call itself completed without error.
51
76
  // Empty text is valid (e.g. the --no-synthesize stub that intentionally
52
77
  // returns ''); only treat actual failures/timeouts as partial.
53
78
  if (synthResponse.status === 'ok') {
54
79
  const synthesis = { label: synthesizer.label, text: synthResponse.text ?? '', latencyMs: synthResponse.latencyMs };
55
- return { schema_version: 1, run_id, status: 'success', prompt, responses, synthesis };
80
+ return {
81
+ result: { schema_version: 1, run_id, status: 'success', prompt, responses: publicResponses, synthesis },
82
+ usage: totalUsage,
83
+ };
56
84
  }
57
- return { schema_version: 1, run_id, status: 'partial', prompt, responses };
85
+ return {
86
+ result: { schema_version: 1, run_id, status: 'partial', prompt, responses: publicResponses },
87
+ usage: totalUsage,
88
+ };
58
89
  }
59
90
  //# sourceMappingURL=runner.js.map
@@ -3,9 +3,20 @@ import * as path from 'node:path';
3
3
  const CACHE_DIR = '.guardrail-cache';
4
4
  const LOG_FILE = 'costs.jsonl';
5
5
  export function appendCostLog(cwd, entry) {
6
- const dir = path.join(cwd, CACHE_DIR);
7
- fs.mkdirSync(dir, { recursive: true });
8
- fs.appendFileSync(path.join(dir, LOG_FILE), JSON.stringify(entry) + '\n', 'utf8');
6
+ // Cost log is observability, not a contract. A failed write (read-only FS,
7
+ // full disk, permission error) must NEVER block the caller — every callsite
8
+ // calls this *after* its primary output is emitted, and a throw here would
9
+ // cause unhandled-rejection crashes after work has already succeeded.
10
+ // Bugbot HIGH on PR #51 surfaced this for pr-desc/council; consolidating
11
+ // the swallow here so the same defense applies to scan/run automatically.
12
+ try {
13
+ const dir = path.join(cwd, CACHE_DIR);
14
+ fs.mkdirSync(dir, { recursive: true });
15
+ fs.appendFileSync(path.join(dir, LOG_FILE), JSON.stringify(entry) + '\n', 'utf8');
16
+ }
17
+ catch {
18
+ // Intentionally empty — observability failures should not surface to users.
19
+ }
9
20
  }
10
21
  export function readCostLog(cwd) {
11
22
  const p = path.join(cwd, CACHE_DIR, LOG_FILE);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@delegance/claude-autopilot",
3
- "version": "5.0.5",
3
+ "version": "5.0.7",
4
4
  "type": "module",
5
5
  "description": "Autonomous development pipeline for Claude Code: brainstorm → spec → plan → implement → migrate → validate → PR → review → merge. Multi-model, local-first, every phase a skill you can intervene in.",
6
6
  "keywords": [