@link-assistant/hive-mind 1.76.1 → 1.77.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +70 -0
- package/package.json +1 -1
- package/src/anthropic-cost-accumulator.lib.mjs +107 -0
- package/src/claude.budget-stats.lib.mjs +29 -6
- package/src/claude.lib.mjs +38 -7
- package/src/solve.auto-continue.lib.mjs +17 -0
- package/src/solve.config.lib.mjs +70 -0
- package/src/solve.escalate.lib.mjs +505 -0
- package/src/solve.mjs +3 -3
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,75 @@
|
|
|
1
1
|
# @link-assistant/hive-mind
|
|
2
2
|
|
|
3
|
+
## 1.77.0
|
|
4
|
+
|
|
5
|
+
### Minor Changes
|
|
6
|
+
|
|
7
|
+
- a50d201: feat(solve): experimental `--escalate` mode (#1885)
|
|
8
|
+
|
|
9
|
+
Add an experimental `solve` option family that solves a task cheaply first and
|
|
10
|
+
escalates to a more capable (more expensive) model only while unfinished work
|
|
11
|
+
remains. The model ladder, cheapest → most capable, is `haiku < sonnet < opus <
|
|
12
|
+
fable`.
|
|
13
|
+
- `--escalate` (bare) → the default range `sonnet-fable`.
|
|
14
|
+
- `--escalate sonnet-opus` → an explicit `<lower>-<upper>` range (`-` delimits the
|
|
15
|
+
bounds; only the short ladder names are allowed inside a range).
|
|
16
|
+
- `--escalate-from haiku` → shortcut for `--escalate haiku-fable` (aliases such as
|
|
17
|
+
`opus-4-8` accepted here, since a single value is unambiguous).
|
|
18
|
+
- `--escalate-steps N` (default 1) → keep each tier for N working sessions before
|
|
19
|
+
escalating (e.g. `2` → two sonnet sessions, then two opus, then two fable).
|
|
20
|
+
|
|
21
|
+
The first regular solve session runs on the range's lower bound (unless `--model`
|
|
22
|
+
is explicitly pinned). After it finishes, the escalate loop re-scans the pull
|
|
23
|
+
request for deferred/unfinished-work indicators — reusing the detector from issue
|
|
24
|
+
#1883 — and escalates to the next tier only if work remains; otherwise it stops
|
|
25
|
+
early so the expensive tiers are never invoked. Restarts are capped at 3
|
|
26
|
+
consecutive errors and stop on a usage limit. Escalate is Claude-only and runs
|
|
27
|
+
before `--finalize` / `--keep-working`.
|
|
28
|
+
|
|
29
|
+
Pure parsing/planning helpers live in a network-free module
|
|
30
|
+
(`src/solve.escalate.lib.mjs`) with full unit-test coverage
|
|
31
|
+
(`tests/test-escalate-1885.mjs`); a deep case study is compiled under
|
|
32
|
+
`docs/case-studies/issue-1885/`.
|
|
33
|
+
|
|
34
|
+
- 53a0544: Update Hive Mind Docker images to `konard/box` and `konard/box-dind` 2.3.1 so Docker-in-Docker deployments can use the upstream host-image passthrough allowlist.
|
|
35
|
+
|
|
36
|
+
## 1.76.2
|
|
37
|
+
|
|
38
|
+
### Patch Changes
|
|
39
|
+
|
|
40
|
+
- 5d8d6c1: fix(cost): accumulate Anthropic cost across limit-reset resumes (#1886)
|
|
41
|
+
|
|
42
|
+
The session cost summary could report a large negative "Difference" (e.g.
|
|
43
|
+
`$-11.422796 (-31.66%)`) between the public pricing estimate and the Anthropic
|
|
44
|
+
figure. Root cause: the public estimate is computed from the session JSONL,
|
|
45
|
+
which accumulates the **entire** session across every limit-reset resume, while
|
|
46
|
+
the Anthropic `total_cost_usd` from the stream-json `result` event is scoped to a
|
|
47
|
+
**single** Claude process (only the resumed run). Comparing a full-session
|
|
48
|
+
estimate against a single-process figure produced a misleading gap even though
|
|
49
|
+
both numbers were individually correct.
|
|
50
|
+
|
|
51
|
+
The per-token math (`calculateModelCost`) was audited and is correct; this is a
|
|
52
|
+
scope mismatch, not a pricing error.
|
|
53
|
+
|
|
54
|
+
Fix:
|
|
55
|
+
- New `src/anthropic-cost-accumulator.lib.mjs` keeps a model-agnostic running
|
|
56
|
+
total of Anthropic's per-process `total_cost_usd` (it sums dollars, never
|
|
57
|
+
inspecting per-token prices, so it is correct for all models).
|
|
58
|
+
- `runClaude` seeds from and returns the cumulative total on every terminal path;
|
|
59
|
+
the cross-process limit-reset resume threads it via a new hidden
|
|
60
|
+
`--previous-anthropic-cost` option (`autoContinueWhenLimitResets`).
|
|
61
|
+
- A usage-limit hit ends as `is_error` with no `success` result event, so its
|
|
62
|
+
cost was previously discarded. The cost from a non-success terminal `result`
|
|
63
|
+
event is now kept as a fallback and folded into the accumulator, closing the
|
|
64
|
+
gap in the reported scenario.
|
|
65
|
+
- `displayCostComparison` / `displaySessionTokenUsage` print a verbose
|
|
66
|
+
accumulation breakdown ("cumulative across resume iterations: this run … +
|
|
67
|
+
carried forward … = …") so the figure is never mysterious again.
|
|
68
|
+
|
|
69
|
+
A deep case study (timeline, proven root causes, exact reproduced numbers, online
|
|
70
|
+
prior art incl. `anthropics/claude-code#13088`) is compiled under
|
|
71
|
+
`docs/case-studies/issue-1886/`.
|
|
72
|
+
|
|
3
73
|
## 1.76.1
|
|
4
74
|
|
|
5
75
|
### Patch Changes
|
package/package.json
CHANGED
(+1 -1; hunk not shown in this extract)
package/src/anthropic-cost-accumulator.lib.mjs
ADDED
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* Issue #1886: Cumulative Anthropic cost across resume iterations.
|
|
5
|
+
*
|
|
6
|
+
* Background
|
|
7
|
+
* ----------
|
|
8
|
+
* The "Token Usage Summary" compares two numbers:
|
|
9
|
+
* - "Public pricing estimate" — computed locally from the session JSONL by
|
|
10
|
+
* `calculateSessionTokens`. The JSONL accumulates the *entire* logical
|
|
11
|
+
* session: when a run hits a usage limit and is resumed (either in-process
|
|
12
|
+
* via the auto-merge loop, or cross-process via
|
|
13
|
+
* `autoContinueWhenLimitResets` spawning a fresh `solve` with `--resume`),
|
|
14
|
+
* Claude Code appends to the *same* `<session-id>.jsonl`, so every run's
|
|
15
|
+
* tokens are present.
|
|
16
|
+
* - "Calculated by Anthropic" — taken from the `result` event's
|
|
17
|
+
* `total_cost_usd`. That figure is scoped to a *single* Claude process: it
|
|
18
|
+
* only covers the tokens that process produced, NOT the tokens inherited
|
|
19
|
+
* from a previous run that was interrupted by a limit reset.
|
|
20
|
+
*
|
|
21
|
+
* The result is a scope mismatch, not a pricing bug. In issue #1886 the public
|
|
22
|
+
* estimate ($36.085016, full session) was compared against Anthropic's
|
|
23
|
+
* per-process figure ($24.662220, the resumed run only), yielding a misleading
|
|
24
|
+
* "-31.66%" difference even though both numbers are individually correct.
|
|
25
|
+
*
|
|
26
|
+
* The fix
|
|
27
|
+
* -------
|
|
28
|
+
* Accumulate Anthropic's reported cost across resume iterations so the figure
|
|
29
|
+
* shown next to the full-session public estimate covers the same scope. This
|
|
30
|
+
* module is the single source of truth for that running total:
|
|
31
|
+
*
|
|
32
|
+
* - Each `solve` process seeds the accumulator once from
|
|
33
|
+
* `--previous-anthropic-cost` (0 for the first run; the carried-forward
|
|
34
|
+
* total for an auto-resumed run).
|
|
35
|
+
* - Every finished Claude process adds its own `total_cost_usd` via
|
|
36
|
+
* `addAnthropicRunCost`, which also covers the in-process auto-merge loop
|
|
37
|
+
* (each iteration is a separate Claude process in the same node process).
|
|
38
|
+
* - The display and the cross-process spawn both read the cumulative total,
|
|
39
|
+
* so "Calculated by Anthropic" tracks the full session.
|
|
40
|
+
*
|
|
41
|
+
* The accumulation is model-agnostic: it sums dollar figures and never inspects
|
|
42
|
+
* per-token prices, so it is correct for Fable 5, Opus, Sonnet, Haiku, and any
|
|
43
|
+
* future model. See docs/case-studies/issue-1886/ for the full analysis.
|
|
44
|
+
*/
|
|
45
|
+
|
|
46
|
+
// Module-level singleton: the cumulative Anthropic cost (USD) for the current
// logical session, i.e. this node process plus whatever was seeded from prior
// processes.
let cumulativeAnthropicCostUSD = 0;
// Seeding must happen exactly once per node process. The auto-merge loop calls
// runClaude (and therefore the seed helper) repeatedly within a single process;
// re-seeding from the same CLI flag each time would wipe out accumulation.
let seeded = false;

/**
 * Coerce an arbitrary value to a non-negative finite USD amount.
 *
 * Only numbers and non-blank strings are treated as candidate amounts. Every
 * other type (booleans, arrays, objects, symbols, bigint, null, undefined)
 * yields 0. Passing such values straight through `Number(...)` would turn
 * `true` into $1 and `[5]` into $5, and would throw a TypeError on a Symbol.
 * @param {*} value
 * @returns {number} the sanitized amount, or 0 when not a positive finite number
 */
const toCostAmount = value => {
  let amount;
  if (typeof value === 'number') {
    amount = value;
  } else if (typeof value === 'string' && value.trim() !== '') {
    amount = Number(value);
  } else {
    return 0;
  }
  return Number.isFinite(amount) && amount > 0 ? amount : 0;
};

/**
 * Seed the accumulator from the carried-forward previous-run cost, exactly once
 * per node process. Subsequent calls are no-ops (they return the current total
 * unchanged) so the in-process auto-merge loop does not reset the running total.
 * @param {number|string|null|undefined} previousAnthropicCostUSD
 * @returns {number} the cumulative total after seeding
 */
export const seedCumulativeAnthropicCost = previousAnthropicCostUSD => {
  if (seeded) return cumulativeAnthropicCostUSD;
  cumulativeAnthropicCostUSD = toCostAmount(previousAnthropicCostUSD);
  seeded = true;
  return cumulativeAnthropicCostUSD;
};

/**
 * Add a single Claude process's reported cost to the running total.
 * Non-positive / non-finite / non-numeric inputs (e.g. a null cost when a run
 * was interrupted by a limit before emitting a success result) contribute
 * nothing.
 * @param {number|string|null|undefined} runCostUSD
 * @returns {number} the cumulative total after adding
 */
export const addAnthropicRunCost = runCostUSD => {
  cumulativeAnthropicCostUSD += toCostAmount(runCostUSD);
  return cumulativeAnthropicCostUSD;
};

/**
 * @returns {number} the cumulative Anthropic cost for the current logical session
 */
export const getCumulativeAnthropicCost = () => cumulativeAnthropicCostUSD;

/**
 * @returns {boolean} true once a positive cost has been seeded or accumulated
 */
export const hasCumulativeAnthropicCost = () => cumulativeAnthropicCostUSD > 0;

/**
 * Reset the accumulator (total back to 0 and the seed-once latch cleared).
 * Intended for tests — production code seeds once and accumulates for the
 * lifetime of the process.
 */
export const resetCumulativeAnthropicCost = () => {
  cumulativeAnthropicCostUSD = 0;
  seeded = false;
};
|
|
package/src/claude.budget-stats.lib.mjs
CHANGED
|
@@ -138,19 +138,38 @@ export const displayModelUsage = async (usage, log) => {
|
|
|
138
138
|
/**
|
|
139
139
|
* Display cost comparison between public pricing and Anthropic's official cost
|
|
140
140
|
* Issue #1557: Show simplified format when costs match, remove USD suffix
|
|
141
|
-
*
|
|
142
|
-
*
|
|
141
|
+
* Issue #1886: `anthropicCost` is the cumulative Anthropic cost across every
|
|
142
|
+
* resume iteration (the session JSONL — and therefore `publicCost` — spans
|
|
143
|
+
* the full session, so the Anthropic figure must too). The optional
|
|
144
|
+
* `previousAnthropicCost` is the portion carried in from earlier runs; when
|
|
145
|
+
* non-zero we show a verbose breakdown so the accumulation is auditable.
|
|
146
|
+
* @param {number|null} publicCost - Public pricing estimate (full session)
|
|
147
|
+
* @param {number|null} anthropicCost - Anthropic's cumulative official cost (full session)
|
|
143
148
|
* @param {Function} log - Logging function
|
|
149
|
+
* @param {Object} [options]
|
|
150
|
+
* @param {number} [options.previousAnthropicCost=0] - cost carried in from earlier resume iterations
|
|
144
151
|
*/
|
|
145
|
-
export const displayCostComparison = async (publicCost, anthropicCost, log) => {
|
|
152
|
+
export const displayCostComparison = async (publicCost, anthropicCost, log, options = {}) => {
|
|
153
|
+
const previousAnthropicCost = options.previousAnthropicCost || 0;
|
|
146
154
|
const hasPublic = publicCost !== null && publicCost !== undefined;
|
|
147
155
|
const hasAnthropic = anthropicCost !== null && anthropicCost !== undefined;
|
|
148
156
|
const publicDec = hasPublic ? new Decimal(publicCost) : null;
|
|
149
157
|
const anthropicDec = hasAnthropic ? new Decimal(anthropicCost) : null;
|
|
158
|
+
// Issue #1886: when the Anthropic figure was accumulated across resumes,
|
|
159
|
+
// expose the breakdown in verbose mode so "this run + carried forward = total"
|
|
160
|
+
// is auditable from the saved log.
|
|
161
|
+
const logAccumulationBreakdown = async () => {
|
|
162
|
+
if (previousAnthropicCost > 0 && anthropicDec) {
|
|
163
|
+
const thisRun = anthropicDec.minus(new Decimal(previousAnthropicCost));
|
|
164
|
+
await log(` ↳ Anthropic cost is cumulative across resume iterations (issue #1886):`, { verbose: true });
|
|
165
|
+
await log(` this run: $${thisRun.toFixed(6)} + carried forward: $${new Decimal(previousAnthropicCost).toFixed(6)} = $${anthropicDec.toFixed(6)}`, { verbose: true });
|
|
166
|
+
}
|
|
167
|
+
};
|
|
150
168
|
// Issue #1703: also collapse to the short form when the rounded difference is below display precision,
|
|
151
169
|
// so reports like "Difference: $-0.000000 (-0.00%)" no longer waste two extra lines.
|
|
152
170
|
if (publicDec && anthropicDec && anthropicDec.minus(publicDec).abs().toFixed(6) === '0.000000') {
|
|
153
171
|
await log(`\n 💰 Cost: $${anthropicDec.toFixed(6)}`);
|
|
172
|
+
await logAccumulationBreakdown();
|
|
154
173
|
return;
|
|
155
174
|
}
|
|
156
175
|
await log('\n 💰 Cost estimation:');
|
|
@@ -163,6 +182,7 @@ export const displayCostComparison = async (publicCost, anthropicCost, log) => {
|
|
|
163
182
|
} else {
|
|
164
183
|
await log(' Difference: unknown');
|
|
165
184
|
}
|
|
185
|
+
await logAccumulationBreakdown();
|
|
166
186
|
};
|
|
167
187
|
|
|
168
188
|
/**
|
|
@@ -316,11 +336,12 @@ export const displayBudgetStats = async (usage, tokenUsage, log) => {
|
|
|
316
336
|
* @param {string} params.sessionId - Claude session id (skips when falsy)
|
|
317
337
|
* @param {string} params.tempDir - Working directory containing the session JSONL (skips when falsy)
|
|
318
338
|
* @param {Object|null} params.resultModelUsage - Authoritative per-model usage from the result JSON event
|
|
319
|
-
* @param {number} params.anthropicTotalCostUSD - Anthropic's official
|
|
339
|
+
* @param {number} params.anthropicTotalCostUSD - Anthropic's cumulative official cost across resume iterations (issue #1886)
|
|
340
|
+
* @param {number} [params.previousAnthropicCostUSD=0] - portion of anthropicTotalCostUSD carried in from earlier resume iterations (issue #1886)
|
|
320
341
|
* @param {Object} params.argv - Parsed CLI args (reads argv.tokensBudgetStats)
|
|
321
342
|
* @param {Function} params.log - Logger
|
|
322
343
|
*/
|
|
323
|
-
export const displaySessionTokenUsage = async ({ sessionId, tempDir, resultModelUsage, anthropicTotalCostUSD, argv, log }) => {
|
|
344
|
+
export const displaySessionTokenUsage = async ({ sessionId, tempDir, resultModelUsage, anthropicTotalCostUSD, previousAnthropicCostUSD = 0, argv, log }) => {
|
|
324
345
|
if (!sessionId || !tempDir) return;
|
|
325
346
|
try {
|
|
326
347
|
const tokenUsage = await calculateSessionTokens(sessionId, tempDir, resultModelUsage);
|
|
@@ -355,7 +376,9 @@ export const displaySessionTokenUsage = async ({ sessionId, tempDir, resultModel
|
|
|
355
376
|
await log('\n 📈 Total across all models:');
|
|
356
377
|
}
|
|
357
378
|
// Show cost comparison (for both single and multiple models)
|
|
358
|
-
|
|
379
|
+
// Issue #1886: anthropicTotalCostUSD is cumulative across resume iterations
|
|
380
|
+
// so it shares the full-session scope of the JSONL-based public estimate.
|
|
381
|
+
await displayCostComparison(tokenUsage.totalCostUSD, anthropicTotalCostUSD, log, { previousAnthropicCost: previousAnthropicCostUSD });
|
|
359
382
|
// Show total tokens for single model only
|
|
360
383
|
if (modelIds.length === 1) {
|
|
361
384
|
await log(` Total tokens: ${formatNumber(tokenUsage.totalTokens)}`);
|
package/src/claude.lib.mjs
CHANGED
|
@@ -17,6 +17,7 @@ import { sanitizeObjectStrings } from './unicode-sanitization.lib.mjs';
|
|
|
17
17
|
import Decimal from 'decimal.js-light';
|
|
18
18
|
import { createEmptySubSessionUsage, accumulateModelUsage, mergeResultModelUsage, createSubAgentCallEntry, accumulateSubAgentUsage, getRawRequestInputTokens, displaySessionTokenUsage } from './claude.budget-stats.lib.mjs';
|
|
19
19
|
import { buildClaudeResumeCommand, buildClaudeAutonomousResumeCommand } from './claude.command-builder.lib.mjs';
|
|
20
|
+
import { seedCumulativeAnthropicCost, addAnthropicRunCost } from './anthropic-cost-accumulator.lib.mjs'; // Issue #1886
|
|
20
21
|
import { buildSolveResumeCommand } from './solve.resume-command.lib.mjs'; // Issue #942
|
|
21
22
|
import { SESSION_FORCE_KILLED_MARKER, postTrackedComment } from './tool-comments.lib.mjs'; // Issue #1625
|
|
22
23
|
import { handleClaudeRuntimeSwitch } from './claude.runtime-switch.lib.mjs'; // see issue #1141
|
|
@@ -662,6 +663,9 @@ export const executeClaudeCommand = async params => {
|
|
|
662
663
|
let stderrErrors = [];
|
|
663
664
|
let resultSuccessReceived = false;
|
|
664
665
|
let anthropicTotalCostUSD = null;
|
|
666
|
+
// Issue #1886: a usage-limit hit ends as is_error (no success result). Keep
|
|
667
|
+
// the latest cost from ANY result event as a fallback for the failure path.
|
|
668
|
+
let anthropicCostFromAnyResult = null;
|
|
665
669
|
let errorDuringExecution = false;
|
|
666
670
|
let resultSummary = null;
|
|
667
671
|
let resultModelUsage = null;
|
|
@@ -942,7 +946,9 @@ export const executeClaudeCommand = async params => {
|
|
|
942
946
|
anthropicTotalCostUSD = data.total_cost_usd;
|
|
943
947
|
await log(`💰 Anthropic official cost captured from success result: $${anthropicTotalCostUSD.toFixed(6)}`, { verbose: true });
|
|
944
948
|
} else if (data.total_cost_usd !== undefined && data.total_cost_usd !== null) {
|
|
945
|
-
|
|
949
|
+
// Issue #1886: non-success terminal (e.g. usage-limit hit) still reports this process's cost — keep as accumulation fallback.
|
|
950
|
+
anthropicCostFromAnyResult = data.total_cost_usd;
|
|
951
|
+
await log(`💰 Anthropic cost from ${data.subtype || 'unknown'} result kept as fallback for accumulation: $${data.total_cost_usd.toFixed(6)}`, { verbose: true });
|
|
946
952
|
}
|
|
947
953
|
// Issue #1263: Extract result summary (AI's summary of work done) for --attach-solution-summary
|
|
948
954
|
if (data.subtype === 'success' && data.result && typeof data.result === 'string') {
|
|
@@ -1106,6 +1112,10 @@ export const executeClaudeCommand = async params => {
|
|
|
1106
1112
|
await log(JSON.stringify(data, null, 2));
|
|
1107
1113
|
if (data.type === 'result' && data.subtype === 'success' && data.total_cost_usd != null) {
|
|
1108
1114
|
anthropicTotalCostUSD = data.total_cost_usd;
|
|
1115
|
+
} else if (data.type === 'result' && data.total_cost_usd != null) {
|
|
1116
|
+
// Issue #1886: keep a non-success terminal result's cost as a fallback
|
|
1117
|
+
// for accumulation (see the streaming branch above).
|
|
1118
|
+
anthropicCostFromAnyResult = data.total_cost_usd;
|
|
1109
1119
|
}
|
|
1110
1120
|
// Issue #1472: Forward remaining buffer event to interactive handler (was previously missed)
|
|
1111
1121
|
if (interactiveHandler) {
|
|
@@ -1187,6 +1197,9 @@ export const executeClaudeCommand = async params => {
|
|
|
1187
1197
|
await log(`\n\n❌ API explicitly marked error as not retryable (x-should-retry: false) and session made no progress (num_turns=${resultNumTurns}) after ${retryCount} attempt(s)`, { level: 'error' });
|
|
1188
1198
|
await log(` This error is not recoverable. Failing fast to avoid a stuck retry loop (Issue #1437).`, { level: 'error' });
|
|
1189
1199
|
await log(` Check https://status.anthropic.com/ for API status.`, { level: 'error' });
|
|
1200
|
+
// Issue #1886: fold captured cost so a cross-process resume's carried-forward cost is not dropped here.
|
|
1201
|
+
seedCumulativeAnthropicCost(argv.previousAnthropicCost);
|
|
1202
|
+
const cumulativeAnthropicCostUSDOnStuckRetry = addAnthropicRunCost(anthropicTotalCostUSD ?? anthropicCostFromAnyResult);
|
|
1190
1203
|
return {
|
|
1191
1204
|
success: false,
|
|
1192
1205
|
sessionId,
|
|
@@ -1196,7 +1209,7 @@ export const executeClaudeCommand = async params => {
|
|
|
1196
1209
|
messageCount,
|
|
1197
1210
|
toolUseCount,
|
|
1198
1211
|
is503Error,
|
|
1199
|
-
anthropicTotalCostUSD,
|
|
1212
|
+
anthropicTotalCostUSD: cumulativeAnthropicCostUSDOnStuckRetry, // Issue #1104/#1886
|
|
1200
1213
|
resultSummary,
|
|
1201
1214
|
// Issue #1845: surface the actual error so callers can show it to users
|
|
1202
1215
|
errorInfo: { message: lastMessage || 'API explicitly marked error as not retryable', exitCode },
|
|
@@ -1233,6 +1246,9 @@ export const executeClaudeCommand = async params => {
|
|
|
1233
1246
|
return await executeWithRetry();
|
|
1234
1247
|
} else {
|
|
1235
1248
|
await log(`\n\n❌ Transient API error persisted after ${maxRetries} retries\n Please try again later or check https://status.anthropic.com/`, { level: 'error' });
|
|
1249
|
+
// Issue #1886: fold captured cost so the carried-forward cost survives this retries-exhausted path.
|
|
1250
|
+
seedCumulativeAnthropicCost(argv.previousAnthropicCost);
|
|
1251
|
+
const cumulativeAnthropicCostUSDOnRetriesExhausted = addAnthropicRunCost(anthropicTotalCostUSD ?? anthropicCostFromAnyResult);
|
|
1236
1252
|
return {
|
|
1237
1253
|
success: false,
|
|
1238
1254
|
sessionId,
|
|
@@ -1242,7 +1258,7 @@ export const executeClaudeCommand = async params => {
|
|
|
1242
1258
|
messageCount,
|
|
1243
1259
|
toolUseCount,
|
|
1244
1260
|
is503Error, // preserve for callers that check this
|
|
1245
|
-
anthropicTotalCostUSD, // Issue #1104: Include cost even on failure
|
|
1261
|
+
anthropicTotalCostUSD: cumulativeAnthropicCostUSDOnRetriesExhausted, // Issue #1104/#1886: Include cumulative cost even on failure
|
|
1246
1262
|
resultSummary, // Issue #1263: Include result summary
|
|
1247
1263
|
// Issue #1845: surface the actual error so callers can show it to users
|
|
1248
1264
|
errorInfo: { message: lastMessage || `Transient API error persisted after ${maxRetries} retries`, exitCode },
|
|
@@ -1294,6 +1310,12 @@ export const executeClaudeCommand = async params => {
|
|
|
1294
1310
|
await log(` Memory: ${resourcesAfter.memory.split('\n')[1]}`, { verbose: true });
|
|
1295
1311
|
await log(` Load: ${resourcesAfter.load}`, { verbose: true });
|
|
1296
1312
|
await showResumeCommand(sessionId, tempDir, claudePath, argv.model, log, argv);
|
|
1313
|
+
// Issue #1886: on failure (usually a usage-limit hit → auto-resume) fold
|
|
1314
|
+
// the captured cost into the cumulative total so autoContinueWhenLimitResets
|
|
1315
|
+
// carries it forward. A limit hit ends as is_error → fall back to the
|
|
1316
|
+
// non-success result cost.
|
|
1317
|
+
seedCumulativeAnthropicCost(argv.previousAnthropicCost);
|
|
1318
|
+
const cumulativeAnthropicCostUSDOnFailure = addAnthropicRunCost(anthropicTotalCostUSD ?? anthropicCostFromAnyResult);
|
|
1297
1319
|
return {
|
|
1298
1320
|
success: false,
|
|
1299
1321
|
sessionId,
|
|
@@ -1303,7 +1325,7 @@ export const executeClaudeCommand = async params => {
|
|
|
1303
1325
|
messageCount,
|
|
1304
1326
|
toolUseCount,
|
|
1305
1327
|
errorDuringExecution,
|
|
1306
|
-
anthropicTotalCostUSD, // Issue #1104:
|
|
1328
|
+
anthropicTotalCostUSD: cumulativeAnthropicCostUSDOnFailure, // Issue #1104/#1886: cumulative cost even on failure
|
|
1307
1329
|
resultSummary, // Issue #1263: Include result summary
|
|
1308
1330
|
// Issue #1845: surface the core error (e.g. "API Error: Output blocked by content
|
|
1309
1331
|
// filtering policy") so users see what actually went wrong, not just a generic message.
|
|
@@ -1322,7 +1344,13 @@ export const executeClaudeCommand = async params => {
|
|
|
1322
1344
|
await log(`📊 Total messages: ${messageCount}, Tool uses: ${toolUseCount}`);
|
|
1323
1345
|
// Calculate and display total token usage from session JSONL file.
|
|
1324
1346
|
// Extracted to claude.budget-stats.lib.mjs to keep this file under the line limit (Issue #1834).
|
|
1325
|
-
|
|
1347
|
+
// Issue #1886: the JSONL spans every resume iteration but each result
|
|
1348
|
+
// event's total_cost_usd covers only this process; seed the carried-forward
|
|
1349
|
+
// cost + add this process's so the cumulative total shares the JSONL scope.
|
|
1350
|
+
seedCumulativeAnthropicCost(argv.previousAnthropicCost);
|
|
1351
|
+
const cumulativeAnthropicCostUSD = addAnthropicRunCost(anthropicTotalCostUSD);
|
|
1352
|
+
const previousAnthropicCostUSD = cumulativeAnthropicCostUSD - (anthropicTotalCostUSD || 0);
|
|
1353
|
+
await displaySessionTokenUsage({ sessionId, tempDir, resultModelUsage, anthropicTotalCostUSD: cumulativeAnthropicCostUSD, previousAnthropicCostUSD, argv, log });
|
|
1326
1354
|
await showResumeCommand(sessionId, tempDir, claudePath, argv.model, log, argv);
|
|
1327
1355
|
return {
|
|
1328
1356
|
success: true,
|
|
@@ -1332,7 +1360,7 @@ export const executeClaudeCommand = async params => {
|
|
|
1332
1360
|
limitTimezone,
|
|
1333
1361
|
messageCount,
|
|
1334
1362
|
toolUseCount,
|
|
1335
|
-
anthropicTotalCostUSD, //
|
|
1363
|
+
anthropicTotalCostUSD: cumulativeAnthropicCostUSD, // Issue #1104/#1886: cumulative Anthropic cost across resume iterations
|
|
1336
1364
|
errorDuringExecution, // Issue #1088: Track if error_during_execution subtype occurred
|
|
1337
1365
|
resultSummary, // Issue #1263: Include result summary for --attach-solution-summary
|
|
1338
1366
|
resultModelUsage, // Issue #1454
|
|
@@ -1378,6 +1406,9 @@ export const executeClaudeCommand = async params => {
|
|
|
1378
1406
|
}
|
|
1379
1407
|
}
|
|
1380
1408
|
await log(`\n\n❌ Error executing Claude command: ${error.message}`, { level: 'error' });
|
|
1409
|
+
// Issue #1886: fold captured cost so the carried-forward cost survives this exception path too.
|
|
1410
|
+
seedCumulativeAnthropicCost(argv.previousAnthropicCost);
|
|
1411
|
+
const cumulativeAnthropicCostUSDOnException = addAnthropicRunCost(anthropicTotalCostUSD ?? anthropicCostFromAnyResult);
|
|
1381
1412
|
return {
|
|
1382
1413
|
success: false,
|
|
1383
1414
|
sessionId,
|
|
@@ -1386,7 +1417,7 @@ export const executeClaudeCommand = async params => {
|
|
|
1386
1417
|
limitTimezone: null,
|
|
1387
1418
|
messageCount,
|
|
1388
1419
|
toolUseCount,
|
|
1389
|
-
anthropicTotalCostUSD, // Issue #1104: Include cost even on failure
|
|
1420
|
+
anthropicTotalCostUSD: cumulativeAnthropicCostUSDOnException, // Issue #1104/#1886: Include cumulative cost even on failure
|
|
1390
1421
|
resultSummary, // Issue #1263: Include result summary
|
|
1391
1422
|
// Issue #1845: surface the actual exception message so callers can show it to users
|
|
1392
1423
|
errorInfo: { message: error.message || error.toString() },
|
|
package/src/solve.auto-continue.lib.mjs
CHANGED
|
@@ -54,6 +54,9 @@ import { formatAutoIterationLimit, hasReachedAutoIterationLimit, normalizeAutoIt
|
|
|
54
54
|
// Issue #1574: Interruptible sleep so CTRL+C is never blocked by a lingering timer
|
|
55
55
|
const { interruptibleSleep } = await import('./interruptible-sleep.lib.mjs');
|
|
56
56
|
|
|
57
|
+
// Issue #1886: cumulative Anthropic cost carried across cross-process resumes
|
|
58
|
+
const { getCumulativeAnthropicCost } = await import('./anthropic-cost-accumulator.lib.mjs');
|
|
59
|
+
|
|
57
60
|
const { calculateWaitTime } = validation;
|
|
58
61
|
|
|
59
62
|
/**
|
|
@@ -168,6 +171,20 @@ export const autoContinueWhenLimitResets = async (issueUrl, sessionId, argv, sho
|
|
|
168
171
|
resumeArgs.push('--auto-resume-iteration', String(nextAutoResumeIteration));
|
|
169
172
|
resumeArgs.push('--auto-resume-max-iterations', String(maxAutoResumeIterations));
|
|
170
173
|
|
|
174
|
+
// Issue #1886: carry the cumulative Anthropic cost into the resumed process.
|
|
175
|
+
// The resumed run reads the same session JSONL (full session, all runs), so
|
|
176
|
+
// its public-pricing estimate spans every run; without this the resumed
|
|
177
|
+
// run's "Calculated by Anthropic" figure would cover only its own process
|
|
178
|
+
// and disagree with the full-session public estimate (the -31.66% gap in
|
|
179
|
+
// issue #1886). getCumulativeAnthropicCost() already includes this run's
|
|
180
|
+
// cost folded in at the runClaude return, plus anything carried from prior
|
|
181
|
+
// iterations via --previous-anthropic-cost.
|
|
182
|
+
const carriedAnthropicCost = getCumulativeAnthropicCost();
|
|
183
|
+
if (carriedAnthropicCost > 0) {
|
|
184
|
+
resumeArgs.push('--previous-anthropic-cost', String(carriedAnthropicCost));
|
|
185
|
+
await log(`💰 Carrying forward cumulative Anthropic cost: $${carriedAnthropicCost.toFixed(6)} (issue #1886)`, { verbose: true });
|
|
186
|
+
}
|
|
187
|
+
|
|
171
188
|
// Pass session type for proper comment differentiation
|
|
172
189
|
// See: https://github.com/link-assistant/hive-mind/issues/1152
|
|
173
190
|
const sessionType = isRestart ? 'auto-restart' : 'auto-resume';
|
package/src/solve.config.lib.mjs
CHANGED
|
@@ -10,6 +10,7 @@
|
|
|
10
10
|
import { enhanceErrorMessage, detectMalformedFlags } from './option-suggestions.lib.mjs';
|
|
11
11
|
import { defaultModels, buildModelOptionDescription, resolveDefaultFallbackModel, resolveRuntimeDefaultModel } from './models/index.mjs';
|
|
12
12
|
import { validateBranchName } from './solve.branch.lib.mjs';
|
|
13
|
+
import { resolveEscalationConfig, isEscalateEnabled, DEFAULT_ESCALATE_RANGE } from './solve.escalate.lib.mjs';
|
|
13
14
|
import { getLinoYargsFactory, hideBin, parseCliArgumentsWithLino } from './cli-arguments.lib.mjs';
|
|
14
15
|
|
|
15
16
|
// Re-export for use by telegram-bot.mjs (avoids extra import lines there)
|
|
@@ -218,6 +219,19 @@ export const SOLVE_OPTION_DEFINITIONS = {
|
|
|
218
219
|
default: 0,
|
|
219
220
|
hidden: true,
|
|
220
221
|
},
|
|
222
|
+
// Issue #1886: carried-forward Anthropic cost from previous resume iterations.
|
|
223
|
+
// The session JSONL accumulates the full session across limit-reset resumes,
|
|
224
|
+
// but Anthropic's result-event total_cost_usd is scoped to a single process.
|
|
225
|
+
// Threading the previous total here lets the resumed run display the
|
|
226
|
+
// full-session Anthropic cost alongside the full-session public estimate,
|
|
227
|
+
// instead of a misleading per-run figure. Internal/hidden: set automatically
|
|
228
|
+
// by autoContinueWhenLimitResets when spawning the resumed solve process.
|
|
229
|
+
'previous-anthropic-cost': {
|
|
230
|
+
type: 'number',
|
|
231
|
+
description: 'Internal: cumulative Anthropic total_cost_usd carried forward from previous resume iterations (issue #1886)',
|
|
232
|
+
default: 0,
|
|
233
|
+
hidden: true,
|
|
234
|
+
},
|
|
221
235
|
'auto-merge': {
|
|
222
236
|
type: 'boolean',
|
|
223
237
|
description: 'Automatically merge the pull request when the working session is finished and all CI/CD statuses pass and PR is mergeable. Implies --auto-restart-until-mergeable.',
|
|
@@ -597,6 +611,21 @@ export const SOLVE_OPTION_DEFINITIONS = {
|
|
|
597
611
|
alias: ['keep-going-until-all-requirements-are-fully-done', 'keep-working', 'keep-going'],
|
|
598
612
|
default: undefined,
|
|
599
613
|
},
|
|
614
|
+
escalate: {
|
|
615
|
+
type: 'string',
|
|
616
|
+
description: '[EXPERIMENTAL] Start solving with a cheaper/lower-tier model and automatically escalate to a more capable (more expensive) model while unfinished work remains. Accepts a model range "<lower>-<upper>" using short Claude tier names (ladder: haiku < sonnet < opus < fable), e.g. "sonnet-opus". A single name (e.g. "opus") means just that tier. Bare flag means "sonnet-fable". The idea: iterate cheaply first so expensive models do more reading and less writing.',
|
|
617
|
+
default: undefined,
|
|
618
|
+
},
|
|
619
|
+
'escalate-from': {
|
|
620
|
+
type: 'string',
|
|
621
|
+
description: '[EXPERIMENTAL] Shortcut for --escalate <model>-fable: start solving from the given model (haiku/sonnet/opus/fable, aliases accepted) and escalate up to the top of the ladder while unfinished work remains. Takes precedence over --escalate when both are given.',
|
|
622
|
+
default: undefined,
|
|
623
|
+
},
|
|
624
|
+
'escalate-steps': {
|
|
625
|
+
type: 'number',
|
|
626
|
+
description: '[EXPERIMENTAL] How many working sessions to keep each model tier before escalating to the next one (default: 1). For example 2 keeps the lower tier for two working sessions, then the next tier for two, and so on. Only used with --escalate / --escalate-from.',
|
|
627
|
+
default: 1,
|
|
628
|
+
},
|
|
600
629
|
'working-session-live-progress': {
|
|
601
630
|
type: 'string',
|
|
602
631
|
description: '[EXPERIMENTAL] Enable live progress monitoring. Accepts "comment" (default, updates a per-session PR comment) or "pr" (updates PR description). Plain --working-session-live-progress means "comment". Works with or without --interactive-mode.',
|
|
@@ -870,6 +899,34 @@ export const parseArguments = async (yargs = getLinoYargsFactory(), hideBinFn =
|
|
|
870
899
|
}
|
|
871
900
|
}
|
|
872
901
|
|
|
902
|
+
// --escalate / --escalate-from / --escalate-steps normalization (issue #1885)
|
|
903
|
+
// The bare `--escalate` flag is a string-typed option, so yargs yields `true`
|
|
904
|
+
// (or an empty string) for a value-less flag. Canonicalize that to the default
|
|
905
|
+
// range so downstream parsing in solve.escalate.lib.mjs sees a meaningful
|
|
906
|
+
// value. We also validate the range/steps eagerly here so misuse fails fast at
|
|
907
|
+
// config time rather than mid-solve.
|
|
908
|
+
{
|
|
909
|
+
const escalateProvided = hasRawOption(rawArgs, '--escalate');
|
|
910
|
+
if (escalateProvided) {
|
|
911
|
+
const current = argv.escalate;
|
|
912
|
+
if (current === true || current === '' || current === undefined || current === null) {
|
|
913
|
+
argv.escalate = DEFAULT_ESCALATE_RANGE;
|
|
914
|
+
} else if (typeof current === 'string') {
|
|
915
|
+
argv.escalate = current.trim().toLowerCase();
|
|
916
|
+
}
|
|
917
|
+
} else if (argv.escalate === undefined) {
|
|
918
|
+
argv.escalate = undefined;
|
|
919
|
+
}
|
|
920
|
+
if (typeof argv.escalateFrom === 'string') {
|
|
921
|
+
argv.escalateFrom = argv.escalateFrom.trim().toLowerCase();
|
|
922
|
+
}
|
|
923
|
+
// Validate eagerly (throws on invalid range / from / steps). resolveEscalationConfig
|
|
924
|
+
// is a no-op (returns null) when the feature is disabled.
|
|
925
|
+
if (isEscalateEnabled(argv)) {
|
|
926
|
+
resolveEscalationConfig(argv);
|
|
927
|
+
}
|
|
928
|
+
}
|
|
929
|
+
|
|
873
930
|
// --working-session-live-progress normalization
|
|
874
931
|
// When passed as --working-session-live-progress (no value), yargs gives true for string type
|
|
875
932
|
// Normalize: true → "comment", validate known values
|
|
@@ -898,6 +955,19 @@ export const parseArguments = async (yargs = getLinoYargsFactory(), hideBinFn =
|
|
|
898
955
|
argv.model = await resolveRuntimeDefaultModel(argv.tool);
|
|
899
956
|
}
|
|
900
957
|
|
|
958
|
+
// Escalate mode (issue #1885): when enabled and the user did not explicitly
|
|
959
|
+
// pin a model, the very first regular solve session should run on the cheapest
|
|
960
|
+
// tier in the plan (the range's lower bound). The restart loop in
|
|
961
|
+
// solve.escalate.lib.mjs then escalates upward while unfinished work remains.
|
|
962
|
+
// An explicit --model always wins (the user pinned the worker model on
|
|
963
|
+
// purpose), so only override the resolved default.
|
|
964
|
+
if (isEscalateEnabled(argv) && !modelExplicitlyProvided && (argv.tool || 'claude') === 'claude') {
|
|
965
|
+
const escalationConfig = resolveEscalationConfig(argv);
|
|
966
|
+
if (escalationConfig && escalationConfig.plan.length > 0) {
|
|
967
|
+
argv.model = escalationConfig.plan[0];
|
|
968
|
+
}
|
|
969
|
+
}
|
|
970
|
+
|
|
901
971
|
if (argv.tool && !fallbackModelExplicitlyProvided) {
|
|
902
972
|
const defaultFallbackModel = resolveDefaultFallbackModel(argv.tool, argv.model);
|
|
903
973
|
argv.fallbackModel = defaultFallbackModel || undefined;
|
|
@@ -0,0 +1,505 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* Escalate-mode module for solve.mjs
|
|
5
|
+
*
|
|
6
|
+
* [EXPERIMENTAL] `--escalate` makes the solver try to solve a task fast and
|
|
7
|
+
* cheap first (with a lower-tier model), and only escalate to a more capable
|
|
8
|
+
* (and more expensive) model when the cheaper model did not finish the job.
|
|
9
|
+
*
|
|
10
|
+
* The intuition (from issue #1885): small models often get *most* of the work
|
|
11
|
+
* right, but not quite right. By iterating cheaply first, the more expensive
|
|
12
|
+
* models spend their budget reading and refining an existing draft rather than
|
|
13
|
+
* writing everything from scratch.
|
|
14
|
+
*
|
|
15
|
+
* Model ladder (Claude), cheapest → most capable:
|
|
16
|
+
*
|
|
17
|
+
* haiku → sonnet → opus → fable
|
|
18
|
+
*
|
|
19
|
+
* Options:
|
|
20
|
+
* --escalate [lower-upper] Enable escalate mode. Bare flag means the default
|
|
21
|
+
* range `sonnet-fable`. `sonnet-opus` sets the lower
|
|
22
|
+
* and upper bound (delimiter is `-`). A single name
|
|
23
|
+
* (e.g. `opus`) means just that one tier.
|
|
24
|
+
* --escalate-from <model> Shortcut for `--escalate <model>-fable` (escalate
|
|
25
|
+
* from <model> up to the top of the ladder).
|
|
26
|
+
* --escalate-steps <n> How many working sessions to keep each tier before
|
|
27
|
+
* escalating to the next one (default: 1). For
|
|
28
|
+
* example `2` keeps the lower tier for two working
|
|
29
|
+
* sessions, then the next tier for two, and so on.
|
|
30
|
+
*
|
|
31
|
+
* The pure parsing/planning helpers in this module are network-free so they can
|
|
32
|
+
* be unit-tested in isolation. The `runEscalation` orchestrator restarts the AI
|
|
33
|
+
* tool with the escalated model, reusing the same deferred-work detection that
|
|
34
|
+
* powers `--keep-working-until-all-requirements-are-fully-done` (issue #1883) as
|
|
35
|
+
* the "did the cheaper model finish?" signal.
|
|
36
|
+
*
|
|
37
|
+
* @see https://github.com/link-assistant/hive-mind/issues/1885
|
|
38
|
+
*/
|
|
39
|
+
|
|
40
|
+
// ───────────────────────────── Pure helpers ──────────────────────────────────
|
|
41
|
+
// Everything above the `runEscalation` orchestrator is pure (no I/O, no network)
|
|
42
|
+
// so it can be imported and unit-tested without side effects.
|
|
43
|
+
|
|
44
|
+
/**
 * Ordered Claude model ladder used by escalate mode, cheapest → most capable.
 * These are the short canonical tier names; aliases (e.g. `opus-4-8`,
 * `claude-fable-5`) are normalized to these by {@link canonicalTier}.
 *
 * Frozen because the array is exported and shared by every helper in this
 * module: an accidental in-place mutation by an importer would silently change
 * range validation and plan building everywhere.
 */
export const MODEL_ESCALATION_ORDER = Object.freeze(['haiku', 'sonnet', 'opus', 'fable']);

/** Default lower bound when `--escalate` is given without an explicit range. */
export const DEFAULT_ESCALATE_LOWER = 'sonnet';

/** Default upper bound (top of the ladder). */
export const DEFAULT_ESCALATE_UPPER = 'fable';

/** Default range used when `--escalate` is given as a bare flag. */
export const DEFAULT_ESCALATE_RANGE = `${DEFAULT_ESCALATE_LOWER}-${DEFAULT_ESCALATE_UPPER}`;

/** Default number of working sessions to keep each tier before escalating. */
export const DEFAULT_ESCALATE_STEPS = 1;
|
|
62
|
+
|
|
63
|
+
/**
 * Mapping of known model aliases → canonical tier name. Lets users pass either
 * the short tier name (`opus`) or a more specific alias (`opus-4-8`,
 * `claude-opus-4-8`) wherever a single model is accepted (e.g. --escalate-from).
 */
const TIER_ALIASES = {
  haiku: 'haiku',
  'haiku-4-5': 'haiku',
  'claude-haiku-4-5': 'haiku',
  'claude-haiku-4-5-20251001': 'haiku',
  sonnet: 'sonnet',
  'sonnet-4-6': 'sonnet',
  'sonnet-4-5': 'sonnet',
  'claude-sonnet-4-6': 'sonnet',
  'claude-sonnet-4-5': 'sonnet',
  opus: 'opus',
  'opus-4-8': 'opus',
  'opus-4-7': 'opus',
  'opus-4-6': 'opus',
  'opus-4-5': 'opus',
  'claude-opus-4-8': 'opus',
  'claude-opus-4-7': 'opus',
  fable: 'fable',
  'fable-5': 'fable',
  'claude-fable-5': 'fable',
};

/**
 * Normalize a model name/alias to its canonical escalate-ladder tier.
 *
 * The input is trimmed and lower-cased before the lookup, so `" Opus-4-8 "`
 * resolves to `opus`.
 *
 * @param {string} name
 * @returns {string|null} canonical tier name, or null if not a known tier
 *   (including non-string and blank input).
 */
export const canonicalTier = name => {
  if (typeof name !== 'string') return null;
  const key = name.trim().toLowerCase();
  if (!key) return null;
  // Own-property check: a bare `TIER_ALIASES[key]` would also resolve inherited
  // members such as "constructor" or "__proto__" and return a non-string value.
  return Object.hasOwn(TIER_ALIASES, key) ? TIER_ALIASES[key] : null;
};
|
|
101
|
+
|
|
102
|
+
/**
 * Parse a `--escalate` range value into { from, to } canonical tier names.
 *
 * Accepted forms:
 *   - true / '' / undefined / null → the default range (`sonnet-fable`)
 *   - `sonnet`                     → { from: 'sonnet', to: 'sonnet' }
 *   - `sonnet-fable`               → { from: 'sonnet', to: 'fable' }
 *
 * The delimiter is `-`. Only the short ladder names (haiku|sonnet|opus|fable)
 * are accepted inside a range to avoid ambiguity with dashed aliases such as
 * `opus-4-8` (use --escalate-from for those). Input is trimmed and lower-cased.
 *
 * @param {string|boolean|undefined} value
 * @returns {{ from: string, to: string }}
 * @throws {Error} on a non-string value, an unknown model name, more than two
 *   `-`-separated parts, or a lower bound above the upper bound.
 */
export const parseEscalateRange = value => {
  // A bare flag (`true`) or a missing value selects the default range.
  const raw = value === true || value === undefined || value === null ? DEFAULT_ESCALATE_RANGE : value;
  if (typeof raw !== 'string') {
    throw new Error(`Invalid --escalate value: ${JSON.stringify(value)}. Expected a model range like "sonnet-fable".`);
  }
  const trimmed = raw.trim().toLowerCase();
  // A blank string is treated like the bare flag.
  const parts = (trimmed === '' ? DEFAULT_ESCALATE_RANGE : trimmed).split('-');

  const order = MODEL_ESCALATION_ORDER;
  const requireLadderName = part => {
    if (!order.includes(part)) {
      throw new Error(`Invalid --escalate model "${part}". Expected one of: ${order.join(', ')} (range form: "${DEFAULT_ESCALATE_RANGE}").`);
    }
    return part;
  };

  let from;
  let to;
  if (parts.length === 1) {
    // Single name: the range collapses to exactly that tier.
    from = requireLadderName(parts[0]);
    to = from;
  } else if (parts.length === 2) {
    from = requireLadderName(parts[0]);
    to = requireLadderName(parts[1]);
  } else {
    // Three or more parts usually means a dashed alias (e.g. "opus-4-8") was used.
    throw new Error(`Invalid --escalate range "${trimmed}". Expected "<lower>-<upper>" with short model names (e.g. "${DEFAULT_ESCALATE_RANGE}").`);
  }

  if (order.indexOf(from) > order.indexOf(to)) {
    throw new Error(`Invalid --escalate range "${trimmed}": lower bound "${from}" is more capable than upper bound "${to}". Order is ${order.join(' < ')}.`);
  }

  return { from, to };
};
|
|
158
|
+
|
|
159
|
+
/**
 * Parse a `--escalate-from` value into { from, to } where `to` is the top of
 * the ladder. Accepts canonical names and aliases (e.g. `opus-4-8`).
 *
 * @param {string} value
 * @returns {{ from: string, to: string }}
 * @throws {Error} on an invalid model name.
 */
export const parseEscalateFrom = value => {
  const from = canonicalTier(value);
  // Accept only a real ladder tier. Checking membership (not just truthiness)
  // keeps any unexpected non-tier value out of `from`, so the caller gets the
  // specific --escalate-from error instead of a later "invalid bounds" failure.
  if (typeof from !== 'string' || !MODEL_ESCALATION_ORDER.includes(from)) {
    throw new Error(`Invalid --escalate-from model ${JSON.stringify(value)}. Expected one of: ${MODEL_ESCALATION_ORDER.join(', ')}.`);
  }
  return { from, to: DEFAULT_ESCALATE_UPPER };
};
|
|
173
|
+
|
|
174
|
+
/**
 * Normalize the `--escalate-steps` value into a positive integer (default 1).
 *
 * `undefined`, `null`, `true` (bare flag) and blank strings — including
 * whitespace-only ones — all mean "not provided" and yield the default.
 *
 * @param {string|number|boolean|null|undefined} value
 * @returns {number}
 * @throws {Error} on a non-positive, fractional or non-numeric value.
 */
export const normalizeEscalateSteps = value => {
  if (value === undefined || value === null || value === true) {
    return DEFAULT_ESCALATE_STEPS;
  }
  let n = value;
  if (typeof value !== 'number') {
    const text = String(value).trim();
    // Trim before the blank check so '' and '   ' are handled the same way.
    if (text === '') {
      return DEFAULT_ESCALATE_STEPS;
    }
    n = Number(text);
  }
  // Number.isInteger is false for NaN and ±Infinity, so no separate finite check.
  if (!Number.isInteger(n) || n < 1) {
    throw new Error(`Invalid --escalate-steps value: ${JSON.stringify(value)}. Expected a positive integer (>= 1).`);
  }
  return n;
};
|
|
190
|
+
|
|
191
|
+
/**
 * Build the ordered list of models (the "escalation plan"), where each tier
 * between `from` and `to` (inclusive) is repeated `steps` times.
 *
 * Example: { from: 'sonnet', to: 'fable', steps: 2 } →
 *   ['sonnet', 'sonnet', 'opus', 'opus', 'fable', 'fable']
 *
 * `steps` is not validated here; callers are expected to pass it through
 * `normalizeEscalateSteps` first (as `resolveEscalationConfig` does).
 *
 * @param {{ from: string, to: string, steps?: number }} params
 * @returns {string[]}
 * @throws {Error} when either bound is not a ladder name or `from` sits above `to`.
 */
export const buildEscalationPlan = ({ from, to, steps = DEFAULT_ESCALATE_STEPS }) => {
  const order = MODEL_ESCALATION_ORDER;
  const fromIdx = order.indexOf(from);
  const toIdx = order.indexOf(to);
  if (fromIdx === -1 || toIdx === -1 || fromIdx > toIdx) {
    throw new Error(`Invalid escalation bounds: from="${from}", to="${to}".`);
  }
  const tiers = order.slice(fromIdx, toIdx + 1);
  const plan = [];
  for (const tier of tiers) {
    for (let i = 0; i < steps; i++) {
      plan.push(tier);
    }
  }
  return plan;
};
|
|
217
|
+
|
|
218
|
+
/**
 * Resolve the model to use for a given 0-based working-session index. Indexes
 * past the end of the plan clamp to the last (most capable) model, and negative
 * indexes clamp to the first, so the loop never reaches outside the ladder.
 *
 * @param {string[]} plan
 * @param {number} sessionIndex
 * @returns {string|undefined} the plan entry, or undefined when `plan` is not a
 *   non-empty array.
 */
export const resolveEscalationModel = (plan, sessionIndex) => {
  if (!Array.isArray(plan) || plan.length === 0) return undefined;
  const idx = Math.max(0, Math.min(sessionIndex, plan.length - 1));
  return plan[idx];
};
|
|
231
|
+
|
|
232
|
+
/**
 * Whether escalate mode is enabled given parsed argv.
 *
 * The feature is on when either `argv.escalate` or `argv.escalateFrom` is
 * truthy; a missing argv object means disabled.
 *
 * @param {object} argv
 * @returns {boolean}
 */
export const isEscalateEnabled = argv => {
  if (!argv) return false;
  return Boolean(argv.escalate) || Boolean(argv.escalateFrom);
};
|
|
241
|
+
|
|
242
|
+
/**
 * Resolve the full escalation configuration from argv. Returns null when the
 * feature is disabled.
 *
 * `--escalate-from` takes precedence over `--escalate` when both are given.
 * Parsing errors from the range / from / steps helpers propagate to the caller.
 *
 * @param {object} argv
 * @returns {{ enabled: boolean, from: string, to: string, steps: number, plan: string[] }|null}
 */
export const resolveEscalationConfig = argv => {
  if (!isEscalateEnabled(argv)) return null;
  const { from, to } = argv.escalateFrom ? parseEscalateFrom(argv.escalateFrom) : parseEscalateRange(argv.escalate);
  const steps = normalizeEscalateSteps(argv.escalateSteps);
  const plan = buildEscalationPlan({ from, to, steps });
  return { enabled: true, from, to, steps, plan };
};
|
|
258
|
+
|
|
259
|
+
/**
 * Human-readable one-line description of an escalation plan, collapsing
 * consecutive repeats into "model×N" and joining tiers with " → ".
 *
 * @param {string[]} plan
 * @returns {string} the formatted plan, or "(empty)" when `plan` is not a
 *   non-empty array.
 */
export const formatEscalationPlan = plan => {
  if (!Array.isArray(plan) || plan.length === 0) return '(empty)';
  const groups = [];
  for (const model of plan) {
    const last = groups[groups.length - 1];
    if (last && last.model === model) {
      // Same tier as the previous entry: extend the current run.
      last.count++;
    } else {
      groups.push({ model, count: 1 });
    }
  }
  return groups.map(({ model, count }) => (count > 1 ? `${model}×${count}` : model)).join(' → ');
};
|
|
278
|
+
|
|
279
|
+
// ─────────────────────────── Orchestrator (I/O) ──────────────────────────────
|
|
280
|
+
|
|
281
|
+
// Lazy module bindings are set up inside runEscalation so that importing this
|
|
282
|
+
// module for its pure helpers (e.g. in tests) does not pull in command-stream,
|
|
283
|
+
// the network bootstrap, or other heavy dependencies.
|
|
284
|
+
|
|
285
|
+
/**
 * Runs escalate restart iterations after the main solve.
 *
 * The first regular solve session already ran with the lowest tier in the plan
 * (see the config-time model override in solve.config.lib.mjs), so escalation
 * continues from plan index 1 onward. Before each restart it re-scans for
 * deferred / unfinished work (the same detector used by keep-working). If no
 * unfinished-work indicators remain, the cheaper model is considered to have
 * succeeded and escalation stops early — we do not waste the more expensive
 * models. The loop also stops when completion cannot be evaluated, when a usage
 * limit is reached, or after MAX_CONSECUTIVE_ERRORS consecutive API errors.
 *
 * @param {object} params
 * @param {string} params.issueUrl
 * @param {string} params.owner
 * @param {string} params.repo
 * @param {string|number} params.issueNumber
 * @param {string|number} params.prNumber - required; without it nothing runs
 * @param {string} params.branchName
 * @param {string} params.tempDir
 * @param {string} [params.workspaceTmpDir]
 * @param {object} params.argv - CLI arguments
 * @param {function} params.cleanupClaudeFile - cleanup function
 * @param {string} [params.resultSummary] - AI solution summary from the last session
 * @returns {Promise<{sessionId, anthropicTotalCostUSD, publicPricingEstimate, pricingInfo}|null>}
 *   values from the most recent restart that reported them, or null when escalate
 *   mode is disabled, unsupported for the tool, or no restart was run.
 */
export const runEscalation = async ({ issueUrl, owner, repo, issueNumber, prNumber, branchName, tempDir, workspaceTmpDir, argv, cleanupClaudeFile, resultSummary }) => {
  const config = resolveEscalationConfig(argv);
  if (!config || !prNumber) {
    return null;
  }

  // Import shared library functions lazily (network bootstrap lives here).
  const lib = await import('./lib.mjs');
  const { log, cleanErrorMessage } = lib;

  // Escalate mode only makes sense for the Claude model ladder. For other tools
  // we skip with a clear message rather than misusing the ladder names.
  const tool = argv.tool || 'claude';
  if (tool !== 'claude') {
    await log(`ℹ️ ESCALATE: --escalate is only supported with --tool claude (current tool: ${tool}). Skipping.`, { level: 'warning' });
    return null;
  }

  // NOTE(review): this bootstraps `use` by eval-ing code fetched from unpkg at
  // runtime. Kept as-is; confirm the supply-chain risk is accepted and whether
  // the URL should be version-pinned.
  if (typeof globalThis.use === 'undefined') {
    globalThis.use = (await eval(await (await fetch('https://unpkg.com/use-m/use.js')).text())).use;
  }
  const use = globalThis.use;
  const { $: __rawDollar$ } = await use('command-stream');
  const { wrapDollarWithGhRetry } = await import('./github-rate-limit.lib.mjs');
  const $ = wrapDollarWithGhRetry(__rawDollar$);

  const restartShared = await import('./solve.restart-shared.lib.mjs');
  const { executeToolIteration, isApiError, isUsageLimitReached } = restartShared;

  const keepWorkingLib = await import('./solve.keep-working.lib.mjs');
  const { collectDeferredWorkSources } = keepWorkingLib;
  const detectLib = await import('./solve.keep-working.detect.lib.mjs');
  const { detectDeferredWorkInSources } = detectLib;

  const { resolveDefaultFallbackModel } = await import('./models/index.mjs');

  const sentryLib = await import('./sentry.lib.mjs');
  const { reportError } = sentryLib;

  const { plan } = config;

  await log('');
  await log(`🆙 ESCALATE: ${config.from} → ${config.to} (steps: ${config.steps} working session(s) per tier)`);
  await log(` Plan: ${formatEscalationPlan(plan)}`);
  await log(' Strategy: solve cheaply first; escalate to a more capable model only while unfinished work remains.');
  await log('');

  // Get PR merge state status for the iterations (fetched once, best-effort).
  let currentMergeStateStatus = null;
  try {
    // NOTE(review): the REST "get a pull request" payload documents `mergeable_state`;
    // `mergeStateStatus` is the GraphQL field name — confirm this jq filter yields a value.
    // `$` is wrapped via wrapDollarWithGhRetry above; the lazy import keeps this module
    // network-free for tests, so the lint rule (which only detects top-level rebinds) can't see it.
    // eslint-disable-next-line gh-rate-limit/no-direct-gh-exec -- $ is rate-limit-safe (wrapDollarWithGhRetry), rebound lazily above.
    const prStateResult = await $`gh api repos/${owner}/${repo}/pulls/${prNumber} --jq '.mergeStateStatus'`;
    if (prStateResult.code === 0) {
      currentMergeStateStatus = prStateResult.stdout.toString().trim();
    }
  } catch {
    // Ignore errors getting merge state
  }

  let sessionId;
  let anthropicTotalCostUSD;
  let publicPricingEstimate;
  let pricingInfo;
  let lastResultSummary = resultSummary;
  let consecutiveErrors = 0;
  const MAX_CONSECUTIVE_ERRORS = 3;
  let restartsRun = 0;

  // The first regular solve session = plan index 0. Continue escalating from 1.
  for (let sessionIndex = 1; sessionIndex < plan.length; sessionIndex++) {
    const model = resolveEscalationModel(plan, sessionIndex);
    const previousModel = resolveEscalationModel(plan, sessionIndex - 1);

    // Decide whether the cheaper model already finished. Re-scan the PR
    // description, AI solution summary and changed markdown documents for
    // deferred/unfinished-work indicators (same signal as keep-working).
    let sources = [];
    let sourcesCollected = true;
    try {
      sources = await collectDeferredWorkSources({ owner, repo, prNumber, resultSummary: lastResultSummary });
    } catch (error) {
      sourcesCollected = false;
      reportError(error, { context: 'escalate_collect_sources', owner, repo, prNumber, operation: 'collect_sources' });
      await log(`⚠️ ESCALATE: Could not collect sources to evaluate completion: ${cleanErrorMessage(error)}`, { level: 'warning' });
    }
    const detections = detectDeferredWorkInSources(sources);

    if (detections.length === 0) {
      if (sourcesCollected) {
        await log(`✅ ESCALATE: No unfinished-work indicators after ${previousModel} session(s). Stopping before escalating to ${model}.`);
      } else {
        // Nothing could be scanned, so do not report this as a verified success.
        await log(`⚠️ ESCALATE: Completion after ${previousModel} session(s) could not be verified. Stopping before escalating to ${model}.`, { level: 'warning' });
      }
      break;
    }

    await log('');
    await log(`🆙 ESCALATE: ${detections.length} unfinished-work indicator(s) remain after ${previousModel}; escalating to ${model} (session ${sessionIndex + 1}/${plan.length}).`);
    // Cap the listing so a noisy scan does not flood the log.
    for (const detection of detections.slice(0, 10)) {
      await log(` • [${detection.label}] in ${detection.source}: "${detection.snippet}"`);
    }

    // Sync local branch with remote before each iteration (issue #1572 pattern).
    try {
      const pullResult = await $({ cwd: tempDir })`git pull origin ${branchName} 2>&1`;
      if (pullResult.code === 0) {
        await log(` Synced local branch ${branchName} from remote`, { verbose: true });
      } else {
        await log(` Warning: git pull failed (code ${pullResult.code}); continuing with local state`, { level: 'warning' });
      }
    } catch (error) {
      reportError(error, { context: 'escalate_git_pull', branchName, operation: 'git_pull' });
      await log(` Warning: git pull error: ${cleanErrorMessage(error)}`, { level: 'warning' });
    }

    const feedbackLines = ['', '='.repeat(60), `🆙 ESCALATE MODE — now running on a more capable model (${model}):`, '='.repeat(60), '', `The previous working session(s) used "${previousModel}" but left unfinished work. You are a more capable model. Carefully review what has already been done, then finish every remaining requirement in this single pull request — do not defer, delay, or mark anything as out of scope. Ensure all changes are correct, consistent, validated, tested and that all CI/CD checks pass.`, ''];

    const fallbackModel = resolveDefaultFallbackModel(tool, model) || undefined;

    const iterationResult = await executeToolIteration({
      issueUrl,
      owner,
      repo,
      issueNumber,
      prNumber,
      branchName,
      tempDir,
      workspaceTmpDir,
      mergeStateStatus: currentMergeStateStatus,
      feedbackLines,
      argv: {
        ...argv,
        // Escalate to the next tier for this iteration.
        model,
        fallbackModel,
        // Reinforce the "finish everything now" guidance in the system prompt.
        promptEnsureAllRequirementsAreMet: true,
        // Prevent recursive escalation inside the restart iteration.
        escalate: undefined,
        escalateFrom: undefined,
      },
    });

    restartsRun++;

    // Keep the latest reported values; a missing field leaves the previous one in place.
    if (iterationResult) {
      if (iterationResult.sessionId) sessionId = iterationResult.sessionId;
      if (iterationResult.anthropicTotalCostUSD) anthropicTotalCostUSD = iterationResult.anthropicTotalCostUSD;
      if (iterationResult.publicPricingEstimate) publicPricingEstimate = iterationResult.publicPricingEstimate;
      if (iterationResult.pricingInfo) pricingInfo = iterationResult.pricingInfo;
      if (iterationResult.result) lastResultSummary = iterationResult.result;
    }

    if (isUsageLimitReached(iterationResult)) {
      await log('🛑 ESCALATE: Usage limit reached during restart. Stopping escalate loop.');
      break;
    }
    if (isApiError(iterationResult)) {
      consecutiveErrors++;
      await log(`⚠️ ESCALATE: API error during ${model} restart (${consecutiveErrors}/${MAX_CONSECUTIVE_ERRORS} consecutive).`, { level: 'warning' });
      if (consecutiveErrors >= MAX_CONSECUTIVE_ERRORS) {
        await log('🛑 ESCALATE: Too many consecutive errors. Stopping escalate loop.');
        break;
      }
      // A failed restart is not a completed session: skip the success line and
      // move on to the next plan entry.
      await log('');
      continue;
    }
    consecutiveErrors = 0;

    await log(`✅ ESCALATE: ${model} session complete (${sessionIndex + 1}/${plan.length})`);
    await log('');
  }

  // Clean up CLAUDE.md/.gitkeep after restarts
  try {
    await cleanupClaudeFile(tempDir, branchName, null, argv);
  } catch (error) {
    reportError(error, { context: 'escalate_cleanup', branchName, operation: 'cleanup_claude_file' });
  }

  if (restartsRun === 0) return null;
  return { sessionId, anthropicTotalCostUSD, publicPricingEstimate, pricingInfo };
};
|
|
488
|
+
|
|
489
|
+
// Default export: every named export of this module bundled into one object.
export default {
  MODEL_ESCALATION_ORDER,
  DEFAULT_ESCALATE_LOWER,
  DEFAULT_ESCALATE_UPPER,
  DEFAULT_ESCALATE_RANGE,
  DEFAULT_ESCALATE_STEPS,
  canonicalTier,
  parseEscalateRange,
  parseEscalateFrom,
  normalizeEscalateSteps,
  buildEscalationPlan,
  resolveEscalationModel,
  isEscalateEnabled,
  resolveEscalationConfig,
  formatEscalationPlan,
  runEscalation,
};
|
package/src/solve.mjs
CHANGED
|
@@ -46,6 +46,7 @@ const { startWatchMode } = watchLib;
|
|
|
46
46
|
const { startAutoRestartUntilMergeable } = await import('./solve.auto-merge.lib.mjs');
|
|
47
47
|
const { runAutoEnsureRequirements } = await import('./solve.auto-ensure.lib.mjs');
|
|
48
48
|
const { runKeepWorkingUntilDone } = await import('./solve.keep-working.lib.mjs');
|
|
49
|
+
const { runEscalation } = await import('./solve.escalate.lib.mjs');
|
|
49
50
|
const exitHandler = await import('./exit-handler.lib.mjs');
|
|
50
51
|
const { initializeExitHandler, installGlobalExitHandlers, safeExit, logActiveHandles } = exitHandler;
|
|
51
52
|
const { createInterruptWrapper } = await import('./solve.interrupt.lib.mjs');
|
|
@@ -1270,10 +1271,9 @@ try {
|
|
|
1270
1271
|
await log('⚠️ PR title/description still not updated after restart');
|
|
1271
1272
|
}
|
|
1272
1273
|
}
|
|
1273
|
-
|
|
1274
|
-
|
|
1274
|
+
// Post-solve restart loops (escalate #1885 first, then finalize #1383, then keep-working #1883):
|
|
1275
|
+
applyRestartResult(await runEscalation({ issueUrl, owner, repo, issueNumber, prNumber, branchName, tempDir, workspaceTmpDir, argv, cleanupClaudeFile, resultSummary }));
|
|
1275
1276
|
applyRestartResult(await runAutoEnsureRequirements({ issueUrl, owner, repo, issueNumber, prNumber, branchName, tempDir, argv, cleanupClaudeFile }));
|
|
1276
|
-
// Issue #1883: --keep-working-until-all-requirements-are-fully-done (detect deferred work and auto-restart until done)
|
|
1277
1277
|
applyRestartResult(await runKeepWorkingUntilDone({ issueUrl, owner, repo, issueNumber, prNumber, branchName, tempDir, workspaceTmpDir, argv, cleanupClaudeFile, resultSummary }));
|
|
1278
1278
|
|
|
1279
1279
|
// Start watch mode if enabled OR if we need to handle uncommitted changes
|