@link-assistant/hive-mind 1.76.2 → 1.77.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,38 @@
1
1
  # @link-assistant/hive-mind
2
2
 
3
+ ## 1.77.0
4
+
5
+ ### Minor Changes
6
+
7
+ - a50d201: feat(solve): experimental `--escalate` mode (#1885)
8
+
9
+ Add an experimental `solve` option family that solves a task cheaply first and
10
+ escalates to a more capable (more expensive) model only while unfinished work
11
+ remains. The model ladder, cheapest → most capable, is `haiku < sonnet < opus <
12
+ fable`.
13
+ - `--escalate` (bare) → the default range `sonnet-fable`.
14
+ - `--escalate sonnet-opus` → an explicit `<lower>-<upper>` range (`-` delimits the
15
+ bounds; only the short ladder names are allowed inside a range).
16
+ - `--escalate-from haiku` → shortcut for `--escalate haiku-fable` (aliases such as
17
+ `opus-4-8` accepted here, since a single value is unambiguous).
18
+ - `--escalate-steps N` (default 1) → keep each tier for N working sessions before
19
+ escalating (e.g. `2` → two sonnet sessions, then two opus, then two fable).
20
+
21
+ The first regular solve session runs on the range's lower bound (unless `--model`
22
+ is explicitly pinned). After it finishes, the escalate loop re-scans the pull
23
+ request for deferred/unfinished-work indicators — reusing the detector from issue
24
+ #1883 — and escalates to the next tier only if work remains; otherwise it stops
25
+ early so the expensive tiers are never invoked. Restarts are capped at 3
26
+ consecutive errors and stop on a usage limit. Escalate is Claude-only and runs
27
+ before `--finalize` / `--keep-working`.
28
+
29
+ Pure parsing/planning helpers live in a network-free module
30
+ (`src/solve.escalate.lib.mjs`) with full unit-test coverage
31
+ (`tests/test-escalate-1885.mjs`); a deep case study is compiled under
32
+ `docs/case-studies/issue-1885/`.
33
+
34
+ - 53a0544: Update Hive Mind Docker images to `konard/box` and `konard/box-dind` 2.3.1 so Docker-in-Docker deployments can use the upstream host-image passthrough allowlist.
35
+
3
36
  ## 1.76.2
4
37
 
5
38
  ### Patch Changes
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@link-assistant/hive-mind",
3
- "version": "1.76.2",
3
+ "version": "1.77.0",
4
4
  "description": "AI-powered issue solver and hive mind for collaborative problem solving",
5
5
  "main": "src/hive.mjs",
6
6
  "type": "module",
@@ -10,6 +10,7 @@
10
10
  import { enhanceErrorMessage, detectMalformedFlags } from './option-suggestions.lib.mjs';
11
11
  import { defaultModels, buildModelOptionDescription, resolveDefaultFallbackModel, resolveRuntimeDefaultModel } from './models/index.mjs';
12
12
  import { validateBranchName } from './solve.branch.lib.mjs';
13
+ import { resolveEscalationConfig, isEscalateEnabled, DEFAULT_ESCALATE_RANGE } from './solve.escalate.lib.mjs';
13
14
  import { getLinoYargsFactory, hideBin, parseCliArgumentsWithLino } from './cli-arguments.lib.mjs';
14
15
 
15
16
  // Re-export for use by telegram-bot.mjs (avoids extra import lines there)
@@ -610,6 +611,21 @@ export const SOLVE_OPTION_DEFINITIONS = {
610
611
  alias: ['keep-going-until-all-requirements-are-fully-done', 'keep-working', 'keep-going'],
611
612
  default: undefined,
612
613
  },
614
+ escalate: {
615
+ type: 'string',
616
+ description: '[EXPERIMENTAL] Start solving with a cheaper/lower-tier model and automatically escalate to a more capable (more expensive) model while unfinished work remains. Accepts a model range "<lower>-<upper>" using short Claude tier names (ladder: haiku < sonnet < opus < fable), e.g. "sonnet-opus". A single name (e.g. "opus") means just that tier. Bare flag means "sonnet-fable". The idea: iterate cheaply first so expensive models do more reading and less writing.',
617
+ default: undefined,
618
+ },
619
+ 'escalate-from': {
620
+ type: 'string',
621
+ description: '[EXPERIMENTAL] Shortcut for --escalate <model>-fable: start solving from the given model (haiku/sonnet/opus/fable, aliases accepted) and escalate up to the top of the ladder while unfinished work remains. Takes precedence over --escalate when both are given.',
622
+ default: undefined,
623
+ },
624
+ 'escalate-steps': {
625
+ type: 'number',
626
+ description: '[EXPERIMENTAL] How many working sessions to keep each model tier before escalating to the next one (default: 1). For example 2 keeps the lower tier for two working sessions, then the next tier for two, and so on. Only used with --escalate / --escalate-from.',
627
+ default: 1,
628
+ },
613
629
  'working-session-live-progress': {
614
630
  type: 'string',
615
631
  description: '[EXPERIMENTAL] Enable live progress monitoring. Accepts "comment" (default, updates a per-session PR comment) or "pr" (updates PR description). Plain --working-session-live-progress means "comment". Works with or without --interactive-mode.',
@@ -883,6 +899,34 @@ export const parseArguments = async (yargs = getLinoYargsFactory(), hideBinFn =
883
899
  }
884
900
  }
885
901
 
902
+ // --escalate / --escalate-from / --escalate-steps normalization (issue #1885)
903
+ // The bare `--escalate` flag is a string-typed option, so yargs yields `true`
904
+ // (or an empty string) for a value-less flag. Canonicalize that to the default
905
+ // range so downstream parsing in solve.escalate.lib.mjs sees a meaningful
906
+ // value. We also validate the range/steps eagerly here so misuse fails fast at
907
+ // config time rather than mid-solve.
908
+ {
909
+ const escalateProvided = hasRawOption(rawArgs, '--escalate');
910
+ if (escalateProvided) {
911
+ const current = argv.escalate;
912
+ if (current === true || current === '' || current === undefined || current === null) {
913
+ argv.escalate = DEFAULT_ESCALATE_RANGE;
914
+ } else if (typeof current === 'string') {
915
+ argv.escalate = current.trim().toLowerCase();
916
+ }
917
+ } else if (argv.escalate === undefined) {
918
+ argv.escalate = undefined;
919
+ }
920
+ if (typeof argv.escalateFrom === 'string') {
921
+ argv.escalateFrom = argv.escalateFrom.trim().toLowerCase();
922
+ }
923
+ // Validate eagerly (throws on invalid range / from / steps). resolveEscalationConfig
924
+ // is a no-op (returns null) when the feature is disabled.
925
+ if (isEscalateEnabled(argv)) {
926
+ resolveEscalationConfig(argv);
927
+ }
928
+ }
929
+
886
930
  // --working-session-live-progress normalization
887
931
  // When passed as --working-session-live-progress (no value), yargs gives true for string type
888
932
  // Normalize: true → "comment", validate known values
@@ -911,6 +955,19 @@ export const parseArguments = async (yargs = getLinoYargsFactory(), hideBinFn =
911
955
  argv.model = await resolveRuntimeDefaultModel(argv.tool);
912
956
  }
913
957
 
958
+ // Escalate mode (issue #1885): when enabled and the user did not explicitly
959
+ // pin a model, the very first regular solve session should run on the cheapest
960
+ // tier in the plan (the range's lower bound). The restart loop in
961
+ // solve.escalate.lib.mjs then escalates upward while unfinished work remains.
962
+ // An explicit --model always wins (the user pinned the worker model on
963
+ // purpose), so only override the resolved default.
964
+ if (isEscalateEnabled(argv) && !modelExplicitlyProvided && (argv.tool || 'claude') === 'claude') {
965
+ const escalationConfig = resolveEscalationConfig(argv);
966
+ if (escalationConfig && escalationConfig.plan.length > 0) {
967
+ argv.model = escalationConfig.plan[0];
968
+ }
969
+ }
970
+
914
971
  if (argv.tool && !fallbackModelExplicitlyProvided) {
915
972
  const defaultFallbackModel = resolveDefaultFallbackModel(argv.tool, argv.model);
916
973
  argv.fallbackModel = defaultFallbackModel || undefined;
@@ -0,0 +1,505 @@
1
+ #!/usr/bin/env node
2
+
3
+ /**
4
+ * Escalate-mode module for solve.mjs
5
+ *
6
+ * [EXPERIMENTAL] `--escalate` makes the solver try to solve a task fast and
7
+ * cheap first (with a lower-tier model), and only escalate to a more capable
8
+ * (and more expensive) model when the cheaper model did not finish the job.
9
+ *
10
+ * The intuition (from issue #1885): small models often get *most* of the work
11
+ * right, but not quite right. By iterating cheaply first, the more expensive
12
+ * models spend their budget reading and refining an existing draft rather than
13
+ * writing everything from scratch.
14
+ *
15
+ * Model ladder (Claude), cheapest → most capable:
16
+ *
17
+ * haiku → sonnet → opus → fable
18
+ *
19
+ * Options:
20
+ * --escalate [lower-upper] Enable escalate mode. Bare flag means the default
21
+ * range `sonnet-fable`. `sonnet-opus` sets the lower
22
+ * and upper bound (delimiter is `-`). A single name
23
+ * (e.g. `opus`) means just that one tier.
24
+ * --escalate-from <model> Shortcut for `--escalate <model>-fable` (escalate
25
+ * from <model> up to the top of the ladder).
26
+ * --escalate-steps <n> How many working sessions to keep each tier before
27
+ * escalating to the next one (default: 1). For
28
+ * example `2` keeps the lower tier for two working
29
+ * sessions, then the next tier for two, and so on.
30
+ *
31
+ * The pure parsing/planning helpers in this module are network-free so they can
32
+ * be unit-tested in isolation. The `runEscalation` orchestrator restarts the AI
33
+ * tool with the escalated model, reusing the same deferred-work detection that
34
+ * powers `--keep-working-until-all-requirements-are-fully-done` (issue #1883) as
35
+ * the "did the cheaper model finish?" signal.
36
+ *
37
+ * @see https://github.com/link-assistant/hive-mind/issues/1885
38
+ */
39
+
40
+ // ───────────────────────────── Pure helpers ──────────────────────────────────
41
+ // Everything above the `runEscalation` orchestrator is pure (no I/O, no network)
42
+ // so it can be imported and unit-tested without side effects.
43
+
44
/**
 * Ordered Claude model ladder used by escalate mode, cheapest → most capable.
 * These are the short canonical tier names; aliases (e.g. `opus-4-8`,
 * `claude-fable-5`) are normalized to these by {@link canonicalTier}.
 *
 * The array is frozen: it is exported and shared by every helper in this
 * module, so an accidental `push`/`sort` by a consumer would silently change
 * every later range/plan computation.
 */
export const MODEL_ESCALATION_ORDER = Object.freeze(['haiku', 'sonnet', 'opus', 'fable']);

/** Default lower bound when `--escalate` is given without an explicit range. */
export const DEFAULT_ESCALATE_LOWER = 'sonnet';

/** Default upper bound (top of the ladder). */
export const DEFAULT_ESCALATE_UPPER = 'fable';

/** Default range used when `--escalate` is given as a bare flag (`<lower>-<upper>`). */
export const DEFAULT_ESCALATE_RANGE = `${DEFAULT_ESCALATE_LOWER}-${DEFAULT_ESCALATE_UPPER}`;

/** Default number of working sessions to keep each tier before escalating. */
export const DEFAULT_ESCALATE_STEPS = 1;
62
+
63
/**
 * Mapping of known model aliases → canonical tier name. Lets users pass either
 * the short tier name (`opus`) or a more specific alias (`opus-4-8`,
 * `claude-opus-4-8`) wherever a single model is accepted (e.g. --escalate-from).
 * Keys are lowercase because lookups lowercase their input first.
 */
const TIER_ALIASES = Object.freeze({
  haiku: 'haiku',
  'haiku-4-5': 'haiku',
  'claude-haiku-4-5': 'haiku',
  'claude-haiku-4-5-20251001': 'haiku',
  sonnet: 'sonnet',
  'sonnet-4-6': 'sonnet',
  'sonnet-4-5': 'sonnet',
  'claude-sonnet-4-6': 'sonnet',
  'claude-sonnet-4-5': 'sonnet',
  opus: 'opus',
  'opus-4-8': 'opus',
  'opus-4-7': 'opus',
  'opus-4-6': 'opus',
  'opus-4-5': 'opus',
  'claude-opus-4-8': 'opus',
  'claude-opus-4-7': 'opus',
  fable: 'fable',
  'fable-5': 'fable',
  'claude-fable-5': 'fable',
});

/**
 * Normalize a model name/alias to its canonical escalate-ladder tier.
 *
 * The input is trimmed and lowercased before the lookup. Only the table's own
 * keys are consulted, so inherited object properties such as `constructor` or
 * `__proto__` are reported as unknown instead of leaking a non-string value.
 *
 * @param {string} name - tier name or alias; non-strings yield null.
 * @returns {string|null} canonical tier name, or null if not a known tier.
 */
export const canonicalTier = name => {
  if (typeof name !== 'string') return null;
  const key = name.trim().toLowerCase();
  if (!key) return null;
  return Object.hasOwn(TIER_ALIASES, key) ? TIER_ALIASES[key] : null;
};
101
+
102
/**
 * Parse a `--escalate` range value into { from, to } canonical tier names.
 *
 * Accepted forms:
 *   - true / undefined / null / '' (or whitespace only) → the default range
 *   - `sonnet`        → { from: 'sonnet', to: 'sonnet' }
 *   - `sonnet-fable`  → { from: 'sonnet', to: 'fable' }
 *
 * The delimiter is `-`. Only the short ladder names (haiku|sonnet|opus|fable)
 * are accepted inside a range to avoid ambiguity with dashed aliases such as
 * `opus-4-8` (use --escalate-from for those). Input is trimmed and lowercased
 * as a whole before it is split.
 *
 * @param {string|boolean|undefined|null} value
 * @returns {{ from: string, to: string }}
 * @throws {Error} on a non-string value, an unknown tier name, more than one
 *   delimiter, or a lower bound that sits above the upper bound on the ladder.
 */
export const parseEscalateRange = value => {
  // A bare flag arrives as `true` (or is absent); both mean "use the default".
  const raw = value === true || value === undefined || value === null ? DEFAULT_ESCALATE_RANGE : value;
  if (typeof raw !== 'string') {
    throw new Error(`Invalid --escalate value: ${JSON.stringify(value)}. Expected a model range like "sonnet-fable".`);
  }

  // An empty / whitespace-only string is treated like the bare flag.
  const trimmed = raw.trim().toLowerCase();
  const range = trimmed === '' ? DEFAULT_ESCALATE_RANGE : trimmed;
  const parts = range.split('-');

  const order = MODEL_ESCALATION_ORDER;
  const requireLadderName = part => {
    if (!order.includes(part)) {
      throw new Error(`Invalid --escalate model "${part}". Expected one of: ${order.join(', ')} (range form: "${DEFAULT_ESCALATE_RANGE}").`);
    }
    return part;
  };

  // Three or more segments can only come from a dashed alias or a typo.
  if (parts.length > 2) {
    throw new Error(`Invalid --escalate range "${range}". Expected "<lower>-<upper>" with short model names (e.g. "${DEFAULT_ESCALATE_RANGE}").`);
  }

  const from = requireLadderName(parts[0]);
  // A single name means "just that tier": lower and upper bound coincide.
  const to = parts.length === 2 ? requireLadderName(parts[1]) : from;

  if (order.indexOf(from) > order.indexOf(to)) {
    throw new Error(`Invalid --escalate range "${range}": lower bound "${from}" is more capable than upper bound "${to}". Order is ${order.join(' < ')}.`);
  }

  return { from, to };
};
158
+
159
/**
 * Turn a `--escalate-from` value into escalation bounds.
 *
 * The lower bound is the given model resolved through `canonicalTier` (so both
 * canonical names and aliases such as `opus-4-8` work); the upper bound is
 * always `DEFAULT_ESCALATE_UPPER`.
 *
 * @param {string} value - model name or alias to start from.
 * @returns {{ from: string, to: string }}
 * @throws {Error} when the value does not resolve to a known tier.
 */
export const parseEscalateFrom = value => {
  const lowerBound = canonicalTier(value);
  if (lowerBound) {
    return { from: lowerBound, to: DEFAULT_ESCALATE_UPPER };
  }
  const allowed = MODEL_ESCALATION_ORDER.join(', ');
  throw new Error(`Invalid --escalate-from model ${JSON.stringify(value)}. Expected one of: ${allowed}.`);
};
173
+
174
/**
 * Normalize the `--escalate-steps` value into a positive integer.
 *
 * "No value" inputs — undefined, null, `true` (bare flag) and empty or
 * whitespace-only strings — fall back to DEFAULT_ESCALATE_STEPS. Whitespace-only
 * strings are handled explicitly because `Number('  ')` is 0, which would
 * otherwise be rejected even though the plain empty string is accepted.
 *
 * @param {string|number|boolean|undefined|null} value
 * @returns {number} a positive integer (>= 1).
 * @throws {Error} on a non-numeric, non-integer or non-positive value.
 */
export const normalizeEscalateSteps = value => {
  if (value === undefined || value === null || value === true) {
    return DEFAULT_ESCALATE_STEPS;
  }
  if (typeof value === 'string' && value.trim() === '') {
    return DEFAULT_ESCALATE_STEPS;
  }
  const n = typeof value === 'number' ? value : Number(String(value).trim());
  // Number.isInteger already rejects NaN and ±Infinity.
  if (!Number.isInteger(n) || n < 1) {
    throw new Error(`Invalid --escalate-steps value: ${JSON.stringify(value)}. Expected a positive integer (>= 1).`);
  }
  return n;
};
190
+
191
/**
 * Build the ordered list of models (the "escalation plan"), where each tier
 * between `from` and `to` (inclusive) is repeated `steps` times.
 *
 * Example: { from: 'sonnet', to: 'fable', steps: 2 } →
 *   ['sonnet', 'sonnet', 'opus', 'opus', 'fable', 'fable']
 *
 * `steps` is validated here as well as at argument-parsing time because this
 * helper is exported: an unchecked `Infinity` would never terminate, and a
 * fraction or NaN would silently produce a plan of unexpected length.
 * A value of 0 still yields an empty plan.
 *
 * @param {{ from: string, to: string, steps?: number }} params
 * @returns {string[]}
 * @throws {Error} when a bound is not on the ladder, `from` is above `to`, or
 *   `steps` is not a non-negative integer.
 */
export const buildEscalationPlan = ({ from, to, steps = DEFAULT_ESCALATE_STEPS }) => {
  const order = MODEL_ESCALATION_ORDER;
  const fromIdx = order.indexOf(from);
  const toIdx = order.indexOf(to);
  if (fromIdx === -1 || toIdx === -1 || fromIdx > toIdx) {
    throw new Error(`Invalid escalation bounds: from="${from}", to="${to}".`);
  }

  // Number() keeps numeric strings / booleans working as a repeat count.
  const repeat = Number(steps);
  if (!Number.isInteger(repeat) || repeat < 0) {
    throw new Error(`Invalid escalation steps: ${JSON.stringify(steps)}. Expected a non-negative integer.`);
  }

  return order.slice(fromIdx, toIdx + 1).flatMap(tier => Array(repeat).fill(tier));
};
217
+
218
/**
 * Resolve the model to use for a given 0-based working-session index.
 *
 * The index is clamped into the plan: values below 0 map to the first entry
 * and values past the end map to the last (most capable) entry, so the loop
 * never reaches outside the ladder. A fractional index is truncated and a
 * non-numeric one (NaN) falls back to the first entry — without this, both
 * would slip past the clamp and index the array with a non-integer key.
 *
 * @param {string[]} plan
 * @param {number} sessionIndex
 * @returns {string|undefined} the model name, or undefined when `plan` is not
 *   a non-empty array.
 */
export const resolveEscalationModel = (plan, sessionIndex) => {
  if (!Array.isArray(plan) || plan.length === 0) return undefined;
  const requested = Number(sessionIndex);
  const safe = Number.isNaN(requested) ? 0 : requested;
  const clamped = Math.max(0, Math.min(safe, plan.length - 1));
  return plan[Math.trunc(clamped)];
};
231
+
232
/**
 * Report whether escalate mode was requested.
 *
 * The mode counts as enabled when either `argv.escalate` or
 * `argv.escalateFrom` holds a truthy value; a missing `argv` means disabled.
 *
 * @param {object} argv - parsed CLI arguments (may be null/undefined).
 * @returns {boolean}
 */
export const isEscalateEnabled = argv => (argv ? Boolean(argv.escalate || argv.escalateFrom) : false);
241
+
242
/**
 * Derive the complete escalation configuration from parsed argv.
 *
 * When both flags are present, `--escalate-from` decides the bounds and
 * `--escalate` is ignored. Bounds are parsed first, then the step count, then
 * the plan is built, so the first invalid input is the one that throws.
 *
 * @param {object} argv
 * @returns {{ enabled: boolean, from: string, to: string, steps: number, plan: string[] }|null}
 *   the resolved configuration, or null when escalate mode is not enabled.
 */
export const resolveEscalationConfig = argv => {
  if (!isEscalateEnabled(argv)) {
    return null;
  }

  let bounds;
  if (argv.escalateFrom) {
    bounds = parseEscalateFrom(argv.escalateFrom);
  } else {
    bounds = parseEscalateRange(argv.escalate);
  }

  const stepCount = normalizeEscalateSteps(argv.escalateSteps);
  const sessions = buildEscalationPlan({ from: bounds.from, to: bounds.to, steps: stepCount });

  return {
    enabled: true,
    from: bounds.from,
    to: bounds.to,
    steps: stepCount,
    plan: sessions,
  };
};
258
+
259
/**
 * Render an escalation plan as a single human-readable line.
 *
 * Runs of the same model are collapsed into "model×N"; a model that appears
 * once in a row is printed bare. Segments are joined with " → ".
 *
 * @param {string[]} plan
 * @returns {string} the formatted plan, or "(empty)" when `plan` is not a
 *   non-empty array.
 */
export const formatEscalationPlan = plan => {
  if (!Array.isArray(plan) || plan.length === 0) return '(empty)';

  const segments = [];
  let start = 0;
  while (start < plan.length) {
    const name = plan[start];
    // Advance `end` to the first entry that differs from the current run.
    let end = start + 1;
    while (end < plan.length && plan[end] === name) {
      end += 1;
    }
    const runLength = end - start;
    segments.push(runLength > 1 ? `${name}×${runLength}` : name);
    start = end;
  }
  return segments.join(' → ');
};
278
+
279
+ // ─────────────────────────── Orchestrator (I/O) ──────────────────────────────
280
+
281
+ // Lazy module bindings are set up inside runEscalation so that importing this
282
+ // module for its pure helpers (e.g. in tests) does not pull in command-stream,
283
+ // the network bootstrap, or other heavy dependencies.
284
+
285
/**
 * Runs escalate restart iterations after the main solve.
 *
 * The first regular solve session already ran with the lowest tier in the plan
 * (see the config-time model override in solve.config.lib.mjs), so escalation
 * continues from plan index 1 onward. Before each restart it re-scans for
 * deferred / unfinished work (the same detector used by keep-working). If no
 * unfinished-work indicators remain, the cheaper model is considered to have
 * succeeded and escalation stops early — we do not waste the more expensive
 * models.
 *
 * The loop also stops when:
 *   - the completion check itself fails (sources cannot be collected), because
 *     "unknown" must not be reported as "finished";
 *   - an iteration reports a usage limit;
 *   - MAX_CONSECUTIVE_ERRORS iterations in a row report an API error.
 *
 * Returns null without doing anything when escalate mode is disabled, there is
 * no pull request number, or the tool is not `claude`.
 *
 * @param {object} params
 * @param {string} params.issueUrl
 * @param {string} params.owner
 * @param {string} params.repo
 * @param {string|number} params.issueNumber
 * @param {string|number} params.prNumber
 * @param {string} params.branchName
 * @param {string} params.tempDir
 * @param {string} [params.workspaceTmpDir]
 * @param {object} params.argv - CLI arguments
 * @param {function} params.cleanupClaudeFile - cleanup function
 * @param {string} [params.resultSummary] - AI solution summary from the last session
 * @returns {Promise<{sessionId, anthropicTotalCostUSD, publicPricingEstimate, pricingInfo}|null>}
 *   values reported by the most recent iteration that supplied them, or null
 *   when no restart iteration ran.
 */
export const runEscalation = async ({ issueUrl, owner, repo, issueNumber, prNumber, branchName, tempDir, workspaceTmpDir, argv, cleanupClaudeFile, resultSummary }) => {
  const config = resolveEscalationConfig(argv);
  if (!config || !prNumber) {
    return null;
  }

  // Import shared library functions lazily (network bootstrap lives here).
  const lib = await import('./lib.mjs');
  const { log, cleanErrorMessage } = lib;

  // Escalate mode only makes sense for the Claude model ladder. For other tools
  // we skip with a clear message rather than misusing the ladder names.
  const tool = argv.tool || 'claude';
  if (tool !== 'claude') {
    await log(`ℹ️ ESCALATE: --escalate is only supported with --tool claude (current tool: ${tool}). Skipping.`, { level: 'warning' });
    return null;
  }

  if (typeof globalThis.use === 'undefined') {
    // NOTE(security): this evaluates code fetched from the network at runtime.
    // Left unchanged because it appears to be the project's established use-m
    // bootstrap — confirm before replacing it with a pinned/local dependency.
    globalThis.use = (await eval(await (await fetch('https://unpkg.com/use-m/use.js')).text())).use;
  }
  const use = globalThis.use;
  const { $: __rawDollar$ } = await use('command-stream');
  const { wrapDollarWithGhRetry } = await import('./github-rate-limit.lib.mjs');
  const $ = wrapDollarWithGhRetry(__rawDollar$);

  const restartShared = await import('./solve.restart-shared.lib.mjs');
  const { executeToolIteration, isApiError, isUsageLimitReached } = restartShared;

  const keepWorkingLib = await import('./solve.keep-working.lib.mjs');
  const { collectDeferredWorkSources } = keepWorkingLib;
  const detectLib = await import('./solve.keep-working.detect.lib.mjs');
  const { detectDeferredWorkInSources } = detectLib;

  const { resolveDefaultFallbackModel } = await import('./models/index.mjs');

  const sentryLib = await import('./sentry.lib.mjs');
  const { reportError } = sentryLib;

  const { plan } = config;

  await log('');
  await log(`🆙 ESCALATE: ${config.from} → ${config.to} (steps: ${config.steps} working session(s) per tier)`);
  await log(` Plan: ${formatEscalationPlan(plan)}`);
  await log(' Strategy: solve cheaply first; escalate to a more capable model only while unfinished work remains.');
  await log('');

  // Get PR merge state status for the iterations. It is read once and the same
  // value is passed to every iteration.
  // NOTE(review): the REST pulls endpoint documents `mergeable_state`, while
  // `mergeStateStatus` is the GraphQL field name — confirm this jq path
  // actually yields a value rather than "null".
  let currentMergeStateStatus = null;
  try {
    // `$` is wrapped via wrapDollarWithGhRetry above; the lazy import keeps this module
    // network-free for tests, so the lint rule (which only detects top-level rebinds) can't see it.
    // eslint-disable-next-line gh-rate-limit/no-direct-gh-exec -- $ is rate-limit-safe (wrapDollarWithGhRetry), rebound lazily above.
    const prStateResult = await $`gh api repos/${owner}/${repo}/pulls/${prNumber} --jq '.mergeStateStatus'`;
    if (prStateResult.code === 0) {
      currentMergeStateStatus = prStateResult.stdout.toString().trim();
    }
  } catch {
    // Best effort: the merge state is optional context for the iterations.
  }

  let sessionId;
  let anthropicTotalCostUSD;
  let publicPricingEstimate;
  let pricingInfo;
  let lastResultSummary = resultSummary;
  let consecutiveErrors = 0;
  const MAX_CONSECUTIVE_ERRORS = 3;
  let restartsRun = 0;

  // The first regular solve session = plan index 0. Continue escalating from 1.
  for (let sessionIndex = 1; sessionIndex < plan.length; sessionIndex++) {
    const model = resolveEscalationModel(plan, sessionIndex);
    const previousModel = resolveEscalationModel(plan, sessionIndex - 1);

    // Decide whether the cheaper model already finished. Re-scan the PR
    // description, AI solution summary and changed markdown documents for
    // deferred/unfinished-work indicators (same signal as keep-working).
    let sources;
    try {
      sources = await collectDeferredWorkSources({ owner, repo, prNumber, resultSummary: lastResultSummary });
    } catch (error) {
      reportError(error, { context: 'escalate_collect_sources', owner, repo, prNumber, operation: 'collect_sources' });
      await log(`⚠️ ESCALATE: Could not collect sources to evaluate completion: ${cleanErrorMessage(error)}`, { level: 'warning' });
      // Without sources the detector would see nothing and the run would be
      // reported as finished. Stop, but say that completion is unknown.
      await log(`🛑 ESCALATE: Completion after ${previousModel} could not be evaluated. Stopping before escalating to ${model}.`, { level: 'warning' });
      break;
    }
    const detections = detectDeferredWorkInSources(sources);

    if (detections.length === 0) {
      await log(`✅ ESCALATE: No unfinished-work indicators after ${previousModel} session(s). Stopping before escalating to ${model}.`);
      break;
    }

    await log('');
    await log(`🆙 ESCALATE: ${detections.length} unfinished-work indicator(s) remain after ${previousModel}; escalating to ${model} (session ${sessionIndex + 1}/${plan.length}).`);
    // Only the first few detections are printed to keep the log readable.
    for (const detection of detections.slice(0, 10)) {
      await log(` • [${detection.label}] in ${detection.source}: "${detection.snippet}"`);
    }

    // Sync local branch with remote before each iteration (issue #1572 pattern).
    try {
      const pullResult = await $({ cwd: tempDir })`git pull origin ${branchName} 2>&1`;
      if (pullResult.code === 0) {
        await log(` Synced local branch ${branchName} from remote`, { verbose: true });
      } else {
        await log(` Warning: git pull failed (code ${pullResult.code}); continuing with local state`, { level: 'warning' });
      }
    } catch (error) {
      reportError(error, { context: 'escalate_git_pull', branchName, operation: 'git_pull' });
      await log(` Warning: git pull error: ${cleanErrorMessage(error)}`, { level: 'warning' });
    }

    const feedbackLines = ['', '='.repeat(60), `🆙 ESCALATE MODE — now running on a more capable model (${model}):`, '='.repeat(60), '', `The previous working session(s) used "${previousModel}" but left unfinished work. You are a more capable model. Carefully review what has already been done, then finish every remaining requirement in this single pull request — do not defer, delay, or mark anything as out of scope. Ensure all changes are correct, consistent, validated, tested and that all CI/CD checks pass.`, ''];

    const fallbackModel = resolveDefaultFallbackModel(tool, model) || undefined;

    const iterationResult = await executeToolIteration({
      issueUrl,
      owner,
      repo,
      issueNumber,
      prNumber,
      branchName,
      tempDir,
      workspaceTmpDir,
      mergeStateStatus: currentMergeStateStatus,
      feedbackLines,
      argv: {
        ...argv,
        // Escalate to the next tier for this iteration.
        model,
        fallbackModel,
        // Reinforce the "finish everything now" guidance in the system prompt.
        promptEnsureAllRequirementsAreMet: true,
        // Prevent recursive escalation inside the restart iteration.
        escalate: undefined,
        escalateFrom: undefined,
      },
    });

    restartsRun++;

    // Keep the latest reported values; an iteration that omits a field does
    // not erase what an earlier iteration reported.
    if (iterationResult) {
      if (iterationResult.sessionId) sessionId = iterationResult.sessionId;
      if (iterationResult.anthropicTotalCostUSD) anthropicTotalCostUSD = iterationResult.anthropicTotalCostUSD;
      if (iterationResult.publicPricingEstimate) publicPricingEstimate = iterationResult.publicPricingEstimate;
      if (iterationResult.pricingInfo) pricingInfo = iterationResult.pricingInfo;
      if (iterationResult.result) lastResultSummary = iterationResult.result;
    }

    if (isUsageLimitReached(iterationResult)) {
      await log('🛑 ESCALATE: Usage limit reached during restart. Stopping escalate loop.');
      break;
    }
    if (isApiError(iterationResult)) {
      consecutiveErrors++;
      await log(`⚠️ ESCALATE: API error during ${model} restart (${consecutiveErrors}/${MAX_CONSECUTIVE_ERRORS} consecutive).`, { level: 'warning' });
      if (consecutiveErrors >= MAX_CONSECUTIVE_ERRORS) {
        await log('🛑 ESCALATE: Too many consecutive errors. Stopping escalate loop.');
        break;
      }
      // Move on to the next plan entry without claiming this session completed.
      continue;
    }
    consecutiveErrors = 0;

    await log(`✅ ESCALATE: ${model} session complete (${sessionIndex + 1}/${plan.length})`);
    await log('');
  }

  // Clean up CLAUDE.md/.gitkeep after restarts
  try {
    await cleanupClaudeFile(tempDir, branchName, null, argv);
  } catch (error) {
    // Cleanup is best effort; report the failure but do not fail the run.
    reportError(error, { context: 'escalate_cleanup', branchName, operation: 'cleanup_claude_file' });
  }

  if (restartsRun === 0) return null;
  return { sessionId, anthropicTotalCostUSD, publicPricingEstimate, pricingInfo };
};
488
+
489
+ export default {
490
+ MODEL_ESCALATION_ORDER,
491
+ DEFAULT_ESCALATE_LOWER,
492
+ DEFAULT_ESCALATE_UPPER,
493
+ DEFAULT_ESCALATE_RANGE,
494
+ DEFAULT_ESCALATE_STEPS,
495
+ canonicalTier,
496
+ parseEscalateRange,
497
+ parseEscalateFrom,
498
+ normalizeEscalateSteps,
499
+ buildEscalationPlan,
500
+ resolveEscalationModel,
501
+ isEscalateEnabled,
502
+ resolveEscalationConfig,
503
+ formatEscalationPlan,
504
+ runEscalation,
505
+ };
package/src/solve.mjs CHANGED
@@ -46,6 +46,7 @@ const { startWatchMode } = watchLib;
46
46
  const { startAutoRestartUntilMergeable } = await import('./solve.auto-merge.lib.mjs');
47
47
  const { runAutoEnsureRequirements } = await import('./solve.auto-ensure.lib.mjs');
48
48
  const { runKeepWorkingUntilDone } = await import('./solve.keep-working.lib.mjs');
49
+ const { runEscalation } = await import('./solve.escalate.lib.mjs');
49
50
  const exitHandler = await import('./exit-handler.lib.mjs');
50
51
  const { initializeExitHandler, installGlobalExitHandlers, safeExit, logActiveHandles } = exitHandler;
51
52
  const { createInterruptWrapper } = await import('./solve.interrupt.lib.mjs');
@@ -1270,10 +1271,9 @@ try {
1270
1271
  await log('⚠️ PR title/description still not updated after restart');
1271
1272
  }
1272
1273
  }
1273
-
1274
- // Issue #1383: --finalize
1274
+ // Post-solve restart loops (escalate #1885 first, then finalize #1383, then keep-working #1883):
1275
+ applyRestartResult(await runEscalation({ issueUrl, owner, repo, issueNumber, prNumber, branchName, tempDir, workspaceTmpDir, argv, cleanupClaudeFile, resultSummary }));
1275
1276
  applyRestartResult(await runAutoEnsureRequirements({ issueUrl, owner, repo, issueNumber, prNumber, branchName, tempDir, argv, cleanupClaudeFile }));
1276
- // Issue #1883: --keep-working-until-all-requirements-are-fully-done (detect deferred work and auto-restart until done)
1277
1277
  applyRestartResult(await runKeepWorkingUntilDone({ issueUrl, owner, repo, issueNumber, prNumber, branchName, tempDir, workspaceTmpDir, argv, cleanupClaudeFile, resultSummary }));
1278
1278
 
1279
1279
  // Start watch mode if enabled OR if we need to handle uncommitted changes