@link-assistant/hive-mind 1.76.2 → 1.77.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +33 -0
- package/package.json +1 -1
- package/src/solve.config.lib.mjs +57 -0
- package/src/solve.escalate.lib.mjs +505 -0
- package/src/solve.mjs +3 -3
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,38 @@
|
|
|
1
1
|
# @link-assistant/hive-mind
|
|
2
2
|
|
|
3
|
+
## 1.77.0
|
|
4
|
+
|
|
5
|
+
### Minor Changes
|
|
6
|
+
|
|
7
|
+
- a50d201: feat(solve): experimental `--escalate` mode (#1885)
|
|
8
|
+
|
|
9
|
+
Add an experimental `solve` option family that solves a task cheaply first and
|
|
10
|
+
escalates to a more capable (more expensive) model only while unfinished work
|
|
11
|
+
remains. The model ladder, cheapest → most capable, is `haiku < sonnet < opus <
|
|
12
|
+
fable`.
|
|
13
|
+
- `--escalate` (bare) → the default range `sonnet-fable`.
|
|
14
|
+
- `--escalate sonnet-opus` → an explicit `<lower>-<upper>` range (`-` delimits the
|
|
15
|
+
bounds; only the short ladder names are allowed inside a range).
|
|
16
|
+
- `--escalate-from haiku` → shortcut for `--escalate haiku-fable` (aliases such as
|
|
17
|
+
`opus-4-8` are accepted here, since a single value is unambiguous).
|
|
18
|
+
- `--escalate-steps N` (default 1) → keep each tier for N working sessions before
|
|
19
|
+
escalating (e.g. `2` → two sonnet sessions, then two opus, then two fable).
|
|
20
|
+
|
|
21
|
+
The first regular solve session runs on the range's lower bound (unless `--model`
|
|
22
|
+
is explicitly pinned). After it finishes, the escalate loop re-scans the pull
|
|
23
|
+
request for deferred/unfinished-work indicators — reusing the detector from issue
|
|
24
|
+
#1883 — and escalates to the next tier only if work remains; otherwise it stops
|
|
25
|
+
early so the expensive tiers are never invoked. Restarts are capped at 3
|
|
26
|
+
consecutive errors and stop on a usage limit. Escalate is Claude-only and runs
|
|
27
|
+
before `--finalize` / `--keep-working`.
|
|
28
|
+
|
|
29
|
+
Pure parsing/planning helpers live in a network-free module
|
|
30
|
+
(`src/solve.escalate.lib.mjs`) with full unit-test coverage
|
|
31
|
+
(`tests/test-escalate-1885.mjs`); a deep case study is compiled under
|
|
32
|
+
`docs/case-studies/issue-1885/`.
|
|
33
|
+
|
|
34
|
+
- 53a0544: Update Hive Mind Docker images to `konard/box` and `konard/box-dind` 2.3.1 so Docker-in-Docker deployments can use the upstream host-image passthrough allowlist.
|
|
35
|
+
|
|
3
36
|
## 1.76.2
|
|
4
37
|
|
|
5
38
|
### Patch Changes
|
package/package.json
CHANGED
package/src/solve.config.lib.mjs
CHANGED
|
@@ -10,6 +10,7 @@
|
|
|
10
10
|
import { enhanceErrorMessage, detectMalformedFlags } from './option-suggestions.lib.mjs';
|
|
11
11
|
import { defaultModels, buildModelOptionDescription, resolveDefaultFallbackModel, resolveRuntimeDefaultModel } from './models/index.mjs';
|
|
12
12
|
import { validateBranchName } from './solve.branch.lib.mjs';
|
|
13
|
+
import { resolveEscalationConfig, isEscalateEnabled, DEFAULT_ESCALATE_RANGE } from './solve.escalate.lib.mjs';
|
|
13
14
|
import { getLinoYargsFactory, hideBin, parseCliArgumentsWithLino } from './cli-arguments.lib.mjs';
|
|
14
15
|
|
|
15
16
|
// Re-export for use by telegram-bot.mjs (avoids extra import lines there)
|
|
@@ -610,6 +611,21 @@ export const SOLVE_OPTION_DEFINITIONS = {
|
|
|
610
611
|
alias: ['keep-going-until-all-requirements-are-fully-done', 'keep-working', 'keep-going'],
|
|
611
612
|
default: undefined,
|
|
612
613
|
},
|
|
614
|
+
escalate: {
|
|
615
|
+
type: 'string',
|
|
616
|
+
description: '[EXPERIMENTAL] Start solving with a cheaper/lower-tier model and automatically escalate to a more capable (more expensive) model while unfinished work remains. Accepts a model range "<lower>-<upper>" using short Claude tier names (ladder: haiku < sonnet < opus < fable), e.g. "sonnet-opus". A single name (e.g. "opus") means just that tier. Bare flag means "sonnet-fable". The idea: iterate cheaply first so expensive models do more reading and less writing.',
|
|
617
|
+
default: undefined,
|
|
618
|
+
},
|
|
619
|
+
'escalate-from': {
|
|
620
|
+
type: 'string',
|
|
621
|
+
description: '[EXPERIMENTAL] Shortcut for --escalate <model>-fable: start solving from the given model (haiku/sonnet/opus/fable, aliases accepted) and escalate up to the top of the ladder while unfinished work remains. Takes precedence over --escalate when both are given.',
|
|
622
|
+
default: undefined,
|
|
623
|
+
},
|
|
624
|
+
'escalate-steps': {
|
|
625
|
+
type: 'number',
|
|
626
|
+
description: '[EXPERIMENTAL] How many working sessions to keep each model tier before escalating to the next one (default: 1). For example 2 keeps the lower tier for two working sessions, then the next tier for two, and so on. Only used with --escalate / --escalate-from.',
|
|
627
|
+
default: 1,
|
|
628
|
+
},
|
|
613
629
|
'working-session-live-progress': {
|
|
614
630
|
type: 'string',
|
|
615
631
|
description: '[EXPERIMENTAL] Enable live progress monitoring. Accepts "comment" (default, updates a per-session PR comment) or "pr" (updates PR description). Plain --working-session-live-progress means "comment". Works with or without --interactive-mode.',
|
|
@@ -883,6 +899,34 @@ export const parseArguments = async (yargs = getLinoYargsFactory(), hideBinFn =
|
|
|
883
899
|
}
|
|
884
900
|
}
|
|
885
901
|
|
|
902
|
+
// --escalate / --escalate-from / --escalate-steps normalization (issue #1885)
|
|
903
|
+
// The bare `--escalate` flag is a string-typed option, so yargs yields `true`
|
|
904
|
+
// (or an empty string) for a value-less flag. Canonicalize that to the default
|
|
905
|
+
// range so downstream parsing in solve.escalate.lib.mjs sees a meaningful
|
|
906
|
+
// value. We also validate the range/steps eagerly here so misuse fails fast at
|
|
907
|
+
// config time rather than mid-solve.
|
|
908
|
+
{
|
|
909
|
+
const escalateProvided = hasRawOption(rawArgs, '--escalate');
|
|
910
|
+
if (escalateProvided) {
|
|
911
|
+
const current = argv.escalate;
|
|
912
|
+
if (current === true || current === '' || current === undefined || current === null) {
|
|
913
|
+
argv.escalate = DEFAULT_ESCALATE_RANGE;
|
|
914
|
+
} else if (typeof current === 'string') {
|
|
915
|
+
argv.escalate = current.trim().toLowerCase();
|
|
916
|
+
}
|
|
917
|
+
} else if (argv.escalate === undefined) {
|
|
918
|
+
argv.escalate = undefined;
|
|
919
|
+
}
|
|
920
|
+
if (typeof argv.escalateFrom === 'string') {
|
|
921
|
+
argv.escalateFrom = argv.escalateFrom.trim().toLowerCase();
|
|
922
|
+
}
|
|
923
|
+
// Validate eagerly (throws on invalid range / from / steps). resolveEscalationConfig
|
|
924
|
+
// is a no-op (returns null) when the feature is disabled.
|
|
925
|
+
if (isEscalateEnabled(argv)) {
|
|
926
|
+
resolveEscalationConfig(argv);
|
|
927
|
+
}
|
|
928
|
+
}
|
|
929
|
+
|
|
886
930
|
// --working-session-live-progress normalization
|
|
887
931
|
// When passed as --working-session-live-progress (no value), yargs gives true for string type
|
|
888
932
|
// Normalize: true → "comment", validate known values
|
|
@@ -911,6 +955,19 @@ export const parseArguments = async (yargs = getLinoYargsFactory(), hideBinFn =
|
|
|
911
955
|
argv.model = await resolveRuntimeDefaultModel(argv.tool);
|
|
912
956
|
}
|
|
913
957
|
|
|
958
|
+
// Escalate mode (issue #1885): when enabled and the user did not explicitly
|
|
959
|
+
// pin a model, the very first regular solve session should run on the cheapest
|
|
960
|
+
// tier in the plan (the range's lower bound). The restart loop in
|
|
961
|
+
// solve.escalate.lib.mjs then escalates upward while unfinished work remains.
|
|
962
|
+
// An explicit --model always wins (the user pinned the worker model on
|
|
963
|
+
// purpose), so only override the resolved default.
|
|
964
|
+
if (isEscalateEnabled(argv) && !modelExplicitlyProvided && (argv.tool || 'claude') === 'claude') {
|
|
965
|
+
const escalationConfig = resolveEscalationConfig(argv);
|
|
966
|
+
if (escalationConfig && escalationConfig.plan.length > 0) {
|
|
967
|
+
argv.model = escalationConfig.plan[0];
|
|
968
|
+
}
|
|
969
|
+
}
|
|
970
|
+
|
|
914
971
|
if (argv.tool && !fallbackModelExplicitlyProvided) {
|
|
915
972
|
const defaultFallbackModel = resolveDefaultFallbackModel(argv.tool, argv.model);
|
|
916
973
|
argv.fallbackModel = defaultFallbackModel || undefined;
|
|
@@ -0,0 +1,505 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* Escalate-mode module for solve.mjs
|
|
5
|
+
*
|
|
6
|
+
* [EXPERIMENTAL] `--escalate` makes the solver try to solve a task fast and
|
|
7
|
+
* cheap first (with a lower-tier model), and only escalate to a more capable
|
|
8
|
+
* (and more expensive) model when the cheaper model did not finish the job.
|
|
9
|
+
*
|
|
10
|
+
* The intuition (from issue #1885): small models often get *most* of the work
|
|
11
|
+
* right, but not quite right. By iterating cheaply first, the more expensive
|
|
12
|
+
* models spend their budget reading and refining an existing draft rather than
|
|
13
|
+
* writing everything from scratch.
|
|
14
|
+
*
|
|
15
|
+
* Model ladder (Claude), cheapest → most capable:
|
|
16
|
+
*
|
|
17
|
+
* haiku → sonnet → opus → fable
|
|
18
|
+
*
|
|
19
|
+
* Options:
|
|
20
|
+
* --escalate [lower-upper] Enable escalate mode. Bare flag means the default
|
|
21
|
+
* range `sonnet-fable`. `sonnet-opus` sets the lower
|
|
22
|
+
* and upper bound (delimiter is `-`). A single name
|
|
23
|
+
* (e.g. `opus`) means just that one tier.
|
|
24
|
+
* --escalate-from <model> Shortcut for `--escalate <model>-fable` (escalate
|
|
25
|
+
* from <model> up to the top of the ladder).
|
|
26
|
+
* --escalate-steps <n> How many working sessions to keep each tier before
|
|
27
|
+
* escalating to the next one (default: 1). For
|
|
28
|
+
* example `2` keeps the lower tier for two working
|
|
29
|
+
* sessions, then the next tier for two, and so on.
|
|
30
|
+
*
|
|
31
|
+
* The pure parsing/planning helpers in this module are network-free so they can
|
|
32
|
+
* be unit-tested in isolation. The `runEscalation` orchestrator restarts the AI
|
|
33
|
+
* tool with the escalated model, reusing the same deferred-work detection that
|
|
34
|
+
* powers `--keep-working-until-all-requirements-are-fully-done` (issue #1883) as
|
|
35
|
+
* the "did the cheaper model finish?" signal.
|
|
36
|
+
*
|
|
37
|
+
* @see https://github.com/link-assistant/hive-mind/issues/1885
|
|
38
|
+
*/
|
|
39
|
+
|
|
40
|
+
// ───────────────────────────── Pure helpers ──────────────────────────────────
|
|
41
|
+
// Everything above the `runEscalation` orchestrator is pure (no I/O, no network)
|
|
42
|
+
// so it can be imported and unit-tested without side effects.
|
|
43
|
+
|
|
44
|
+
/**
 * Claude tier ladder for escalate mode, ordered from the cheapest tier to the
 * most capable one. Only the short canonical names appear here; longer aliases
 * (e.g. `opus-4-8`, `claude-fable-5`) are mapped onto them by {@link canonicalTier}.
 */
export const MODEL_ESCALATION_ORDER = ['haiku', 'sonnet', 'opus', 'fable'];

/** Lower bound assumed when `--escalate` carries no explicit range. */
export const DEFAULT_ESCALATE_LOWER = 'sonnet';

/** Upper bound assumed by default (the top of the ladder). */
export const DEFAULT_ESCALATE_UPPER = 'fable';

/** Range string substituted for a bare `--escalate` flag (`<lower>-<upper>`). */
export const DEFAULT_ESCALATE_RANGE = [DEFAULT_ESCALATE_LOWER, DEFAULT_ESCALATE_UPPER].join('-');

/** Working sessions each tier gets before escalating, unless overridden. */
export const DEFAULT_ESCALATE_STEPS = 1;
|
|
62
|
+
|
|
63
|
+
/**
 * Mapping of known model aliases → canonical tier name. Lets users pass either
 * the short tier name (`opus`) or a more specific alias (`opus-4-8`,
 * `claude-opus-4-8`) wherever a single model is accepted (e.g. --escalate-from).
 */
const TIER_ALIASES = {
  haiku: 'haiku',
  'haiku-4-5': 'haiku',
  'claude-haiku-4-5': 'haiku',
  'claude-haiku-4-5-20251001': 'haiku',
  sonnet: 'sonnet',
  'sonnet-4-6': 'sonnet',
  'sonnet-4-5': 'sonnet',
  'claude-sonnet-4-6': 'sonnet',
  'claude-sonnet-4-5': 'sonnet',
  opus: 'opus',
  'opus-4-8': 'opus',
  'opus-4-7': 'opus',
  'opus-4-6': 'opus',
  'opus-4-5': 'opus',
  'claude-opus-4-8': 'opus',
  'claude-opus-4-7': 'opus',
  fable: 'fable',
  'fable-5': 'fable',
  'claude-fable-5': 'fable',
};

/**
 * Normalize a model name/alias to its canonical escalate-ladder tier.
 *
 * Matching is case-insensitive and ignores surrounding whitespace.
 *
 * @param {string} name
 * @returns {string|null} canonical tier name, or null if `name` is not a
 *   string or not a known tier/alias.
 */
export const canonicalTier = name => {
  if (typeof name !== 'string') return null;
  const key = name.trim().toLowerCase();
  // Own-property check only: a plain `TIER_ALIASES[key]` lookup also matches
  // inherited keys such as "constructor" or "__proto__" and would hand back a
  // function/object instead of a tier name. An empty key is simply not an
  // own property, so it falls through to null as before.
  return Object.hasOwn(TIER_ALIASES, key) ? TIER_ALIASES[key] : null;
};
|
|
101
|
+
|
|
102
|
+
/**
 * Parse a `--escalate` range value into { from, to } canonical tier names.
 *
 * Accepted forms:
 *   - true / '' / undefined / null → the default range (`sonnet-fable`)
 *   - `sonnet`                     → { from: 'sonnet', to: 'sonnet' }
 *   - `sonnet-fable`               → { from: 'sonnet', to: 'fable' }
 *
 * The delimiter is `-`. Only the short ladder names (haiku|sonnet|opus|fable)
 * are accepted inside a range to avoid ambiguity with dashed aliases such as
 * `opus-4-8` (use --escalate-from for those). Matching is case-insensitive and
 * whitespace around the whole value or around either bound is ignored.
 *
 * @param {string|boolean|undefined} value
 * @returns {{ from: string, to: string }}
 * @throws {Error} on a non-string value, an unknown tier name, more than two
 *   `-`-separated parts, or a lower bound above the upper bound.
 */
export const parseEscalateRange = value => {
  // A value-less flag (`true`) or a missing value selects the default range.
  const raw = value === true || value === undefined || value === null ? DEFAULT_ESCALATE_RANGE : value;
  if (typeof raw !== 'string') {
    throw new Error(`Invalid --escalate value: ${JSON.stringify(value)}. Expected a model range like "sonnet-fable".`);
  }

  const trimmed = raw.trim().toLowerCase();
  const rangeText = trimmed === '' ? DEFAULT_ESCALATE_RANGE : trimmed;
  // Trim each bound so "sonnet - opus" is treated like "sonnet-opus".
  const parts = rangeText.split('-').map(part => part.trim());

  const order = MODEL_ESCALATION_ORDER;
  const requireLadderName = part => {
    if (!order.includes(part)) {
      throw new Error(`Invalid --escalate model "${part}". Expected one of: ${order.join(', ')} (range form: "${DEFAULT_ESCALATE_RANGE}").`);
    }
    return part;
  };

  if (parts.length > 2) {
    // Three or more parts usually means a dashed alias (e.g. "opus-4-8") was used.
    throw new Error(`Invalid --escalate range "${rangeText}". Expected "<lower>-<upper>" with short model names (e.g. "${DEFAULT_ESCALATE_RANGE}").`);
  }

  const from = requireLadderName(parts[0]);
  // A single name means a one-tier range.
  const to = parts.length === 2 ? requireLadderName(parts[1]) : from;

  if (order.indexOf(from) > order.indexOf(to)) {
    throw new Error(`Invalid --escalate range "${rangeText}": lower bound "${from}" is more capable than upper bound "${to}". Order is ${order.join(' < ')}.`);
  }

  return { from, to };
};
|
|
158
|
+
|
|
159
|
+
/**
 * Turn a `--escalate-from` value into escalation bounds. The lower bound is the
 * canonical tier for `value` (short names and aliases such as `opus-4-8` are
 * both resolved through `canonicalTier`); the upper bound is always the
 * default upper tier.
 *
 * @param {string} value
 * @returns {{ from: string, to: string }}
 * @throws {Error} when `value` does not resolve to a known tier.
 */
export const parseEscalateFrom = value => {
  const tier = canonicalTier(value);
  if (tier) {
    return { from: tier, to: DEFAULT_ESCALATE_UPPER };
  }
  const allowedNames = MODEL_ESCALATION_ORDER.join(', ');
  throw new Error(`Invalid --escalate-from model ${JSON.stringify(value)}. Expected one of: ${allowedNames}.`);
};
|
|
173
|
+
|
|
174
|
+
/**
 * Normalize the `--escalate-steps` value into a positive integer (default 1).
 *
 * `undefined`, `null`, `true` (value-less flag) and `''` select the default.
 * Strings are trimmed and converted with `Number`.
 *
 * @param {string|number|undefined} value
 * @returns {number}
 * @throws {Error} on a non-positive, non-integer or non-numeric value.
 */
export const normalizeEscalateSteps = value => {
  if (value === undefined || value === null || value === true || value === '') {
    return DEFAULT_ESCALATE_STEPS;
  }
  const n = typeof value === 'number' ? value : Number(String(value).trim());
  // Number.isInteger already rejects NaN and ±Infinity.
  if (!Number.isInteger(n) || n < 1) {
    // JSON.stringify renders NaN/Infinity as "null", which hides what was
    // actually passed (a number-typed CLI option turns "abc" into NaN), so
    // numeric input is shown verbatim instead.
    const shown = typeof value === 'number' ? String(value) : JSON.stringify(value);
    throw new Error(`Invalid --escalate-steps value: ${shown}. Expected a positive integer (>= 1).`);
  }
  return n;
};
|
|
190
|
+
|
|
191
|
+
/**
 * Expand escalation bounds into the ordered list of models to run (the
 * "escalation plan"). Every ladder tier from `from` through `to`, inclusive,
 * appears `steps` times in a row.
 *
 * Example: { from: 'sonnet', to: 'fable', steps: 2 } →
 *   ['sonnet', 'sonnet', 'opus', 'opus', 'fable', 'fable']
 *
 * @param {{ from: string, to: string, steps?: number }} params
 * @returns {string[]}
 * @throws {Error} when either bound is not on the ladder or `from` sits above `to`.
 */
export const buildEscalationPlan = ({ from, to, steps = DEFAULT_ESCALATE_STEPS }) => {
  const lowIndex = MODEL_ESCALATION_ORDER.indexOf(from);
  const highIndex = MODEL_ESCALATION_ORDER.indexOf(to);
  const boundsAreValid = lowIndex !== -1 && highIndex !== -1 && lowIndex <= highIndex;
  if (!boundsAreValid) {
    throw new Error(`Invalid escalation bounds: from="${from}", to="${to}".`);
  }

  const plan = [];
  MODEL_ESCALATION_ORDER.slice(lowIndex, highIndex + 1).forEach(tier => {
    // Repeat the tier once per configured step.
    let repeats = 0;
    while (repeats < steps) {
      plan.push(tier);
      repeats += 1;
    }
  });
  return plan;
};
|
|
217
|
+
|
|
218
|
+
/**
 * Pick the model for a 0-based working-session index. The index is clamped
 * into the plan: anything below 0 maps to the first entry and anything past
 * the end maps to the last entry.
 *
 * @param {string[]} plan
 * @param {number} sessionIndex
 * @returns {string} the plan entry at the clamped index; `undefined` when
 *   `plan` is not a non-empty array.
 */
export const resolveEscalationModel = (plan, sessionIndex) => {
  const hasPlan = Array.isArray(plan) && plan.length > 0;
  if (!hasPlan) return undefined;

  const lastIndex = plan.length - 1;
  const cappedAtTop = Math.min(sessionIndex, lastIndex);
  return plan[Math.max(0, cappedAtTop)];
};
|
|
231
|
+
|
|
232
|
+
/**
 * Report whether escalate mode is switched on in the parsed argv: true when
 * either `argv.escalate` or `argv.escalateFrom` is truthy.
 *
 * @param {object} argv
 * @returns {boolean} false for a missing argv.
 */
export const isEscalateEnabled = argv => (argv ? Boolean(argv.escalate || argv.escalateFrom) : false);
|
|
241
|
+
|
|
242
|
+
/**
 * Build the complete escalation configuration from argv, or return null when
 * escalate mode is not enabled.
 *
 * When both options are present, `--escalate-from` decides the bounds and
 * `--escalate` is ignored.
 *
 * @param {object} argv
 * @returns {{ enabled: boolean, from: string, to: string, steps: number, plan: string[] }|null}
 */
export const resolveEscalationConfig = argv => {
  if (!isEscalateEnabled(argv)) return null;

  let bounds;
  if (argv.escalateFrom) {
    bounds = parseEscalateFrom(argv.escalateFrom);
  } else {
    bounds = parseEscalateRange(argv.escalate);
  }
  const { from, to } = bounds;
  const steps = normalizeEscalateSteps(argv.escalateSteps);

  return { enabled: true, from, to, steps, plan: buildEscalationPlan({ from, to, steps }) };
};
|
|
258
|
+
|
|
259
|
+
/**
 * Render an escalation plan as a single human-readable line. Runs of the same
 * model are collapsed into "model×N" and the groups are joined with " → ".
 *
 * @param {string[]} plan
 * @returns {string} the rendered plan, or "(empty)" when `plan` is not a
 *   non-empty array.
 */
export const formatEscalationPlan = plan => {
  if (!Array.isArray(plan) || plan.length === 0) return '(empty)';

  // Each run is a [model, count] pair for one stretch of identical entries.
  const runs = [];
  for (let i = 0; i < plan.length; i += 1) {
    const model = plan[i];
    const currentRun = runs.length > 0 ? runs[runs.length - 1] : undefined;
    if (currentRun && currentRun[0] === model) {
      currentRun[1] += 1;
    } else {
      runs.push([model, 1]);
    }
  }

  const labels = runs.map(([model, count]) => (count > 1 ? `${model}×${count}` : model));
  return labels.join(' → ');
};
|
|
278
|
+
|
|
279
|
+
// ─────────────────────────── Orchestrator (I/O) ──────────────────────────────
|
|
280
|
+
|
|
281
|
+
// Lazy module bindings are set up inside runEscalation so that importing this
|
|
282
|
+
// module for its pure helpers (e.g. in tests) does not pull in command-stream,
|
|
283
|
+
// the network bootstrap, or other heavy dependencies.
|
|
284
|
+
|
|
285
|
+
/**
 * Runs escalate restart iterations after the main solve.
 *
 * The first regular solve session is expected to have run already on plan
 * index 0 (see the config-time model override in solve.config.lib.mjs), so
 * this loop starts at plan index 1. Before each restart it re-scans for
 * deferred / unfinished work (the same detector used by keep-working). If no
 * unfinished-work indicators remain, the previous session is considered to
 * have succeeded and the loop stops early, so the more expensive models are
 * never invoked.
 *
 * The loop also stops when the completion scan itself fails (completion cannot
 * be judged), when a usage limit is reached, or after 3 consecutive API errors.
 *
 * @param {object} params
 * @param {string} params.issueUrl
 * @param {string} params.owner
 * @param {string} params.repo
 * @param {string|number} params.issueNumber
 * @param {string|number} params.prNumber
 * @param {string} params.branchName
 * @param {string} params.tempDir
 * @param {string} [params.workspaceTmpDir]
 * @param {object} params.argv - CLI arguments
 * @param {function} params.cleanupClaudeFile - cleanup function
 * @param {string} [params.resultSummary] - AI solution summary from the last session
 * @returns {Promise<{sessionId, anthropicTotalCostUSD, publicPricingEstimate, pricingInfo}|null>}
 *   The values most recently reported by a restart iteration, or null when
 *   escalation is disabled, no PR number is given, the tool is not `claude`,
 *   or no restart was run.
 */
export const runEscalation = async ({ issueUrl, owner, repo, issueNumber, prNumber, branchName, tempDir, workspaceTmpDir, argv, cleanupClaudeFile, resultSummary }) => {
  const config = resolveEscalationConfig(argv);
  if (!config || !prNumber) {
    return null;
  }

  // Import shared library functions lazily (network bootstrap lives here).
  const lib = await import('./lib.mjs');
  const { log, cleanErrorMessage } = lib;

  // Escalate mode only makes sense for the Claude model ladder. For other tools
  // we skip with a clear message rather than misusing the ladder names.
  const tool = argv.tool || 'claude';
  if (tool !== 'claude') {
    await log(`ℹ️ ESCALATE: --escalate is only supported with --tool claude (current tool: ${tool}). Skipping.`, { level: 'warning' });
    return null;
  }

  if (typeof globalThis.use === 'undefined') {
    // SECURITY NOTE(review): this evaluates JavaScript fetched from unpkg at
    // runtime. It is left unchanged here; pinning a version / integrity check
    // should be decided for the project as a whole.
    globalThis.use = (await eval(await (await fetch('https://unpkg.com/use-m/use.js')).text())).use;
  }
  const use = globalThis.use;
  const { $: __rawDollar$ } = await use('command-stream');
  const { wrapDollarWithGhRetry } = await import('./github-rate-limit.lib.mjs');
  const $ = wrapDollarWithGhRetry(__rawDollar$);

  const restartShared = await import('./solve.restart-shared.lib.mjs');
  const { executeToolIteration, isApiError, isUsageLimitReached } = restartShared;

  const keepWorkingLib = await import('./solve.keep-working.lib.mjs');
  const { collectDeferredWorkSources } = keepWorkingLib;
  const detectLib = await import('./solve.keep-working.detect.lib.mjs');
  const { detectDeferredWorkInSources } = detectLib;

  const { resolveDefaultFallbackModel } = await import('./models/index.mjs');

  const sentryLib = await import('./sentry.lib.mjs');
  const { reportError } = sentryLib;

  const { plan } = config;

  await log('');
  await log(`🆙 ESCALATE: ${config.from} → ${config.to} (steps: ${config.steps} working session(s) per tier)`);
  await log(`   Plan: ${formatEscalationPlan(plan)}`);
  await log('   Strategy: solve cheaply first; escalate to a more capable model only while unfinished work remains.');
  await log('');

  // Get PR merge state status for the iterations (best effort: stays null on failure).
  // NOTE(review): the REST pulls endpoint documents `mergeable_state`; confirm
  // that `.mergeStateStatus` actually yields a value for this request.
  let currentMergeStateStatus = null;
  try {
    // `$` is wrapped via wrapDollarWithGhRetry above; the lazy import keeps this module
    // network-free for tests, so the lint rule (which only detects top-level rebinds) can't see it.
    // eslint-disable-next-line gh-rate-limit/no-direct-gh-exec -- $ is rate-limit-safe (wrapDollarWithGhRetry), rebound lazily above.
    const prStateResult = await $`gh api repos/${owner}/${repo}/pulls/${prNumber} --jq '.mergeStateStatus'`;
    if (prStateResult.code === 0) {
      currentMergeStateStatus = prStateResult.stdout.toString().trim();
    }
  } catch {
    // Ignore errors getting merge state
  }

  let sessionId;
  let anthropicTotalCostUSD;
  let publicPricingEstimate;
  let pricingInfo;
  let lastResultSummary = resultSummary;
  let consecutiveErrors = 0;
  const MAX_CONSECUTIVE_ERRORS = 3;
  let restartsRun = 0;

  // The first regular solve session = plan index 0. Continue escalating from 1.
  for (let sessionIndex = 1; sessionIndex < plan.length; sessionIndex++) {
    const model = resolveEscalationModel(plan, sessionIndex);
    const previousModel = resolveEscalationModel(plan, sessionIndex - 1);
    // With --escalate-steps > 1 consecutive plan entries name the same tier, so
    // the next session is not always a step up; word the messages accordingly.
    const isTierChange = model !== previousModel;
    const nextSessionLabel = isTierChange ? `escalating to ${model}` : `the next ${model} session`;

    // Decide whether the previous session already finished. Re-scan the PR
    // description, AI solution summary and changed markdown documents for
    // deferred/unfinished-work indicators (same signal as keep-working).
    let sources = [];
    let sourcesCollected = true;
    try {
      sources = await collectDeferredWorkSources({ owner, repo, prNumber, resultSummary: lastResultSummary });
    } catch (error) {
      sourcesCollected = false;
      reportError(error, { context: 'escalate_collect_sources', owner, repo, prNumber, operation: 'collect_sources' });
      await log(`⚠️ ESCALATE: Could not collect sources to evaluate completion: ${cleanErrorMessage(error)}`, { level: 'warning' });
    }

    if (!sourcesCollected) {
      // Without sources nothing can be detected; do not report that as
      // "no unfinished work" — stop and say why.
      await log(`🛑 ESCALATE: Completion after ${previousModel} could not be evaluated. Stopping before ${nextSessionLabel}.`, { level: 'warning' });
      break;
    }

    const detections = detectDeferredWorkInSources(sources);

    if (detections.length === 0) {
      await log(`✅ ESCALATE: No unfinished-work indicators after ${previousModel} session(s). Stopping before ${nextSessionLabel}.`);
      break;
    }

    await log('');
    await log(`🆙 ESCALATE: ${detections.length} unfinished-work indicator(s) remain after ${previousModel}; ${isTierChange ? `escalating to ${model}` : `running another ${model} session`} (session ${sessionIndex + 1}/${plan.length}).`);
    // Cap the listing at 10 entries to keep the log readable.
    for (const detection of detections.slice(0, 10)) {
      await log(`   • [${detection.label}] in ${detection.source}: "${detection.snippet}"`);
    }

    // Sync local branch with remote before each iteration (issue #1572 pattern).
    try {
      const pullResult = await $({ cwd: tempDir })`git pull origin ${branchName} 2>&1`;
      if (pullResult.code === 0) {
        await log(`   Synced local branch ${branchName} from remote`, { verbose: true });
      } else {
        await log(`   Warning: git pull failed (code ${pullResult.code}); continuing with local state`, { level: 'warning' });
      }
    } catch (error) {
      reportError(error, { context: 'escalate_git_pull', branchName, operation: 'git_pull' });
      await log(`   Warning: git pull error: ${cleanErrorMessage(error)}`, { level: 'warning' });
    }

    // Only claim "more capable model" when the tier really changed.
    const feedbackHeading = isTierChange ? `🆙 ESCALATE MODE — now running on a more capable model (${model}):` : `🆙 ESCALATE MODE — another working session on the same model (${model}):`;
    const finishInstructions = 'Carefully review what has already been done, then finish every remaining requirement in this single pull request — do not defer, delay, or mark anything as out of scope. Ensure all changes are correct, consistent, validated, tested and that all CI/CD checks pass.';
    const feedbackBody = isTierChange ? `The previous working session(s) used "${previousModel}" but left unfinished work. You are a more capable model. ${finishInstructions}` : `The previous working session(s) used "${previousModel}" but left unfinished work. ${finishInstructions}`;
    const feedbackLines = ['', '='.repeat(60), feedbackHeading, '='.repeat(60), '', feedbackBody, ''];

    const fallbackModel = resolveDefaultFallbackModel(tool, model) || undefined;

    const iterationResult = await executeToolIteration({
      issueUrl,
      owner,
      repo,
      issueNumber,
      prNumber,
      branchName,
      tempDir,
      workspaceTmpDir,
      mergeStateStatus: currentMergeStateStatus,
      feedbackLines,
      argv: {
        ...argv,
        // Run this iteration on the planned tier.
        model,
        fallbackModel,
        // Reinforce the "finish everything now" guidance in the system prompt.
        promptEnsureAllRequirementsAreMet: true,
        // Prevent recursive escalation inside the restart iteration.
        escalate: undefined,
        escalateFrom: undefined,
      },
    });

    restartsRun++;

    // Keep the most recent value of each field a restart actually reported.
    if (iterationResult) {
      if (iterationResult.sessionId) sessionId = iterationResult.sessionId;
      if (iterationResult.anthropicTotalCostUSD) anthropicTotalCostUSD = iterationResult.anthropicTotalCostUSD;
      if (iterationResult.publicPricingEstimate) publicPricingEstimate = iterationResult.publicPricingEstimate;
      if (iterationResult.pricingInfo) pricingInfo = iterationResult.pricingInfo;
      if (iterationResult.result) lastResultSummary = iterationResult.result;
    }

    if (isUsageLimitReached(iterationResult)) {
      await log('🛑 ESCALATE: Usage limit reached during restart. Stopping escalate loop.');
      break;
    }
    if (isApiError(iterationResult)) {
      consecutiveErrors++;
      await log(`⚠️ ESCALATE: API error during ${model} restart (${consecutiveErrors}/${MAX_CONSECUTIVE_ERRORS} consecutive).`, { level: 'warning' });
      if (consecutiveErrors >= MAX_CONSECUTIVE_ERRORS) {
        await log('🛑 ESCALATE: Too many consecutive errors. Stopping escalate loop.');
        break;
      }
      // A failed session is not reported as complete; move on to the next plan entry.
      await log('');
      continue;
    }

    consecutiveErrors = 0;
    await log(`✅ ESCALATE: ${model} session complete (${sessionIndex + 1}/${plan.length})`);
    await log('');
  }

  // Clean up CLAUDE.md/.gitkeep after restarts
  try {
    await cleanupClaudeFile(tempDir, branchName, null, argv);
  } catch (error) {
    reportError(error, { context: 'escalate_cleanup', branchName, operation: 'cleanup_claude_file' });
  }

  if (restartsRun === 0) return null;
  return { sessionId, anthropicTotalCostUSD, publicPricingEstimate, pricingInfo };
};
|
|
488
|
+
|
|
489
|
+
/**
 * Default export: one object bundling every named export of this module, for
 * callers that import the module as a single value.
 */
const escalateModule = {
  MODEL_ESCALATION_ORDER,
  DEFAULT_ESCALATE_LOWER,
  DEFAULT_ESCALATE_UPPER,
  DEFAULT_ESCALATE_RANGE,
  DEFAULT_ESCALATE_STEPS,
  canonicalTier,
  parseEscalateRange,
  parseEscalateFrom,
  normalizeEscalateSteps,
  buildEscalationPlan,
  resolveEscalationModel,
  isEscalateEnabled,
  resolveEscalationConfig,
  formatEscalationPlan,
  runEscalation,
};

export default escalateModule;
|
package/src/solve.mjs
CHANGED
|
@@ -46,6 +46,7 @@ const { startWatchMode } = watchLib;
|
|
|
46
46
|
const { startAutoRestartUntilMergeable } = await import('./solve.auto-merge.lib.mjs');
|
|
47
47
|
const { runAutoEnsureRequirements } = await import('./solve.auto-ensure.lib.mjs');
|
|
48
48
|
const { runKeepWorkingUntilDone } = await import('./solve.keep-working.lib.mjs');
|
|
49
|
+
const { runEscalation } = await import('./solve.escalate.lib.mjs');
|
|
49
50
|
const exitHandler = await import('./exit-handler.lib.mjs');
|
|
50
51
|
const { initializeExitHandler, installGlobalExitHandlers, safeExit, logActiveHandles } = exitHandler;
|
|
51
52
|
const { createInterruptWrapper } = await import('./solve.interrupt.lib.mjs');
|
|
@@ -1270,10 +1271,9 @@ try {
|
|
|
1270
1271
|
await log('⚠️ PR title/description still not updated after restart');
|
|
1271
1272
|
}
|
|
1272
1273
|
}
|
|
1273
|
-
|
|
1274
|
-
|
|
1274
|
+
// Post-solve restart loops (escalate #1885 first, then finalize #1383, then keep-working #1883):
|
|
1275
|
+
applyRestartResult(await runEscalation({ issueUrl, owner, repo, issueNumber, prNumber, branchName, tempDir, workspaceTmpDir, argv, cleanupClaudeFile, resultSummary }));
|
|
1275
1276
|
applyRestartResult(await runAutoEnsureRequirements({ issueUrl, owner, repo, issueNumber, prNumber, branchName, tempDir, argv, cleanupClaudeFile }));
|
|
1276
|
-
// Issue #1883: --keep-working-until-all-requirements-are-fully-done (detect deferred work and auto-restart until done)
|
|
1277
1277
|
applyRestartResult(await runKeepWorkingUntilDone({ issueUrl, owner, repo, issueNumber, prNumber, branchName, tempDir, workspaceTmpDir, argv, cleanupClaudeFile, resultSummary }));
|
|
1278
1278
|
|
|
1279
1279
|
// Start watch mode if enabled OR if we need to handle uncommitted changes
|