@visorcraft/idlehands 2.0.0 → 2.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agent/context-budget.js +103 -0
- package/dist/agent/context-budget.js.map +1 -0
- package/dist/agent/tool-loop-detection.js +91 -20
- package/dist/agent/tool-loop-detection.js.map +1 -1
- package/dist/agent.js +55 -11
- package/dist/agent.js.map +1 -1
- package/dist/anton/controller.js +512 -186
- package/dist/anton/controller.js.map +1 -1
- package/dist/anton/preflight.js +52 -24
- package/dist/anton/preflight.js.map +1 -1
- package/dist/anton/session.js +6 -0
- package/dist/anton/session.js.map +1 -1
- package/dist/bot/anton-run.js +16 -5
- package/dist/bot/anton-run.js.map +1 -1
- package/dist/bot/discord-commands.js +25 -0
- package/dist/bot/discord-commands.js.map +1 -1
- package/dist/bot/discord.js +28 -0
- package/dist/bot/discord.js.map +1 -1
- package/dist/bot/format.js +0 -5
- package/dist/bot/format.js.map +1 -1
- package/dist/bot/telegram-commands.js +21 -0
- package/dist/bot/telegram-commands.js.map +1 -1
- package/dist/bot/telegram.js +3 -1
- package/dist/bot/telegram.js.map +1 -1
- package/dist/bot/upgrade-command.js +398 -0
- package/dist/bot/upgrade-command.js.map +1 -0
- package/dist/bot/ux/shared-formatter.js +43 -0
- package/dist/bot/ux/shared-formatter.js.map +1 -0
- package/dist/cli/commands/upgrade.js +27 -0
- package/dist/cli/commands/upgrade.js.map +1 -0
- package/dist/history.js +418 -0
- package/dist/history.js.map +1 -1
- package/dist/index.js +2 -0
- package/dist/index.js.map +1 -1
- package/dist/tui/command-handler.js +2 -0
- package/dist/tui/command-handler.js.map +1 -1
- package/dist/vault.js +133 -0
- package/dist/vault.js.map +1 -1
- package/package.json +1 -1
package/dist/anton/controller.js
CHANGED
|
@@ -4,17 +4,182 @@
|
|
|
4
4
|
* Coordinates all components: parser, prompt, verifier, lock, git, session.
|
|
5
5
|
* Structured as a deterministic orchestration flow for autonomous task execution.
|
|
6
6
|
*/
|
|
7
|
+
import * as fs from 'fs';
|
|
8
|
+
import * as path from 'path';
|
|
7
9
|
import { isToolLoopBreak, AUTO_CONTINUE_PROMPT } from '../bot/auto-continue.js';
|
|
8
10
|
import { ensureCleanWorkingTree, getWorkingDiff, commitAll, restoreTrackedChanges, cleanUntracked, createBranch, getUntrackedFiles, removeUntrackedFiles, } from '../git.js';
|
|
9
11
|
import { estimateTokens } from '../utils.js';
|
|
10
12
|
import { acquireAntonLock, releaseAntonLock, touchAntonLock } from './lock.js';
|
|
11
13
|
import { parseTaskFile, findRunnablePendingTasks, markTaskChecked, insertSubTasks, autoCompleteAncestors, } from './parser.js';
|
|
12
|
-
import { ensureAgentsTasksDir, makeUniqueTaskPlanFilename, buildDiscoveryPrompt, parseDiscoveryResult, buildRequirementsReviewPrompt, parseRequirementsReviewResult, ensurePlanFileExistsOrBootstrap, } from './preflight.js';
|
|
14
|
+
import { ensureAgentsTasksDir, makeUniqueTaskPlanFilename, buildDiscoveryPrompt, parseDiscoveryResult, buildRequirementsReviewPrompt, parseRequirementsReviewResult, ensurePlanFileExistsOrBootstrap, FORCE_DISCOVERY_DECISION_PROMPT, FORCE_REVIEW_DECISION_PROMPT, } from './preflight.js';
|
|
13
15
|
import { buildAntonPrompt, parseAntonResult, classifyTaskComplexity } from './prompt.js';
|
|
14
16
|
import { formatDryRunPlan } from './reporter.js';
|
|
15
17
|
import { classifyInfraError, ensureAntonRuntimeReady } from './runtime-ready.js';
|
|
16
18
|
import { buildSessionConfig, buildPreflightConfig, buildDecomposeConfig, buildVerifyConfig, defaultCreateSession, } from './session.js';
|
|
17
19
|
import { captureLintBaseline, detectVerificationCommands, runVerification } from './verifier.js';
|
|
20
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
21
|
+
// L2 Retry Enhancement Helpers
|
|
22
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
23
|
+
/**
 * Extract candidate file paths mentioned in an L2 (AI review) failure reason.
 *
 * Three kinds of mention are recognised, in this order:
 *   1. Laravel-style paths under `app/` ending in `.php`
 *   2. Paths under `src/`, `lib/`, `test/` or `tests/` with any extension
 *   3. "<Name> model" phrases, mapped to `app/Models/<Name>.php`
 *
 * @param {string} reason - Free-text failure reason from the reviewer.
 * @returns {string[]} Unique paths in first-seen order; empty when `reason`
 *   is not a non-empty string or nothing matches.
 */
function extractFilePathsFromL2Reason(reason) {
    if (typeof reason !== 'string' || reason === '') {
        return [];
    }
    // Words that commonly precede "model" in prose but are not model names;
    // without this filter "the model" would yield app/Models/the.php.
    const NON_MODEL_WORDS = new Set([
        'a', 'an', 'the', 'this', 'that', 'these', 'those',
        'my', 'your', 'our', 'its', 'their',
        'any', 'each', 'every', 'some', 'no', 'one', 'another', 'same',
        'which', 'what',
    ]);
    // Each matcher states explicitly whether its capture is a model name.
    // Inferring that from the matched text mis-handles real paths whose last
    // segment ends in "model" (e.g. lib/user.model).
    const matchers = [
        // PHP/Laravel style: app/Models/Channel.php, app/Http/Controllers/Foo.php
        { pattern: /\b(app\/[\w\/]+\.php)\b/gi, isModelName: false },
        // General file paths with extensions
        { pattern: /\b((?:src|lib|tests?)\/[\w\/.-]+\.\w+)\b/gi, isModelName: false },
        // Model names that can be mapped to files: "Channel model" -> app/Models/Channel.php
        { pattern: /\b(\w+)\s+model\b/gi, isModelName: true },
    ];
    const found = new Set();
    for (const { pattern, isModelName } of matchers) {
        for (const match of reason.matchAll(pattern)) {
            const captured = match[1];
            if (!isModelName) {
                found.add(captured);
                continue;
            }
            if (NON_MODEL_WORDS.has(captured.toLowerCase())) {
                continue;
            }
            found.add(`app/Models/${captured}.php`);
        }
    }
    return [...found];
}
|
|
52
|
+
/**
|
|
53
|
+
* Detect if L2 reason indicates a "missing implementation" pattern.
|
|
54
|
+
* Returns true if the model wrote tests but forgot the actual implementation.
|
|
55
|
+
*/
|
|
56
|
+
function isL2MissingImplementation(reason) {
|
|
57
|
+
const missingPatterns = [
|
|
58
|
+
/missing\s+(?:from|in)\s+/i,
|
|
59
|
+
/no\s+(?:corresponding|evidence|actual)/i,
|
|
60
|
+
/relationship\s+(?:method\s+)?is\s+missing/i,
|
|
61
|
+
/but\s+(?:the|there['']?s?\s+no)/i,
|
|
62
|
+
/tests?\s+(?:expect|added|written).*but/i,
|
|
63
|
+
/should\s+be\s+(?:hasMany|hasOne|belongsTo|morphMany)/i,
|
|
64
|
+
];
|
|
65
|
+
return missingPatterns.some((p) => p.test(reason));
|
|
66
|
+
}
|
|
67
|
+
/**
 * Report whether a discovery-stage error message matches one of the
 * known malformed-output codes or one of the tool-loop phrasings.
 *
 * @param {string} errMsg - Error message text to classify.
 * @returns {boolean} True when either family of patterns matches.
 */
function isRecoverablePreflightDiscoveryError(errMsg) {
    const malformedOutput = /preflight-json-missing-object|preflight-discovery-invalid-status|preflight-discovery-invalid-filename|preflight-discovery-filename/i;
    if (malformedOutput.test(errMsg)) {
        return true;
    }
    const toolLoop = /identical call repeated|breaking loop|tool\s+edit_range/i;
    return toolLoop.test(errMsg);
}
|
|
70
|
+
/**
 * Report whether a requirements-review error message matches one of
 * the known malformed-output codes for that stage.
 *
 * @param {string} errMsg - Error message text to classify.
 * @returns {boolean} True when a known review error code is present.
 */
function isRecoverablePreflightReviewError(errMsg) {
    const malformedReviewOutput = /preflight-json-missing-object|preflight-review-invalid-status|preflight-review-invalid-filename|preflight-review-filename/i;
    const matched = malformedReviewOutput.test(errMsg);
    return matched;
}
|
|
73
|
+
/**
 * Try to read a project file's contents for injection into retry context.
 *
 * The path originates from free-text reviewer output, so it is treated as
 * untrusted: anything that resolves outside `projectDir` is refused.
 * The containment check is lexical; symlinks are not resolved.
 *
 * @param {string} projectDir - Project root the path is resolved against.
 * @param {string} filePath - Path of the file, relative to `projectDir`.
 * @returns {string|null} UTF-8 contents, or null when the path escapes the
 *   project, is not a regular file, does not exist, exceeds the size limit,
 *   or cannot be read.
 */
function readFileForL2Injection(projectDir, filePath) {
    const MAX_FILE_SIZE = 15000; // bytes (~15KB), reasonable for injection
    try {
        const rootDir = path.resolve(projectDir);
        const fullPath = path.resolve(rootDir, filePath);
        // A relative path that climbs out ("..") or is absolute (different
        // drive on Windows) means the target is not inside the project.
        const relative = path.relative(rootDir, fullPath);
        const escapesRoot = relative === '..' ||
            relative.startsWith(`..${path.sep}`) ||
            path.isAbsolute(relative);
        if (escapesRoot) {
            return null;
        }
        // statSync throws for a missing file; the catch below maps that to null.
        const stat = fs.statSync(fullPath);
        if (!stat.isFile() || stat.size > MAX_FILE_SIZE) {
            return null;
        }
        return fs.readFileSync(fullPath, 'utf8');
    }
    catch {
        // Best effort: any failure simply means there is nothing to inject.
        return null;
    }
}
|
|
92
|
+
/**
 * Compose the extra retry guidance shown to the agent after an L2 (AI review)
 * failure that looks like a missing implementation.
 *
 * The text always names the files to modify; once `l2FailCount` reaches 2 it
 * also embeds the current contents of each file that can be read.
 *
 * @param {string} l2Reason - Failure reason reported by the AI review.
 * @param {number} l2FailCount - Number of L2 failures recorded for the task.
 * @param {string} projectDir - Project root used to read the referenced files.
 * @param {string} taskText - Task description echoed back to the agent.
 * @returns {string} Newline-joined guidance, or '' when the reason is not a
 *   missing-implementation pattern or names no files.
 */
function buildL2EnhancedRetryContext(l2Reason, l2FailCount, projectDir, taskText) {
    const targets = extractFilePathsFromL2Reason(l2Reason);
    const missingImpl = isL2MissingImplementation(l2Reason);
    if (!(missingImpl && targets.length > 0)) {
        // Nothing to enhance unless both signals are present.
        return '';
    }
    const rule = '═══════════════════════════════════════════════════════════════════════';
    const lines = [
        '',
        rule,
        '⚠️ CRITICAL: AI REVIEW FAILED — MISSING IMPLEMENTATION DETECTED',
        rule,
        '',
        `The AI review found that you wrote tests but FORGOT THE ACTUAL IMPLEMENTATION.`,
        `Task: "${taskText}"`,
        '',
        'YOU MUST MODIFY THESE FILES:',
        ...targets.map((fp) => ` → ${fp}`),
        '',
    ];
    // Repeated failures: show the agent what the files currently contain.
    if (l2FailCount >= 2) {
        lines.push('Since you have failed this verification multiple times, here are the current', 'contents of the files you need to modify:', '');
        for (const fp of targets) {
            const body = readFileForL2Injection(projectDir, fp);
            if (body === null) {
                lines.push(`[Could not read ${fp} — file may not exist or is too large]`, '');
            }
            else {
                lines.push(`┌─── ${fp} ───`, body, `└─── end of ${fp} ───`, '');
            }
        }
    }
    lines.push('INSTRUCTIONS:', '1. READ the files listed above (they are your existing code)', '2. ADD the missing method/relationship to the model file', '3. Do NOT just modify tests — the MODEL/SOURCE file must change', '4. The L2 review expects to see your implementation in the diff', '');
    return lines.join('\n');
}
|
|
145
|
+
const ANTON_RESULT_SYSTEM_CONTRACT = `[Anton output contract]
|
|
146
|
+
Every final implementation/decompose answer MUST contain exactly one structured block:
|
|
147
|
+
<anton-result>
|
|
148
|
+
status: done|failed|blocked|decompose
|
|
149
|
+
reason: <optional>
|
|
150
|
+
subtasks:
|
|
151
|
+
- <only when status=decompose>
|
|
152
|
+
</anton-result>
|
|
153
|
+
Do not omit this block.`;
|
|
154
|
+
const STRUCTURED_RESULT_RECOVERY_PROMPT = `Your previous reply did not include a valid <anton-result> block.
|
|
155
|
+
Do NOT call tools.
|
|
156
|
+
Return ONLY this block shape and nothing else:
|
|
157
|
+
<anton-result>
|
|
158
|
+
status: done|failed|blocked|decompose
|
|
159
|
+
reason: <optional>
|
|
160
|
+
subtasks:
|
|
161
|
+
- <only when status=decompose>
|
|
162
|
+
</anton-result>`;
|
|
163
|
+
/**
 * Tell whether a failure reason is one of the messages that indicate the
 * agent's structured result block could not be parsed.
 *
 * @param {string|null|undefined} reason - Failure reason to classify.
 * @returns {boolean} True for the two exact parse-failure messages or any
 *   reason beginning with "Unknown status:"; false for falsy input.
 */
function isStructuredResultParseFailure(reason) {
    if (!reason) {
        return false;
    }
    switch (reason) {
        case 'Agent did not emit structured result':
        case 'No status line found in result block':
            return true;
        default:
            return reason.startsWith('Unknown status:');
    }
}
|
|
170
|
+
/**
 * Append the Anton output contract to a session's system prompt, unless the
 * prompt is empty or already mentions the contract.
 *
 * Best effort: any error raised while reading or updating the prompt is
 * swallowed so prompt injection can never abort the caller.
 *
 * @param {object} session - Session exposing `setSystemPrompt` and, optionally,
 *   `getSystemPrompt`.
 * @returns {void}
 */
function injectAntonResultContract(session) {
    try {
        const existing = String(session.getSystemPrompt?.() ?? '').trim();
        if (existing.length === 0) {
            return;
        }
        // Either marker means the contract (or an equivalent) is already present.
        const contractMarkers = ['<anton-result>', '[Anton output contract]'];
        if (contractMarkers.some((marker) => existing.includes(marker))) {
            return;
        }
        session.setSystemPrompt(`${existing}\n\n${ANTON_RESULT_SYSTEM_CONTRACT}`);
    }
    catch {
        // best effort
    }
}
|
|
18
183
|
export async function runAnton(opts) {
|
|
19
184
|
const { config, idlehandsConfig, progress, abortSignal, apiKey, vault, lens } = opts;
|
|
20
185
|
const createSessionFn = opts.createSession || defaultCreateSession;
|
|
@@ -31,6 +196,7 @@ export async function runAnton(opts) {
|
|
|
31
196
|
const taskRetryCount = new Map();
|
|
32
197
|
const lastFailureReason = new Map();
|
|
33
198
|
const consecutiveIdenticalCount = new Map();
|
|
199
|
+
const l2FailCount = new Map(); // Track consecutive L2 failures per task
|
|
34
200
|
let lockHeartbeatTimer = null;
|
|
35
201
|
// SIGINT handler
|
|
36
202
|
const handleAbort = () => {
|
|
@@ -131,8 +297,15 @@ export async function runAnton(opts) {
|
|
|
131
297
|
parts.push('- Test command failed');
|
|
132
298
|
if (v.l1_lint === false)
|
|
133
299
|
parts.push('- Lint command failed');
|
|
134
|
-
if (v.l2_ai === false && v.l2_reason)
|
|
300
|
+
if (v.l2_ai === false && v.l2_reason) {
|
|
135
301
|
parts.push(`- AI review: ${v.l2_reason}`);
|
|
302
|
+
// Enhanced L2 retry context: stronger guidance + file injection on repeated failures
|
|
303
|
+
const currentL2Count = l2FailCount.get(currentTask.key) || 0;
|
|
304
|
+
const l2Enhancement = buildL2EnhancedRetryContext(v.l2_reason, currentL2Count, config.projectDir, currentTask.text);
|
|
305
|
+
if (l2Enhancement) {
|
|
306
|
+
parts.push(l2Enhancement);
|
|
307
|
+
}
|
|
308
|
+
}
|
|
136
309
|
// Include error output (filtered to errors only, no warnings) so the
|
|
137
310
|
// agent can see and fix the exact issues.
|
|
138
311
|
if (v.commandOutput) {
|
|
@@ -241,171 +414,117 @@ export async function runAnton(opts) {
|
|
|
241
414
|
let discoveryOk = false;
|
|
242
415
|
await ensureAgentsTasksDir(config.projectDir);
|
|
243
416
|
const plannedFilePath = taskPlanByTaskKey.get(currentTask.key) ?? makeUniqueTaskPlanFilename(config.projectDir);
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
discoverySession = await createSessionFn(buildPreflightConfig(idlehandsConfig, config, discoveryTimeoutSec, discoveryIterationCap), apiKey);
|
|
254
|
-
const discoveryPrompt = buildDiscoveryPrompt({
|
|
255
|
-
task: currentTask,
|
|
256
|
-
taskFilePath: config.taskFile,
|
|
257
|
-
projectDir: config.projectDir,
|
|
258
|
-
planFilePath: plannedFilePath,
|
|
259
|
-
});
|
|
260
|
-
const discoveryRes = await Promise.race([
|
|
261
|
-
discoverySession.ask(discoveryPrompt),
|
|
262
|
-
new Promise((_, reject) => setTimeout(() => {
|
|
263
|
-
try {
|
|
264
|
-
discoverySession?.cancel();
|
|
265
|
-
}
|
|
266
|
-
catch {
|
|
267
|
-
// best effort
|
|
268
|
-
}
|
|
269
|
-
reject(new Error('preflight-discovery-timeout'));
|
|
270
|
-
}, discoveryTimeoutMs)),
|
|
271
|
-
]);
|
|
272
|
-
const discoveryTokens = discoverySession.usage.prompt + discoverySession.usage.completion;
|
|
273
|
-
totalTokens += discoveryTokens;
|
|
274
|
-
const discovery = parseDiscoveryResult(discoveryRes.text, config.projectDir);
|
|
275
|
-
preflightRecords.push({
|
|
276
|
-
taskKey: currentTask.key,
|
|
277
|
-
stage: 'discovery',
|
|
278
|
-
durationMs: Date.now() - stageStart,
|
|
279
|
-
tokensUsed: discoveryTokens,
|
|
280
|
-
status: discovery.status,
|
|
281
|
-
filename: discovery.filename || undefined,
|
|
282
|
-
});
|
|
283
|
-
if (discovery.status === 'complete') {
|
|
284
|
-
await markTaskChecked(config.taskFile, currentTask.key);
|
|
285
|
-
await autoCompleteAncestors(config.taskFile, currentTask.key);
|
|
286
|
-
autoCompleted += 1;
|
|
287
|
-
progress.onStage?.(`✅ Discovery confirmed already complete: ${currentTask.text}`);
|
|
288
|
-
preflightMarkedComplete = true;
|
|
289
|
-
discoveryOk = true;
|
|
290
|
-
break;
|
|
291
|
-
}
|
|
292
|
-
const discoveryPlanState = await ensurePlanFileExistsOrBootstrap({
|
|
293
|
-
absPath: discovery.filename,
|
|
294
|
-
task: currentTask,
|
|
295
|
-
source: 'discovery',
|
|
296
|
-
});
|
|
297
|
-
if (discoveryPlanState === 'bootstrapped') {
|
|
298
|
-
progress.onStage?.(`⚠️ Discovery returned a filename but did not write it. Created fallback plan file: ${discovery.filename}`);
|
|
299
|
-
}
|
|
300
|
-
taskPlanByTaskKey.set(currentTask.key, discovery.filename);
|
|
301
|
-
progress.onStage?.(`📝 Discovery plan file: ${discovery.filename}`);
|
|
302
|
-
discoveryOk = true;
|
|
303
|
-
break;
|
|
304
|
-
}
|
|
305
|
-
catch (error) {
|
|
306
|
-
const errMsg = error instanceof Error ? error.message : String(error);
|
|
307
|
-
const timeout = /timeout/i.test(errMsg);
|
|
308
|
-
preflightRecords.push({
|
|
309
|
-
taskKey: currentTask.key,
|
|
310
|
-
stage: 'discovery',
|
|
311
|
-
durationMs: Date.now() - stageStart,
|
|
312
|
-
tokensUsed: 0,
|
|
313
|
-
status: timeout ? 'timeout' : 'error',
|
|
314
|
-
error: errMsg,
|
|
315
|
-
});
|
|
316
|
-
if (discoveryTry < preflightMaxRetries) {
|
|
317
|
-
const short = errMsg.length > 180 ? `${errMsg.slice(0, 177)}...` : errMsg;
|
|
318
|
-
if (/max iterations exceeded/i.test(errMsg)) {
|
|
319
|
-
const nextCap = Math.min(Math.max(discoveryIterationCap * 2, discoveryIterationCap + 2), 1000);
|
|
320
|
-
if (nextCap > discoveryIterationCap) {
|
|
321
|
-
progress.onStage?.(`⚠️ Discovery hit max iterations (${discoveryIterationCap}). Increasing preflight cap to ${nextCap} and retrying...`);
|
|
322
|
-
discoveryIterationCap = nextCap;
|
|
323
|
-
continue;
|
|
324
|
-
}
|
|
325
|
-
}
|
|
326
|
-
progress.onStage?.(`⚠️ Discovery failed (${discoveryTry + 1}/${preflightTotalTries}): ${short}. Retrying discovery...`);
|
|
327
|
-
continue;
|
|
328
|
-
}
|
|
329
|
-
const preflightAttempt = {
|
|
330
|
-
taskKey: currentTask.key,
|
|
331
|
-
taskText: currentTask.text,
|
|
332
|
-
attempt: attemptNumber,
|
|
333
|
-
durationMs: Date.now() - stageStart,
|
|
334
|
-
tokensUsed: 0,
|
|
335
|
-
status: timeout ? 'timeout' : 'error',
|
|
336
|
-
verification: undefined,
|
|
337
|
-
error: `preflight-error(discovery): ${errMsg}`,
|
|
338
|
-
commitHash: undefined,
|
|
339
|
-
};
|
|
340
|
-
attempts.push(preflightAttempt);
|
|
341
|
-
taskRetryCount.set(currentTask.key, retries + 1);
|
|
342
|
-
if (!config.skipOnFail)
|
|
343
|
-
break mainLoop;
|
|
344
|
-
}
|
|
345
|
-
finally {
|
|
417
|
+
// Default to 50 iterations for discovery (was 500 - way too high for a simple JSON check)
|
|
418
|
+
let discoveryIterationCap = Math.max(1, Math.floor(config.preflightSessionMaxIterations ?? 50));
|
|
419
|
+
let discoveryRetryHint;
|
|
420
|
+
// Shared preflight session - reused between discovery and review stages to avoid
|
|
421
|
+
// session creation overhead. Created lazily, closed on error (for fresh retry state)
|
|
422
|
+
// or at end of preflight block.
|
|
423
|
+
let preflightSession;
|
|
424
|
+
const closePreflightSession = async () => {
|
|
425
|
+
if (preflightSession) {
|
|
346
426
|
try {
|
|
347
|
-
await
|
|
427
|
+
await preflightSession.close();
|
|
348
428
|
}
|
|
349
429
|
catch {
|
|
350
430
|
// best effort
|
|
351
431
|
}
|
|
432
|
+
preflightSession = undefined;
|
|
352
433
|
}
|
|
353
|
-
}
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
}
|
|
358
|
-
if (!discoveryOk) {
|
|
359
|
-
continue;
|
|
360
|
-
}
|
|
361
|
-
// Stage 2: requirements review (retry review only; keep same plan file).
|
|
362
|
-
if (config.preflightRequirementsReview) {
|
|
363
|
-
const reviewPlanFile = taskPlanByTaskKey.get(currentTask.key) ?? plannedFilePath;
|
|
364
|
-
let reviewOk = false;
|
|
365
|
-
let reviewIterationCap = Math.max(1, Math.floor(config.preflightSessionMaxIterations ?? 500));
|
|
366
|
-
for (let reviewTry = 0; reviewTry <= preflightMaxRetries; reviewTry++) {
|
|
434
|
+
};
|
|
435
|
+
try {
|
|
436
|
+
// Stage 1: discovery (retry discovery only).
|
|
437
|
+
for (let discoveryTry = 0; discoveryTry <= preflightMaxRetries; discoveryTry++) {
|
|
367
438
|
const stageStart = Date.now();
|
|
368
|
-
const
|
|
369
|
-
const
|
|
370
|
-
let reviewSession;
|
|
439
|
+
const discoveryTimeoutSec = config.preflightDiscoveryTimeoutSec ?? config.taskTimeoutSec;
|
|
440
|
+
const discoveryTimeoutMs = discoveryTimeoutSec * 1000;
|
|
371
441
|
try {
|
|
372
|
-
progress.onStage?.('
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
442
|
+
progress.onStage?.('🔎 Discovery: checking if already done...');
|
|
443
|
+
// Create session if not already open (first try or after error closed it)
|
|
444
|
+
if (!preflightSession) {
|
|
445
|
+
preflightSession = await createSessionFn(buildPreflightConfig(idlehandsConfig, config, discoveryTimeoutSec, discoveryIterationCap), apiKey);
|
|
446
|
+
}
|
|
447
|
+
const discoveryPrompt = buildDiscoveryPrompt({
|
|
448
|
+
task: currentTask,
|
|
449
|
+
taskFilePath: config.taskFile,
|
|
450
|
+
projectDir: config.projectDir,
|
|
451
|
+
planFilePath: plannedFilePath,
|
|
452
|
+
retryHint: discoveryRetryHint,
|
|
453
|
+
});
|
|
454
|
+
let discoveryTimeoutHandle;
|
|
455
|
+
const discoveryRes = await Promise.race([
|
|
456
|
+
preflightSession.ask(discoveryPrompt).finally(() => clearTimeout(discoveryTimeoutHandle)),
|
|
457
|
+
new Promise((_, reject) => {
|
|
458
|
+
discoveryTimeoutHandle = setTimeout(() => {
|
|
459
|
+
try {
|
|
460
|
+
preflightSession?.cancel();
|
|
461
|
+
}
|
|
462
|
+
catch {
|
|
463
|
+
// best effort
|
|
464
|
+
}
|
|
465
|
+
reject(new Error('preflight-discovery-timeout'));
|
|
466
|
+
}, discoveryTimeoutMs);
|
|
467
|
+
}),
|
|
468
|
+
]);
|
|
469
|
+
let discoveryTokens = preflightSession.usage.prompt + preflightSession.usage.completion;
|
|
470
|
+
totalTokens += discoveryTokens;
|
|
471
|
+
// Try to parse discovery result; if invalid JSON, attempt force-decision prompt
|
|
472
|
+
let discovery;
|
|
473
|
+
try {
|
|
474
|
+
discovery = parseDiscoveryResult(discoveryRes.text, config.projectDir);
|
|
475
|
+
}
|
|
476
|
+
catch (parseError) {
|
|
477
|
+
const parseErrMsg = parseError instanceof Error ? parseError.message : String(parseError);
|
|
478
|
+
// Only try force-decision for JSON/format errors, not file path errors
|
|
479
|
+
if (/preflight-json-missing-object|preflight-discovery-invalid/i.test(parseErrMsg)) {
|
|
480
|
+
progress.onStage?.('⚠️ Discovery output invalid, requesting forced decision...');
|
|
378
481
|
try {
|
|
379
|
-
|
|
482
|
+
const forceRes = await preflightSession.ask(FORCE_DISCOVERY_DECISION_PROMPT);
|
|
483
|
+
const forceTokens = preflightSession.usage.prompt + preflightSession.usage.completion - discoveryTokens;
|
|
484
|
+
discoveryTokens += forceTokens;
|
|
485
|
+
totalTokens += forceTokens;
|
|
486
|
+
discovery = parseDiscoveryResult(forceRes.text, config.projectDir);
|
|
487
|
+
progress.onStage?.('✅ Forced decision succeeded');
|
|
380
488
|
}
|
|
381
|
-
catch {
|
|
382
|
-
//
|
|
489
|
+
catch (forceError) {
|
|
490
|
+
// Force-decision also failed, throw original error
|
|
491
|
+
throw parseError;
|
|
383
492
|
}
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
totalTokens += reviewTokens;
|
|
389
|
-
const review = parseRequirementsReviewResult(reviewRes.text, config.projectDir);
|
|
390
|
-
const reviewPlanState = await ensurePlanFileExistsOrBootstrap({
|
|
391
|
-
absPath: review.filename,
|
|
392
|
-
task: currentTask,
|
|
393
|
-
source: 'requirements-review',
|
|
394
|
-
});
|
|
395
|
-
if (reviewPlanState === 'bootstrapped') {
|
|
396
|
-
progress.onStage?.(`⚠️ Requirements review returned a filename but did not write it. Created fallback plan file: ${review.filename}`);
|
|
493
|
+
}
|
|
494
|
+
else {
|
|
495
|
+
throw parseError;
|
|
496
|
+
}
|
|
397
497
|
}
|
|
398
498
|
preflightRecords.push({
|
|
399
499
|
taskKey: currentTask.key,
|
|
400
|
-
stage: '
|
|
500
|
+
stage: 'discovery',
|
|
401
501
|
durationMs: Date.now() - stageStart,
|
|
402
|
-
tokensUsed:
|
|
403
|
-
status:
|
|
404
|
-
filename:
|
|
502
|
+
tokensUsed: discoveryTokens,
|
|
503
|
+
status: discovery.status,
|
|
504
|
+
filename: discovery.filename || undefined,
|
|
505
|
+
});
|
|
506
|
+
if (discovery.status === 'complete') {
|
|
507
|
+
await markTaskChecked(config.taskFile, currentTask.key);
|
|
508
|
+
await autoCompleteAncestors(config.taskFile, currentTask.key);
|
|
509
|
+
autoCompleted += 1;
|
|
510
|
+
progress.onStage?.(`✅ Discovery confirmed already complete: ${currentTask.text}`);
|
|
511
|
+
preflightMarkedComplete = true;
|
|
512
|
+
discoveryOk = true;
|
|
513
|
+
// No review needed - close session now
|
|
514
|
+
await closePreflightSession();
|
|
515
|
+
break;
|
|
516
|
+
}
|
|
517
|
+
const discoveryPlanState = await ensurePlanFileExistsOrBootstrap({
|
|
518
|
+
absPath: discovery.filename,
|
|
519
|
+
task: currentTask,
|
|
520
|
+
source: 'discovery',
|
|
405
521
|
});
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
522
|
+
if (discoveryPlanState === 'bootstrapped') {
|
|
523
|
+
progress.onStage?.(`⚠️ Discovery returned a filename but did not write it. Created fallback plan file: ${discovery.filename}`);
|
|
524
|
+
}
|
|
525
|
+
taskPlanByTaskKey.set(currentTask.key, discovery.filename);
|
|
526
|
+
progress.onStage?.(`📝 Discovery plan file: ${discovery.filename}`);
|
|
527
|
+
discoveryOk = true;
|
|
409
528
|
break;
|
|
410
529
|
}
|
|
411
530
|
catch (error) {
|
|
@@ -413,53 +532,227 @@ export async function runAnton(opts) {
|
|
|
413
532
|
const timeout = /timeout/i.test(errMsg);
|
|
414
533
|
preflightRecords.push({
|
|
415
534
|
taskKey: currentTask.key,
|
|
416
|
-
stage: '
|
|
535
|
+
stage: 'discovery',
|
|
417
536
|
durationMs: Date.now() - stageStart,
|
|
418
537
|
tokensUsed: 0,
|
|
419
538
|
status: timeout ? 'timeout' : 'error',
|
|
420
539
|
error: errMsg,
|
|
421
540
|
});
|
|
422
|
-
|
|
423
|
-
|
|
541
|
+
const short = errMsg.length > 180 ? `${errMsg.slice(0, 177)}...` : errMsg;
|
|
542
|
+
discoveryRetryHint = `Previous discovery attempt failed: ${short}. Do not edit source files. Only update ${plannedFilePath} and return strict JSON.`;
|
|
543
|
+
// If discovery returns malformed/non-JSON output (or loops on source edits),
|
|
544
|
+
// degrade immediately to fallback plan instead of burning retries.
|
|
545
|
+
if (isRecoverablePreflightDiscoveryError(errMsg)) {
|
|
546
|
+
const fallbackState = await ensurePlanFileExistsOrBootstrap({
|
|
547
|
+
absPath: plannedFilePath,
|
|
548
|
+
task: currentTask,
|
|
549
|
+
source: 'discovery',
|
|
550
|
+
});
|
|
551
|
+
if (fallbackState === 'bootstrapped') {
|
|
552
|
+
progress.onStage?.(`⚠️ Discovery returned invalid output (${short}). Bootstrapped fallback plan and continuing: ${plannedFilePath}`);
|
|
553
|
+
}
|
|
554
|
+
else {
|
|
555
|
+
progress.onStage?.(`⚠️ Discovery returned invalid output (${short}). Reusing existing plan and continuing: ${plannedFilePath}`);
|
|
556
|
+
}
|
|
557
|
+
taskPlanByTaskKey.set(currentTask.key, plannedFilePath);
|
|
558
|
+
discoveryOk = true;
|
|
559
|
+
break;
|
|
560
|
+
}
|
|
561
|
+
if (discoveryTry < preflightMaxRetries) {
|
|
562
|
+
// Close session on error so retry gets fresh state
|
|
563
|
+
await closePreflightSession();
|
|
424
564
|
if (/max iterations exceeded/i.test(errMsg)) {
|
|
425
|
-
const nextCap = Math.min(Math.max(
|
|
426
|
-
if (nextCap >
|
|
427
|
-
progress.onStage?.(`⚠️
|
|
428
|
-
|
|
565
|
+
const nextCap = Math.min(Math.max(discoveryIterationCap * 2, discoveryIterationCap + 2), 1000);
|
|
566
|
+
if (nextCap > discoveryIterationCap) {
|
|
567
|
+
progress.onStage?.(`⚠️ Discovery hit max iterations (${discoveryIterationCap}). Increasing preflight cap to ${nextCap} and retrying...`);
|
|
568
|
+
discoveryIterationCap = nextCap;
|
|
429
569
|
continue;
|
|
430
570
|
}
|
|
431
571
|
}
|
|
432
|
-
progress.onStage?.(`⚠️
|
|
572
|
+
progress.onStage?.(`⚠️ Discovery failed (${discoveryTry + 1}/${preflightTotalTries}): ${short}. Retrying discovery...`);
|
|
433
573
|
continue;
|
|
434
574
|
}
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
commitHash: undefined,
|
|
445
|
-
};
|
|
446
|
-
attempts.push(preflightAttempt);
|
|
447
|
-
taskRetryCount.set(currentTask.key, retries + 1);
|
|
448
|
-
if (!config.skipOnFail)
|
|
449
|
-
break mainLoop;
|
|
450
|
-
}
|
|
451
|
-
finally {
|
|
452
|
-
try {
|
|
453
|
-
await reviewSession?.close();
|
|
575
|
+
// Final discovery failure: degrade gracefully by bootstrapping a fallback plan file
|
|
576
|
+
// so Anton can still proceed to implementation/review instead of hard-failing task 1.
|
|
577
|
+
const fallbackState = await ensurePlanFileExistsOrBootstrap({
|
|
578
|
+
absPath: plannedFilePath,
|
|
579
|
+
task: currentTask,
|
|
580
|
+
source: 'discovery',
|
|
581
|
+
});
|
|
582
|
+
if (fallbackState === 'bootstrapped') {
|
|
583
|
+
progress.onStage?.(`⚠️ Discovery failed after ${preflightTotalTries} tries (${short}). Bootstrapped fallback plan and continuing: ${plannedFilePath}`);
|
|
454
584
|
}
|
|
455
|
-
|
|
456
|
-
|
|
585
|
+
else {
|
|
586
|
+
progress.onStage?.(`⚠️ Discovery failed after ${preflightTotalTries} tries (${short}). Reusing existing plan and continuing: ${plannedFilePath}`);
|
|
457
587
|
}
|
|
588
|
+
taskPlanByTaskKey.set(currentTask.key, plannedFilePath);
|
|
589
|
+
discoveryOk = true;
|
|
590
|
+
break;
|
|
458
591
|
}
|
|
592
|
+
// Note: session stays open for reuse in review stage (closed at end of preflight block)
|
|
593
|
+
}
|
|
594
|
+
// Discovery already marked complete -> next task.
|
|
595
|
+
if (preflightMarkedComplete) {
|
|
596
|
+
continue;
|
|
459
597
|
}
|
|
460
|
-
if (!
|
|
598
|
+
if (!discoveryOk) {
|
|
461
599
|
continue;
|
|
462
600
|
}
|
|
601
|
+
// Stage 2: requirements review (retry review only; keep same plan file).
|
|
602
|
+
// NOTE: Discovery prompt now includes review instructions, producing a "reviewed" plan.
|
|
603
|
+
// Separate review stage is skipped by default to save an LLM round-trip.
|
|
604
|
+
// Set preflightRequirementsReview=true AND preflightSeparateReview=true to force separate review.
|
|
605
|
+
const skipSeparateReview = !config.preflightSeparateReview;
|
|
606
|
+
if (config.preflightRequirementsReview && !skipSeparateReview) {
|
|
607
|
+
const reviewPlanFile = taskPlanByTaskKey.get(currentTask.key) ?? plannedFilePath;
|
|
608
|
+
let reviewOk = false;
|
|
609
|
+
// Default to 30 iterations for review (simpler than discovery, just refining existing plan)
|
|
610
|
+
let reviewIterationCap = Math.max(1, Math.floor(config.preflightSessionMaxIterations ?? 30));
|
|
611
|
+
for (let reviewTry = 0; reviewTry <= preflightMaxRetries; reviewTry++) {
|
|
612
|
+
const stageStart = Date.now();
|
|
613
|
+
const reviewTimeoutSec = config.preflightReviewTimeoutSec ?? config.taskTimeoutSec;
|
|
614
|
+
const reviewTimeoutMs = reviewTimeoutSec * 1000;
|
|
615
|
+
try {
|
|
616
|
+
progress.onStage?.('🧪 Requirements review: refining plan...');
|
|
617
|
+
// Reuse preflight session from discovery, or create new one if needed (e.g., after error)
|
|
618
|
+
if (!preflightSession) {
|
|
619
|
+
preflightSession = await createSessionFn(buildPreflightConfig(idlehandsConfig, config, reviewTimeoutSec, reviewIterationCap), apiKey);
|
|
620
|
+
}
|
|
621
|
+
const reviewPrompt = buildRequirementsReviewPrompt(reviewPlanFile);
|
|
622
|
+
let reviewTimeoutHandle;
|
|
623
|
+
const reviewRes = await Promise.race([
|
|
624
|
+
preflightSession.ask(reviewPrompt).finally(() => clearTimeout(reviewTimeoutHandle)),
|
|
625
|
+
new Promise((_, reject) => {
|
|
626
|
+
reviewTimeoutHandle = setTimeout(() => {
|
|
627
|
+
try {
|
|
628
|
+
preflightSession?.cancel();
|
|
629
|
+
}
|
|
630
|
+
catch {
|
|
631
|
+
// best effort
|
|
632
|
+
}
|
|
633
|
+
reject(new Error('preflight-review-timeout'));
|
|
634
|
+
}, reviewTimeoutMs);
|
|
635
|
+
}),
|
|
636
|
+
]);
|
|
637
|
+
let reviewTokens = preflightSession.usage.prompt + preflightSession.usage.completion;
|
|
638
|
+
totalTokens += reviewTokens;
|
|
639
|
+
// Try to parse review result; if invalid JSON, attempt force-decision prompt
|
|
640
|
+
let review;
|
|
641
|
+
try {
|
|
642
|
+
review = parseRequirementsReviewResult(reviewRes.text, config.projectDir);
|
|
643
|
+
}
|
|
644
|
+
catch (parseError) {
|
|
645
|
+
const parseErrMsg = parseError instanceof Error ? parseError.message : String(parseError);
|
|
646
|
+
// Only try force-decision for JSON/format errors
|
|
647
|
+
if (/preflight-json-missing-object|preflight-review-invalid/i.test(parseErrMsg)) {
|
|
648
|
+
progress.onStage?.('⚠️ Review output invalid, requesting forced decision...');
|
|
649
|
+
try {
|
|
650
|
+
const forceRes = await preflightSession.ask(FORCE_REVIEW_DECISION_PROMPT);
|
|
651
|
+
const forceTokens = preflightSession.usage.prompt + preflightSession.usage.completion - reviewTokens;
|
|
652
|
+
reviewTokens += forceTokens;
|
|
653
|
+
totalTokens += forceTokens;
|
|
654
|
+
review = parseRequirementsReviewResult(forceRes.text, config.projectDir);
|
|
655
|
+
progress.onStage?.('✅ Forced decision succeeded');
|
|
656
|
+
}
|
|
657
|
+
catch (forceError) {
|
|
658
|
+
// Force-decision also failed, throw original error
|
|
659
|
+
throw parseError;
|
|
660
|
+
}
|
|
661
|
+
}
|
|
662
|
+
else {
|
|
663
|
+
throw parseError;
|
|
664
|
+
}
|
|
665
|
+
}
|
|
666
|
+
const reviewPlanState = await ensurePlanFileExistsOrBootstrap({
|
|
667
|
+
absPath: review.filename,
|
|
668
|
+
task: currentTask,
|
|
669
|
+
source: 'requirements-review',
|
|
670
|
+
});
|
|
671
|
+
if (reviewPlanState === 'bootstrapped') {
|
|
672
|
+
progress.onStage?.(`⚠️ Requirements review returned a filename but did not write it. Created fallback plan file: ${review.filename}`);
|
|
673
|
+
}
|
|
674
|
+
preflightRecords.push({
|
|
675
|
+
taskKey: currentTask.key,
|
|
676
|
+
stage: 'requirements-review',
|
|
677
|
+
durationMs: Date.now() - stageStart,
|
|
678
|
+
tokensUsed: reviewTokens,
|
|
679
|
+
status: 'ready',
|
|
680
|
+
filename: review.filename,
|
|
681
|
+
});
|
|
682
|
+
taskPlanByTaskKey.set(currentTask.key, review.filename);
|
|
683
|
+
progress.onStage?.(`✅ Requirements review ready: ${review.filename}`);
|
|
684
|
+
reviewOk = true;
|
|
685
|
+
break;
|
|
686
|
+
}
|
|
687
|
+
catch (error) {
|
|
688
|
+
const errMsg = error instanceof Error ? error.message : String(error);
|
|
689
|
+
const timeout = /timeout/i.test(errMsg);
|
|
690
|
+
preflightRecords.push({
|
|
691
|
+
taskKey: currentTask.key,
|
|
692
|
+
stage: 'requirements-review',
|
|
693
|
+
durationMs: Date.now() - stageStart,
|
|
694
|
+
tokensUsed: 0,
|
|
695
|
+
status: timeout ? 'timeout' : 'error',
|
|
696
|
+
error: errMsg,
|
|
697
|
+
});
|
|
698
|
+
const short = errMsg.length > 180 ? `${errMsg.slice(0, 177)}...` : errMsg;
|
|
699
|
+
// If review returns malformed/non-JSON output, keep moving with existing plan.
|
|
700
|
+
if (isRecoverablePreflightReviewError(errMsg)) {
|
|
701
|
+
const fallbackState = await ensurePlanFileExistsOrBootstrap({
|
|
702
|
+
absPath: reviewPlanFile,
|
|
703
|
+
task: currentTask,
|
|
704
|
+
source: 'requirements-review',
|
|
705
|
+
});
|
|
706
|
+
if (fallbackState === 'bootstrapped') {
|
|
707
|
+
progress.onStage?.(`⚠️ Requirements review returned invalid output (${short}). Bootstrapped fallback plan and continuing: ${reviewPlanFile}`);
|
|
708
|
+
}
|
|
709
|
+
else {
|
|
710
|
+
progress.onStage?.(`⚠️ Requirements review returned invalid output (${short}). Reusing existing plan and continuing: ${reviewPlanFile}`);
|
|
711
|
+
}
|
|
712
|
+
taskPlanByTaskKey.set(currentTask.key, reviewPlanFile);
|
|
713
|
+
reviewOk = true;
|
|
714
|
+
break;
|
|
715
|
+
}
|
|
716
|
+
if (reviewTry < preflightMaxRetries) {
|
|
717
|
+
// Close session on error so retry gets fresh state
|
|
718
|
+
await closePreflightSession();
|
|
719
|
+
if (/max iterations exceeded/i.test(errMsg)) {
|
|
720
|
+
const nextCap = Math.min(Math.max(reviewIterationCap * 2, reviewIterationCap + 2), 1000);
|
|
721
|
+
if (nextCap > reviewIterationCap) {
|
|
722
|
+
progress.onStage?.(`⚠️ Requirements review hit max iterations (${reviewIterationCap}). Increasing preflight cap to ${nextCap} and retrying...`);
|
|
723
|
+
reviewIterationCap = nextCap;
|
|
724
|
+
continue;
|
|
725
|
+
}
|
|
726
|
+
}
|
|
727
|
+
progress.onStage?.(`⚠️ Requirements review failed (${reviewTry + 1}/${preflightTotalTries}): ${short}. Retrying review with existing plan file...`);
|
|
728
|
+
continue;
|
|
729
|
+
}
|
|
730
|
+
const preflightAttempt = {
|
|
731
|
+
taskKey: currentTask.key,
|
|
732
|
+
taskText: currentTask.text,
|
|
733
|
+
attempt: attemptNumber,
|
|
734
|
+
durationMs: Date.now() - stageStart,
|
|
735
|
+
tokensUsed: 0,
|
|
736
|
+
status: timeout ? 'timeout' : 'error',
|
|
737
|
+
verification: undefined,
|
|
738
|
+
error: `preflight-error(requirements-review): ${errMsg}`,
|
|
739
|
+
commitHash: undefined,
|
|
740
|
+
};
|
|
741
|
+
attempts.push(preflightAttempt);
|
|
742
|
+
taskRetryCount.set(currentTask.key, retries + 1);
|
|
743
|
+
if (!config.skipOnFail)
|
|
744
|
+
break mainLoop;
|
|
745
|
+
}
|
|
746
|
+
// Note: session stays open, will be closed at end of preflight block
|
|
747
|
+
}
|
|
748
|
+
if (!reviewOk) {
|
|
749
|
+
continue;
|
|
750
|
+
}
|
|
751
|
+
}
|
|
752
|
+
}
|
|
753
|
+
finally {
|
|
754
|
+
// Always close preflight session at end of preflight block
|
|
755
|
+
await closePreflightSession();
|
|
463
756
|
}
|
|
464
757
|
}
|
|
465
758
|
progress.onStage?.('🛠️ Implementation: executing vetted plan...');
|
|
@@ -476,6 +769,7 @@ export async function runAnton(opts) {
|
|
|
476
769
|
: buildSessionConfig(idlehandsConfig, config);
|
|
477
770
|
console.error(`[anton:debug] task="${currentTask.text}" depth=${currentTask.depth} complexity=${taskComplexity} isComplexDecompose=${isComplexDecompose} no_tools=${!!sessionConfig.no_tools} max_iterations=${sessionConfig.max_iterations}`);
|
|
478
771
|
session = await createSessionFn(sessionConfig, apiKey);
|
|
772
|
+
injectAntonResultContract(session);
|
|
479
773
|
// Set up timeout + stop propagation for the currently running attempt.
|
|
480
774
|
// /anton stop flips abortSignal.aborted; we poll that and cancel session.ask immediately
|
|
481
775
|
// instead of waiting for the task attempt to naturally finish.
|
|
@@ -650,18 +944,41 @@ export async function runAnton(opts) {
|
|
|
650
944
|
}
|
|
651
945
|
const taskEndMs = Date.now();
|
|
652
946
|
const durationMs = taskEndMs - taskStartMs;
|
|
653
|
-
|
|
947
|
+
let tokensUsed = session.usage.prompt + session.usage.completion;
|
|
948
|
+
// Parse structured result (with one-shot recovery for format-only failures).
|
|
949
|
+
let agentResult = parseAntonResult(result.text);
|
|
950
|
+
if (agentResult.status === 'blocked' &&
|
|
951
|
+
isStructuredResultParseFailure(agentResult.reason) &&
|
|
952
|
+
!abortSignal.aborted &&
|
|
953
|
+
!controller.signal.aborted) {
|
|
954
|
+
try {
|
|
955
|
+
progress.onStage?.('⚠️ Agent omitted structured result. Requesting format-only recovery...');
|
|
956
|
+
const repaired = await session.ask(STRUCTURED_RESULT_RECOVERY_PROMPT);
|
|
957
|
+
iterationsUsed += repaired.turns;
|
|
958
|
+
agentResult = parseAntonResult(repaired.text);
|
|
959
|
+
tokensUsed = session.usage.prompt + session.usage.completion;
|
|
960
|
+
}
|
|
961
|
+
catch (repairErr) {
|
|
962
|
+
console.error(`[anton:result-recovery] failed: ${repairErr}`);
|
|
963
|
+
}
|
|
964
|
+
}
|
|
965
|
+
// If result is still parse-broken, treat as failed (retriable) instead of blocked (terminal).
|
|
966
|
+
if (agentResult.status === 'blocked' && isStructuredResultParseFailure(agentResult.reason)) {
|
|
967
|
+
agentResult = {
|
|
968
|
+
status: 'failed',
|
|
969
|
+
reason: `structured-result-parse-failure: ${agentResult.reason}`,
|
|
970
|
+
subtasks: [],
|
|
971
|
+
};
|
|
972
|
+
}
|
|
654
973
|
// Per-attempt token cost guardrail (not just prompt size).
|
|
655
974
|
if (tokensUsed > config.maxPromptTokensPerAttempt) {
|
|
656
975
|
throw new Error(`attempt-token-budget-exceeded: used=${tokensUsed} max=${config.maxPromptTokensPerAttempt}`);
|
|
657
976
|
}
|
|
658
|
-
// Parse structured result
|
|
659
|
-
const agentResult = parseAntonResult(result.text);
|
|
660
977
|
console.error(`[anton:result] task="${currentTask.text.slice(0, 50)}" status=${agentResult.status} reason=${agentResult.reason ?? 'none'} subtasks=${agentResult.subtasks.length} tokens=${tokensUsed} duration=${Math.round(durationMs / 1000)}s`);
|
|
661
978
|
if (isComplexDecompose) {
|
|
662
979
|
console.error(`[anton:debug] decompose result: status=${agentResult.status} subtasks=${agentResult.subtasks.length} reason=${agentResult.reason ?? 'none'}`);
|
|
663
|
-
if (agentResult.status === '
|
|
664
|
-
agentResult.reason
|
|
980
|
+
if (agentResult.status === 'failed' &&
|
|
981
|
+
(agentResult.reason ?? '').startsWith('structured-result-parse-failure')) {
|
|
665
982
|
console.error(`[anton:debug] decompose raw output (first 500 chars): ${(result.text ?? '').slice(0, 500)}`);
|
|
666
983
|
}
|
|
667
984
|
}
|
|
@@ -874,6 +1191,15 @@ export async function runAnton(opts) {
|
|
|
874
1191
|
consecutiveIdenticalCount.set(currentTask.key, 1);
|
|
875
1192
|
}
|
|
876
1193
|
lastFailureReason.set(currentTask.key, currentReason);
|
|
1194
|
+
// Track L2-specific failures for enhanced retry context
|
|
1195
|
+
if (attempt.verification?.l2_ai === false) {
|
|
1196
|
+
l2FailCount.set(currentTask.key, (l2FailCount.get(currentTask.key) || 0) + 1);
|
|
1197
|
+
console.error(`[anton:l2-fail] task="${currentTask.text.slice(0, 40)}" l2_fail_count=${l2FailCount.get(currentTask.key)}`);
|
|
1198
|
+
}
|
|
1199
|
+
}
|
|
1200
|
+
else {
|
|
1201
|
+
// Task passed — reset L2 fail count
|
|
1202
|
+
l2FailCount.delete(currentTask.key);
|
|
877
1203
|
}
|
|
878
1204
|
// Report task end
|
|
879
1205
|
progress.onTaskEnd(currentTask, attempt, currentProgress);
|