@visorcraft/idlehands 1.4.6 → 2.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (180) hide show
  1. package/dist/agent/constants.js +12 -0
  2. package/dist/agent/constants.js.map +1 -0
  3. package/dist/agent/context-budget.js +103 -0
  4. package/dist/agent/context-budget.js.map +1 -0
  5. package/dist/agent/errors.js +8 -0
  6. package/dist/agent/errors.js.map +1 -0
  7. package/dist/agent/exec-helpers.js +105 -0
  8. package/dist/agent/exec-helpers.js.map +1 -0
  9. package/dist/agent/model-pick.js +21 -0
  10. package/dist/agent/model-pick.js.map +1 -0
  11. package/dist/agent/session-utils.js +63 -0
  12. package/dist/agent/session-utils.js.map +1 -0
  13. package/dist/agent/subagent-context.js +78 -0
  14. package/dist/agent/subagent-context.js.map +1 -0
  15. package/dist/agent/tool-loop-detection.js +91 -20
  16. package/dist/agent/tool-loop-detection.js.map +1 -1
  17. package/dist/agent/tool-loop-guard.js.map +1 -1
  18. package/dist/agent/tool-policy.js +54 -0
  19. package/dist/agent/tool-policy.js.map +1 -0
  20. package/dist/agent/tools-schema.js +281 -0
  21. package/dist/agent/tools-schema.js.map +1 -0
  22. package/dist/agent.js +191 -641
  23. package/dist/agent.js.map +1 -1
  24. package/dist/anton/controller.js +235 -163
  25. package/dist/anton/controller.js.map +1 -1
  26. package/dist/anton/lint-baseline.js +64 -0
  27. package/dist/anton/lint-baseline.js.map +1 -0
  28. package/dist/anton/preflight.js +7 -0
  29. package/dist/anton/preflight.js.map +1 -1
  30. package/dist/anton/prompt.js +71 -71
  31. package/dist/anton/reporter.js.map +1 -1
  32. package/dist/anton/runtime-ready.js +120 -0
  33. package/dist/anton/runtime-ready.js.map +1 -0
  34. package/dist/anton/session.js +7 -1
  35. package/dist/anton/session.js.map +1 -1
  36. package/dist/anton/verifier-utils.js +148 -0
  37. package/dist/anton/verifier-utils.js.map +1 -0
  38. package/dist/anton/verifier.js +26 -227
  39. package/dist/anton/verifier.js.map +1 -1
  40. package/dist/bot/anton-auto-pin.js +12 -0
  41. package/dist/bot/anton-auto-pin.js.map +1 -0
  42. package/dist/bot/anton-commands.js +137 -0
  43. package/dist/bot/anton-commands.js.map +1 -0
  44. package/dist/bot/anton-run.js +155 -0
  45. package/dist/bot/anton-run.js.map +1 -0
  46. package/dist/bot/anton-status-format.js +18 -0
  47. package/dist/bot/anton-status-format.js.map +1 -0
  48. package/dist/bot/basic-commands.js +114 -0
  49. package/dist/bot/basic-commands.js.map +1 -0
  50. package/dist/bot/command-format.js.map +1 -1
  51. package/dist/bot/command-logic.js +8 -728
  52. package/dist/bot/command-logic.js.map +1 -1
  53. package/dist/bot/commands.js +18 -1
  54. package/dist/bot/commands.js.map +1 -1
  55. package/dist/bot/discord-anton-autopin.js +29 -0
  56. package/dist/bot/discord-anton-autopin.js.map +1 -0
  57. package/dist/bot/discord-anton.js +45 -0
  58. package/dist/bot/discord-anton.js.map +1 -0
  59. package/dist/bot/discord-commands.js +20 -52
  60. package/dist/bot/discord-commands.js.map +1 -1
  61. package/dist/bot/discord-result.js +9 -0
  62. package/dist/bot/discord-result.js.map +1 -0
  63. package/dist/bot/discord-routing.js.map +1 -1
  64. package/dist/bot/discord.js +55 -12
  65. package/dist/bot/discord.js.map +1 -1
  66. package/dist/bot/escalation-commands.js +145 -0
  67. package/dist/bot/escalation-commands.js.map +1 -0
  68. package/dist/bot/escalation.js.map +1 -1
  69. package/dist/bot/format.js +0 -5
  70. package/dist/bot/format.js.map +1 -1
  71. package/dist/bot/git-status-command.js +28 -0
  72. package/dist/bot/git-status-command.js.map +1 -0
  73. package/dist/bot/model-endpoint.js +25 -0
  74. package/dist/bot/model-endpoint.js.map +1 -0
  75. package/dist/bot/session-history.js +61 -0
  76. package/dist/bot/session-history.js.map +1 -0
  77. package/dist/bot/session-settings.js +89 -0
  78. package/dist/bot/session-settings.js.map +1 -0
  79. package/dist/bot/telegram-commands.js +15 -7
  80. package/dist/bot/telegram-commands.js.map +1 -1
  81. package/dist/bot/telegram.js +15 -29
  82. package/dist/bot/telegram.js.map +1 -1
  83. package/dist/cli/agent-turn.js +8 -2
  84. package/dist/cli/agent-turn.js.map +1 -1
  85. package/dist/cli/commands/anton.js +6 -1
  86. package/dist/cli/commands/anton.js.map +1 -1
  87. package/dist/cli/commands/model.js +1 -3
  88. package/dist/cli/commands/model.js.map +1 -1
  89. package/dist/cli/commands/project.js +1 -1
  90. package/dist/cli/commands/project.js.map +1 -1
  91. package/dist/cli/commands/secrets.js +1 -1
  92. package/dist/cli/commands/secrets.js.map +1 -1
  93. package/dist/cli/commands/session.js +22 -12
  94. package/dist/cli/commands/session.js.map +1 -1
  95. package/dist/cli/guided-onboarding.js +20 -0
  96. package/dist/cli/guided-onboarding.js.map +1 -0
  97. package/dist/cli/runtime-cmds.js +8 -133
  98. package/dist/cli/runtime-cmds.js.map +1 -1
  99. package/dist/cli/runtime-common.js +35 -0
  100. package/dist/cli/runtime-common.js.map +1 -0
  101. package/dist/cli/runtime-detect.js +12 -0
  102. package/dist/cli/runtime-detect.js.map +1 -0
  103. package/dist/cli/runtime-host-command.js +7 -0
  104. package/dist/cli/runtime-host-command.js.map +1 -0
  105. package/dist/cli/runtime-probe-defaults.js +63 -0
  106. package/dist/cli/runtime-probe-defaults.js.map +1 -0
  107. package/dist/cli/runtime-scan-ports.js +30 -0
  108. package/dist/cli/runtime-scan-ports.js.map +1 -0
  109. package/dist/cli/setup-bot-step.js +51 -0
  110. package/dist/cli/setup-bot-step.js.map +1 -0
  111. package/dist/cli/setup-runtime-forms.js +214 -0
  112. package/dist/cli/setup-runtime-forms.js.map +1 -0
  113. package/dist/cli/setup-style.js +8 -0
  114. package/dist/cli/setup-style.js.map +1 -0
  115. package/dist/cli/setup-ui.js +146 -0
  116. package/dist/cli/setup-ui.js.map +1 -0
  117. package/dist/cli/setup.js +11 -449
  118. package/dist/cli/setup.js.map +1 -1
  119. package/dist/client/error-utils.js +37 -0
  120. package/dist/client/error-utils.js.map +1 -0
  121. package/dist/client/pressure.js +77 -0
  122. package/dist/client/pressure.js.map +1 -0
  123. package/dist/client.js +24 -122
  124. package/dist/client.js.map +1 -1
  125. package/dist/config.js +31 -14
  126. package/dist/config.js.map +1 -1
  127. package/dist/git.js +8 -2
  128. package/dist/git.js.map +1 -1
  129. package/dist/history.js +418 -0
  130. package/dist/history.js.map +1 -1
  131. package/dist/hooks/types.js.map +1 -1
  132. package/dist/index.js.map +1 -1
  133. package/dist/progress/message-edit-scheduler.js.map +1 -1
  134. package/dist/progress/turn-progress.js.map +1 -1
  135. package/dist/runtime/executor.js +4 -1
  136. package/dist/runtime/executor.js.map +1 -1
  137. package/dist/runtime/health.js.map +1 -1
  138. package/dist/runtime/host-runner.js.map +1 -1
  139. package/dist/safety.js +3 -2
  140. package/dist/safety.js.map +1 -1
  141. package/dist/shared/config-utils.js.map +1 -1
  142. package/dist/tools/exec-core.js +252 -0
  143. package/dist/tools/exec-core.js.map +1 -0
  144. package/dist/tools/exec-pty.js +89 -0
  145. package/dist/tools/exec-pty.js.map +1 -0
  146. package/dist/tools/exec-utils.js +94 -0
  147. package/dist/tools/exec-utils.js.map +1 -0
  148. package/dist/tools/file-discovery.js +144 -0
  149. package/dist/tools/file-discovery.js.map +1 -0
  150. package/dist/tools/file-mutations.js +326 -0
  151. package/dist/tools/file-mutations.js.map +1 -0
  152. package/dist/tools/file-read.js +133 -0
  153. package/dist/tools/file-read.js.map +1 -0
  154. package/dist/tools/patch-apply.js +168 -0
  155. package/dist/tools/patch-apply.js.map +1 -0
  156. package/dist/tools/path-safety.js.map +1 -1
  157. package/dist/tools/replay-utils.js +25 -0
  158. package/dist/tools/replay-utils.js.map +1 -0
  159. package/dist/tools/search-utils.js +55 -0
  160. package/dist/tools/search-utils.js.map +1 -0
  161. package/dist/tools/sys-notes.js +34 -0
  162. package/dist/tools/sys-notes.js.map +1 -0
  163. package/dist/tools/text-utils.js +164 -0
  164. package/dist/tools/text-utils.js.map +1 -0
  165. package/dist/tools/undo.js +1 -1
  166. package/dist/tools/undo.js.map +1 -1
  167. package/dist/tools/vault-tools.js +36 -0
  168. package/dist/tools/vault-tools.js.map +1 -0
  169. package/dist/tools.js +19 -1460
  170. package/dist/tools.js.map +1 -1
  171. package/dist/tui/controller.js +5 -2
  172. package/dist/tui/controller.js.map +1 -1
  173. package/dist/tui/render.js.map +1 -1
  174. package/dist/utils.js +2 -2
  175. package/dist/utils.js.map +1 -1
  176. package/dist/vault.js +134 -1
  177. package/dist/vault.js.map +1 -1
  178. package/dist/watchdog.js +1 -3
  179. package/dist/watchdog.js.map +1 -1
  180. package/package.json +2 -1
@@ -4,139 +4,154 @@
4
4
  * Coordinates all components: parser, prompt, verifier, lock, git, session.
5
5
  * Structured as a deterministic orchestration flow for autonomous task execution.
6
6
  */
7
+ import * as fs from 'fs';
8
+ import * as path from 'path';
9
+ import { isToolLoopBreak, AUTO_CONTINUE_PROMPT } from '../bot/auto-continue.js';
7
10
  import { ensureCleanWorkingTree, getWorkingDiff, commitAll, restoreTrackedChanges, cleanUntracked, createBranch, getUntrackedFiles, removeUntrackedFiles, } from '../git.js';
8
- import { execute, loadActiveRuntime, runOnHost } from '../runtime/executor.js';
9
- import { waitForModelsReady } from '../runtime/health.js';
10
- import { plan } from '../runtime/planner.js';
11
- import { loadRuntimes } from '../runtime/store.js';
12
11
  import { estimateTokens } from '../utils.js';
13
12
  import { acquireAntonLock, releaseAntonLock, touchAntonLock } from './lock.js';
14
13
  import { parseTaskFile, findRunnablePendingTasks, markTaskChecked, insertSubTasks, autoCompleteAncestors, } from './parser.js';
15
- import { buildAntonPrompt, parseAntonResult, classifyTaskComplexity } from './prompt.js';
16
14
  import { ensureAgentsTasksDir, makeUniqueTaskPlanFilename, buildDiscoveryPrompt, parseDiscoveryResult, buildRequirementsReviewPrompt, parseRequirementsReviewResult, ensurePlanFileExistsOrBootstrap, } from './preflight.js';
15
+ import { buildAntonPrompt, parseAntonResult, classifyTaskComplexity } from './prompt.js';
17
16
  import { formatDryRunPlan } from './reporter.js';
17
+ import { classifyInfraError, ensureAntonRuntimeReady } from './runtime-ready.js';
18
18
  import { buildSessionConfig, buildPreflightConfig, buildDecomposeConfig, buildVerifyConfig, defaultCreateSession, } from './session.js';
19
19
  import { captureLintBaseline, detectVerificationCommands, runVerification } from './verifier.js';
20
- import { isToolLoopBreak, AUTO_CONTINUE_PROMPT } from '../bot/auto-continue.js';
21
- function endpointBase(endpoint) {
22
- if (!endpoint)
23
- return null;
24
- const e = endpoint.trim().replace(/\/+$/, '');
25
- if (!e)
26
- return null;
27
- return e.endsWith('/v1') ? e : `${e}/v1`;
28
- }
29
- async function probeEndpointReady(endpoint) {
30
- const base = endpointBase(endpoint);
31
- if (!base)
32
- return { ok: false, reason: 'endpoint-not-configured' };
33
- const ctrl = new AbortController();
34
- const t = setTimeout(() => ctrl.abort(), 7000);
35
- try {
36
- const res = await fetch(`${base}/models`, { signal: ctrl.signal });
37
- if (res.status === 503)
38
- return { ok: false, reason: 'loading-http-503' };
39
- if (!res.ok)
40
- return { ok: false, reason: `http-${res.status}` };
41
- return { ok: true, reason: 'ok' };
42
- }
43
- catch (e) {
44
- const msg = String(e?.message ?? e).toLowerCase();
45
- if (msg.includes('aborted'))
46
- return { ok: false, reason: 'timeout' };
47
- return { ok: false, reason: msg.slice(0, 120) };
48
- }
49
- finally {
50
- clearTimeout(t);
20
+ // ─────────────────────────────────────────────────────────────────────────────
21
+ // L2 Retry Enhancement Helpers
22
+ // ─────────────────────────────────────────────────────────────────────────────
23
+ /**
24
+ * Extract file paths mentioned in an L2 failure reason.
25
+ * Looks for patterns like: app/Models/Channel.php, src/foo/bar.ts, etc.
26
+ */
27
+ function extractFilePathsFromL2Reason(reason) {
28
+ const patterns = [
29
+ // PHP/Laravel style: app/Models/Channel.php, app/Http/Controllers/Foo.php
30
+ /\b(app\/[\w\/]+\.php)\b/gi,
31
+ // General file paths with extensions
32
+ /\b((?:src|lib|tests?)\/[\w\/.-]+\.\w+)\b/gi,
33
+ // Model names that can be mapped to files: "Channel model" -> app/Models/Channel.php
34
+ /\b(\w+)\s+model\b/gi,
35
+ ];
36
+ const found = new Set();
37
+ for (const pattern of patterns) {
38
+ const matches = reason.matchAll(pattern);
39
+ for (const match of matches) {
40
+ const p = match[1];
41
+ // If it's a model name reference like "Channel model", convert to path
42
+ if (/model$/i.test(match[0]) && !/\.php$/i.test(p)) {
43
+ found.add(`app/Models/${p}.php`);
44
+ }
45
+ else {
46
+ found.add(p);
47
+ }
48
+ }
51
49
  }
50
+ return [...found];
52
51
  }
53
- function classifyInfraError(err) {
54
- const msg = String(err?.message ?? err ?? '').toLowerCase();
55
- if (!msg)
56
- return 'other';
57
- if (msg.includes('aborted') || msg.includes('cancel'))
58
- return 'other';
59
- if (msg.includes('503') || msg.includes('model is loading') || msg.includes('loading')) {
60
- return 'loading';
61
- }
62
- const infraPatterns = [
63
- 'econnrefused',
64
- 'could not connect',
65
- 'connection refused',
66
- 'enotfound',
67
- 'fetch failed',
68
- 'connect timeout',
69
- 'socket hang up',
70
- 'no models found',
71
- 'endpoint',
52
+ /**
53
+ * Detect if L2 reason indicates a "missing implementation" pattern.
54
+ * Returns true if the model wrote tests but forgot the actual implementation.
55
+ */
56
+ function isL2MissingImplementation(reason) {
57
+ const missingPatterns = [
58
+ /missing\s+(?:from|in)\s+/i,
59
+ /no\s+(?:corresponding|evidence|actual)/i,
60
+ /relationship\s+(?:method\s+)?is\s+missing/i,
61
+ /but\s+(?:the|there['']?s?\s+no)/i,
62
+ /tests?\s+(?:expect|added|written).*but/i,
63
+ /should\s+be\s+(?:hasMany|hasOne|belongsTo|morphMany)/i,
72
64
  ];
73
- if (infraPatterns.some((p) => msg.includes(p))) {
74
- return 'infra_down';
75
- }
76
- return 'other';
65
+ return missingPatterns.some((p) => p.test(reason));
77
66
  }
78
- async function ensureAntonRuntimeReady(idlehandsConfig, opts) {
79
- const endpointProbe = await probeEndpointReady(idlehandsConfig.endpoint);
80
- if (endpointProbe.ok)
81
- return { ok: true, detail: 'endpoint-ready' };
82
- // Try runtime orchestration recovery when endpoint probe fails.
83
- let rtConfig;
67
+ /**
68
+ * Try to read a file's contents for injection into retry context.
69
+ * Returns null if file doesn't exist or is too large.
70
+ */
71
+ function readFileForL2Injection(projectDir, filePath) {
72
+ const MAX_FILE_SIZE = 15000; // ~15KB, reasonable for injection
84
73
  try {
85
- rtConfig = await loadRuntimes();
74
+ const fullPath = path.resolve(projectDir, filePath);
75
+ if (!fs.existsSync(fullPath))
76
+ return null;
77
+ const stat = fs.statSync(fullPath);
78
+ if (stat.size > MAX_FILE_SIZE)
79
+ return null;
80
+ return fs.readFileSync(fullPath, 'utf8');
86
81
  }
87
82
  catch {
88
- return {
89
- ok: false,
90
- detail: `endpoint-not-ready (${endpointProbe.reason}); runtimes-unavailable`,
91
- };
92
- }
93
- const active = await loadActiveRuntime();
94
- let targetModelId;
95
- if (active?.modelId && rtConfig.models.some((m) => m.id === active.modelId && m.enabled)) {
96
- targetModelId = active.modelId;
97
- }
98
- else if (typeof idlehandsConfig.model === 'string' &&
99
- rtConfig.models.some((m) => m.id === idlehandsConfig.model && m.enabled)) {
100
- targetModelId = idlehandsConfig.model;
101
- }
102
- if (!targetModelId) {
103
- return {
104
- ok: false,
105
- detail: `endpoint-not-ready (${endpointProbe.reason}); no-runtime-model-mapping`,
106
- };
83
+ return null;
107
84
  }
108
- const planOut = plan({ modelId: targetModelId, mode: 'live', forceRestart: opts.forceRestart }, rtConfig, active);
109
- if (!planOut.ok) {
110
- return { ok: false, detail: `runtime-plan-failed ${planOut.code}: ${planOut.reason}` };
85
+ }
86
+ /**
87
+ * Build enhanced retry context when L2 fails due to missing implementation.
88
+ * - On first L2 failure: Add strong guidance about which files to modify
89
+ * - On 2+ L2 failures: Inject the actual file contents so model can see what's missing
90
+ */
91
+ function buildL2EnhancedRetryContext(l2Reason, l2FailCount, projectDir, taskText) {
92
+ const parts = [];
93
+ const filePaths = extractFilePathsFromL2Reason(l2Reason);
94
+ const isMissingImpl = isL2MissingImplementation(l2Reason);
95
+ if (!isMissingImpl || filePaths.length === 0) {
96
+ // Not a "missing implementation" pattern, no enhancement needed
97
+ return '';
111
98
  }
112
- const execRes = await execute(planOut, {
113
- force: true,
114
- confirm: async () => true,
115
- });
116
- if (!execRes.ok) {
117
- return { ok: false, detail: `runtime-exec-failed: ${execRes.error ?? 'unknown'}` };
99
+ parts.push('');
100
+ parts.push('═══════════════════════════════════════════════════════════════════════');
101
+ parts.push('⚠️ CRITICAL: AI REVIEW FAILED — MISSING IMPLEMENTATION DETECTED');
102
+ parts.push('═══════════════════════════════════════════════════════════════════════');
103
+ parts.push('');
104
+ parts.push(`The AI review found that you wrote tests but FORGOT THE ACTUAL IMPLEMENTATION.`);
105
+ parts.push(`Task: "${taskText}"`);
106
+ parts.push('');
107
+ parts.push('YOU MUST MODIFY THESE FILES:');
108
+ for (const fp of filePaths) {
109
+ parts.push(` → ${fp}`);
118
110
  }
119
- const timeoutMs = Math.max(10_000, opts.timeoutMs ?? (planOut.model.launch.probe_timeout_sec ?? 600) * 1000);
120
- for (const resolvedHost of planOut.hosts) {
121
- const hostCfg = rtConfig.hosts.find((h) => h.id === resolvedHost.id);
122
- if (!hostCfg)
123
- continue;
124
- const ready = await waitForModelsReady(runOnHost, hostCfg, planOut.model.runtime_defaults?.port ?? 8080, {
125
- timeoutMs,
126
- intervalMs: planOut.model.launch.probe_interval_ms ?? 2000,
127
- });
128
- if (!ready.ok) {
129
- return {
130
- ok: false,
131
- detail: `wait-ready failed on ${resolvedHost.id}: ${ready.reason ?? 'timeout'}`,
132
- };
111
+ parts.push('');
112
+ // After 2+ identical L2 failures, inject file contents
113
+ if (l2FailCount >= 2) {
114
+ parts.push('Since you have failed this verification multiple times, here are the current');
115
+ parts.push('contents of the files you need to modify:');
116
+ parts.push('');
117
+ for (const fp of filePaths) {
118
+ const contents = readFileForL2Injection(projectDir, fp);
119
+ if (contents !== null) {
120
+ parts.push(`┌─── ${fp} ───`);
121
+ parts.push(contents);
122
+ parts.push(`└─── end of ${fp} ───`);
123
+ parts.push('');
124
+ }
125
+ else {
126
+ parts.push(`[Could not read ${fp} — file may not exist or is too large]`);
127
+ parts.push('');
128
+ }
133
129
  }
134
130
  }
135
- return { ok: true, detail: 'runtime-ready' };
131
+ parts.push('INSTRUCTIONS:');
132
+ parts.push('1. READ the files listed above (they are your existing code)');
133
+ parts.push('2. ADD the missing method/relationship to the model file');
134
+ parts.push('3. Do NOT just modify tests — the MODEL/SOURCE file must change');
135
+ parts.push('4. The L2 review expects to see your implementation in the diff');
136
+ parts.push('');
137
+ return parts.join('\n');
138
+ }
139
+ const STRUCTURED_RESULT_RECOVERY_PROMPT = `Your previous reply did not include a valid <anton-result> block.
140
+ Do NOT call tools.
141
+ Return ONLY this block shape and nothing else:
142
+ <anton-result>
143
+ status: done|failed|blocked|decompose
144
+ reason: <optional>
145
+ subtasks:
146
+ - <only when status=decompose>
147
+ </anton-result>`;
148
+ function isStructuredResultParseFailure(reason) {
149
+ if (!reason)
150
+ return false;
151
+ return (reason === 'Agent did not emit structured result' ||
152
+ reason === 'No status line found in result block' ||
153
+ reason.startsWith('Unknown status:'));
136
154
  }
137
- /**
138
- * Main Anton orchestrator.
139
- */
140
155
  export async function runAnton(opts) {
141
156
  const { config, idlehandsConfig, progress, abortSignal, apiKey, vault, lens } = opts;
142
157
  const createSessionFn = opts.createSession || defaultCreateSession;
@@ -153,6 +168,7 @@ export async function runAnton(opts) {
153
168
  const taskRetryCount = new Map();
154
169
  const lastFailureReason = new Map();
155
170
  const consecutiveIdenticalCount = new Map();
171
+ const l2FailCount = new Map(); // Track consecutive L2 failures per task
156
172
  let lockHeartbeatTimer = null;
157
173
  // SIGINT handler
158
174
  const handleAbort = () => {
@@ -253,8 +269,15 @@ export async function runAnton(opts) {
253
269
  parts.push('- Test command failed');
254
270
  if (v.l1_lint === false)
255
271
  parts.push('- Lint command failed');
256
- if (v.l2_ai === false && v.l2_reason)
272
+ if (v.l2_ai === false && v.l2_reason) {
257
273
  parts.push(`- AI review: ${v.l2_reason}`);
274
+ // Enhanced L2 retry context: stronger guidance + file injection on repeated failures
275
+ const currentL2Count = l2FailCount.get(currentTask.key) || 0;
276
+ const l2Enhancement = buildL2EnhancedRetryContext(v.l2_reason, currentL2Count, config.projectDir, currentTask.text);
277
+ if (l2Enhancement) {
278
+ parts.push(l2Enhancement);
279
+ }
280
+ }
258
281
  // Include error output (filtered to errors only, no warnings) so the
259
282
  // agent can see and fix the exact issues.
260
283
  if (v.commandOutput) {
@@ -351,6 +374,9 @@ export async function runAnton(opts) {
351
374
  continue;
352
375
  }
353
376
  const attemptNumber = retries + 1;
377
+ // Publish active task context early so /anton status + heartbeat keep working
378
+ // during preflight stages (discovery/review), not only implementation.
379
+ progress.onTaskStart(currentTask, attemptNumber, currentProgress);
354
380
  // Optional preflight pipeline: discovery -> requirements review.
355
381
  // Runs on first attempt for each task. Retries are stage-local to avoid churn.
356
382
  if (config.preflightEnabled && retries === 0) {
@@ -361,6 +387,7 @@ export async function runAnton(opts) {
361
387
  await ensureAgentsTasksDir(config.projectDir);
362
388
  const plannedFilePath = taskPlanByTaskKey.get(currentTask.key) ?? makeUniqueTaskPlanFilename(config.projectDir);
363
389
  let discoveryIterationCap = Math.max(1, Math.floor(config.preflightSessionMaxIterations ?? 500));
390
+ let discoveryRetryHint;
364
391
  // Stage 1: discovery (retry discovery only).
365
392
  for (let discoveryTry = 0; discoveryTry <= preflightMaxRetries; discoveryTry++) {
366
393
  const stageStart = Date.now();
@@ -375,18 +402,22 @@ export async function runAnton(opts) {
375
402
  taskFilePath: config.taskFile,
376
403
  projectDir: config.projectDir,
377
404
  planFilePath: plannedFilePath,
405
+ retryHint: discoveryRetryHint,
378
406
  });
407
+ let discoveryTimeoutHandle;
379
408
  const discoveryRes = await Promise.race([
380
- discoverySession.ask(discoveryPrompt),
381
- new Promise((_, reject) => setTimeout(() => {
382
- try {
383
- discoverySession?.cancel();
384
- }
385
- catch {
386
- // best effort
387
- }
388
- reject(new Error('preflight-discovery-timeout'));
389
- }, discoveryTimeoutMs)),
409
+ discoverySession.ask(discoveryPrompt).finally(() => clearTimeout(discoveryTimeoutHandle)),
410
+ new Promise((_, reject) => {
411
+ discoveryTimeoutHandle = setTimeout(() => {
412
+ try {
413
+ discoverySession?.cancel();
414
+ }
415
+ catch {
416
+ // best effort
417
+ }
418
+ reject(new Error('preflight-discovery-timeout'));
419
+ }, discoveryTimeoutMs);
420
+ }),
390
421
  ]);
391
422
  const discoveryTokens = discoverySession.usage.prompt + discoverySession.usage.completion;
392
423
  totalTokens += discoveryTokens;
@@ -432,8 +463,9 @@ export async function runAnton(opts) {
432
463
  status: timeout ? 'timeout' : 'error',
433
464
  error: errMsg,
434
465
  });
466
+ const short = errMsg.length > 180 ? `${errMsg.slice(0, 177)}...` : errMsg;
467
+ discoveryRetryHint = `Previous discovery attempt failed: ${short}. Do not edit source files. Only update ${plannedFilePath} and return strict JSON.`;
435
468
  if (discoveryTry < preflightMaxRetries) {
436
- const short = errMsg.length > 180 ? `${errMsg.slice(0, 177)}...` : errMsg;
437
469
  if (/max iterations exceeded/i.test(errMsg)) {
438
470
  const nextCap = Math.min(Math.max(discoveryIterationCap * 2, discoveryIterationCap + 2), 1000);
439
471
  if (nextCap > discoveryIterationCap) {
@@ -445,21 +477,22 @@ export async function runAnton(opts) {
445
477
  progress.onStage?.(`⚠️ Discovery failed (${discoveryTry + 1}/${preflightTotalTries}): ${short}. Retrying discovery...`);
446
478
  continue;
447
479
  }
448
- const preflightAttempt = {
449
- taskKey: currentTask.key,
450
- taskText: currentTask.text,
451
- attempt: attemptNumber,
452
- durationMs: Date.now() - stageStart,
453
- tokensUsed: 0,
454
- status: timeout ? 'timeout' : 'error',
455
- verification: undefined,
456
- error: `preflight-error(discovery): ${errMsg}`,
457
- commitHash: undefined,
458
- };
459
- attempts.push(preflightAttempt);
460
- taskRetryCount.set(currentTask.key, retries + 1);
461
- if (!config.skipOnFail)
462
- break mainLoop;
480
+ // Final discovery failure: degrade gracefully by bootstrapping a fallback plan file
481
+ // so Anton can still proceed to implementation/review instead of hard-failing task 1.
482
+ const fallbackState = await ensurePlanFileExistsOrBootstrap({
483
+ absPath: plannedFilePath,
484
+ task: currentTask,
485
+ source: 'discovery',
486
+ });
487
+ if (fallbackState === 'bootstrapped') {
488
+ progress.onStage?.(`⚠️ Discovery failed after ${preflightTotalTries} tries (${short}). Bootstrapped fallback plan and continuing: ${plannedFilePath}`);
489
+ }
490
+ else {
491
+ progress.onStage?.(`⚠️ Discovery failed after ${preflightTotalTries} tries (${short}). Reusing existing plan and continuing: ${plannedFilePath}`);
492
+ }
493
+ taskPlanByTaskKey.set(currentTask.key, plannedFilePath);
494
+ discoveryOk = true;
495
+ break;
463
496
  }
464
497
  finally {
465
498
  try {
@@ -491,17 +524,20 @@ export async function runAnton(opts) {
491
524
  progress.onStage?.('🧪 Requirements review: refining plan...');
492
525
  reviewSession = await createSessionFn(buildPreflightConfig(idlehandsConfig, config, reviewTimeoutSec, reviewIterationCap), apiKey);
493
526
  const reviewPrompt = buildRequirementsReviewPrompt(reviewPlanFile);
527
+ let reviewTimeoutHandle;
494
528
  const reviewRes = await Promise.race([
495
- reviewSession.ask(reviewPrompt),
496
- new Promise((_, reject) => setTimeout(() => {
497
- try {
498
- reviewSession?.cancel();
499
- }
500
- catch {
501
- // best effort
502
- }
503
- reject(new Error('preflight-review-timeout'));
504
- }, reviewTimeoutMs)),
529
+ reviewSession.ask(reviewPrompt).finally(() => clearTimeout(reviewTimeoutHandle)),
530
+ new Promise((_, reject) => {
531
+ reviewTimeoutHandle = setTimeout(() => {
532
+ try {
533
+ reviewSession?.cancel();
534
+ }
535
+ catch {
536
+ // best effort
537
+ }
538
+ reject(new Error('preflight-review-timeout'));
539
+ }, reviewTimeoutMs);
540
+ }),
505
541
  ]);
506
542
  const reviewTokens = reviewSession.usage.prompt + reviewSession.usage.completion;
507
543
  totalTokens += reviewTokens;
@@ -582,7 +618,6 @@ export async function runAnton(opts) {
582
618
  }
583
619
  }
584
620
  progress.onStage?.('🛠️ Implementation: executing vetted plan...');
585
- progress.onTaskStart(currentTask, attemptNumber, currentProgress);
586
621
  let session;
587
622
  let attempt;
588
623
  const taskComplexity = classifyTaskComplexity(currentTask.text);
@@ -650,8 +685,7 @@ export async function runAnton(opts) {
650
685
  if (effectiveRetryContext) {
651
686
  if (trimPass === 0) {
652
687
  // First trim: cut command output to 1000 chars
653
- effectiveRetryContext = effectiveRetryContext
654
- .replace(/=== Full error output from failed commands ===[\s\S]*?=== End of error output ===/, (m) => {
688
+ effectiveRetryContext = effectiveRetryContext.replace(/=== Full error output from failed commands ===[\s\S]*?=== End of error output ===/, (m) => {
655
689
  const inner = m.slice(m.indexOf('===\n') + 4, m.lastIndexOf('\n==='));
656
690
  return `=== Error output (trimmed) ===\n${inner.slice(0, 1000)}\n...(truncated)\n=== End of error output ===`;
657
691
  });
@@ -659,8 +693,7 @@ export async function runAnton(opts) {
659
693
  }
660
694
  else if (trimPass === 1) {
661
695
  // Second trim: drop command output entirely, keep just summary
662
- effectiveRetryContext = effectiveRetryContext
663
- .replace(/\n*=== (Full e|E)rror output[\s\S]*?=== End of error output ===\n*/, '\n(Full error output omitted due to prompt budget — run the lint/test command to see errors)\n');
696
+ effectiveRetryContext = effectiveRetryContext.replace(/\n*=== (Full e|E)rror output[\s\S]*?=== End of error output ===\n*/, '\n(Full error output omitted due to prompt budget — run the lint/test command to see errors)\n');
664
697
  console.error(`[anton:budget] trimPass=2: dropped retry command output entirely`);
665
698
  }
666
699
  else {
@@ -685,16 +718,20 @@ export async function runAnton(opts) {
685
718
  try {
686
719
  progress.onToolLoop?.(currentTask.text, event);
687
720
  }
688
- catch { /* best effort */ }
721
+ catch {
722
+ /* best effort */
723
+ }
689
724
  },
690
725
  onCompaction: (event) => {
691
726
  try {
692
727
  progress.onCompaction?.(currentTask.text, event);
693
728
  }
694
- catch { /* best effort */ }
729
+ catch {
730
+ /* best effort */
731
+ }
695
732
  },
696
733
  onTurnEnd: (stats) => {
697
- const tokens = session ? (session.usage.prompt + session.usage.completion) : 0;
734
+ const tokens = session ? session.usage.prompt + session.usage.completion : 0;
698
735
  console.error(`[anton:turn] task="${currentTask.text.slice(0, 40)}" turn=${stats.turn} toolCalls=${stats.toolCalls} tokens=${tokens}`);
699
736
  },
700
737
  };
@@ -768,17 +805,41 @@ export async function runAnton(opts) {
768
805
  }
769
806
  const taskEndMs = Date.now();
770
807
  const durationMs = taskEndMs - taskStartMs;
771
- const tokensUsed = session.usage.prompt + session.usage.completion;
808
+ let tokensUsed = session.usage.prompt + session.usage.completion;
809
+ // Parse structured result (with one-shot recovery for format-only failures).
810
+ let agentResult = parseAntonResult(result.text);
811
+ if (agentResult.status === 'blocked' &&
812
+ isStructuredResultParseFailure(agentResult.reason) &&
813
+ !abortSignal.aborted &&
814
+ !controller.signal.aborted) {
815
+ try {
816
+ progress.onStage?.('⚠️ Agent omitted structured result. Requesting format-only recovery...');
817
+ const repaired = await session.ask(STRUCTURED_RESULT_RECOVERY_PROMPT);
818
+ iterationsUsed += repaired.turns;
819
+ agentResult = parseAntonResult(repaired.text);
820
+ tokensUsed = session.usage.prompt + session.usage.completion;
821
+ }
822
+ catch (repairErr) {
823
+ console.error(`[anton:result-recovery] failed: ${repairErr}`);
824
+ }
825
+ }
826
+ // If result is still parse-broken, treat as failed (retriable) instead of blocked (terminal).
827
+ if (agentResult.status === 'blocked' && isStructuredResultParseFailure(agentResult.reason)) {
828
+ agentResult = {
829
+ status: 'failed',
830
+ reason: `structured-result-parse-failure: ${agentResult.reason}`,
831
+ subtasks: [],
832
+ };
833
+ }
772
834
  // Per-attempt token cost guardrail (not just prompt size).
773
835
  if (tokensUsed > config.maxPromptTokensPerAttempt) {
774
836
  throw new Error(`attempt-token-budget-exceeded: used=${tokensUsed} max=${config.maxPromptTokensPerAttempt}`);
775
837
  }
776
- // Parse structured result
777
- const agentResult = parseAntonResult(result.text);
778
838
  console.error(`[anton:result] task="${currentTask.text.slice(0, 50)}" status=${agentResult.status} reason=${agentResult.reason ?? 'none'} subtasks=${agentResult.subtasks.length} tokens=${tokensUsed} duration=${Math.round(durationMs / 1000)}s`);
779
839
  if (isComplexDecompose) {
780
840
  console.error(`[anton:debug] decompose result: status=${agentResult.status} subtasks=${agentResult.subtasks.length} reason=${agentResult.reason ?? 'none'}`);
781
- if (agentResult.status === 'blocked' && agentResult.reason === 'Agent did not emit structured result') {
841
+ if (agentResult.status === 'failed' &&
842
+ (agentResult.reason ?? '').startsWith('structured-result-parse-failure')) {
782
843
  console.error(`[anton:debug] decompose raw output (first 500 chars): ${(result.text ?? '').slice(0, 500)}`);
783
844
  }
784
845
  }
@@ -858,7 +919,9 @@ export async function runAnton(opts) {
858
919
  try {
859
920
  progress.onVerification?.(currentTask.text, verification);
860
921
  }
861
- catch { /* best effort */ }
922
+ catch {
923
+ /* best effort */
924
+ }
862
925
  if (verification.passed) {
863
926
  status = 'passed';
864
927
  if (config.autoCommit) {
@@ -989,6 +1052,15 @@ export async function runAnton(opts) {
989
1052
  consecutiveIdenticalCount.set(currentTask.key, 1);
990
1053
  }
991
1054
  lastFailureReason.set(currentTask.key, currentReason);
1055
+ // Track L2-specific failures for enhanced retry context
1056
+ if (attempt.verification?.l2_ai === false) {
1057
+ l2FailCount.set(currentTask.key, (l2FailCount.get(currentTask.key) || 0) + 1);
1058
+ console.error(`[anton:l2-fail] task="${currentTask.text.slice(0, 40)}" l2_fail_count=${l2FailCount.get(currentTask.key)}`);
1059
+ }
1060
+ }
1061
+ else {
1062
+ // Task passed — reset L2 fail count
1063
+ l2FailCount.delete(currentTask.key);
992
1064
  }
993
1065
  // Report task end
994
1066
  progress.onTaskEnd(currentTask, attempt, currentProgress);
@@ -1002,7 +1074,7 @@ export async function runAnton(opts) {
1002
1074
  // break when maxRetriesPerTask is reached (if skipOnFail is false).
1003
1075
  // Previously this broke immediately on the first failure, preventing
1004
1076
  // the AI from fixing verification errors (e.g. lint) on retry.
1005
- const isFail = (attempt.status === 'failed' || attempt.status === 'error');
1077
+ const isFail = attempt.status === 'failed' || attempt.status === 'error';
1006
1078
  if (isFail && !config.skipOnFail) {
1007
1079
  const retries = taskRetryCount.get(currentTask.key) || 0;
1008
1080
  if (retries >= config.maxRetriesPerTask) {