@visorcraft/idlehands 2.0.1 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59)
  1. package/dist/agent/prompt-builder.js +188 -0
  2. package/dist/agent/prompt-builder.js.map +1 -0
  3. package/dist/agent/query-classifier.js +72 -0
  4. package/dist/agent/query-classifier.js.map +1 -0
  5. package/dist/agent/resilient-provider.js +170 -0
  6. package/dist/agent/resilient-provider.js.map +1 -0
  7. package/dist/agent/response-cache.js +124 -0
  8. package/dist/agent/response-cache.js.map +1 -0
  9. package/dist/agent/semantic-search.js +138 -0
  10. package/dist/agent/semantic-search.js.map +1 -0
  11. package/dist/agent/tool-calls.js +261 -1
  12. package/dist/agent/tool-calls.js.map +1 -1
  13. package/dist/agent/tool-name-alias.js +140 -0
  14. package/dist/agent/tool-name-alias.js.map +1 -0
  15. package/dist/agent.js +146 -43
  16. package/dist/agent.js.map +1 -1
  17. package/dist/anton/controller.js +442 -186
  18. package/dist/anton/controller.js.map +1 -1
  19. package/dist/anton/preflight.js +89 -28
  20. package/dist/anton/preflight.js.map +1 -1
  21. package/dist/anton/prompt.js +20 -0
  22. package/dist/anton/prompt.js.map +1 -1
  23. package/dist/anton/reporter.js +6 -1
  24. package/dist/anton/reporter.js.map +1 -1
  25. package/dist/bot/discord-commands.js +25 -0
  26. package/dist/bot/discord-commands.js.map +1 -1
  27. package/dist/bot/discord.js +15 -0
  28. package/dist/bot/discord.js.map +1 -1
  29. package/dist/bot/telegram-commands.js +21 -0
  30. package/dist/bot/telegram-commands.js.map +1 -1
  31. package/dist/bot/telegram.js +1 -0
  32. package/dist/bot/telegram.js.map +1 -1
  33. package/dist/bot/upgrade-command.js +398 -0
  34. package/dist/bot/upgrade-command.js.map +1 -0
  35. package/dist/bot/ux/discord-renderer.js +5 -21
  36. package/dist/bot/ux/discord-renderer.js.map +1 -1
  37. package/dist/bot/ux/emitter.js +104 -0
  38. package/dist/bot/ux/emitter.js.map +1 -0
  39. package/dist/bot/ux/shared-formatter.js +43 -0
  40. package/dist/bot/ux/shared-formatter.js.map +1 -0
  41. package/dist/bot/ux/telegram-renderer.js +5 -21
  42. package/dist/bot/ux/telegram-renderer.js.map +1 -1
  43. package/dist/cli/commands/upgrade.js +27 -0
  44. package/dist/cli/commands/upgrade.js.map +1 -0
  45. package/dist/client.js +51 -7
  46. package/dist/client.js.map +1 -1
  47. package/dist/harnesses.js +2 -0
  48. package/dist/harnesses.js.map +1 -1
  49. package/dist/index.js +4 -0
  50. package/dist/index.js.map +1 -1
  51. package/dist/model-customization.js +3 -1
  52. package/dist/model-customization.js.map +1 -1
  53. package/dist/security/leak-detector.js +109 -0
  54. package/dist/security/leak-detector.js.map +1 -0
  55. package/dist/security/prompt-guard.js +120 -0
  56. package/dist/security/prompt-guard.js.map +1 -0
  57. package/dist/tui/command-handler.js +2 -0
  58. package/dist/tui/command-handler.js.map +1 -1
  59. package/package.json +1 -1
@@ -11,7 +11,7 @@ import { ensureCleanWorkingTree, getWorkingDiff, commitAll, restoreTrackedChange
11
11
  import { estimateTokens } from '../utils.js';
12
12
  import { acquireAntonLock, releaseAntonLock, touchAntonLock } from './lock.js';
13
13
  import { parseTaskFile, findRunnablePendingTasks, markTaskChecked, insertSubTasks, autoCompleteAncestors, } from './parser.js';
14
- import { ensureAgentsTasksDir, makeUniqueTaskPlanFilename, buildDiscoveryPrompt, parseDiscoveryResult, buildRequirementsReviewPrompt, parseRequirementsReviewResult, ensurePlanFileExistsOrBootstrap, } from './preflight.js';
14
+ import { ensureAgentsTasksDir, makeUniqueTaskPlanFilename, buildDiscoveryPrompt, parseDiscoveryResult, buildRequirementsReviewPrompt, parseRequirementsReviewResult, ensurePlanFileExistsOrBootstrap, assertPlanFileExistsAndNonEmpty, buildDiscoveryRewritePrompt, buildReviewRewritePrompt, FORCE_DISCOVERY_DECISION_PROMPT, FORCE_REVIEW_DECISION_PROMPT, } from './preflight.js';
15
15
  import { buildAntonPrompt, parseAntonResult, classifyTaskComplexity } from './prompt.js';
16
16
  import { formatDryRunPlan } from './reporter.js';
17
17
  import { classifyInfraError, ensureAntonRuntimeReady } from './runtime-ready.js';
@@ -64,6 +64,12 @@ function isL2MissingImplementation(reason) {
64
64
  ];
65
65
  return missingPatterns.some((p) => p.test(reason));
66
66
  }
67
+ function isRecoverablePreflightDiscoveryError(errMsg) {
68
+ return (/preflight-json-missing-object|preflight-discovery-invalid-status|preflight-discovery-invalid-filename|preflight-discovery-filename|preflight-plan-empty|preflight-plan-not-a-file/i.test(errMsg) || /identical call repeated|breaking loop|tool\s+edit_range/i.test(errMsg));
69
+ }
70
+ function isRecoverablePreflightReviewError(errMsg) {
71
+ return /preflight-json-missing-object|preflight-review-invalid-status|preflight-review-invalid-filename|preflight-review-filename|preflight-plan-empty|preflight-plan-not-a-file/i.test(errMsg);
72
+ }
67
73
  /**
68
74
  * Try to read a file's contents for injection into retry context.
69
75
  * Returns null if file doesn't exist or is too large.
@@ -136,6 +142,15 @@ function buildL2EnhancedRetryContext(l2Reason, l2FailCount, projectDir, taskText
136
142
  parts.push('');
137
143
  return parts.join('\n');
138
144
  }
145
+ const ANTON_RESULT_SYSTEM_CONTRACT = `[Anton output contract]
146
+ Every final implementation/decompose answer MUST contain exactly one structured block:
147
+ <anton-result>
148
+ status: done|failed|blocked|decompose
149
+ reason: <optional>
150
+ subtasks:
151
+ - <only when status=decompose>
152
+ </anton-result>
153
+ Do not omit this block.`;
139
154
  const STRUCTURED_RESULT_RECOVERY_PROMPT = `Your previous reply did not include a valid <anton-result> block.
140
155
  Do NOT call tools.
141
156
  Return ONLY this block shape and nothing else:
@@ -152,6 +167,19 @@ function isStructuredResultParseFailure(reason) {
152
167
  reason === 'No status line found in result block' ||
153
168
  reason.startsWith('Unknown status:'));
154
169
  }
170
+ function injectAntonResultContract(session) {
171
+ try {
172
+ const current = String(session.getSystemPrompt?.() ?? '').trim();
173
+ if (!current)
174
+ return;
175
+ if (current.includes('<anton-result>') || current.includes('[Anton output contract]'))
176
+ return;
177
+ session.setSystemPrompt(`${current}\n\n${ANTON_RESULT_SYSTEM_CONTRACT}`);
178
+ }
179
+ catch {
180
+ // best effort
181
+ }
182
+ }
155
183
  export async function runAnton(opts) {
156
184
  const { config, idlehandsConfig, progress, abortSignal, apiKey, vault, lens } = opts;
157
185
  const createSessionFn = opts.createSession || defaultCreateSession;
@@ -315,7 +343,7 @@ export async function runAnton(opts) {
315
343
  if (taskFile.totalCount > config.maxTotalTasks) {
316
344
  break mainLoop;
317
345
  }
318
- // Progress tracking
346
+ // Progress tracking (mutable so onTurnEnd can update currentTurn)
319
347
  const currentProgress = {
320
348
  currentIndex: 0,
321
349
  totalPending: initialPending,
@@ -326,6 +354,8 @@ export async function runAnton(opts) {
326
354
  estimatedRemainingMs: undefined,
327
355
  currentTask: currentTask.text,
328
356
  currentAttempt: (taskRetryCount.get(currentTask.key) || 0) + 1,
357
+ currentTurn: 1,
358
+ maxTurns: config.taskMaxIterations,
329
359
  };
330
360
  // Handle max retries
331
361
  const retries = taskRetryCount.get(currentTask.key) || 0;
@@ -384,183 +414,184 @@ export async function runAnton(opts) {
384
414
  const preflightTotalTries = preflightMaxRetries + 1;
385
415
  let preflightMarkedComplete = false;
386
416
  let discoveryOk = false;
417
+ let discoveryUsedFallbackPlan = false;
387
418
  await ensureAgentsTasksDir(config.projectDir);
388
419
  const plannedFilePath = taskPlanByTaskKey.get(currentTask.key) ?? makeUniqueTaskPlanFilename(config.projectDir);
389
- let discoveryIterationCap = Math.max(1, Math.floor(config.preflightSessionMaxIterations ?? 500));
420
+ // Default to 50 iterations for discovery (was 500 - way too high for a simple JSON check)
421
+ let discoveryIterationCap = Math.max(1, Math.floor(config.preflightSessionMaxIterations ?? 50));
390
422
  let discoveryRetryHint;
391
- // Stage 1: discovery (retry discovery only).
392
- for (let discoveryTry = 0; discoveryTry <= preflightMaxRetries; discoveryTry++) {
393
- const stageStart = Date.now();
394
- const discoveryTimeoutSec = config.preflightDiscoveryTimeoutSec ?? config.taskTimeoutSec;
395
- const discoveryTimeoutMs = discoveryTimeoutSec * 1000;
396
- let discoverySession;
397
- try {
398
- progress.onStage?.('🔎 Discovery: checking if already done...');
399
- discoverySession = await createSessionFn(buildPreflightConfig(idlehandsConfig, config, discoveryTimeoutSec, discoveryIterationCap), apiKey);
400
- const discoveryPrompt = buildDiscoveryPrompt({
401
- task: currentTask,
402
- taskFilePath: config.taskFile,
403
- projectDir: config.projectDir,
404
- planFilePath: plannedFilePath,
405
- retryHint: discoveryRetryHint,
406
- });
407
- let discoveryTimeoutHandle;
408
- const discoveryRes = await Promise.race([
409
- discoverySession.ask(discoveryPrompt).finally(() => clearTimeout(discoveryTimeoutHandle)),
410
- new Promise((_, reject) => {
411
- discoveryTimeoutHandle = setTimeout(() => {
412
- try {
413
- discoverySession?.cancel();
414
- }
415
- catch {
416
- // best effort
417
- }
418
- reject(new Error('preflight-discovery-timeout'));
419
- }, discoveryTimeoutMs);
420
- }),
421
- ]);
422
- const discoveryTokens = discoverySession.usage.prompt + discoverySession.usage.completion;
423
- totalTokens += discoveryTokens;
424
- const discovery = parseDiscoveryResult(discoveryRes.text, config.projectDir);
425
- preflightRecords.push({
426
- taskKey: currentTask.key,
427
- stage: 'discovery',
428
- durationMs: Date.now() - stageStart,
429
- tokensUsed: discoveryTokens,
430
- status: discovery.status,
431
- filename: discovery.filename || undefined,
432
- });
433
- if (discovery.status === 'complete') {
434
- await markTaskChecked(config.taskFile, currentTask.key);
435
- await autoCompleteAncestors(config.taskFile, currentTask.key);
436
- autoCompleted += 1;
437
- progress.onStage?.(`✅ Discovery confirmed already complete: ${currentTask.text}`);
438
- preflightMarkedComplete = true;
439
- discoveryOk = true;
440
- break;
441
- }
442
- const discoveryPlanState = await ensurePlanFileExistsOrBootstrap({
443
- absPath: discovery.filename,
444
- task: currentTask,
445
- source: 'discovery',
446
- });
447
- if (discoveryPlanState === 'bootstrapped') {
448
- progress.onStage?.(`⚠️ Discovery returned a filename but did not write it. Created fallback plan file: ${discovery.filename}`);
449
- }
450
- taskPlanByTaskKey.set(currentTask.key, discovery.filename);
451
- progress.onStage?.(`📝 Discovery plan file: ${discovery.filename}`);
452
- discoveryOk = true;
453
- break;
454
- }
455
- catch (error) {
456
- const errMsg = error instanceof Error ? error.message : String(error);
457
- const timeout = /timeout/i.test(errMsg);
458
- preflightRecords.push({
459
- taskKey: currentTask.key,
460
- stage: 'discovery',
461
- durationMs: Date.now() - stageStart,
462
- tokensUsed: 0,
463
- status: timeout ? 'timeout' : 'error',
464
- error: errMsg,
465
- });
466
- const short = errMsg.length > 180 ? `${errMsg.slice(0, 177)}...` : errMsg;
467
- discoveryRetryHint = `Previous discovery attempt failed: ${short}. Do not edit source files. Only update ${plannedFilePath} and return strict JSON.`;
468
- if (discoveryTry < preflightMaxRetries) {
469
- if (/max iterations exceeded/i.test(errMsg)) {
470
- const nextCap = Math.min(Math.max(discoveryIterationCap * 2, discoveryIterationCap + 2), 1000);
471
- if (nextCap > discoveryIterationCap) {
472
- progress.onStage?.(`⚠️ Discovery hit max iterations (${discoveryIterationCap}). Increasing preflight cap to ${nextCap} and retrying...`);
473
- discoveryIterationCap = nextCap;
474
- continue;
475
- }
476
- }
477
- progress.onStage?.(`⚠️ Discovery failed (${discoveryTry + 1}/${preflightTotalTries}): ${short}. Retrying discovery...`);
478
- continue;
479
- }
480
- // Final discovery failure: degrade gracefully by bootstrapping a fallback plan file
481
- // so Anton can still proceed to implementation/review instead of hard-failing task 1.
482
- const fallbackState = await ensurePlanFileExistsOrBootstrap({
483
- absPath: plannedFilePath,
484
- task: currentTask,
485
- source: 'discovery',
486
- });
487
- if (fallbackState === 'bootstrapped') {
488
- progress.onStage?.(`⚠️ Discovery failed after ${preflightTotalTries} tries (${short}). Bootstrapped fallback plan and continuing: ${plannedFilePath}`);
489
- }
490
- else {
491
- progress.onStage?.(`⚠️ Discovery failed after ${preflightTotalTries} tries (${short}). Reusing existing plan and continuing: ${plannedFilePath}`);
492
- }
493
- taskPlanByTaskKey.set(currentTask.key, plannedFilePath);
494
- discoveryOk = true;
495
- break;
496
- }
497
- finally {
423
+ // Shared preflight session - reused between discovery and review stages to avoid
424
+ // session creation overhead. Created lazily, closed on error (for fresh retry state)
425
+ // or at end of preflight block.
426
+ let preflightSession;
427
+ const closePreflightSession = async () => {
428
+ if (preflightSession) {
498
429
  try {
499
- await discoverySession?.close();
430
+ await preflightSession.close();
500
431
  }
501
432
  catch {
502
433
  // best effort
503
434
  }
435
+ preflightSession = undefined;
504
436
  }
505
- }
506
- // Discovery already marked complete -> next task.
507
- if (preflightMarkedComplete) {
508
- continue;
509
- }
510
- if (!discoveryOk) {
511
- continue;
512
- }
513
- // Stage 2: requirements review (retry review only; keep same plan file).
514
- if (config.preflightRequirementsReview) {
515
- const reviewPlanFile = taskPlanByTaskKey.get(currentTask.key) ?? plannedFilePath;
516
- let reviewOk = false;
517
- let reviewIterationCap = Math.max(1, Math.floor(config.preflightSessionMaxIterations ?? 500));
518
- for (let reviewTry = 0; reviewTry <= preflightMaxRetries; reviewTry++) {
437
+ };
438
+ try {
439
+ // Stage 1: discovery (retry discovery only).
440
+ for (let discoveryTry = 0; discoveryTry <= preflightMaxRetries; discoveryTry++) {
519
441
  const stageStart = Date.now();
520
- const reviewTimeoutSec = config.preflightReviewTimeoutSec ?? config.taskTimeoutSec;
521
- const reviewTimeoutMs = reviewTimeoutSec * 1000;
522
- let reviewSession;
442
+ const discoveryTimeoutSec = config.preflightDiscoveryTimeoutSec ?? config.taskTimeoutSec;
443
+ const discoveryTimeoutMs = discoveryTimeoutSec * 1000;
523
444
  try {
524
- progress.onStage?.('🧪 Requirements review: refining plan...');
525
- reviewSession = await createSessionFn(buildPreflightConfig(idlehandsConfig, config, reviewTimeoutSec, reviewIterationCap), apiKey);
526
- const reviewPrompt = buildRequirementsReviewPrompt(reviewPlanFile);
527
- let reviewTimeoutHandle;
528
- const reviewRes = await Promise.race([
529
- reviewSession.ask(reviewPrompt).finally(() => clearTimeout(reviewTimeoutHandle)),
445
+ progress.onStage?.('🔎 Discovery: checking if already done...');
446
+ // Create session if not already open (first try or after error closed it)
447
+ if (!preflightSession) {
448
+ preflightSession = await createSessionFn(buildPreflightConfig(idlehandsConfig, config, discoveryTimeoutSec, discoveryIterationCap), apiKey);
449
+ }
450
+ const discoveryPrompt = buildDiscoveryPrompt({
451
+ task: currentTask,
452
+ taskFilePath: config.taskFile,
453
+ projectDir: config.projectDir,
454
+ planFilePath: plannedFilePath,
455
+ retryHint: discoveryRetryHint,
456
+ });
457
+ let discoveryTimeoutHandle;
458
+ const discoveryRes = await Promise.race([
459
+ preflightSession.ask(discoveryPrompt).finally(() => clearTimeout(discoveryTimeoutHandle)),
530
460
  new Promise((_, reject) => {
531
- reviewTimeoutHandle = setTimeout(() => {
461
+ discoveryTimeoutHandle = setTimeout(() => {
532
462
  try {
533
- reviewSession?.cancel();
463
+ preflightSession?.cancel();
534
464
  }
535
465
  catch {
536
466
  // best effort
537
467
  }
538
- reject(new Error('preflight-review-timeout'));
539
- }, reviewTimeoutMs);
468
+ reject(new Error('preflight-discovery-timeout'));
469
+ }, discoveryTimeoutMs);
540
470
  }),
541
471
  ]);
542
- const reviewTokens = reviewSession.usage.prompt + reviewSession.usage.completion;
543
- totalTokens += reviewTokens;
544
- const review = parseRequirementsReviewResult(reviewRes.text, config.projectDir);
545
- const reviewPlanState = await ensurePlanFileExistsOrBootstrap({
546
- absPath: review.filename,
547
- task: currentTask,
548
- source: 'requirements-review',
549
- });
550
- if (reviewPlanState === 'bootstrapped') {
551
- progress.onStage?.(`⚠️ Requirements review returned a filename but did not write it. Created fallback plan file: ${review.filename}`);
472
+ let discoveryTokens = preflightSession.usage.prompt + preflightSession.usage.completion;
473
+ totalTokens += discoveryTokens;
474
+ // Try to parse discovery result; if invalid JSON, attempt force-decision prompt
475
+ let discovery;
476
+ try {
477
+ discovery = parseDiscoveryResult(discoveryRes.text, config.projectDir);
478
+ }
479
+ catch (parseError) {
480
+ const parseErrMsg = parseError instanceof Error ? parseError.message : String(parseError);
481
+ // Only try force-decision for JSON/format errors, not file path errors
482
+ if (/preflight-json-missing-object|preflight-discovery-invalid/i.test(parseErrMsg)) {
483
+ progress.onStage?.('⚠️ Discovery output invalid, requesting forced decision...');
484
+ try {
485
+ const forceRes = await preflightSession.ask(FORCE_DISCOVERY_DECISION_PROMPT);
486
+ const forceTokens = preflightSession.usage.prompt + preflightSession.usage.completion - discoveryTokens;
487
+ discoveryTokens += forceTokens;
488
+ totalTokens += forceTokens;
489
+ discovery = parseDiscoveryResult(forceRes.text, config.projectDir);
490
+ progress.onStage?.('✅ Forced decision succeeded');
491
+ }
492
+ catch {
493
+ // Force-decision also failed, throw original error
494
+ throw parseError;
495
+ }
496
+ }
497
+ else {
498
+ throw parseError;
499
+ }
500
+ }
501
+ if (discovery.status === 'complete') {
502
+ preflightRecords.push({
503
+ taskKey: currentTask.key,
504
+ stage: 'discovery',
505
+ durationMs: Date.now() - stageStart,
506
+ tokensUsed: discoveryTokens,
507
+ status: discovery.status,
508
+ filename: discovery.filename || undefined,
509
+ });
510
+ await markTaskChecked(config.taskFile, currentTask.key);
511
+ await autoCompleteAncestors(config.taskFile, currentTask.key);
512
+ autoCompleted += 1;
513
+ progress.onStage?.(`✅ Discovery confirmed already complete: ${currentTask.text}`);
514
+ preflightMarkedComplete = true;
515
+ discoveryOk = true;
516
+ // No review needed - close session now
517
+ await closePreflightSession();
518
+ break;
519
+ }
520
+ // If the model returned incomplete+filename without making any tool calls,
521
+ // it almost certainly hallucinated the file write. Immediately ask it to
522
+ // actually write the file before we even check the filesystem.
523
+ if (discoveryRes.toolCalls === 0) {
524
+ progress.onStage?.('⚠️ Discovery returned filename but made no tool calls — forcing write...');
525
+ const writeRes = await preflightSession.ask(buildDiscoveryRewritePrompt(discovery.filename, 'file was never written (no tool calls)'));
526
+ const writeTokens = preflightSession.usage.prompt + preflightSession.usage.completion - discoveryTokens;
527
+ discoveryTokens += writeTokens;
528
+ totalTokens += writeTokens;
529
+ try {
530
+ const rewritten = parseDiscoveryResult(writeRes.text, config.projectDir);
531
+ if (rewritten.status === 'incomplete' && rewritten.filename) {
532
+ discovery = rewritten;
533
+ }
534
+ }
535
+ catch {
536
+ // keep original discovery.filename; validation below will handle it
537
+ }
538
+ }
539
+ // Discovery claims a plan filename; verify it truly exists and has content.
540
+ // If missing/empty, explicitly ask model to retry writing before accepting success.
541
+ let planPath = discovery.filename;
542
+ for (let writeFixTry = 0; writeFixTry < 2; writeFixTry++) {
543
+ try {
544
+ await assertPlanFileExistsAndNonEmpty(planPath);
545
+ break;
546
+ }
547
+ catch (planErr) {
548
+ const planMsg = planErr instanceof Error ? planErr.message : String(planErr);
549
+ const reason = /preflight-plan-empty/i.test(planMsg)
550
+ ? 'empty file'
551
+ : /preflight-plan-not-a-file/i.test(planMsg)
552
+ ? 'not a regular file'
553
+ : /ENOENT/i.test(planMsg)
554
+ ? 'missing file'
555
+ : planMsg;
556
+ if (writeFixTry === 0) {
557
+ progress.onStage?.(`⚠️ Discovery returned filename but file is invalid (${reason}). Asking model to rewrite plan file...`);
558
+ const rewriteRes = await preflightSession.ask(buildDiscoveryRewritePrompt(planPath, reason));
559
+ const rewriteTokens = preflightSession.usage.prompt + preflightSession.usage.completion - discoveryTokens;
560
+ discoveryTokens += rewriteTokens;
561
+ totalTokens += rewriteTokens;
562
+ try {
563
+ const rewritten = parseDiscoveryResult(rewriteRes.text, config.projectDir);
564
+ if (rewritten.status === 'incomplete') {
565
+ planPath = rewritten.filename;
566
+ }
567
+ }
568
+ catch {
569
+ // Keep original planPath; second validation pass will fail and route to fallback.
570
+ }
571
+ continue;
572
+ }
573
+ const discoveryPlanState = await ensurePlanFileExistsOrBootstrap({
574
+ absPath: planPath,
575
+ task: currentTask,
576
+ source: 'discovery',
577
+ });
578
+ if (discoveryPlanState === 'bootstrapped') {
579
+ discoveryUsedFallbackPlan = true;
580
+ progress.onStage?.(`⚠️ Discovery returned a filename but did not write valid contents. Created fallback plan file: ${planPath}`);
581
+ }
582
+ }
552
583
  }
553
584
  preflightRecords.push({
554
585
  taskKey: currentTask.key,
555
- stage: 'requirements-review',
586
+ stage: 'discovery',
556
587
  durationMs: Date.now() - stageStart,
557
- tokensUsed: reviewTokens,
558
- status: 'ready',
559
- filename: review.filename,
588
+ tokensUsed: discoveryTokens,
589
+ status: discovery.status,
590
+ filename: planPath || undefined,
560
591
  });
561
- taskPlanByTaskKey.set(currentTask.key, review.filename);
562
- progress.onStage?.(`✅ Requirements review ready: ${review.filename}`);
563
- reviewOk = true;
592
+ taskPlanByTaskKey.set(currentTask.key, planPath);
593
+ progress.onStage?.(`📝 Discovery plan file: ${planPath}`);
594
+ discoveryOk = true;
564
595
  break;
565
596
  }
566
597
  catch (error) {
@@ -568,53 +599,272 @@ export async function runAnton(opts) {
568
599
  const timeout = /timeout/i.test(errMsg);
569
600
  preflightRecords.push({
570
601
  taskKey: currentTask.key,
571
- stage: 'requirements-review',
602
+ stage: 'discovery',
572
603
  durationMs: Date.now() - stageStart,
573
604
  tokensUsed: 0,
574
605
  status: timeout ? 'timeout' : 'error',
575
606
  error: errMsg,
576
607
  });
577
- if (reviewTry < preflightMaxRetries) {
578
- const short = errMsg.length > 180 ? `${errMsg.slice(0, 177)}...` : errMsg;
608
+ const short = errMsg.length > 180 ? `${errMsg.slice(0, 177)}...` : errMsg;
609
+ discoveryRetryHint = `Previous discovery attempt failed: ${short}. Do not edit source files. Only update ${plannedFilePath} and return strict JSON.`;
610
+ // If discovery returns malformed/non-JSON output (or loops on source edits),
611
+ // degrade immediately to fallback plan instead of burning retries.
612
+ if (isRecoverablePreflightDiscoveryError(errMsg)) {
613
+ discoveryUsedFallbackPlan = true;
614
+ const fallbackState = await ensurePlanFileExistsOrBootstrap({
615
+ absPath: plannedFilePath,
616
+ task: currentTask,
617
+ source: 'discovery',
618
+ });
619
+ if (fallbackState === 'bootstrapped') {
620
+ progress.onStage?.(`⚠️ Discovery returned invalid output (${short}). Bootstrapped fallback plan and continuing: ${plannedFilePath}`);
621
+ }
622
+ else {
623
+ progress.onStage?.(`⚠️ Discovery returned invalid output (${short}). Reusing existing plan and continuing: ${plannedFilePath}`);
624
+ }
625
+ taskPlanByTaskKey.set(currentTask.key, plannedFilePath);
626
+ discoveryOk = true;
627
+ break;
628
+ }
629
+ if (discoveryTry < preflightMaxRetries) {
630
+ // Close session on error so retry gets fresh state
631
+ await closePreflightSession();
579
632
  if (/max iterations exceeded/i.test(errMsg)) {
580
- const nextCap = Math.min(Math.max(reviewIterationCap * 2, reviewIterationCap + 2), 1000);
581
- if (nextCap > reviewIterationCap) {
582
- progress.onStage?.(`⚠️ Requirements review hit max iterations (${reviewIterationCap}). Increasing preflight cap to ${nextCap} and retrying...`);
583
- reviewIterationCap = nextCap;
633
+ const nextCap = Math.min(Math.max(discoveryIterationCap * 2, discoveryIterationCap + 2), 1000);
634
+ if (nextCap > discoveryIterationCap) {
635
+ progress.onStage?.(`⚠️ Discovery hit max iterations (${discoveryIterationCap}). Increasing preflight cap to ${nextCap} and retrying...`);
636
+ discoveryIterationCap = nextCap;
584
637
  continue;
585
638
  }
586
639
  }
587
- progress.onStage?.(`⚠️ Requirements review failed (${reviewTry + 1}/${preflightTotalTries}): ${short}. Retrying review with existing plan file...`);
640
+ progress.onStage?.(`⚠️ Discovery failed (${discoveryTry + 1}/${preflightTotalTries}): ${short}. Retrying discovery...`);
588
641
  continue;
589
642
  }
590
- const preflightAttempt = {
591
- taskKey: currentTask.key,
592
- taskText: currentTask.text,
593
- attempt: attemptNumber,
594
- durationMs: Date.now() - stageStart,
595
- tokensUsed: 0,
596
- status: timeout ? 'timeout' : 'error',
597
- verification: undefined,
598
- error: `preflight-error(requirements-review): ${errMsg}`,
599
- commitHash: undefined,
600
- };
601
- attempts.push(preflightAttempt);
602
- taskRetryCount.set(currentTask.key, retries + 1);
603
- if (!config.skipOnFail)
604
- break mainLoop;
605
- }
606
- finally {
607
- try {
608
- await reviewSession?.close();
643
+ // Final discovery failure: degrade gracefully by bootstrapping a fallback plan file
644
+ // so Anton can still proceed to implementation/review instead of hard-failing task 1.
645
+ discoveryUsedFallbackPlan = true;
646
+ const fallbackState = await ensurePlanFileExistsOrBootstrap({
647
+ absPath: plannedFilePath,
648
+ task: currentTask,
649
+ source: 'discovery',
650
+ });
651
+ if (fallbackState === 'bootstrapped') {
652
+ progress.onStage?.(`⚠️ Discovery failed after ${preflightTotalTries} tries (${short}). Bootstrapped fallback plan and continuing: ${plannedFilePath}`);
609
653
  }
610
- catch {
611
- // best effort
654
+ else {
655
+ progress.onStage?.(`⚠️ Discovery failed after ${preflightTotalTries} tries (${short}). Reusing existing plan and continuing: ${plannedFilePath}`);
612
656
  }
657
+ taskPlanByTaskKey.set(currentTask.key, plannedFilePath);
658
+ discoveryOk = true;
659
+ break;
613
660
  }
661
+ // Note: session stays open for reuse in review stage (closed at end of preflight block)
662
+ }
663
+ // Discovery already marked complete -> next task.
664
+ if (preflightMarkedComplete) {
665
+ continue;
614
666
  }
615
- if (!reviewOk) {
667
+ if (!discoveryOk) {
616
668
  continue;
617
669
  }
670
+ // Stage 2: requirements review (retry review only; keep same plan file).
671
+ // NOTE: Discovery prompt now includes review instructions, producing a "reviewed" plan.
672
+ // Separate review stage is skipped by default to save an LLM round-trip.
673
+ // Set preflightRequirementsReview=true AND preflightSeparateReview=true to force separate review.
674
+ const skipSeparateReview = !config.preflightSeparateReview;
675
+ const forceSeparateReview = config.preflightRequirementsReview && discoveryUsedFallbackPlan;
676
+ if (forceSeparateReview && skipSeparateReview) {
677
+ progress.onStage?.('⚠️ Discovery used a fallback plan; forcing separate requirements review before implementation...');
678
+ }
679
+ if (config.preflightRequirementsReview && (!skipSeparateReview || forceSeparateReview)) {
680
+ const reviewPlanFile = taskPlanByTaskKey.get(currentTask.key) ?? plannedFilePath;
681
+ let reviewOk = false;
682
+ // Default to 30 iterations for review (simpler than discovery, just refining existing plan)
683
+ let reviewIterationCap = Math.max(1, Math.floor(config.preflightSessionMaxIterations ?? 30));
684
+ for (let reviewTry = 0; reviewTry <= preflightMaxRetries; reviewTry++) {
685
+ const stageStart = Date.now();
686
+ const reviewTimeoutSec = config.preflightReviewTimeoutSec ?? config.taskTimeoutSec;
687
+ const reviewTimeoutMs = reviewTimeoutSec * 1000;
688
+ try {
689
+ progress.onStage?.('🧪 Requirements review: refining plan...');
690
+ // Reuse preflight session from discovery, or create new one if needed (e.g., after error)
691
+ if (!preflightSession) {
692
+ preflightSession = await createSessionFn(buildPreflightConfig(idlehandsConfig, config, reviewTimeoutSec, reviewIterationCap), apiKey);
693
+ }
694
+ const reviewPrompt = buildRequirementsReviewPrompt(reviewPlanFile);
695
+ let reviewTimeoutHandle;
696
+ const reviewRes = await Promise.race([
697
+ preflightSession.ask(reviewPrompt).finally(() => clearTimeout(reviewTimeoutHandle)),
698
+ new Promise((_, reject) => {
699
+ reviewTimeoutHandle = setTimeout(() => {
700
+ try {
701
+ preflightSession?.cancel();
702
+ }
703
+ catch {
704
+ // best effort
705
+ }
706
+ reject(new Error('preflight-review-timeout'));
707
+ }, reviewTimeoutMs);
708
+ }),
709
+ ]);
710
+ let reviewTokens = preflightSession.usage.prompt + preflightSession.usage.completion;
711
+ totalTokens += reviewTokens;
712
+ // Try to parse review result; if invalid JSON, attempt force-decision prompt
713
+ let review;
714
+ try {
715
+ review = parseRequirementsReviewResult(reviewRes.text, config.projectDir);
716
+ }
717
+ catch (parseError) {
718
+ const parseErrMsg = parseError instanceof Error ? parseError.message : String(parseError);
719
+ // Only try force-decision for JSON/format errors
720
+ if (/preflight-json-missing-object|preflight-review-invalid/i.test(parseErrMsg)) {
721
+ progress.onStage?.('⚠️ Review output invalid, requesting forced decision...');
722
+ try {
723
+ const forceRes = await preflightSession.ask(FORCE_REVIEW_DECISION_PROMPT);
724
+ const forceTokens = preflightSession.usage.prompt + preflightSession.usage.completion - reviewTokens;
725
+ reviewTokens += forceTokens;
726
+ totalTokens += forceTokens;
727
+ review = parseRequirementsReviewResult(forceRes.text, config.projectDir);
728
+ progress.onStage?.('✅ Forced decision succeeded');
729
+ }
730
+ catch (forceError) {
731
+ // Force-decision also failed, throw original error
732
+ throw parseError;
733
+ }
734
+ }
735
+ else {
736
+ throw parseError;
737
+ }
738
+ }
739
+ let reviewedPlanPath = review.filename;
740
+ for (let writeFixTry = 0; writeFixTry < 2; writeFixTry++) {
741
+ try {
742
+ await assertPlanFileExistsAndNonEmpty(reviewedPlanPath);
743
+ break;
744
+ }
745
+ catch (planErr) {
746
+ const planMsg = planErr instanceof Error ? planErr.message : String(planErr);
747
+ const reason = /preflight-plan-empty/i.test(planMsg)
748
+ ? 'empty file'
749
+ : /preflight-plan-not-a-file/i.test(planMsg)
750
+ ? 'not a regular file'
751
+ : /ENOENT/i.test(planMsg)
752
+ ? 'missing file'
753
+ : planMsg;
754
+ if (writeFixTry === 0) {
755
+ progress.onStage?.(`⚠️ Requirements review returned filename but file is invalid (${reason}). Asking model to rewrite plan file...`);
756
+ const rewriteRes = await preflightSession.ask(buildReviewRewritePrompt(reviewedPlanPath, reason));
757
+ const rewriteTokens = preflightSession.usage.prompt + preflightSession.usage.completion - reviewTokens;
758
+ reviewTokens += rewriteTokens;
759
+ totalTokens += rewriteTokens;
760
+ try {
761
+ const rewritten = parseRequirementsReviewResult(rewriteRes.text, config.projectDir);
762
+ if (rewritten.status === 'ready') {
763
+ reviewedPlanPath = rewritten.filename;
764
+ }
765
+ }
766
+ catch {
767
+ // Keep existing path; second validation pass decides fallback.
768
+ }
769
+ continue;
770
+ }
771
+ const reviewPlanState = await ensurePlanFileExistsOrBootstrap({
772
+ absPath: reviewedPlanPath,
773
+ task: currentTask,
774
+ source: 'requirements-review',
775
+ });
776
+ if (reviewPlanState === 'bootstrapped') {
777
+ progress.onStage?.(`⚠️ Requirements review returned a filename but did not write valid contents. Created fallback plan file: ${reviewedPlanPath}`);
778
+ }
779
+ }
780
+ }
781
+ preflightRecords.push({
782
+ taskKey: currentTask.key,
783
+ stage: 'requirements-review',
784
+ durationMs: Date.now() - stageStart,
785
+ tokensUsed: reviewTokens,
786
+ status: 'ready',
787
+ filename: reviewedPlanPath,
788
+ });
789
+ taskPlanByTaskKey.set(currentTask.key, reviewedPlanPath);
790
+ progress.onStage?.(`✅ Requirements review ready: ${reviewedPlanPath}`);
791
+ reviewOk = true;
792
+ break;
793
+ }
794
+ catch (error) {
795
+ const errMsg = error instanceof Error ? error.message : String(error);
796
+ const timeout = /timeout/i.test(errMsg);
797
+ preflightRecords.push({
798
+ taskKey: currentTask.key,
799
+ stage: 'requirements-review',
800
+ durationMs: Date.now() - stageStart,
801
+ tokensUsed: 0,
802
+ status: timeout ? 'timeout' : 'error',
803
+ error: errMsg,
804
+ });
805
+ const short = errMsg.length > 180 ? `${errMsg.slice(0, 177)}...` : errMsg;
806
+ // If review returns malformed/non-JSON output, keep moving with existing plan
807
+ // only when discovery already produced a real plan. If discovery used fallback,
808
+ // require a valid review result before proceeding to implementation.
809
+ if (isRecoverablePreflightReviewError(errMsg)) {
810
+ if (!forceSeparateReview) {
811
+ const fallbackState = await ensurePlanFileExistsOrBootstrap({
812
+ absPath: reviewPlanFile,
813
+ task: currentTask,
814
+ source: 'requirements-review',
815
+ });
816
+ if (fallbackState === 'bootstrapped') {
817
+ progress.onStage?.(`⚠️ Requirements review returned invalid output (${short}). Bootstrapped fallback plan and continuing: ${reviewPlanFile}`);
818
+ }
819
+ else {
820
+ progress.onStage?.(`⚠️ Requirements review returned invalid output (${short}). Reusing existing plan and continuing: ${reviewPlanFile}`);
821
+ }
822
+ taskPlanByTaskKey.set(currentTask.key, reviewPlanFile);
823
+ reviewOk = true;
824
+ break;
825
+ }
826
+ progress.onStage?.(`⚠️ Requirements review returned invalid output (${short}). Discovery fallback plan requires a valid review, retrying...`);
827
+ }
828
+ if (reviewTry < preflightMaxRetries) {
829
+ // Close session on error so retry gets fresh state
830
+ await closePreflightSession();
831
+ if (/max iterations exceeded/i.test(errMsg)) {
832
+ const nextCap = Math.min(Math.max(reviewIterationCap * 2, reviewIterationCap + 2), 1000);
833
+ if (nextCap > reviewIterationCap) {
834
+ progress.onStage?.(`⚠️ Requirements review hit max iterations (${reviewIterationCap}). Increasing preflight cap to ${nextCap} and retrying...`);
835
+ reviewIterationCap = nextCap;
836
+ continue;
837
+ }
838
+ }
839
+ progress.onStage?.(`⚠️ Requirements review failed (${reviewTry + 1}/${preflightTotalTries}): ${short}. Retrying review with existing plan file...`);
840
+ continue;
841
+ }
842
+ const preflightAttempt = {
843
+ taskKey: currentTask.key,
844
+ taskText: currentTask.text,
845
+ attempt: attemptNumber,
846
+ durationMs: Date.now() - stageStart,
847
+ tokensUsed: 0,
848
+ status: timeout ? 'timeout' : 'error',
849
+ verification: undefined,
850
+ error: `preflight-error(requirements-review): ${errMsg}`,
851
+ commitHash: undefined,
852
+ };
853
+ attempts.push(preflightAttempt);
854
+ taskRetryCount.set(currentTask.key, retries + 1);
855
+ if (!config.skipOnFail)
856
+ break mainLoop;
857
+ }
858
+ // Note: session stays open, will be closed at end of preflight block
859
+ }
860
+ if (!reviewOk) {
861
+ continue;
862
+ }
863
+ }
864
+ }
865
+ finally {
866
+ // Always close preflight session at end of preflight block
867
+ await closePreflightSession();
618
868
  }
619
869
  }
620
870
  progress.onStage?.('🛠️ Implementation: executing vetted plan...');
@@ -631,6 +881,7 @@ export async function runAnton(opts) {
631
881
  : buildSessionConfig(idlehandsConfig, config);
632
882
  console.error(`[anton:debug] task="${currentTask.text}" depth=${currentTask.depth} complexity=${taskComplexity} isComplexDecompose=${isComplexDecompose} no_tools=${!!sessionConfig.no_tools} max_iterations=${sessionConfig.max_iterations}`);
633
883
  session = await createSessionFn(sessionConfig, apiKey);
884
+ injectAntonResultContract(session);
634
885
  // Set up timeout + stop propagation for the currently running attempt.
635
886
  // /anton stop flips abortSignal.aborted; we poll that and cancel session.ask immediately
636
887
  // instead of waiting for the task attempt to naturally finish.
@@ -675,6 +926,8 @@ export async function runAnton(opts) {
675
926
  vault,
676
927
  lens,
677
928
  maxContextTokens: idlehandsConfig.context_max_tokens || 8000,
929
+ currentTurn: 1,
930
+ maxIterations: config.taskMaxIterations,
678
931
  });
679
932
  const promptText = typeof prompt === 'string' ? prompt : JSON.stringify(prompt);
680
933
  estimatedPromptTokens = estimateTokens(promptText);
@@ -732,7 +985,10 @@ export async function runAnton(opts) {
732
985
  },
733
986
  onTurnEnd: (stats) => {
734
987
  const tokens = session ? session.usage.prompt + session.usage.completion : 0;
735
- console.error(`[anton:turn] task="${currentTask.text.slice(0, 40)}" turn=${stats.turn} toolCalls=${stats.toolCalls} tokens=${tokens}`);
988
+ // Update progress with current turn so heartbeats can report it
989
+ currentProgress.currentTurn = stats.turn;
990
+ currentProgress.elapsedMs = Date.now() - startTimeMs;
991
+ console.error(`[anton:turn] task="${currentTask.text.slice(0, 40)}" turn=${stats.turn}/${config.taskMaxIterations} toolCalls=${stats.toolCalls} tokens=${tokens}`);
736
992
  },
737
993
  };
738
994
  let toolLoopRetries = 0;