@visorcraft/idlehands 2.0.1 → 2.0.2

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in their respective public registries.
@@ -11,7 +11,7 @@ import { ensureCleanWorkingTree, getWorkingDiff, commitAll, restoreTrackedChange
11
11
  import { estimateTokens } from '../utils.js';
12
12
  import { acquireAntonLock, releaseAntonLock, touchAntonLock } from './lock.js';
13
13
  import { parseTaskFile, findRunnablePendingTasks, markTaskChecked, insertSubTasks, autoCompleteAncestors, } from './parser.js';
14
- import { ensureAgentsTasksDir, makeUniqueTaskPlanFilename, buildDiscoveryPrompt, parseDiscoveryResult, buildRequirementsReviewPrompt, parseRequirementsReviewResult, ensurePlanFileExistsOrBootstrap, } from './preflight.js';
14
+ import { ensureAgentsTasksDir, makeUniqueTaskPlanFilename, buildDiscoveryPrompt, parseDiscoveryResult, buildRequirementsReviewPrompt, parseRequirementsReviewResult, ensurePlanFileExistsOrBootstrap, FORCE_DISCOVERY_DECISION_PROMPT, FORCE_REVIEW_DECISION_PROMPT, } from './preflight.js';
15
15
  import { buildAntonPrompt, parseAntonResult, classifyTaskComplexity } from './prompt.js';
16
16
  import { formatDryRunPlan } from './reporter.js';
17
17
  import { classifyInfraError, ensureAntonRuntimeReady } from './runtime-ready.js';
@@ -64,6 +64,12 @@ function isL2MissingImplementation(reason) {
64
64
  ];
65
65
  return missingPatterns.some((p) => p.test(reason));
66
66
  }
67
+ function isRecoverablePreflightDiscoveryError(errMsg) {
68
+ return (/preflight-json-missing-object|preflight-discovery-invalid-status|preflight-discovery-invalid-filename|preflight-discovery-filename/i.test(errMsg) || /identical call repeated|breaking loop|tool\s+edit_range/i.test(errMsg));
69
+ }
70
+ function isRecoverablePreflightReviewError(errMsg) {
71
+ return /preflight-json-missing-object|preflight-review-invalid-status|preflight-review-invalid-filename|preflight-review-filename/i.test(errMsg);
72
+ }
67
73
  /**
68
74
  * Try to read a file's contents for injection into retry context.
69
75
  * Returns null if file doesn't exist or is too large.
@@ -136,6 +142,15 @@ function buildL2EnhancedRetryContext(l2Reason, l2FailCount, projectDir, taskText
136
142
  parts.push('');
137
143
  return parts.join('\n');
138
144
  }
145
+ const ANTON_RESULT_SYSTEM_CONTRACT = `[Anton output contract]
146
+ Every final implementation/decompose answer MUST contain exactly one structured block:
147
+ <anton-result>
148
+ status: done|failed|blocked|decompose
149
+ reason: <optional>
150
+ subtasks:
151
+ - <only when status=decompose>
152
+ </anton-result>
153
+ Do not omit this block.`;
139
154
  const STRUCTURED_RESULT_RECOVERY_PROMPT = `Your previous reply did not include a valid <anton-result> block.
140
155
  Do NOT call tools.
141
156
  Return ONLY this block shape and nothing else:
@@ -152,6 +167,19 @@ function isStructuredResultParseFailure(reason) {
152
167
  reason === 'No status line found in result block' ||
153
168
  reason.startsWith('Unknown status:'));
154
169
  }
170
+ function injectAntonResultContract(session) {
171
+ try {
172
+ const current = String(session.getSystemPrompt?.() ?? '').trim();
173
+ if (!current)
174
+ return;
175
+ if (current.includes('<anton-result>') || current.includes('[Anton output contract]'))
176
+ return;
177
+ session.setSystemPrompt(`${current}\n\n${ANTON_RESULT_SYSTEM_CONTRACT}`);
178
+ }
179
+ catch {
180
+ // best effort
181
+ }
182
+ }
155
183
  export async function runAnton(opts) {
156
184
  const { config, idlehandsConfig, progress, abortSignal, apiKey, vault, lens } = opts;
157
185
  const createSessionFn = opts.createSession || defaultCreateSession;
@@ -386,181 +414,117 @@ export async function runAnton(opts) {
386
414
  let discoveryOk = false;
387
415
  await ensureAgentsTasksDir(config.projectDir);
388
416
  const plannedFilePath = taskPlanByTaskKey.get(currentTask.key) ?? makeUniqueTaskPlanFilename(config.projectDir);
389
- let discoveryIterationCap = Math.max(1, Math.floor(config.preflightSessionMaxIterations ?? 500));
417
+ // Default to 50 iterations for discovery (was 500 - way too high for a simple JSON check)
418
+ let discoveryIterationCap = Math.max(1, Math.floor(config.preflightSessionMaxIterations ?? 50));
390
419
  let discoveryRetryHint;
391
- // Stage 1: discovery (retry discovery only).
392
- for (let discoveryTry = 0; discoveryTry <= preflightMaxRetries; discoveryTry++) {
393
- const stageStart = Date.now();
394
- const discoveryTimeoutSec = config.preflightDiscoveryTimeoutSec ?? config.taskTimeoutSec;
395
- const discoveryTimeoutMs = discoveryTimeoutSec * 1000;
396
- let discoverySession;
397
- try {
398
- progress.onStage?.('🔎 Discovery: checking if already done...');
399
- discoverySession = await createSessionFn(buildPreflightConfig(idlehandsConfig, config, discoveryTimeoutSec, discoveryIterationCap), apiKey);
400
- const discoveryPrompt = buildDiscoveryPrompt({
401
- task: currentTask,
402
- taskFilePath: config.taskFile,
403
- projectDir: config.projectDir,
404
- planFilePath: plannedFilePath,
405
- retryHint: discoveryRetryHint,
406
- });
407
- let discoveryTimeoutHandle;
408
- const discoveryRes = await Promise.race([
409
- discoverySession.ask(discoveryPrompt).finally(() => clearTimeout(discoveryTimeoutHandle)),
410
- new Promise((_, reject) => {
411
- discoveryTimeoutHandle = setTimeout(() => {
412
- try {
413
- discoverySession?.cancel();
414
- }
415
- catch {
416
- // best effort
417
- }
418
- reject(new Error('preflight-discovery-timeout'));
419
- }, discoveryTimeoutMs);
420
- }),
421
- ]);
422
- const discoveryTokens = discoverySession.usage.prompt + discoverySession.usage.completion;
423
- totalTokens += discoveryTokens;
424
- const discovery = parseDiscoveryResult(discoveryRes.text, config.projectDir);
425
- preflightRecords.push({
426
- taskKey: currentTask.key,
427
- stage: 'discovery',
428
- durationMs: Date.now() - stageStart,
429
- tokensUsed: discoveryTokens,
430
- status: discovery.status,
431
- filename: discovery.filename || undefined,
432
- });
433
- if (discovery.status === 'complete') {
434
- await markTaskChecked(config.taskFile, currentTask.key);
435
- await autoCompleteAncestors(config.taskFile, currentTask.key);
436
- autoCompleted += 1;
437
- progress.onStage?.(`✅ Discovery confirmed already complete: ${currentTask.text}`);
438
- preflightMarkedComplete = true;
439
- discoveryOk = true;
440
- break;
441
- }
442
- const discoveryPlanState = await ensurePlanFileExistsOrBootstrap({
443
- absPath: discovery.filename,
444
- task: currentTask,
445
- source: 'discovery',
446
- });
447
- if (discoveryPlanState === 'bootstrapped') {
448
- progress.onStage?.(`⚠️ Discovery returned a filename but did not write it. Created fallback plan file: ${discovery.filename}`);
449
- }
450
- taskPlanByTaskKey.set(currentTask.key, discovery.filename);
451
- progress.onStage?.(`📝 Discovery plan file: ${discovery.filename}`);
452
- discoveryOk = true;
453
- break;
454
- }
455
- catch (error) {
456
- const errMsg = error instanceof Error ? error.message : String(error);
457
- const timeout = /timeout/i.test(errMsg);
458
- preflightRecords.push({
459
- taskKey: currentTask.key,
460
- stage: 'discovery',
461
- durationMs: Date.now() - stageStart,
462
- tokensUsed: 0,
463
- status: timeout ? 'timeout' : 'error',
464
- error: errMsg,
465
- });
466
- const short = errMsg.length > 180 ? `${errMsg.slice(0, 177)}...` : errMsg;
467
- discoveryRetryHint = `Previous discovery attempt failed: ${short}. Do not edit source files. Only update ${plannedFilePath} and return strict JSON.`;
468
- if (discoveryTry < preflightMaxRetries) {
469
- if (/max iterations exceeded/i.test(errMsg)) {
470
- const nextCap = Math.min(Math.max(discoveryIterationCap * 2, discoveryIterationCap + 2), 1000);
471
- if (nextCap > discoveryIterationCap) {
472
- progress.onStage?.(`⚠️ Discovery hit max iterations (${discoveryIterationCap}). Increasing preflight cap to ${nextCap} and retrying...`);
473
- discoveryIterationCap = nextCap;
474
- continue;
475
- }
476
- }
477
- progress.onStage?.(`⚠️ Discovery failed (${discoveryTry + 1}/${preflightTotalTries}): ${short}. Retrying discovery...`);
478
- continue;
479
- }
480
- // Final discovery failure: degrade gracefully by bootstrapping a fallback plan file
481
- // so Anton can still proceed to implementation/review instead of hard-failing task 1.
482
- const fallbackState = await ensurePlanFileExistsOrBootstrap({
483
- absPath: plannedFilePath,
484
- task: currentTask,
485
- source: 'discovery',
486
- });
487
- if (fallbackState === 'bootstrapped') {
488
- progress.onStage?.(`⚠️ Discovery failed after ${preflightTotalTries} tries (${short}). Bootstrapped fallback plan and continuing: ${plannedFilePath}`);
489
- }
490
- else {
491
- progress.onStage?.(`⚠️ Discovery failed after ${preflightTotalTries} tries (${short}). Reusing existing plan and continuing: ${plannedFilePath}`);
492
- }
493
- taskPlanByTaskKey.set(currentTask.key, plannedFilePath);
494
- discoveryOk = true;
495
- break;
496
- }
497
- finally {
420
+ // Shared preflight session - reused between discovery and review stages to avoid
421
+ // session creation overhead. Created lazily, closed on error (for fresh retry state)
422
+ // or at end of preflight block.
423
+ let preflightSession;
424
+ const closePreflightSession = async () => {
425
+ if (preflightSession) {
498
426
  try {
499
- await discoverySession?.close();
427
+ await preflightSession.close();
500
428
  }
501
429
  catch {
502
430
  // best effort
503
431
  }
432
+ preflightSession = undefined;
504
433
  }
505
- }
506
- // Discovery already marked complete -> next task.
507
- if (preflightMarkedComplete) {
508
- continue;
509
- }
510
- if (!discoveryOk) {
511
- continue;
512
- }
513
- // Stage 2: requirements review (retry review only; keep same plan file).
514
- if (config.preflightRequirementsReview) {
515
- const reviewPlanFile = taskPlanByTaskKey.get(currentTask.key) ?? plannedFilePath;
516
- let reviewOk = false;
517
- let reviewIterationCap = Math.max(1, Math.floor(config.preflightSessionMaxIterations ?? 500));
518
- for (let reviewTry = 0; reviewTry <= preflightMaxRetries; reviewTry++) {
434
+ };
435
+ try {
436
+ // Stage 1: discovery (retry discovery only).
437
+ for (let discoveryTry = 0; discoveryTry <= preflightMaxRetries; discoveryTry++) {
519
438
  const stageStart = Date.now();
520
- const reviewTimeoutSec = config.preflightReviewTimeoutSec ?? config.taskTimeoutSec;
521
- const reviewTimeoutMs = reviewTimeoutSec * 1000;
522
- let reviewSession;
439
+ const discoveryTimeoutSec = config.preflightDiscoveryTimeoutSec ?? config.taskTimeoutSec;
440
+ const discoveryTimeoutMs = discoveryTimeoutSec * 1000;
523
441
  try {
524
- progress.onStage?.('🧪 Requirements review: refining plan...');
525
- reviewSession = await createSessionFn(buildPreflightConfig(idlehandsConfig, config, reviewTimeoutSec, reviewIterationCap), apiKey);
526
- const reviewPrompt = buildRequirementsReviewPrompt(reviewPlanFile);
527
- let reviewTimeoutHandle;
528
- const reviewRes = await Promise.race([
529
- reviewSession.ask(reviewPrompt).finally(() => clearTimeout(reviewTimeoutHandle)),
442
+ progress.onStage?.('🔎 Discovery: checking if already done...');
443
+ // Create session if not already open (first try or after error closed it)
444
+ if (!preflightSession) {
445
+ preflightSession = await createSessionFn(buildPreflightConfig(idlehandsConfig, config, discoveryTimeoutSec, discoveryIterationCap), apiKey);
446
+ }
447
+ const discoveryPrompt = buildDiscoveryPrompt({
448
+ task: currentTask,
449
+ taskFilePath: config.taskFile,
450
+ projectDir: config.projectDir,
451
+ planFilePath: plannedFilePath,
452
+ retryHint: discoveryRetryHint,
453
+ });
454
+ let discoveryTimeoutHandle;
455
+ const discoveryRes = await Promise.race([
456
+ preflightSession.ask(discoveryPrompt).finally(() => clearTimeout(discoveryTimeoutHandle)),
530
457
  new Promise((_, reject) => {
531
- reviewTimeoutHandle = setTimeout(() => {
458
+ discoveryTimeoutHandle = setTimeout(() => {
532
459
  try {
533
- reviewSession?.cancel();
460
+ preflightSession?.cancel();
534
461
  }
535
462
  catch {
536
463
  // best effort
537
464
  }
538
- reject(new Error('preflight-review-timeout'));
539
- }, reviewTimeoutMs);
465
+ reject(new Error('preflight-discovery-timeout'));
466
+ }, discoveryTimeoutMs);
540
467
  }),
541
468
  ]);
542
- const reviewTokens = reviewSession.usage.prompt + reviewSession.usage.completion;
543
- totalTokens += reviewTokens;
544
- const review = parseRequirementsReviewResult(reviewRes.text, config.projectDir);
545
- const reviewPlanState = await ensurePlanFileExistsOrBootstrap({
546
- absPath: review.filename,
547
- task: currentTask,
548
- source: 'requirements-review',
549
- });
550
- if (reviewPlanState === 'bootstrapped') {
551
- progress.onStage?.(`⚠️ Requirements review returned a filename but did not write it. Created fallback plan file: ${review.filename}`);
469
+ let discoveryTokens = preflightSession.usage.prompt + preflightSession.usage.completion;
470
+ totalTokens += discoveryTokens;
471
+ // Try to parse discovery result; if invalid JSON, attempt force-decision prompt
472
+ let discovery;
473
+ try {
474
+ discovery = parseDiscoveryResult(discoveryRes.text, config.projectDir);
475
+ }
476
+ catch (parseError) {
477
+ const parseErrMsg = parseError instanceof Error ? parseError.message : String(parseError);
478
+ // Only try force-decision for JSON/format errors, not file path errors
479
+ if (/preflight-json-missing-object|preflight-discovery-invalid/i.test(parseErrMsg)) {
480
+ progress.onStage?.('⚠️ Discovery output invalid, requesting forced decision...');
481
+ try {
482
+ const forceRes = await preflightSession.ask(FORCE_DISCOVERY_DECISION_PROMPT);
483
+ const forceTokens = preflightSession.usage.prompt + preflightSession.usage.completion - discoveryTokens;
484
+ discoveryTokens += forceTokens;
485
+ totalTokens += forceTokens;
486
+ discovery = parseDiscoveryResult(forceRes.text, config.projectDir);
487
+ progress.onStage?.('✅ Forced decision succeeded');
488
+ }
489
+ catch (forceError) {
490
+ // Force-decision also failed, throw original error
491
+ throw parseError;
492
+ }
493
+ }
494
+ else {
495
+ throw parseError;
496
+ }
552
497
  }
553
498
  preflightRecords.push({
554
499
  taskKey: currentTask.key,
555
- stage: 'requirements-review',
500
+ stage: 'discovery',
556
501
  durationMs: Date.now() - stageStart,
557
- tokensUsed: reviewTokens,
558
- status: 'ready',
559
- filename: review.filename,
502
+ tokensUsed: discoveryTokens,
503
+ status: discovery.status,
504
+ filename: discovery.filename || undefined,
505
+ });
506
+ if (discovery.status === 'complete') {
507
+ await markTaskChecked(config.taskFile, currentTask.key);
508
+ await autoCompleteAncestors(config.taskFile, currentTask.key);
509
+ autoCompleted += 1;
510
+ progress.onStage?.(`✅ Discovery confirmed already complete: ${currentTask.text}`);
511
+ preflightMarkedComplete = true;
512
+ discoveryOk = true;
513
+ // No review needed - close session now
514
+ await closePreflightSession();
515
+ break;
516
+ }
517
+ const discoveryPlanState = await ensurePlanFileExistsOrBootstrap({
518
+ absPath: discovery.filename,
519
+ task: currentTask,
520
+ source: 'discovery',
560
521
  });
561
- taskPlanByTaskKey.set(currentTask.key, review.filename);
562
- progress.onStage?.(`✅ Requirements review ready: ${review.filename}`);
563
- reviewOk = true;
522
+ if (discoveryPlanState === 'bootstrapped') {
523
+ progress.onStage?.(`⚠️ Discovery returned a filename but did not write it. Created fallback plan file: ${discovery.filename}`);
524
+ }
525
+ taskPlanByTaskKey.set(currentTask.key, discovery.filename);
526
+ progress.onStage?.(`📝 Discovery plan file: ${discovery.filename}`);
527
+ discoveryOk = true;
564
528
  break;
565
529
  }
566
530
  catch (error) {
@@ -568,53 +532,227 @@ export async function runAnton(opts) {
568
532
  const timeout = /timeout/i.test(errMsg);
569
533
  preflightRecords.push({
570
534
  taskKey: currentTask.key,
571
- stage: 'requirements-review',
535
+ stage: 'discovery',
572
536
  durationMs: Date.now() - stageStart,
573
537
  tokensUsed: 0,
574
538
  status: timeout ? 'timeout' : 'error',
575
539
  error: errMsg,
576
540
  });
577
- if (reviewTry < preflightMaxRetries) {
578
- const short = errMsg.length > 180 ? `${errMsg.slice(0, 177)}...` : errMsg;
541
+ const short = errMsg.length > 180 ? `${errMsg.slice(0, 177)}...` : errMsg;
542
+ discoveryRetryHint = `Previous discovery attempt failed: ${short}. Do not edit source files. Only update ${plannedFilePath} and return strict JSON.`;
543
+ // If discovery returns malformed/non-JSON output (or loops on source edits),
544
+ // degrade immediately to fallback plan instead of burning retries.
545
+ if (isRecoverablePreflightDiscoveryError(errMsg)) {
546
+ const fallbackState = await ensurePlanFileExistsOrBootstrap({
547
+ absPath: plannedFilePath,
548
+ task: currentTask,
549
+ source: 'discovery',
550
+ });
551
+ if (fallbackState === 'bootstrapped') {
552
+ progress.onStage?.(`⚠️ Discovery returned invalid output (${short}). Bootstrapped fallback plan and continuing: ${plannedFilePath}`);
553
+ }
554
+ else {
555
+ progress.onStage?.(`⚠️ Discovery returned invalid output (${short}). Reusing existing plan and continuing: ${plannedFilePath}`);
556
+ }
557
+ taskPlanByTaskKey.set(currentTask.key, plannedFilePath);
558
+ discoveryOk = true;
559
+ break;
560
+ }
561
+ if (discoveryTry < preflightMaxRetries) {
562
+ // Close session on error so retry gets fresh state
563
+ await closePreflightSession();
579
564
  if (/max iterations exceeded/i.test(errMsg)) {
580
- const nextCap = Math.min(Math.max(reviewIterationCap * 2, reviewIterationCap + 2), 1000);
581
- if (nextCap > reviewIterationCap) {
582
- progress.onStage?.(`⚠️ Requirements review hit max iterations (${reviewIterationCap}). Increasing preflight cap to ${nextCap} and retrying...`);
583
- reviewIterationCap = nextCap;
565
+ const nextCap = Math.min(Math.max(discoveryIterationCap * 2, discoveryIterationCap + 2), 1000);
566
+ if (nextCap > discoveryIterationCap) {
567
+ progress.onStage?.(`⚠️ Discovery hit max iterations (${discoveryIterationCap}). Increasing preflight cap to ${nextCap} and retrying...`);
568
+ discoveryIterationCap = nextCap;
584
569
  continue;
585
570
  }
586
571
  }
587
- progress.onStage?.(`⚠️ Requirements review failed (${reviewTry + 1}/${preflightTotalTries}): ${short}. Retrying review with existing plan file...`);
572
+ progress.onStage?.(`⚠️ Discovery failed (${discoveryTry + 1}/${preflightTotalTries}): ${short}. Retrying discovery...`);
588
573
  continue;
589
574
  }
590
- const preflightAttempt = {
591
- taskKey: currentTask.key,
592
- taskText: currentTask.text,
593
- attempt: attemptNumber,
594
- durationMs: Date.now() - stageStart,
595
- tokensUsed: 0,
596
- status: timeout ? 'timeout' : 'error',
597
- verification: undefined,
598
- error: `preflight-error(requirements-review): ${errMsg}`,
599
- commitHash: undefined,
600
- };
601
- attempts.push(preflightAttempt);
602
- taskRetryCount.set(currentTask.key, retries + 1);
603
- if (!config.skipOnFail)
604
- break mainLoop;
605
- }
606
- finally {
607
- try {
608
- await reviewSession?.close();
575
+ // Final discovery failure: degrade gracefully by bootstrapping a fallback plan file
576
+ // so Anton can still proceed to implementation/review instead of hard-failing task 1.
577
+ const fallbackState = await ensurePlanFileExistsOrBootstrap({
578
+ absPath: plannedFilePath,
579
+ task: currentTask,
580
+ source: 'discovery',
581
+ });
582
+ if (fallbackState === 'bootstrapped') {
583
+ progress.onStage?.(`⚠️ Discovery failed after ${preflightTotalTries} tries (${short}). Bootstrapped fallback plan and continuing: ${plannedFilePath}`);
609
584
  }
610
- catch {
611
- // best effort
585
+ else {
586
+ progress.onStage?.(`⚠️ Discovery failed after ${preflightTotalTries} tries (${short}). Reusing existing plan and continuing: ${plannedFilePath}`);
612
587
  }
588
+ taskPlanByTaskKey.set(currentTask.key, plannedFilePath);
589
+ discoveryOk = true;
590
+ break;
613
591
  }
592
+ // Note: session stays open for reuse in review stage (closed at end of preflight block)
593
+ }
594
+ // Discovery already marked complete -> next task.
595
+ if (preflightMarkedComplete) {
596
+ continue;
614
597
  }
615
- if (!reviewOk) {
598
+ if (!discoveryOk) {
616
599
  continue;
617
600
  }
601
+ // Stage 2: requirements review (retry review only; keep same plan file).
602
+ // NOTE: Discovery prompt now includes review instructions, producing a "reviewed" plan.
603
+ // Separate review stage is skipped by default to save an LLM round-trip.
604
+ // Set preflightRequirementsReview=true AND preflightSeparateReview=true to force separate review.
605
+ const skipSeparateReview = !config.preflightSeparateReview;
606
+ if (config.preflightRequirementsReview && !skipSeparateReview) {
607
+ const reviewPlanFile = taskPlanByTaskKey.get(currentTask.key) ?? plannedFilePath;
608
+ let reviewOk = false;
609
+ // Default to 30 iterations for review (simpler than discovery, just refining existing plan)
610
+ let reviewIterationCap = Math.max(1, Math.floor(config.preflightSessionMaxIterations ?? 30));
611
+ for (let reviewTry = 0; reviewTry <= preflightMaxRetries; reviewTry++) {
612
+ const stageStart = Date.now();
613
+ const reviewTimeoutSec = config.preflightReviewTimeoutSec ?? config.taskTimeoutSec;
614
+ const reviewTimeoutMs = reviewTimeoutSec * 1000;
615
+ try {
616
+ progress.onStage?.('🧪 Requirements review: refining plan...');
617
+ // Reuse preflight session from discovery, or create new one if needed (e.g., after error)
618
+ if (!preflightSession) {
619
+ preflightSession = await createSessionFn(buildPreflightConfig(idlehandsConfig, config, reviewTimeoutSec, reviewIterationCap), apiKey);
620
+ }
621
+ const reviewPrompt = buildRequirementsReviewPrompt(reviewPlanFile);
622
+ let reviewTimeoutHandle;
623
+ const reviewRes = await Promise.race([
624
+ preflightSession.ask(reviewPrompt).finally(() => clearTimeout(reviewTimeoutHandle)),
625
+ new Promise((_, reject) => {
626
+ reviewTimeoutHandle = setTimeout(() => {
627
+ try {
628
+ preflightSession?.cancel();
629
+ }
630
+ catch {
631
+ // best effort
632
+ }
633
+ reject(new Error('preflight-review-timeout'));
634
+ }, reviewTimeoutMs);
635
+ }),
636
+ ]);
637
+ let reviewTokens = preflightSession.usage.prompt + preflightSession.usage.completion;
638
+ totalTokens += reviewTokens;
639
+ // Try to parse review result; if invalid JSON, attempt force-decision prompt
640
+ let review;
641
+ try {
642
+ review = parseRequirementsReviewResult(reviewRes.text, config.projectDir);
643
+ }
644
+ catch (parseError) {
645
+ const parseErrMsg = parseError instanceof Error ? parseError.message : String(parseError);
646
+ // Only try force-decision for JSON/format errors
647
+ if (/preflight-json-missing-object|preflight-review-invalid/i.test(parseErrMsg)) {
648
+ progress.onStage?.('⚠️ Review output invalid, requesting forced decision...');
649
+ try {
650
+ const forceRes = await preflightSession.ask(FORCE_REVIEW_DECISION_PROMPT);
651
+ const forceTokens = preflightSession.usage.prompt + preflightSession.usage.completion - reviewTokens;
652
+ reviewTokens += forceTokens;
653
+ totalTokens += forceTokens;
654
+ review = parseRequirementsReviewResult(forceRes.text, config.projectDir);
655
+ progress.onStage?.('✅ Forced decision succeeded');
656
+ }
657
+ catch (forceError) {
658
+ // Force-decision also failed, throw original error
659
+ throw parseError;
660
+ }
661
+ }
662
+ else {
663
+ throw parseError;
664
+ }
665
+ }
666
+ const reviewPlanState = await ensurePlanFileExistsOrBootstrap({
667
+ absPath: review.filename,
668
+ task: currentTask,
669
+ source: 'requirements-review',
670
+ });
671
+ if (reviewPlanState === 'bootstrapped') {
672
+ progress.onStage?.(`⚠️ Requirements review returned a filename but did not write it. Created fallback plan file: ${review.filename}`);
673
+ }
674
+ preflightRecords.push({
675
+ taskKey: currentTask.key,
676
+ stage: 'requirements-review',
677
+ durationMs: Date.now() - stageStart,
678
+ tokensUsed: reviewTokens,
679
+ status: 'ready',
680
+ filename: review.filename,
681
+ });
682
+ taskPlanByTaskKey.set(currentTask.key, review.filename);
683
+ progress.onStage?.(`✅ Requirements review ready: ${review.filename}`);
684
+ reviewOk = true;
685
+ break;
686
+ }
687
+ catch (error) {
688
+ const errMsg = error instanceof Error ? error.message : String(error);
689
+ const timeout = /timeout/i.test(errMsg);
690
+ preflightRecords.push({
691
+ taskKey: currentTask.key,
692
+ stage: 'requirements-review',
693
+ durationMs: Date.now() - stageStart,
694
+ tokensUsed: 0,
695
+ status: timeout ? 'timeout' : 'error',
696
+ error: errMsg,
697
+ });
698
+ const short = errMsg.length > 180 ? `${errMsg.slice(0, 177)}...` : errMsg;
699
+ // If review returns malformed/non-JSON output, keep moving with existing plan.
700
+ if (isRecoverablePreflightReviewError(errMsg)) {
701
+ const fallbackState = await ensurePlanFileExistsOrBootstrap({
702
+ absPath: reviewPlanFile,
703
+ task: currentTask,
704
+ source: 'requirements-review',
705
+ });
706
+ if (fallbackState === 'bootstrapped') {
707
+ progress.onStage?.(`⚠️ Requirements review returned invalid output (${short}). Bootstrapped fallback plan and continuing: ${reviewPlanFile}`);
708
+ }
709
+ else {
710
+ progress.onStage?.(`⚠️ Requirements review returned invalid output (${short}). Reusing existing plan and continuing: ${reviewPlanFile}`);
711
+ }
712
+ taskPlanByTaskKey.set(currentTask.key, reviewPlanFile);
713
+ reviewOk = true;
714
+ break;
715
+ }
716
+ if (reviewTry < preflightMaxRetries) {
717
+ // Close session on error so retry gets fresh state
718
+ await closePreflightSession();
719
+ if (/max iterations exceeded/i.test(errMsg)) {
720
+ const nextCap = Math.min(Math.max(reviewIterationCap * 2, reviewIterationCap + 2), 1000);
721
+ if (nextCap > reviewIterationCap) {
722
+ progress.onStage?.(`⚠️ Requirements review hit max iterations (${reviewIterationCap}). Increasing preflight cap to ${nextCap} and retrying...`);
723
+ reviewIterationCap = nextCap;
724
+ continue;
725
+ }
726
+ }
727
+ progress.onStage?.(`⚠️ Requirements review failed (${reviewTry + 1}/${preflightTotalTries}): ${short}. Retrying review with existing plan file...`);
728
+ continue;
729
+ }
730
+ const preflightAttempt = {
731
+ taskKey: currentTask.key,
732
+ taskText: currentTask.text,
733
+ attempt: attemptNumber,
734
+ durationMs: Date.now() - stageStart,
735
+ tokensUsed: 0,
736
+ status: timeout ? 'timeout' : 'error',
737
+ verification: undefined,
738
+ error: `preflight-error(requirements-review): ${errMsg}`,
739
+ commitHash: undefined,
740
+ };
741
+ attempts.push(preflightAttempt);
742
+ taskRetryCount.set(currentTask.key, retries + 1);
743
+ if (!config.skipOnFail)
744
+ break mainLoop;
745
+ }
746
+ // Note: session stays open, will be closed at end of preflight block
747
+ }
748
+ if (!reviewOk) {
749
+ continue;
750
+ }
751
+ }
752
+ }
753
+ finally {
754
+ // Always close preflight session at end of preflight block
755
+ await closePreflightSession();
618
756
  }
619
757
  }
620
758
  progress.onStage?.('🛠️ Implementation: executing vetted plan...');
@@ -631,6 +769,7 @@ export async function runAnton(opts) {
631
769
  : buildSessionConfig(idlehandsConfig, config);
632
770
  console.error(`[anton:debug] task="${currentTask.text}" depth=${currentTask.depth} complexity=${taskComplexity} isComplexDecompose=${isComplexDecompose} no_tools=${!!sessionConfig.no_tools} max_iterations=${sessionConfig.max_iterations}`);
633
771
  session = await createSessionFn(sessionConfig, apiKey);
772
+ injectAntonResultContract(session);
634
773
  // Set up timeout + stop propagation for the currently running attempt.
635
774
  // /anton stop flips abortSignal.aborted; we poll that and cancel session.ask immediately
636
775
  // instead of waiting for the task attempt to naturally finish.