@zhixuan92/multi-model-agent-core 3.11.1 → 3.12.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +3 -3
- package/dist/config/read-only-review-flag.d.ts +1 -1
- package/dist/config/read-only-review-flag.d.ts.map +1 -1
- package/dist/config/read-only-review-flag.js +1 -0
- package/dist/config/read-only-review-flag.js.map +1 -1
- package/dist/config/schema.d.ts +47 -0
- package/dist/config/schema.d.ts.map +1 -1
- package/dist/config/schema.js +102 -0
- package/dist/config/schema.js.map +1 -1
- package/dist/delegate-with-escalation.d.ts.map +1 -1
- package/dist/delegate-with-escalation.js +1 -0
- package/dist/delegate-with-escalation.js.map +1 -1
- package/dist/diagnostics/types.d.ts +1 -1
- package/dist/diagnostics/types.d.ts.map +1 -1
- package/dist/escalation/fallback.d.ts +7 -1
- package/dist/escalation/fallback.d.ts.map +1 -1
- package/dist/escalation/fallback.js +39 -4
- package/dist/escalation/fallback.js.map +1 -1
- package/dist/executors/execute-plan.d.ts.map +1 -1
- package/dist/executors/execute-plan.js +1 -0
- package/dist/executors/execute-plan.js.map +1 -1
- package/dist/executors/explore.d.ts +13 -0
- package/dist/executors/explore.d.ts.map +1 -0
- package/dist/executors/explore.js +352 -0
- package/dist/executors/explore.js.map +1 -0
- package/dist/executors/index.d.ts +1 -0
- package/dist/executors/index.d.ts.map +1 -1
- package/dist/executors/index.js +1 -0
- package/dist/executors/index.js.map +1 -1
- package/dist/executors/types.d.ts +4 -0
- package/dist/executors/types.d.ts.map +1 -1
- package/dist/intake/compilers/delegate.d.ts +1 -0
- package/dist/intake/compilers/delegate.d.ts.map +1 -1
- package/dist/intake/compilers/delegate.js +1 -0
- package/dist/intake/compilers/delegate.js.map +1 -1
- package/dist/intake/compilers/execute-plan.d.ts +1 -0
- package/dist/intake/compilers/execute-plan.d.ts.map +1 -1
- package/dist/intake/compilers/execute-plan.js +1 -0
- package/dist/intake/compilers/execute-plan.js.map +1 -1
- package/dist/intake/compilers/explore.d.ts +29 -0
- package/dist/intake/compilers/explore.d.ts.map +1 -0
- package/dist/intake/compilers/explore.js +108 -0
- package/dist/intake/compilers/explore.js.map +1 -0
- package/dist/intake/resolve.d.ts.map +1 -1
- package/dist/intake/resolve.js +1 -0
- package/dist/intake/resolve.js.map +1 -1
- package/dist/intake/types.d.ts +1 -0
- package/dist/intake/types.d.ts.map +1 -1
- package/dist/observability/events.d.ts +98 -0
- package/dist/observability/events.d.ts.map +1 -1
- package/dist/observability/events.js +50 -1
- package/dist/observability/events.js.map +1 -1
- package/dist/provider.d.ts.map +1 -1
- package/dist/provider.js +46 -1
- package/dist/provider.js.map +1 -1
- package/dist/reporting/compose-explore-headline.d.ts +14 -0
- package/dist/reporting/compose-explore-headline.d.ts.map +1 -0
- package/dist/reporting/compose-explore-headline.js +14 -0
- package/dist/reporting/compose-explore-headline.js.map +1 -0
- package/dist/reporting/derive-explore-status.d.ts +18 -0
- package/dist/reporting/derive-explore-status.d.ts.map +1 -0
- package/dist/reporting/derive-explore-status.js +19 -0
- package/dist/reporting/derive-explore-status.js.map +1 -0
- package/dist/reporting/parse-explore-report.d.ts +38 -0
- package/dist/reporting/parse-explore-report.d.ts.map +1 -0
- package/dist/reporting/parse-explore-report.js +185 -0
- package/dist/reporting/parse-explore-report.js.map +1 -0
- package/dist/research/adapters/arxiv.d.ts +6 -0
- package/dist/research/adapters/arxiv.d.ts.map +1 -0
- package/dist/research/adapters/arxiv.js +36 -0
- package/dist/research/adapters/arxiv.js.map +1 -0
- package/dist/research/adapters/generic-rss.d.ts +8 -0
- package/dist/research/adapters/generic-rss.d.ts.map +1 -0
- package/dist/research/adapters/generic-rss.js +26 -0
- package/dist/research/adapters/generic-rss.js.map +1 -0
- package/dist/research/adapters/github-search.d.ts +7 -0
- package/dist/research/adapters/github-search.d.ts.map +1 -0
- package/dist/research/adapters/github-search.js +95 -0
- package/dist/research/adapters/github-search.js.map +1 -0
- package/dist/research/adapters/index.d.ts +8 -0
- package/dist/research/adapters/index.d.ts.map +1 -0
- package/dist/research/adapters/index.js +17 -0
- package/dist/research/adapters/index.js.map +1 -0
- package/dist/research/adapters/semantic-scholar.d.ts +6 -0
- package/dist/research/adapters/semantic-scholar.d.ts.map +1 -0
- package/dist/research/adapters/semantic-scholar.js +54 -0
- package/dist/research/adapters/semantic-scholar.js.map +1 -0
- package/dist/research/adapters/types.d.ts +15 -0
- package/dist/research/adapters/types.d.ts.map +1 -0
- package/dist/research/adapters/types.js +2 -0
- package/dist/research/adapters/types.js.map +1 -0
- package/dist/research/allowlist.d.ts +25 -0
- package/dist/research/allowlist.d.ts.map +1 -0
- package/dist/research/allowlist.js +102 -0
- package/dist/research/allowlist.js.map +1 -0
- package/dist/research/ssrf-guard.d.ts +12 -0
- package/dist/research/ssrf-guard.d.ts.map +1 -0
- package/dist/research/ssrf-guard.js +209 -0
- package/dist/research/ssrf-guard.js.map +1 -0
- package/dist/research/types.d.ts +14 -0
- package/dist/research/types.d.ts.map +1 -0
- package/dist/research/types.js +2 -0
- package/dist/research/types.js.map +1 -0
- package/dist/research/untrusted-content.d.ts +13 -0
- package/dist/research/untrusted-content.d.ts.map +1 -0
- package/dist/research/untrusted-content.js +9 -0
- package/dist/research/untrusted-content.js.map +1 -0
- package/dist/research/web-fetch.d.ts +50 -0
- package/dist/research/web-fetch.d.ts.map +1 -0
- package/dist/research/web-fetch.js +411 -0
- package/dist/research/web-fetch.js.map +1 -0
- package/dist/research/web-search.d.ts +28 -0
- package/dist/research/web-search.d.ts.map +1 -0
- package/dist/research/web-search.js +134 -0
- package/dist/research/web-search.js.map +1 -0
- package/dist/review/diff-review.js +7 -8
- package/dist/review/diff-review.js.map +1 -1
- package/dist/review/quality-reviewer.d.ts +15 -1
- package/dist/review/quality-reviewer.d.ts.map +1 -1
- package/dist/review/quality-reviewer.js +12 -7
- package/dist/review/quality-reviewer.js.map +1 -1
- package/dist/review/reviewer-prompt.d.ts +6 -2
- package/dist/review/reviewer-prompt.d.ts.map +1 -1
- package/dist/review/reviewer-prompt.js +23 -15
- package/dist/review/reviewer-prompt.js.map +1 -1
- package/dist/review/spec-reviewer.d.ts.map +1 -1
- package/dist/review/spec-reviewer.js +5 -4
- package/dist/review/spec-reviewer.js.map +1 -1
- package/dist/routing/canonical-model.d.ts +7 -0
- package/dist/routing/canonical-model.d.ts.map +1 -0
- package/dist/routing/canonical-model.js +11 -0
- package/dist/routing/canonical-model.js.map +1 -0
- package/dist/run-tasks/index.d.ts +2 -0
- package/dist/run-tasks/index.d.ts.map +1 -1
- package/dist/run-tasks/index.js.map +1 -1
- package/dist/run-tasks/reviewed-lifecycle.d.ts +2 -0
- package/dist/run-tasks/reviewed-lifecycle.d.ts.map +1 -1
- package/dist/run-tasks/reviewed-lifecycle.js +232 -98
- package/dist/run-tasks/reviewed-lifecycle.js.map +1 -1
- package/dist/run-tasks/worker-status.d.ts +6 -0
- package/dist/run-tasks/worker-status.d.ts.map +1 -1
- package/dist/run-tasks/worker-status.js +6 -0
- package/dist/run-tasks/worker-status.js.map +1 -1
- package/dist/runners/base/research-tools.d.ts +47 -0
- package/dist/runners/base/research-tools.d.ts.map +1 -0
- package/dist/runners/base/research-tools.js +67 -0
- package/dist/runners/base/research-tools.js.map +1 -0
- package/dist/runners/base/usage-accumulator.d.ts +9 -0
- package/dist/runners/base/usage-accumulator.d.ts.map +1 -1
- package/dist/runners/base/usage-accumulator.js +16 -2
- package/dist/runners/base/usage-accumulator.js.map +1 -1
- package/dist/runners/claude-runner.d.ts +11 -1
- package/dist/runners/claude-runner.d.ts.map +1 -1
- package/dist/runners/claude-runner.js +58 -15
- package/dist/runners/claude-runner.js.map +1 -1
- package/dist/runners/codex-runner.d.ts +10 -1
- package/dist/runners/codex-runner.d.ts.map +1 -1
- package/dist/runners/codex-runner.js +50 -11
- package/dist/runners/codex-runner.js.map +1 -1
- package/dist/runners/openai-runner.d.ts +41 -1
- package/dist/runners/openai-runner.d.ts.map +1 -1
- package/dist/runners/openai-runner.js +57 -9
- package/dist/runners/openai-runner.js.map +1 -1
- package/dist/runners/types.d.ts +17 -0
- package/dist/runners/types.d.ts.map +1 -1
- package/dist/telemetry/event-builder.d.ts +2 -2
- package/dist/telemetry/event-builder.d.ts.map +1 -1
- package/dist/telemetry/event-builder.js +63 -15
- package/dist/telemetry/event-builder.js.map +1 -1
- package/dist/telemetry/field-coverage.d.ts.map +1 -1
- package/dist/telemetry/field-coverage.js +3 -1
- package/dist/telemetry/field-coverage.js.map +1 -1
- package/dist/telemetry/types.d.ts +139 -78
- package/dist/telemetry/types.d.ts.map +1 -1
- package/dist/telemetry/types.js +30 -7
- package/dist/telemetry/types.js.map +1 -1
- package/dist/tool-schemas/explore.d.ts +9 -0
- package/dist/tool-schemas/explore.d.ts.map +1 -0
- package/dist/tool-schemas/explore.js +64 -0
- package/dist/tool-schemas/explore.js.map +1 -0
- package/dist/types.d.ts +47 -2
- package/dist/types.d.ts.map +1 -1
- package/dist/types.js.map +1 -1
- package/package.json +16 -1
|
@@ -33,14 +33,14 @@ const READ_ONLY_TOOL_NAMES = new Set([
|
|
|
33
33
|
const _emptyMetrics = { inputTokens: null, outputTokens: null, cachedTokens: null, reasoningTokens: null, turnCount: null, toolCallCount: null, filesReadCount: null, filesWrittenCount: null };
|
|
34
34
|
export function emptyStats() {
|
|
35
35
|
return {
|
|
36
|
-
implementing: { stage: 'implementing', entered: false, durationMs: null, costUSD: null, agentTier: null, modelFamily: null, model: null, maxIdleMs:
|
|
37
|
-
spec_rework: { stage: 'spec_rework', entered: false, durationMs: null, costUSD: null, agentTier: null, modelFamily: null, model: null, maxIdleMs:
|
|
38
|
-
quality_rework: { stage: 'quality_rework', entered: false, durationMs: null, costUSD: null, agentTier: null, modelFamily: null, model: null, maxIdleMs:
|
|
39
|
-
committing: { stage: 'committing', entered: false, durationMs: null, costUSD: null, agentTier: null, modelFamily: null, model: null, maxIdleMs:
|
|
40
|
-
verifying: { stage: 'verifying', entered: false, durationMs: null, costUSD: null, agentTier: null, modelFamily: null, model: null, maxIdleMs:
|
|
41
|
-
spec_review: { stage: 'spec_review', entered: false, durationMs: null, costUSD: null, agentTier: null, modelFamily: null, model: null, maxIdleMs:
|
|
42
|
-
quality_review: { stage: 'quality_review', entered: false, durationMs: null, costUSD: null, agentTier: null, modelFamily: null, model: null, maxIdleMs:
|
|
43
|
-
diff_review: { stage: 'diff_review', entered: false, durationMs: null, costUSD: null, agentTier: null, modelFamily: null, model: null, maxIdleMs:
|
|
36
|
+
implementing: { stage: 'implementing', entered: false, durationMs: null, costUSD: null, agentTier: null, modelFamily: null, model: null, maxIdleMs: 0, totalIdleMs: 0, activityEvents: 0, ..._emptyMetrics },
|
|
37
|
+
spec_rework: { stage: 'spec_rework', entered: false, durationMs: null, costUSD: null, agentTier: null, modelFamily: null, model: null, maxIdleMs: 0, totalIdleMs: 0, activityEvents: 0, ..._emptyMetrics },
|
|
38
|
+
quality_rework: { stage: 'quality_rework', entered: false, durationMs: null, costUSD: null, agentTier: null, modelFamily: null, model: null, maxIdleMs: 0, totalIdleMs: 0, activityEvents: 0, ..._emptyMetrics },
|
|
39
|
+
committing: { stage: 'committing', entered: false, durationMs: null, costUSD: null, agentTier: null, modelFamily: null, model: null, maxIdleMs: 0, totalIdleMs: 0, activityEvents: 0, ..._emptyMetrics },
|
|
40
|
+
verifying: { stage: 'verifying', entered: false, durationMs: null, costUSD: null, agentTier: null, modelFamily: null, model: null, maxIdleMs: 0, totalIdleMs: 0, activityEvents: 0, outcome: null, skipReason: null, ..._emptyMetrics },
|
|
41
|
+
spec_review: { stage: 'spec_review', entered: false, durationMs: null, costUSD: null, agentTier: null, modelFamily: null, model: null, maxIdleMs: 0, totalIdleMs: 0, activityEvents: 0, verdict: null, roundsUsed: null, ..._emptyMetrics },
|
|
42
|
+
quality_review: { stage: 'quality_review', entered: false, durationMs: null, costUSD: null, agentTier: null, modelFamily: null, model: null, maxIdleMs: 0, totalIdleMs: 0, activityEvents: 0, verdict: null, roundsUsed: null, ..._emptyMetrics },
|
|
43
|
+
diff_review: { stage: 'diff_review', entered: false, durationMs: null, costUSD: null, agentTier: null, modelFamily: null, model: null, maxIdleMs: 0, totalIdleMs: 0, activityEvents: 0, verdict: null, roundsUsed: null, ..._emptyMetrics },
|
|
44
44
|
};
|
|
45
45
|
}
|
|
46
46
|
function modelFamily(model) {
|
|
@@ -59,9 +59,9 @@ export function endBaseStage(stats, name, t0, c0, agent, finalCostUSD, idle, met
|
|
|
59
59
|
agentTier: agent.tier,
|
|
60
60
|
modelFamily: modelFamily(agent.model),
|
|
61
61
|
model: agent.model,
|
|
62
|
-
maxIdleMs: idle?.maxIdleMs ??
|
|
63
|
-
totalIdleMs: idle?.totalIdleMs ??
|
|
64
|
-
activityEvents: idle?.activityEvents ??
|
|
62
|
+
maxIdleMs: idle?.maxIdleMs ?? 0,
|
|
63
|
+
totalIdleMs: idle?.totalIdleMs ?? 0,
|
|
64
|
+
activityEvents: idle?.activityEvents ?? 0,
|
|
65
65
|
inputTokens: metrics?.inputTokens ?? null,
|
|
66
66
|
outputTokens: metrics?.outputTokens ?? null,
|
|
67
67
|
cachedTokens: metrics?.cachedTokens ?? null,
|
|
@@ -83,14 +83,17 @@ metrics) {
|
|
|
83
83
|
stage: name,
|
|
84
84
|
entered: true,
|
|
85
85
|
durationMs: metrics?.durationMs !== undefined ? metrics.durationMs : Date.now() - t0,
|
|
86
|
-
|
|
86
|
+
// Item 7: != null (covers both undefined AND null) — null means
|
|
87
|
+
// "pricing unavailable, fall through to runningCostUSD computation"
|
|
88
|
+
// rather than masking unknown as the literal 0.
|
|
89
|
+
costUSD: metrics?.costUSD != null ? metrics.costUSD
|
|
87
90
|
: finalCostUSD !== null && c0 !== null ? finalCostUSD - c0 : null,
|
|
88
91
|
agentTier: agent.tier,
|
|
89
92
|
modelFamily: modelFamily(agent.model),
|
|
90
93
|
model: agent.model,
|
|
91
|
-
maxIdleMs: idle?.maxIdleMs ??
|
|
92
|
-
totalIdleMs: idle?.totalIdleMs ??
|
|
93
|
-
activityEvents: idle?.activityEvents ??
|
|
94
|
+
maxIdleMs: idle?.maxIdleMs ?? 0,
|
|
95
|
+
totalIdleMs: idle?.totalIdleMs ?? 0,
|
|
96
|
+
activityEvents: idle?.activityEvents ?? 0,
|
|
94
97
|
inputTokens: metrics?.inputTokens ?? null,
|
|
95
98
|
outputTokens: metrics?.outputTokens ?? null,
|
|
96
99
|
cachedTokens: metrics?.cachedTokens ?? null,
|
|
@@ -164,9 +167,9 @@ export function endVerifyStage(stats, t0, c0, agent, finalCostUSD, idle, outcome
|
|
|
164
167
|
agentTier: agent.tier,
|
|
165
168
|
modelFamily: modelFamily(agent.model),
|
|
166
169
|
model: agent.model,
|
|
167
|
-
maxIdleMs: idle?.maxIdleMs ??
|
|
168
|
-
totalIdleMs: idle?.totalIdleMs ??
|
|
169
|
-
activityEvents: idle?.activityEvents ??
|
|
170
|
+
maxIdleMs: idle?.maxIdleMs ?? 0,
|
|
171
|
+
totalIdleMs: idle?.totalIdleMs ?? 0,
|
|
172
|
+
activityEvents: idle?.activityEvents ?? 0,
|
|
170
173
|
inputTokens: null,
|
|
171
174
|
outputTokens: null,
|
|
172
175
|
cachedTokens: null,
|
|
@@ -514,6 +517,24 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
514
517
|
const fallbackOverrides = [];
|
|
515
518
|
let latestAttemptedImpl;
|
|
516
519
|
let lastNonRejectedImpl;
|
|
520
|
+
// Review-stage timing variables hoisted so deferred-finalizer closures
|
|
521
|
+
// (defined below) can reference them from all early-exit paths.
|
|
522
|
+
let specReviewT0 = 0;
|
|
523
|
+
let specReviewC0 = null;
|
|
524
|
+
let specReviewDurationMs = 0;
|
|
525
|
+
let qualityReviewT0 = 0;
|
|
526
|
+
let qualityReviewC0 = null;
|
|
527
|
+
let qualityReviewDurationMs = 0;
|
|
528
|
+
// Accumulated metrics from spec/quality review results — threaded to
|
|
529
|
+
// the deferred finalizers so early-exit paths carry the same token/turn
|
|
530
|
+
// counts the normal post-loop path always had.
|
|
531
|
+
let specReviewMetrics = {};
|
|
532
|
+
let qualityReviewMetrics = {};
|
|
533
|
+
// Hoisted so deferred-finalizer closures (defined below) can reference
|
|
534
|
+
// these from all early-exit paths. Reassigned after the corresponding
|
|
535
|
+
// review stage runs.
|
|
536
|
+
let specStatus = 'error';
|
|
537
|
+
let qualityResult = { status: 'skipped', report: undefined, findings: [], errorReason: (reviewPolicy === 'full' || reviewPolicy === 'quality_only') ? 'all_tiers_unavailable' : 'skipped: reviewPolicy is spec_only' };
|
|
517
538
|
const reviewRounds = () => ({ spec: specAttemptIndex, quality: qualityAttemptIndex, metadata: metadataRepair, cap: Math.max(maxSpecRows, maxQualityRows) });
|
|
518
539
|
const taskCostUSD = () => (heartbeat ? heartbeat.getHeartbeatTickInfo().costUSD : null);
|
|
519
540
|
// Per-stage stats tracking
|
|
@@ -535,6 +556,46 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
535
556
|
const model = provider?.config.model ?? config.agents[tier]?.model ?? resolvedModel;
|
|
536
557
|
return { tier, family: modelFamily(model), model };
|
|
537
558
|
};
|
|
559
|
+
// Deferred finalizers for spec_review and quality_review. Called from
|
|
560
|
+
// the normal post-loop path AND from every early-exit path
|
|
561
|
+
// (round_cap, cost_ceiling, time_ceiling, all_tiers_unavailable).
|
|
562
|
+
// Idempotent on re-call; no-op when the stage was never started.
|
|
563
|
+
let specReviewFinalized = false;
|
|
564
|
+
let qualityReviewFinalized = false;
|
|
565
|
+
const finalizeSpecReviewStage = () => {
|
|
566
|
+
if (specReviewFinalized)
|
|
567
|
+
return;
|
|
568
|
+
if (specReviewT0 === 0)
|
|
569
|
+
return; // never started
|
|
570
|
+
specReviewFinalized = true;
|
|
571
|
+
const lastReviewer = specReviewerHistory[specReviewerHistory.length - 1];
|
|
572
|
+
const reviewerAgent = (lastReviewer === undefined || lastReviewer === 'skipped')
|
|
573
|
+
? implementerAgentInfo
|
|
574
|
+
: reviewerAgentInfoFor(lastReviewer);
|
|
575
|
+
endReviewStage(stats, 'spec_review', specReviewT0, specReviewC0, reviewerAgent, runningCostUSD(), snapshotIdle(stageIdle), specStatus === 'approved' ? 'approved'
|
|
576
|
+
: specStatus === 'changes_required' ? 'changes_required'
|
|
577
|
+
: specStatus === 'skipped' ? 'skipped'
|
|
578
|
+
: specStatus === 'not_applicable' ? 'not_applicable'
|
|
579
|
+
: 'error', specAttemptIndex, { ...specReviewMetrics, durationMs: specReviewDurationMs });
|
|
580
|
+
};
|
|
581
|
+
const finalizeQualityReviewStage = () => {
|
|
582
|
+
if (qualityReviewFinalized)
|
|
583
|
+
return;
|
|
584
|
+
if (qualityReviewT0 === 0)
|
|
585
|
+
return;
|
|
586
|
+
if (reviewPolicy !== 'full' && reviewPolicy !== 'quality_only')
|
|
587
|
+
return;
|
|
588
|
+
qualityReviewFinalized = true;
|
|
589
|
+
const lastReviewer = qualityReviewerHistory[qualityReviewerHistory.length - 1];
|
|
590
|
+
const reviewerAgent = (lastReviewer === undefined || lastReviewer === 'skipped')
|
|
591
|
+
? implementerAgentInfo
|
|
592
|
+
: reviewerAgentInfoFor(lastReviewer);
|
|
593
|
+
endReviewStage(stats, 'quality_review', qualityReviewT0, qualityReviewC0, reviewerAgent, runningCostUSD(), snapshotIdle(stageIdle), qualityResult.status === 'approved' ? 'approved'
|
|
594
|
+
: qualityResult.status === 'changes_required' ? 'changes_required'
|
|
595
|
+
: qualityResult.status === 'annotated' ? 'annotated'
|
|
596
|
+
: qualityResult.status === 'skipped' ? 'skipped'
|
|
597
|
+
: 'error', qualityAttemptIndex, { ...qualityReviewMetrics, durationMs: qualityReviewDurationMs });
|
|
598
|
+
};
|
|
538
599
|
// §3.9: runningCostUSD must be cumulative and monotonic across explicit
|
|
539
600
|
// runner boundaries. Runner progress reports per-runner cumulative token
|
|
540
601
|
// counts, so lifecycle cost is completed runners + current runner partial.
|
|
@@ -599,7 +660,7 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
599
660
|
// on the base result (set by callers via abortReviewLoop({ ...res, specReviewStatus, ... })).
|
|
600
661
|
// Defaults to 'changes_required' for whichever loop tripped — that's the only state the
|
|
601
662
|
// loop ever fires from, by construction.
|
|
602
|
-
function adaptForAllTiersUnavailable(base, loop, attempt, resolvedModel, salvageSource) {
|
|
663
|
+
function adaptForAllTiersUnavailable(base, loop, attempt, resolvedModel, salvageSource, unavailableReason) {
|
|
603
664
|
const stageName = loop === 'spec' && attempt === 0 ? 'implementing'
|
|
604
665
|
: loop === 'spec' ? 'spec_rework'
|
|
605
666
|
: 'quality_rework';
|
|
@@ -623,9 +684,9 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
623
684
|
agentTier: implementerAgentInfo.tier,
|
|
624
685
|
modelFamily: modelFamily(implementerAgentInfo.model),
|
|
625
686
|
model: implementerAgentInfo.model,
|
|
626
|
-
maxIdleMs:
|
|
627
|
-
totalIdleMs:
|
|
628
|
-
activityEvents:
|
|
687
|
+
maxIdleMs: 0,
|
|
688
|
+
totalIdleMs: 0,
|
|
689
|
+
activityEvents: 0,
|
|
629
690
|
inputTokens: salvageSource?.usage?.inputTokens ?? null,
|
|
630
691
|
outputTokens: salvageSource?.usage?.outputTokens ?? null,
|
|
631
692
|
cachedTokens: salvageSource?.usage?.cachedTokens ?? null,
|
|
@@ -636,6 +697,8 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
636
697
|
filesWrittenCount: (salvageSource?.filesWritten?.length) || null,
|
|
637
698
|
};
|
|
638
699
|
}
|
|
700
|
+
finalizeSpecReviewStage();
|
|
701
|
+
finalizeQualityReviewStage();
|
|
639
702
|
const ship = salvageSource ?? lastNonRejectedImpl?.result ?? base;
|
|
640
703
|
return {
|
|
641
704
|
...ship,
|
|
@@ -644,6 +707,7 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
644
707
|
terminationReason: 'all_tiers_unavailable',
|
|
645
708
|
reviewRounds: reviewRounds(),
|
|
646
709
|
error: `runWithFallback: both tiers unavailable (loop=${loop}, attempt=${attempt}, role=implementer)`,
|
|
710
|
+
errorCode: unavailableReason === 'reviewer_separation_unsatisfiable' ? 'reviewer_separation_unsatisfiable' : ship.errorCode,
|
|
647
711
|
agents: agentEnvelope(specReviewerHistory[specReviewerHistory.length - 1] ?? 'not_applicable', qualityReviewerHistory[qualityReviewerHistory.length - 1] ?? ((reviewPolicy === 'full' || reviewPolicy === 'quality_only') ? 'not_applicable' : 'skipped')),
|
|
648
712
|
stageStats: stats,
|
|
649
713
|
models: {
|
|
@@ -676,28 +740,32 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
676
740
|
...(fallbackOverrides.length > 0 ? { fallbackOverrides } : {}),
|
|
677
741
|
};
|
|
678
742
|
};
|
|
679
|
-
const abortReviewLoop = (base, terminationReason, message, aborting, wallClockMs) =>
|
|
680
|
-
|
|
681
|
-
|
|
682
|
-
|
|
683
|
-
|
|
684
|
-
|
|
685
|
-
:
|
|
686
|
-
|
|
687
|
-
|
|
688
|
-
|
|
689
|
-
|
|
690
|
-
|
|
691
|
-
|
|
692
|
-
|
|
693
|
-
|
|
694
|
-
|
|
695
|
-
|
|
696
|
-
|
|
697
|
-
|
|
698
|
-
|
|
699
|
-
|
|
700
|
-
|
|
743
|
+
const abortReviewLoop = (base, terminationReason, message, aborting, wallClockMs) => {
|
|
744
|
+
finalizeSpecReviewStage();
|
|
745
|
+
finalizeQualityReviewStage();
|
|
746
|
+
return {
|
|
747
|
+
...base,
|
|
748
|
+
status: 'incomplete',
|
|
749
|
+
workerStatus: 'review_loop_aborted',
|
|
750
|
+
terminationReason: terminationReason === 'round_cap'
|
|
751
|
+
? 'round_cap'
|
|
752
|
+
: {
|
|
753
|
+
cause: terminationReason === 'cost_ceiling' ? 'cost_exceeded' : 'time_ceiling',
|
|
754
|
+
turnsUsed: base.turns,
|
|
755
|
+
hasFileArtifacts: (base.filesWritten ?? []).length > 0,
|
|
756
|
+
usedShell: (base.toolCalls ?? []).some(c => c.startsWith('shell') || c.startsWith('runShell')),
|
|
757
|
+
workerSelfAssessment: 'review_loop_aborted',
|
|
758
|
+
wasPromoted: false,
|
|
759
|
+
...(wallClockMs !== undefined ? { wallClockMs } : {}),
|
|
760
|
+
},
|
|
761
|
+
reviewRounds: reviewRounds(),
|
|
762
|
+
error: message,
|
|
763
|
+
specReviewStatus: aborting === 'spec' ? 'changes_required' : (base.specReviewStatus ?? 'approved'),
|
|
764
|
+
qualityReviewStatus: aborting === 'quality' ? 'changes_required' : (base.qualityReviewStatus ?? 'skipped'),
|
|
765
|
+
agents: agentEnvelope(specReviewerHistory[specReviewerHistory.length - 1] ?? 'not_applicable', qualityReviewerHistory[qualityReviewerHistory.length - 1] ?? ((reviewPolicy === 'full' || reviewPolicy === 'quality_only') ? 'not_applicable' : 'skipped')),
|
|
766
|
+
stageStats: stats,
|
|
767
|
+
};
|
|
768
|
+
};
|
|
701
769
|
const defaultVerification = { status: 'skipped', steps: [], totalDurationMs: 0, skipReason: 'no_command' };
|
|
702
770
|
let latestVerification = defaultVerification;
|
|
703
771
|
async function runVerificationStage() {
|
|
@@ -763,6 +831,11 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
763
831
|
structuredError: { code: 'runner_crash', message: workerError.message },
|
|
764
832
|
workerStatus: 'failed',
|
|
765
833
|
workerError,
|
|
834
|
+
models: {
|
|
835
|
+
implementer: implModel,
|
|
836
|
+
specReviewer: null,
|
|
837
|
+
qualityReviewer: null,
|
|
838
|
+
},
|
|
766
839
|
});
|
|
767
840
|
}
|
|
768
841
|
function withVerification(result, verification = latestVerification) {
|
|
@@ -820,6 +893,17 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
820
893
|
stageStats: stats,
|
|
821
894
|
}, verification);
|
|
822
895
|
}
|
|
896
|
+
function diffReviewErrorTerminationReason(base) {
|
|
897
|
+
return {
|
|
898
|
+
cause: 'error',
|
|
899
|
+
turnsUsed: base.turns,
|
|
900
|
+
hasFileArtifacts: (base.filesWritten ?? []).length > 0,
|
|
901
|
+
usedShell: (base.toolCalls ?? []).some(c => c.startsWith('shell') || c.startsWith('runShell')),
|
|
902
|
+
workerSelfAssessment: 'failed',
|
|
903
|
+
wasPromoted: false,
|
|
904
|
+
...(base.terminationReason && typeof base.terminationReason === 'object' && base.terminationReason.wallClockMs !== undefined ? { wallClockMs: base.terminationReason.wallClockMs } : {}),
|
|
905
|
+
};
|
|
906
|
+
}
|
|
823
907
|
function resolveDiffOnlyTerminal(base, verdict, verification, diffTruncated) {
|
|
824
908
|
const concerns = [...(base.concerns ?? [])];
|
|
825
909
|
if ('status' in verdict && verdict.status === 'skipped') {
|
|
@@ -842,6 +926,7 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
842
926
|
code: 'diff_review_rejected',
|
|
843
927
|
message: verdict.message || 'diff review rejected implementation',
|
|
844
928
|
},
|
|
929
|
+
terminationReason: diffReviewErrorTerminationReason(base),
|
|
845
930
|
concerns,
|
|
846
931
|
commits,
|
|
847
932
|
commitError,
|
|
@@ -854,6 +939,12 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
854
939
|
status: verdict.status,
|
|
855
940
|
workerStatus: 'failed',
|
|
856
941
|
error: verdict.reason ?? `diff review transport failure: ${verdict.status}`,
|
|
942
|
+
errorCode: verdict.status,
|
|
943
|
+
structuredError: {
|
|
944
|
+
code: verdict.status,
|
|
945
|
+
message: verdict.reason ?? `diff review transport failure: ${verdict.status}`,
|
|
946
|
+
},
|
|
947
|
+
terminationReason: diffReviewErrorTerminationReason(base),
|
|
857
948
|
concerns: [...concerns, ...verdict.concerns],
|
|
858
949
|
commits,
|
|
859
950
|
commitError,
|
|
@@ -927,19 +1018,23 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
927
1018
|
const treeDirty = porcelain.length > 0;
|
|
928
1019
|
if (!headMoved && !treeDirty)
|
|
929
1020
|
return;
|
|
1021
|
+
// Emit committing stage for both worker-committed (headMoved) and
|
|
1022
|
+
// pending-commit (treeDirty) paths. Workers that auto-commit during
|
|
1023
|
+
// turns leave a clean tree but moved HEAD — they must still produce
|
|
1024
|
+
// a committing stage so telemetry includes filesCommittedCount.
|
|
1025
|
+
transitionStage('verifying', 'committing', { stage: 'committing', stageIndex: 7 }, null);
|
|
1026
|
+
const commitT0 = Date.now();
|
|
1027
|
+
const commitC0 = runningCostUSD();
|
|
930
1028
|
if (headMoved)
|
|
931
1029
|
await recordWorkerCommits(baselineHead, 'HEAD');
|
|
932
1030
|
if (treeDirty) {
|
|
933
1031
|
const validCommit = implReport?.commit ?? await repairCommitMetadata(implReport?.commitDiagnostic ?? 'no commit block emitted');
|
|
934
|
-
if (
|
|
935
|
-
|
|
936
|
-
|
|
937
|
-
|
|
938
|
-
const commitC0 = runningCostUSD();
|
|
939
|
-
const c = await runCommitStage({ cwd, filesWritten: implResult.filesWritten, commit: validCommit });
|
|
940
|
-
commits.push(c);
|
|
941
|
-
endBaseStage(stats, 'committing', commitT0, commitC0, implementerAgentInfo, runningCostUSD(), snapshotIdle(stageIdle));
|
|
1032
|
+
if (validCommit) {
|
|
1033
|
+
const c = await runCommitStage({ cwd, filesWritten: implResult.filesWritten, commit: validCommit });
|
|
1034
|
+
commits.push(c);
|
|
1035
|
+
}
|
|
942
1036
|
}
|
|
1037
|
+
endBaseStage(stats, 'committing', commitT0, commitC0, implementerAgentInfo, runningCostUSD(), snapshotIdle(stageIdle));
|
|
943
1038
|
}
|
|
944
1039
|
// Tracks the final RunResult across every exit path so the `finally` block
|
|
945
1040
|
// below fires `recorder.recordTaskCompleted` exactly once regardless of which
|
|
@@ -1037,9 +1132,9 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
1037
1132
|
assignedTier: initialDecision.impl,
|
|
1038
1133
|
reason: initialImpl.unavailableReason,
|
|
1039
1134
|
});
|
|
1040
|
-
return __recordOnce(adaptForAllTiersUnavailable(initialImpl.result, 'spec', 0, resolvedModel, initialImpl.salvageResult));
|
|
1135
|
+
return __recordOnce(adaptForAllTiersUnavailable(initialImpl.result, 'spec', 0, resolvedModel, initialImpl.salvageResult, initialImpl.unavailableReason));
|
|
1041
1136
|
}
|
|
1042
|
-
|
|
1137
|
+
let implResult = initialImpl.result;
|
|
1043
1138
|
latestAttemptedImpl = { tier: initialImpl.usedTier, result: implResult };
|
|
1044
1139
|
lastNonRejectedImpl = { tier: initialImpl.usedTier, result: implResult };
|
|
1045
1140
|
implementerHistory.push(initialImpl.usedTier);
|
|
@@ -1055,8 +1150,29 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
1055
1150
|
costUSD: implResult.usage?.costUSD ?? undefined,
|
|
1056
1151
|
});
|
|
1057
1152
|
specAttemptIndex = 1;
|
|
1058
|
-
const implReport =
|
|
1153
|
+
const implReport = parseStructuredReport(implResult.output);
|
|
1059
1154
|
const workerStatus = extractWorkerStatus(implReport);
|
|
1155
|
+
// Item 9: surface silent-incomplete via errorCode — the delegation layer
|
|
1156
|
+
// cascades result.status as a fallback errorCode (e.g., 'incomplete'),
|
|
1157
|
+
// which is not an informative error code. Replace it when the runner
|
|
1158
|
+
// produced no parseable summary — the operator can now filter on
|
|
1159
|
+
// 'incomplete_no_summary' instead of guessing.
|
|
1160
|
+
//
|
|
1161
|
+
// parseStructuredReport always returns a report object and has a
|
|
1162
|
+
// last-resort fallback that treats the first paragraph as an implicit
|
|
1163
|
+
// summary, so implReport.summary alone is not a reliable signal. Treat
|
|
1164
|
+
// the run as having a structured summary only when a real ## Summary
|
|
1165
|
+
// section exists and parses to non-placeholder content.
|
|
1166
|
+
const hasSummaryHeader = /\n##\s+summary\s*\n/i.test(implResult.output) || /^##\s+summary\s*\n/im.test(implResult.output);
|
|
1167
|
+
const summaryText = (hasSummaryHeader ? implReport.summary : null)?.trim().toLowerCase() ?? '';
|
|
1168
|
+
const hasStructuredSummary = hasSummaryHeader && summaryText !== ''
|
|
1169
|
+
&& !['none', '(none)', 'n/a', 'na', 'todo', 'tbd'].includes(summaryText);
|
|
1170
|
+
if (implResult.status === 'incomplete' && !hasStructuredSummary) {
|
|
1171
|
+
const cascadedFallback = implResult.errorCode === implResult.status;
|
|
1172
|
+
if (!implResult.errorCode || cascadedFallback) {
|
|
1173
|
+
implResult = { ...implResult, errorCode: 'incomplete_no_summary' };
|
|
1174
|
+
}
|
|
1175
|
+
}
|
|
1060
1176
|
if (implResult.status === 'ok' && isArtifactProducing) {
|
|
1061
1177
|
await captureCommitsAfterImplementation(implResult, implReport, baselineHead);
|
|
1062
1178
|
}
|
|
@@ -1195,6 +1311,7 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
1195
1311
|
getStatus: (r) => r.status,
|
|
1196
1312
|
makeSyntheticFailure: () => makeSkippedReviewResult('all_tiers_unavailable'),
|
|
1197
1313
|
forbiddenIdentities: implementerIdentity ? [implementerIdentity] : undefined,
|
|
1314
|
+
forbiddenTiers: [resolved.slot],
|
|
1198
1315
|
call: (provider) => runAccounted(provider, () => runDiffReview({ cwd, diff: evidence.fullDiff, diffTruncated: evidence.diffTruncated, verification, worker: { call: (prompt, opts) => provider.run(prompt, { cwd: opts?.cwd ?? cwd, abortSignal: opts?.abortSignal, timeoutMs: opts?.timeoutMs }) }, taskDeadlineMs, abortSignal: stallController.signal })),
|
|
1199
1316
|
});
|
|
1200
1317
|
if (diffCall.fallbackFired) {
|
|
@@ -1203,8 +1320,17 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
1203
1320
|
}
|
|
1204
1321
|
if (diffCall.bothUnavailable) {
|
|
1205
1322
|
emitFallbackUnavailable({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'diff', attempt: 0, role: 'diffReviewer', assignedTier: diffReviewerTier, reason: diffCall.unavailableReason });
|
|
1323
|
+
if (diffCall.unavailableReason === 'reviewer_separation_unsatisfiable') {
|
|
1324
|
+
return __recordOnce(adaptForAllTiersUnavailable({ ...implResult, errorCode: 'reviewer_separation_unsatisfiable', diffReviewStatus: 'error' }, 'spec', 0, resolvedModel, implResult, diffCall.unavailableReason));
|
|
1325
|
+
}
|
|
1206
1326
|
}
|
|
1207
|
-
const verdict = diffCall.bothUnavailable
|
|
1327
|
+
const verdict = diffCall.bothUnavailable ? makeSkippedReviewResult('all_tiers_unavailable') : diffCall.result;
|
|
1328
|
+
const diffEnvelopeStatus = 'kind' in verdict
|
|
1329
|
+
? (verdict.kind === 'approve' ? 'approved'
|
|
1330
|
+
: verdict.kind === 'concerns' ? 'approved'
|
|
1331
|
+
: verdict.kind === 'reject' ? 'changes_required'
|
|
1332
|
+
: 'error')
|
|
1333
|
+
: 'skipped';
|
|
1208
1334
|
emitTaskEvent('review_decision', {
|
|
1209
1335
|
stage: 'diff_review',
|
|
1210
1336
|
verdict: 'kind' in verdict
|
|
@@ -1220,7 +1346,7 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
1220
1346
|
// distinct from spec/quality verdicts. Map to the telemetry verdict enum here.
|
|
1221
1347
|
'kind' in verdict
|
|
1222
1348
|
? (verdict.kind === 'approve' ? 'approved'
|
|
1223
|
-
: verdict.kind === 'concerns' ? '
|
|
1349
|
+
: verdict.kind === 'concerns' ? 'approved'
|
|
1224
1350
|
: verdict.kind === 'reject' ? 'changes_required'
|
|
1225
1351
|
: 'error')
|
|
1226
1352
|
: 'skipped', 0);
|
|
@@ -1231,6 +1357,7 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
1231
1357
|
qualityReviewStatus: 'skipped',
|
|
1232
1358
|
specReviewReason: 'skipped: reviewPolicy is diff_only',
|
|
1233
1359
|
qualityReviewReason: 'skipped: reviewPolicy is diff_only',
|
|
1360
|
+
diffReviewStatus: diffEnvelopeStatus,
|
|
1234
1361
|
implementationReport: effectiveImplReport,
|
|
1235
1362
|
fileArtifactsMissing: implResult.status === 'ok' ? checkOutputTargets(outputTargets) : undefined,
|
|
1236
1363
|
agents: agentEnvelope('skipped', 'skipped'),
|
|
@@ -1240,19 +1367,8 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
1240
1367
|
let finalImplResult = implResult;
|
|
1241
1368
|
let finalImplReport = effectiveImplReport;
|
|
1242
1369
|
let specResult;
|
|
1243
|
-
let specStatus;
|
|
1244
1370
|
let specReport;
|
|
1245
1371
|
let specReviewReason;
|
|
1246
|
-
let specReviewT0 = 0;
|
|
1247
|
-
let specReviewC0 = null;
|
|
1248
|
-
// Delta-only timing: accumulate per-call wall durations across the
|
|
1249
|
-
// initial spec_review + every spec_rework round's re-review. This
|
|
1250
|
-
// replaces the `Date.now() - specReviewT0` fallback at endReviewStage,
|
|
1251
|
-
// which over-counts because endReviewStage runs AFTER spec_rework,
|
|
1252
|
-
// quality_review, AND quality_rework all complete. No absolute
|
|
1253
|
-
// timestamps go on the wire — Date.now() is used only as a local
|
|
1254
|
-
// delta source. Privacy.md guarantees ms-deltas only.
|
|
1255
|
-
let specReviewDurationMs = 0;
|
|
1256
1372
|
if (reviewPolicy !== 'quality_only') {
|
|
1257
1373
|
transitionStage('verifying', 'spec_review', { stage: 'spec_review', stageIndex: 2, reviewRound: 1, attemptCap: maxSpecRows }, null);
|
|
1258
1374
|
const initialReviewerTier = pickReviewer({ loop: 'spec', attemptIndex: 0, baseTier: resolved.slot });
|
|
@@ -1267,6 +1383,7 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
1267
1383
|
getStatus: (r) => r.status,
|
|
1268
1384
|
makeSyntheticFailure: () => makeSkippedReviewResult('all_tiers_unavailable'),
|
|
1269
1385
|
forbiddenIdentities: implementerIdentity ? [implementerIdentity] : undefined,
|
|
1386
|
+
forbiddenTiers: [resolved.slot],
|
|
1270
1387
|
call: (provider) => runAccounted(provider, () => runSpecReview(provider, packet, effectiveImplReport, fileContents, implResult.toolCalls, task.planContext, evidence.block, taskDeadlineMs, stallController.signal, wrappedOnProgress, cwd)),
|
|
1271
1388
|
});
|
|
1272
1389
|
specReviewDurationMs += Date.now() - initialSpecReviewIterStart;
|
|
@@ -1274,6 +1391,15 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
1274
1391
|
emitFallbackUnavailable({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'spec', attempt: 0, role: 'specReviewer', assignedTier: initialReviewerTier, reason: initialSpecReview.unavailableReason });
|
|
1275
1392
|
fallbackOverrides.push({ role: 'specReviewer', loop: 'spec', attempt: 0, assigned: initialReviewerTier, used: initialSpecReview.usedTier, reason: initialSpecReview.unavailableReason, triggeringStatus: initialSpecReview.fallbackTriggeringStatus, bothUnavailable: true });
|
|
1276
1393
|
specReviewerHistory.push('skipped');
|
|
1394
|
+
if (initialSpecReview.unavailableReason === 'reviewer_separation_unsatisfiable') {
|
|
1395
|
+
const unavailableBase = {
|
|
1396
|
+
...implResult,
|
|
1397
|
+
specReviewStatus: 'error',
|
|
1398
|
+
specReviewReason: 'reviewer separation unsatisfiable',
|
|
1399
|
+
errorCode: 'reviewer_separation_unsatisfiable',
|
|
1400
|
+
};
|
|
1401
|
+
return __recordOnce(adaptForAllTiersUnavailable(unavailableBase, 'spec', 0, resolvedModel, implResult, initialSpecReview.unavailableReason));
|
|
1402
|
+
}
|
|
1277
1403
|
}
|
|
1278
1404
|
else {
|
|
1279
1405
|
specReviewerHistory.push(initialSpecReview.usedTier);
|
|
@@ -1322,7 +1448,7 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
1322
1448
|
emitFallbackUnavailable({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'spec', attempt: specAttemptIndex, role: 'implementer', assignedTier: decision.impl, reason: reworkCall.unavailableReason });
|
|
1323
1449
|
if (decision.isEscalated)
|
|
1324
1450
|
emitEscalationUnavailable({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'spec', attempt: specAttemptIndex, role: 'implementer', wantedTier: decision.impl, reason: reworkCall.unavailableReason });
|
|
1325
|
-
return __recordOnce(adaptForAllTiersUnavailable(reworkCall.result, 'spec', specAttemptIndex, resolvedModel, reworkCall.salvageResult));
|
|
1451
|
+
return __recordOnce(adaptForAllTiersUnavailable(reworkCall.result, 'spec', specAttemptIndex, resolvedModel, reworkCall.salvageResult, reworkCall.unavailableReason));
|
|
1326
1452
|
}
|
|
1327
1453
|
finalImplResult = reworkCall.result;
|
|
1328
1454
|
latestAttemptedImpl = { tier: reworkCall.usedTier, result: finalImplResult };
|
|
@@ -1334,12 +1460,21 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
1334
1460
|
commitReworkStage(stats, 'spec_rework', specReworkAcc, implementerAgentInfo);
|
|
1335
1461
|
transitionStage('spec_rework', 'spec_review', { stage: 'spec_review', stageIndex: 2, reviewRound: specAttemptIndex + 1, attemptCap: maxSpecRows }, null);
|
|
1336
1462
|
const reReviewIterStart = Date.now();
|
|
1337
|
-
const reviewCall = await runWithFallback({ assigned: decision.reviewer, providerFor, unavailableTiers: specUnavailable, isTransportFailure: (r) => isReviewTransportFailure(r), getStatus: (r) => r.status, makeSyntheticFailure: () => makeSkippedReviewResult('all_tiers_unavailable'), forbiddenIdentities: implementerIdentity ? [implementerIdentity] : undefined, call: (provider) => runAccounted(provider, () => runSpecReview(provider, packet, finalImplReport, fileContents, finalImplResult.toolCalls, task.planContext, evidence.block, taskDeadlineMs, stallController.signal, wrappedOnProgress, cwd)) });
|
|
1463
|
+
const reviewCall = await runWithFallback({ assigned: decision.reviewer, providerFor, unavailableTiers: specUnavailable, isTransportFailure: (r) => isReviewTransportFailure(r), getStatus: (r) => r.status, makeSyntheticFailure: () => makeSkippedReviewResult('all_tiers_unavailable'), forbiddenIdentities: implementerIdentity ? [implementerIdentity] : undefined, forbiddenTiers: [resolved.slot], call: (provider) => runAccounted(provider, () => runSpecReview(provider, packet, finalImplReport, fileContents, finalImplResult.toolCalls, task.planContext, evidence.block, taskDeadlineMs, stallController.signal, wrappedOnProgress, cwd)) });
|
|
1338
1464
|
specReviewDurationMs += Date.now() - reReviewIterStart;
|
|
1339
1465
|
if (reviewCall.bothUnavailable) {
|
|
1340
1466
|
emitFallbackUnavailable({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'spec', attempt: specAttemptIndex, role: 'specReviewer', assignedTier: decision.reviewer, reason: reviewCall.unavailableReason });
|
|
1341
1467
|
fallbackOverrides.push({ role: 'specReviewer', loop: 'spec', attempt: specAttemptIndex, assigned: decision.reviewer, used: reviewCall.usedTier, reason: reviewCall.unavailableReason, triggeringStatus: reviewCall.fallbackTriggeringStatus, bothUnavailable: true });
|
|
1342
1468
|
specReviewerHistory.push('skipped');
|
|
1469
|
+
if (reviewCall.unavailableReason === 'reviewer_separation_unsatisfiable') {
|
|
1470
|
+
const unavailableBase = {
|
|
1471
|
+
...finalImplResult,
|
|
1472
|
+
specReviewStatus: 'error',
|
|
1473
|
+
specReviewReason: 'reviewer separation unsatisfiable',
|
|
1474
|
+
errorCode: 'reviewer_separation_unsatisfiable',
|
|
1475
|
+
};
|
|
1476
|
+
return __recordOnce(adaptForAllTiersUnavailable(unavailableBase, 'spec', specAttemptIndex, resolvedModel, finalImplResult, reviewCall.unavailableReason));
|
|
1477
|
+
}
|
|
1343
1478
|
}
|
|
1344
1479
|
else {
|
|
1345
1480
|
specReviewerHistory.push(reviewCall.usedTier);
|
|
@@ -1370,17 +1505,6 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
1370
1505
|
specReport = undefined;
|
|
1371
1506
|
specReviewReason = 'skipped: reviewPolicy is quality_only';
|
|
1372
1507
|
}
|
|
1373
|
-
let qualityResult = { status: 'skipped', report: undefined, findings: [], errorReason: (reviewPolicy === 'full' || reviewPolicy === 'quality_only') ? 'all_tiers_unavailable' : 'skipped: reviewPolicy is spec_only' };
|
|
1374
|
-
// Hoisted so endReviewStage (called after this block) can read them on the
|
|
1375
|
-
// success path. When the quality review is skipped (`reviewPolicy !== 'full'`),
|
|
1376
|
-
// the values stay at 0/null and the corresponding stage entry remains in its
|
|
1377
|
-
// `entered: false` default — endReviewStage is never called.
|
|
1378
|
-
let qualityReviewT0 = 0;
|
|
1379
|
-
let qualityReviewC0 = null;
|
|
1380
|
-
// Same delta-only timing pattern as spec_review — accumulate per-call
|
|
1381
|
-
// wall durations across initial + each rework round's re-review. No
|
|
1382
|
-
// raw timestamps cross the wire.
|
|
1383
|
-
let qualityReviewDurationMs = 0;
|
|
1384
1508
|
if (reviewPolicy === 'full' || reviewPolicy === 'quality_only') {
|
|
1385
1509
|
qualityUnavailable = new Map();
|
|
1386
1510
|
const qualityReviewerTier = pickReviewer({ loop: 'quality', attemptIndex: 0, baseTier: resolved.slot });
|
|
@@ -1388,12 +1512,21 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
1388
1512
|
qualityReviewT0 = Date.now();
|
|
1389
1513
|
qualityReviewC0 = runningCostUSD();
|
|
1390
1514
|
const initialQualityIterStart = Date.now();
|
|
1391
|
-
const initialQuality = await runWithFallback({ assigned: qualityReviewerTier, providerFor, unavailableTiers: qualityUnavailable, isTransportFailure: (r) => isReviewTransportFailure(r), getStatus: (r) => r.status, makeSyntheticFailure: () => makeSkippedReviewResult('all_tiers_unavailable'), forbiddenIdentities: implementerIdentity ? [implementerIdentity] : undefined, call: (provider) => runAccounted(provider, () => runQualityReview(provider, packet, specReport ?? finalImplReport, fileContents, finalImplResult.toolCalls, finalImplResult.filesWritten, evidence.block, qualityReviewPromptBuilder, finalImplResult.output, taskDeadlineMs, stallController.signal, wrappedOnProgress, cwd)) });
|
|
1515
|
+
const initialQuality = await runWithFallback({ assigned: qualityReviewerTier, providerFor, unavailableTiers: qualityUnavailable, isTransportFailure: (r) => isReviewTransportFailure(r), getStatus: (r) => r.status, makeSyntheticFailure: () => makeSkippedReviewResult('all_tiers_unavailable'), forbiddenIdentities: implementerIdentity ? [implementerIdentity] : undefined, forbiddenTiers: [resolved.slot], call: (provider) => runAccounted(provider, () => runQualityReview(provider, packet, specReport ?? finalImplReport, fileContents, finalImplResult.toolCalls, finalImplResult.filesWritten, evidence.block, qualityReviewPromptBuilder, finalImplResult.output, taskDeadlineMs, stallController.signal, wrappedOnProgress, cwd)) });
|
|
1392
1516
|
qualityReviewDurationMs += Date.now() - initialQualityIterStart;
|
|
1393
1517
|
if (initialQuality.bothUnavailable) {
|
|
1394
1518
|
emitFallbackUnavailable({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'quality', attempt: 0, role: 'qualityReviewer', assignedTier: qualityReviewerTier, reason: initialQuality.unavailableReason });
|
|
1395
1519
|
fallbackOverrides.push({ role: 'qualityReviewer', loop: 'quality', attempt: 0, assigned: qualityReviewerTier, used: initialQuality.usedTier, reason: initialQuality.unavailableReason, triggeringStatus: initialQuality.fallbackTriggeringStatus, bothUnavailable: true });
|
|
1396
1520
|
qualityReviewerHistory.push('skipped');
|
|
1521
|
+
if (initialQuality.unavailableReason === 'reviewer_separation_unsatisfiable') {
|
|
1522
|
+
const unavailableBase = {
|
|
1523
|
+
...finalImplResult,
|
|
1524
|
+
qualityReviewStatus: 'error',
|
|
1525
|
+
qualityReviewReason: 'reviewer separation unsatisfiable',
|
|
1526
|
+
errorCode: 'reviewer_separation_unsatisfiable',
|
|
1527
|
+
};
|
|
1528
|
+
return __recordOnce(adaptForAllTiersUnavailable(unavailableBase, 'quality', 0, resolvedModel, finalImplResult, initialQuality.unavailableReason));
|
|
1529
|
+
}
|
|
1397
1530
|
}
|
|
1398
1531
|
else {
|
|
1399
1532
|
qualityReviewerHistory.push(initialQuality.usedTier);
|
|
@@ -1480,7 +1613,7 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
1480
1613
|
emitFallbackUnavailable({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'quality', attempt: qualityAttemptIndex, role: 'implementer', assignedTier: decision.impl, reason: reworkCall.unavailableReason });
|
|
1481
1614
|
if (decision.isEscalated)
|
|
1482
1615
|
emitEscalationUnavailable({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'quality', attempt: qualityAttemptIndex, role: 'implementer', wantedTier: decision.impl, reason: reworkCall.unavailableReason });
|
|
1483
|
-
return __recordOnce(adaptForAllTiersUnavailable(reworkCall.result, 'quality', qualityAttemptIndex, resolvedModel, reworkCall.salvageResult));
|
|
1616
|
+
return __recordOnce(adaptForAllTiersUnavailable(reworkCall.result, 'quality', qualityAttemptIndex, resolvedModel, reworkCall.salvageResult, reworkCall.unavailableReason));
|
|
1484
1617
|
}
|
|
1485
1618
|
finalImplResult = reworkCall.result;
|
|
1486
1619
|
latestAttemptedImpl = { tier: reworkCall.usedTier, result: finalImplResult };
|
|
@@ -1492,12 +1625,21 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
1492
1625
|
commitReworkStage(stats, 'quality_rework', qualityReworkAcc, implementerAgentInfo);
|
|
1493
1626
|
transitionStage('quality_rework', 'quality_review', { stage: 'quality_review', stageIndex: 4, reviewRound: qualityAttemptIndex + 1, attemptCap: maxQualityRows }, null);
|
|
1494
1627
|
const qReReviewIterStart = Date.now();
|
|
1495
|
-
const reviewCall = await runWithFallback({ assigned: decision.reviewer, providerFor, unavailableTiers: qualityUnavailable, isTransportFailure: (r) => isReviewTransportFailure(r), getStatus: (r) => r.status, makeSyntheticFailure: () => makeSkippedReviewResult('all_tiers_unavailable'), forbiddenIdentities: implementerIdentity ? [implementerIdentity] : undefined, call: (provider) => runAccounted(provider, () => runQualityReview(provider, packet, finalImplReport, fileContents, finalImplResult.toolCalls, finalImplResult.filesWritten, evidence.block, qualityReviewPromptBuilder, finalImplResult.output, taskDeadlineMs, stallController.signal, wrappedOnProgress, cwd)) });
|
|
1628
|
+
const reviewCall = await runWithFallback({ assigned: decision.reviewer, providerFor, unavailableTiers: qualityUnavailable, isTransportFailure: (r) => isReviewTransportFailure(r), getStatus: (r) => r.status, makeSyntheticFailure: () => makeSkippedReviewResult('all_tiers_unavailable'), forbiddenIdentities: implementerIdentity ? [implementerIdentity] : undefined, forbiddenTiers: [resolved.slot], call: (provider) => runAccounted(provider, () => runQualityReview(provider, packet, finalImplReport, fileContents, finalImplResult.toolCalls, finalImplResult.filesWritten, evidence.block, qualityReviewPromptBuilder, finalImplResult.output, taskDeadlineMs, stallController.signal, wrappedOnProgress, cwd)) });
|
|
1496
1629
|
qualityReviewDurationMs += Date.now() - qReReviewIterStart;
|
|
1497
1630
|
if (reviewCall.bothUnavailable) {
|
|
1498
1631
|
emitFallbackUnavailable({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'quality', attempt: qualityAttemptIndex, role: 'qualityReviewer', assignedTier: decision.reviewer, reason: reviewCall.unavailableReason });
|
|
1499
1632
|
fallbackOverrides.push({ role: 'qualityReviewer', loop: 'quality', attempt: qualityAttemptIndex, assigned: decision.reviewer, used: reviewCall.usedTier, reason: reviewCall.unavailableReason, triggeringStatus: reviewCall.fallbackTriggeringStatus, bothUnavailable: true });
|
|
1500
1633
|
qualityReviewerHistory.push('skipped');
|
|
1634
|
+
if (reviewCall.unavailableReason === 'reviewer_separation_unsatisfiable') {
|
|
1635
|
+
const unavailableBase = {
|
|
1636
|
+
...finalImplResult,
|
|
1637
|
+
qualityReviewStatus: 'error',
|
|
1638
|
+
qualityReviewReason: 'reviewer separation unsatisfiable',
|
|
1639
|
+
errorCode: 'reviewer_separation_unsatisfiable',
|
|
1640
|
+
};
|
|
1641
|
+
return __recordOnce(adaptForAllTiersUnavailable(unavailableBase, 'quality', qualityAttemptIndex, resolvedModel, finalImplResult, reviewCall.unavailableReason));
|
|
1642
|
+
}
|
|
1501
1643
|
}
|
|
1502
1644
|
else {
|
|
1503
1645
|
qualityReviewerHistory.push(reviewCall.usedTier);
|
|
@@ -1559,21 +1701,11 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
1559
1701
|
// override. endReviewStage uses the override when present and falls
|
|
1560
1702
|
// back to `Date.now() - t0` otherwise (which over-counts review-block
|
|
1561
1703
|
// span across rework + later stages).
|
|
1562
|
-
|
|
1563
|
-
|
|
1564
|
-
|
|
1565
|
-
|
|
1566
|
-
: specStatus === 'changes_required' ? 'changes_required'
|
|
1567
|
-
: specStatus === 'skipped' ? 'skipped'
|
|
1568
|
-
: specStatus === 'not_applicable' ? 'not_applicable'
|
|
1569
|
-
: 'error', specAttemptIndex, specMetrics);
|
|
1570
|
-
}
|
|
1704
|
+
specReviewMetrics = (specResult.metrics ?? {});
|
|
1705
|
+
qualityReviewMetrics = (qualityResult.metrics ?? {});
|
|
1706
|
+
finalizeSpecReviewStage();
|
|
1707
|
+
finalizeQualityReviewStage();
|
|
1571
1708
|
const qualityAggregateStatus = qualityResult.status;
|
|
1572
|
-
endReviewStage(stats, 'quality_review', qualityReviewT0, qualityReviewC0, qualityReviewAgent, runningCostUSD(), snapshotIdle(stageIdle), qualityResult.status === 'approved' ? 'approved'
|
|
1573
|
-
: qualityResult.status === 'changes_required' ? 'changes_required'
|
|
1574
|
-
: qualityResult.status === 'annotated' ? 'annotated'
|
|
1575
|
-
: qualityResult.status === 'skipped' ? 'skipped'
|
|
1576
|
-
: 'error', qualityAttemptIndex, qualityMetrics);
|
|
1577
1709
|
const aggregated = aggregateResult(finalReport, specReport, qualityResult.report, specAggregateStatus, qualityAggregateStatus);
|
|
1578
1710
|
// File artifact verification: check whether output targets exist on disk after all work.
|
|
1579
1711
|
// Only applies when status is ok; non-ok statuses skip verification entirely.
|
|
@@ -1655,6 +1787,8 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
1655
1787
|
client: _client ?? 'claude-code',
|
|
1656
1788
|
triggeringSkill: _triggeringSkill ?? 'direct',
|
|
1657
1789
|
parentModel: task.parentModel ?? null,
|
|
1790
|
+
reviewPolicy,
|
|
1791
|
+
verifyCommandPresent: !!(task.verifyCommand && task.verifyCommand.length > 0),
|
|
1658
1792
|
});
|
|
1659
1793
|
}
|
|
1660
1794
|
catch { /* silent */ }
|