@zhixuan92/multi-model-agent-core 3.12.0 → 3.12.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/dist/cost/compute.d.ts +33 -0
- package/dist/cost/compute.d.ts.map +1 -0
- package/dist/cost/compute.js +67 -0
- package/dist/cost/compute.js.map +1 -0
- package/dist/cost/rollup.d.ts +18 -0
- package/dist/cost/rollup.d.ts.map +1 -0
- package/dist/cost/rollup.js +43 -0
- package/dist/cost/rollup.js.map +1 -0
- package/dist/diagnostics/types.d.ts +1 -1
- package/dist/diagnostics/types.d.ts.map +1 -1
- package/dist/escalation/fallback.d.ts +7 -1
- package/dist/escalation/fallback.d.ts.map +1 -1
- package/dist/escalation/fallback.js +39 -4
- package/dist/escalation/fallback.js.map +1 -1
- package/dist/executors/execute-plan.d.ts.map +1 -1
- package/dist/executors/execute-plan.js +1 -0
- package/dist/executors/execute-plan.js.map +1 -1
- package/dist/executors/types.d.ts +4 -0
- package/dist/executors/types.d.ts.map +1 -1
- package/dist/heartbeat.d.ts +7 -0
- package/dist/heartbeat.d.ts.map +1 -1
- package/dist/heartbeat.js +18 -5
- package/dist/heartbeat.js.map +1 -1
- package/dist/intake/compilers/delegate.d.ts +1 -0
- package/dist/intake/compilers/delegate.d.ts.map +1 -1
- package/dist/intake/compilers/delegate.js +1 -0
- package/dist/intake/compilers/delegate.js.map +1 -1
- package/dist/intake/compilers/execute-plan.d.ts +1 -0
- package/dist/intake/compilers/execute-plan.d.ts.map +1 -1
- package/dist/intake/compilers/execute-plan.js +1 -0
- package/dist/intake/compilers/execute-plan.js.map +1 -1
- package/dist/intake/resolve.d.ts.map +1 -1
- package/dist/intake/resolve.js +1 -0
- package/dist/intake/resolve.js.map +1 -1
- package/dist/intake/types.d.ts +1 -0
- package/dist/intake/types.d.ts.map +1 -1
- package/dist/model-profiles.json +6 -6
- package/dist/observability/events.d.ts +9 -3
- package/dist/observability/events.d.ts.map +1 -1
- package/dist/observability/events.js +1 -1
- package/dist/observability/events.js.map +1 -1
- package/dist/provider.d.ts.map +1 -1
- package/dist/provider.js +46 -1
- package/dist/provider.js.map +1 -1
- package/dist/review/diff-review.js +7 -8
- package/dist/review/diff-review.js.map +1 -1
- package/dist/review/quality-reviewer.d.ts +15 -1
- package/dist/review/quality-reviewer.d.ts.map +1 -1
- package/dist/review/quality-reviewer.js +12 -7
- package/dist/review/quality-reviewer.js.map +1 -1
- package/dist/review/reviewer-prompt.d.ts +6 -2
- package/dist/review/reviewer-prompt.d.ts.map +1 -1
- package/dist/review/reviewer-prompt.js +23 -15
- package/dist/review/reviewer-prompt.js.map +1 -1
- package/dist/review/spec-reviewer.d.ts.map +1 -1
- package/dist/review/spec-reviewer.js +5 -4
- package/dist/review/spec-reviewer.js.map +1 -1
- package/dist/routing/canonical-model.d.ts +7 -0
- package/dist/routing/canonical-model.d.ts.map +1 -0
- package/dist/routing/canonical-model.js +11 -0
- package/dist/routing/canonical-model.js.map +1 -0
- package/dist/routing/model-profiles.d.ts +4 -3
- package/dist/routing/model-profiles.d.ts.map +1 -1
- package/dist/routing/model-profiles.js +12 -6
- package/dist/routing/model-profiles.js.map +1 -1
- package/dist/run-tasks/index.d.ts +2 -0
- package/dist/run-tasks/index.d.ts.map +1 -1
- package/dist/run-tasks/index.js.map +1 -1
- package/dist/run-tasks/reviewed-lifecycle.d.ts +2 -0
- package/dist/run-tasks/reviewed-lifecycle.d.ts.map +1 -1
- package/dist/run-tasks/reviewed-lifecycle.js +270 -102
- package/dist/run-tasks/reviewed-lifecycle.js.map +1 -1
- package/dist/run-tasks/worker-status.d.ts +6 -0
- package/dist/run-tasks/worker-status.d.ts.map +1 -1
- package/dist/run-tasks/worker-status.js +6 -0
- package/dist/run-tasks/worker-status.js.map +1 -1
- package/dist/runners/base/result-builders.d.ts +2 -0
- package/dist/runners/base/result-builders.d.ts.map +1 -1
- package/dist/runners/base/result-builders.js +2 -0
- package/dist/runners/base/result-builders.js.map +1 -1
- package/dist/runners/base/usage-accumulator.d.ts +2 -1
- package/dist/runners/base/usage-accumulator.d.ts.map +1 -1
- package/dist/runners/base/usage-accumulator.js +13 -10
- package/dist/runners/base/usage-accumulator.js.map +1 -1
- package/dist/runners/claude-runner.d.ts +11 -1
- package/dist/runners/claude-runner.d.ts.map +1 -1
- package/dist/runners/claude-runner.js +141 -27
- package/dist/runners/claude-runner.js.map +1 -1
- package/dist/runners/codex-runner.d.ts +10 -1
- package/dist/runners/codex-runner.d.ts.map +1 -1
- package/dist/runners/codex-runner.js +129 -29
- package/dist/runners/codex-runner.js.map +1 -1
- package/dist/runners/openai-runner.d.ts +41 -1
- package/dist/runners/openai-runner.d.ts.map +1 -1
- package/dist/runners/openai-runner.js +146 -48
- package/dist/runners/openai-runner.js.map +1 -1
- package/dist/runners/types.d.ts +19 -1
- package/dist/runners/types.d.ts.map +1 -1
- package/dist/telemetry/event-builder.d.ts +2 -2
- package/dist/telemetry/event-builder.d.ts.map +1 -1
- package/dist/telemetry/event-builder.js +111 -30
- package/dist/telemetry/event-builder.js.map +1 -1
- package/dist/telemetry/field-coverage.d.ts.map +1 -1
- package/dist/telemetry/field-coverage.js +17 -7
- package/dist/telemetry/field-coverage.js.map +1 -1
- package/dist/telemetry/types.d.ts +488 -233
- package/dist/telemetry/types.d.ts.map +1 -1
- package/dist/telemetry/types.js +86 -37
- package/dist/telemetry/types.js.map +1 -1
- package/dist/types.d.ts +14 -26
- package/dist/types.d.ts.map +1 -1
- package/dist/types.js +0 -73
- package/dist/types.js.map +1 -1
- package/package.json +1 -1
|
@@ -1,6 +1,5 @@
|
|
|
1
1
|
import { execFile } from 'node:child_process';
|
|
2
2
|
import { promisify } from 'node:util';
|
|
3
|
-
import { computeCostUSD } from '../types.js';
|
|
4
3
|
import { createProvider } from '../provider.js';
|
|
5
4
|
import { delegateWithEscalation } from '../delegate-with-escalation.js';
|
|
6
5
|
import { pickEscalation, pickReviewer, maxRowsFor, } from '../escalation/policy.js';
|
|
@@ -9,6 +8,7 @@ import { findModelCapabilities, findModelProfile } from '../routing/model-profil
|
|
|
9
8
|
import { canonicalIdentity } from '../routing/canonical-model-identity.js';
|
|
10
9
|
import { HeartbeatTimer } from '../heartbeat.js';
|
|
11
10
|
import { newStageIdleTracker, snapshotIdle } from './stage-idle-tracker.js';
|
|
11
|
+
import { priceTokens, subtractTokens, resolveRateCard } from '../cost/compute.js';
|
|
12
12
|
import { DEFAULT_TASK_TIMEOUT_MS, DEFAULT_STALL_TIMEOUT_MS, MAX_TIME_PRESTOP_RATIO } from '../config/schema.js';
|
|
13
13
|
import { runSpecReview } from '../review/spec-reviewer.js';
|
|
14
14
|
import { makeSkippedReviewResult } from '../review/skipped-result.js';
|
|
@@ -33,14 +33,14 @@ const READ_ONLY_TOOL_NAMES = new Set([
|
|
|
33
33
|
const _emptyMetrics = { inputTokens: null, outputTokens: null, cachedTokens: null, reasoningTokens: null, turnCount: null, toolCallCount: null, filesReadCount: null, filesWrittenCount: null };
|
|
34
34
|
export function emptyStats() {
|
|
35
35
|
return {
|
|
36
|
-
implementing: { stage: 'implementing', entered: false, durationMs: null, costUSD: null, agentTier: null, modelFamily: null, model: null, maxIdleMs:
|
|
37
|
-
spec_rework: { stage: 'spec_rework', entered: false, durationMs: null, costUSD: null, agentTier: null, modelFamily: null, model: null, maxIdleMs:
|
|
38
|
-
quality_rework: { stage: 'quality_rework', entered: false, durationMs: null, costUSD: null, agentTier: null, modelFamily: null, model: null, maxIdleMs:
|
|
39
|
-
committing: { stage: 'committing', entered: false, durationMs: null, costUSD: null, agentTier: null, modelFamily: null, model: null, maxIdleMs:
|
|
40
|
-
verifying: { stage: 'verifying', entered: false, durationMs: null, costUSD: null, agentTier: null, modelFamily: null, model: null, maxIdleMs:
|
|
41
|
-
spec_review: { stage: 'spec_review', entered: false, durationMs: null, costUSD: null, agentTier: null, modelFamily: null, model: null, maxIdleMs:
|
|
42
|
-
quality_review: { stage: 'quality_review', entered: false, durationMs: null, costUSD: null, agentTier: null, modelFamily: null, model: null, maxIdleMs:
|
|
43
|
-
diff_review: { stage: 'diff_review', entered: false, durationMs: null, costUSD: null, agentTier: null, modelFamily: null, model: null, maxIdleMs:
|
|
36
|
+
implementing: { stage: 'implementing', entered: false, durationMs: null, costUSD: null, agentTier: null, modelFamily: null, model: null, maxIdleMs: 0, totalIdleMs: 0, activityEvents: 0, ..._emptyMetrics },
|
|
37
|
+
spec_rework: { stage: 'spec_rework', entered: false, durationMs: null, costUSD: null, agentTier: null, modelFamily: null, model: null, maxIdleMs: 0, totalIdleMs: 0, activityEvents: 0, ..._emptyMetrics },
|
|
38
|
+
quality_rework: { stage: 'quality_rework', entered: false, durationMs: null, costUSD: null, agentTier: null, modelFamily: null, model: null, maxIdleMs: 0, totalIdleMs: 0, activityEvents: 0, ..._emptyMetrics },
|
|
39
|
+
committing: { stage: 'committing', entered: false, durationMs: null, costUSD: null, agentTier: null, modelFamily: null, model: null, maxIdleMs: 0, totalIdleMs: 0, activityEvents: 0, ..._emptyMetrics },
|
|
40
|
+
verifying: { stage: 'verifying', entered: false, durationMs: null, costUSD: null, agentTier: null, modelFamily: null, model: null, maxIdleMs: 0, totalIdleMs: 0, activityEvents: 0, outcome: null, skipReason: null, ..._emptyMetrics },
|
|
41
|
+
spec_review: { stage: 'spec_review', entered: false, durationMs: null, costUSD: null, agentTier: null, modelFamily: null, model: null, maxIdleMs: 0, totalIdleMs: 0, activityEvents: 0, verdict: null, roundsUsed: null, ..._emptyMetrics },
|
|
42
|
+
quality_review: { stage: 'quality_review', entered: false, durationMs: null, costUSD: null, agentTier: null, modelFamily: null, model: null, maxIdleMs: 0, totalIdleMs: 0, activityEvents: 0, verdict: null, roundsUsed: null, ..._emptyMetrics },
|
|
43
|
+
diff_review: { stage: 'diff_review', entered: false, durationMs: null, costUSD: null, agentTier: null, modelFamily: null, model: null, maxIdleMs: 0, totalIdleMs: 0, activityEvents: 0, verdict: null, roundsUsed: null, ..._emptyMetrics },
|
|
44
44
|
};
|
|
45
45
|
}
|
|
46
46
|
function modelFamily(model) {
|
|
@@ -59,9 +59,9 @@ export function endBaseStage(stats, name, t0, c0, agent, finalCostUSD, idle, met
|
|
|
59
59
|
agentTier: agent.tier,
|
|
60
60
|
modelFamily: modelFamily(agent.model),
|
|
61
61
|
model: agent.model,
|
|
62
|
-
maxIdleMs: idle?.maxIdleMs ??
|
|
63
|
-
totalIdleMs: idle?.totalIdleMs ??
|
|
64
|
-
activityEvents: idle?.activityEvents ??
|
|
62
|
+
maxIdleMs: idle?.maxIdleMs ?? 0,
|
|
63
|
+
totalIdleMs: idle?.totalIdleMs ?? 0,
|
|
64
|
+
activityEvents: idle?.activityEvents ?? 0,
|
|
65
65
|
inputTokens: metrics?.inputTokens ?? null,
|
|
66
66
|
outputTokens: metrics?.outputTokens ?? null,
|
|
67
67
|
cachedTokens: metrics?.cachedTokens ?? null,
|
|
@@ -83,14 +83,17 @@ metrics) {
|
|
|
83
83
|
stage: name,
|
|
84
84
|
entered: true,
|
|
85
85
|
durationMs: metrics?.durationMs !== undefined ? metrics.durationMs : Date.now() - t0,
|
|
86
|
-
|
|
86
|
+
// Item 7: != null (covers both undefined AND null) — null means
|
|
87
|
+
// "pricing unavailable, fall through to runningCostUSD computation"
|
|
88
|
+
// rather than masking unknown as the literal 0.
|
|
89
|
+
costUSD: metrics?.costUSD != null ? metrics.costUSD
|
|
87
90
|
: finalCostUSD !== null && c0 !== null ? finalCostUSD - c0 : null,
|
|
88
91
|
agentTier: agent.tier,
|
|
89
92
|
modelFamily: modelFamily(agent.model),
|
|
90
93
|
model: agent.model,
|
|
91
|
-
maxIdleMs: idle?.maxIdleMs ??
|
|
92
|
-
totalIdleMs: idle?.totalIdleMs ??
|
|
93
|
-
activityEvents: idle?.activityEvents ??
|
|
94
|
+
maxIdleMs: idle?.maxIdleMs ?? 0,
|
|
95
|
+
totalIdleMs: idle?.totalIdleMs ?? 0,
|
|
96
|
+
activityEvents: idle?.activityEvents ?? 0,
|
|
94
97
|
inputTokens: metrics?.inputTokens ?? null,
|
|
95
98
|
outputTokens: metrics?.outputTokens ?? null,
|
|
96
99
|
cachedTokens: metrics?.cachedTokens ?? null,
|
|
@@ -164,9 +167,9 @@ export function endVerifyStage(stats, t0, c0, agent, finalCostUSD, idle, outcome
|
|
|
164
167
|
agentTier: agent.tier,
|
|
165
168
|
modelFamily: modelFamily(agent.model),
|
|
166
169
|
model: agent.model,
|
|
167
|
-
maxIdleMs: idle?.maxIdleMs ??
|
|
168
|
-
totalIdleMs: idle?.totalIdleMs ??
|
|
169
|
-
activityEvents: idle?.activityEvents ??
|
|
170
|
+
maxIdleMs: idle?.maxIdleMs ?? 0,
|
|
171
|
+
totalIdleMs: idle?.totalIdleMs ?? 0,
|
|
172
|
+
activityEvents: idle?.activityEvents ?? 0,
|
|
170
173
|
inputTokens: null,
|
|
171
174
|
outputTokens: null,
|
|
172
175
|
cachedTokens: null,
|
|
@@ -448,10 +451,32 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
448
451
|
if (event.kind === 'turn_complete') {
|
|
449
452
|
heartbeat?.markEvent('llm');
|
|
450
453
|
const providerConfig = _activeRunnerProviderConfig ?? resolved.provider.config;
|
|
451
|
-
|
|
452
|
-
|
|
454
|
+
// §3.5 point 2: per-turn delta tracking from cumulative usage
|
|
455
|
+
const cur = {
|
|
456
|
+
inputTokens: event.cumulativeInputTokens,
|
|
457
|
+
outputTokens: event.cumulativeOutputTokens,
|
|
458
|
+
cachedReadTokens: event.cumulativeCachedReadTokens ?? 0,
|
|
459
|
+
cachedCreationTokens: event.cumulativeCachedCreationTokens ?? 0,
|
|
460
|
+
reasoningTokens: event.cumulativeReasoningTokens ?? 0,
|
|
461
|
+
};
|
|
462
|
+
const turnTokens = subtractTokens(cur, _lastCumulative);
|
|
463
|
+
_lastCumulative = cur;
|
|
464
|
+
const card = resolveRateCard(providerConfig.model, {
|
|
465
|
+
...(providerConfig.inputCostPerMTok !== undefined && { inputCostPerMTok: providerConfig.inputCostPerMTok }),
|
|
466
|
+
...(providerConfig.outputCostPerMTok !== undefined && { outputCostPerMTok: providerConfig.outputCostPerMTok }),
|
|
467
|
+
});
|
|
468
|
+
const turnCost = card ? priceTokens(turnTokens, card) : null;
|
|
469
|
+
if (turnCost !== null) {
|
|
470
|
+
_currentRunnerCostUSD = (_currentRunnerCostUSD ?? 0) + turnCost;
|
|
471
|
+
}
|
|
472
|
+
else {
|
|
473
|
+
_rateCardUnresolved = true;
|
|
474
|
+
}
|
|
453
475
|
const cumulativeCostUSD = (_completedRunnerCostUSD ?? 0) + _currentRunnerCostUSD;
|
|
454
476
|
heartbeat?.updateCost(cumulativeCostUSD, null);
|
|
477
|
+
if (_rateCardUnresolved) {
|
|
478
|
+
heartbeat?.markRateCardUnresolved();
|
|
479
|
+
}
|
|
455
480
|
const nowTurn = Date.now();
|
|
456
481
|
const turnDurMs = nowTurn - prevEventAtMs;
|
|
457
482
|
prevEventAtMs = nowTurn;
|
|
@@ -459,7 +484,7 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
459
484
|
emitTaskEvent('turn_complete', {
|
|
460
485
|
input_tokens: event.cumulativeInputTokens,
|
|
461
486
|
output_tokens: event.cumulativeOutputTokens,
|
|
462
|
-
cost:
|
|
487
|
+
cost: turnCost,
|
|
463
488
|
duration_ms: turnDurMs,
|
|
464
489
|
provider: providerConfig.model,
|
|
465
490
|
});
|
|
@@ -514,6 +539,24 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
514
539
|
const fallbackOverrides = [];
|
|
515
540
|
let latestAttemptedImpl;
|
|
516
541
|
let lastNonRejectedImpl;
|
|
542
|
+
// Review-stage timing variables hoisted so deferred-finalizer closures
|
|
543
|
+
// (defined below) can reference them from all early-exit paths.
|
|
544
|
+
let specReviewT0 = 0;
|
|
545
|
+
let specReviewC0 = null;
|
|
546
|
+
let specReviewDurationMs = 0;
|
|
547
|
+
let qualityReviewT0 = 0;
|
|
548
|
+
let qualityReviewC0 = null;
|
|
549
|
+
let qualityReviewDurationMs = 0;
|
|
550
|
+
// Accumulated metrics from spec/quality review results — threaded to
|
|
551
|
+
// the deferred finalizers so early-exit paths carry the same token/turn
|
|
552
|
+
// counts the normal post-loop path always had.
|
|
553
|
+
let specReviewMetrics = {};
|
|
554
|
+
let qualityReviewMetrics = {};
|
|
555
|
+
// Hoisted so deferred-finalizer closures (defined below) can reference
|
|
556
|
+
// these from all early-exit paths. Reassigned after the corresponding
|
|
557
|
+
// review stage runs.
|
|
558
|
+
let specStatus = 'error';
|
|
559
|
+
let qualityResult = { status: 'skipped', report: undefined, findings: [], errorReason: (reviewPolicy === 'full' || reviewPolicy === 'quality_only') ? 'all_tiers_unavailable' : 'skipped: reviewPolicy is spec_only' };
|
|
517
560
|
const reviewRounds = () => ({ spec: specAttemptIndex, quality: qualityAttemptIndex, metadata: metadataRepair, cap: Math.max(maxSpecRows, maxQualityRows) });
|
|
518
561
|
const taskCostUSD = () => (heartbeat ? heartbeat.getHeartbeatTickInfo().costUSD : null);
|
|
519
562
|
// Per-stage stats tracking
|
|
@@ -535,6 +578,46 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
535
578
|
const model = provider?.config.model ?? config.agents[tier]?.model ?? resolvedModel;
|
|
536
579
|
return { tier, family: modelFamily(model), model };
|
|
537
580
|
};
|
|
581
|
+
// Deferred finalizers for spec_review and quality_review. Called from
|
|
582
|
+
// the normal post-loop path AND from every early-exit path
|
|
583
|
+
// (round_cap, cost_ceiling, time_ceiling, all_tiers_unavailable).
|
|
584
|
+
// Idempotent on re-call; no-op when the stage was never started.
|
|
585
|
+
let specReviewFinalized = false;
|
|
586
|
+
let qualityReviewFinalized = false;
|
|
587
|
+
const finalizeSpecReviewStage = () => {
|
|
588
|
+
if (specReviewFinalized)
|
|
589
|
+
return;
|
|
590
|
+
if (specReviewT0 === 0)
|
|
591
|
+
return; // never started
|
|
592
|
+
specReviewFinalized = true;
|
|
593
|
+
const lastReviewer = specReviewerHistory[specReviewerHistory.length - 1];
|
|
594
|
+
const reviewerAgent = (lastReviewer === undefined || lastReviewer === 'skipped')
|
|
595
|
+
? implementerAgentInfo
|
|
596
|
+
: reviewerAgentInfoFor(lastReviewer);
|
|
597
|
+
endReviewStage(stats, 'spec_review', specReviewT0, specReviewC0, reviewerAgent, runningCostUSD(), snapshotIdle(stageIdle), specStatus === 'approved' ? 'approved'
|
|
598
|
+
: specStatus === 'changes_required' ? 'changes_required'
|
|
599
|
+
: specStatus === 'skipped' ? 'skipped'
|
|
600
|
+
: specStatus === 'not_applicable' ? 'not_applicable'
|
|
601
|
+
: 'error', specAttemptIndex, { ...specReviewMetrics, durationMs: specReviewDurationMs });
|
|
602
|
+
};
|
|
603
|
+
const finalizeQualityReviewStage = () => {
|
|
604
|
+
if (qualityReviewFinalized)
|
|
605
|
+
return;
|
|
606
|
+
if (qualityReviewT0 === 0)
|
|
607
|
+
return;
|
|
608
|
+
if (reviewPolicy !== 'full' && reviewPolicy !== 'quality_only')
|
|
609
|
+
return;
|
|
610
|
+
qualityReviewFinalized = true;
|
|
611
|
+
const lastReviewer = qualityReviewerHistory[qualityReviewerHistory.length - 1];
|
|
612
|
+
const reviewerAgent = (lastReviewer === undefined || lastReviewer === 'skipped')
|
|
613
|
+
? implementerAgentInfo
|
|
614
|
+
: reviewerAgentInfoFor(lastReviewer);
|
|
615
|
+
endReviewStage(stats, 'quality_review', qualityReviewT0, qualityReviewC0, reviewerAgent, runningCostUSD(), snapshotIdle(stageIdle), qualityResult.status === 'approved' ? 'approved'
|
|
616
|
+
: qualityResult.status === 'changes_required' ? 'changes_required'
|
|
617
|
+
: qualityResult.status === 'annotated' ? 'annotated'
|
|
618
|
+
: qualityResult.status === 'skipped' ? 'skipped'
|
|
619
|
+
: 'error', qualityAttemptIndex, { ...qualityReviewMetrics, durationMs: qualityReviewDurationMs });
|
|
620
|
+
};
|
|
538
621
|
// §3.9: runningCostUSD must be cumulative and monotonic across explicit
|
|
539
622
|
// runner boundaries. Runner progress reports per-runner cumulative token
|
|
540
623
|
// counts, so lifecycle cost is completed runners + current runner partial.
|
|
@@ -545,6 +628,13 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
545
628
|
let _currentRunnerCostUSD = 0;
|
|
546
629
|
let _activeRunnerProviderConfig = null;
|
|
547
630
|
let _prevRunningCost = null;
|
|
631
|
+
// Per-turn delta tracking state (§3.5 point 2). Reset at each
|
|
632
|
+
// provider.run() boundary via `runAccounted`.
|
|
633
|
+
let _lastCumulative = {
|
|
634
|
+
inputTokens: 0, outputTokens: 0,
|
|
635
|
+
cachedReadTokens: 0, cachedCreationTokens: 0, reasoningTokens: 0,
|
|
636
|
+
};
|
|
637
|
+
let _rateCardUnresolved = false;
|
|
548
638
|
const runningCostUSD = () => {
|
|
549
639
|
const current = _completedRunnerCostUSD !== null || _currentRunnerCostUSD !== 0
|
|
550
640
|
? (_completedRunnerCostUSD ?? 0) + _currentRunnerCostUSD
|
|
@@ -563,6 +653,11 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
563
653
|
}
|
|
564
654
|
_activeRunnerProviderConfig = provider.config;
|
|
565
655
|
_currentRunnerCostUSD = 0;
|
|
656
|
+
_lastCumulative = {
|
|
657
|
+
inputTokens: 0, outputTokens: 0,
|
|
658
|
+
cachedReadTokens: 0, cachedCreationTokens: 0, reasoningTokens: 0,
|
|
659
|
+
};
|
|
660
|
+
_rateCardUnresolved = false;
|
|
566
661
|
try {
|
|
567
662
|
const result = await call();
|
|
568
663
|
const actualCost = result?.usage?.costUSD
|
|
@@ -599,7 +694,7 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
599
694
|
// on the base result (set by callers via abortReviewLoop({ ...res, specReviewStatus, ... })).
|
|
600
695
|
// Defaults to 'changes_required' for whichever loop tripped — that's the only state the
|
|
601
696
|
// loop ever fires from, by construction.
|
|
602
|
-
function adaptForAllTiersUnavailable(base, loop, attempt, resolvedModel, salvageSource) {
|
|
697
|
+
function adaptForAllTiersUnavailable(base, loop, attempt, resolvedModel, salvageSource, unavailableReason) {
|
|
603
698
|
const stageName = loop === 'spec' && attempt === 0 ? 'implementing'
|
|
604
699
|
: loop === 'spec' ? 'spec_rework'
|
|
605
700
|
: 'quality_rework';
|
|
@@ -623,9 +718,9 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
623
718
|
agentTier: implementerAgentInfo.tier,
|
|
624
719
|
modelFamily: modelFamily(implementerAgentInfo.model),
|
|
625
720
|
model: implementerAgentInfo.model,
|
|
626
|
-
maxIdleMs:
|
|
627
|
-
totalIdleMs:
|
|
628
|
-
activityEvents:
|
|
721
|
+
maxIdleMs: 0,
|
|
722
|
+
totalIdleMs: 0,
|
|
723
|
+
activityEvents: 0,
|
|
629
724
|
inputTokens: salvageSource?.usage?.inputTokens ?? null,
|
|
630
725
|
outputTokens: salvageSource?.usage?.outputTokens ?? null,
|
|
631
726
|
cachedTokens: salvageSource?.usage?.cachedTokens ?? null,
|
|
@@ -636,6 +731,8 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
636
731
|
filesWrittenCount: (salvageSource?.filesWritten?.length) || null,
|
|
637
732
|
};
|
|
638
733
|
}
|
|
734
|
+
finalizeSpecReviewStage();
|
|
735
|
+
finalizeQualityReviewStage();
|
|
639
736
|
const ship = salvageSource ?? lastNonRejectedImpl?.result ?? base;
|
|
640
737
|
return {
|
|
641
738
|
...ship,
|
|
@@ -644,6 +741,7 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
644
741
|
terminationReason: 'all_tiers_unavailable',
|
|
645
742
|
reviewRounds: reviewRounds(),
|
|
646
743
|
error: `runWithFallback: both tiers unavailable (loop=${loop}, attempt=${attempt}, role=implementer)`,
|
|
744
|
+
errorCode: unavailableReason === 'reviewer_separation_unsatisfiable' ? 'reviewer_separation_unsatisfiable' : ship.errorCode,
|
|
647
745
|
agents: agentEnvelope(specReviewerHistory[specReviewerHistory.length - 1] ?? 'not_applicable', qualityReviewerHistory[qualityReviewerHistory.length - 1] ?? ((reviewPolicy === 'full' || reviewPolicy === 'quality_only') ? 'not_applicable' : 'skipped')),
|
|
648
746
|
stageStats: stats,
|
|
649
747
|
models: {
|
|
@@ -676,28 +774,32 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
676
774
|
...(fallbackOverrides.length > 0 ? { fallbackOverrides } : {}),
|
|
677
775
|
};
|
|
678
776
|
};
|
|
679
|
-
const abortReviewLoop = (base, terminationReason, message, aborting, wallClockMs) =>
|
|
680
|
-
|
|
681
|
-
|
|
682
|
-
|
|
683
|
-
|
|
684
|
-
|
|
685
|
-
:
|
|
686
|
-
|
|
687
|
-
|
|
688
|
-
|
|
689
|
-
|
|
690
|
-
|
|
691
|
-
|
|
692
|
-
|
|
693
|
-
|
|
694
|
-
|
|
695
|
-
|
|
696
|
-
|
|
697
|
-
|
|
698
|
-
|
|
699
|
-
|
|
700
|
-
|
|
777
|
+
const abortReviewLoop = (base, terminationReason, message, aborting, wallClockMs) => {
|
|
778
|
+
finalizeSpecReviewStage();
|
|
779
|
+
finalizeQualityReviewStage();
|
|
780
|
+
return {
|
|
781
|
+
...base,
|
|
782
|
+
status: 'incomplete',
|
|
783
|
+
workerStatus: 'review_loop_aborted',
|
|
784
|
+
terminationReason: terminationReason === 'round_cap'
|
|
785
|
+
? 'round_cap'
|
|
786
|
+
: {
|
|
787
|
+
cause: terminationReason === 'cost_ceiling' ? 'cost_exceeded' : 'time_ceiling',
|
|
788
|
+
turnsUsed: base.turns,
|
|
789
|
+
hasFileArtifacts: (base.filesWritten ?? []).length > 0,
|
|
790
|
+
usedShell: (base.toolCalls ?? []).some(c => c.startsWith('shell') || c.startsWith('runShell')),
|
|
791
|
+
workerSelfAssessment: 'review_loop_aborted',
|
|
792
|
+
wasPromoted: false,
|
|
793
|
+
...(wallClockMs !== undefined ? { wallClockMs } : {}),
|
|
794
|
+
},
|
|
795
|
+
reviewRounds: reviewRounds(),
|
|
796
|
+
error: message,
|
|
797
|
+
specReviewStatus: aborting === 'spec' ? 'changes_required' : (base.specReviewStatus ?? 'approved'),
|
|
798
|
+
qualityReviewStatus: aborting === 'quality' ? 'changes_required' : (base.qualityReviewStatus ?? 'skipped'),
|
|
799
|
+
agents: agentEnvelope(specReviewerHistory[specReviewerHistory.length - 1] ?? 'not_applicable', qualityReviewerHistory[qualityReviewerHistory.length - 1] ?? ((reviewPolicy === 'full' || reviewPolicy === 'quality_only') ? 'not_applicable' : 'skipped')),
|
|
800
|
+
stageStats: stats,
|
|
801
|
+
};
|
|
802
|
+
};
|
|
701
803
|
const defaultVerification = { status: 'skipped', steps: [], totalDurationMs: 0, skipReason: 'no_command' };
|
|
702
804
|
let latestVerification = defaultVerification;
|
|
703
805
|
async function runVerificationStage() {
|
|
@@ -763,6 +865,11 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
763
865
|
structuredError: { code: 'runner_crash', message: workerError.message },
|
|
764
866
|
workerStatus: 'failed',
|
|
765
867
|
workerError,
|
|
868
|
+
models: {
|
|
869
|
+
implementer: implModel,
|
|
870
|
+
specReviewer: null,
|
|
871
|
+
qualityReviewer: null,
|
|
872
|
+
},
|
|
766
873
|
});
|
|
767
874
|
}
|
|
768
875
|
function withVerification(result, verification = latestVerification) {
|
|
@@ -820,6 +927,17 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
820
927
|
stageStats: stats,
|
|
821
928
|
}, verification);
|
|
822
929
|
}
|
|
930
|
+
function diffReviewErrorTerminationReason(base) {
|
|
931
|
+
return {
|
|
932
|
+
cause: 'error',
|
|
933
|
+
turnsUsed: base.turns,
|
|
934
|
+
hasFileArtifacts: (base.filesWritten ?? []).length > 0,
|
|
935
|
+
usedShell: (base.toolCalls ?? []).some(c => c.startsWith('shell') || c.startsWith('runShell')),
|
|
936
|
+
workerSelfAssessment: 'failed',
|
|
937
|
+
wasPromoted: false,
|
|
938
|
+
...(base.terminationReason && typeof base.terminationReason === 'object' && base.terminationReason.wallClockMs !== undefined ? { wallClockMs: base.terminationReason.wallClockMs } : {}),
|
|
939
|
+
};
|
|
940
|
+
}
|
|
823
941
|
function resolveDiffOnlyTerminal(base, verdict, verification, diffTruncated) {
|
|
824
942
|
const concerns = [...(base.concerns ?? [])];
|
|
825
943
|
if ('status' in verdict && verdict.status === 'skipped') {
|
|
@@ -842,6 +960,7 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
842
960
|
code: 'diff_review_rejected',
|
|
843
961
|
message: verdict.message || 'diff review rejected implementation',
|
|
844
962
|
},
|
|
963
|
+
terminationReason: diffReviewErrorTerminationReason(base),
|
|
845
964
|
concerns,
|
|
846
965
|
commits,
|
|
847
966
|
commitError,
|
|
@@ -854,6 +973,12 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
854
973
|
status: verdict.status,
|
|
855
974
|
workerStatus: 'failed',
|
|
856
975
|
error: verdict.reason ?? `diff review transport failure: ${verdict.status}`,
|
|
976
|
+
errorCode: verdict.status,
|
|
977
|
+
structuredError: {
|
|
978
|
+
code: verdict.status,
|
|
979
|
+
message: verdict.reason ?? `diff review transport failure: ${verdict.status}`,
|
|
980
|
+
},
|
|
981
|
+
terminationReason: diffReviewErrorTerminationReason(base),
|
|
857
982
|
concerns: [...concerns, ...verdict.concerns],
|
|
858
983
|
commits,
|
|
859
984
|
commitError,
|
|
@@ -927,19 +1052,23 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
927
1052
|
const treeDirty = porcelain.length > 0;
|
|
928
1053
|
if (!headMoved && !treeDirty)
|
|
929
1054
|
return;
|
|
1055
|
+
// Emit committing stage for both worker-committed (headMoved) and
|
|
1056
|
+
// pending-commit (treeDirty) paths. Workers that auto-commit during
|
|
1057
|
+
// turns leave a clean tree but moved HEAD — they must still produce
|
|
1058
|
+
// a committing stage so telemetry includes filesCommittedCount.
|
|
1059
|
+
transitionStage('verifying', 'committing', { stage: 'committing', stageIndex: 7 }, null);
|
|
1060
|
+
const commitT0 = Date.now();
|
|
1061
|
+
const commitC0 = runningCostUSD();
|
|
930
1062
|
if (headMoved)
|
|
931
1063
|
await recordWorkerCommits(baselineHead, 'HEAD');
|
|
932
1064
|
if (treeDirty) {
|
|
933
1065
|
const validCommit = implReport?.commit ?? await repairCommitMetadata(implReport?.commitDiagnostic ?? 'no commit block emitted');
|
|
934
|
-
if (
|
|
935
|
-
|
|
936
|
-
|
|
937
|
-
|
|
938
|
-
const commitC0 = runningCostUSD();
|
|
939
|
-
const c = await runCommitStage({ cwd, filesWritten: implResult.filesWritten, commit: validCommit });
|
|
940
|
-
commits.push(c);
|
|
941
|
-
endBaseStage(stats, 'committing', commitT0, commitC0, implementerAgentInfo, runningCostUSD(), snapshotIdle(stageIdle));
|
|
1066
|
+
if (validCommit) {
|
|
1067
|
+
const c = await runCommitStage({ cwd, filesWritten: implResult.filesWritten, commit: validCommit });
|
|
1068
|
+
commits.push(c);
|
|
1069
|
+
}
|
|
942
1070
|
}
|
|
1071
|
+
endBaseStage(stats, 'committing', commitT0, commitC0, implementerAgentInfo, runningCostUSD(), snapshotIdle(stageIdle));
|
|
943
1072
|
}
|
|
944
1073
|
// Tracks the final RunResult across every exit path so the `finally` block
|
|
945
1074
|
// below fires `recorder.recordTaskCompleted` exactly once regardless of which
|
|
@@ -1037,9 +1166,9 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
1037
1166
|
assignedTier: initialDecision.impl,
|
|
1038
1167
|
reason: initialImpl.unavailableReason,
|
|
1039
1168
|
});
|
|
1040
|
-
return __recordOnce(adaptForAllTiersUnavailable(initialImpl.result, 'spec', 0, resolvedModel, initialImpl.salvageResult));
|
|
1169
|
+
return __recordOnce(adaptForAllTiersUnavailable(initialImpl.result, 'spec', 0, resolvedModel, initialImpl.salvageResult, initialImpl.unavailableReason));
|
|
1041
1170
|
}
|
|
1042
|
-
|
|
1171
|
+
let implResult = initialImpl.result;
|
|
1043
1172
|
latestAttemptedImpl = { tier: initialImpl.usedTier, result: implResult };
|
|
1044
1173
|
lastNonRejectedImpl = { tier: initialImpl.usedTier, result: implResult };
|
|
1045
1174
|
implementerHistory.push(initialImpl.usedTier);
|
|
@@ -1055,8 +1184,29 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
1055
1184
|
costUSD: implResult.usage?.costUSD ?? undefined,
|
|
1056
1185
|
});
|
|
1057
1186
|
specAttemptIndex = 1;
|
|
1058
|
-
const implReport =
|
|
1187
|
+
const implReport = parseStructuredReport(implResult.output);
|
|
1059
1188
|
const workerStatus = extractWorkerStatus(implReport);
|
|
1189
|
+
// Item 9: surface silent-incomplete via errorCode — the delegation layer
|
|
1190
|
+
// cascades result.status as a fallback errorCode (e.g., 'incomplete'),
|
|
1191
|
+
// which is not an informative error code. Replace it when the runner
|
|
1192
|
+
// produced no parseable summary — the operator can now filter on
|
|
1193
|
+
// 'incomplete_no_summary' instead of guessing.
|
|
1194
|
+
//
|
|
1195
|
+
// parseStructuredReport always returns a report object and has a
|
|
1196
|
+
// last-resort fallback that treats the first paragraph as an implicit
|
|
1197
|
+
// summary, so implReport.summary alone is not a reliable signal. Treat
|
|
1198
|
+
// the run as having a structured summary only when a real ## Summary
|
|
1199
|
+
// section exists and parses to non-placeholder content.
|
|
1200
|
+
const hasSummaryHeader = /\n##\s+summary\s*\n/i.test(implResult.output) || /^##\s+summary\s*\n/im.test(implResult.output);
|
|
1201
|
+
const summaryText = (hasSummaryHeader ? implReport.summary : null)?.trim().toLowerCase() ?? '';
|
|
1202
|
+
const hasStructuredSummary = hasSummaryHeader && summaryText !== ''
|
|
1203
|
+
&& !['none', '(none)', 'n/a', 'na', 'todo', 'tbd'].includes(summaryText);
|
|
1204
|
+
if (implResult.status === 'incomplete' && !hasStructuredSummary) {
|
|
1205
|
+
const cascadedFallback = implResult.errorCode === implResult.status;
|
|
1206
|
+
if (!implResult.errorCode || cascadedFallback) {
|
|
1207
|
+
implResult = { ...implResult, errorCode: 'incomplete_no_summary' };
|
|
1208
|
+
}
|
|
1209
|
+
}
|
|
1060
1210
|
if (implResult.status === 'ok' && isArtifactProducing) {
|
|
1061
1211
|
await captureCommitsAfterImplementation(implResult, implReport, baselineHead);
|
|
1062
1212
|
}
|
|
@@ -1195,6 +1345,7 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
1195
1345
|
getStatus: (r) => r.status,
|
|
1196
1346
|
makeSyntheticFailure: () => makeSkippedReviewResult('all_tiers_unavailable'),
|
|
1197
1347
|
forbiddenIdentities: implementerIdentity ? [implementerIdentity] : undefined,
|
|
1348
|
+
forbiddenTiers: [resolved.slot],
|
|
1198
1349
|
call: (provider) => runAccounted(provider, () => runDiffReview({ cwd, diff: evidence.fullDiff, diffTruncated: evidence.diffTruncated, verification, worker: { call: (prompt, opts) => provider.run(prompt, { cwd: opts?.cwd ?? cwd, abortSignal: opts?.abortSignal, timeoutMs: opts?.timeoutMs }) }, taskDeadlineMs, abortSignal: stallController.signal })),
|
|
1199
1350
|
});
|
|
1200
1351
|
if (diffCall.fallbackFired) {
|
|
@@ -1203,8 +1354,17 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
1203
1354
|
}
|
|
1204
1355
|
if (diffCall.bothUnavailable) {
|
|
1205
1356
|
emitFallbackUnavailable({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'diff', attempt: 0, role: 'diffReviewer', assignedTier: diffReviewerTier, reason: diffCall.unavailableReason });
|
|
1357
|
+
if (diffCall.unavailableReason === 'reviewer_separation_unsatisfiable') {
|
|
1358
|
+
return __recordOnce(adaptForAllTiersUnavailable({ ...implResult, errorCode: 'reviewer_separation_unsatisfiable', diffReviewStatus: 'error' }, 'spec', 0, resolvedModel, implResult, diffCall.unavailableReason));
|
|
1359
|
+
}
|
|
1206
1360
|
}
|
|
1207
|
-
const verdict = diffCall.bothUnavailable
|
|
1361
|
+
const verdict = diffCall.bothUnavailable ? makeSkippedReviewResult('all_tiers_unavailable') : diffCall.result;
|
|
1362
|
+
const diffEnvelopeStatus = 'kind' in verdict
|
|
1363
|
+
? (verdict.kind === 'approve' ? 'approved'
|
|
1364
|
+
: verdict.kind === 'concerns' ? 'approved'
|
|
1365
|
+
: verdict.kind === 'reject' ? 'changes_required'
|
|
1366
|
+
: 'error')
|
|
1367
|
+
: 'skipped';
|
|
1208
1368
|
emitTaskEvent('review_decision', {
|
|
1209
1369
|
stage: 'diff_review',
|
|
1210
1370
|
verdict: 'kind' in verdict
|
|
@@ -1220,7 +1380,7 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
1220
1380
|
// distinct from spec/quality verdicts. Map to the telemetry verdict enum here.
|
|
1221
1381
|
'kind' in verdict
|
|
1222
1382
|
? (verdict.kind === 'approve' ? 'approved'
|
|
1223
|
-
: verdict.kind === 'concerns' ? '
|
|
1383
|
+
: verdict.kind === 'concerns' ? 'approved'
|
|
1224
1384
|
: verdict.kind === 'reject' ? 'changes_required'
|
|
1225
1385
|
: 'error')
|
|
1226
1386
|
: 'skipped', 0);
|
|
@@ -1231,6 +1391,7 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
1231
1391
|
qualityReviewStatus: 'skipped',
|
|
1232
1392
|
specReviewReason: 'skipped: reviewPolicy is diff_only',
|
|
1233
1393
|
qualityReviewReason: 'skipped: reviewPolicy is diff_only',
|
|
1394
|
+
diffReviewStatus: diffEnvelopeStatus,
|
|
1234
1395
|
implementationReport: effectiveImplReport,
|
|
1235
1396
|
fileArtifactsMissing: implResult.status === 'ok' ? checkOutputTargets(outputTargets) : undefined,
|
|
1236
1397
|
agents: agentEnvelope('skipped', 'skipped'),
|
|
@@ -1240,19 +1401,8 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
1240
1401
|
let finalImplResult = implResult;
|
|
1241
1402
|
let finalImplReport = effectiveImplReport;
|
|
1242
1403
|
let specResult;
|
|
1243
|
-
let specStatus;
|
|
1244
1404
|
let specReport;
|
|
1245
1405
|
let specReviewReason;
|
|
1246
|
-
let specReviewT0 = 0;
|
|
1247
|
-
let specReviewC0 = null;
|
|
1248
|
-
// Delta-only timing: accumulate per-call wall durations across the
|
|
1249
|
-
// initial spec_review + every spec_rework round's re-review. This
|
|
1250
|
-
// replaces the `Date.now() - specReviewT0` fallback at endReviewStage,
|
|
1251
|
-
// which over-counts because endReviewStage runs AFTER spec_rework,
|
|
1252
|
-
// quality_review, AND quality_rework all complete. No absolute
|
|
1253
|
-
// timestamps go on the wire — Date.now() is used only as a local
|
|
1254
|
-
// delta source. Privacy.md guarantees ms-deltas only.
|
|
1255
|
-
let specReviewDurationMs = 0;
|
|
1256
1406
|
if (reviewPolicy !== 'quality_only') {
|
|
1257
1407
|
transitionStage('verifying', 'spec_review', { stage: 'spec_review', stageIndex: 2, reviewRound: 1, attemptCap: maxSpecRows }, null);
|
|
1258
1408
|
const initialReviewerTier = pickReviewer({ loop: 'spec', attemptIndex: 0, baseTier: resolved.slot });
|
|
@@ -1267,6 +1417,7 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
1267
1417
|
getStatus: (r) => r.status,
|
|
1268
1418
|
makeSyntheticFailure: () => makeSkippedReviewResult('all_tiers_unavailable'),
|
|
1269
1419
|
forbiddenIdentities: implementerIdentity ? [implementerIdentity] : undefined,
|
|
1420
|
+
forbiddenTiers: [resolved.slot],
|
|
1270
1421
|
call: (provider) => runAccounted(provider, () => runSpecReview(provider, packet, effectiveImplReport, fileContents, implResult.toolCalls, task.planContext, evidence.block, taskDeadlineMs, stallController.signal, wrappedOnProgress, cwd)),
|
|
1271
1422
|
});
|
|
1272
1423
|
specReviewDurationMs += Date.now() - initialSpecReviewIterStart;
|
|
@@ -1274,6 +1425,15 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
1274
1425
|
emitFallbackUnavailable({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'spec', attempt: 0, role: 'specReviewer', assignedTier: initialReviewerTier, reason: initialSpecReview.unavailableReason });
|
|
1275
1426
|
fallbackOverrides.push({ role: 'specReviewer', loop: 'spec', attempt: 0, assigned: initialReviewerTier, used: initialSpecReview.usedTier, reason: initialSpecReview.unavailableReason, triggeringStatus: initialSpecReview.fallbackTriggeringStatus, bothUnavailable: true });
|
|
1276
1427
|
specReviewerHistory.push('skipped');
|
|
1428
|
+
if (initialSpecReview.unavailableReason === 'reviewer_separation_unsatisfiable') {
|
|
1429
|
+
const unavailableBase = {
|
|
1430
|
+
...implResult,
|
|
1431
|
+
specReviewStatus: 'error',
|
|
1432
|
+
specReviewReason: 'reviewer separation unsatisfiable',
|
|
1433
|
+
errorCode: 'reviewer_separation_unsatisfiable',
|
|
1434
|
+
};
|
|
1435
|
+
return __recordOnce(adaptForAllTiersUnavailable(unavailableBase, 'spec', 0, resolvedModel, implResult, initialSpecReview.unavailableReason));
|
|
1436
|
+
}
|
|
1277
1437
|
}
|
|
1278
1438
|
else {
|
|
1279
1439
|
specReviewerHistory.push(initialSpecReview.usedTier);
|
|
@@ -1322,7 +1482,7 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
1322
1482
|
emitFallbackUnavailable({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'spec', attempt: specAttemptIndex, role: 'implementer', assignedTier: decision.impl, reason: reworkCall.unavailableReason });
|
|
1323
1483
|
if (decision.isEscalated)
|
|
1324
1484
|
emitEscalationUnavailable({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'spec', attempt: specAttemptIndex, role: 'implementer', wantedTier: decision.impl, reason: reworkCall.unavailableReason });
|
|
1325
|
-
return __recordOnce(adaptForAllTiersUnavailable(reworkCall.result, 'spec', specAttemptIndex, resolvedModel, reworkCall.salvageResult));
|
|
1485
|
+
return __recordOnce(adaptForAllTiersUnavailable(reworkCall.result, 'spec', specAttemptIndex, resolvedModel, reworkCall.salvageResult, reworkCall.unavailableReason));
|
|
1326
1486
|
}
|
|
1327
1487
|
finalImplResult = reworkCall.result;
|
|
1328
1488
|
latestAttemptedImpl = { tier: reworkCall.usedTier, result: finalImplResult };
|
|
@@ -1334,12 +1494,21 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
1334
1494
|
commitReworkStage(stats, 'spec_rework', specReworkAcc, implementerAgentInfo);
|
|
1335
1495
|
transitionStage('spec_rework', 'spec_review', { stage: 'spec_review', stageIndex: 2, reviewRound: specAttemptIndex + 1, attemptCap: maxSpecRows }, null);
|
|
1336
1496
|
const reReviewIterStart = Date.now();
|
|
1337
|
-
const reviewCall = await runWithFallback({ assigned: decision.reviewer, providerFor, unavailableTiers: specUnavailable, isTransportFailure: (r) => isReviewTransportFailure(r), getStatus: (r) => r.status, makeSyntheticFailure: () => makeSkippedReviewResult('all_tiers_unavailable'), forbiddenIdentities: implementerIdentity ? [implementerIdentity] : undefined, call: (provider) => runAccounted(provider, () => runSpecReview(provider, packet, finalImplReport, fileContents, finalImplResult.toolCalls, task.planContext, evidence.block, taskDeadlineMs, stallController.signal, wrappedOnProgress, cwd)) });
|
|
1497
|
+
const reviewCall = await runWithFallback({ assigned: decision.reviewer, providerFor, unavailableTiers: specUnavailable, isTransportFailure: (r) => isReviewTransportFailure(r), getStatus: (r) => r.status, makeSyntheticFailure: () => makeSkippedReviewResult('all_tiers_unavailable'), forbiddenIdentities: implementerIdentity ? [implementerIdentity] : undefined, forbiddenTiers: [resolved.slot], call: (provider) => runAccounted(provider, () => runSpecReview(provider, packet, finalImplReport, fileContents, finalImplResult.toolCalls, task.planContext, evidence.block, taskDeadlineMs, stallController.signal, wrappedOnProgress, cwd)) });
|
|
1338
1498
|
specReviewDurationMs += Date.now() - reReviewIterStart;
|
|
1339
1499
|
if (reviewCall.bothUnavailable) {
|
|
1340
1500
|
emitFallbackUnavailable({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'spec', attempt: specAttemptIndex, role: 'specReviewer', assignedTier: decision.reviewer, reason: reviewCall.unavailableReason });
|
|
1341
1501
|
fallbackOverrides.push({ role: 'specReviewer', loop: 'spec', attempt: specAttemptIndex, assigned: decision.reviewer, used: reviewCall.usedTier, reason: reviewCall.unavailableReason, triggeringStatus: reviewCall.fallbackTriggeringStatus, bothUnavailable: true });
|
|
1342
1502
|
specReviewerHistory.push('skipped');
|
|
1503
|
+
if (reviewCall.unavailableReason === 'reviewer_separation_unsatisfiable') {
|
|
1504
|
+
const unavailableBase = {
|
|
1505
|
+
...finalImplResult,
|
|
1506
|
+
specReviewStatus: 'error',
|
|
1507
|
+
specReviewReason: 'reviewer separation unsatisfiable',
|
|
1508
|
+
errorCode: 'reviewer_separation_unsatisfiable',
|
|
1509
|
+
};
|
|
1510
|
+
return __recordOnce(adaptForAllTiersUnavailable(unavailableBase, 'spec', specAttemptIndex, resolvedModel, finalImplResult, reviewCall.unavailableReason));
|
|
1511
|
+
}
|
|
1343
1512
|
}
|
|
1344
1513
|
else {
|
|
1345
1514
|
specReviewerHistory.push(reviewCall.usedTier);
|
|
@@ -1370,17 +1539,6 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
1370
1539
|
specReport = undefined;
|
|
1371
1540
|
specReviewReason = 'skipped: reviewPolicy is quality_only';
|
|
1372
1541
|
}
|
|
1373
|
-
let qualityResult = { status: 'skipped', report: undefined, findings: [], errorReason: (reviewPolicy === 'full' || reviewPolicy === 'quality_only') ? 'all_tiers_unavailable' : 'skipped: reviewPolicy is spec_only' };
|
|
1374
|
-
// Hoisted so endReviewStage (called after this block) can read them on the
|
|
1375
|
-
// success path. When the quality review is skipped (`reviewPolicy !== 'full'`),
|
|
1376
|
-
// the values stay at 0/null and the corresponding stage entry remains in its
|
|
1377
|
-
// `entered: false` default — endReviewStage is never called.
|
|
1378
|
-
let qualityReviewT0 = 0;
|
|
1379
|
-
let qualityReviewC0 = null;
|
|
1380
|
-
// Same delta-only timing pattern as spec_review — accumulate per-call
|
|
1381
|
-
// wall durations across initial + each rework round's re-review. No
|
|
1382
|
-
// raw timestamps cross the wire.
|
|
1383
|
-
let qualityReviewDurationMs = 0;
|
|
1384
1542
|
if (reviewPolicy === 'full' || reviewPolicy === 'quality_only') {
|
|
1385
1543
|
qualityUnavailable = new Map();
|
|
1386
1544
|
const qualityReviewerTier = pickReviewer({ loop: 'quality', attemptIndex: 0, baseTier: resolved.slot });
|
|
@@ -1388,12 +1546,21 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
1388
1546
|
qualityReviewT0 = Date.now();
|
|
1389
1547
|
qualityReviewC0 = runningCostUSD();
|
|
1390
1548
|
const initialQualityIterStart = Date.now();
|
|
1391
|
-
const initialQuality = await runWithFallback({ assigned: qualityReviewerTier, providerFor, unavailableTiers: qualityUnavailable, isTransportFailure: (r) => isReviewTransportFailure(r), getStatus: (r) => r.status, makeSyntheticFailure: () => makeSkippedReviewResult('all_tiers_unavailable'), forbiddenIdentities: implementerIdentity ? [implementerIdentity] : undefined, call: (provider) => runAccounted(provider, () => runQualityReview(provider, packet, specReport ?? finalImplReport, fileContents, finalImplResult.toolCalls, finalImplResult.filesWritten, evidence.block, qualityReviewPromptBuilder, finalImplResult.output, taskDeadlineMs, stallController.signal, wrappedOnProgress, cwd)) });
|
|
1549
|
+
const initialQuality = await runWithFallback({ assigned: qualityReviewerTier, providerFor, unavailableTiers: qualityUnavailable, isTransportFailure: (r) => isReviewTransportFailure(r), getStatus: (r) => r.status, makeSyntheticFailure: () => makeSkippedReviewResult('all_tiers_unavailable'), forbiddenIdentities: implementerIdentity ? [implementerIdentity] : undefined, forbiddenTiers: [resolved.slot], call: (provider) => runAccounted(provider, () => runQualityReview(provider, packet, specReport ?? finalImplReport, fileContents, finalImplResult.toolCalls, finalImplResult.filesWritten, evidence.block, qualityReviewPromptBuilder, finalImplResult.output, taskDeadlineMs, stallController.signal, wrappedOnProgress, cwd)) });
|
|
1392
1550
|
qualityReviewDurationMs += Date.now() - initialQualityIterStart;
|
|
1393
1551
|
if (initialQuality.bothUnavailable) {
|
|
1394
1552
|
emitFallbackUnavailable({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'quality', attempt: 0, role: 'qualityReviewer', assignedTier: qualityReviewerTier, reason: initialQuality.unavailableReason });
|
|
1395
1553
|
fallbackOverrides.push({ role: 'qualityReviewer', loop: 'quality', attempt: 0, assigned: qualityReviewerTier, used: initialQuality.usedTier, reason: initialQuality.unavailableReason, triggeringStatus: initialQuality.fallbackTriggeringStatus, bothUnavailable: true });
|
|
1396
1554
|
qualityReviewerHistory.push('skipped');
|
|
1555
|
+
if (initialQuality.unavailableReason === 'reviewer_separation_unsatisfiable') {
|
|
1556
|
+
const unavailableBase = {
|
|
1557
|
+
...finalImplResult,
|
|
1558
|
+
qualityReviewStatus: 'error',
|
|
1559
|
+
qualityReviewReason: 'reviewer separation unsatisfiable',
|
|
1560
|
+
errorCode: 'reviewer_separation_unsatisfiable',
|
|
1561
|
+
};
|
|
1562
|
+
return __recordOnce(adaptForAllTiersUnavailable(unavailableBase, 'quality', 0, resolvedModel, finalImplResult, initialQuality.unavailableReason));
|
|
1563
|
+
}
|
|
1397
1564
|
}
|
|
1398
1565
|
else {
|
|
1399
1566
|
qualityReviewerHistory.push(initialQuality.usedTier);
|
|
@@ -1480,7 +1647,7 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
1480
1647
|
emitFallbackUnavailable({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'quality', attempt: qualityAttemptIndex, role: 'implementer', assignedTier: decision.impl, reason: reworkCall.unavailableReason });
|
|
1481
1648
|
if (decision.isEscalated)
|
|
1482
1649
|
emitEscalationUnavailable({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'quality', attempt: qualityAttemptIndex, role: 'implementer', wantedTier: decision.impl, reason: reworkCall.unavailableReason });
|
|
1483
|
-
return __recordOnce(adaptForAllTiersUnavailable(reworkCall.result, 'quality', qualityAttemptIndex, resolvedModel, reworkCall.salvageResult));
|
|
1650
|
+
return __recordOnce(adaptForAllTiersUnavailable(reworkCall.result, 'quality', qualityAttemptIndex, resolvedModel, reworkCall.salvageResult, reworkCall.unavailableReason));
|
|
1484
1651
|
}
|
|
1485
1652
|
finalImplResult = reworkCall.result;
|
|
1486
1653
|
latestAttemptedImpl = { tier: reworkCall.usedTier, result: finalImplResult };
|
|
@@ -1492,12 +1659,21 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
1492
1659
|
commitReworkStage(stats, 'quality_rework', qualityReworkAcc, implementerAgentInfo);
|
|
1493
1660
|
transitionStage('quality_rework', 'quality_review', { stage: 'quality_review', stageIndex: 4, reviewRound: qualityAttemptIndex + 1, attemptCap: maxQualityRows }, null);
|
|
1494
1661
|
const qReReviewIterStart = Date.now();
|
|
1495
|
-
const reviewCall = await runWithFallback({ assigned: decision.reviewer, providerFor, unavailableTiers: qualityUnavailable, isTransportFailure: (r) => isReviewTransportFailure(r), getStatus: (r) => r.status, makeSyntheticFailure: () => makeSkippedReviewResult('all_tiers_unavailable'), forbiddenIdentities: implementerIdentity ? [implementerIdentity] : undefined, call: (provider) => runAccounted(provider, () => runQualityReview(provider, packet, finalImplReport, fileContents, finalImplResult.toolCalls, finalImplResult.filesWritten, evidence.block, qualityReviewPromptBuilder, finalImplResult.output, taskDeadlineMs, stallController.signal, wrappedOnProgress, cwd)) });
|
|
1662
|
+
const reviewCall = await runWithFallback({ assigned: decision.reviewer, providerFor, unavailableTiers: qualityUnavailable, isTransportFailure: (r) => isReviewTransportFailure(r), getStatus: (r) => r.status, makeSyntheticFailure: () => makeSkippedReviewResult('all_tiers_unavailable'), forbiddenIdentities: implementerIdentity ? [implementerIdentity] : undefined, forbiddenTiers: [resolved.slot], call: (provider) => runAccounted(provider, () => runQualityReview(provider, packet, finalImplReport, fileContents, finalImplResult.toolCalls, finalImplResult.filesWritten, evidence.block, qualityReviewPromptBuilder, finalImplResult.output, taskDeadlineMs, stallController.signal, wrappedOnProgress, cwd)) });
|
|
1496
1663
|
qualityReviewDurationMs += Date.now() - qReReviewIterStart;
|
|
1497
1664
|
if (reviewCall.bothUnavailable) {
|
|
1498
1665
|
emitFallbackUnavailable({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'quality', attempt: qualityAttemptIndex, role: 'qualityReviewer', assignedTier: decision.reviewer, reason: reviewCall.unavailableReason });
|
|
1499
1666
|
fallbackOverrides.push({ role: 'qualityReviewer', loop: 'quality', attempt: qualityAttemptIndex, assigned: decision.reviewer, used: reviewCall.usedTier, reason: reviewCall.unavailableReason, triggeringStatus: reviewCall.fallbackTriggeringStatus, bothUnavailable: true });
|
|
1500
1667
|
qualityReviewerHistory.push('skipped');
|
|
1668
|
+
if (reviewCall.unavailableReason === 'reviewer_separation_unsatisfiable') {
|
|
1669
|
+
const unavailableBase = {
|
|
1670
|
+
...finalImplResult,
|
|
1671
|
+
qualityReviewStatus: 'error',
|
|
1672
|
+
qualityReviewReason: 'reviewer separation unsatisfiable',
|
|
1673
|
+
errorCode: 'reviewer_separation_unsatisfiable',
|
|
1674
|
+
};
|
|
1675
|
+
return __recordOnce(adaptForAllTiersUnavailable(unavailableBase, 'quality', qualityAttemptIndex, resolvedModel, finalImplResult, reviewCall.unavailableReason));
|
|
1676
|
+
}
|
|
1501
1677
|
}
|
|
1502
1678
|
else {
|
|
1503
1679
|
qualityReviewerHistory.push(reviewCall.usedTier);
|
|
@@ -1559,21 +1735,11 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
1559
1735
|
// override. endReviewStage uses the override when present and falls
|
|
1560
1736
|
// back to `Date.now() - t0` otherwise (which over-counts review-block
|
|
1561
1737
|
// span across rework + later stages).
|
|
1562
|
-
|
|
1563
|
-
|
|
1564
|
-
|
|
1565
|
-
|
|
1566
|
-
: specStatus === 'changes_required' ? 'changes_required'
|
|
1567
|
-
: specStatus === 'skipped' ? 'skipped'
|
|
1568
|
-
: specStatus === 'not_applicable' ? 'not_applicable'
|
|
1569
|
-
: 'error', specAttemptIndex, specMetrics);
|
|
1570
|
-
}
|
|
1738
|
+
specReviewMetrics = (specResult.metrics ?? {});
|
|
1739
|
+
qualityReviewMetrics = (qualityResult.metrics ?? {});
|
|
1740
|
+
finalizeSpecReviewStage();
|
|
1741
|
+
finalizeQualityReviewStage();
|
|
1571
1742
|
const qualityAggregateStatus = qualityResult.status;
|
|
1572
|
-
endReviewStage(stats, 'quality_review', qualityReviewT0, qualityReviewC0, qualityReviewAgent, runningCostUSD(), snapshotIdle(stageIdle), qualityResult.status === 'approved' ? 'approved'
|
|
1573
|
-
: qualityResult.status === 'changes_required' ? 'changes_required'
|
|
1574
|
-
: qualityResult.status === 'annotated' ? 'annotated'
|
|
1575
|
-
: qualityResult.status === 'skipped' ? 'skipped'
|
|
1576
|
-
: 'error', qualityAttemptIndex, qualityMetrics);
|
|
1577
1743
|
const aggregated = aggregateResult(finalReport, specReport, qualityResult.report, specAggregateStatus, qualityAggregateStatus);
|
|
1578
1744
|
// File artifact verification: check whether output targets exist on disk after all work.
|
|
1579
1745
|
// Only applies when status is ok; non-ok statuses skip verification entirely.
|
|
@@ -1655,6 +1821,8 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
1655
1821
|
client: _client ?? 'claude-code',
|
|
1656
1822
|
triggeringSkill: _triggeringSkill ?? 'direct',
|
|
1657
1823
|
parentModel: task.parentModel ?? null,
|
|
1824
|
+
reviewPolicy,
|
|
1825
|
+
verifyCommandPresent: !!(task.verifyCommand && task.verifyCommand.length > 0),
|
|
1658
1826
|
});
|
|
1659
1827
|
}
|
|
1660
1828
|
catch { /* silent */ }
|