@zhixuan92/multi-model-agent-core 3.5.2 → 3.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/config/schema.d.ts +3 -0
- package/dist/config/schema.d.ts.map +1 -1
- package/dist/config/schema.js +10 -1
- package/dist/config/schema.js.map +1 -1
- package/dist/delegate-with-escalation.d.ts +14 -0
- package/dist/delegate-with-escalation.d.ts.map +1 -1
- package/dist/delegate-with-escalation.js +29 -1
- package/dist/delegate-with-escalation.js.map +1 -1
- package/dist/executors/audit.d.ts.map +1 -1
- package/dist/executors/audit.js +16 -2
- package/dist/executors/audit.js.map +1 -1
- package/dist/executors/debug.d.ts.map +1 -1
- package/dist/executors/debug.js +8 -1
- package/dist/executors/debug.js.map +1 -1
- package/dist/executors/delegate.d.ts.map +1 -1
- package/dist/executors/delegate.js +23 -1
- package/dist/executors/delegate.js.map +1 -1
- package/dist/executors/execute-plan.d.ts.map +1 -1
- package/dist/executors/execute-plan.js +16 -2
- package/dist/executors/execute-plan.js.map +1 -1
- package/dist/executors/execution-context.d.ts.map +1 -1
- package/dist/executors/execution-context.js +4 -0
- package/dist/executors/execution-context.js.map +1 -1
- package/dist/executors/investigate.d.ts.map +1 -1
- package/dist/executors/investigate.js +24 -1
- package/dist/executors/investigate.js.map +1 -1
- package/dist/executors/retry.d.ts.map +1 -1
- package/dist/executors/retry.js +25 -2
- package/dist/executors/retry.js.map +1 -1
- package/dist/executors/review.d.ts.map +1 -1
- package/dist/executors/review.js +16 -2
- package/dist/executors/review.js.map +1 -1
- package/dist/executors/types.d.ts +35 -0
- package/dist/executors/types.d.ts.map +1 -1
- package/dist/executors/verify.d.ts.map +1 -1
- package/dist/executors/verify.js +16 -2
- package/dist/executors/verify.js.map +1 -1
- package/dist/heartbeat.d.ts +1 -1
- package/dist/heartbeat.d.ts.map +1 -1
- package/dist/heartbeat.js +10 -3
- package/dist/heartbeat.js.map +1 -1
- package/dist/routing/model-profiles.d.ts +1 -0
- package/dist/routing/model-profiles.d.ts.map +1 -1
- package/dist/routing/model-profiles.js +3 -0
- package/dist/routing/model-profiles.js.map +1 -1
- package/dist/run-tasks/index.d.ts +17 -0
- package/dist/run-tasks/index.d.ts.map +1 -1
- package/dist/run-tasks/index.js +1 -1
- package/dist/run-tasks/index.js.map +1 -1
- package/dist/run-tasks/reviewed-lifecycle.d.ts +24 -2
- package/dist/run-tasks/reviewed-lifecycle.d.ts.map +1 -1
- package/dist/run-tasks/reviewed-lifecycle.js +191 -13
- package/dist/run-tasks/reviewed-lifecycle.js.map +1 -1
- package/dist/runners/claude-runner.d.ts.map +1 -1
- package/dist/runners/claude-runner.js +8 -5
- package/dist/runners/claude-runner.js.map +1 -1
- package/dist/runners/codex-runner.d.ts.map +1 -1
- package/dist/runners/codex-runner.js +6 -3
- package/dist/runners/codex-runner.js.map +1 -1
- package/dist/runners/error-classification.d.ts +8 -0
- package/dist/runners/error-classification.d.ts.map +1 -1
- package/dist/runners/error-classification.js +17 -0
- package/dist/runners/error-classification.js.map +1 -1
- package/dist/runners/openai-runner.d.ts.map +1 -1
- package/dist/runners/openai-runner.js +6 -3
- package/dist/runners/openai-runner.js.map +1 -1
- package/dist/runners/types.d.ts +6 -1
- package/dist/runners/types.d.ts.map +1 -1
- package/dist/telemetry/bucketing.d.ts +11 -0
- package/dist/telemetry/bucketing.d.ts.map +1 -0
- package/dist/telemetry/bucketing.js +52 -0
- package/dist/telemetry/bucketing.js.map +1 -0
- package/dist/telemetry/concern-classifier.d.ts +9 -0
- package/dist/telemetry/concern-classifier.d.ts.map +1 -0
- package/dist/telemetry/concern-classifier.js +21 -0
- package/dist/telemetry/concern-classifier.js.map +1 -0
- package/dist/telemetry/consent-rules.d.ts +17 -0
- package/dist/telemetry/consent-rules.d.ts.map +1 -0
- package/dist/telemetry/consent-rules.js +32 -0
- package/dist/telemetry/consent-rules.js.map +1 -0
- package/dist/telemetry/event-builder.d.ts +23 -0
- package/dist/telemetry/event-builder.d.ts.map +1 -0
- package/dist/telemetry/event-builder.js +321 -0
- package/dist/telemetry/event-builder.js.map +1 -0
- package/dist/telemetry/types.d.ts +1870 -0
- package/dist/telemetry/types.d.ts.map +1 -0
- package/dist/telemetry/types.js +373 -0
- package/dist/telemetry/types.js.map +1 -0
- package/dist/types.d.ts +81 -2
- package/dist/types.d.ts.map +1 -1
- package/dist/types.js +18 -2
- package/dist/types.js.map +1 -1
- package/package.json +13 -1
|
@@ -5,6 +5,7 @@ import { createProvider } from '../provider.js';
|
|
|
5
5
|
import { delegateWithEscalation } from '../delegate-with-escalation.js';
|
|
6
6
|
import { pickEscalation, pickReviewer, maxRowsFor, } from '../escalation/policy.js';
|
|
7
7
|
import { runWithFallback, makeSyntheticRunResult, TRANSPORT_FAILURES, isReviewTransportFailure, } from '../escalation/fallback.js';
|
|
8
|
+
import { findModelCapabilities } from '../routing/model-profiles.js';
|
|
8
9
|
import { HeartbeatTimer } from '../heartbeat.js';
|
|
9
10
|
import { runSpecReview } from '../review/spec-reviewer.js';
|
|
10
11
|
import { makeSkippedReviewResult } from '../review/skipped-result.js';
|
|
@@ -22,7 +23,63 @@ import { buildFallbackImplReport, readImplementerFileContents } from './fallback
|
|
|
22
23
|
import { composeVerboseLine, toVerboseFields } from '../diagnostics/verbose-line.js';
|
|
23
24
|
import { withDoneCondition } from './execute-task.js';
|
|
24
25
|
const exec = promisify(execFile);
|
|
25
|
-
export
|
|
26
|
+
/**
 * Builds a fresh per-stage stats table in which no stage has been entered.
 *
 * Every entry carries the shared fields (`stage`, `entered`, `durationMs`,
 * `costUSD`, `agentTier`, `modelFamily`, `model`). The `verifying` entry adds
 * `outcome` / `skipReason`; the three review entries add `verdict` /
 * `roundsUsed`. Each call returns newly allocated objects.
 *
 * @returns {object} Stats table keyed by stage name.
 */
export function emptyStats() {
    // Shared shape of a stage that has not run yet.
    const blank = (stage) => ({
        stage,
        entered: false,
        durationMs: null,
        costUSD: null,
        agentTier: null,
        modelFamily: null,
        model: null,
    });
    // Verification additionally tracks how it ended and why it was skipped.
    const blankVerify = () => ({ ...blank('verifying'), outcome: null, skipReason: null });
    // Review stages additionally track the verdict and the rounds consumed.
    const blankReview = (stage) => ({ ...blank(stage), verdict: null, roundsUsed: null });
    return {
        implementing: blank('implementing'),
        spec_rework: blank('spec_rework'),
        quality_rework: blank('quality_rework'),
        committing: blank('committing'),
        verifying: blankVerify(),
        spec_review: blankReview('spec_review'),
        quality_review: blankReview('quality_review'),
        diff_review: blankReview('diff_review'),
    };
}
|
|
38
|
+
/**
 * Derives a coarse model family from a model identifier: the text before the
 * first '-' (e.g. 'claude-sonnet' -> 'claude'). Identifiers with no dash, or
 * with a leading dash, are returned unchanged.
 *
 * @param {string} model - Model identifier.
 * @returns {string|null} The family prefix, or null when `model` is not a string.
 */
function modelFamily(model) {
    // The result only feeds stage stats, whose empty entries already use
    // `modelFamily: null`; a missing model id must not throw here.
    if (typeof model !== 'string') {
        return null;
    }
    const dash = model.indexOf('-');
    return dash > 0 ? model.slice(0, dash) : model;
}
|
|
42
|
+
/**
 * Marks a base stage as entered by overwriting `stats[name]` with a completed
 * entry.
 *
 * @param {object} stats - Stats table to mutate (see `emptyStats`).
 * @param {string} name - Stage key; also stored as the entry's `stage`.
 * @param {number} t0 - `Date.now()` timestamp taken when the stage started.
 * @param {number|null} c0 - Running cost sampled at stage start, if known.
 * @param {{tier: *, model: string}} agent - Agent whose tier/model are recorded.
 * @param {number|null} finalCostUSD - Running cost sampled at stage end, if known.
 * @returns {void}
 */
export function endBaseStage(stats, name, t0, c0, agent, finalCostUSD) {
    // Nullish check (not just `!== null`): an `undefined` cost sample would
    // otherwise turn the delta into NaN instead of "unknown" (null).
    const costDelta = finalCostUSD != null && c0 != null ? finalCostUSD - c0 : null;
    // The slot written must be the variant matching `name`; per the original
    // note this invariant is covered by tests/run-tasks/stage-stats.test.ts.
    stats[name] = {
        stage: name,
        entered: true,
        durationMs: Date.now() - t0,
        costUSD: costDelta,
        agentTier: agent.tier,
        modelFamily: modelFamily(agent.model),
        model: agent.model,
    };
}
|
|
56
|
+
/**
 * Marks a review stage as entered by overwriting `stats[name]` with a
 * completed entry that also carries the review verdict and rounds used.
 *
 * @param {object} stats - Stats table to mutate (see `emptyStats`).
 * @param {string} name - Review stage key; also stored as the entry's `stage`.
 * @param {number} t0 - `Date.now()` timestamp taken when the stage started.
 * @param {number|null} c0 - Running cost sampled at stage start, if known.
 * @param {{tier: *, model: string}} agent - Agent whose tier/model are recorded.
 * @param {number|null} finalCostUSD - Running cost sampled at stage end, if known.
 * @param {string} verdict - Verdict value stored on the entry as-is.
 * @param {number} roundsUsed - Round count stored on the entry as-is.
 * @returns {void}
 */
export function endReviewStage(stats, name, t0, c0, agent, finalCostUSD, verdict, roundsUsed) {
    // Nullish check (not just `!== null`): an `undefined` cost sample would
    // otherwise turn the delta into NaN instead of "unknown" (null).
    const costDelta = finalCostUSD != null && c0 != null ? finalCostUSD - c0 : null;
    stats[name] = {
        stage: name,
        entered: true,
        durationMs: Date.now() - t0,
        costUSD: costDelta,
        agentTier: agent.tier,
        modelFamily: modelFamily(agent.model),
        model: agent.model,
        verdict,
        roundsUsed,
    };
}
|
|
69
|
+
/**
 * Marks the `verifying` stage as entered by overwriting `stats.verifying`
 * with a completed entry that also carries the verification outcome and the
 * skip reason.
 *
 * @param {object} stats - Stats table to mutate (see `emptyStats`).
 * @param {number} t0 - `Date.now()` timestamp taken when verification started.
 * @param {number|null} c0 - Running cost sampled at stage start, if known.
 * @param {{tier: *, model: string}} agent - Agent whose tier/model are recorded.
 * @param {number|null} finalCostUSD - Running cost sampled at stage end, if known.
 * @param {string} outcome - Outcome value stored on the entry as-is.
 * @param {string|null} skipReason - Skip reason stored on the entry as-is.
 * @returns {void}
 */
export function endVerifyStage(stats, t0, c0, agent, finalCostUSD, outcome, skipReason) {
    // Nullish check (not just `!== null`): an `undefined` cost sample would
    // otherwise turn the delta into NaN instead of "unknown" (null).
    const costDelta = finalCostUSD != null && c0 != null ? finalCostUSD - c0 : null;
    stats.verifying = {
        stage: 'verifying',
        entered: true,
        durationMs: Date.now() - t0,
        costUSD: costDelta,
        agentTier: agent.tier,
        modelFamily: modelFamily(agent.model),
        model: agent.model,
        outcome,
        skipReason,
    };
}
|
|
82
|
+
export async function executeReviewedLifecycle(task, resolved, config, taskIndex, onProgress, heartbeatWiring, diagnostics, recorder, _route, _client, _triggeringSkill) {
|
|
26
83
|
const reviewPolicy = task.reviewPolicy ?? 'full';
|
|
27
84
|
const otherSlot = resolved.slot === 'standard' ? 'complex' : 'standard';
|
|
28
85
|
let escalationProvider;
|
|
@@ -123,6 +180,20 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
123
180
|
tick_ms: heartbeat ? 5000 : undefined,
|
|
124
181
|
reason: heartbeat ? undefined : 'no_consumer',
|
|
125
182
|
});
|
|
183
|
+
// Stall watchdog: poll every 5s; abort if no runner event has fired for
|
|
184
|
+
// stallTimeoutMs. Stops at lifecycle exit (cleared in the finally block
|
|
185
|
+
// around executeReviewedLifecycle's body — see end-of-function teardown).
|
|
186
|
+
const stallWatchdogInterval = setInterval(() => {
|
|
187
|
+
if (stallFired)
|
|
188
|
+
return;
|
|
189
|
+
const idleMs = Date.now() - lastRunnerEventAtMs;
|
|
190
|
+
if (idleMs >= stallTimeoutMs) {
|
|
191
|
+
stallFired = true;
|
|
192
|
+
emitTaskEvent('stall_abort', { idle_ms: idleMs, threshold_ms: stallTimeoutMs });
|
|
193
|
+
stallController.abort();
|
|
194
|
+
}
|
|
195
|
+
}, 5000);
|
|
196
|
+
stallWatchdogInterval.unref?.();
|
|
126
197
|
const implModel = resolved.provider.config.model;
|
|
127
198
|
const progressCounters = { filesRead: 0, filesWritten: 0, toolCalls: 0 };
|
|
128
199
|
const verboseStream = verboseStreamRaw;
|
|
@@ -133,8 +204,12 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
133
204
|
// the caller passed onProgress, so --verbose + HTTP handlers (which don't
|
|
134
205
|
// pass onProgress) silently dropped every tool_call / turn_complete event.
|
|
135
206
|
let textEmissionChars = 0;
|
|
207
|
+
const markRunnerEvent = () => { lastRunnerEventAtMs = Date.now(); };
|
|
136
208
|
const wrappedOnProgress = needHeartbeat
|
|
137
209
|
? (event) => {
|
|
210
|
+
if (event.kind === 'turn_start' || event.kind === 'text_emission' || event.kind === 'tool_call' || event.kind === 'turn_complete') {
|
|
211
|
+
markRunnerEvent();
|
|
212
|
+
}
|
|
138
213
|
if (event.kind === 'turn_start') {
|
|
139
214
|
heartbeat?.markEvent('llm');
|
|
140
215
|
if (verbose)
|
|
@@ -143,6 +218,7 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
143
218
|
emitTaskEvent('turn_start', {
|
|
144
219
|
turn: event.turn,
|
|
145
220
|
provider: event.provider,
|
|
221
|
+
model: event.model,
|
|
146
222
|
});
|
|
147
223
|
}
|
|
148
224
|
}
|
|
@@ -205,6 +281,21 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
205
281
|
: undefined;
|
|
206
282
|
const cwd = task.cwd ?? process.cwd();
|
|
207
283
|
const taskStartMs = Date.now();
|
|
284
|
+
// Hard task-level wall-clock cap. Once Date.now() crosses this, no new
|
|
285
|
+
// provider.run is dispatched (retries / tier-fallback short-circuit) and
|
|
286
|
+
// any in-flight call gets a per-call timeoutMs clamped to remaining
|
|
287
|
+
// budget so it returns its salvage promptly. The user gets *something*
|
|
288
|
+
// back instead of an open-ended retry storm.
|
|
289
|
+
const taskTimeoutMs = task.timeoutMs ?? config.defaults.timeoutMs ?? 1_800_000;
|
|
290
|
+
const taskDeadlineMs = taskStartMs + taskTimeoutMs;
|
|
291
|
+
// Stall watchdog: when no LLM / tool / text event has fired for this
|
|
292
|
+
// many ms, the in-flight runner is force-aborted via `stallController`.
|
|
293
|
+
// Catches "model is silently thinking forever" and "transport hung" —
|
|
294
|
+
// both invisible to the wall-clock cap until the very end.
|
|
295
|
+
const stallTimeoutMs = config.defaults.stallTimeoutMs ?? 600_000;
|
|
296
|
+
const stallController = new AbortController();
|
|
297
|
+
let lastRunnerEventAtMs = taskStartMs;
|
|
298
|
+
let stallFired = false;
|
|
208
299
|
const commits = [];
|
|
209
300
|
let commitError;
|
|
210
301
|
let specAttemptIndex = 0;
|
|
@@ -223,6 +314,15 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
223
314
|
let lastNonRejectedImpl;
|
|
224
315
|
const reviewRounds = () => ({ spec: specAttemptIndex, quality: qualityAttemptIndex, metadata: metadataRepair, cap: Math.max(maxSpecRows, maxQualityRows) });
|
|
225
316
|
const taskCostUSD = () => (heartbeat ? heartbeat.getHeartbeatTickInfo().costUSD : null);
|
|
317
|
+
// Per-stage stats tracking
|
|
318
|
+
const stats = emptyStats();
|
|
319
|
+
const resolvedModel = config.agents[resolved.slot].model;
|
|
320
|
+
const implementerAgentInfo = {
|
|
321
|
+
tier: resolved.slot,
|
|
322
|
+
family: modelFamily(resolvedModel),
|
|
323
|
+
model: resolvedModel,
|
|
324
|
+
};
|
|
325
|
+
const runningCostUSD = () => taskCostUSD();
|
|
226
326
|
const policyEscalated = { spec: false, quality: false, diff: false };
|
|
227
327
|
const emitFallback = (p) => {
|
|
228
328
|
diagnostics?.logger?.fallback(p);
|
|
@@ -259,17 +359,23 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
259
359
|
reviewRounds: reviewRounds(),
|
|
260
360
|
error: `runWithFallback: both tiers unavailable (loop=${loop}, attempt=${attempt}, role=implementer)`,
|
|
261
361
|
agents: agentEnvelope(specReviewerHistory[specReviewerHistory.length - 1] ?? 'not_applicable', qualityReviewerHistory[qualityReviewerHistory.length - 1] ?? (reviewPolicy === 'full' ? 'not_applicable' : 'skipped')),
|
|
362
|
+
stageStats: stats,
|
|
262
363
|
};
|
|
263
364
|
}
|
|
264
365
|
function reviewDidNotReject(status) {
|
|
265
366
|
return status === 'approved' || status === 'skipped';
|
|
266
367
|
}
|
|
368
|
+
const implementerToolMode = task.tools ?? config.defaults.tools;
|
|
369
|
+
const agentConfig = config.agents[resolved.slot];
|
|
370
|
+
const implementerCapabilities = (agentConfig.capabilities ?? findModelCapabilities(agentConfig.model) ?? []);
|
|
267
371
|
const agentEnvelope = (specReviewer, qualityReviewer) => {
|
|
268
372
|
const selectedImpl = latestAttemptedImpl ?? lastNonRejectedImpl;
|
|
269
373
|
const implementer = selectedImpl?.tier ?? resolved.slot;
|
|
270
374
|
return {
|
|
271
375
|
implementer,
|
|
272
376
|
...(implementerHistory.length > 1 || implementerHistory.some(t => t !== implementer) ? { implementerHistory } : {}),
|
|
377
|
+
implementerToolMode,
|
|
378
|
+
implementerCapabilities,
|
|
273
379
|
specReviewer,
|
|
274
380
|
...(specReviewerHistory.length > 0 && (specReviewerHistory.length > 1 || specReviewerHistory.some(t => t === 'skipped')) ? { specReviewerHistory } : {}),
|
|
275
381
|
qualityReviewer,
|
|
@@ -287,16 +393,15 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
287
393
|
specReviewStatus: aborting === 'spec' ? 'changes_required' : (base.specReviewStatus ?? 'approved'),
|
|
288
394
|
qualityReviewStatus: aborting === 'quality' ? 'changes_required' : (base.qualityReviewStatus ?? 'skipped'),
|
|
289
395
|
agents: agentEnvelope(specReviewerHistory[specReviewerHistory.length - 1] ?? 'not_applicable', qualityReviewerHistory[qualityReviewerHistory.length - 1] ?? (reviewPolicy === 'full' ? 'not_applicable' : 'skipped')),
|
|
396
|
+
stageStats: stats,
|
|
290
397
|
});
|
|
291
398
|
const defaultVerification = { status: 'skipped', steps: [], totalDurationMs: 0, skipReason: 'no_command' };
|
|
292
399
|
let latestVerification = defaultVerification;
|
|
293
400
|
async function runVerificationStage() {
|
|
294
|
-
emitTaskEvent('stage_change', { from: '
|
|
295
|
-
heartbeat?.
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
reviewRound: undefined,
|
|
299
|
-
});
|
|
401
|
+
emitTaskEvent('stage_change', { from: 'implementing', to: 'verifying' });
|
|
402
|
+
heartbeat?.setStage('verifying', 4);
|
|
403
|
+
const overallVerificationStart = Date.now();
|
|
404
|
+
const verifyCostStart = runningCostUSD();
|
|
300
405
|
const verification = await runVerifyStage({
|
|
301
406
|
cwd,
|
|
302
407
|
verifyCommand: task.verifyCommand,
|
|
@@ -304,6 +409,10 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
304
409
|
taskStartMs,
|
|
305
410
|
});
|
|
306
411
|
latestVerification = verification;
|
|
412
|
+
endVerifyStage(stats, overallVerificationStart, verifyCostStart, implementerAgentInfo, runningCostUSD(), verification.status === 'passed' ? 'passed'
|
|
413
|
+
: verification.status === 'failed' ? 'failed'
|
|
414
|
+
: verification.status === 'skipped' ? 'skipped'
|
|
415
|
+
: 'not_applicable', verification.skipReason ?? null);
|
|
307
416
|
for (const step of verification.steps) {
|
|
308
417
|
emitTaskEvent('verify_step', {
|
|
309
418
|
command: step.command,
|
|
@@ -354,7 +463,7 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
354
463
|
});
|
|
355
464
|
}
|
|
356
465
|
function withVerification(result, verification = latestVerification) {
|
|
357
|
-
return signalize({ ...result, verification });
|
|
466
|
+
return signalize({ ...result, verification, stageStats: stats });
|
|
358
467
|
}
|
|
359
468
|
function verificationErrorResult(base, verification) {
|
|
360
469
|
if (verification.status !== 'error')
|
|
@@ -405,6 +514,7 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
405
514
|
commits,
|
|
406
515
|
commitError,
|
|
407
516
|
verification,
|
|
517
|
+
stageStats: stats,
|
|
408
518
|
}, verification);
|
|
409
519
|
}
|
|
410
520
|
function resolveDiffOnlyTerminal(base, verdict, verification, diffTruncated) {
|
|
@@ -520,8 +630,12 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
520
630
|
const validCommit = implReport?.commit ?? await repairCommitMetadata(implReport?.commitDiagnostic ?? 'no commit block emitted');
|
|
521
631
|
if (!validCommit)
|
|
522
632
|
return;
|
|
633
|
+
heartbeat?.setStage('committing', 7);
|
|
634
|
+
const commitT0 = Date.now();
|
|
635
|
+
const commitC0 = runningCostUSD();
|
|
523
636
|
const c = await runCommitStage({ cwd, filesWritten: implResult.filesWritten, commit: validCommit });
|
|
524
637
|
commits.push(c);
|
|
638
|
+
endBaseStage(stats, 'committing', commitT0, commitC0, implementerAgentInfo, runningCostUSD());
|
|
525
639
|
}
|
|
526
640
|
}
|
|
527
641
|
try {
|
|
@@ -557,6 +671,8 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
557
671
|
attemptIndex: 0,
|
|
558
672
|
baseTier: resolved.slot,
|
|
559
673
|
});
|
|
674
|
+
const implT0 = Date.now();
|
|
675
|
+
const implC0 = runningCostUSD();
|
|
560
676
|
const initialImpl = await runWithFallback({
|
|
561
677
|
assigned: initialDecision.impl,
|
|
562
678
|
providerFor,
|
|
@@ -564,7 +680,7 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
564
680
|
isTransportFailure: (r) => TRANSPORT_FAILURES.has(r.status) && r.capExhausted === undefined,
|
|
565
681
|
getStatus: (r) => r.status,
|
|
566
682
|
makeSyntheticFailure: (assigned) => makeSyntheticRunResult(assigned, 'all_tiers_unavailable'),
|
|
567
|
-
call: (provider) => delegateWithEscalation(withDoneCondition(task), [provider], { explicitlyPinned: false, onProgress: wrappedOnProgress }),
|
|
683
|
+
call: (provider) => delegateWithEscalation(withDoneCondition(task), [provider], { explicitlyPinned: false, onProgress: wrappedOnProgress, taskDeadlineMs, abortSignal: stallController.signal }),
|
|
568
684
|
});
|
|
569
685
|
if (initialImpl.fallbackFired || initialImpl.bothUnavailable) {
|
|
570
686
|
fallbackOverrides.push({
|
|
@@ -602,6 +718,7 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
602
718
|
latestAttemptedImpl = { tier: initialImpl.usedTier, result: implResult };
|
|
603
719
|
lastNonRejectedImpl = { tier: initialImpl.usedTier, result: implResult };
|
|
604
720
|
implementerHistory.push(initialImpl.usedTier);
|
|
721
|
+
endBaseStage(stats, 'implementing', implT0, implC0, implementerAgentInfo, runningCostUSD());
|
|
605
722
|
specAttemptIndex = 1;
|
|
606
723
|
const implReport = implResult.status === 'ok' ? parseStructuredReport(implResult.output) : undefined;
|
|
607
724
|
const workerStatus = extractWorkerStatus(implReport);
|
|
@@ -673,6 +790,7 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
673
790
|
commits,
|
|
674
791
|
commitError,
|
|
675
792
|
verification,
|
|
793
|
+
stageStats: stats,
|
|
676
794
|
};
|
|
677
795
|
}
|
|
678
796
|
if (workerStatus === 'needs_context' || workerStatus === 'blocked') {
|
|
@@ -693,6 +811,7 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
693
811
|
commits,
|
|
694
812
|
commitError,
|
|
695
813
|
verification,
|
|
814
|
+
stageStats: stats,
|
|
696
815
|
};
|
|
697
816
|
}
|
|
698
817
|
if (reviewPolicy === 'off') {
|
|
@@ -730,7 +849,11 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
730
849
|
const diffUnavailable = new Map();
|
|
731
850
|
const diffReviewerTier = pickReviewer({ loop: 'spec', attemptIndex: 0, baseTier: resolved.slot });
|
|
732
851
|
emitTaskEvent('stage_change', { from: 'verifying', to: 'diff_review' });
|
|
852
|
+
const diffReviewT0 = Date.now();
|
|
853
|
+
const diffReviewC0 = runningCostUSD();
|
|
733
854
|
heartbeat?.transition({ stage: 'diff_review', stageIndex: 2, reviewRound: 1, attemptCap: 1 });
|
|
855
|
+
const diffReviewT0_commit = Date.now();
|
|
856
|
+
const diffReviewC0_commit = runningCostUSD();
|
|
734
857
|
const diffCall = await runWithFallback({
|
|
735
858
|
assigned: diffReviewerTier,
|
|
736
859
|
providerFor,
|
|
@@ -749,6 +872,15 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
749
872
|
}
|
|
750
873
|
const verdict = diffCall.bothUnavailable || isReviewTransportFailure(diffCall.result) ? makeSkippedReviewResult('all_tiers_unavailable') : diffCall.result;
|
|
751
874
|
emitTaskEvent('review_decision', { stage: 'diff_review', verdict: 'kind' in verdict ? verdict.kind : 'skipped', round: 1 });
|
|
875
|
+
endReviewStage(stats, 'diff_review', diffReviewT0_commit, diffReviewC0_commit, implementerAgentInfo, runningCostUSD(),
|
|
876
|
+
// Diff review uses 'approve' | 'concerns' | 'reject' | 'transport_failure' (DiffReviewVerdict),
|
|
877
|
+
// distinct from spec/quality verdicts. Map to the telemetry verdict enum here.
|
|
878
|
+
'kind' in verdict
|
|
879
|
+
? (verdict.kind === 'approve' ? 'approved'
|
|
880
|
+
: verdict.kind === 'concerns' ? 'concerns'
|
|
881
|
+
: verdict.kind === 'reject' ? 'changes_required'
|
|
882
|
+
: 'error')
|
|
883
|
+
: 'skipped', 0);
|
|
752
884
|
return resolveDiffOnlyTerminal({
|
|
753
885
|
...implResult,
|
|
754
886
|
workerStatus,
|
|
@@ -770,6 +902,8 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
770
902
|
let specReviewReason;
|
|
771
903
|
heartbeat?.transition({ stage: 'spec_review', stageIndex: 2, reviewRound: 1, attemptCap: maxSpecRows });
|
|
772
904
|
const initialReviewerTier = pickReviewer({ loop: 'spec', attemptIndex: 0, baseTier: resolved.slot });
|
|
905
|
+
const specReviewT0 = Date.now();
|
|
906
|
+
const specReviewC0 = runningCostUSD();
|
|
773
907
|
const initialSpecReview = await runWithFallback({
|
|
774
908
|
assigned: initialReviewerTier,
|
|
775
909
|
providerFor,
|
|
@@ -813,7 +947,7 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
813
947
|
heartbeat?.transition({ stage: 'spec_rework', stageIndex: 3, reviewRound: specAttemptIndex, attemptCap: maxSpecRows });
|
|
814
948
|
const feedback = specResult.findings.length > 0 ? `\n\n## Spec Review Feedback (round ${specAttemptIndex}):\n${specResult.findings.map(f => `- ${f}`).join('\n')}` : '';
|
|
815
949
|
const reworkTask = withDoneCondition({ ...task, prompt: `${task.prompt}${feedback}` });
|
|
816
|
-
const reworkCall = await runWithFallback({ assigned: decision.impl, providerFor, unavailableTiers: specUnavailable, isTransportFailure: (r) => TRANSPORT_FAILURES.has(r.status) && r.capExhausted === undefined, getStatus: (r) => r.status, makeSyntheticFailure: (assigned) => makeSyntheticRunResult(assigned, 'all_tiers_unavailable'), call: (provider) => delegateWithEscalation(reworkTask, [provider], { explicitlyPinned: true, onProgress: wrappedOnProgress }) });
|
|
950
|
+
const reworkCall = await runWithFallback({ assigned: decision.impl, providerFor, unavailableTiers: specUnavailable, isTransportFailure: (r) => TRANSPORT_FAILURES.has(r.status) && r.capExhausted === undefined, getStatus: (r) => r.status, makeSyntheticFailure: (assigned) => makeSyntheticRunResult(assigned, 'all_tiers_unavailable'), call: (provider) => delegateWithEscalation(reworkTask, [provider], { explicitlyPinned: true, onProgress: wrappedOnProgress, taskDeadlineMs, abortSignal: stallController.signal }) });
|
|
817
951
|
if (reworkCall.fallbackFired || reworkCall.bothUnavailable)
|
|
818
952
|
fallbackOverrides.push({ role: 'implementer', loop: 'spec', attempt: specAttemptIndex, assigned: decision.impl, used: reworkCall.usedTier, reason: (reworkCall.fallbackReason ?? reworkCall.unavailableReason), triggeringStatus: reworkCall.fallbackTriggeringStatus, bothUnavailable: reworkCall.bothUnavailable });
|
|
819
953
|
if (reworkCall.fallbackFired) {
|
|
@@ -863,10 +997,18 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
863
997
|
prevSpecFindings = [...(specResult.findings ?? [])];
|
|
864
998
|
}
|
|
865
999
|
let qualityResult = { status: 'skipped', report: undefined, findings: [], errorReason: reviewPolicy === 'full' ? 'all_tiers_unavailable' : 'skipped: reviewPolicy is spec_only' };
|
|
1000
|
+
// Hoisted so endReviewStage (called after this block) can read them on the
|
|
1001
|
+
// success path. When the quality review is skipped (`reviewPolicy !== 'full'`),
|
|
1002
|
+
// the values stay at 0/null and the corresponding stage entry remains in its
|
|
1003
|
+
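// `entered: false` default only if endReviewStage is skipped for it. NOTE(review): the 'quality_review' endReviewStage call after this block looks unconditional — confirm it is guarded.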
|
|
1004
|
+
let qualityReviewT0 = 0;
|
|
1005
|
+
let qualityReviewC0 = null;
|
|
866
1006
|
if (reviewPolicy === 'full') {
|
|
867
1007
|
qualityUnavailable = new Map();
|
|
868
1008
|
const qualityReviewerTier = pickReviewer({ loop: 'quality', attemptIndex: 0, baseTier: resolved.slot });
|
|
869
1009
|
heartbeat?.transition({ stage: 'quality_review', stageIndex: 4, reviewRound: 1, attemptCap: maxQualityRows });
|
|
1010
|
+
qualityReviewT0 = Date.now();
|
|
1011
|
+
qualityReviewC0 = runningCostUSD();
|
|
870
1012
|
const initialQuality = await runWithFallback({ assigned: qualityReviewerTier, providerFor, unavailableTiers: qualityUnavailable, isTransportFailure: (r) => isReviewTransportFailure(r), getStatus: (r) => r.status, makeSyntheticFailure: () => makeSkippedReviewResult('all_tiers_unavailable'), call: (provider) => runQualityReview(provider, packet, specReport ?? finalImplReport, fileContents, finalImplResult.toolCalls, finalImplResult.filesWritten, evidence.block) });
|
|
871
1013
|
if (initialQuality.bothUnavailable) {
|
|
872
1014
|
emitFallbackUnavailable({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'quality', attempt: 0, role: 'qualityReviewer', assignedTier: qualityReviewerTier, reason: initialQuality.unavailableReason });
|
|
@@ -898,7 +1040,7 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
898
1040
|
heartbeat?.transition({ stage: 'quality_rework', stageIndex: 5, reviewRound: qualityAttemptIndex, attemptCap: maxQualityRows });
|
|
899
1041
|
const feedback = qualityResult.findings.length > 0 ? `\n\n## Quality Review Feedback (round ${qualityAttemptIndex}):\n${qualityResult.findings.map(f => `- ${f}`).join('\n')}` : '';
|
|
900
1042
|
const reworkTask = withDoneCondition({ ...task, prompt: `${task.prompt}${feedback}` });
|
|
901
|
-
const reworkCall = await runWithFallback({ assigned: decision.impl, providerFor, unavailableTiers: qualityUnavailable, isTransportFailure: (r) => TRANSPORT_FAILURES.has(r.status) && r.capExhausted === undefined, getStatus: (r) => r.status, makeSyntheticFailure: (assigned) => makeSyntheticRunResult(assigned, 'all_tiers_unavailable'), call: (provider) => delegateWithEscalation(reworkTask, [provider], { explicitlyPinned: true, onProgress: wrappedOnProgress }) });
|
|
1043
|
+
const reworkCall = await runWithFallback({ assigned: decision.impl, providerFor, unavailableTiers: qualityUnavailable, isTransportFailure: (r) => TRANSPORT_FAILURES.has(r.status) && r.capExhausted === undefined, getStatus: (r) => r.status, makeSyntheticFailure: (assigned) => makeSyntheticRunResult(assigned, 'all_tiers_unavailable'), call: (provider) => delegateWithEscalation(reworkTask, [provider], { explicitlyPinned: true, onProgress: wrappedOnProgress, taskDeadlineMs, abortSignal: stallController.signal }) });
|
|
902
1044
|
if (reworkCall.fallbackFired || reworkCall.bothUnavailable)
|
|
903
1045
|
fallbackOverrides.push({ role: 'implementer', loop: 'quality', attempt: qualityAttemptIndex, assigned: decision.impl, used: reworkCall.usedTier, reason: (reworkCall.fallbackReason ?? reworkCall.unavailableReason), triggeringStatus: reworkCall.fallbackTriggeringStatus, bothUnavailable: reworkCall.bothUnavailable });
|
|
904
1046
|
if (reworkCall.fallbackFired)
|
|
@@ -962,7 +1104,16 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
962
1104
|
});
|
|
963
1105
|
}
|
|
964
1106
|
const specAggregateStatus = (['approved', 'changes_required', 'skipped', 'error', 'api_error', 'network_error', 'timeout'].includes(specStatus) ? specStatus : 'error');
|
|
1107
|
+
endReviewStage(stats, 'spec_review', specReviewT0, specReviewC0, implementerAgentInfo, runningCostUSD(), specStatus === 'approved' ? 'approved'
|
|
1108
|
+
: specStatus === 'changes_required' ? 'changes_required'
|
|
1109
|
+
: specStatus === 'skipped' ? 'skipped'
|
|
1110
|
+
: specStatus === 'not_applicable' ? 'not_applicable'
|
|
1111
|
+
: 'error', specAttemptIndex - 1);
|
|
965
1112
|
const qualityAggregateStatus = qualityResult.status;
|
|
1113
|
+
endReviewStage(stats, 'quality_review', qualityReviewT0, qualityReviewC0, implementerAgentInfo, runningCostUSD(), qualityResult.status === 'approved' ? 'approved'
|
|
1114
|
+
: qualityResult.status === 'changes_required' ? 'changes_required'
|
|
1115
|
+
: qualityResult.status === 'skipped' ? 'skipped'
|
|
1116
|
+
: 'error', qualityAttemptIndex - 1);
|
|
966
1117
|
const aggregated = aggregateResult(finalReport, specReport, qualityResult.report, specAggregateStatus, qualityAggregateStatus);
|
|
967
1118
|
// File artifact verification: check whether output targets exist on disk after all work.
|
|
968
1119
|
// Only applies when status is ok; non-ok statuses skip verification entirely.
|
|
@@ -979,13 +1130,14 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
979
1130
|
: finalImplResult.status;
|
|
980
1131
|
const specEnvelopeStatus = (specStatus === 'api_error' || specStatus === 'network_error' || specStatus === 'timeout' ? 'error' : specStatus);
|
|
981
1132
|
const qualityEnvelopeStatus = qualityResult.status === 'api_error' || qualityResult.status === 'network_error' || qualityResult.status === 'timeout' ? 'error' : qualityResult.status;
|
|
982
|
-
|
|
1133
|
+
const runResult = {
|
|
983
1134
|
...finalImplResult,
|
|
984
1135
|
status: finalStatus,
|
|
985
1136
|
workerStatus: finalWorkerStatus,
|
|
986
1137
|
concerns,
|
|
987
1138
|
specReviewStatus: specEnvelopeStatus,
|
|
988
1139
|
qualityReviewStatus: qualityEnvelopeStatus,
|
|
1140
|
+
stageStats: stats,
|
|
989
1141
|
specReviewReason: 'errorReason' in specResult ? specResult.errorReason : undefined,
|
|
990
1142
|
qualityReviewReason: 'errorReason' in qualityResult ? qualityResult.errorReason : undefined,
|
|
991
1143
|
structuredReport: aggregated,
|
|
@@ -1004,12 +1156,38 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
1004
1156
|
commitError,
|
|
1005
1157
|
verification,
|
|
1006
1158
|
};
|
|
1159
|
+
try {
|
|
1160
|
+
recorder?.recordTaskCompleted({
|
|
1161
|
+
route: _route ?? 'delegate',
|
|
1162
|
+
taskSpec: task,
|
|
1163
|
+
runResult,
|
|
1164
|
+
client: _client ?? 'claude-code',
|
|
1165
|
+
triggeringSkill: _triggeringSkill ?? 'direct',
|
|
1166
|
+
parentModel: task.parentModel ?? null,
|
|
1167
|
+
});
|
|
1168
|
+
}
|
|
1169
|
+
catch { /* silent — bedrock invariant */ }
|
|
1170
|
+
return runResult;
|
|
1007
1171
|
}
|
|
1008
1172
|
catch (err) {
|
|
1009
|
-
|
|
1173
|
+
const errorRunResult = withVerification(workerErrorResult(err));
|
|
1174
|
+
try {
|
|
1175
|
+
recorder?.recordTaskCompleted({
|
|
1176
|
+
route: _route ?? 'delegate',
|
|
1177
|
+
taskSpec: task,
|
|
1178
|
+
runResult: errorRunResult,
|
|
1179
|
+
client: _client ?? 'claude-code',
|
|
1180
|
+
triggeringSkill: _triggeringSkill ?? 'direct',
|
|
1181
|
+
parentModel: task.parentModel ?? null,
|
|
1182
|
+
});
|
|
1183
|
+
}
|
|
1184
|
+
catch { /* silent — bedrock invariant */ }
|
|
1185
|
+
return errorRunResult;
|
|
1010
1186
|
}
|
|
1011
1187
|
finally {
|
|
1188
|
+
heartbeat?.setStage('terminal', 8);
|
|
1012
1189
|
heartbeat?.stop();
|
|
1190
|
+
clearInterval(stallWatchdogInterval);
|
|
1013
1191
|
}
|
|
1014
1192
|
}
|
|
1015
1193
|
//# sourceMappingURL=reviewed-lifecycle.js.map
|