@zhixuan92/multi-model-agent-core 3.10.7 → 3.11.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +3 -3
- package/dist/config/schema.d.ts +15 -0
- package/dist/config/schema.d.ts.map +1 -1
- package/dist/config/schema.js +17 -2
- package/dist/config/schema.js.map +1 -1
- package/dist/diagnostics/types.d.ts +11 -0
- package/dist/diagnostics/types.d.ts.map +1 -1
- package/dist/escalation/fallback.d.ts +16 -0
- package/dist/escalation/fallback.d.ts.map +1 -1
- package/dist/escalation/fallback.js +280 -19
- package/dist/escalation/fallback.js.map +1 -1
- package/dist/executors/audit.d.ts.map +1 -1
- package/dist/executors/audit.js +6 -4
- package/dist/executors/audit.js.map +1 -1
- package/dist/executors/debug.d.ts.map +1 -1
- package/dist/executors/debug.js +5 -3
- package/dist/executors/debug.js.map +1 -1
- package/dist/executors/delegate.d.ts +12 -0
- package/dist/executors/delegate.d.ts.map +1 -1
- package/dist/executors/delegate.js +46 -11
- package/dist/executors/delegate.js.map +1 -1
- package/dist/executors/execute-plan.d.ts.map +1 -1
- package/dist/executors/execute-plan.js +6 -4
- package/dist/executors/execute-plan.js.map +1 -1
- package/dist/executors/retry.d.ts.map +1 -1
- package/dist/executors/retry.js +2 -1
- package/dist/executors/retry.js.map +1 -1
- package/dist/executors/review.d.ts.map +1 -1
- package/dist/executors/review.js +2 -1
- package/dist/executors/review.js.map +1 -1
- package/dist/executors/shared-compute.js +4 -4
- package/dist/executors/shared-compute.js.map +1 -1
- package/dist/executors/types.d.ts +1 -1
- package/dist/executors/types.d.ts.map +1 -1
- package/dist/executors/verify.js +2 -2
- package/dist/executors/verify.js.map +1 -1
- package/dist/heartbeat.d.ts +5 -5
- package/dist/heartbeat.d.ts.map +1 -1
- package/dist/heartbeat.js +21 -17
- package/dist/heartbeat.js.map +1 -1
- package/dist/index.d.ts +4 -3
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +5 -3
- package/dist/index.js.map +1 -1
- package/dist/intake/compilers/audit.d.ts.map +1 -1
- package/dist/intake/compilers/audit.js +5 -2
- package/dist/intake/compilers/audit.js.map +1 -1
- package/dist/intake/compilers/debug.d.ts.map +1 -1
- package/dist/intake/compilers/debug.js +4 -0
- package/dist/intake/compilers/debug.js.map +1 -1
- package/dist/intake/compilers/delegate.d.ts +3 -0
- package/dist/intake/compilers/delegate.d.ts.map +1 -1
- package/dist/intake/compilers/delegate.js +5 -1
- package/dist/intake/compilers/delegate.js.map +1 -1
- package/dist/intake/compilers/execute-plan.d.ts.map +1 -1
- package/dist/intake/compilers/execute-plan.js +5 -0
- package/dist/intake/compilers/execute-plan.js.map +1 -1
- package/dist/intake/compilers/review.d.ts.map +1 -1
- package/dist/intake/compilers/review.js +3 -0
- package/dist/intake/compilers/review.js.map +1 -1
- package/dist/intake/compilers/verify.d.ts.map +1 -1
- package/dist/intake/compilers/verify.js +7 -0
- package/dist/intake/compilers/verify.js.map +1 -1
- package/dist/intake/force-clarification.d.ts +5 -0
- package/dist/intake/force-clarification.d.ts.map +1 -0
- package/dist/intake/force-clarification.js +44 -0
- package/dist/intake/force-clarification.js.map +1 -0
- package/dist/intake/pipeline.d.ts +1 -1
- package/dist/intake/pipeline.d.ts.map +1 -1
- package/dist/intake/pipeline.js +32 -1
- package/dist/intake/pipeline.js.map +1 -1
- package/dist/intake/resolve.d.ts.map +1 -1
- package/dist/intake/resolve.js +0 -1
- package/dist/intake/resolve.js.map +1 -1
- package/dist/observability/bus.d.ts.map +1 -1
- package/dist/observability/bus.js +20 -0
- package/dist/observability/bus.js.map +1 -1
- package/dist/observability/events.d.ts +85 -8
- package/dist/observability/events.d.ts.map +1 -1
- package/dist/observability/events.js +77 -2
- package/dist/observability/events.js.map +1 -1
- package/dist/provider.d.ts +1 -0
- package/dist/provider.d.ts.map +1 -1
- package/dist/provider.js +9 -1
- package/dist/provider.js.map +1 -1
- package/dist/review/diff-review.d.ts +2 -1
- package/dist/review/diff-review.d.ts.map +1 -1
- package/dist/review/diff-review.js +1 -0
- package/dist/review/diff-review.js.map +1 -1
- package/dist/review/quality-reviewer.d.ts +2 -2
- package/dist/review/quality-reviewer.d.ts.map +1 -1
- package/dist/review/quality-reviewer.js +21 -9
- package/dist/review/quality-reviewer.js.map +1 -1
- package/dist/review/spec-reviewer.d.ts +1 -1
- package/dist/review/spec-reviewer.d.ts.map +1 -1
- package/dist/review/spec-reviewer.js +3 -1
- package/dist/review/spec-reviewer.js.map +1 -1
- package/dist/routing/canonical-model-identity.d.ts +9 -0
- package/dist/routing/canonical-model-identity.d.ts.map +1 -0
- package/dist/routing/canonical-model-identity.js +54 -0
- package/dist/routing/canonical-model-identity.js.map +1 -0
- package/dist/run-tasks/execute-task.d.ts.map +1 -1
- package/dist/run-tasks/execute-task.js +2 -1
- package/dist/run-tasks/execute-task.js.map +1 -1
- package/dist/run-tasks/index.d.ts.map +1 -1
- package/dist/run-tasks/index.js +2 -1
- package/dist/run-tasks/index.js.map +1 -1
- package/dist/run-tasks/reviewed-lifecycle.d.ts.map +1 -1
- package/dist/run-tasks/reviewed-lifecycle.js +195 -36
- package/dist/run-tasks/reviewed-lifecycle.js.map +1 -1
- package/dist/runners/base/result-builders.d.ts +13 -2
- package/dist/runners/base/result-builders.d.ts.map +1 -1
- package/dist/runners/base/result-builders.js +30 -1
- package/dist/runners/base/result-builders.js.map +1 -1
- package/dist/runners/base/time-check.d.ts +9 -0
- package/dist/runners/base/time-check.d.ts.map +1 -0
- package/dist/runners/base/time-check.js +18 -0
- package/dist/runners/base/time-check.js.map +1 -0
- package/dist/runners/base/usage-accumulator.d.ts +9 -0
- package/dist/runners/base/usage-accumulator.d.ts.map +1 -0
- package/dist/runners/base/usage-accumulator.js +19 -0
- package/dist/runners/base/usage-accumulator.js.map +1 -0
- package/dist/runners/claude-runner.d.ts.map +1 -1
- package/dist/runners/claude-runner.js +141 -180
- package/dist/runners/claude-runner.js.map +1 -1
- package/dist/runners/codex-runner.d.ts.map +1 -1
- package/dist/runners/codex-runner.js +99 -128
- package/dist/runners/codex-runner.js.map +1 -1
- package/dist/runners/error-classification.d.ts +11 -0
- package/dist/runners/error-classification.d.ts.map +1 -1
- package/dist/runners/error-classification.js +51 -0
- package/dist/runners/error-classification.js.map +1 -1
- package/dist/runners/openai-runner.d.ts.map +1 -1
- package/dist/runners/openai-runner.js +125 -172
- package/dist/runners/openai-runner.js.map +1 -1
- package/dist/runners/supervision.d.ts +0 -49
- package/dist/runners/supervision.d.ts.map +1 -1
- package/dist/runners/supervision.js +0 -67
- package/dist/runners/supervision.js.map +1 -1
- package/dist/runners/types.d.ts +15 -5
- package/dist/runners/types.d.ts.map +1 -1
- package/dist/telemetry/concern-classifier.d.ts +1 -1
- package/dist/telemetry/concern-classifier.d.ts.map +1 -1
- package/dist/telemetry/concern-classifier.js +5 -0
- package/dist/telemetry/concern-classifier.js.map +1 -1
- package/dist/telemetry/event-builder.d.ts.map +1 -1
- package/dist/telemetry/event-builder.js +10 -7
- package/dist/telemetry/event-builder.js.map +1 -1
- package/dist/telemetry/field-coverage.js +2 -2
- package/dist/telemetry/field-coverage.js.map +1 -1
- package/dist/telemetry/types.d.ts +142 -94
- package/dist/telemetry/types.d.ts.map +1 -1
- package/dist/telemetry/types.js +23 -17
- package/dist/telemetry/types.js.map +1 -1
- package/dist/types.d.ts +7 -2
- package/dist/types.d.ts.map +1 -1
- package/dist/types.js +5 -2
- package/dist/types.js.map +1 -1
- package/package.json +1 -1
|
@@ -1,14 +1,15 @@
|
|
|
1
1
|
import { execFile } from 'node:child_process';
|
|
2
2
|
import { promisify } from 'node:util';
|
|
3
|
-
import { computeCostUSD
|
|
3
|
+
import { computeCostUSD } from '../types.js';
|
|
4
4
|
import { createProvider } from '../provider.js';
|
|
5
5
|
import { delegateWithEscalation } from '../delegate-with-escalation.js';
|
|
6
6
|
import { pickEscalation, pickReviewer, maxRowsFor, } from '../escalation/policy.js';
|
|
7
7
|
import { runWithFallback, makeSyntheticRunResult, TRANSPORT_FAILURES, isReviewTransportFailure, } from '../escalation/fallback.js';
|
|
8
8
|
import { findModelCapabilities, findModelProfile } from '../routing/model-profiles.js';
|
|
9
|
+
import { canonicalIdentity } from '../routing/canonical-model-identity.js';
|
|
9
10
|
import { HeartbeatTimer } from '../heartbeat.js';
|
|
10
11
|
import { newStageIdleTracker, snapshotIdle } from './stage-idle-tracker.js';
|
|
11
|
-
import { DEFAULT_TASK_TIMEOUT_MS, DEFAULT_STALL_TIMEOUT_MS } from '../config/schema.js';
|
|
12
|
+
import { DEFAULT_TASK_TIMEOUT_MS, DEFAULT_STALL_TIMEOUT_MS, MAX_TIME_PRESTOP_RATIO } from '../config/schema.js';
|
|
12
13
|
import { runSpecReview } from '../review/spec-reviewer.js';
|
|
13
14
|
import { makeSkippedReviewResult } from '../review/skipped-result.js';
|
|
14
15
|
import { runQualityReview } from '../review/quality-reviewer.js';
|
|
@@ -201,6 +202,17 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
201
202
|
function providerFor(tier) {
|
|
202
203
|
return providers[tier];
|
|
203
204
|
}
|
|
205
|
+
// Compute the implementer's canonical identity for reviewer separation (R3).
|
|
206
|
+
// Used as forbiddenIdentities on reviewer fallback calls so the reviewer
|
|
207
|
+
// never lands on the same effective backend as the implementer.
|
|
208
|
+
const implementerIdentity = (() => {
|
|
209
|
+
try {
|
|
210
|
+
return canonicalIdentity(resolved.provider.config);
|
|
211
|
+
}
|
|
212
|
+
catch {
|
|
213
|
+
return undefined;
|
|
214
|
+
}
|
|
215
|
+
})();
|
|
204
216
|
// Partition filePaths into output targets before the worker runs.
|
|
205
217
|
// Output targets are paths that do not yet exist on disk.
|
|
206
218
|
const { outputTargets } = partitionFilePaths(task.filePaths, task.cwd ?? process.cwd());
|
|
@@ -216,17 +228,54 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
216
228
|
'task_done_summary',
|
|
217
229
|
'fallback', 'fallback_unavailable',
|
|
218
230
|
'escalation', 'escalation_unavailable',
|
|
219
|
-
'stall_abort', 'cost_check',
|
|
231
|
+
'stall_abort', 'cost_check', 'time_check',
|
|
220
232
|
]);
|
|
221
233
|
const shortBatchEarly = verboseBatchIdEarly ? verboseBatchIdEarly.slice(0, 8) : '????????';
|
|
222
234
|
const emitTaskEvent = (event, fields) => {
|
|
223
235
|
if (bus && verboseBatchIdEarly !== undefined) {
|
|
236
|
+
const schemaEvent = event === 'heartbeat_timer' ? 'task_started' : event;
|
|
224
237
|
const cleaned = {};
|
|
225
238
|
for (const [key, value] of Object.entries(fields)) {
|
|
226
239
|
if (value !== undefined)
|
|
227
240
|
cleaned[key] = value;
|
|
228
241
|
}
|
|
229
|
-
|
|
242
|
+
// Keep verbose-line field names stable while emitting schema-declared
|
|
243
|
+
// telemetry envelopes in their authoritative persisted shape. EventSchemas
|
|
244
|
+
// validate the full envelope at EventBus.emit in dev/test, so production
|
|
245
|
+
// emission paths must construct schema-shaped keys before persistence.
|
|
246
|
+
if (schemaEvent === 'task_started') {
|
|
247
|
+
cleaned.route = routeKey || 'delegate';
|
|
248
|
+
cleaned.cwd = task.cwd ?? process.cwd();
|
|
249
|
+
for (const key of ['state', 'stage_count', 'tick_ms', 'reason'])
|
|
250
|
+
delete cleaned[key];
|
|
251
|
+
}
|
|
252
|
+
if (event === 'verify_step') {
|
|
253
|
+
if ('exit_code' in cleaned) {
|
|
254
|
+
cleaned.exitCode = cleaned.exit_code;
|
|
255
|
+
delete cleaned.exit_code;
|
|
256
|
+
}
|
|
257
|
+
if ('duration_ms' in cleaned) {
|
|
258
|
+
cleaned.durationMs = cleaned.duration_ms;
|
|
259
|
+
delete cleaned.duration_ms;
|
|
260
|
+
}
|
|
261
|
+
if ('error_message' in cleaned) {
|
|
262
|
+
cleaned.errorMessage = cleaned.error_message;
|
|
263
|
+
delete cleaned.error_message;
|
|
264
|
+
}
|
|
265
|
+
}
|
|
266
|
+
if (event === 'task_completed') {
|
|
267
|
+
if ('stages_json' in cleaned) {
|
|
268
|
+
cleaned.stages = cleaned.stages_json;
|
|
269
|
+
delete cleaned.stages_json;
|
|
270
|
+
}
|
|
271
|
+
if (!('cachedTokens' in cleaned))
|
|
272
|
+
cleaned.cachedTokens = null;
|
|
273
|
+
if (!('reasoningTokens' in cleaned))
|
|
274
|
+
cleaned.reasoningTokens = null;
|
|
275
|
+
if (!('stages' in cleaned))
|
|
276
|
+
cleaned.stages = JSON.stringify(stats);
|
|
277
|
+
}
|
|
278
|
+
bus.emit({ event: schemaEvent, ts: new Date().toISOString(), batchId: verboseBatchIdEarly, taskIndex, ...cleaned });
|
|
230
279
|
}
|
|
231
280
|
if (verboseStreamRaw && (verbose || DEFAULT_MODE_EVENTS.has(event))) {
|
|
232
281
|
verboseStreamRaw(composeVerboseLine({ event, ts: new Date().toISOString(), batch: shortBatchEarly, task: taskIndex, ...toVerboseFields(fields) }));
|
|
@@ -398,9 +447,11 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
398
447
|
}
|
|
399
448
|
if (event.kind === 'turn_complete') {
|
|
400
449
|
heartbeat?.markEvent('llm');
|
|
401
|
-
const
|
|
402
|
-
const
|
|
403
|
-
|
|
450
|
+
const providerConfig = _activeRunnerProviderConfig ?? resolved.provider.config;
|
|
451
|
+
const costUSD = computeCostUSD(event.cumulativeInputTokens, event.cumulativeOutputTokens, providerConfig);
|
|
452
|
+
_currentRunnerCostUSD = costUSD ?? 0;
|
|
453
|
+
const cumulativeCostUSD = (_completedRunnerCostUSD ?? 0) + _currentRunnerCostUSD;
|
|
454
|
+
heartbeat?.updateCost(cumulativeCostUSD, null);
|
|
404
455
|
const nowTurn = Date.now();
|
|
405
456
|
const turnDurMs = nowTurn - prevEventAtMs;
|
|
406
457
|
prevEventAtMs = nowTurn;
|
|
@@ -410,7 +461,7 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
410
461
|
output_tokens: event.cumulativeOutputTokens,
|
|
411
462
|
cost: costUSD,
|
|
412
463
|
duration_ms: turnDurMs,
|
|
413
|
-
provider:
|
|
464
|
+
provider: providerConfig.model,
|
|
414
465
|
});
|
|
415
466
|
}
|
|
416
467
|
}
|
|
@@ -422,7 +473,7 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
422
473
|
// any in-flight call gets a per-call timeoutMs clamped to remaining
|
|
423
474
|
// budget so it returns its salvage promptly. The user gets *something*
|
|
424
475
|
// back instead of an open-ended retry storm.
|
|
425
|
-
const taskTimeoutMs = task.timeoutMs ?? config.defaults
|
|
476
|
+
const taskTimeoutMs = task.timeoutMs ?? config.defaults?.timeoutMs ?? DEFAULT_TASK_TIMEOUT_MS;
|
|
426
477
|
const taskDeadlineMs = taskStartMs + taskTimeoutMs;
|
|
427
478
|
// Stall watchdog: when no LLM / tool / text event has fired for this
|
|
428
479
|
// many ms, the in-flight runner is force-aborted via `stallController`.
|
|
@@ -484,7 +535,48 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
484
535
|
const model = provider?.config.model ?? config.agents[tier]?.model ?? resolvedModel;
|
|
485
536
|
return { tier, family: modelFamily(model), model };
|
|
486
537
|
};
|
|
487
|
-
|
|
538
|
+
// §3.9: runningCostUSD must be cumulative and monotonic across explicit
|
|
539
|
+
// runner boundaries. Runner progress reports per-runner cumulative token
|
|
540
|
+
// counts, so lifecycle cost is completed runners + current runner partial.
|
|
541
|
+
// Boundaries are closed from actual RunResult.usage.costUSD values rather
|
|
542
|
+
// than inferred from drops; this handles reviewer costs greater than the
|
|
543
|
+
// implementer and preserves reviewer-provider pricing.
|
|
544
|
+
let _completedRunnerCostUSD = null;
|
|
545
|
+
let _currentRunnerCostUSD = 0;
|
|
546
|
+
let _activeRunnerProviderConfig = null;
|
|
547
|
+
let _prevRunningCost = null;
|
|
548
|
+
const runningCostUSD = () => {
|
|
549
|
+
const current = _completedRunnerCostUSD !== null || _currentRunnerCostUSD !== 0
|
|
550
|
+
? (_completedRunnerCostUSD ?? 0) + _currentRunnerCostUSD
|
|
551
|
+
: null;
|
|
552
|
+
if (process.env.NODE_ENV === 'test' || process.env.NODE_ENV === 'development') {
|
|
553
|
+
if (_prevRunningCost !== null && current !== null && current < _prevRunningCost) {
|
|
554
|
+
throw new Error(`runningCostUSD non-monotonic: prev=${_prevRunningCost} now=${current}`);
|
|
555
|
+
}
|
|
556
|
+
_prevRunningCost = current;
|
|
557
|
+
}
|
|
558
|
+
return current;
|
|
559
|
+
};
|
|
560
|
+
const runAccounted = async (provider, call) => {
|
|
561
|
+
if (_activeRunnerProviderConfig !== null) {
|
|
562
|
+
throw new Error('lifecycle cost accounting runner overlap');
|
|
563
|
+
}
|
|
564
|
+
_activeRunnerProviderConfig = provider.config;
|
|
565
|
+
_currentRunnerCostUSD = 0;
|
|
566
|
+
try {
|
|
567
|
+
const result = await call();
|
|
568
|
+
const actualCost = result?.usage?.costUSD
|
|
569
|
+
?? result?.metrics?.costUSD
|
|
570
|
+
?? _currentRunnerCostUSD;
|
|
571
|
+
_completedRunnerCostUSD = (_completedRunnerCostUSD ?? 0) + actualCost;
|
|
572
|
+
_currentRunnerCostUSD = 0;
|
|
573
|
+
heartbeat?.updateCost(_completedRunnerCostUSD, null);
|
|
574
|
+
return result;
|
|
575
|
+
}
|
|
576
|
+
finally {
|
|
577
|
+
_activeRunnerProviderConfig = null;
|
|
578
|
+
}
|
|
579
|
+
};
|
|
488
580
|
const policyEscalated = { spec: false, quality: false, diff: false };
|
|
489
581
|
const emitFallback = (p) => {
|
|
490
582
|
emitTaskEvent('fallback', p);
|
|
@@ -507,8 +599,44 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
507
599
|
// on the base result (set by callers via abortReviewLoop({ ...res, specReviewStatus, ... })).
|
|
508
600
|
// Defaults to 'changes_required' for whichever loop tripped — that's the only state the
|
|
509
601
|
// loop ever fires from, by construction.
|
|
510
|
-
function adaptForAllTiersUnavailable(base, loop, attempt) {
|
|
511
|
-
const
|
|
602
|
+
function adaptForAllTiersUnavailable(base, loop, attempt, resolvedModel, salvageSource) {
|
|
603
|
+
const stageName = loop === 'spec' && attempt === 0 ? 'implementing'
|
|
604
|
+
: loop === 'spec' ? 'spec_rework'
|
|
605
|
+
: 'quality_rework';
|
|
606
|
+
// Promote salvage stage stats + metrics into the global stats map so R2.1
|
|
607
|
+
// (non-empty stages for 'incomplete') passes even when bothUnavailable
|
|
608
|
+
// short-circuits before endBaseStage runs at the call site.
|
|
609
|
+
if (salvageSource?.stageStats) {
|
|
610
|
+
for (const key of Object.keys(salvageSource.stageStats)) {
|
|
611
|
+
const val = salvageSource.stageStats[key];
|
|
612
|
+
if (val)
|
|
613
|
+
stats[key] = val;
|
|
614
|
+
}
|
|
615
|
+
}
|
|
616
|
+
const existing = stats[stageName];
|
|
617
|
+
if (!existing?.entered) {
|
|
618
|
+
stats[stageName] = {
|
|
619
|
+
stage: stageName,
|
|
620
|
+
entered: true,
|
|
621
|
+
durationMs: existing?.durationMs ?? salvageSource?.durationMs ?? null,
|
|
622
|
+
costUSD: existing?.costUSD ?? salvageSource?.usage?.costUSD ?? null,
|
|
623
|
+
agentTier: implementerAgentInfo.tier,
|
|
624
|
+
modelFamily: modelFamily(implementerAgentInfo.model),
|
|
625
|
+
model: implementerAgentInfo.model,
|
|
626
|
+
maxIdleMs: null,
|
|
627
|
+
totalIdleMs: null,
|
|
628
|
+
activityEvents: null,
|
|
629
|
+
inputTokens: salvageSource?.usage?.inputTokens ?? null,
|
|
630
|
+
outputTokens: salvageSource?.usage?.outputTokens ?? null,
|
|
631
|
+
cachedTokens: salvageSource?.usage?.cachedTokens ?? null,
|
|
632
|
+
reasoningTokens: salvageSource?.usage?.reasoningTokens ?? null,
|
|
633
|
+
turnCount: salvageSource?.turns ?? null,
|
|
634
|
+
toolCallCount: (salvageSource?.toolCalls?.length) || null,
|
|
635
|
+
filesReadCount: (salvageSource?.filesRead?.length) || null,
|
|
636
|
+
filesWrittenCount: (salvageSource?.filesWritten?.length) || null,
|
|
637
|
+
};
|
|
638
|
+
}
|
|
639
|
+
const ship = salvageSource ?? lastNonRejectedImpl?.result ?? base;
|
|
512
640
|
return {
|
|
513
641
|
...ship,
|
|
514
642
|
status: 'incomplete',
|
|
@@ -518,6 +646,13 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
518
646
|
error: `runWithFallback: both tiers unavailable (loop=${loop}, attempt=${attempt}, role=implementer)`,
|
|
519
647
|
agents: agentEnvelope(specReviewerHistory[specReviewerHistory.length - 1] ?? 'not_applicable', qualityReviewerHistory[qualityReviewerHistory.length - 1] ?? ((reviewPolicy === 'full' || reviewPolicy === 'quality_only') ? 'not_applicable' : 'skipped')),
|
|
520
648
|
stageStats: stats,
|
|
649
|
+
models: {
|
|
650
|
+
implementer: salvageSource?.models?.implementer
|
|
651
|
+
?? salvageSource?.stageStats?.[stageName]?.model
|
|
652
|
+
?? resolvedModel,
|
|
653
|
+
specReviewer: ship.models?.specReviewer ?? null,
|
|
654
|
+
qualityReviewer: ship.models?.qualityReviewer ?? null,
|
|
655
|
+
},
|
|
521
656
|
};
|
|
522
657
|
}
|
|
523
658
|
function reviewDidNotReject(status) {
|
|
@@ -541,11 +676,21 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
541
676
|
...(fallbackOverrides.length > 0 ? { fallbackOverrides } : {}),
|
|
542
677
|
};
|
|
543
678
|
};
|
|
544
|
-
const abortReviewLoop = (base, terminationReason, message, aborting) => ({
|
|
679
|
+
const abortReviewLoop = (base, terminationReason, message, aborting, wallClockMs) => ({
|
|
545
680
|
...base,
|
|
546
681
|
status: 'incomplete',
|
|
547
682
|
workerStatus: 'review_loop_aborted',
|
|
548
|
-
terminationReason
|
|
683
|
+
terminationReason: terminationReason === 'round_cap'
|
|
684
|
+
? 'round_cap'
|
|
685
|
+
: {
|
|
686
|
+
cause: terminationReason === 'cost_ceiling' ? 'cost_exceeded' : 'time_ceiling',
|
|
687
|
+
turnsUsed: base.turns,
|
|
688
|
+
hasFileArtifacts: (base.filesWritten ?? []).length > 0,
|
|
689
|
+
usedShell: (base.toolCalls ?? []).some(c => c.startsWith('shell') || c.startsWith('runShell')),
|
|
690
|
+
workerSelfAssessment: 'review_loop_aborted',
|
|
691
|
+
wasPromoted: false,
|
|
692
|
+
...(wallClockMs !== undefined ? { wallClockMs } : {}),
|
|
693
|
+
},
|
|
549
694
|
reviewRounds: reviewRounds(),
|
|
550
695
|
error: message,
|
|
551
696
|
specReviewStatus: aborting === 'spec' ? 'changes_required' : (base.specReviewStatus ?? 'approved'),
|
|
@@ -562,7 +707,7 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
562
707
|
const verification = await runVerifyStage({
|
|
563
708
|
cwd,
|
|
564
709
|
verifyCommand: task.verifyCommand,
|
|
565
|
-
taskTimeoutMs: task.timeoutMs ?? config.defaults
|
|
710
|
+
taskTimeoutMs: task.timeoutMs ?? config.defaults?.timeoutMs ?? DEFAULT_TASK_TIMEOUT_MS,
|
|
566
711
|
taskStartMs,
|
|
567
712
|
});
|
|
568
713
|
latestVerification = verification;
|
|
@@ -589,7 +734,7 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
589
734
|
const cause = typeof result.terminationReason === 'object' ? result.terminationReason.cause : result.terminationReason;
|
|
590
735
|
const capExhausted = result.capExhausted
|
|
591
736
|
?? (result.status === 'cost_exceeded' || cause === 'cost_exceeded' || cause === 'cost_ceiling' ? 'cost'
|
|
592
|
-
: result.status === 'timeout' || cause === 'timeout' ? 'wall_clock'
|
|
737
|
+
: result.status === 'timeout' || cause === 'timeout' || cause === 'time_ceiling' ? 'wall_clock'
|
|
593
738
|
: result.status === 'incomplete' && result.turns > 1 ? 'turn'
|
|
594
739
|
: undefined);
|
|
595
740
|
const lifecycleClarificationRequested = result.lifecycleClarificationRequested
|
|
@@ -605,13 +750,14 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
605
750
|
return signalize({
|
|
606
751
|
output: '',
|
|
607
752
|
status: 'error',
|
|
608
|
-
usage: { inputTokens: 0, outputTokens: 0, totalTokens: 0, costUSD: null },
|
|
753
|
+
usage: { inputTokens: 0, outputTokens: 0, totalTokens: 0, costUSD: null, costDeltaVsParentUSD: null, cachedTokens: null, reasoningTokens: null },
|
|
609
754
|
turns: 0,
|
|
610
755
|
filesRead: [],
|
|
611
756
|
filesWritten: [],
|
|
612
757
|
toolCalls: [],
|
|
613
758
|
outputIsDiagnostic: true,
|
|
614
759
|
escalationLog: [],
|
|
760
|
+
parsedFindings: null,
|
|
615
761
|
error: workerError.message,
|
|
616
762
|
errorCode: 'runner_crash',
|
|
617
763
|
structuredError: { code: 'runner_crash', message: workerError.message },
|
|
@@ -831,13 +977,14 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
831
977
|
return withVerification({
|
|
832
978
|
output: `Sub-agent error: task.cwd ${cwd} had pre-existing modifications`,
|
|
833
979
|
status: 'error',
|
|
834
|
-
usage: { inputTokens: 0, outputTokens: 0, totalTokens: 0, costUSD: null },
|
|
980
|
+
usage: { inputTokens: 0, outputTokens: 0, totalTokens: 0, costUSD: null, costDeltaVsParentUSD: null, cachedTokens: null, reasoningTokens: null },
|
|
835
981
|
turns: 0,
|
|
836
982
|
filesRead: [],
|
|
837
983
|
filesWritten: [],
|
|
838
984
|
toolCalls: [],
|
|
839
985
|
outputIsDiagnostic: true,
|
|
840
986
|
escalationLog: [],
|
|
987
|
+
parsedFindings: null,
|
|
841
988
|
error: `task.cwd ${cwd} had pre-existing modifications`,
|
|
842
989
|
errorCode: 'dirty_worktree',
|
|
843
990
|
commits,
|
|
@@ -858,7 +1005,7 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
858
1005
|
isTransportFailure: (r) => TRANSPORT_FAILURES.has(r.status) && r.capExhausted === undefined,
|
|
859
1006
|
getStatus: (r) => r.status,
|
|
860
1007
|
makeSyntheticFailure: (assigned) => makeSyntheticRunResult(assigned, 'all_tiers_unavailable'),
|
|
861
|
-
call: (provider) => delegateWithEscalation(withDoneCondition(task), [provider], { explicitlyPinned: false, onProgress: wrappedOnProgress, taskDeadlineMs, abortSignal: stallController.signal, assignedTier: initialDecision.impl }),
|
|
1008
|
+
call: (provider) => runAccounted(provider, () => delegateWithEscalation(withDoneCondition(task), [provider], { explicitlyPinned: false, onProgress: wrappedOnProgress, taskDeadlineMs, abortSignal: stallController.signal, assignedTier: initialDecision.impl })),
|
|
862
1009
|
});
|
|
863
1010
|
if (initialImpl.fallbackFired || initialImpl.bothUnavailable) {
|
|
864
1011
|
fallbackOverrides.push({
|
|
@@ -890,7 +1037,7 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
890
1037
|
assignedTier: initialDecision.impl,
|
|
891
1038
|
reason: initialImpl.unavailableReason,
|
|
892
1039
|
});
|
|
893
|
-
return __recordOnce(adaptForAllTiersUnavailable(initialImpl.result, 'spec', 0));
|
|
1040
|
+
return __recordOnce(adaptForAllTiersUnavailable(initialImpl.result, 'spec', 0, resolvedModel, initialImpl.salvageResult));
|
|
894
1041
|
}
|
|
895
1042
|
const implResult = initialImpl.result;
|
|
896
1043
|
latestAttemptedImpl = { tier: initialImpl.usedTier, result: implResult };
|
|
@@ -1047,10 +1194,11 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
1047
1194
|
isTransportFailure: (r) => isReviewTransportFailure(r),
|
|
1048
1195
|
getStatus: (r) => r.status,
|
|
1049
1196
|
makeSyntheticFailure: () => makeSkippedReviewResult('all_tiers_unavailable'),
|
|
1050
|
-
|
|
1197
|
+
forbiddenIdentities: implementerIdentity ? [implementerIdentity] : undefined,
|
|
1198
|
+
call: (provider) => runAccounted(provider, () => runDiffReview({ cwd, diff: evidence.fullDiff, diffTruncated: evidence.diffTruncated, verification, worker: { call: (prompt, opts) => provider.run(prompt, { cwd: opts?.cwd ?? cwd, abortSignal: opts?.abortSignal, timeoutMs: opts?.timeoutMs }) }, taskDeadlineMs, abortSignal: stallController.signal })),
|
|
1051
1199
|
});
|
|
1052
1200
|
if (diffCall.fallbackFired) {
|
|
1053
|
-
emitFallback({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'diff', attempt: 0, role: 'diffReviewer', assignedTier: diffReviewerTier, usedTier: diffCall.usedTier, reason: diffCall.fallbackReason, triggeringStatus: diffCall.fallbackTriggeringStatus, violatesSeparation: diffCall.usedTier === implementerHistory[implementerHistory.length - 1] });
|
|
1201
|
+
emitFallback({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'diff', attempt: 0, role: 'diffReviewer', assignedTier: diffReviewerTier, usedTier: diffCall.usedTier, reason: diffCall.fallbackReason, triggeringStatus: diffCall.fallbackTriggeringStatus, violatesSeparation: diffCall.usedTier === implementerHistory[implementerHistory.length - 1], fallbackSeparationRespected: diffCall.fallbackSeparationRespected, assignedIdentity: diffCall.assignedIdentity ?? null, usedIdentity: diffCall.usedIdentity ?? null });
|
|
1054
1202
|
fallbackOverrides.push({ role: 'diffReviewer', loop: 'diff', attempt: 0, assigned: diffReviewerTier, used: diffCall.usedTier, reason: diffCall.fallbackReason, triggeringStatus: diffCall.fallbackTriggeringStatus, bothUnavailable: diffCall.bothUnavailable });
|
|
1055
1203
|
}
|
|
1056
1204
|
if (diffCall.bothUnavailable) {
|
|
@@ -1118,7 +1266,8 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
1118
1266
|
isTransportFailure: (r) => isReviewTransportFailure(r),
|
|
1119
1267
|
getStatus: (r) => r.status,
|
|
1120
1268
|
makeSyntheticFailure: () => makeSkippedReviewResult('all_tiers_unavailable'),
|
|
1121
|
-
|
|
1269
|
+
forbiddenIdentities: implementerIdentity ? [implementerIdentity] : undefined,
|
|
1270
|
+
call: (provider) => runAccounted(provider, () => runSpecReview(provider, packet, effectiveImplReport, fileContents, implResult.toolCalls, task.planContext, evidence.block, taskDeadlineMs, stallController.signal, wrappedOnProgress, cwd)),
|
|
1122
1271
|
});
|
|
1123
1272
|
specReviewDurationMs += Date.now() - initialSpecReviewIterStart;
|
|
1124
1273
|
if (initialSpecReview.bothUnavailable) {
|
|
@@ -1129,7 +1278,7 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
1129
1278
|
else {
|
|
1130
1279
|
specReviewerHistory.push(initialSpecReview.usedTier);
|
|
1131
1280
|
if (initialSpecReview.fallbackFired) {
|
|
1132
|
-
emitFallback({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'spec', attempt: 0, role: 'specReviewer', assignedTier: initialReviewerTier, usedTier: initialSpecReview.usedTier, reason: initialSpecReview.fallbackReason, triggeringStatus: initialSpecReview.fallbackTriggeringStatus, violatesSeparation: initialSpecReview.usedTier === implementerHistory[implementerHistory.length - 1] });
|
|
1281
|
+
emitFallback({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'spec', attempt: 0, role: 'specReviewer', assignedTier: initialReviewerTier, usedTier: initialSpecReview.usedTier, reason: initialSpecReview.fallbackReason, triggeringStatus: initialSpecReview.fallbackTriggeringStatus, violatesSeparation: initialSpecReview.usedTier === implementerHistory[implementerHistory.length - 1], fallbackSeparationRespected: initialSpecReview.fallbackSeparationRespected, assignedIdentity: initialSpecReview.assignedIdentity ?? null, usedIdentity: initialSpecReview.usedIdentity ?? null });
|
|
1133
1282
|
fallbackOverrides.push({ role: 'specReviewer', loop: 'spec', attempt: 0, assigned: initialReviewerTier, used: initialSpecReview.usedTier, reason: initialSpecReview.fallbackReason, triggeringStatus: initialSpecReview.fallbackTriggeringStatus, bothUnavailable: false });
|
|
1134
1283
|
}
|
|
1135
1284
|
}
|
|
@@ -1149,6 +1298,11 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
1149
1298
|
emitTaskEvent('cost_check', { stage: 'spec_rework', tripped: true, cost_used_usd: currentCostUSD, cost_cap_usd: maxCostUSD, cost_available: true });
|
|
1150
1299
|
return abortReviewLoop(finalImplResult, 'cost_ceiling', 'cost ceiling reached before spec rework', 'spec');
|
|
1151
1300
|
}
|
|
1301
|
+
const wallClock = Date.now() - taskStartMs;
|
|
1302
|
+
if (wallClock >= MAX_TIME_PRESTOP_RATIO * taskTimeoutMs) {
|
|
1303
|
+
emitTaskEvent('time_check', { stage: 'spec_rework', tripped: true, wallClockMs: wallClock, timeoutMs: taskTimeoutMs });
|
|
1304
|
+
return abortReviewLoop(finalImplResult, 'time_ceiling', `time ceiling reached before spec rework (${wallClock}ms >= 0.8 × ${taskTimeoutMs}ms)`, 'spec', wallClock);
|
|
1305
|
+
}
|
|
1152
1306
|
const decision = pickEscalation({ loop: 'spec', attemptIndex: specAttemptIndex, baseTier: resolved.slot });
|
|
1153
1307
|
if (decision.isEscalated)
|
|
1154
1308
|
emitEscalationEvent('spec', specAttemptIndex, decision);
|
|
@@ -1156,7 +1310,7 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
1156
1310
|
transitionStage('spec_review', 'spec_rework', { stage: 'spec_rework', stageIndex: 3, reviewRound: specAttemptIndex, attemptCap: maxSpecRows }, { attempt: specAttemptIndex, attemptCap: maxSpecRows, implTier: decision.impl, reviewerTier: decision.reviewer, escalated: decision.isEscalated });
|
|
1157
1311
|
const feedback = specResult.findings.length > 0 ? `\n\n## Spec Review Feedback (round ${specAttemptIndex}):\n${specResult.findings.map(f => `- ${f}`).join('\n')}` : '';
|
|
1158
1312
|
const reworkTask = withDoneCondition({ ...task, prompt: `${task.prompt}${feedback}` });
|
|
1159
|
-
const reworkCall = await runWithFallback({ assigned: decision.impl, providerFor, unavailableTiers: specUnavailable, isTransportFailure: (r) => TRANSPORT_FAILURES.has(r.status) && r.capExhausted === undefined, getStatus: (r) => r.status, makeSyntheticFailure: (assigned) => makeSyntheticRunResult(assigned, 'all_tiers_unavailable'), call: (provider) => delegateWithEscalation(reworkTask, [provider], { explicitlyPinned: true, onProgress: wrappedOnProgress, taskDeadlineMs, abortSignal: stallController.signal, assignedTier: decision.impl }) });
|
|
1313
|
+
const reworkCall = await runWithFallback({ assigned: decision.impl, providerFor, unavailableTiers: specUnavailable, isTransportFailure: (r) => TRANSPORT_FAILURES.has(r.status) && r.capExhausted === undefined, getStatus: (r) => r.status, makeSyntheticFailure: (assigned) => makeSyntheticRunResult(assigned, 'all_tiers_unavailable'), call: (provider) => runAccounted(provider, () => delegateWithEscalation(reworkTask, [provider], { explicitlyPinned: true, onProgress: wrappedOnProgress, taskDeadlineMs, abortSignal: stallController.signal, assignedTier: decision.impl })) });
|
|
1160
1314
|
if (reworkCall.fallbackFired || reworkCall.bothUnavailable)
|
|
1161
1315
|
fallbackOverrides.push({ role: 'implementer', loop: 'spec', attempt: specAttemptIndex, assigned: decision.impl, used: reworkCall.usedTier, reason: (reworkCall.fallbackReason ?? reworkCall.unavailableReason), triggeringStatus: reworkCall.fallbackTriggeringStatus, bothUnavailable: reworkCall.bothUnavailable });
|
|
1162
1316
|
if (reworkCall.fallbackFired) {
|
|
@@ -1168,7 +1322,7 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
1168
1322
|
emitFallbackUnavailable({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'spec', attempt: specAttemptIndex, role: 'implementer', assignedTier: decision.impl, reason: reworkCall.unavailableReason });
|
|
1169
1323
|
if (decision.isEscalated)
|
|
1170
1324
|
emitEscalationUnavailable({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'spec', attempt: specAttemptIndex, role: 'implementer', wantedTier: decision.impl, reason: reworkCall.unavailableReason });
|
|
1171
|
-
return __recordOnce(adaptForAllTiersUnavailable(reworkCall.result, 'spec', specAttemptIndex));
|
|
1325
|
+
return __recordOnce(adaptForAllTiersUnavailable(reworkCall.result, 'spec', specAttemptIndex, resolvedModel, reworkCall.salvageResult));
|
|
1172
1326
|
}
|
|
1173
1327
|
finalImplResult = reworkCall.result;
|
|
1174
1328
|
latestAttemptedImpl = { tier: reworkCall.usedTier, result: finalImplResult };
|
|
@@ -1180,7 +1334,7 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
1180
1334
|
commitReworkStage(stats, 'spec_rework', specReworkAcc, implementerAgentInfo);
|
|
1181
1335
|
transitionStage('spec_rework', 'spec_review', { stage: 'spec_review', stageIndex: 2, reviewRound: specAttemptIndex + 1, attemptCap: maxSpecRows }, null);
|
|
1182
1336
|
const reReviewIterStart = Date.now();
|
|
1183
|
-
const reviewCall = await runWithFallback({ assigned: decision.reviewer, providerFor, unavailableTiers: specUnavailable, isTransportFailure: (r) => isReviewTransportFailure(r), getStatus: (r) => r.status, makeSyntheticFailure: () => makeSkippedReviewResult('all_tiers_unavailable'), call: (provider) => runSpecReview(provider, packet, finalImplReport, fileContents, finalImplResult.toolCalls, task.planContext, evidence.block, taskDeadlineMs, stallController.signal, wrappedOnProgress) });
|
|
1337
|
+
const reviewCall = await runWithFallback({ assigned: decision.reviewer, providerFor, unavailableTiers: specUnavailable, isTransportFailure: (r) => isReviewTransportFailure(r), getStatus: (r) => r.status, makeSyntheticFailure: () => makeSkippedReviewResult('all_tiers_unavailable'), forbiddenIdentities: implementerIdentity ? [implementerIdentity] : undefined, call: (provider) => runAccounted(provider, () => runSpecReview(provider, packet, finalImplReport, fileContents, finalImplResult.toolCalls, task.planContext, evidence.block, taskDeadlineMs, stallController.signal, wrappedOnProgress, cwd)) });
|
|
1184
1338
|
specReviewDurationMs += Date.now() - reReviewIterStart;
|
|
1185
1339
|
if (reviewCall.bothUnavailable) {
|
|
1186
1340
|
emitFallbackUnavailable({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'spec', attempt: specAttemptIndex, role: 'specReviewer', assignedTier: decision.reviewer, reason: reviewCall.unavailableReason });
|
|
@@ -1190,7 +1344,7 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
1190
1344
|
else {
|
|
1191
1345
|
specReviewerHistory.push(reviewCall.usedTier);
|
|
1192
1346
|
if (reviewCall.fallbackFired) {
|
|
1193
|
-
emitFallback({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'spec', attempt: specAttemptIndex, role: 'specReviewer', assignedTier: decision.reviewer, usedTier: reviewCall.usedTier, reason: reviewCall.fallbackReason, triggeringStatus: reviewCall.fallbackTriggeringStatus, violatesSeparation: reviewCall.usedTier === implementerHistory[implementerHistory.length - 1] });
|
|
1347
|
+
emitFallback({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'spec', attempt: specAttemptIndex, role: 'specReviewer', assignedTier: decision.reviewer, usedTier: reviewCall.usedTier, reason: reviewCall.fallbackReason, triggeringStatus: reviewCall.fallbackTriggeringStatus, violatesSeparation: reviewCall.usedTier === implementerHistory[implementerHistory.length - 1], fallbackSeparationRespected: reviewCall.fallbackSeparationRespected, assignedIdentity: reviewCall.assignedIdentity ?? null, usedIdentity: reviewCall.usedIdentity ?? null });
|
|
1194
1348
|
fallbackOverrides.push({ role: 'specReviewer', loop: 'spec', attempt: specAttemptIndex, assigned: decision.reviewer, used: reviewCall.usedTier, reason: reviewCall.fallbackReason, triggeringStatus: reviewCall.fallbackTriggeringStatus, bothUnavailable: false });
|
|
1195
1349
|
}
|
|
1196
1350
|
}
|
|
@@ -1234,7 +1388,7 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
1234
1388
|
qualityReviewT0 = Date.now();
|
|
1235
1389
|
qualityReviewC0 = runningCostUSD();
|
|
1236
1390
|
const initialQualityIterStart = Date.now();
|
|
1237
|
-
const initialQuality = await runWithFallback({ assigned: qualityReviewerTier, providerFor, unavailableTiers: qualityUnavailable, isTransportFailure: (r) => isReviewTransportFailure(r), getStatus: (r) => r.status, makeSyntheticFailure: () => makeSkippedReviewResult('all_tiers_unavailable'), call: (provider) => runQualityReview(provider, packet, specReport ?? finalImplReport, fileContents, finalImplResult.toolCalls, finalImplResult.filesWritten, evidence.block, qualityReviewPromptBuilder, finalImplResult.output, taskDeadlineMs, stallController.signal, wrappedOnProgress) });
|
|
1391
|
+
const initialQuality = await runWithFallback({ assigned: qualityReviewerTier, providerFor, unavailableTiers: qualityUnavailable, isTransportFailure: (r) => isReviewTransportFailure(r), getStatus: (r) => r.status, makeSyntheticFailure: () => makeSkippedReviewResult('all_tiers_unavailable'), forbiddenIdentities: implementerIdentity ? [implementerIdentity] : undefined, call: (provider) => runAccounted(provider, () => runQualityReview(provider, packet, specReport ?? finalImplReport, fileContents, finalImplResult.toolCalls, finalImplResult.filesWritten, evidence.block, qualityReviewPromptBuilder, finalImplResult.output, taskDeadlineMs, stallController.signal, wrappedOnProgress, cwd)) });
|
|
1238
1392
|
qualityReviewDurationMs += Date.now() - initialQualityIterStart;
|
|
1239
1393
|
if (initialQuality.bothUnavailable) {
|
|
1240
1394
|
emitFallbackUnavailable({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'quality', attempt: 0, role: 'qualityReviewer', assignedTier: qualityReviewerTier, reason: initialQuality.unavailableReason });
|
|
@@ -1244,7 +1398,7 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
1244
1398
|
else {
|
|
1245
1399
|
qualityReviewerHistory.push(initialQuality.usedTier);
|
|
1246
1400
|
if (initialQuality.fallbackFired) {
|
|
1247
|
-
emitFallback({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'quality', attempt: 0, role: 'qualityReviewer', assignedTier: qualityReviewerTier, usedTier: initialQuality.usedTier, reason: initialQuality.fallbackReason, triggeringStatus: initialQuality.fallbackTriggeringStatus, violatesSeparation: initialQuality.usedTier === implementerHistory[implementerHistory.length - 1] });
|
|
1401
|
+
emitFallback({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'quality', attempt: 0, role: 'qualityReviewer', assignedTier: qualityReviewerTier, usedTier: initialQuality.usedTier, reason: initialQuality.fallbackReason, triggeringStatus: initialQuality.fallbackTriggeringStatus, violatesSeparation: initialQuality.usedTier === implementerHistory[implementerHistory.length - 1], fallbackSeparationRespected: initialQuality.fallbackSeparationRespected, assignedIdentity: initialQuality.assignedIdentity ?? null, usedIdentity: initialQuality.usedIdentity ?? null });
|
|
1248
1402
|
fallbackOverrides.push({ role: 'qualityReviewer', loop: 'quality', attempt: 0, assigned: qualityReviewerTier, used: initialQuality.usedTier, reason: initialQuality.fallbackReason, triggeringStatus: initialQuality.fallbackTriggeringStatus, bothUnavailable: false });
|
|
1249
1403
|
}
|
|
1250
1404
|
}
|
|
@@ -1288,8 +1442,6 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
1288
1442
|
: 'error',
|
|
1289
1443
|
iterationIndex: 1,
|
|
1290
1444
|
findingsReviewed: annotated.length,
|
|
1291
|
-
findingsFlagged: 0, // legacy field — severity correction tracked elsewhere now
|
|
1292
|
-
severityCorrections: 0, // reviewerSeverity field removed in 3.10.5
|
|
1293
1445
|
meanConfidence,
|
|
1294
1446
|
durationMs: Date.now() - qualityReviewT0,
|
|
1295
1447
|
costUSD: runningCostUSD() !== null && qualityReviewC0 !== null ? runningCostUSD() - qualityReviewC0 : null,
|
|
@@ -1307,6 +1459,11 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
1307
1459
|
emitTaskEvent('cost_check', { stage: 'quality_rework', tripped: true, cost_used_usd: currentCostUSD, cost_cap_usd: maxCostUSD, cost_available: true });
|
|
1308
1460
|
return abortReviewLoop(finalImplResult, 'cost_ceiling', 'cost ceiling reached before quality rework', 'quality');
|
|
1309
1461
|
}
|
|
1462
|
+
const wallClock = Date.now() - taskStartMs;
|
|
1463
|
+
if (wallClock >= MAX_TIME_PRESTOP_RATIO * taskTimeoutMs) {
|
|
1464
|
+
emitTaskEvent('time_check', { stage: 'quality_rework', tripped: true, wallClockMs: wallClock, timeoutMs: taskTimeoutMs });
|
|
1465
|
+
return abortReviewLoop(finalImplResult, 'time_ceiling', `time ceiling reached before quality rework (${wallClock}ms >= 0.8 × ${taskTimeoutMs}ms)`, 'quality', wallClock);
|
|
1466
|
+
}
|
|
1310
1467
|
const decision = pickEscalation({ loop: 'quality', attemptIndex: qualityAttemptIndex, baseTier: resolved.slot });
|
|
1311
1468
|
if (decision.isEscalated)
|
|
1312
1469
|
emitEscalationEvent('quality', qualityAttemptIndex, decision);
|
|
@@ -1314,7 +1471,7 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
1314
1471
|
transitionStage('quality_review', 'quality_rework', { stage: 'quality_rework', stageIndex: 5, reviewRound: qualityAttemptIndex, attemptCap: maxQualityRows }, { attempt: qualityAttemptIndex, attemptCap: maxQualityRows, implTier: decision.impl, reviewerTier: decision.reviewer, escalated: decision.isEscalated });
|
|
1315
1472
|
const feedback = qualityResult.findings.length > 0 ? `\n\n## Quality Review Feedback (round ${qualityAttemptIndex}):\n${qualityResult.findings.map(f => `- ${f}`).join('\n')}` : '';
|
|
1316
1473
|
const reworkTask = withDoneCondition({ ...task, prompt: `${task.prompt}${feedback}` });
|
|
1317
|
-
const reworkCall = await runWithFallback({ assigned: decision.impl, providerFor, unavailableTiers: qualityUnavailable, isTransportFailure: (r) => TRANSPORT_FAILURES.has(r.status) && r.capExhausted === undefined, getStatus: (r) => r.status, makeSyntheticFailure: (assigned) => makeSyntheticRunResult(assigned, 'all_tiers_unavailable'), call: (provider) => delegateWithEscalation(reworkTask, [provider], { explicitlyPinned: true, onProgress: wrappedOnProgress, taskDeadlineMs, abortSignal: stallController.signal, assignedTier: decision.impl }) });
|
|
1474
|
+
const reworkCall = await runWithFallback({ assigned: decision.impl, providerFor, unavailableTiers: qualityUnavailable, isTransportFailure: (r) => TRANSPORT_FAILURES.has(r.status) && r.capExhausted === undefined, getStatus: (r) => r.status, makeSyntheticFailure: (assigned) => makeSyntheticRunResult(assigned, 'all_tiers_unavailable'), call: (provider) => runAccounted(provider, () => delegateWithEscalation(reworkTask, [provider], { explicitlyPinned: true, onProgress: wrappedOnProgress, taskDeadlineMs, abortSignal: stallController.signal, assignedTier: decision.impl })) });
|
|
1318
1475
|
if (reworkCall.fallbackFired || reworkCall.bothUnavailable)
|
|
1319
1476
|
fallbackOverrides.push({ role: 'implementer', loop: 'quality', attempt: qualityAttemptIndex, assigned: decision.impl, used: reworkCall.usedTier, reason: (reworkCall.fallbackReason ?? reworkCall.unavailableReason), triggeringStatus: reworkCall.fallbackTriggeringStatus, bothUnavailable: reworkCall.bothUnavailable });
|
|
1320
1477
|
if (reworkCall.fallbackFired)
|
|
@@ -1323,7 +1480,7 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
1323
1480
|
emitFallbackUnavailable({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'quality', attempt: qualityAttemptIndex, role: 'implementer', assignedTier: decision.impl, reason: reworkCall.unavailableReason });
|
|
1324
1481
|
if (decision.isEscalated)
|
|
1325
1482
|
emitEscalationUnavailable({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'quality', attempt: qualityAttemptIndex, role: 'implementer', wantedTier: decision.impl, reason: reworkCall.unavailableReason });
|
|
1326
|
-
return __recordOnce(adaptForAllTiersUnavailable(reworkCall.result, 'quality', qualityAttemptIndex));
|
|
1483
|
+
return __recordOnce(adaptForAllTiersUnavailable(reworkCall.result, 'quality', qualityAttemptIndex, resolvedModel, reworkCall.salvageResult));
|
|
1327
1484
|
}
|
|
1328
1485
|
finalImplResult = reworkCall.result;
|
|
1329
1486
|
latestAttemptedImpl = { tier: reworkCall.usedTier, result: finalImplResult };
|
|
@@ -1335,7 +1492,7 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
1335
1492
|
commitReworkStage(stats, 'quality_rework', qualityReworkAcc, implementerAgentInfo);
|
|
1336
1493
|
transitionStage('quality_rework', 'quality_review', { stage: 'quality_review', stageIndex: 4, reviewRound: qualityAttemptIndex + 1, attemptCap: maxQualityRows }, null);
|
|
1337
1494
|
const qReReviewIterStart = Date.now();
|
|
1338
|
-
const reviewCall = await runWithFallback({ assigned: decision.reviewer, providerFor, unavailableTiers: qualityUnavailable, isTransportFailure: (r) => isReviewTransportFailure(r), getStatus: (r) => r.status, makeSyntheticFailure: () => makeSkippedReviewResult('all_tiers_unavailable'), call: (provider) => runQualityReview(provider, packet, finalImplReport, fileContents, finalImplResult.toolCalls, finalImplResult.filesWritten, evidence.block, qualityReviewPromptBuilder, finalImplResult.output, taskDeadlineMs, stallController.signal, wrappedOnProgress) });
|
|
1495
|
+
const reviewCall = await runWithFallback({ assigned: decision.reviewer, providerFor, unavailableTiers: qualityUnavailable, isTransportFailure: (r) => isReviewTransportFailure(r), getStatus: (r) => r.status, makeSyntheticFailure: () => makeSkippedReviewResult('all_tiers_unavailable'), forbiddenIdentities: implementerIdentity ? [implementerIdentity] : undefined, call: (provider) => runAccounted(provider, () => runQualityReview(provider, packet, finalImplReport, fileContents, finalImplResult.toolCalls, finalImplResult.filesWritten, evidence.block, qualityReviewPromptBuilder, finalImplResult.output, taskDeadlineMs, stallController.signal, wrappedOnProgress, cwd)) });
|
|
1339
1496
|
qualityReviewDurationMs += Date.now() - qReReviewIterStart;
|
|
1340
1497
|
if (reviewCall.bothUnavailable) {
|
|
1341
1498
|
emitFallbackUnavailable({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'quality', attempt: qualityAttemptIndex, role: 'qualityReviewer', assignedTier: decision.reviewer, reason: reviewCall.unavailableReason });
|
|
@@ -1345,7 +1502,7 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
1345
1502
|
else {
|
|
1346
1503
|
qualityReviewerHistory.push(reviewCall.usedTier);
|
|
1347
1504
|
if (reviewCall.fallbackFired) {
|
|
1348
|
-
emitFallback({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'quality', attempt: qualityAttemptIndex, role: 'qualityReviewer', assignedTier: decision.reviewer, usedTier: reviewCall.usedTier, reason: reviewCall.fallbackReason, triggeringStatus: reviewCall.fallbackTriggeringStatus, violatesSeparation: reviewCall.usedTier === implementerHistory[implementerHistory.length - 1] });
|
|
1505
|
+
emitFallback({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'quality', attempt: qualityAttemptIndex, role: 'qualityReviewer', assignedTier: decision.reviewer, usedTier: reviewCall.usedTier, reason: reviewCall.fallbackReason, triggeringStatus: reviewCall.fallbackTriggeringStatus, violatesSeparation: reviewCall.usedTier === implementerHistory[implementerHistory.length - 1], fallbackSeparationRespected: reviewCall.fallbackSeparationRespected, assignedIdentity: reviewCall.assignedIdentity ?? null, usedIdentity: reviewCall.usedIdentity ?? null });
|
|
1349
1506
|
fallbackOverrides.push({ role: 'qualityReviewer', loop: 'quality', attempt: qualityAttemptIndex, assigned: decision.reviewer, used: reviewCall.usedTier, reason: reviewCall.fallbackReason, triggeringStatus: reviewCall.fallbackTriggeringStatus, bothUnavailable: false });
|
|
1350
1507
|
}
|
|
1351
1508
|
}
|
|
@@ -1515,6 +1672,8 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
1515
1672
|
toolCalls: r.toolCalls?.length ?? 0,
|
|
1516
1673
|
inputTokens: r.usage.inputTokens,
|
|
1517
1674
|
outputTokens: r.usage.outputTokens,
|
|
1675
|
+
cachedTokens: r.usage.cachedTokens ?? null,
|
|
1676
|
+
reasoningTokens: r.usage.reasoningTokens ?? null,
|
|
1518
1677
|
costUSD: r.usage.costUSD,
|
|
1519
1678
|
taskMaxIdleMs: r.taskMaxIdleMs ?? null,
|
|
1520
1679
|
stallTriggered: r.stallTriggered ?? false,
|