@bradtaylorsf/alpha-loop 1.14.0 → 1.14.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +73 -1
- package/dist/cli.js +37 -2
- package/dist/cli.js.map +1 -1
- package/dist/commands/eval.d.ts +22 -0
- package/dist/commands/eval.js +105 -1
- package/dist/commands/eval.js.map +1 -1
- package/dist/commands/evolve-routing.d.ts +24 -0
- package/dist/commands/evolve-routing.js +320 -0
- package/dist/commands/evolve-routing.js.map +1 -0
- package/dist/commands/history.d.ts +2 -0
- package/dist/commands/history.js +95 -1
- package/dist/commands/history.js.map +1 -1
- package/dist/commands/init.d.ts +6 -0
- package/dist/commands/init.js +26 -1
- package/dist/commands/init.js.map +1 -1
- package/dist/commands/report.d.ts +7 -0
- package/dist/commands/report.js +27 -0
- package/dist/commands/report.js.map +1 -0
- package/dist/commands/scan.d.ts +1 -1
- package/dist/commands/scan.js.map +1 -1
- package/dist/engine/agents.d.ts +30 -8
- package/dist/engine/agents.js +94 -10
- package/dist/engine/agents.js.map +1 -1
- package/dist/engine/prerequisites.d.ts +40 -2
- package/dist/engine/prerequisites.js +126 -2
- package/dist/engine/prerequisites.js.map +1 -1
- package/dist/lib/agent.d.ts +39 -2
- package/dist/lib/agent.js +106 -4
- package/dist/lib/agent.js.map +1 -1
- package/dist/lib/config.d.ts +73 -1
- package/dist/lib/config.js +214 -1
- package/dist/lib/config.js.map +1 -1
- package/dist/lib/escalation.d.ts +102 -0
- package/dist/lib/escalation.js +241 -0
- package/dist/lib/escalation.js.map +1 -0
- package/dist/lib/eval-matrix.d.ts +125 -0
- package/dist/lib/eval-matrix.js +317 -0
- package/dist/lib/eval-matrix.js.map +1 -0
- package/dist/lib/eval-report.d.ts +12 -0
- package/dist/lib/eval-report.js +132 -0
- package/dist/lib/eval-report.js.map +1 -0
- package/dist/lib/eval-secret-scan.d.ts +41 -0
- package/dist/lib/eval-secret-scan.js +163 -0
- package/dist/lib/eval-secret-scan.js.map +1 -0
- package/dist/lib/eval.js +7 -4
- package/dist/lib/eval.js.map +1 -1
- package/dist/lib/hardware.d.ts +9 -0
- package/dist/lib/hardware.js +32 -0
- package/dist/lib/hardware.js.map +1 -0
- package/dist/lib/pipeline.d.ts +5 -1
- package/dist/lib/pipeline.js +217 -16
- package/dist/lib/pipeline.js.map +1 -1
- package/dist/lib/prerequisites.js +11 -3
- package/dist/lib/prerequisites.js.map +1 -1
- package/dist/lib/routing-history.d.ts +43 -0
- package/dist/lib/routing-history.js +112 -0
- package/dist/lib/routing-history.js.map +1 -0
- package/dist/lib/routing-promotion.d.ts +95 -0
- package/dist/lib/routing-promotion.js +229 -0
- package/dist/lib/routing-promotion.js.map +1 -0
- package/dist/lib/session.js +13 -0
- package/dist/lib/session.js.map +1 -1
- package/dist/lib/telemetry.d.ts +147 -0
- package/dist/lib/telemetry.js +353 -0
- package/dist/lib/telemetry.js.map +1 -0
- package/package.json +1 -1
package/dist/lib/pipeline.js
CHANGED
|
@@ -5,7 +5,8 @@ import { mkdirSync, readFileSync, writeFileSync, unlinkSync, existsSync } from '
|
|
|
5
5
|
import { join } from 'node:path';
|
|
6
6
|
import { log } from './logger.js';
|
|
7
7
|
import { exec } from './shell.js';
|
|
8
|
-
import { spawnAgent } from './agent.js';
|
|
8
|
+
import { spawnAgent, buildEndpointEnv } from './agent.js';
|
|
9
|
+
import { classifyToolError, classifyToolErrors, EscalationTracker, defaultEscalationStatePath, appendEscalationEventToTrace, } from './escalation.js';
|
|
9
10
|
import { setupWorktree, cleanupWorktree } from './worktree.js';
|
|
10
11
|
import { assignIssue, labelIssue, commentIssue, createPR, mergePR, updateProjectStatus, getIssueComments, } from './github.js';
|
|
11
12
|
import { buildImplementPrompt, buildReviewPrompt, buildAssumptionsPrompt, buildBatchPlanPrompt, buildBatchImplementPrompt, buildBatchReviewPrompt, } from './prompts.js';
|
|
@@ -13,8 +14,9 @@ import { runTests } from './testing.js';
|
|
|
13
14
|
import { runVerify } from './verify.js';
|
|
14
15
|
import { extractLearnings, getLearningContext } from './learning.js';
|
|
15
16
|
import { saveResult, getPreviousResult } from './session.js';
|
|
16
|
-
import { writeTrace, writeTraceMetadata, writeTraceToSubdir, writeConfigSnapshot, writeScores, writeCosts, computeScores, computeCosts, } from './traces.js';
|
|
17
|
-
import { estimateCost } from './config.js';
|
|
17
|
+
import { writeTrace, writeTraceMetadata, writeTraceToSubdir, writeConfigSnapshot, writeScores, writeCosts, computeScores, computeCosts, runDir, } from './traces.js';
|
|
18
|
+
import { estimateCost, getFallbackPolicy, resolveRoutingStage, selectRoutingProfile } from './config.js';
|
|
19
|
+
import { buildStageTelemetry, writeStageTelemetry } from './telemetry.js';
|
|
18
20
|
/** Max diff size to include in learning analysis. */
|
|
19
21
|
const MAX_DIFF_CHARS = 10_000;
|
|
20
22
|
/**
|
|
@@ -212,16 +214,184 @@ export function formatGateFindings(gate, gateType) {
|
|
|
212
214
|
}
|
|
213
215
|
return lines.join('\n');
|
|
214
216
|
}
|
|
217
|
+
/**
 * Best-effort emission of one telemetry record for a single stage invocation.
 *
 * Builds the record with `buildStageTelemetry` and hands it to
 * `writeStageTelemetry` together with the run directory derived from
 * `session.name`. Intended to sit next to each `stepCosts.push` call so every
 * stage run is captured, not only the top-level session.
 *
 * @param session     Session object; only `session.name` is read here.
 * @param issueNum    Issue number attached to the record.
 * @param stage       Stage label forwarded to `buildStageTelemetry`.
 * @param agentResult Agent result forwarded to `buildStageTelemetry`.
 * @param config      Config forwarded to `buildStageTelemetry`.
 * @param ctx         Optional routing context; `endpointName`, `endpointType`
 *                    and `profile` are read when present. May be undefined.
 * @returns undefined. Never throws: any failure is swallowed on purpose.
 */
function recordStageTelemetry(session, issueNum, stage, agentResult, config, ctx) {
    try {
        // A missing ctx simply leaves all three routing tags undefined.
        const { endpointName, endpointType, profile } = ctx ?? {};
        const tags = {
            endpoint: endpointName,
            endpointType,
            profile,
            issueNum,
        };
        const record = buildStageTelemetry(agentResult, stage, config, tags);
        const targetDir = runDir(session.name);
        writeStageTelemetry(targetDir, record);
    }
    catch {
        /* telemetry is best-effort */
    }
}
|
|
236
|
+
/**
 * Run an agent for one pipeline stage, applying the fallback policy.
 *
 * Flow, in order:
 *  1. Resolve the stage's routing (model + endpoint) and publish the endpoint
 *     name/type and routing profile on `ctx` so the caller can tag telemetry.
 *  2. Guardrail: when the policy is `escalate`, an escalate target is
 *     configured, and the tracker reports the stage as reverted, skip the
 *     primary model and run the fallback model immediately.
 *  3. Otherwise run the primary model once. If its output yields two or more
 *     classified tool errors (and escalation is enabled), run the agent again
 *     on the fallback model and return that second result instead.
 *  4. When a policy exists, record the turn with the tracker and, if the
 *     tracker triggers a revert, emit `stage_revert` and `needs_human_input`
 *     events.
 *
 * Nothing is persisted on `options`/`config` here, so the next call resolves
 * the primary model again unless the tracker reports the stage as reverted.
 *
 * @param options Base options passed to `spawnAgent`; `model` and `env` are
 *                overridden per invocation.
 * @param config  Loop configuration (routing and fallback policy are read).
 * @param ctx     Per-turn context: reads `stage`, `issueNum`, `turnIndex`,
 *                `session`, `tracker`, `events`; writes `endpointName`,
 *                `endpointType`, `profile`.
 * @returns The result of the last `spawnAgent` call made for this turn.
 */
async function spawnStageAgent(options, config, ctx) {
    const policy = getFallbackPolicy(config);

    // --- Primary routing -------------------------------------------------
    const routed = resolveRoutingStage(config, ctx.stage);
    const primaryModel = routed?.model ?? options.model;
    const primaryEndpoint = routed?.endpoint;
    let primaryEnv;
    if (primaryEndpoint) {
        primaryEnv = buildEndpointEnv(primaryEndpoint, primaryModel);
    }
    // Expose endpoint info back to the caller so it can tag telemetry.
    const configuredEndpointName = config.routing?.stages?.[ctx.stage]?.endpoint;
    ctx.endpointName = configuredEndpointName ?? 'default';
    ctx.endpointType = primaryEndpoint?.type;
    ctx.profile = selectRoutingProfile(config, ctx.issueNum);

    // --- Fallback routing ------------------------------------------------
    const hasEscalateTarget = policy?.escalate_to !== undefined;
    const escalateTo = policy?.escalate_to;
    let escalateEndpoint;
    if (escalateTo) {
        escalateEndpoint = config.routing?.endpoints?.[escalateTo.endpoint];
    }
    let escalateEnv;
    if (escalateTo && escalateEndpoint) {
        escalateEnv = buildEndpointEnv(escalateEndpoint, escalateTo.model);
    }

    const timestamp = () => new Date().toISOString();

    // Keep the in-memory list authoritative; the trace append is non-fatal.
    const emit = (event) => {
        ctx.events.push(event);
        try {
            appendEscalationEventToTrace(runDir(ctx.session.name), event);
        }
        catch { /* non-fatal */ }
    };

    // All events share one shape; key order is kept stable for serialization.
    const makeEvent = (type, toModel, reason) => ({
        type,
        stage: ctx.stage,
        from_model: primaryModel,
        to_model: toModel,
        reason,
        turn_index: ctx.turnIndex,
        issue: ctx.issueNum,
        ts: timestamp(),
    });

    // Re-point the caller-visible endpoint tags at the fallback, then run it.
    const runOnFallback = () => {
        ctx.endpointName = escalateTo.endpoint;
        ctx.endpointType = escalateEndpoint?.type;
        return spawnAgent({
            ...options,
            model: escalateTo.model,
            env: escalateEnv,
        });
    };

    const escalationEnabled = policy?.on_tool_error === 'escalate' && hasEscalateTarget;

    // Guardrail: stage pinned to fallback — use fallback up-front.
    const pinnedToFallback = escalationEnabled && ctx.tracker.isStageReverted(ctx.stage);
    if (pinnedToFallback && escalateTo) {
        emit(makeEvent('stage_revert_active', escalateTo.model, 'rolling_error_rate_above_threshold'));
        const fallbackResult = await runOnFallback();
        ctx.tracker.recordTurn({
            stage: ctx.stage,
            errored: classifyToolError(fallbackResult.output) !== null,
            escalated: true,
            windowSize: policy.escalation_window_issues,
        });
        return fallbackResult;
    }

    // Normal primary invocation.
    const primaryResult = await spawnAgent({
        ...options,
        model: primaryModel,
        env: primaryEnv,
    });
    const toolErrors = classifyToolErrors(primaryResult.output);
    const errored = toolErrors.length > 0;
    let escalated = false;
    let outcome = primaryResult;

    // Two or more classified tool errors: redo the turn on the fallback model.
    if (escalationEnabled && escalateTo && toolErrors.length >= 2) {
        const mostRecent = toolErrors[toolErrors.length - 1];
        emit(makeEvent('escalation', escalateTo.model, mostRecent.kind));
        outcome = await runOnFallback();
        escalated = true;
    }

    // Without a policy there is no tracker bookkeeping to do.
    if (!policy) {
        return outcome;
    }

    ctx.tracker.recordTurn({
        stage: ctx.stage,
        errored,
        escalated,
        windowSize: policy.escalation_window_issues,
    });
    if (ctx.tracker.maybeTriggerRevert(ctx.stage, policy)) {
        const untilMs = ctx.tracker.revertUntil(ctx.stage) ?? 0;
        emit(makeEvent('stage_revert', escalateTo?.model ?? primaryModel, `rolling_error_rate_above_${policy.escalation_error_threshold}`));
        emit(makeEvent('needs_human_input', escalateTo?.model ?? primaryModel, `stage_pinned_to_fallback_until_${new Date(untilMs).toISOString()}`));
    }
    return outcome;
}
|
|
215
372
|
/**
|
|
216
373
|
* Process a single issue through the full pipeline.
|
|
217
374
|
* Steps: status → worktree → plan → implement → test+retry → verify+retry →
|
|
218
375
|
* review → PR → learnings → update → auto-merge → cleanup
|
|
219
376
|
*/
|
|
220
|
-
export async function processIssue(issueNum, title, body, config, session) {
|
|
377
|
+
export async function processIssue(issueNum, title, body, config, session, trackerOverride) {
|
|
221
378
|
const startTime = Date.now();
|
|
222
379
|
const projectDir = process.cwd();
|
|
223
380
|
const stepCosts = [];
|
|
224
381
|
const stepsCompleted = [];
|
|
382
|
+
const escalationEvents = [];
|
|
383
|
+
const tracker = trackerOverride ?? new EscalationTracker({
|
|
384
|
+
statePath: defaultEscalationStatePath(projectDir),
|
|
385
|
+
});
|
|
386
|
+
let turnCounter = 0;
|
|
387
|
+
const stageCtx = (stage) => ({
|
|
388
|
+
stage,
|
|
389
|
+
issueNum,
|
|
390
|
+
turnIndex: ++turnCounter,
|
|
391
|
+
session,
|
|
392
|
+
tracker,
|
|
393
|
+
events: escalationEvents,
|
|
394
|
+
});
|
|
225
395
|
// Setup logging
|
|
226
396
|
mkdirSync(session.logsDir, { recursive: true });
|
|
227
397
|
const logFile = join(session.logsDir, `issue-${issueNum}.log`);
|
|
@@ -309,7 +479,8 @@ Rules:
|
|
|
309
479
|
- Write ONLY the JSON file. Do not create any other files or make any code changes.`;
|
|
310
480
|
// Trace the plan prompt
|
|
311
481
|
tracePrompt(session.name, issueNum, 'plan', planPrompt);
|
|
312
|
-
const
|
|
482
|
+
const planCtx = stageCtx('plan');
|
|
483
|
+
const planResult = await spawnStageAgent({
|
|
313
484
|
agent: config.agent,
|
|
314
485
|
model: config.model,
|
|
315
486
|
prompt: planPrompt,
|
|
@@ -317,10 +488,11 @@ Rules:
|
|
|
317
488
|
logFile: join(session.logsDir, `issue-${issueNum}-plan.log`),
|
|
318
489
|
verbose: config.verbose,
|
|
319
490
|
timeout: config.agentTimeout * 1000,
|
|
320
|
-
});
|
|
491
|
+
}, config, planCtx);
|
|
321
492
|
// Trace the plan output and costs
|
|
322
493
|
traceOutput(session.name, issueNum, 'plan', planResult.output);
|
|
323
494
|
stepCosts.push(buildStepCost('plan', issueNum, planResult, config));
|
|
495
|
+
recordStageTelemetry(session, issueNum, 'plan', planResult, config, planCtx);
|
|
324
496
|
// Detect transient errors (usage limits) during planning
|
|
325
497
|
if (planResult.exitCode !== 0 && isTransientError(planResult.output)) {
|
|
326
498
|
log.warn(`Agent hit a transient error during planning for #${issueNum} — re-queuing`);
|
|
@@ -381,7 +553,8 @@ Do NOT redo work that is already committed. Build on top of existing progress.\n
|
|
|
381
553
|
});
|
|
382
554
|
// Trace the implement prompt
|
|
383
555
|
tracePrompt(session.name, issueNum, 'implement', implementPrompt);
|
|
384
|
-
const
|
|
556
|
+
const implCtx = stageCtx('build');
|
|
557
|
+
const implResult = await spawnStageAgent({
|
|
385
558
|
agent: config.agent,
|
|
386
559
|
model: config.model,
|
|
387
560
|
prompt: implementPrompt,
|
|
@@ -389,10 +562,11 @@ Do NOT redo work that is already committed. Build on top of existing progress.\n
|
|
|
389
562
|
logFile: join(session.logsDir, `issue-${issueNum}-implement.log`),
|
|
390
563
|
verbose: config.verbose,
|
|
391
564
|
timeout: config.agentTimeout * 1000,
|
|
392
|
-
});
|
|
565
|
+
}, config, implCtx);
|
|
393
566
|
// Trace the implement output and costs
|
|
394
567
|
traceOutput(session.name, issueNum, 'implement', implResult.output);
|
|
395
568
|
stepCosts.push(buildStepCost('implement', issueNum, implResult, config));
|
|
569
|
+
recordStageTelemetry(session, issueNum, 'implement', implResult, config, implCtx);
|
|
396
570
|
if (implResult.exitCode !== 0) {
|
|
397
571
|
// Auto-commit any uncommitted work before deciding on cleanup
|
|
398
572
|
const dirtyCheck = exec('git status --porcelain', { cwd: worktreePath });
|
|
@@ -459,7 +633,8 @@ Do NOT redo work that is already committed. Build on top of existing progress.\n
|
|
|
459
633
|
const fixPrompt = `Tests are failing for issue #${issueNum} (attempt ${attempt} of ${config.maxTestRetries}). Fix the failing tests.\n\nTest output:\n${testOutput}\n\nInstructions:\n1. Read the failing test output carefully and identify the ROOT CAUSE\n2. Fix ONLY code related to issue #${issueNum} — do NOT modify test infrastructure, build scripts, or unrelated files\n3. If tests fail due to environment issues (missing venv, wrong port, missing deps), fix only YOUR code — do NOT rewrite the test runner or package.json scripts\n4. Run the tests again to verify\n5. Commit your fixes with a DESCRIPTIVE message that explains WHAT you fixed and WHY it failed.\n Format: fix(#${issueNum}): <what you changed> — <why it was failing>\n Example: fix(#${issueNum}): use port 5435 for postgres — default 5432 conflicts with host service\n DO NOT use generic messages like "fix: resolve test failures"`;
|
|
460
634
|
// Trace fix prompt
|
|
461
635
|
tracePrompt(session.name, issueNum, `fix-${attempt}`, fixPrompt);
|
|
462
|
-
const
|
|
636
|
+
const fixCtx = stageCtx('test_write');
|
|
637
|
+
const fixResult = await spawnStageAgent({
|
|
463
638
|
agent: config.agent,
|
|
464
639
|
model: config.model,
|
|
465
640
|
prompt: fixPrompt,
|
|
@@ -468,10 +643,11 @@ Do NOT redo work that is already committed. Build on top of existing progress.\n
|
|
|
468
643
|
logFile: join(session.logsDir, `issue-${issueNum}-fix-${attempt}.log`),
|
|
469
644
|
verbose: config.verbose,
|
|
470
645
|
timeout: config.agentTimeout * 1000,
|
|
471
|
-
});
|
|
646
|
+
}, config, fixCtx);
|
|
472
647
|
// Trace fix output and costs
|
|
473
648
|
traceOutput(session.name, issueNum, `fix-${attempt}`, fixResult.output);
|
|
474
649
|
stepCosts.push(buildStepCost('test_fix', issueNum, fixResult, config));
|
|
650
|
+
recordStageTelemetry(session, issueNum, 'test_fix', fixResult, config, fixCtx);
|
|
475
651
|
stepsCompleted.push(`fix-${attempt}`);
|
|
476
652
|
// Auto-commit fixes
|
|
477
653
|
const fixStatus = exec('git status --porcelain', { cwd: worktreePath });
|
|
@@ -519,7 +695,8 @@ Do NOT redo work that is already committed. Build on top of existing progress.\n
|
|
|
519
695
|
});
|
|
520
696
|
// Trace review prompt
|
|
521
697
|
tracePrompt(session.name, issueNum, `review${attempt > 1 ? `-${attempt}` : ''}`, reviewPrompt);
|
|
522
|
-
const
|
|
698
|
+
const reviewCtx = stageCtx('review');
|
|
699
|
+
const reviewResult = await spawnStageAgent({
|
|
523
700
|
agent: config.agent,
|
|
524
701
|
model: config.reviewModel,
|
|
525
702
|
prompt: reviewPrompt,
|
|
@@ -527,10 +704,11 @@ Do NOT redo work that is already committed. Build on top of existing progress.\n
|
|
|
527
704
|
logFile: join(session.logsDir, `issue-${issueNum}-review${attempt > 1 ? `-${attempt}` : ''}.log`),
|
|
528
705
|
verbose: config.verbose,
|
|
529
706
|
timeout: config.agentTimeout * 1000,
|
|
530
|
-
});
|
|
707
|
+
}, config, reviewCtx);
|
|
531
708
|
// Trace review output and costs
|
|
532
709
|
traceOutput(session.name, issueNum, `review${attempt > 1 ? `-${attempt}` : ''}`, reviewResult.output);
|
|
533
710
|
stepCosts.push(buildStepCost('review', issueNum, reviewResult, config));
|
|
711
|
+
recordStageTelemetry(session, issueNum, 'review', reviewResult, config, reviewCtx);
|
|
534
712
|
reviewOutput = reviewResult.output;
|
|
535
713
|
}
|
|
536
714
|
catch {
|
|
@@ -554,7 +732,8 @@ Do NOT redo work that is already committed. Build on top of existing progress.\n
|
|
|
554
732
|
const fixPrompt = `The code review for issue #${issueNum} found problems that need to be fixed.\n\n${findings}\n\nInstructions:\n1. Address each finding listed above\n2. Run tests to make sure nothing is broken\n3. Commit your fixes with: git commit -m "fix(#${issueNum}): address review findings"`;
|
|
555
733
|
// Trace review-fix prompt
|
|
556
734
|
tracePrompt(session.name, issueNum, `review-fix-${attempt}`, fixPrompt);
|
|
557
|
-
const
|
|
735
|
+
const reviewFixCtx = stageCtx('build');
|
|
736
|
+
const reviewFixResult = await spawnStageAgent({
|
|
558
737
|
agent: config.agent,
|
|
559
738
|
model: config.model,
|
|
560
739
|
prompt: fixPrompt,
|
|
@@ -563,10 +742,11 @@ Do NOT redo work that is already committed. Build on top of existing progress.\n
|
|
|
563
742
|
logFile: join(session.logsDir, `issue-${issueNum}-review-fix-${attempt}.log`),
|
|
564
743
|
verbose: config.verbose,
|
|
565
744
|
timeout: config.agentTimeout * 1000,
|
|
566
|
-
});
|
|
745
|
+
}, config, reviewFixCtx);
|
|
567
746
|
// Trace review-fix output and costs
|
|
568
747
|
traceOutput(session.name, issueNum, `review-fix-${attempt}`, reviewFixResult.output);
|
|
569
748
|
stepCosts.push(buildStepCost('review', issueNum, reviewFixResult, config));
|
|
749
|
+
recordStageTelemetry(session, issueNum, 'review_fix', reviewFixResult, config, reviewFixCtx);
|
|
570
750
|
// Auto-commit if agent didn't
|
|
571
751
|
const fixStatus = exec('git status --porcelain', { cwd: worktreePath });
|
|
572
752
|
if (fixStatus.stdout.trim()) {
|
|
@@ -647,7 +827,8 @@ Do NOT redo work that is already committed. Build on top of existing progress.\n
|
|
|
647
827
|
const fixPrompt = `Live verification failed for issue #${issueNum} (attempt ${attempt} of ${config.maxTestRetries}).\n\n${findings}\n\nInstructions:\n1. Read the verification findings and identify the ROOT CAUSE\n2. Fix ONLY code related to issue #${issueNum}\n3. Run tests to make sure nothing is broken\n4. Commit your fixes with: git commit -m "fix(#${issueNum}): address verification findings"`;
|
|
648
828
|
// Trace verify-fix prompt
|
|
649
829
|
tracePrompt(session.name, issueNum, `verify-fix-${attempt}`, fixPrompt);
|
|
650
|
-
const
|
|
830
|
+
const verifyFixCtx = stageCtx('test_exec');
|
|
831
|
+
const verifyFixResult = await spawnStageAgent({
|
|
651
832
|
agent: config.agent,
|
|
652
833
|
model: config.model,
|
|
653
834
|
prompt: fixPrompt,
|
|
@@ -656,10 +837,11 @@ Do NOT redo work that is already committed. Build on top of existing progress.\n
|
|
|
656
837
|
logFile: join(session.logsDir, `issue-${issueNum}-verify-fix-${attempt}.log`),
|
|
657
838
|
verbose: config.verbose,
|
|
658
839
|
timeout: config.agentTimeout * 1000,
|
|
659
|
-
});
|
|
840
|
+
}, config, verifyFixCtx);
|
|
660
841
|
// Trace verify-fix output and costs
|
|
661
842
|
traceOutput(session.name, issueNum, `verify-fix-${attempt}`, verifyFixResult.output);
|
|
662
843
|
stepCosts.push(buildStepCost('verify', issueNum, verifyFixResult, config));
|
|
844
|
+
recordStageTelemetry(session, issueNum, 'verify_fix', verifyFixResult, config, verifyFixCtx);
|
|
663
845
|
// Auto-commit if agent didn't
|
|
664
846
|
const fixStatus = exec('git status --porcelain', { cwd: worktreePath });
|
|
665
847
|
if (fixStatus.stdout.trim()) {
|
|
@@ -749,6 +931,9 @@ Do NOT redo work that is already committed. Build on top of existing progress.\n
|
|
|
749
931
|
});
|
|
750
932
|
traceOutput(session.name, issueNum, 'assumptions', assumptionsResult.output);
|
|
751
933
|
stepCosts.push(buildStepCost('assumptions', issueNum, assumptionsResult, config));
|
|
934
|
+
recordStageTelemetry(session, issueNum, 'assumptions', assumptionsResult, config, {
|
|
935
|
+
profile: selectRoutingProfile(config, issueNum),
|
|
936
|
+
});
|
|
752
937
|
if (assumptionsResult.exitCode === 0 && assumptionsResult.output.trim()) {
|
|
753
938
|
commentIssue(config.repo, issueNum, `## AI Implementation Notes\n\n${assumptionsResult.output.trim()}\n\n---\n_Posted by alpha-loop for user validation._`);
|
|
754
939
|
log.success('Posted assumptions/decisions comment');
|
|
@@ -912,6 +1097,7 @@ Do NOT redo work that is already committed. Build on top of existing progress.\n
|
|
|
912
1097
|
verifySkipped,
|
|
913
1098
|
duration,
|
|
914
1099
|
filesChanged,
|
|
1100
|
+
escalationEvents: escalationEvents.length > 0 ? escalationEvents : undefined,
|
|
915
1101
|
};
|
|
916
1102
|
// Save result to session
|
|
917
1103
|
saveResult(session, result);
|
|
@@ -1019,6 +1205,9 @@ Do NOT redo work that is already committed. Build on top of existing progress.\n
|
|
|
1019
1205
|
});
|
|
1020
1206
|
traceOutput(session.name, issues[0].number, 'batch-plan', planResult.output);
|
|
1021
1207
|
stepCosts.push(buildStepCost('plan', issues[0].number, planResult, config));
|
|
1208
|
+
recordStageTelemetry(session, issues[0].number, 'batch-plan', planResult, config, {
|
|
1209
|
+
profile: selectRoutingProfile(config, issues[0].number),
|
|
1210
|
+
});
|
|
1022
1211
|
if (planResult.exitCode !== 0 && isTransientError(planResult.output)) {
|
|
1023
1212
|
log.warn('Agent hit a transient error during batch planning — re-queuing all issues');
|
|
1024
1213
|
for (const issue of issues)
|
|
@@ -1073,6 +1262,9 @@ Do NOT redo work that is already committed. Build on top of existing progress.\n
|
|
|
1073
1262
|
});
|
|
1074
1263
|
traceOutput(session.name, issues[0].number, 'batch-implement', implResult.output);
|
|
1075
1264
|
stepCosts.push(buildStepCost('implement', issues[0].number, implResult, config));
|
|
1265
|
+
recordStageTelemetry(session, issues[0].number, 'batch-implement', implResult, config, {
|
|
1266
|
+
profile: selectRoutingProfile(config, issues[0].number),
|
|
1267
|
+
});
|
|
1076
1268
|
if (implResult.exitCode !== 0) {
|
|
1077
1269
|
// Auto-commit any uncommitted work before deciding on cleanup
|
|
1078
1270
|
const dirtyCheck = exec('git status --porcelain', { cwd: worktreePath });
|
|
@@ -1152,6 +1344,9 @@ Do NOT redo work that is already committed. Build on top of existing progress.\n
|
|
|
1152
1344
|
});
|
|
1153
1345
|
traceOutput(session.name, issues[0].number, `batch-fix-${attempt}`, fixResult.output);
|
|
1154
1346
|
stepCosts.push(buildStepCost('test_fix', issues[0].number, fixResult, config));
|
|
1347
|
+
recordStageTelemetry(session, issues[0].number, 'batch-test_fix', fixResult, config, {
|
|
1348
|
+
profile: selectRoutingProfile(config, issues[0].number),
|
|
1349
|
+
});
|
|
1155
1350
|
const fixStatus = exec('git status --porcelain', { cwd: worktreePath });
|
|
1156
1351
|
if (fixStatus.stdout.trim()) {
|
|
1157
1352
|
exec('git add -A', { cwd: worktreePath });
|
|
@@ -1193,6 +1388,9 @@ Do NOT redo work that is already committed. Build on top of existing progress.\n
|
|
|
1193
1388
|
});
|
|
1194
1389
|
traceOutput(session.name, issues[0].number, `batch-review${attempt > 1 ? `-${attempt}` : ''}`, reviewResult.output);
|
|
1195
1390
|
stepCosts.push(buildStepCost('review', issues[0].number, reviewResult, config));
|
|
1391
|
+
recordStageTelemetry(session, issues[0].number, 'batch-review', reviewResult, config, {
|
|
1392
|
+
profile: selectRoutingProfile(config, issues[0].number),
|
|
1393
|
+
});
|
|
1196
1394
|
reviewOutput = reviewResult.output;
|
|
1197
1395
|
}
|
|
1198
1396
|
catch {
|
|
@@ -1224,6 +1422,9 @@ Do NOT redo work that is already committed. Build on top of existing progress.\n
|
|
|
1224
1422
|
});
|
|
1225
1423
|
traceOutput(session.name, issues[0].number, `batch-review-fix-${attempt}`, reviewFixResult.output);
|
|
1226
1424
|
stepCosts.push(buildStepCost('review', issues[0].number, reviewFixResult, config));
|
|
1425
|
+
recordStageTelemetry(session, issues[0].number, 'batch-review_fix', reviewFixResult, config, {
|
|
1426
|
+
profile: selectRoutingProfile(config, issues[0].number),
|
|
1427
|
+
});
|
|
1227
1428
|
const fixStatus = exec('git status --porcelain', { cwd: worktreePath });
|
|
1228
1429
|
if (fixStatus.stdout.trim()) {
|
|
1229
1430
|
exec('git add -A', { cwd: worktreePath });
|