superlab 0.1.12 → 0.1.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,830 @@
1
+ const fs = require("node:fs");
2
+ const { spawn } = require("node:child_process");
3
+ const { refreshContext } = require("./context.cjs");
4
+ const { parseEvalProtocol, validateEvalProtocol } = require("./eval_protocol.cjs");
5
+ const {
6
+ isMeaningful,
7
+ parseDurationMs,
8
+ parseInteger,
9
+ sleep,
10
+ } = require("./auto_common.cjs");
11
+ const {
12
+ detectFrozenCoreChanges,
13
+ resolveStageCommand,
14
+ snapshotFrozenCore,
15
+ snapshotPaths,
16
+ splitAutoStages,
17
+ stageContractSnapshot,
18
+ validateAutoMode,
19
+ validateAutoStatus,
20
+ verifyPromotionWriteback,
21
+ verifyStageContract,
22
+ } = require("./auto_contracts.cjs");
23
+ const {
24
+ parseAutoMode,
25
+ parseAutoStatus,
26
+ readWorkflowLanguage,
27
+ resolveRequiredArtifact,
28
+ writeAutoOutcome,
29
+ writeAutoStatus,
30
+ } = require("./auto_state.cjs");
31
+
32
/**
 * Turn a ladder transition value into a trimmed string.
 *
 * @param {string|null|undefined} value - Raw transition text.
 * @returns {string} The trimmed text, or "" when `value` is falsy.
 */
function normalizeTransition(value) {
  const raw = value ? value : "";
  return raw.trim();
}
35
+
36
/**
 * Report whether a transition names one of the success terminals.
 * Matching ignores surrounding whitespace and letter case.
 *
 * @param {string|null|undefined} value - Raw transition text.
 * @returns {boolean} True for "success", "terminal-success" or "campaign-success".
 */
function isSuccessTransition(value) {
  const token = (value ? value : "").trim().toLowerCase();
  switch (token) {
    case "success":
    case "terminal-success":
    case "campaign-success":
      return true;
    default:
      return false;
  }
}
39
+
40
/**
 * Report whether a transition names one of the stop terminals.
 * Matching ignores surrounding whitespace and letter case.
 *
 * @param {string|null|undefined} value - Raw transition text.
 * @returns {boolean} True for "stop", "campaign-stop" or "terminal-stop".
 */
function isStopTransition(value) {
  const token = (value ? value : "").trim().toLowerCase();
  return token === "stop" || token === "campaign-stop" || token === "terminal-stop";
}
43
+
44
/**
 * Run a stage command through the shell and poll until it finishes,
 * writing a "running" heartbeat to the auto status on every poll.
 *
 * @param {object} options
 * @param {string} options.targetDir - Working directory for the command and status writes.
 * @param {string} options.stage - Stage name used in status and error messages.
 * @param {string} options.command - Shell command line to execute.
 * @param {number} options.pollIntervalMs - Delay between heartbeats.
 * @param {number} options.deadlineMs - Epoch milliseconds after which the command is killed.
 * @param {string} options.startedAt - Start timestamp copied into each heartbeat.
 * @param {object} options.status - Base status fields spread into each heartbeat.
 * @param {string} options.lang - Language option forwarded to `writeAutoStatus`.
 * @param {string} [options.rungId] - Rung identifier reported as `currentRung`.
 * @param {string} [options.watchTarget] - Watch target reported in the heartbeat.
 * @param {string} [options.nextRung] - Next rung reported in the heartbeat.
 * @returns {Promise<{stdout: string, stderr: string}>} Trimmed captured output.
 * @throws {Error} When the command cannot be spawned, times out, is killed by a
 *   signal, or exits with a non-zero code.
 */
async function runCommandWithPolling({
  targetDir,
  stage,
  command,
  pollIntervalMs,
  deadlineMs,
  startedAt,
  status,
  lang,
  rungId = "",
  watchTarget = "",
  nextRung = "",
}) {
  const child = spawn(command, {
    cwd: targetDir,
    shell: true,
    stdio: ["ignore", "pipe", "pipe"],
  });

  let stdout = "";
  let stderr = "";
  let exitCode = null;
  let signalCode = null;
  let spawnError = null;

  // Capture stops once roughly 4000 characters are buffered; the text is only
  // used for error messages and the returned summary.
  // The streams can be missing when spawning fails early, hence the `?.`.
  child.stdout?.on("data", (chunk) => {
    if (stdout.length < 4000) {
      stdout += chunk.toString("utf8");
    }
  });
  child.stderr?.on("data", (chunk) => {
    if (stderr.length < 4000) {
      stderr += chunk.toString("utf8");
    }
  });
  // Without this listener a spawn failure (for example a missing cwd) raises an
  // unhandled "error" event and crashes the process. No "exit" event follows
  // such a failure, so the loop below must also watch for it.
  child.on("error", (error) => {
    spawnError = error;
  });
  child.on("exit", (code, signal) => {
    exitCode = code;
    signalCode = signal;
  });

  while (exitCode === null && signalCode === null && spawnError === null) {
    if (Date.now() > deadlineMs) {
      child.kill("SIGTERM");
      // Short grace period before escalating to SIGKILL.
      await sleep(50);
      if (child.exitCode === null && child.signalCode === null) {
        child.kill("SIGKILL");
      }
      throw new Error(`auto stage timed out: ${stage}`);
    }

    writeAutoStatus(
      targetDir,
      {
        ...status,
        status: "running",
        currentStage: stage,
        currentCommand: command,
        currentRung: rungId,
        watchTarget,
        nextRung,
        startedAt,
        lastHeartbeat: new Date().toISOString(),
        decision: rungId ? `running rung ${rungId}` : `running stage ${stage}`,
      },
      { lang }
    );
    await sleep(pollIntervalMs);
  }

  if (spawnError !== null) {
    throw new Error(`auto stage failed: ${stage} | ${spawnError.message}`, { cause: spawnError });
  }

  // A signal-terminated child reports a null exit code; treat that as failure.
  if ((exitCode ?? 1) !== 0) {
    throw new Error(`auto stage failed: ${stage}${stderr ? ` | ${stderr.trim()}` : stdout ? ` | ${stdout.trim()}` : ""}`);
  }

  return {
    stdout: stdout.trim(),
    stderr: stderr.trim(),
  };
}
121
+
122
/**
 * Run a check command through the shell and map its exit code to a match flag.
 *
 * Exit code 0 resolves with `matched: true` and exit code 1 with
 * `matched: false`. Any other exit code, a terminating signal, a spawn error
 * or a timeout rejects.
 *
 * @param {object} options
 * @param {string} options.targetDir - Working directory for the command.
 * @param {string} options.label - Human-readable name used in error messages.
 * @param {string} options.command - Shell command line to execute.
 * @param {number} options.deadlineMs - Epoch milliseconds of the overall deadline.
 * @returns {Promise<{matched: boolean, stdout: string, stderr: string}>}
 */
function runCheckCommand({ targetDir, label, command, deadlineMs }) {
  // The check always gets at least one second, even if the deadline has passed.
  const budgetMs = Math.max(1000, deadlineMs - Date.now());
  const proc = spawn(command, {
    stdio: ["ignore", "pipe", "pipe"],
    shell: true,
    cwd: targetDir,
  });

  return new Promise((resolve, reject) => {
    const captured = { stdout: "", stderr: "" };
    let finished = false;

    // Only the first outcome (timeout, error or exit) is acted upon.
    const settleOnce = (action) => {
      if (finished) {
        return;
      }
      finished = true;
      action();
    };

    // Prefer stderr over stdout when describing a failing check.
    const describeOutput = () => {
      if (captured.stderr) {
        return ` | ${captured.stderr.trim()}`;
      }
      if (captured.stdout) {
        return ` | ${captured.stdout.trim()}`;
      }
      return "";
    };

    const timer = setTimeout(() => {
      settleOnce(() => {
        proc.kill("SIGTERM");
        reject(new Error(`${label} timed out`));
      });
    }, budgetMs);

    // Stop appending once roughly 4000 characters have been buffered.
    const collect = (key) => (chunk) => {
      if (captured[key].length < 4000) {
        captured[key] += chunk.toString("utf8");
      }
    };
    proc.stdout.on("data", collect("stdout"));
    proc.stderr.on("data", collect("stderr"));

    proc.on("error", (error) => {
      settleOnce(() => {
        clearTimeout(timer);
        reject(error);
      });
    });

    proc.on("exit", (code, signal) => {
      settleOnce(() => {
        clearTimeout(timer);
        if (signal) {
          reject(new Error(`${label} exited with signal ${signal}`));
          return;
        }
        if (code === 0 || code === 1) {
          resolve({
            matched: code === 0,
            stdout: captured.stdout.trim(),
            stderr: captured.stderr.trim(),
          });
          return;
        }
        reject(new Error(`${label} failed with exit code ${code}${describeOutput()}`));
      });
    });
  });
}
189
+
190
/**
 * Decide whether the configured terminal goal has been met.
 *
 * - "task-completion": met once at least one iteration has run.
 * - "rounds": met once `iteration` reaches the parsed round target (default 1).
 * - anything else: decided by running `mode.successCheckCommand`.
 *
 * @param {object} options
 * @param {object} options.mode - Parsed auto-mode configuration.
 * @param {number} options.iteration - Number of iterations completed so far.
 * @param {string} options.targetDir - Working directory for the success check.
 * @param {number} options.deadlineMs - Epoch milliseconds of the overall deadline.
 * @returns {Promise<{matched: boolean, reason: string}>}
 */
async function evaluateTerminalGoal({ mode, iteration, targetDir, deadlineMs }) {
  const goalType = mode.terminalGoalType;

  if (goalType === "task-completion") {
    const reached = iteration >= 1;
    return { matched: reached, reason: "task-completion terminal goal reached" };
  }

  if (goalType === "rounds") {
    const roundsTarget = parseInteger(mode.terminalGoalTarget, 1);
    const reached = iteration >= roundsTarget;
    return { matched: reached, reason: `round target ${roundsTarget} reached` };
  }

  const { matched, stdout, stderr } = await runCheckCommand({
    targetDir,
    label: `success check after iteration ${iteration}`,
    command: mode.successCheckCommand,
    deadlineMs,
  });
  // Prefer the check's own output as the reason; fall back to a generic message.
  const reason = stdout || stderr || `metric threshold matched after iteration ${iteration}`;
  return { matched, reason };
}
217
+
218
/**
 * Run the bounded auto-mode orchestration for a workspace.
 *
 * After validating the auto-mode configuration, the eval protocol and the
 * approval status, this either walks the experiment ladder (when the eval
 * protocol defines rungs) or repeats the loop stages up to the iteration
 * limit and then runs the final stages. Progress is persisted through
 * `writeAutoStatus`, and one outcome record is written through
 * `writeAutoOutcome` when the run completes, stops or fails.
 *
 * @param {object} options
 * @param {string} options.targetDir - Workspace directory to orchestrate.
 * @param {Date} [options.now] - Clock value used for the initial status timestamps.
 * @returns {Promise<{mode: object, status: object, executedStages: string[], outcome: object}>}
 *   The final status and outcome for a completed or stopped run.
 * @throws {Error} When validation fails, approval is missing, a stage has no
 *   command, or the run fails. Errors raised after a failed outcome has been
 *   recorded carry `autoModeFatal === true`.
 */
async function startAutoMode({ targetDir, now = new Date() }) {
  const mode = parseAutoMode(targetDir);
  const evalProtocol = parseEvalProtocol(targetDir);
  const issues = validateAutoMode(mode, null, evalProtocol);
  if (issues.length > 0) {
    throw new Error(issues.join(" | "));
  }
  const evalProtocolIssues = validateEvalProtocol(targetDir);
  if (evalProtocolIssues.length > 0) {
    throw new Error(evalProtocolIssues.join(" | "));
  }
  if (mode.approvalStatus !== "approved") {
    throw new Error(`approval status must be approved before auto mode can start (current: ${mode.approvalStatus || "missing"})`);
  }

  const lang = readWorkflowLanguage(targetDir);
  const timestamp = now.toISOString();
  const status = {
    status: "running",
    currentStage: mode.allowedStages[0] || "run",
    currentCommand: "",
    activeRunId: "",
    iterationCount: "0",
    startedAt: timestamp,
    lastHeartbeat: timestamp,
    lastCheckpoint: "",
    lastSummary: "",
    decision: "armed for bounded auto orchestration",
  };
  writeAutoStatus(targetDir, status, { lang });

  const startedAt = status.startedAt;
  const pollIntervalMs = parseDurationMs(mode.pollInterval, 1000);
  const maxWallClockMs = parseDurationMs(mode.maxWallClockTime, 60 * 60 * 1000);
  const deadlineMs = Date.now() + maxWallClockMs;
  const maxFailures = parseInteger(mode.maxFailures, 0);
  const maxIterations = parseInteger(mode.maxIterations, 1);
  // At least one iteration is always allowed.
  const iterationLimit = Math.max(1, maxIterations);
  const requiredArtifact = resolveRequiredArtifact(targetDir, mode.requiredTerminalArtifact);
  const frozenCoreSnapshot = snapshotFrozenCore(targetDir, mode.frozenCore);
  const { loopStages, finalStages } = splitAutoStages(mode.allowedStages);
  const executedStages = [];
  let failureCount = 0;
  let iterationsCompleted = 0;
  let currentStatus = { ...status };
  let successReached = false;
  let stopMatched = false;
  let promotionApplied = false;
  let stopReason = "";
  let finalRung = "";

  // Eval-protocol fields copied verbatim into every outcome record and result.
  const protocolFields = () => ({
    experimentLadder: evalProtocol.experimentLadder,
    metricGlossary: evalProtocol.metricGlossary,
    metricSourcePapers: evalProtocol.metricSourcePapers,
    metricImplementationSource: evalProtocol.metricImplementationSource,
    comparisonSourcePapers: evalProtocol.comparisonSourcePapers,
    comparisonImplementationSource: evalProtocol.comparisonImplementationSource,
    deviationFromOriginalImplementation: evalProtocol.deviationFromOriginalImplementation,
  });

  // Persist the outcome record for the run and refresh the derived context.
  const recordOutcome = ({ outcomeStatus, goalReached, reason, finalArtifact }) => {
    writeAutoOutcome(
      targetDir,
      {
        objective: mode.objective,
        ...protocolFields(),
        terminalGoalType: mode.terminalGoalType,
        terminalGoalTarget: mode.terminalGoalTarget,
        requiredTerminalArtifact: mode.requiredTerminalArtifact,
        status: outcomeStatus,
        goalReached,
        stopReason: reason,
        promotionApplied,
        finalArtifact,
        finalRung,
        executedStages: executedStages.join(", "),
        iterationsCompleted: String(iterationsCompleted),
        startedAt,
        finishedAt: new Date().toISOString(),
      },
      { lang }
    );
    refreshContext({ targetDir });
  };

  // Build the value returned to the caller for a completed or stopped run.
  const buildResult = ({ goalReached, reason }) => ({
    mode,
    status: currentStatus,
    executedStages,
    outcome: {
      ...protocolFields(),
      goalType: mode.terminalGoalType,
      goalTarget: mode.terminalGoalTarget,
      goalReached,
      stopReason: reason,
      promotionApplied,
      finalArtifact: requiredArtifact.relativePath,
      finalRung,
    },
  });

  const writeRunningStatus = (overrides = {}) => {
    currentStatus = {
      ...currentStatus,
      status: "running",
      lastHeartbeat: new Date().toISOString(),
      ...overrides,
    };
    writeAutoStatus(targetDir, currentStatus, { lang });
  };

  // Record a failed status and outcome, then abort the run by throwing.
  const failAutoMode = (message) => {
    currentStatus = {
      ...currentStatus,
      status: "failed",
      lastHeartbeat: new Date().toISOString(),
      decision: message,
    };
    writeAutoStatus(targetDir, currentStatus, { lang });
    recordOutcome({ outcomeStatus: "failed", goalReached: false, reason: message, finalArtifact: "" });
    const failure = new Error(message);
    // Marks the failure as already recorded so retry logic does not swallow it.
    failure.autoModeFatal = true;
    throw failure;
  };

  // Record a stopped status and outcome and build the value to return.
  const finishStopped = () => {
    const reason = stopReason || "stopped by stop condition";
    currentStatus = {
      ...currentStatus,
      status: "stopped",
      currentStage: executedStages.at(-1) || currentStatus.currentStage,
      lastHeartbeat: new Date().toISOString(),
      decision: reason,
    };
    writeAutoStatus(targetDir, currentStatus, { lang });
    recordOutcome({
      outcomeStatus: "stopped",
      goalReached: false,
      reason,
      finalArtifact: requiredArtifact.relativePath,
    });
    return buildResult({ goalReached: false, reason });
  };

  // Run one stage (optionally as a ladder rung), retrying failed attempts
  // until the shared failure budget is spent.
  const runStage = async ({
    stage,
    commandOverride = "",
    rungId = "",
    watchTarget = "",
    nextRung = "",
  }) => {
    const command = resolveStageCommand(mode, stage, commandOverride);
    if (!isMeaningful(command)) {
      throw new Error(`missing auto command for stage: ${stage}`);
    }

    let stageCompleted = false;
    while (!stageCompleted) {
      try {
        const contract = stageContractSnapshot(targetDir, stage);
        await runCommandWithPolling({
          targetDir,
          stage,
          command,
          pollIntervalMs,
          deadlineMs,
          startedAt,
          status: currentStatus,
          lang,
          rungId,
          watchTarget,
          nextRung,
        });
        verifyStageContract({ stage, snapshot: contract.snapshot });
        executedStages.push(stage);
        finalRung = rungId || finalRung;
        writeRunningStatus({
          currentStage: stage,
          currentCommand: command,
          currentRung: rungId || currentStatus.currentRung,
          watchTarget,
          nextRung,
          decision: rungId ? `completed rung ${rungId}` : `completed stage ${stage}`,
        });
        refreshContext({ targetDir });

        const frozenCoreChanges = detectFrozenCoreChanges(frozenCoreSnapshot);
        if (frozenCoreChanges.length > 0) {
          failAutoMode(`frozen core changed: ${frozenCoreChanges.join(", ")}`);
        }

        const stopCheck = await runCheckCommand({
          targetDir,
          label: `stop check after ${rungId || stage}`,
          command: mode.stopCheckCommand,
          deadlineMs,
        });
        if (stopCheck.matched) {
          stopMatched = true;
          stopReason =
            stopCheck.stdout || stopCheck.stderr || `stop condition matched after ${rungId || stage}`;
        }

        stageCompleted = true;
      } catch (error) {
        // A failure already recorded by failAutoMode (such as a frozen-core
        // violation) is final: do not count it as a retryable stage failure
        // and do not write the failure outcome a second time.
        if (error?.autoModeFatal) {
          throw error;
        }
        failureCount += 1;
        if (failureCount > maxFailures) {
          failAutoMode(error.message);
        }
        writeRunningStatus({
          currentStage: stage,
          currentCommand: command,
          currentRung: rungId || currentStatus.currentRung,
          watchTarget,
          nextRung,
          decision: `retrying ${rungId || stage} after failure ${failureCount}`,
        });
      }
    }
  };

  // Run the promotion command at most once, when a promotion check is
  // configured and it matches.
  const maybeApplyPromotion = async (label) => {
    if (promotionApplied || !isMeaningful(mode.promotionCheckCommand)) {
      return;
    }
    const promotionCheck = await runCheckCommand({
      targetDir,
      label: `promotion check after ${label}`,
      command: mode.promotionCheckCommand,
      deadlineMs,
    });
    if (!promotionCheck.matched) {
      return;
    }
    const promotionSnapshot = snapshotPaths(targetDir, [
      ".lab/context/data-decisions.md",
      ".lab/context/decisions.md",
      ".lab/context/state.md",
      ".lab/context/session-brief.md",
    ]);
    await runCommandWithPolling({
      targetDir,
      stage: "promotion",
      command: mode.promotionCommand,
      pollIntervalMs,
      deadlineMs,
      startedAt,
      status: currentStatus,
      lang,
      rungId: currentStatus.currentRung || "",
      watchTarget: currentStatus.watchTarget || "",
      nextRung: currentStatus.nextRung || "",
    });
    writeRunningStatus({
      currentCommand: mode.promotionCommand,
      decision: `promotion policy matched after ${label}`,
    });
    promotionApplied = true;
    refreshContext({ targetDir });
    verifyPromotionWriteback(targetDir, promotionSnapshot);
    const frozenCoreChangesAfterPromotion = detectFrozenCoreChanges(frozenCoreSnapshot);
    if (frozenCoreChangesAfterPromotion.length > 0) {
      failAutoMode(`frozen core changed: ${frozenCoreChangesAfterPromotion.join(", ")}`);
    }
  };

  if (evalProtocol.experimentRungs.length > 0) {
    // Ladder mode: follow rung transitions starting from the first rung.
    const rungMap = new Map(evalProtocol.experimentRungs.map((rung) => [rung.id, rung]));
    let currentRung = evalProtocol.experimentRungs[0];

    while (currentRung && iterationsCompleted < iterationLimit) {
      if (!mode.allowedStages.includes(currentRung.stage)) {
        failAutoMode(`experiment ladder rung ${currentRung.id} uses stage outside allowed stages: ${currentRung.stage}`);
      }
      iterationsCompleted += 1;
      const command = resolveStageCommand(mode, currentRung.stage, currentRung.command);
      writeRunningStatus({
        currentStage: currentRung.stage,
        currentCommand: command,
        iterationCount: String(iterationsCompleted),
        currentRung: currentRung.id,
        watchTarget: currentRung.watch,
        nextRung: "",
        decision: `starting rung ${currentRung.id}`,
      });

      await runStage({
        stage: currentRung.stage,
        commandOverride: currentRung.command,
        rungId: currentRung.id,
        watchTarget: currentRung.watch,
      });

      if (stopMatched) {
        // A rung may redirect a stop to another rung instead of ending the run.
        const stopTransition = normalizeTransition(currentRung.onStop);
        if (isMeaningful(stopTransition) && !isStopTransition(stopTransition)) {
          if (!rungMap.has(stopTransition)) {
            failAutoMode(`experiment ladder references unknown stop rung: ${stopTransition}`);
          }
          currentRung = rungMap.get(stopTransition);
          stopMatched = false;
          stopReason = "";
          continue;
        }
        stopReason = stopReason || `stop condition matched during rung ${currentRung.id}`;
        break;
      }

      const gateResult = await runCheckCommand({
        targetDir,
        label: `gate for rung ${currentRung.id}`,
        command: currentRung.gate,
        deadlineMs,
      });
      const transition = normalizeTransition(gateResult.matched ? currentRung.onPass : currentRung.onFail);
      if (!isMeaningful(transition)) {
        failAutoMode(`experiment ladder rung ${currentRung.id} has no ${gateResult.matched ? "pass" : "fail"} transition`);
      }

      writeRunningStatus({
        currentStage: currentRung.stage,
        currentCommand: command,
        currentRung: currentRung.id,
        watchTarget: currentRung.watch,
        nextRung: transition,
        decision: gateResult.matched
          ? `rung ${currentRung.id} passed gate`
          : `rung ${currentRung.id} failed gate`,
      });

      await maybeApplyPromotion(`rung ${currentRung.id}`);

      if (isSuccessTransition(transition)) {
        if (mode.terminalGoalType === "task-completion") {
          successReached = true;
          stopReason = `success terminal rung reached: ${currentRung.id}`;
          break;
        }
        const goalEvaluation = await evaluateTerminalGoal({
          mode,
          iteration: iterationsCompleted,
          targetDir,
          deadlineMs,
        });
        if (!goalEvaluation.matched) {
          failAutoMode(
            `experiment ladder reached success terminal at ${currentRung.id} before terminal goal matched`
          );
        }
        successReached = true;
        stopReason = goalEvaluation.reason || `success terminal rung reached: ${currentRung.id}`;
        break;
      }

      if (isStopTransition(transition)) {
        stopMatched = true;
        stopReason = `stop terminal rung reached: ${currentRung.id}`;
        break;
      }

      if (!rungMap.has(transition)) {
        failAutoMode(`experiment ladder references unknown next rung: ${transition}`);
      }
      currentRung = rungMap.get(transition);
    }

    if (!successReached && !stopMatched && iterationsCompleted >= iterationLimit) {
      failAutoMode(`max iterations exhausted before completing experiment ladder: ${maxIterations}`);
    }
  } else {
    // Plain mode: repeat the loop stages, or every allowed stage when no loop
    // stages were split out.
    const stagesPerIteration = loopStages.length > 0 ? loopStages : mode.allowedStages;

    for (let iteration = 1; iteration <= iterationLimit; iteration += 1) {
      writeRunningStatus({
        currentStage: stagesPerIteration[0] || currentStatus.currentStage,
        currentCommand: mode.stageCommands[stagesPerIteration[0]] || "",
        iterationCount: String(iteration),
        decision: `starting iteration ${iteration}`,
      });

      for (const stage of stagesPerIteration) {
        await runStage({ stage });
        if (stopMatched || successReached) {
          break;
        }
      }

      iterationsCompleted = iteration;

      if (stopMatched || successReached) {
        break;
      }

      await maybeApplyPromotion(`iteration ${iteration}`);

      const goalEvaluation = await evaluateTerminalGoal({
        mode,
        iteration,
        targetDir,
        deadlineMs,
      });
      if (goalEvaluation.matched) {
        successReached = true;
        stopReason = goalEvaluation.reason || "completed configured auto goal";
        break;
      }
    }

    // A stop matched on the last permitted iteration must be reported as a
    // stop, not as exhausted iterations, so the guard also checks stopMatched.
    if (!successReached && !stopMatched && loopStages.length > 0 && iterationsCompleted >= iterationLimit) {
      failAutoMode(`max iterations exhausted without meeting success criteria: ${maxIterations}`);
    }
  }

  if (stopMatched) {
    return finishStopped();
  }

  // Final stages only run in plain mode; the ladder drives its own stages.
  if (evalProtocol.experimentRungs.length === 0) {
    for (const stage of finalStages) {
      await runStage({ stage });
      if (stopMatched) {
        return finishStopped();
      }
    }
  }

  if (!fs.existsSync(requiredArtifact.absolutePath)) {
    failAutoMode(`required terminal artifact missing: ${mode.requiredTerminalArtifact}`);
  }

  currentStatus = {
    ...currentStatus,
    status: "completed",
    currentStage: executedStages.at(-1) || currentStatus.currentStage,
    lastHeartbeat: new Date().toISOString(),
    decision: successReached ? "completed configured auto goal" : "completed configured stages",
  };
  writeAutoStatus(targetDir, currentStatus, { lang });
  const completionReason = stopReason || "completed configured auto goal";
  recordOutcome({
    outcomeStatus: "completed",
    goalReached: successReached,
    reason: completionReason,
    finalArtifact: requiredArtifact.relativePath,
  });
  return buildResult({ goalReached: successReached, reason: completionReason });
}
771
+
772
/**
 * Mark the auto run as stopped by the operator.
 *
 * Overlays a "stopped" state on the persisted status, writes a matching
 * outcome record and refreshes the derived context.
 *
 * @param {object} options
 * @param {string} options.targetDir - Workspace directory whose auto run is stopped.
 * @param {Date} [options.now] - Clock value used for the heartbeat and finish timestamps.
 * @returns {object} The stopped status that was written.
 */
function stopAutoMode({ targetDir, now = new Date() }) {
  const previous = parseAutoStatus(targetDir);
  const mode = parseAutoMode(targetDir);
  const evalProtocol = parseEvalProtocol(targetDir);
  const lang = readWorkflowLanguage(targetDir);
  const stoppedAt = now.toISOString();
  const operatorReason = "stopped by operator";

  const status = Object.assign({}, previous, {
    status: "stopped",
    lastHeartbeat: stoppedAt,
    decision: operatorReason,
  });
  writeAutoStatus(targetDir, status, { lang });

  const outcome = {
    objective: mode.objective,
    experimentLadder: evalProtocol.experimentLadder,
    metricGlossary: evalProtocol.metricGlossary,
    metricSourcePapers: evalProtocol.metricSourcePapers,
    metricImplementationSource: evalProtocol.metricImplementationSource,
    comparisonSourcePapers: evalProtocol.comparisonSourcePapers,
    comparisonImplementationSource: evalProtocol.comparisonImplementationSource,
    deviationFromOriginalImplementation: evalProtocol.deviationFromOriginalImplementation,
    terminalGoalType: mode.terminalGoalType,
    terminalGoalTarget: mode.terminalGoalTarget,
    requiredTerminalArtifact: mode.requiredTerminalArtifact,
    status: "stopped",
    goalReached: false,
    stopReason: operatorReason,
    promotionApplied: false,
    finalArtifact: mode.requiredTerminalArtifact || "",
    // Fall back to neutral values when the persisted status lacks these fields.
    finalRung: previous.currentRung || "",
    executedStages: "",
    iterationsCompleted: previous.iterationCount || "0",
    startedAt: previous.startedAt || stoppedAt,
    finishedAt: stoppedAt,
  };
  writeAutoOutcome(targetDir, outcome, { lang });

  refreshContext({ targetDir });
  return status;
}
814
+
815
/**
 * Load the auto-mode configuration and persisted status for a workspace and
 * report every validation issue found for them.
 *
 * @param {object} options
 * @param {string} options.targetDir - Workspace directory to inspect.
 * @returns {{mode: object, status: object, issues: Array}} The parsed mode,
 *   the parsed status, and the mode issues followed by the status issues.
 */
function getAutoStatus({ targetDir }) {
  const mode = parseAutoMode(targetDir);
  const status = parseAutoStatus(targetDir);
  const evalProtocol = parseEvalProtocol(targetDir);

  const modeIssues = validateAutoMode(mode, status, evalProtocol);
  const statusIssues = validateAutoStatus(status, mode);

  return {
    mode,
    status,
    issues: modeIssues.concat(statusIssues),
  };
}
825
+
826
+ module.exports = {
827
+ getAutoStatus,
828
+ startAutoMode,
829
+ stopAutoMode,
830
+ };