superlab 0.1.42 → 0.1.44
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/auto_contracts.cjs +5 -0
- package/lib/auto_runner.cjs +271 -8
- package/lib/auto_state.cjs +140 -0
- package/lib/i18n.cjs +53 -2
- package/lib/install.cjs +65 -0
- package/package-assets/claude/commands/lab-auto.md +1 -1
- package/package-assets/claude/commands/lab.md +18 -2
- package/package-assets/codex/prompts/lab-auto.md +1 -1
- package/package-assets/codex/prompts/lab.md +19 -3
- package/package-assets/shared/lab/context/auto-ledger.md +35 -0
- package/package-assets/shared/lab/context/auto-mode.md +2 -0
- package/package-assets/shared/skills/lab/SKILL.md +3 -2
- package/package-assets/shared/skills/lab/stages/auto.md +9 -0
- package/package.json +1 -1
package/lib/auto_contracts.cjs
CHANGED
|
@@ -134,6 +134,11 @@ function validateAutoMode(mode, status = null, evalProtocol = null) {
|
|
|
134
134
|
["terminal goal type", mode.terminalGoalType],
|
|
135
135
|
["terminal goal target", mode.terminalGoalTarget],
|
|
136
136
|
["required terminal artifact", mode.requiredTerminalArtifact],
|
|
137
|
+
["primary gate", mode.primaryGate],
|
|
138
|
+
["secondary guard", mode.secondaryGuard],
|
|
139
|
+
["promotion condition", mode.promotionCondition],
|
|
140
|
+
["stop reason", mode.stopReason],
|
|
141
|
+
["escalation reason", mode.escalationReason],
|
|
137
142
|
["max iterations", mode.maxIterations],
|
|
138
143
|
["max wall-clock time", mode.maxWallClockTime],
|
|
139
144
|
["max failures", mode.maxFailures],
|
package/lib/auto_runner.cjs
CHANGED
|
@@ -21,10 +21,13 @@ const {
|
|
|
21
21
|
verifyStageContract,
|
|
22
22
|
} = require("./auto_contracts.cjs");
|
|
23
23
|
const {
|
|
24
|
+
listMissingCurrentAutoModeFields,
|
|
25
|
+
parseAutoLedger,
|
|
24
26
|
parseAutoMode,
|
|
25
27
|
parseAutoStatus,
|
|
26
28
|
readWorkflowLanguage,
|
|
27
29
|
resolveRequiredArtifact,
|
|
30
|
+
writeAutoLedger,
|
|
28
31
|
writeAutoOutcome,
|
|
29
32
|
writeAutoStatus,
|
|
30
33
|
} = require("./auto_state.cjs");
|
|
@@ -33,6 +36,10 @@ function normalizeTransition(value) {
|
|
|
33
36
|
return (value || "").trim();
|
|
34
37
|
}
|
|
35
38
|
|
|
39
|
+
/**
 * Canonicalise an observed-state label so it can be compared case-insensitively.
 *
 * @param {string|null|undefined} value - Raw observed-state text; any falsy value is treated as empty.
 * @returns {string} The text with surrounding whitespace stripped and lower-cased.
 */
function normalizeObservedState(value) {
  const rawText = value ? value : "";
  const trimmedText = rawText.trim();
  return trimmedText.toLowerCase();
}
|
|
42
|
+
|
|
36
43
|
/**
 * Report whether a transition label names a successful terminal outcome.
 *
 * The label is trimmed and lower-cased before comparison; falsy input is
 * treated as an empty label.
 *
 * @param {string|null|undefined} value - Transition label to classify.
 * @returns {boolean} True for "success", "terminal-success" or "campaign-success".
 */
function isSuccessTransition(value) {
  const label = (value || "").trim().toLowerCase();
  switch (label) {
    case "success":
    case "terminal-success":
    case "campaign-success":
      return true;
    default:
      return false;
  }
}
|
|
@@ -41,6 +48,101 @@ function isStopTransition(value) {
|
|
|
41
48
|
return ["stop", "campaign-stop", "terminal-stop"].includes((value || "").trim().toLowerCase());
|
|
42
49
|
}
|
|
43
50
|
|
|
51
|
+
/**
 * Probe whether the process recorded as a local owner still exists.
 *
 * @param {string|number} ownerId - Recorded owner id; it is converted with
 *   `parseInteger` and must yield a positive integer to be probed.
 * @returns {boolean} True when the probe succeeds or is rejected with EPERM;
 *   false for an unusable id or any other probe error.
 */
function isLocalProcessAlive(ownerId) {
  const candidatePid = parseInteger(ownerId, null);
  const isUsablePid = Number.isInteger(candidatePid) && candidatePid > 0;
  if (!isUsablePid) {
    return false;
  }

  let alive;
  try {
    // Signal 0 delivers nothing; Node documents it as an existence test.
    process.kill(candidatePid, 0);
    alive = true;
  } catch (probeError) {
    // EPERM is a permission refusal, so the target exists but is not ours to signal.
    alive = Boolean(probeError) && probeError.code === "EPERM";
  }
  return alive;
}
|
|
66
|
+
|
|
67
|
+
/**
 * Decide whether a new auto-mode start should resume a previously recorded
 * campaign, be blocked by a still-running owner, or start fresh.
 *
 * @param {object} args
 * @param {object} args.mode - Parsed auto-mode contract (not read by this function).
 * @param {object} args.evalProtocol - Parsed eval protocol; `experimentRungs` is read.
 * @param {object} args.status - Previously written auto status.
 * @param {object} args.ledger - Previously written auto ledger.
 * @param {Date} args.now - Clock value used when the status has no start time.
 * @returns {{blockingIssue: string, resumePlan: (object|null)}} A non-empty
 *   `blockingIssue` when a live local owner is detected; otherwise a ladder
 *   `resumePlan`, or `null` when nothing can be resumed.
 */
function resolveResumePlan({ mode, evalProtocol, status, ledger, now }) {
  const noPlan = () => ({ blockingIssue: "", resumePlan: null });

  // A ledger without any of these fields carries no campaign to resume.
  const ledgerSignals = [
    ledger.campaignId,
    ledger.observedState,
    ledger.activeRung,
    ledger.nextTransition,
    ledger.ownerId,
  ];
  if (!ledgerSignals.some((signal) => isMeaningful(signal))) {
    return noPlan();
  }

  // Refuse to start while the recorded local owner process is still alive.
  const ownerKind = (ledger.ownerType || "").trim().toLowerCase();
  if (ownerKind === "local-process" && isLocalProcessAlive(ledger.ownerId)) {
    return {
      blockingIssue: `auto campaign already has a live local owner: ${ledger.ownerId}`,
      resumePlan: null,
    };
  }

  let startedAt = status.startedAt;
  if (!isMeaningful(startedAt)) {
    startedAt = now.toISOString();
  }
  let campaignId = ledger.campaignId;
  if (!isMeaningful(campaignId)) {
    campaignId = `auto-${startedAt.replace(/[:.]/g, "-")}`;
  }
  const iterationCount = parseInteger(status.iterationCount, 0);
  const observedState = normalizeObservedState(ledger.observedState);

  // Only ladder campaigns (declared experiment rungs) can be resumed here.
  const rungs = evalProtocol.experimentRungs;
  if (!(rungs.length > 0)) {
    return noPlan();
  }

  const rungsById = new Map();
  for (const rung of rungs) {
    rungsById.set(rung.id, rung);
  }

  // Assemble the ladder plan for a rung already known to be in rungsById.
  const ladderPlan = (rungId, iterationsCompleted, reason) => {
    const { stage, watch } = rungsById.get(rungId);
    return {
      blockingIssue: "",
      resumePlan: {
        kind: "ladder",
        rungId,
        stage,
        watchTarget: watch,
        campaignId,
        startedAt,
        iterationsCompleted,
        lastCheckpoint: ledger.lastCheckpoint || status.lastCheckpoint || "",
        reason,
      },
    };
  };

  // NOTE(review): this check runs before the active-rung check below, so a
  // ledger that is still "running" but names a valid next rung resumes at the
  // next rung rather than restarting the active one - confirm this is intended.
  const nextTransition = normalizeTransition(ledger.nextTransition || status.nextRung);
  const canResumeAtNext =
    isMeaningful(nextTransition) &&
    !isSuccessTransition(nextTransition) &&
    !isStopTransition(nextTransition) &&
    rungsById.has(nextTransition);
  if (canResumeAtNext) {
    return ladderPlan(nextTransition, iterationCount, `resuming at next rung ${nextTransition}`);
  }

  // The owner exited mid-rung: rerun that rung and give back one iteration.
  const activeRung = normalizeTransition(ledger.activeRung || status.currentRung);
  const ownerExitedMidRung =
    (observedState === "running" || observedState === "retrying") &&
    isMeaningful(activeRung) &&
    rungsById.has(activeRung);
  if (ownerExitedMidRung) {
    return ladderPlan(
      activeRung,
      Math.max(0, iterationCount - 1),
      `restarting active rung ${activeRung} after owner exit`
    );
  }

  return noPlan();
}
|
|
145
|
+
|
|
44
146
|
async function runCommandWithPolling({
|
|
45
147
|
targetDir,
|
|
46
148
|
stage,
|
|
@@ -53,6 +155,8 @@ async function runCommandWithPolling({
|
|
|
53
155
|
rungId = "",
|
|
54
156
|
watchTarget = "",
|
|
55
157
|
nextRung = "",
|
|
158
|
+
ownerInfo = null,
|
|
159
|
+
updateLedger = null,
|
|
56
160
|
}) {
|
|
57
161
|
const child = spawn(command, {
|
|
58
162
|
cwd: targetDir,
|
|
@@ -106,6 +210,20 @@ async function runCommandWithPolling({
|
|
|
106
210
|
},
|
|
107
211
|
{ lang }
|
|
108
212
|
);
|
|
213
|
+
if (typeof updateLedger === "function") {
|
|
214
|
+
updateLedger({
|
|
215
|
+
ownerType: ownerInfo?.ownerType || "local-process",
|
|
216
|
+
ownerId: String(child.pid || ownerInfo?.ownerId || ""),
|
|
217
|
+
command,
|
|
218
|
+
watchTarget,
|
|
219
|
+
activeStage: stage,
|
|
220
|
+
activeRung: rungId,
|
|
221
|
+
startedAt,
|
|
222
|
+
lastObservedAt: new Date().toISOString(),
|
|
223
|
+
observedState: "running",
|
|
224
|
+
nextTransition: nextRung || "",
|
|
225
|
+
});
|
|
226
|
+
}
|
|
109
227
|
await sleep(pollIntervalMs);
|
|
110
228
|
}
|
|
111
229
|
|
|
@@ -217,7 +335,15 @@ async function evaluateTerminalGoal({ mode, iteration, targetDir, deadlineMs })
|
|
|
217
335
|
|
|
218
336
|
async function startAutoMode({ targetDir, now = new Date() }) {
|
|
219
337
|
const mode = parseAutoMode(targetDir);
|
|
338
|
+
const existingStatus = parseAutoStatus(targetDir);
|
|
339
|
+
const existingLedger = parseAutoLedger(targetDir);
|
|
220
340
|
const evalProtocol = parseEvalProtocol(targetDir);
|
|
341
|
+
const missingSchemaFields = listMissingCurrentAutoModeFields(mode);
|
|
342
|
+
if (missingSchemaFields.length > 0) {
|
|
343
|
+
throw new Error(
|
|
344
|
+
`auto-mode.md is missing current contract fields: ${missingSchemaFields.join(", ")}; run \`superlab update --target ${targetDir}\` to apply the managed schema migration, then fill the new fields before starting auto mode`
|
|
345
|
+
);
|
|
346
|
+
}
|
|
221
347
|
const issues = validateAutoMode(mode, null, evalProtocol);
|
|
222
348
|
if (issues.length > 0) {
|
|
223
349
|
throw new Error(issues.join(" | "));
|
|
@@ -229,20 +355,30 @@ async function startAutoMode({ targetDir, now = new Date() }) {
|
|
|
229
355
|
if (mode.approvalStatus !== "approved") {
|
|
230
356
|
throw new Error(`approval status must be approved before auto mode can start (current: ${mode.approvalStatus || "missing"})`);
|
|
231
357
|
}
|
|
358
|
+
const { blockingIssue, resumePlan } = resolveResumePlan({
|
|
359
|
+
mode,
|
|
360
|
+
evalProtocol,
|
|
361
|
+
status: existingStatus,
|
|
362
|
+
ledger: existingLedger,
|
|
363
|
+
now,
|
|
364
|
+
});
|
|
365
|
+
if (blockingIssue) {
|
|
366
|
+
throw new Error(blockingIssue);
|
|
367
|
+
}
|
|
232
368
|
|
|
233
369
|
const lang = readWorkflowLanguage(targetDir);
|
|
234
370
|
const timestamp = now.toISOString();
|
|
235
371
|
const status = {
|
|
236
372
|
status: "running",
|
|
237
|
-
currentStage: mode.allowedStages[0] || "run",
|
|
373
|
+
currentStage: resumePlan?.stage || mode.allowedStages[0] || "run",
|
|
238
374
|
currentCommand: "",
|
|
239
375
|
activeRunId: "",
|
|
240
|
-
iterationCount:
|
|
241
|
-
startedAt: timestamp,
|
|
376
|
+
iterationCount: String(resumePlan?.iterationsCompleted || 0),
|
|
377
|
+
startedAt: resumePlan?.startedAt || timestamp,
|
|
242
378
|
lastHeartbeat: timestamp,
|
|
243
|
-
lastCheckpoint: "",
|
|
244
|
-
lastSummary: "",
|
|
245
|
-
decision: "armed for bounded auto orchestration",
|
|
379
|
+
lastCheckpoint: resumePlan?.lastCheckpoint || "",
|
|
380
|
+
lastSummary: resumePlan?.reason || "",
|
|
381
|
+
decision: resumePlan?.reason || "armed for bounded auto orchestration",
|
|
246
382
|
};
|
|
247
383
|
writeAutoStatus(targetDir, status, { lang });
|
|
248
384
|
|
|
@@ -257,13 +393,36 @@ async function startAutoMode({ targetDir, now = new Date() }) {
|
|
|
257
393
|
const { loopStages, finalStages } = splitAutoStages(mode.allowedStages);
|
|
258
394
|
const executedStages = [];
|
|
259
395
|
let failureCount = 0;
|
|
260
|
-
let iterationsCompleted = 0;
|
|
396
|
+
let iterationsCompleted = resumePlan?.iterationsCompleted || 0;
|
|
261
397
|
let currentStatus = { ...status };
|
|
262
398
|
let successReached = false;
|
|
263
399
|
let stopMatched = false;
|
|
264
400
|
let promotionApplied = false;
|
|
265
401
|
let stopReason = "";
|
|
266
402
|
let finalRung = "";
|
|
403
|
+
const campaignId = resumePlan?.campaignId || `auto-${startedAt.replace(/[:.]/g, "-")}`;
|
|
404
|
+
let currentLedger = {
|
|
405
|
+
campaignId,
|
|
406
|
+
objective: mode.objective,
|
|
407
|
+
activeStage: status.currentStage,
|
|
408
|
+
activeRung: resumePlan?.rungId || "",
|
|
409
|
+
ownerType: "",
|
|
410
|
+
ownerId: "",
|
|
411
|
+
command: "",
|
|
412
|
+
watchTarget: resumePlan?.watchTarget || "",
|
|
413
|
+
startedAt,
|
|
414
|
+
lastObservedAt: timestamp,
|
|
415
|
+
observedState: resumePlan ? "resuming" : "armed",
|
|
416
|
+
lastCheckpoint: resumePlan?.lastCheckpoint || "",
|
|
417
|
+
checkpointSummary: resumePlan?.reason || "auto loop armed and waiting for the first owned command",
|
|
418
|
+
nextTransition: resumePlan?.rungId || "",
|
|
419
|
+
continueBoundary: "Continue while the active owner is still running and no stop condition has matched.",
|
|
420
|
+
stopBoundary: mode.stopConditions,
|
|
421
|
+
escalationBoundary: mode.escalationConditions,
|
|
422
|
+
requiredReadSet: ".lab/context/eval-protocol.md, .lab/context/auto-mode.md, .lab/context/auto-status.md, .lab/context/auto-ledger.md, .lab/context/auto-outcome.md",
|
|
423
|
+
resumeCommand: "",
|
|
424
|
+
};
|
|
425
|
+
writeAutoLedger(targetDir, currentLedger, { lang });
|
|
267
426
|
const outcomeProtocolFields = {
|
|
268
427
|
primaryMetrics: evalProtocol.primaryMetrics,
|
|
269
428
|
secondaryMetrics: evalProtocol.secondaryMetrics,
|
|
@@ -305,6 +464,22 @@ async function startAutoMode({ targetDir, now = new Date() }) {
|
|
|
305
464
|
writeAutoStatus(targetDir, currentStatus, { lang });
|
|
306
465
|
};
|
|
307
466
|
|
|
467
|
+
const writeLedger = (overrides = {}) => {
|
|
468
|
+
currentLedger = {
|
|
469
|
+
...currentLedger,
|
|
470
|
+
activeStage: currentStatus.currentStage || currentLedger.activeStage,
|
|
471
|
+
activeRung: currentStatus.currentRung || currentLedger.activeRung,
|
|
472
|
+
watchTarget: currentStatus.watchTarget || currentLedger.watchTarget,
|
|
473
|
+
lastCheckpoint: currentStatus.lastCheckpoint || currentLedger.lastCheckpoint,
|
|
474
|
+
checkpointSummary: currentStatus.lastSummary || currentLedger.checkpointSummary,
|
|
475
|
+
lastObservedAt: new Date().toISOString(),
|
|
476
|
+
stopBoundary: mode.stopConditions,
|
|
477
|
+
escalationBoundary: mode.escalationConditions,
|
|
478
|
+
...overrides,
|
|
479
|
+
};
|
|
480
|
+
writeAutoLedger(targetDir, currentLedger, { lang });
|
|
481
|
+
};
|
|
482
|
+
|
|
308
483
|
const failAutoMode = (message) => {
|
|
309
484
|
currentStatus = {
|
|
310
485
|
...currentStatus,
|
|
@@ -313,6 +488,13 @@ async function startAutoMode({ targetDir, now = new Date() }) {
|
|
|
313
488
|
decision: message,
|
|
314
489
|
};
|
|
315
490
|
writeAutoStatus(targetDir, currentStatus, { lang });
|
|
491
|
+
writeLedger({
|
|
492
|
+
observedState: "failed",
|
|
493
|
+
ownerType: currentLedger.ownerType || "local-process",
|
|
494
|
+
checkpointSummary: message,
|
|
495
|
+
nextTransition: "terminal-failure",
|
|
496
|
+
resumeCommand: "",
|
|
497
|
+
});
|
|
316
498
|
writeAutoOutcome(
|
|
317
499
|
targetDir,
|
|
318
500
|
{
|
|
@@ -366,6 +548,8 @@ async function startAutoMode({ targetDir, now = new Date() }) {
|
|
|
366
548
|
rungId,
|
|
367
549
|
watchTarget,
|
|
368
550
|
nextRung,
|
|
551
|
+
ownerInfo: { ownerType: "local-process" },
|
|
552
|
+
updateLedger: writeLedger,
|
|
369
553
|
});
|
|
370
554
|
verifyStageContract({ stage, snapshot: contract.snapshot });
|
|
371
555
|
executedStages.push(stage);
|
|
@@ -378,6 +562,18 @@ async function startAutoMode({ targetDir, now = new Date() }) {
|
|
|
378
562
|
nextRung,
|
|
379
563
|
decision: rungId ? `completed rung ${rungId}` : `completed stage ${stage}`,
|
|
380
564
|
});
|
|
565
|
+
writeLedger({
|
|
566
|
+
ownerType: "local-process",
|
|
567
|
+
observedState: "checkpointed",
|
|
568
|
+
command,
|
|
569
|
+
watchTarget,
|
|
570
|
+
activeStage: stage,
|
|
571
|
+
activeRung: rungId || currentStatus.currentRung,
|
|
572
|
+
ownerId: currentLedger.ownerId,
|
|
573
|
+
checkpointSummary: rungId ? `completed rung ${rungId}` : `completed stage ${stage}`,
|
|
574
|
+
nextTransition: nextRung || "",
|
|
575
|
+
resumeCommand: command,
|
|
576
|
+
});
|
|
381
577
|
const frozenCoreChanges = detectFrozenCoreChanges(frozenCoreSnapshot);
|
|
382
578
|
if (frozenCoreChanges.length > 0) {
|
|
383
579
|
failAutoMode(`frozen core changed: ${frozenCoreChanges.join(", ")}`);
|
|
@@ -412,6 +608,17 @@ async function startAutoMode({ targetDir, now = new Date() }) {
|
|
|
412
608
|
nextRung,
|
|
413
609
|
decision: `retrying ${rungId || stage} after failure ${failureCount}`,
|
|
414
610
|
});
|
|
611
|
+
writeLedger({
|
|
612
|
+
ownerType: "local-process",
|
|
613
|
+
observedState: "retrying",
|
|
614
|
+
command,
|
|
615
|
+
watchTarget,
|
|
616
|
+
activeStage: stage,
|
|
617
|
+
activeRung: rungId || currentStatus.currentRung,
|
|
618
|
+
checkpointSummary: `retrying ${rungId || stage} after failure ${failureCount}`,
|
|
619
|
+
nextTransition: rungId || stage,
|
|
620
|
+
resumeCommand: command,
|
|
621
|
+
});
|
|
415
622
|
}
|
|
416
623
|
}
|
|
417
624
|
};
|
|
@@ -451,6 +658,14 @@ async function startAutoMode({ targetDir, now = new Date() }) {
|
|
|
451
658
|
currentCommand: mode.promotionCommand,
|
|
452
659
|
decision: `promotion policy matched after ${label}`,
|
|
453
660
|
});
|
|
661
|
+
writeLedger({
|
|
662
|
+
ownerType: "local-process",
|
|
663
|
+
command: mode.promotionCommand,
|
|
664
|
+
observedState: "checkpointed",
|
|
665
|
+
checkpointSummary: `promotion policy matched after ${label}`,
|
|
666
|
+
nextTransition: "post-promotion refresh",
|
|
667
|
+
resumeCommand: mode.promotionCommand,
|
|
668
|
+
});
|
|
454
669
|
promotionApplied = true;
|
|
455
670
|
const frozenCoreChangesAfterPromotion = detectFrozenCoreChanges(frozenCoreSnapshot);
|
|
456
671
|
if (frozenCoreChangesAfterPromotion.length > 0) {
|
|
@@ -463,7 +678,12 @@ async function startAutoMode({ targetDir, now = new Date() }) {
|
|
|
463
678
|
|
|
464
679
|
if (evalProtocol.experimentRungs.length > 0) {
|
|
465
680
|
const rungMap = new Map(evalProtocol.experimentRungs.map((rung) => [rung.id, rung]));
|
|
466
|
-
let currentRung =
|
|
681
|
+
let currentRung = resumePlan?.kind === "ladder"
|
|
682
|
+
? rungMap.get(resumePlan.rungId)
|
|
683
|
+
: evalProtocol.experimentRungs[0];
|
|
684
|
+
if (!currentRung) {
|
|
685
|
+
failAutoMode(`resume rung is missing from the current experiment ladder: ${resumePlan?.rungId || ""}`);
|
|
686
|
+
}
|
|
467
687
|
|
|
468
688
|
while (currentRung && iterationsCompleted < Math.max(1, maxIterations)) {
|
|
469
689
|
if (!mode.allowedStages.includes(currentRung.stage)) {
|
|
@@ -618,6 +838,12 @@ async function startAutoMode({ targetDir, now = new Date() }) {
|
|
|
618
838
|
decision: stopReason || "stopped by stop condition",
|
|
619
839
|
};
|
|
620
840
|
writeAutoStatus(targetDir, currentStatus, { lang });
|
|
841
|
+
writeLedger({
|
|
842
|
+
observedState: "stopped",
|
|
843
|
+
checkpointSummary: stopReason || "stopped by stop condition",
|
|
844
|
+
nextTransition: "terminal-stop",
|
|
845
|
+
resumeCommand: "",
|
|
846
|
+
});
|
|
621
847
|
writeAutoOutcome(
|
|
622
848
|
targetDir,
|
|
623
849
|
{
|
|
@@ -670,6 +896,12 @@ async function startAutoMode({ targetDir, now = new Date() }) {
|
|
|
670
896
|
decision: stopReason || "stopped by stop condition",
|
|
671
897
|
};
|
|
672
898
|
writeAutoStatus(targetDir, currentStatus, { lang });
|
|
899
|
+
writeLedger({
|
|
900
|
+
observedState: "stopped",
|
|
901
|
+
checkpointSummary: stopReason || "stopped by stop condition",
|
|
902
|
+
nextTransition: "terminal-stop",
|
|
903
|
+
resumeCommand: "",
|
|
904
|
+
});
|
|
673
905
|
writeAutoOutcome(
|
|
674
906
|
targetDir,
|
|
675
907
|
{
|
|
@@ -724,6 +956,12 @@ async function startAutoMode({ targetDir, now = new Date() }) {
|
|
|
724
956
|
decision: successReached ? "completed configured auto goal" : "completed configured stages",
|
|
725
957
|
};
|
|
726
958
|
writeAutoStatus(targetDir, currentStatus, { lang });
|
|
959
|
+
writeLedger({
|
|
960
|
+
observedState: "completed",
|
|
961
|
+
checkpointSummary: successReached ? "completed configured auto goal" : "completed configured stages",
|
|
962
|
+
nextTransition: "terminal-success",
|
|
963
|
+
resumeCommand: "",
|
|
964
|
+
});
|
|
727
965
|
writeAutoOutcome(
|
|
728
966
|
targetDir,
|
|
729
967
|
{
|
|
@@ -805,6 +1043,31 @@ function stopAutoMode({ targetDir, now = new Date() }) {
|
|
|
805
1043
|
decision: "stopped by operator",
|
|
806
1044
|
};
|
|
807
1045
|
writeAutoStatus(targetDir, status, { lang });
|
|
1046
|
+
writeAutoLedger(
|
|
1047
|
+
targetDir,
|
|
1048
|
+
{
|
|
1049
|
+
campaignId: existing.startedAt ? `auto-${existing.startedAt.replace(/[:.]/g, "-")}` : `auto-${now.toISOString().replace(/[:.]/g, "-")}`,
|
|
1050
|
+
objective: mode.objective,
|
|
1051
|
+
activeStage: existing.currentStage || "",
|
|
1052
|
+
activeRung: existing.currentRung || "",
|
|
1053
|
+
ownerType: "local-process",
|
|
1054
|
+
ownerId: "",
|
|
1055
|
+
command: existing.currentCommand || "",
|
|
1056
|
+
watchTarget: existing.watchTarget || "",
|
|
1057
|
+
startedAt: existing.startedAt || now.toISOString(),
|
|
1058
|
+
lastObservedAt: now.toISOString(),
|
|
1059
|
+
observedState: "stopped",
|
|
1060
|
+
lastCheckpoint: existing.lastCheckpoint || "",
|
|
1061
|
+
checkpointSummary: "stopped by operator",
|
|
1062
|
+
nextTransition: "terminal-stop",
|
|
1063
|
+
continueBoundary: "No further automatic progress is allowed until a new approved auto run starts.",
|
|
1064
|
+
stopBoundary: mode.stopConditions,
|
|
1065
|
+
escalationBoundary: mode.escalationConditions,
|
|
1066
|
+
requiredReadSet: ".lab/context/eval-protocol.md, .lab/context/auto-mode.md, .lab/context/auto-status.md, .lab/context/auto-ledger.md, .lab/context/auto-outcome.md",
|
|
1067
|
+
resumeCommand: "",
|
|
1068
|
+
},
|
|
1069
|
+
{ lang }
|
|
1070
|
+
);
|
|
808
1071
|
writeAutoOutcome(
|
|
809
1072
|
targetDir,
|
|
810
1073
|
{
|
package/lib/auto_state.cjs
CHANGED
|
@@ -24,6 +24,11 @@ function parseAutoMode(targetDir) {
|
|
|
24
24
|
terminalGoalType: normalizeScalar(extractValue(text, ["Terminal goal type", "终止目标类型"])),
|
|
25
25
|
terminalGoalTarget: extractValue(text, ["Terminal goal target", "终止目标目标值"]),
|
|
26
26
|
requiredTerminalArtifact: extractValue(text, ["Required terminal artifact", "终止目标工件"]),
|
|
27
|
+
primaryGate: extractValue(text, ["Primary gate", "主 gate"]),
|
|
28
|
+
secondaryGuard: extractValue(text, ["Secondary guard", "次级 guard"]),
|
|
29
|
+
promotionCondition: extractValue(text, ["Promotion condition", "升格条件"]),
|
|
30
|
+
stopReason: extractValue(text, ["Stop reason", "停止原因"]),
|
|
31
|
+
escalationReason: extractValue(text, ["Escalation reason", "升级原因"]),
|
|
27
32
|
maxIterations: extractValue(text, ["Max iterations", "最大迭代轮次"]),
|
|
28
33
|
maxWallClockTime: extractValue(text, ["Max wall-clock time", "最大运行时长"]),
|
|
29
34
|
maxFailures: extractValue(text, ["Max failures", "最大失败次数"]),
|
|
@@ -47,6 +52,25 @@ function parseAutoMode(targetDir) {
|
|
|
47
52
|
};
|
|
48
53
|
}
|
|
49
54
|
|
|
55
|
+
// [display label, parsed-mode key] pairs for the fields that
// listMissingCurrentAutoModeFields requires to carry a meaningful value.
const CURRENT_AUTO_MODE_SCHEMA_FIELDS = [
  ["Autonomy level", "autonomyLevel"],
  ["Approval status", "approvalStatus"],
  ["Terminal goal type", "terminalGoalType"],
  ["Terminal goal target", "terminalGoalTarget"],
  ["Required terminal artifact", "requiredTerminalArtifact"],
  ["Primary gate", "primaryGate"],
  ["Secondary guard", "secondaryGuard"],
  ["Promotion condition", "promotionCondition"],
  ["Stop reason", "stopReason"],
  ["Escalation reason", "escalationReason"],
];

/**
 * Collect the labels of schema fields that the parsed auto mode leaves unset.
 *
 * @param {object} mode - Parsed auto-mode object keyed by the schema's field keys.
 * @returns {string[]} Labels, in schema order, whose value fails `isMeaningful`.
 */
function listMissingCurrentAutoModeFields(mode) {
  const missingLabels = [];
  for (const [label, key] of CURRENT_AUTO_MODE_SCHEMA_FIELDS) {
    if (!isMeaningful(mode[key])) {
      missingLabels.push(label);
    }
  }
  return missingLabels;
}
|
|
73
|
+
|
|
50
74
|
function parseAutoStatus(targetDir) {
|
|
51
75
|
const text = readFileIfExists(contextFile(targetDir, "auto-status.md"));
|
|
52
76
|
return {
|
|
@@ -68,6 +92,33 @@ function parseAutoStatus(targetDir) {
|
|
|
68
92
|
};
|
|
69
93
|
}
|
|
70
94
|
|
|
95
|
+
/**
 * Read `auto-ledger.md` from the target's context directory and extract its fields.
 *
 * @param {string} targetDir - Project directory handed to `contextFile`.
 * @returns {object} `path` and raw `text` of the ledger, plus one property per
 *   ledger field as returned by `extractValue` for the accepted labels.
 */
function parseAutoLedger(targetDir) {
  const ledgerPath = contextFile(targetDir, "auto-ledger.md");
  const text = readFileIfExists(ledgerPath);

  // Result key -> labels accepted for that field, in output order.
  const fieldLabels = [
    ["campaignId", ["Campaign id", "Campaign ID", "活动 id"]],
    ["objective", ["Objective", "目标"]],
    ["activeStage", ["Active stage", "当前阶段"]],
    ["activeRung", ["Active rung", "当前 rung"]],
    ["ownerType", ["Owner type", "Owner 类型"]],
    ["ownerId", ["Owner id", "Owner ID"]],
    ["command", ["Command", "命令"]],
    ["watchTarget", ["Watch target", "监视目标"]],
    ["startedAt", ["Started at", "开始时间"]],
    ["lastObservedAt", ["Last observed at", "最近观察时间"]],
    ["observedState", ["Observed state", "观察状态"]],
    ["lastCheckpoint", ["Last checkpoint", "最近 checkpoint"]],
    ["checkpointSummary", ["Checkpoint summary", "Checkpoint 摘要"]],
    ["nextTransition", ["Next transition", "下一转换"]],
    ["continueBoundary", ["Continue boundary", "继续边界"]],
    ["stopBoundary", ["Stop boundary", "停止边界"]],
    ["escalationBoundary", ["Escalation boundary", "升级边界"]],
    ["requiredReadSet", ["Required read set", "必要读取集合"]],
    ["resumeCommand", ["Resume command", "恢复命令"]],
  ];

  const parsed = { path: ledgerPath, text };
  for (const [key, labels] of fieldLabels) {
    parsed[key] = extractValue(text, labels);
  }
  return parsed;
}
|
|
121
|
+
|
|
71
122
|
function renderAutoStatus(status, { lang = "en" } = {}) {
|
|
72
123
|
if (lang === "zh") {
|
|
73
124
|
return `# 自动模式状态
|
|
@@ -240,12 +291,96 @@ function renderAutoOutcome(outcome, { lang = "en" } = {}) {
|
|
|
240
291
|
`;
|
|
241
292
|
}
|
|
242
293
|
|
|
294
|
+
/**
 * Render the auto runtime ledger as markdown.
 *
 * Only the title is localised; section headings and field labels are the same
 * English text for every language.
 *
 * @param {object} ledger - Ledger values; a falsy field renders as an empty value.
 * @param {object} [options]
 * @param {string} [options.lang="en"] - "zh" selects the Chinese title.
 * @returns {string} Markdown document ending with a single newline.
 */
function renderAutoLedger(ledger, { lang = "en" } = {}) {
  const title = lang === "zh" ? "# 自动运行账本" : "# Auto Runtime Ledger";
  const bullet = (label, value) => `- ${label}: ${value || ""}`;

  const lines = [
    title,
    "",
    "## Campaign",
    "",
    bullet("Campaign id", ledger.campaignId),
    bullet("Objective", ledger.objective),
    bullet("Active stage", ledger.activeStage),
    bullet("Active rung", ledger.activeRung),
    "",
    "## Owner",
    "",
    bullet("Owner type", ledger.ownerType),
    bullet("Owner id", ledger.ownerId),
    bullet("Command", ledger.command),
    bullet("Watch target", ledger.watchTarget),
    bullet("Started at", ledger.startedAt),
    bullet("Last observed at", ledger.lastObservedAt),
    bullet("Observed state", ledger.observedState),
    "",
    "## Checkpoints",
    "",
    bullet("Last checkpoint", ledger.lastCheckpoint),
    bullet("Checkpoint summary", ledger.checkpointSummary),
    bullet("Next transition", ledger.nextTransition),
    "",
    "## Boundaries",
    "",
    bullet("Continue boundary", ledger.continueBoundary),
    bullet("Stop boundary", ledger.stopBoundary),
    bullet("Escalation boundary", ledger.escalationBoundary),
    "",
    "## Resume",
    "",
    bullet("Required read set", ledger.requiredReadSet),
    bullet("Resume command", ledger.resumeCommand),
    // The trailing empty entry supplies the document's final newline.
    "",
  ];
  return lines.join("\n");
}
|
|
371
|
+
|
|
243
372
|
/**
 * Persist the rendered auto outcome to `auto-outcome.md` in the context directory.
 *
 * @param {string} targetDir - Project directory handed to `contextFile`.
 * @param {object} outcome - Outcome data passed to `renderAutoOutcome`.
 * @param {object} [options]
 * @param {string} [options.lang="en"] - Language forwarded to the renderer.
 * @returns {void}
 */
function writeAutoOutcome(targetDir, outcome, { lang = "en" } = {}) {
  const outcomePath = contextFile(targetDir, "auto-outcome.md");
  const parentDir = path.dirname(outcomePath);
  fs.mkdirSync(parentDir, { recursive: true });

  // Trim trailing whitespace and re-append exactly one newline.
  const rendered = renderAutoOutcome(outcome, { lang });
  const contents = rendered.trimEnd() + "\n";
  fs.writeFileSync(outcomePath, contents);
}
|
|
248
377
|
|
|
378
|
+
/**
 * Persist the rendered auto ledger to `auto-ledger.md` in the context directory.
 *
 * @param {string} targetDir - Project directory handed to `contextFile`.
 * @param {object} ledger - Ledger data passed to `renderAutoLedger`.
 * @param {object} [options]
 * @param {string} [options.lang="en"] - Language forwarded to the renderer.
 * @returns {void}
 */
function writeAutoLedger(targetDir, ledger, { lang = "en" } = {}) {
  const ledgerPath = contextFile(targetDir, "auto-ledger.md");
  const parentDir = path.dirname(ledgerPath);
  fs.mkdirSync(parentDir, { recursive: true });

  // Trim trailing whitespace and re-append exactly one newline.
  const rendered = renderAutoLedger(ledger, { lang });
  const contents = rendered.trimEnd() + "\n";
  fs.writeFileSync(ledgerPath, contents);
}
|
|
383
|
+
|
|
249
384
|
function resolveRequiredArtifact(targetDir, configuredPath) {
|
|
250
385
|
if (!isMeaningful(configuredPath)) {
|
|
251
386
|
return { relativePath: "", absolutePath: "" };
|
|
@@ -258,12 +393,17 @@ function resolveRequiredArtifact(targetDir, configuredPath) {
|
|
|
258
393
|
}
|
|
259
394
|
|
|
260
395
|
module.exports = {
|
|
396
|
+
CURRENT_AUTO_MODE_SCHEMA_FIELDS,
|
|
397
|
+
listMissingCurrentAutoModeFields,
|
|
398
|
+
parseAutoLedger,
|
|
261
399
|
parseAutoMode,
|
|
262
400
|
parseAutoStatus,
|
|
263
401
|
readWorkflowLanguage,
|
|
402
|
+
renderAutoLedger,
|
|
264
403
|
renderAutoOutcome,
|
|
265
404
|
renderAutoStatus,
|
|
266
405
|
resolveRequiredArtifact,
|
|
406
|
+
writeAutoLedger,
|
|
267
407
|
writeAutoOutcome,
|
|
268
408
|
writeAutoStatus,
|
|
269
409
|
};
|
package/lib/i18n.cjs
CHANGED
|
@@ -1563,6 +1563,7 @@ const ZH_SKILL_FILES = {
|
|
|
1563
1563
|
`# 自动模式契约
|
|
1564
1564
|
|
|
1565
1565
|
用这个文件定义 \`/lab:auto\` 的有边界自治执行范围。
|
|
1566
|
+
把 \`.lab/context/auto-ledger.md\` 当成运行时账本,记录 owner、checkpoint、resume 和 stop 边界。
|
|
1566
1567
|
|
|
1567
1568
|
## 目标
|
|
1568
1569
|
|
|
@@ -1602,6 +1603,7 @@ const ZH_SKILL_FILES = {
|
|
|
1602
1603
|
- Rung 的 \`Command\` 应该绑定真实的长任务命令,由它产出最终实验结果。
|
|
1603
1604
|
- 短 watcher 只用于查看进度;当真实实验还在运行时,不要把短 watcher 当成 stage 或 rung 的主命令。
|
|
1604
1605
|
- 当真实实验进程还活着时,只记录进度更新并继续等待。
|
|
1606
|
+
- 当 loop 处于运行态时,把当前 owner、命令和 watch target 写进 \`.lab/context/auto-ledger.md\`。
|
|
1605
1607
|
- Run command:
|
|
1606
1608
|
- Iterate command:
|
|
1607
1609
|
- Review command:
|
|
@@ -1634,6 +1636,43 @@ const ZH_SKILL_FILES = {
|
|
|
1634
1636
|
- Stop conditions:
|
|
1635
1637
|
- Escalation conditions:
|
|
1636
1638
|
- Canonical promotion writeback: update \`.lab/context/data-decisions.md\`、\`.lab/context/decisions.md\` 和 \`.lab/context/workflow-state.md\`,然后刷新 \`state.md\` 等派生视图。
|
|
1639
|
+
`,
|
|
1640
|
+
[path.join(".lab", "context", "auto-ledger.md")]:
|
|
1641
|
+
`# 自动运行账本
|
|
1642
|
+
|
|
1643
|
+
## Campaign
|
|
1644
|
+
|
|
1645
|
+
- Campaign id:
|
|
1646
|
+
- Objective:
|
|
1647
|
+
- Active stage:
|
|
1648
|
+
- Active rung:
|
|
1649
|
+
|
|
1650
|
+
## Owner
|
|
1651
|
+
|
|
1652
|
+
- Owner type:
|
|
1653
|
+
- Owner id:
|
|
1654
|
+
- Command:
|
|
1655
|
+
- Watch target:
|
|
1656
|
+
- Started at:
|
|
1657
|
+
- Last observed at:
|
|
1658
|
+
- Observed state:
|
|
1659
|
+
|
|
1660
|
+
## Checkpoints
|
|
1661
|
+
|
|
1662
|
+
- Last checkpoint:
|
|
1663
|
+
- Checkpoint summary:
|
|
1664
|
+
- Next transition:
|
|
1665
|
+
|
|
1666
|
+
## Boundaries
|
|
1667
|
+
|
|
1668
|
+
- Continue boundary:
|
|
1669
|
+
- Stop boundary:
|
|
1670
|
+
- Escalation boundary:
|
|
1671
|
+
|
|
1672
|
+
## Resume
|
|
1673
|
+
|
|
1674
|
+
- Required read set:
|
|
1675
|
+
- Resume command:
|
|
1637
1676
|
`,
|
|
1638
1677
|
[path.join(".lab", "context", "auto-outcome.md")]:
|
|
1639
1678
|
`# 自动结果
|
|
@@ -2141,7 +2180,7 @@ ZH_CONTENT[path.join(".codex", "prompts", "lab-data.md")] = codexPrompt(
|
|
|
2141
2180
|
ZH_CONTENT[path.join(".codex", "prompts", "lab-auto.md")] = codexPrompt(
|
|
2142
2181
|
"在已批准边界内编排自动实验循环",
|
|
2143
2182
|
"auto mode objective",
|
|
2144
|
-
"使用已安装的 `lab` 技能:`.codex/skills/lab/SKILL.md`。\n\n立刻针对用户当前给出的参数执行 `/lab:auto`,不要只推荐别的 `/lab` 阶段。只有在缺少阻塞性前提时,才明确指出缺什么,并且一次最多追问一个问题。\n\n本命令运行 `/lab:auto` 阶段。它必须读取 `.lab/context/eval-protocol.md`、`.lab/context/auto-mode.md`、`.lab/context/auto-status.md` 与 `.lab/context/auto-outcome.md`,先确认 autonomy level、approval status、terminal goal schema,以及 primary gate、secondary guard、promotion condition、stop reason、escalation reason,再把 eval-protocol 里的指标释义、主表计划、来源约束与结构化实验阶梯当作执行依据,在不修改 mission、framing 和核心 claims 的前提下编排已批准的 `run`、`iterate`、`review`、`report`,轮询长任务完成情况;如果声明了 rung,就保持会话活着并按 rung 转移继续推进。\n首个可见输出块必须是 `Auto preflight`。这个块必须列出已读取文件,并回显 `Autonomy level`、`Approval status`、`Allowed stages`、`Terminal goal`、`Primary gate` 和 `Secondary guard`,然后才能进入执行摘要或动作计划。\n如果 preflight 所需字段缺失、过期或彼此冲突,就必须在执行前停下,并明确指出到底是哪一个字段阻止了 loop 启动。\n如果仓库的 workflow language 是中文,摘要、清单条目、任务标签和进度更新都必须使用中文,除非某个文件路径、代码标识符或字面指标名必须保持原样。\n把 `Layer 3`、`Phase 1`、`Table 2` 这类表达视为论文范围目标;只有显式写成 `Autonomy level L3` 或 `自治级别 L3` 时,才把它当成执行权限级别。\n不要用 `sleep 30`、单次 `pgrep` 或一次性的 `metrics.json` 探针来代替真实长任务命令;当真实实验进程还活着时,只允许发进度更新并继续等待。"
|
|
2183
|
+
"使用已安装的 `lab` 技能:`.codex/skills/lab/SKILL.md`。\n\n立刻针对用户当前给出的参数执行 `/lab:auto`,不要只推荐别的 `/lab` 阶段。只有在缺少阻塞性前提时,才明确指出缺什么,并且一次最多追问一个问题。\n\n本命令运行 `/lab:auto` 阶段。它必须读取 `.lab/context/eval-protocol.md`、`.lab/context/auto-mode.md`、`.lab/context/auto-status.md`、`.lab/context/auto-ledger.md` 与 `.lab/context/auto-outcome.md`,先确认 autonomy level、approval status、terminal goal schema,以及 primary gate、secondary guard、promotion condition、stop reason、escalation reason,再把 eval-protocol 里的指标释义、主表计划、来源约束与结构化实验阶梯当作执行依据,在不修改 mission、framing 和核心 claims 的前提下编排已批准的 `run`、`iterate`、`review`、`report`,轮询长任务完成情况;如果声明了 rung,就保持会话活着并按 rung 转移继续推进。\n首个可见输出块必须是 `Auto preflight`。这个块必须列出已读取文件,并回显 `Autonomy level`、`Approval status`、`Allowed stages`、`Terminal goal`、`Primary gate` 和 `Secondary guard`,然后才能进入执行摘要或动作计划。\n如果 preflight 所需字段缺失、过期或彼此冲突,就必须在执行前停下,并明确指出到底是哪一个字段阻止了 loop 启动。\n当 loop 活着时,必须把当前 owner、观察状态、checkpoint 摘要、继续边界、停止边界和恢复读取集合写进 `.lab/context/auto-ledger.md`。\n如果仓库的 workflow language 是中文,摘要、清单条目、任务标签和进度更新都必须使用中文,除非某个文件路径、代码标识符或字面指标名必须保持原样。\n把 `Layer 3`、`Phase 1`、`Table 2` 这类表达视为论文范围目标;只有显式写成 `Autonomy level L3` 或 `自治级别 L3` 时,才把它当成执行权限级别。\n不要用 `sleep 30`、单次 `pgrep` 或一次性的 `metrics.json` 探针来代替真实长任务命令;当真实实验进程还活着时,只允许发进度更新并继续等待。"
|
|
2145
2184
|
);
|
|
2146
2185
|
|
|
2147
2186
|
ZH_CONTENT[path.join(".claude", "commands", "lab.md")] = claudeCommand(
|
|
@@ -2162,7 +2201,7 @@ ZH_CONTENT[path.join(".claude", "commands", "lab-auto.md")] = claudeCommand(
|
|
|
2162
2201
|
"lab-auto",
|
|
2163
2202
|
"在已批准边界内编排自动实验循环",
|
|
2164
2203
|
"auto mode objective",
|
|
2165
|
-
"使用已安装的 `lab` 技能:`.claude/skills/lab/SKILL.md`。\n\n立刻针对用户当前给出的参数执行 `auto` 阶段,不要只推荐别的 lab 阶段。只有在缺少阻塞性前提时,才明确指出缺什么,并且一次最多追问一个问题。\n\n本命令运行 lab workflow 的 `auto` 阶段。它必须读取 `.lab/context/eval-protocol.md`、`.lab/context/auto-mode.md`、`.lab/context/auto-status.md` 与 `.lab/context/auto-outcome.md`,先确认 autonomy level、approval status、terminal goal schema,以及 primary gate、secondary guard、promotion condition、stop reason、escalation reason,再把 eval-protocol 里的指标释义、主表计划、来源约束与结构化实验阶梯当作执行依据,在不修改 mission、framing 和核心 claims 的前提下编排已批准的 `run`、`iterate`、`review`、`report`,轮询长任务完成情况;如果声明了 rung,就保持会话活着并按 rung 转移继续推进。\n首个可见输出块必须是 `Auto preflight`。这个块必须列出已读取文件,并回显 `Autonomy level`、`Approval status`、`Allowed stages`、`Terminal goal`、`Primary gate` 和 `Secondary guard`,然后才能进入执行摘要或动作计划。\n如果 preflight 所需字段缺失、过期或彼此冲突,就必须在执行前停下,并明确指出到底是哪一个字段阻止了 loop 启动。\n如果仓库的 workflow language 是中文,摘要、清单条目、任务标签和进度更新都必须使用中文,除非某个文件路径、代码标识符或字面指标名必须保持原样。\n把 `Layer 3`、`Phase 1`、`Table 2` 这类表达视为论文范围目标;只有显式写成 `Autonomy level L3` 或 `自治级别 L3` 时,才把它当成执行权限级别。\n不要用 `sleep 30`、单次 `pgrep` 或一次性的 `metrics.json` 探针来代替真实长任务命令;当真实实验进程还活着时,只允许发进度更新并继续等待。"
|
|
2204
|
+
"使用已安装的 `lab` 技能:`.claude/skills/lab/SKILL.md`。\n\n立刻针对用户当前给出的参数执行 `auto` 阶段,不要只推荐别的 lab 阶段。只有在缺少阻塞性前提时,才明确指出缺什么,并且一次最多追问一个问题。\n\n本命令运行 lab workflow 的 `auto` 阶段。它必须读取 `.lab/context/eval-protocol.md`、`.lab/context/auto-mode.md`、`.lab/context/auto-status.md`、`.lab/context/auto-ledger.md` 与 `.lab/context/auto-outcome.md`,先确认 autonomy level、approval status、terminal goal schema,以及 primary gate、secondary guard、promotion condition、stop reason、escalation reason,再把 eval-protocol 里的指标释义、主表计划、来源约束与结构化实验阶梯当作执行依据,在不修改 mission、framing 和核心 claims 的前提下编排已批准的 `run`、`iterate`、`review`、`report`,轮询长任务完成情况;如果声明了 rung,就保持会话活着并按 rung 转移继续推进。\n首个可见输出块必须是 `Auto preflight`。这个块必须列出已读取文件,并回显 `Autonomy level`、`Approval status`、`Allowed stages`、`Terminal goal`、`Primary gate` 和 `Secondary guard`,然后才能进入执行摘要或动作计划。\n如果 preflight 所需字段缺失、过期或彼此冲突,就必须在执行前停下,并明确指出到底是哪一个字段阻止了 loop 启动。\n当 loop 活着时,必须把当前 owner、观察状态、checkpoint 摘要、继续边界、停止边界和恢复读取集合写进 `.lab/context/auto-ledger.md`。\n如果仓库的 workflow language 是中文,摘要、清单条目、任务标签和进度更新都必须使用中文,除非某个文件路径、代码标识符或字面指标名必须保持原样。\n把 `Layer 3`、`Phase 1`、`Table 2` 这类表达视为论文范围目标;只有显式写成 `Autonomy level L3` 或 `自治级别 L3` 时,才把它当成执行权限级别。\n不要用 `sleep 30`、单次 `pgrep` 或一次性的 `metrics.json` 探针来代替真实长任务命令;当真实实验进程还活着时,只允许发进度更新并继续等待。"
|
|
2166
2205
|
);
|
|
2167
2206
|
|
|
2168
2207
|
const zhRecipeQuickPathLine =
|
|
@@ -2171,15 +2210,27 @@ const zhRecipeQuickPathLine =
|
|
|
2171
2210
|
// Rewrite the existing zh entry for the Codex `lab.md` prompt with three
// chained literal `String.prototype.replace` calls. Because every pattern is a
// string (not a regex), each call rewrites only the first occurrence:
//   1. insert an "accepted stage spellings" section in front of the
//      subcommands heading,
//   2. append `zhRecipeQuickPathLine` right after the workflow-contract bullet,
//   3. swap the explicit-invocation bullet for one that lists every accepted
//      spelling.
// NOTE(review): `replace` returns its input unchanged when the anchor text is
// not found, so drift in the base prompt would silently skip a substitution —
// confirm the anchors still match the registered content.
// NOTE(review): in the spellings list (and in the rewritten bullet) the first
// and last variants read identically; presumably the last one was meant to be
// a distinct spelling such as a full-width colon — verify against the shipped
// asset.
ZH_CONTENT[path.join(".codex", "prompts", "lab.md")] = ZH_CONTENT[
  path.join(".codex", "prompts", "lab.md")
].replace(
  "\n\n## 子命令\n\n",
  "\n\n## 可接受的阶段写法\n\n- `/lab:idea ...`、`/lab: idea ...`、`/lab idea ...`、`/lab-idea`、`/lab:idea ...`\n- `/lab:data ...`、`/lab: data ...`、`/lab data ...`、`/lab-data`、`/lab:data ...`\n- `/lab:auto ...`、`/lab: auto ...`、`/lab auto ...`、`/lab-auto`、`/lab:auto ...`\n- `/lab:framing ...`、`/lab: framing ...`、`/lab framing ...`、`/lab-framing`、`/lab:framing ...`\n- `/lab:spec ...`、`/lab: spec ...`、`/lab spec ...`、`/lab-spec`、`/lab:spec ...`\n- `/lab:run ...`、`/lab: run ...`、`/lab run ...`、`/lab-run`、`/lab:run ...`\n- `/lab:iterate ...`、`/lab: iterate ...`、`/lab iterate ...`、`/lab-iterate`、`/lab:iterate ...`\n- `/lab:review ...`、`/lab: review ...`、`/lab review ...`、`/lab-review`、`/lab:review ...`\n- `/lab:report ...`、`/lab: report ...`、`/lab report ...`、`/lab-report`、`/lab:report ...`\n- `/lab:write ...`、`/lab: write ...`、`/lab write ...`、`/lab-write`、`/lab:write ...`\n\n## 子命令\n\n"
).replace(
  "- 始终使用 `skills/lab/SKILL.md` 作为工作流合同。\n",
  `- 始终使用 \`skills/lab/SKILL.md\` 作为工作流合同。\n${zhRecipeQuickPathLine}`
).replace(
  "- 用户显式调用 `/lab:<stage>` 时,要立刻执行该 stage,而不是只推荐别的 `/lab` stage。\n",
  "- 用户只要显式调用某个 stage,无论写成 `/lab:<stage>`、`/lab: <stage>`、`/lab <stage>`、`/lab-<stage>` 还是 `/lab:<stage>`,都要立刻执行该 stage,而不是只推荐别的 `/lab` stage。\n"
);
|
|
2177
2222
|
|
|
2178
2223
|
// Rewrite the existing zh entry for the Claude `lab.md` command with three
// chained literal `String.prototype.replace` calls. Because every pattern is a
// string (not a regex), each call rewrites only the first occurrence:
//   1. insert an "accepted stage spellings" section in front of the
//      stage-aliases heading,
//   2. append `zhRecipeQuickPathLine` right after the workflow-contract bullet,
//   3. swap the explicit-invocation bullet for one that lists every accepted
//      spelling.
// NOTE(review): `replace` returns its input unchanged when the anchor text is
// not found, so drift in the base command would silently skip a substitution —
// confirm the anchors still match the registered content.
// NOTE(review): in the spellings list (and in the rewritten bullet) the first
// and last variants read identically; presumably the last one was meant to be
// a distinct spelling such as a full-width colon — verify against the shipped
// asset.
ZH_CONTENT[path.join(".claude", "commands", "lab.md")] = ZH_CONTENT[
  path.join(".claude", "commands", "lab.md")
].replace(
  "\n\n## 阶段别名\n\n",
  "\n\n## 可接受的阶段写法\n\n- `/lab:idea ...`、`/lab: idea ...`、`/lab idea ...`、`/lab-idea`、`/lab:idea ...`\n- `/lab:data ...`、`/lab: data ...`、`/lab data ...`、`/lab-data`、`/lab:data ...`\n- `/lab:auto ...`、`/lab: auto ...`、`/lab auto ...`、`/lab-auto`、`/lab:auto ...`\n- `/lab:framing ...`、`/lab: framing ...`、`/lab framing ...`、`/lab-framing`、`/lab:framing ...`\n- `/lab:spec ...`、`/lab: spec ...`、`/lab spec ...`、`/lab-spec`、`/lab:spec ...`\n- `/lab:run ...`、`/lab: run ...`、`/lab run ...`、`/lab-run`、`/lab:run ...`\n- `/lab:iterate ...`、`/lab: iterate ...`、`/lab iterate ...`、`/lab-iterate`、`/lab:iterate ...`\n- `/lab:review ...`、`/lab: review ...`、`/lab review ...`、`/lab-review`、`/lab:review ...`\n- `/lab:report ...`、`/lab: report ...`、`/lab report ...`、`/lab-report`、`/lab:report ...`\n- `/lab:write ...`、`/lab: write ...`、`/lab write ...`、`/lab-write`、`/lab:write ...`\n\n## 阶段别名\n\n"
).replace(
  "- 始终使用 `skills/lab/SKILL.md` 作为工作流合同。\n",
  `- 始终使用 \`skills/lab/SKILL.md\` 作为工作流合同。\n${zhRecipeQuickPathLine}`
).replace(
  "- 用户显式调用 `/lab <stage> ...` 或 `/lab-<stage>` 时,要立刻执行该 stage,而不是只推荐别的阶段。\n",
  "- 用户只要显式调用某个 stage,无论写成 `/lab:<stage>`、`/lab: <stage>`、`/lab <stage>`、`/lab-<stage>` 还是 `/lab:<stage>`,都要立刻执行该 stage,而不是只推荐别的阶段。\n"
);
|
|
2184
2235
|
|
|
2185
2236
|
ZH_CONTENT[path.join(".codex", "skills", "lab", "SKILL.md")] = `---
|
package/lib/install.cjs
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
const fs = require("node:fs");
|
|
2
2
|
const os = require("node:os");
|
|
3
3
|
const path = require("node:path");
|
|
4
|
+
const { CURRENT_AUTO_MODE_SCHEMA_FIELDS } = require("./auto_state.cjs");
|
|
4
5
|
const { getLocalizedContent } = require("./i18n.cjs");
|
|
5
6
|
|
|
6
7
|
const REPO_ROOT = path.resolve(__dirname, "..");
|
|
@@ -44,6 +45,7 @@ const PROJECT_OWNED_LOCALIZED_PATHS = [
|
|
|
44
45
|
path.join(".lab", "context", "eval-protocol.md"),
|
|
45
46
|
path.join(".lab", "context", "auto-mode.md"),
|
|
46
47
|
path.join(".lab", "context", "auto-status.md"),
|
|
48
|
+
path.join(".lab", "context", "auto-ledger.md"),
|
|
47
49
|
path.join(".lab", "context", "auto-outcome.md"),
|
|
48
50
|
path.join(".lab", "context", "terminology-lock.md"),
|
|
49
51
|
path.join(".lab", "context", "summary.md"),
|
|
@@ -131,6 +133,68 @@ function installLabAssets(targetDir, force) {
|
|
|
131
133
|
);
|
|
132
134
|
}
|
|
133
135
|
|
|
136
|
+
/**
 * Return the default Markdown bullet used to seed one auto-mode schema field.
 *
 * The lookup only considers the table's own keys, so labels that happen to
 * match an inherited `Object.prototype` member (for example `constructor` or
 * `toString`) yield `""` instead of leaking a function to callers that go on
 * to treat the result as a string.
 *
 * @param {string} label - Canonical English field label, e.g. `"Primary gate"`.
 * @param {string} lang - Workflow language; `"zh"` selects the Chinese lines,
 *   any other value selects the English lines.
 * @returns {string} The bullet line (including any default value), or `""`
 *   when the label has no default line.
 */
function autoModeSchemaFieldLine(label, lang) {
  const linesByLang = {
    zh: {
      "Autonomy level": "- 自治级别: L2",
      "Approval status": "- 批准状态: draft",
      "Terminal goal type": "- 终止目标类型: ",
      "Terminal goal target": "- 终止目标目标值: ",
      "Required terminal artifact": "- 终止目标工件: ",
      "Primary gate": "- 主 gate: ",
      "Secondary guard": "- 次级 guard: ",
      "Promotion condition": "- 升格条件: ",
      "Stop reason": "- 停止原因: ",
      "Escalation reason": "- 升级原因: ",
    },
    en: {
      "Autonomy level": "- Autonomy level: L2",
      "Approval status": "- Approval status: draft",
      "Terminal goal type": "- Terminal goal type: ",
      "Terminal goal target": "- Terminal goal target: ",
      "Required terminal artifact": "- Required terminal artifact: ",
      "Primary gate": "- Primary gate: ",
      "Secondary guard": "- Secondary guard: ",
      "Promotion condition": "- Promotion condition: ",
      "Stop reason": "- Stop reason: ",
      "Escalation reason": "- Escalation reason: ",
    },
  };

  // Every language other than "zh" falls back to the English lines.
  const lines = lang === "zh" ? linesByLang.zh : linesByLang.en;

  // Own-property check: a plain `lines[label]` would also resolve inherited
  // members of Object.prototype.
  return Object.prototype.hasOwnProperty.call(lines, label) ? lines[label] : "";
}
|
|
167
|
+
|
|
168
|
+
/**
 * Append any schema fields that a project's `.lab/context/auto-mode.md` does
 * not mention yet, leaving the existing content in place.
 *
 * A field counts as present when the file contains either its canonical label
 * or its localized label followed by an ASCII colon. When at least one field
 * is absent, a "Managed schema migration" section holding the default line for
 * each absent field is written after the right-trimmed original content.
 * Nothing is written when the file does not exist or no field is absent.
 *
 * @param {string} targetDir - Project root that contains the `.lab` directory.
 * @param {string} lang - Workflow language; `"zh"` selects the Chinese notice
 *   and field lines.
 * @returns {void}
 */
function migrateAutoModeContext(targetDir, lang) {
  const autoModePath = path.join(targetDir, ".lab", "context", "auto-mode.md");
  if (!fs.existsSync(autoModePath)) {
    return;
  }

  const current = fs.readFileSync(autoModePath, "utf8");

  // Collect the canonical labels the file does not mention in either spelling.
  const absentLabels = [];
  for (const [label] of CURRENT_AUTO_MODE_SCHEMA_FIELDS) {
    const templateLine = autoModeSchemaFieldLine(label, lang);
    // Strip the bullet prefix and keep the text before the first colon.
    const localizedLabel = templateLine.replace(/^- /, "").split(":")[0];
    const mentioned =
      current.includes(`${label}:`) || current.includes(`${localizedLabel}:`);
    if (!mentioned) {
      absentLabels.push(label);
    }
  }
  if (absentLabels.length === 0) {
    return;
  }

  const keptContent = current.trimEnd();

  let notice;
  if (lang === "zh") {
    notice =
      "## Managed schema migration\n\n- 这段由 `superlab update` 补齐,用于把旧版 auto 契约升级到当前 schema。保留现有内容,只补缺失字段。\n";
  } else {
    notice =
      "## Managed schema migration\n\n- This block was added by `superlab update` to bring a legacy auto contract up to the current schema without overwriting your existing content.\n";
  }

  // Labels without a default line contribute nothing to the appended block.
  const appendedLines = [];
  for (const label of absentLabels) {
    const line = autoModeSchemaFieldLine(label, lang);
    if (line) {
      appendedLines.push(line);
    }
  }

  const migrated = [keptContent, "\n\n", notice, appendedLines.join("\n"), "\n"].join("");
  fs.writeFileSync(autoModePath, migrated);
}
|
|
197
|
+
|
|
134
198
|
function ensureConfiguredProjectDirs(targetDir) {
|
|
135
199
|
const workflowConfigPath = path.join(targetDir, ".lab", "config", "workflow.json");
|
|
136
200
|
if (!fs.existsSync(workflowConfigPath)) {
|
|
@@ -604,6 +668,7 @@ function installSuperlab({
|
|
|
604
668
|
writePlatformShims(targetDir, resolvedLang, force);
|
|
605
669
|
writeManagedCoreFile(targetDir, resolvedLang, { force });
|
|
606
670
|
localizeInstalledAssets(targetDir, resolvedLang, { newlyCreatedProjectOwnedPaths });
|
|
671
|
+
migrateAutoModeContext(targetDir, resolvedLang);
|
|
607
672
|
ensureConfiguredProjectDirs(targetDir);
|
|
608
673
|
chmodScripts(targetDir);
|
|
609
674
|
const metadata = {
|
|
@@ -7,7 +7,7 @@ argument-hint: autonomous campaign target
|
|
|
7
7
|
Use the installed `lab` skill at `.claude/skills/lab/SKILL.md`.
|
|
8
8
|
|
|
9
9
|
Execute the requested `/lab-auto` command against the user's argument now. Do not only recommend another lab stage. If a blocking prerequisite is missing, say exactly what is missing and ask at most one clarifying question.
|
|
10
|
-
This command runs the `auto` stage of the lab workflow. It must read `.lab/context/eval-protocol.md`, `.lab/context/auto-mode.md`, `.lab/context/auto-status.md`, and `.lab/context/auto-outcome.md`, enforce the declared terminal goal schema, make the primary gate, secondary guard, promotion condition, stop reason, and escalation reason explicit, orchestrate approved run, iterate, review, and report stages inside that contract, poll long-running work until completion or stop conditions, and write progress and the final outcome back into `.lab/context/auto-status.md` and `.lab/context/auto-outcome.md`.
|
|
10
|
+
This command runs the `auto` stage of the lab workflow. It must read `.lab/context/eval-protocol.md`, `.lab/context/auto-mode.md`, `.lab/context/auto-status.md`, `.lab/context/auto-ledger.md`, and `.lab/context/auto-outcome.md`, enforce the declared terminal goal schema, make the primary gate, secondary guard, promotion condition, stop reason, and escalation reason explicit, orchestrate approved run, iterate, review, and report stages inside that contract, poll long-running work until completion or stop conditions, and write live owner state plus progress and the final outcome back into `.lab/context/auto-status.md`, `.lab/context/auto-ledger.md`, and `.lab/context/auto-outcome.md`.
|
|
11
11
|
The first visible block must be `Auto preflight`. That first visible block must list the files read and echo `Autonomy level`, `Approval status`, `Allowed stages`, `Terminal goal`, `Primary gate`, and `Secondary guard` before any execution summary or action plan.
|
|
12
12
|
If the preflight block cannot be completed because any required field is missing, stale, or inconsistent, stop before execution and say exactly which field blocked arming the loop.
|
|
13
13
|
When the repository workflow language is Chinese, summaries, checklist items, task labels, and progress updates should be written in Chinese unless a literal identifier must stay unchanged.
|
|
@@ -13,6 +13,21 @@ argument-hint: [stage] [target]
|
|
|
13
13
|
|
|
14
14
|
Use the same repository artifacts and stage boundaries every time.
|
|
15
15
|
|
|
16
|
+
## Accepted Stage Spellings
|
|
17
|
+
|
|
18
|
+
Treat all of these as equivalent stage requests:
|
|
19
|
+
|
|
20
|
+
- `/lab:idea ...`, `/lab: idea ...`, `/lab idea ...`, `/lab-idea`, `/lab:idea ...`
|
|
21
|
+
- `/lab:data ...`, `/lab: data ...`, `/lab data ...`, `/lab-data`, `/lab:data ...`
|
|
22
|
+
- `/lab:auto ...`, `/lab: auto ...`, `/lab auto ...`, `/lab-auto`, `/lab:auto ...`
|
|
23
|
+
- `/lab:framing ...`, `/lab: framing ...`, `/lab framing ...`, `/lab-framing`, `/lab:framing ...`
|
|
24
|
+
- `/lab:spec ...`, `/lab: spec ...`, `/lab spec ...`, `/lab-spec`, `/lab:spec ...`
|
|
25
|
+
- `/lab:run ...`, `/lab: run ...`, `/lab run ...`, `/lab-run`, `/lab:run ...`
|
|
26
|
+
- `/lab:iterate ...`, `/lab: iterate ...`, `/lab iterate ...`, `/lab-iterate`, `/lab:iterate ...`
|
|
27
|
+
- `/lab:review ...`, `/lab: review ...`, `/lab review ...`, `/lab-review`, `/lab:review ...`
|
|
28
|
+
- `/lab:report ...`, `/lab: report ...`, `/lab report ...`, `/lab-report`, `/lab:report ...`
|
|
29
|
+
- `/lab:write ...`, `/lab: write ...`, `/lab write ...`, `/lab-write`, `/lab:write ...`
|
|
30
|
+
|
|
16
31
|
## Stage Aliases
|
|
17
32
|
|
|
18
33
|
- `/lab idea ...` or `/lab-idea`
|
|
@@ -50,7 +65,7 @@ Use the same repository artifacts and stage boundaries every time.
|
|
|
50
65
|
|
|
51
66
|
- Always use `skills/lab/SKILL.md` as the workflow contract.
|
|
52
67
|
- Use `skills/lab/references/recipes.md` as the quick path for common stage chains; do not invent new slash commands or aliases from it.
|
|
53
|
-
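- When the user explicitly invokes `/lab <stage> ...` or `/lab-<stage>`, execute that stage now against the provided argument instead of only recommending another lab stage.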
|
|
68
|
+
- When the user explicitly invokes a stage through `/lab:<stage>`, `/lab: <stage>`, `/lab <stage>`, `/lab-<stage>`, or `/lab:<stage>`, execute that stage now against the provided argument instead of only recommending another lab stage.
|
|
54
69
|
- Start by giving the user a concise stage summary. Materialize managed artifacts immediately when the stage contract requires them, then report the output path and next step.
|
|
55
70
|
- When ambiguity matters, ask one clarifying question at a time; when multiple paths are viable, present 2-3 approaches before converging.
|
|
56
71
|
- `spec` is not complete until the approved change is frozen under `.lab/changes/<change-id>/`.
|
|
@@ -72,7 +87,8 @@ Use the same repository artifacts and stage boundaries every time.
|
|
|
72
87
|
- If the request omits the level or mixes it with a paper layer, phase, or table target, `/lab auto` should stop and ask for an explicit autonomy level before arming the loop.
|
|
73
88
|
- The first visible output of a real `/lab auto` run must be `Auto preflight`.
|
|
74
89
|
- That first visible output must show files read plus `Autonomy level`, `Allowed stages`, `Terminal goal`, `Primary gate`, and `Secondary guard`.
|
|
75
|
-
- If the preflight block cannot be completed from `.lab/context/eval-protocol.md`, `.lab/context/auto-mode.md`, `.lab/context/auto-status.md`, and `.lab/context/auto-outcome.md`, `/lab auto` should stop instead of acting like the loop is armed.
|
|
90
|
+
- If the preflight block cannot be completed from `.lab/context/eval-protocol.md`, `.lab/context/auto-mode.md`, `.lab/context/auto-status.md`, `.lab/context/auto-ledger.md`, and `.lab/context/auto-outcome.md`, `/lab auto` should stop instead of acting like the loop is armed.
|
|
91
|
+
- While the loop is alive, `/lab auto` should keep `.lab/context/auto-ledger.md` updated with the active owner, observed state, and resume boundary.
|
|
76
92
|
|
|
77
93
|
- Treat `Autonomy level L1/L2/L3` as the execution privilege level, not as a paper layer, phase, or table number.
|
|
78
94
|
- Treat `paper layer`, `phase`, and `table` as experiment targets. For example, `paper layer 3` or `Phase 1` should not be interpreted as `Autonomy level L3`.
|
|
@@ -6,7 +6,7 @@ argument-hint: autonomous campaign target
|
|
|
6
6
|
Use the installed `lab` skill at `.codex/skills/lab/SKILL.md`.
|
|
7
7
|
|
|
8
8
|
Execute the requested `/lab:auto` stage against the user's argument now. Do not only recommend another lab stage. If a blocking prerequisite is missing, say exactly what is missing and ask at most one clarifying question.
|
|
9
|
-
This command runs the `/lab:auto` stage. It must read `.lab/context/eval-protocol.md`, `.lab/context/auto-mode.md`, `.lab/context/auto-status.md`, and `.lab/context/auto-outcome.md`, enforce the declared terminal goal schema, make the primary gate, secondary guard, promotion condition, stop reason, and escalation reason explicit, orchestrate approved run, iterate, review, and report stages inside that contract, poll long-running work until completion or stop conditions, and write progress and the final outcome back into `.lab/context/auto-status.md` and `.lab/context/auto-outcome.md`.
|
|
9
|
+
This command runs the `/lab:auto` stage. It must read `.lab/context/eval-protocol.md`, `.lab/context/auto-mode.md`, `.lab/context/auto-status.md`, `.lab/context/auto-ledger.md`, and `.lab/context/auto-outcome.md`, enforce the declared terminal goal schema, make the primary gate, secondary guard, promotion condition, stop reason, and escalation reason explicit, orchestrate approved run, iterate, review, and report stages inside that contract, poll long-running work until completion or stop conditions, and write live owner state plus progress and the final outcome back into `.lab/context/auto-status.md`, `.lab/context/auto-ledger.md`, and `.lab/context/auto-outcome.md`.
|
|
10
10
|
The first visible block must be `Auto preflight`. That first visible block must list the files read and echo `Autonomy level`, `Approval status`, `Allowed stages`, `Terminal goal`, `Primary gate`, and `Secondary guard` before any execution summary or action plan.
|
|
11
11
|
If the preflight block cannot be completed because any required field is missing, stale, or inconsistent, stop before execution and say exactly which field blocked arming the loop.
|
|
12
12
|
When the repository workflow language is Chinese, summaries, checklist items, task labels, and progress updates should be written in Chinese unless a literal identifier must stay unchanged.
|
|
@@ -7,7 +7,22 @@ argument-hint: workflow question or stage choice
|
|
|
7
7
|
|
|
8
8
|
`/lab` is a strict research workflow command family. Use the same repository artifacts and stage boundaries every time.
|
|
9
9
|
|
|
10
|
-
## Subcommands
|
|
10
|
+
## Accepted Stage Spellings
|
|
11
|
+
|
|
12
|
+
Treat all of these as equivalent stage requests:
|
|
13
|
+
|
|
14
|
+
- `/lab:idea ...`, `/lab: idea ...`, `/lab idea ...`, `/lab-idea`, `/lab:idea ...`
|
|
15
|
+
- `/lab:data ...`, `/lab: data ...`, `/lab data ...`, `/lab-data`, `/lab:data ...`
|
|
16
|
+
- `/lab:auto ...`, `/lab: auto ...`, `/lab auto ...`, `/lab-auto`, `/lab:auto ...`
|
|
17
|
+
- `/lab:framing ...`, `/lab: framing ...`, `/lab framing ...`, `/lab-framing`, `/lab:framing ...`
|
|
18
|
+
- `/lab:spec ...`, `/lab: spec ...`, `/lab spec ...`, `/lab-spec`, `/lab:spec ...`
|
|
19
|
+
- `/lab:run ...`, `/lab: run ...`, `/lab run ...`, `/lab-run`, `/lab:run ...`
|
|
20
|
+
- `/lab:iterate ...`, `/lab: iterate ...`, `/lab iterate ...`, `/lab-iterate`, `/lab:iterate ...`
|
|
21
|
+
- `/lab:review ...`, `/lab: review ...`, `/lab review ...`, `/lab-review`, `/lab:review ...`
|
|
22
|
+
- `/lab:report ...`, `/lab: report ...`, `/lab report ...`, `/lab-report`, `/lab:report ...`
|
|
23
|
+
- `/lab:write ...`, `/lab: write ...`, `/lab write ...`, `/lab-write`, `/lab:write ...`
|
|
24
|
+
|
|
25
|
+
## Stage Aliases
|
|
11
26
|
|
|
12
27
|
- `/lab:idea`
|
|
13
28
|
Research the idea through two brainstorm passes and two literature sweeps, define the problem and failure case, compare against closest prior work, then end with a source-backed recommendation and an explicit approval gate before any implementation.
|
|
@@ -44,7 +59,7 @@ argument-hint: workflow question or stage choice
|
|
|
44
59
|
|
|
45
60
|
- Always use `skills/lab/SKILL.md` as the workflow contract.
|
|
46
61
|
- Use `skills/lab/references/recipes.md` as the quick path for common stage chains; do not invent new slash commands or aliases from it.
|
|
47
|
-
- When the user explicitly invokes `/lab:<stage>`, execute that stage now against the provided argument instead of only recommending another `/lab` stage.
|
|
62
|
+
- When the user explicitly invokes a stage through `/lab:<stage>`, `/lab: <stage>`, `/lab <stage>`, `/lab-<stage>`, or `/lab:<stage>`, execute that stage now against the provided argument instead of only recommending another `/lab` stage.
|
|
48
63
|
- Start by giving the user a concise stage summary. Materialize managed artifacts immediately when the stage contract requires them, then report the output path and next step.
|
|
49
64
|
- When ambiguity matters, ask one clarifying question at a time; when multiple paths are viable, present 2-3 approaches before converging.
|
|
50
65
|
- `/lab:spec` is not complete until the approved change is frozen under `.lab/changes/<change-id>/`.
|
|
@@ -66,7 +81,8 @@ argument-hint: workflow question or stage choice
|
|
|
66
81
|
- If the request omits the level or mixes it with a paper layer, phase, or table target, `/lab:auto` should stop and ask for an explicit autonomy level before arming the loop.
|
|
67
82
|
- The first visible output of a real `/lab:auto` run must be `Auto preflight`.
|
|
68
83
|
- That first visible output must show files read plus `Autonomy level`, `Allowed stages`, `Terminal goal`, `Primary gate`, and `Secondary guard`.
|
|
69
|
-
- If the preflight block cannot be completed from `.lab/context/eval-protocol.md`, `.lab/context/auto-mode.md`, `.lab/context/auto-status.md`, and `.lab/context/auto-outcome.md`, `/lab:auto` should stop instead of acting like the loop is armed.
|
|
84
|
+
- If the preflight block cannot be completed from `.lab/context/eval-protocol.md`, `.lab/context/auto-mode.md`, `.lab/context/auto-status.md`, `.lab/context/auto-ledger.md`, and `.lab/context/auto-outcome.md`, `/lab:auto` should stop instead of acting like the loop is armed.
|
|
85
|
+
- While the loop is alive, `/lab:auto` should keep `.lab/context/auto-ledger.md` updated with the active owner, observed state, and resume boundary.
|
|
70
86
|
|
|
71
87
|
- Treat `Autonomy level L1/L2/L3` as the execution privilege level, not as a paper layer, phase, or table number.
|
|
72
88
|
- Treat `paper layer`, `phase`, and `table` as experiment targets. For example, `paper layer 3` or `Phase 1` should not be interpreted as `Autonomy level L3`.
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
# Auto Runtime Ledger
|
|
2
|
+
|
|
3
|
+
## Campaign
|
|
4
|
+
|
|
5
|
+
- Campaign id:
|
|
6
|
+
- Objective:
|
|
7
|
+
- Active stage:
|
|
8
|
+
- Active rung:
|
|
9
|
+
|
|
10
|
+
## Owner
|
|
11
|
+
|
|
12
|
+
- Owner type:
|
|
13
|
+
- Owner id:
|
|
14
|
+
- Command:
|
|
15
|
+
- Watch target:
|
|
16
|
+
- Started at:
|
|
17
|
+
- Last observed at:
|
|
18
|
+
- Observed state:
|
|
19
|
+
|
|
20
|
+
## Checkpoints
|
|
21
|
+
|
|
22
|
+
- Last checkpoint:
|
|
23
|
+
- Checkpoint summary:
|
|
24
|
+
- Next transition:
|
|
25
|
+
|
|
26
|
+
## Boundaries
|
|
27
|
+
|
|
28
|
+
- Continue boundary:
|
|
29
|
+
- Stop boundary:
|
|
30
|
+
- Escalation boundary:
|
|
31
|
+
|
|
32
|
+
## Resume
|
|
33
|
+
|
|
34
|
+
- Required read set:
|
|
35
|
+
- Resume command:
|
|
@@ -3,6 +3,7 @@
|
|
|
3
3
|
Use this file to define the bounded autonomous execution envelope for `/lab:auto`.
|
|
4
4
|
Pair it with `.lab/context/eval-protocol.md`, which defines the paper-facing metrics, tables, gates, and benchmark ladder that auto mode should optimize against.
|
|
5
5
|
If `eval-protocol.md` declares structured rung entries, auto mode follows those rung transitions first and uses the stage commands here as per-stage fallbacks.
|
|
6
|
+
Use `.lab/context/auto-ledger.md` as the live runtime ledger for ownership, checkpoints, resume, and stop boundaries.
|
|
6
7
|
|
|
7
8
|
## Objective
|
|
8
9
|
|
|
@@ -42,6 +43,7 @@ If `eval-protocol.md` declares structured rung entries, auto mode follows those
|
|
|
42
43
|
- Rung `Command` should be the real long-running command that owns the experiment result.
|
|
43
44
|
- A short watcher is only a progress probe. Do not use a short watcher as the stage or rung command when the real experiment is still running.
|
|
44
45
|
- While the real experiment process is still alive, only record a progress update and keep waiting.
|
|
46
|
+
- Record the active owner, command, and watch target in `.lab/context/auto-ledger.md` while the loop is alive.
|
|
45
47
|
- Run command:
|
|
46
48
|
- Iterate command:
|
|
47
49
|
- Review command:
|
|
@@ -108,12 +108,13 @@ Use this skill when the user invokes `/lab:*` or asks for the structured researc
|
|
|
108
108
|
### `/lab:auto`
|
|
109
109
|
|
|
110
110
|
- Use this stage to orchestrate approved execution stages with bounded autonomy.
|
|
111
|
-
- Read `.lab/config/workflow.json`, `.lab/context/mission.md`, `.lab/context/state.md`, `.lab/context/workflow-state.md`, `.lab/context/decisions.md`, `.lab/context/data-decisions.md`, `.lab/context/evidence-index.md`, `.lab/context/terminology-lock.md`, `.lab/context/auto-mode.md`, and `.lab/context/auto-status.md` before acting.
|
|
112
|
-
- Treat `.lab/context/auto-mode.md` as the control contract and `.lab/context/auto-status.md` as the live summary.
|
|
111
|
+
- Read `.lab/config/workflow.json`, `.lab/context/mission.md`, `.lab/context/state.md`, `.lab/context/workflow-state.md`, `.lab/context/decisions.md`, `.lab/context/data-decisions.md`, `.lab/context/evidence-index.md`, `.lab/context/terminology-lock.md`, `.lab/context/auto-mode.md`, `.lab/context/auto-status.md`, and `.lab/context/auto-ledger.md` before acting.
|
|
112
|
+
- Treat `.lab/context/auto-mode.md` as the control contract, `.lab/context/auto-status.md` as the live summary, and `.lab/context/auto-ledger.md` as the runtime ledger.
|
|
113
113
|
- Require `.lab/context/auto-mode.md` to expose `Primary gate`, `Secondary guard`, `Promotion condition`, `Stop reason`, and `Escalation reason` before execution.
|
|
114
114
|
- Require `Autonomy level` and `Approval status` in `.lab/context/auto-mode.md` before execution.
|
|
115
115
|
- Start every `/lab:auto` run with a visible `Auto preflight` summary that reports files read plus `Autonomy level`, `Approval status`, `Allowed stages`, `Terminal goal`, `Primary gate`, and `Secondary guard`.
|
|
116
116
|
- If any required preflight field is missing or inconsistent, stop before any loop action. Do not present a fake auto summary as if the loop were armed.
|
|
117
|
+
- Keep `.lab/context/auto-ledger.md` updated with the active owner, observed state, and resume boundary while the loop is live.
|
|
117
118
|
- Treat `L1` as safe-run validation, `L2` as bounded iteration, and `L3` as aggressive campaign mode.
|
|
118
119
|
- Surface the level guide every time `/lab:auto` starts, and make the detailed guide mandatory when the user omits the level or mixes it with a paper layer, phase, or table target.
|
|
119
120
|
- Reuse `/lab:run`, `/lab:iterate`, `/lab:review`, `/lab:report`, and optional `/lab:write` instead of inventing a second workflow.
|
|
@@ -22,6 +22,7 @@
|
|
|
22
22
|
- `.lab/context/terminology-lock.md`
|
|
23
23
|
- `.lab/context/auto-mode.md`
|
|
24
24
|
- `.lab/context/auto-status.md`
|
|
25
|
+
- `.lab/context/auto-ledger.md`
|
|
25
26
|
- `.lab/context/auto-outcome.md`
|
|
26
27
|
|
|
27
28
|
## Context Write Set
|
|
@@ -36,6 +37,7 @@
|
|
|
36
37
|
- `.lab/context/summary.md`
|
|
37
38
|
- `.lab/context/session-brief.md`
|
|
38
39
|
- `.lab/context/auto-status.md`
|
|
40
|
+
- `.lab/context/auto-ledger.md`
|
|
39
41
|
- `.lab/context/auto-outcome.md`
|
|
40
42
|
|
|
41
43
|
## Boundary Rules
|
|
@@ -48,6 +50,7 @@
|
|
|
48
50
|
- Treat `Sanity and Alternative-Explanation Checks` as the anomaly gate for automation. When a rung yields all-null outputs, suspiciously identical runs, no-op deltas, or impl/result mismatches, pause promotion logic until implementation reality checks, alternative explanations, and at least one cross-check are recorded.
|
|
49
51
|
- Treat paper-template selection as an explicit write-time gate, not as a silent fallback, when the loop is about to create `.tex` deliverables for the first time.
|
|
50
52
|
- Treat `.lab/context/auto-mode.md` as a visible control plane. The contract should make the primary gate, secondary guard, promotion condition, stop reason, and escalation reason explicit before execution starts.
|
|
53
|
+
- Treat `.lab/context/auto-ledger.md` as the live runtime ledger for owner identity, observed state, checkpoint progress, continue boundary, stop boundary, escalation boundary, and resume read set.
|
|
51
54
|
- The contract must declare `Autonomy level` and `Approval status`, and execution starts only when approval is explicitly set to `approved`.
|
|
52
55
|
- The contract must also declare a concrete terminal goal:
|
|
53
56
|
- `rounds`
|
|
@@ -72,6 +75,11 @@
|
|
|
72
75
|
- Keep a poll-based waiting loop instead of sleeping blindly.
|
|
73
76
|
- Do not treat a short watcher such as `sleep 30`, a one-shot `pgrep`, or a single `metrics.json` probe as the rung command when the real experiment is still running.
|
|
74
77
|
- Bind each rung to the real long-running command or process that owns the experiment result.
|
|
78
|
+
- Record the active owner as one of:
|
|
79
|
+
- `local-process`
|
|
80
|
+
- `local-runner`
|
|
81
|
+
- `remote-runner`
|
|
82
|
+
- Every nonterminal `/lab:auto` state must remain resumable from `.lab/context/auto-ledger.md` plus `.lab/context/auto-status.md`.
|
|
75
83
|
- Start every real `/lab:auto` run with a visible `Auto preflight` block before any execution summary or action plan. That first visible output should list:
|
|
76
84
|
- files read
|
|
77
85
|
- `Autonomy level`
|
|
@@ -82,6 +90,7 @@
|
|
|
82
90
|
- `Secondary guard`
|
|
83
91
|
- If any of those preflight fields are missing, stale, or inconsistent, stop before execution and report the blocking field directly.
|
|
84
92
|
- Always write a canonical `.lab/context/auto-outcome.md` when the run completes, stops, or fails.
|
|
93
|
+
- Always keep `.lab/context/auto-ledger.md` in sync with the current active owner while the loop is live.
|
|
85
94
|
- Keep handoff wording stable across auto outcomes and downstream report or write handoffs: record completed work, frozen scope, allowed next action, required read set for the next owner, and the accept or revise or reject boundary.
|
|
86
95
|
- When the evaluation protocol declares structured ladder rungs, execute them as a foreground rung state machine:
|
|
87
96
|
- each rung must declare `Stage`, `Goal`, `Command`, `Watch`, `Gate`, `On pass`, `On fail`, and `On stop`
|