npm - superlab - Versions diffs - 0.1.41 → 0.1.43 - Mend

superlab 0.1.41 → 0.1.43

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13)

package/lib/auto_runner.cjs +264 -8
package/lib/auto_state.cjs +114 -0
package/lib/i18n.cjs +43 -4
package/lib/install.cjs +1 -0
package/package-assets/claude/commands/lab-auto.md +3 -1
package/package-assets/claude/commands/lab.md +4 -0
package/package-assets/codex/prompts/lab-auto.md +3 -1
package/package-assets/codex/prompts/lab.md +4 -0
package/package-assets/shared/lab/context/auto-ledger.md +35 -0
package/package-assets/shared/lab/context/auto-mode.md +2 -0
package/package-assets/shared/skills/lab/SKILL.md +5 -2
package/package-assets/shared/skills/lab/stages/auto.md +30 -9
package/package.json +1 -1

package/lib/auto_runner.cjs CHANGED Viewed

@@ -21,10 +21,12 @@ const {
   verifyStageContract,
 } = require("./auto_contracts.cjs");
 const {
+  parseAutoLedger,
   parseAutoMode,
   parseAutoStatus,
   readWorkflowLanguage,
   resolveRequiredArtifact,
+  writeAutoLedger,
   writeAutoOutcome,
   writeAutoStatus,
 } = require("./auto_state.cjs");
@@ -33,6 +35,10 @@ function normalizeTransition(value) {
   return (value || "").trim();
 }
+function normalizeObservedState(value) { // Canonicalize a ledger "observed state": a falsy value becomes "", anything else is trimmed and lower-cased.
+  return (value || "").trim().toLowerCase();
+}
 function isSuccessTransition(value) { // True when the trimmed, lower-cased value is one of the success transition names listed below; a falsy value is treated as "".
   return ["success", "terminal-success", "campaign-success"].includes((value || "").trim().toLowerCase());
 }
@@ -41,6 +47,101 @@ function isStopTransition(value) {
   return ["stop", "campaign-stop", "terminal-stop"].includes((value || "").trim().toLowerCase());
 }
+function isLocalProcessAlive(ownerId) { // Report whether ownerId, read as a process id, names a process that currently exists on this machine.
+  const pid = parseInteger(ownerId, null); // parseInteger is defined outside this view; presumably returns the fallback (null) for non-numeric input — TODO confirm
+  if (!Number.isInteger(pid) || pid <= 0) {
+    return false; // not a usable positive integer pid
+  }
+  try {
+    process.kill(pid, 0); // signal 0 sends nothing; the call only tests whether the pid can be signalled
+    return true;
+  } catch (error) {
+    if (error && error.code === "EPERM") {
+      return true; // EPERM: the process exists but this process may not signal it
+    }
+    return false; // every other error is treated as "not alive"
+  }
+}
+function resolveResumePlan({ mode, evalProtocol, status, ledger, now }) { // Decide from the persisted status and ledger whether startup must be blocked, a ladder rung should be resumed, or nothing is resumable; returns { blockingIssue, resumePlan }. `mode` is accepted but not read in this function.
+  const hasLedgerState = [ // fields whose presence indicates an earlier campaign left state behind
+    ledger.campaignId,
+    ledger.observedState,
+    ledger.activeRung,
+    ledger.nextTransition,
+    ledger.ownerId,
+  ].some((value) => isMeaningful(value)); // isMeaningful is defined outside this view
+  if (!hasLedgerState) {
+    return { blockingIssue: "", resumePlan: null }; // nothing recorded: no block and no resume plan
+  }
+  if ((ledger.ownerType || "").trim().toLowerCase() === "local-process" && isLocalProcessAlive(ledger.ownerId)) {
+    return { // the recorded local owner process still exists, so report a blocking issue instead of a plan
+      blockingIssue: `auto campaign already has a live local owner: ${ledger.ownerId}`,
+      resumePlan: null,
+    };
+  }
+  const startedAt = isMeaningful(status.startedAt) ? status.startedAt : now.toISOString(); // keep the recorded start time when status has one
+  const campaignId = isMeaningful(ledger.campaignId) // reuse the recorded campaign id, otherwise derive one from startedAt
+    ? ledger.campaignId
+    : `auto-${startedAt.replace(/[:.]/g, "-")}`; // ":" and "." in the timestamp are replaced with "-"
+  const iterationCount = parseInteger(status.iterationCount, 0);
+  const observedState = normalizeObservedState(ledger.observedState);
+  if (evalProtocol.experimentRungs.length > 0) { // a resume plan is only built when the protocol declares experiment rungs
+    const rungMap = new Map(evalProtocol.experimentRungs.map((rung) => [rung.id, rung])); // rung id -> rung definition
+    const nextTransition = normalizeTransition(ledger.nextTransition || status.nextRung); // the ledger value wins over status.nextRung
+    if ( // first choice: the recorded next transition, when it is a known rung id and not a success/stop marker. NOTE(review): this is tried before the running/retrying branch below, so a "running" ledger with a valid next transition resumes at that next rung rather than the active one — confirm this is intended
+      isMeaningful(nextTransition) &&
+      !isSuccessTransition(nextTransition) &&
+      !isStopTransition(nextTransition) &&
+      rungMap.has(nextTransition)
+    ) {
+      const rung = rungMap.get(nextTransition);
+      return {
+        blockingIssue: "",
+        resumePlan: {
+          kind: "ladder",
+          rungId: nextTransition,
+          stage: rung.stage,
+          watchTarget: rung.watch,
+          campaignId,
+          startedAt,
+          iterationsCompleted: iterationCount, // the recorded count is carried over unchanged
+          lastCheckpoint: ledger.lastCheckpoint || status.lastCheckpoint || "",
+          reason: `resuming at next rung ${nextTransition}`,
+        },
+      };
+    }
+    const activeRung = normalizeTransition(ledger.activeRung || status.currentRung); // the ledger value wins over status.currentRung
+    if ( // second choice: re-run the active rung when the ledger's observed state is "running" or "retrying"
+      ["running", "retrying"].includes(observedState) &&
+      isMeaningful(activeRung) &&
+      rungMap.has(activeRung)
+    ) {
+      const rung = rungMap.get(activeRung);
+      return {
+        blockingIssue: "",
+        resumePlan: {
+          kind: "ladder",
+          rungId: activeRung,
+          stage: rung.stage,
+          watchTarget: rung.watch,
+          campaignId,
+          startedAt,
+          iterationsCompleted: Math.max(0, iterationCount - 1), // NOTE(review): presumably discounts the interrupted attempt — confirm against where iterationCount is advanced
+          lastCheckpoint: ledger.lastCheckpoint || status.lastCheckpoint || "",
+          reason: `restarting active rung ${activeRung} after owner exit`,
+        },
+      };
+    }
+  }
+  return { blockingIssue: "", resumePlan: null }; // ledger state exists but no resumable rung was identified
+}
 async function runCommandWithPolling({
   targetDir,
   stage,
@@ -53,6 +154,8 @@ async function runCommandWithPolling({
   rungId = "",
   watchTarget = "",
   nextRung = "",
+  ownerInfo = null,
+  updateLedger = null,
 }) {
   const child = spawn(command, {
     cwd: targetDir,
@@ -106,6 +209,20 @@ async function runCommandWithPolling({
       },
       { lang }
     );
+    if (typeof updateLedger === "function") {
+      updateLedger({
+        ownerType: ownerInfo?.ownerType || "local-process",
+        ownerId: String(child.pid || ownerInfo?.ownerId || ""),
+        command,
+        watchTarget,
+        activeStage: stage,
+        activeRung: rungId,
+        startedAt,
+        lastObservedAt: new Date().toISOString(),
+        observedState: "running",
+        nextTransition: nextRung || "",
+      });
+    }
     await sleep(pollIntervalMs);
   }
@@ -217,6 +334,8 @@ async function evaluateTerminalGoal({ mode, iteration, targetDir, deadlineMs })
 async function startAutoMode({ targetDir, now = new Date() }) {
   const mode = parseAutoMode(targetDir);
+  const existingStatus = parseAutoStatus(targetDir);
+  const existingLedger = parseAutoLedger(targetDir);
   const evalProtocol = parseEvalProtocol(targetDir);
   const issues = validateAutoMode(mode, null, evalProtocol);
   if (issues.length > 0) {
@@ -229,20 +348,30 @@ async function startAutoMode({ targetDir, now = new Date() }) {
   if (mode.approvalStatus !== "approved") {
     throw new Error(`approval status must be approved before auto mode can start (current: ${mode.approvalStatus || "missing"})`);
   }
+  const { blockingIssue, resumePlan } = resolveResumePlan({
+    mode,
+    evalProtocol,
+    status: existingStatus,
+    ledger: existingLedger,
+    now,
+  });
+  if (blockingIssue) {
+    throw new Error(blockingIssue);
+  }
   const lang = readWorkflowLanguage(targetDir);
   const timestamp = now.toISOString();
   const status = {
     status: "running",
-    currentStage: mode.allowedStages[0] || "run",
+    currentStage: resumePlan?.stage || mode.allowedStages[0] || "run",
     currentCommand: "",
     activeRunId: "",
-    iterationCount: "0",
-    startedAt: timestamp,
+    iterationCount: String(resumePlan?.iterationsCompleted || 0),
+    startedAt: resumePlan?.startedAt || timestamp,
     lastHeartbeat: timestamp,
-    lastCheckpoint: "",
-    lastSummary: "",
-    decision: "armed for bounded auto orchestration",
+    lastCheckpoint: resumePlan?.lastCheckpoint || "",
+    lastSummary: resumePlan?.reason || "",
+    decision: resumePlan?.reason || "armed for bounded auto orchestration",
   };
   writeAutoStatus(targetDir, status, { lang });
@@ -257,13 +386,36 @@ async function startAutoMode({ targetDir, now = new Date() }) {
   const { loopStages, finalStages } = splitAutoStages(mode.allowedStages);
   const executedStages = [];
   let failureCount = 0;
-  let iterationsCompleted = 0;
+  let iterationsCompleted = resumePlan?.iterationsCompleted || 0;
   let currentStatus = { ...status };
   let successReached = false;
   let stopMatched = false;
   let promotionApplied = false;
   let stopReason = "";
   let finalRung = "";
+  const campaignId = resumePlan?.campaignId || `auto-${startedAt.replace(/[:.]/g, "-")}`;
+  let currentLedger = {
+    campaignId,
+    objective: mode.objective,
+    activeStage: status.currentStage,
+    activeRung: resumePlan?.rungId || "",
+    ownerType: "",
+    ownerId: "",
+    command: "",
+    watchTarget: resumePlan?.watchTarget || "",
+    startedAt,
+    lastObservedAt: timestamp,
+    observedState: resumePlan ? "resuming" : "armed",
+    lastCheckpoint: resumePlan?.lastCheckpoint || "",
+    checkpointSummary: resumePlan?.reason || "auto loop armed and waiting for the first owned command",
+    nextTransition: resumePlan?.rungId || "",
+    continueBoundary: "Continue while the active owner is still running and no stop condition has matched.",
+    stopBoundary: mode.stopConditions,
+    escalationBoundary: mode.escalationConditions,
+    requiredReadSet: ".lab/context/eval-protocol.md, .lab/context/auto-mode.md, .lab/context/auto-status.md, .lab/context/auto-ledger.md, .lab/context/auto-outcome.md",
+    resumeCommand: "",
+  };
+  writeAutoLedger(targetDir, currentLedger, { lang });
   const outcomeProtocolFields = {
     primaryMetrics: evalProtocol.primaryMetrics,
     secondaryMetrics: evalProtocol.secondaryMetrics,
@@ -305,6 +457,22 @@ async function startAutoMode({ targetDir, now = new Date() }) {
     writeAutoStatus(targetDir, currentStatus, { lang });
   };
+  const writeLedger = (overrides = {}) => {
+    currentLedger = {
+      ...currentLedger,
+      activeStage: currentStatus.currentStage || currentLedger.activeStage,
+      activeRung: currentStatus.currentRung || currentLedger.activeRung,
+      watchTarget: currentStatus.watchTarget || currentLedger.watchTarget,
+      lastCheckpoint: currentStatus.lastCheckpoint || currentLedger.lastCheckpoint,
+      checkpointSummary: currentStatus.lastSummary || currentLedger.checkpointSummary,
+      lastObservedAt: new Date().toISOString(),
+      stopBoundary: mode.stopConditions,
+      escalationBoundary: mode.escalationConditions,
+      ...overrides,
+    };
+    writeAutoLedger(targetDir, currentLedger, { lang });
+  };
   const failAutoMode = (message) => {
     currentStatus = {
       ...currentStatus,
@@ -313,6 +481,13 @@ async function startAutoMode({ targetDir, now = new Date() }) {
       decision: message,
     };
     writeAutoStatus(targetDir, currentStatus, { lang });
+    writeLedger({
+      observedState: "failed",
+      ownerType: currentLedger.ownerType || "local-process",
+      checkpointSummary: message,
+      nextTransition: "terminal-failure",
+      resumeCommand: "",
+    });
     writeAutoOutcome(
       targetDir,
       {
@@ -366,6 +541,8 @@ async function startAutoMode({ targetDir, now = new Date() }) {
           rungId,
           watchTarget,
           nextRung,
+          ownerInfo: { ownerType: "local-process" },
+          updateLedger: writeLedger,
         });
         verifyStageContract({ stage, snapshot: contract.snapshot });
         executedStages.push(stage);
@@ -378,6 +555,18 @@ async function startAutoMode({ targetDir, now = new Date() }) {
           nextRung,
           decision: rungId ? `completed rung ${rungId}` : `completed stage ${stage}`,
         });
+        writeLedger({
+          ownerType: "local-process",
+          observedState: "checkpointed",
+          command,
+          watchTarget,
+          activeStage: stage,
+          activeRung: rungId || currentStatus.currentRung,
+          ownerId: currentLedger.ownerId,
+          checkpointSummary: rungId ? `completed rung ${rungId}` : `completed stage ${stage}`,
+          nextTransition: nextRung || "",
+          resumeCommand: command,
+        });
         const frozenCoreChanges = detectFrozenCoreChanges(frozenCoreSnapshot);
         if (frozenCoreChanges.length > 0) {
           failAutoMode(`frozen core changed: ${frozenCoreChanges.join(", ")}`);
@@ -412,6 +601,17 @@ async function startAutoMode({ targetDir, now = new Date() }) {
           nextRung,
           decision: `retrying ${rungId || stage} after failure ${failureCount}`,
         });
+        writeLedger({
+          ownerType: "local-process",
+          observedState: "retrying",
+          command,
+          watchTarget,
+          activeStage: stage,
+          activeRung: rungId || currentStatus.currentRung,
+          checkpointSummary: `retrying ${rungId || stage} after failure ${failureCount}`,
+          nextTransition: rungId || stage,
+          resumeCommand: command,
+        });
       }
     }
   };
@@ -451,6 +651,14 @@ async function startAutoMode({ targetDir, now = new Date() }) {
       currentCommand: mode.promotionCommand,
       decision: `promotion policy matched after ${label}`,
     });
+    writeLedger({
+      ownerType: "local-process",
+      command: mode.promotionCommand,
+      observedState: "checkpointed",
+      checkpointSummary: `promotion policy matched after ${label}`,
+      nextTransition: "post-promotion refresh",
+      resumeCommand: mode.promotionCommand,
+    });
     promotionApplied = true;
     const frozenCoreChangesAfterPromotion = detectFrozenCoreChanges(frozenCoreSnapshot);
     if (frozenCoreChangesAfterPromotion.length > 0) {
@@ -463,7 +671,12 @@ async function startAutoMode({ targetDir, now = new Date() }) {
   if (evalProtocol.experimentRungs.length > 0) {
     const rungMap = new Map(evalProtocol.experimentRungs.map((rung) => [rung.id, rung]));
-    let currentRung = evalProtocol.experimentRungs[0];
+    let currentRung = resumePlan?.kind === "ladder"
+      ? rungMap.get(resumePlan.rungId)
+      : evalProtocol.experimentRungs[0];
+    if (!currentRung) {
+      failAutoMode(`resume rung is missing from the current experiment ladder: ${resumePlan?.rungId || ""}`);
+    }
     while (currentRung && iterationsCompleted < Math.max(1, maxIterations)) {
       if (!mode.allowedStages.includes(currentRung.stage)) {
@@ -618,6 +831,12 @@ async function startAutoMode({ targetDir, now = new Date() }) {
       decision: stopReason || "stopped by stop condition",
     };
     writeAutoStatus(targetDir, currentStatus, { lang });
+    writeLedger({
+      observedState: "stopped",
+      checkpointSummary: stopReason || "stopped by stop condition",
+      nextTransition: "terminal-stop",
+      resumeCommand: "",
+    });
     writeAutoOutcome(
       targetDir,
       {
@@ -670,6 +889,12 @@ async function startAutoMode({ targetDir, now = new Date() }) {
           decision: stopReason || "stopped by stop condition",
         };
         writeAutoStatus(targetDir, currentStatus, { lang });
+        writeLedger({
+          observedState: "stopped",
+          checkpointSummary: stopReason || "stopped by stop condition",
+          nextTransition: "terminal-stop",
+          resumeCommand: "",
+        });
         writeAutoOutcome(
           targetDir,
           {
@@ -724,6 +949,12 @@ async function startAutoMode({ targetDir, now = new Date() }) {
     decision: successReached ? "completed configured auto goal" : "completed configured stages",
   };
   writeAutoStatus(targetDir, currentStatus, { lang });
+  writeLedger({
+    observedState: "completed",
+    checkpointSummary: successReached ? "completed configured auto goal" : "completed configured stages",
+    nextTransition: "terminal-success",
+    resumeCommand: "",
+  });
   writeAutoOutcome(
     targetDir,
     {
@@ -805,6 +1036,31 @@ function stopAutoMode({ targetDir, now = new Date() }) {
     decision: "stopped by operator",
   };
   writeAutoStatus(targetDir, status, { lang });
+  writeAutoLedger(
+    targetDir,
+    {
+      campaignId: existing.startedAt ? `auto-${existing.startedAt.replace(/[:.]/g, "-")}` : `auto-${now.toISOString().replace(/[:.]/g, "-")}`,
+      objective: mode.objective,
+      activeStage: existing.currentStage || "",
+      activeRung: existing.currentRung || "",
+      ownerType: "local-process",
+      ownerId: "",
+      command: existing.currentCommand || "",
+      watchTarget: existing.watchTarget || "",
+      startedAt: existing.startedAt || now.toISOString(),
+      lastObservedAt: now.toISOString(),
+      observedState: "stopped",
+      lastCheckpoint: existing.lastCheckpoint || "",
+      checkpointSummary: "stopped by operator",
+      nextTransition: "terminal-stop",
+      continueBoundary: "No further automatic progress is allowed until a new approved auto run starts.",
+      stopBoundary: mode.stopConditions,
+      escalationBoundary: mode.escalationConditions,
+      requiredReadSet: ".lab/context/eval-protocol.md, .lab/context/auto-mode.md, .lab/context/auto-status.md, .lab/context/auto-ledger.md, .lab/context/auto-outcome.md",
+      resumeCommand: "",
+    },
+    { lang }
+  );
   writeAutoOutcome(
     targetDir,
     {

package/lib/auto_state.cjs CHANGED Viewed

@@ -68,6 +68,33 @@ function parseAutoStatus(targetDir) {
   };
 }
+function parseAutoLedger(targetDir) { // Read the auto-ledger.md context file for targetDir and return its path, raw text, and each ledger field extracted by label.
+  const text = readFileIfExists(contextFile(targetDir, "auto-ledger.md")); // both helpers are defined outside this view; presumably a missing file yields empty text — TODO confirm
+  return {
+    path: contextFile(targetDir, "auto-ledger.md"),
+    text, // raw file text, returned alongside the parsed fields
+    campaignId: extractValue(text, ["Campaign id", "Campaign ID", "活动 id"]), // each array lists the labels accepted for that field (English and Chinese variants)
+    objective: extractValue(text, ["Objective", "目标"]),
+    activeStage: extractValue(text, ["Active stage", "当前阶段"]),
+    activeRung: extractValue(text, ["Active rung", "当前 rung"]),
+    ownerType: extractValue(text, ["Owner type", "Owner 类型"]),
+    ownerId: extractValue(text, ["Owner id", "Owner ID"]),
+    command: extractValue(text, ["Command", "命令"]),
+    watchTarget: extractValue(text, ["Watch target", "监视目标"]),
+    startedAt: extractValue(text, ["Started at", "开始时间"]),
+    lastObservedAt: extractValue(text, ["Last observed at", "最近观察时间"]),
+    observedState: extractValue(text, ["Observed state", "观察状态"]),
+    lastCheckpoint: extractValue(text, ["Last checkpoint", "最近 checkpoint"]),
+    checkpointSummary: extractValue(text, ["Checkpoint summary", "Checkpoint 摘要"]),
+    nextTransition: extractValue(text, ["Next transition", "下一转换"]),
+    continueBoundary: extractValue(text, ["Continue boundary", "继续边界"]),
+    stopBoundary: extractValue(text, ["Stop boundary", "停止边界"]),
+    escalationBoundary: extractValue(text, ["Escalation boundary", "升级边界"]),
+    requiredReadSet: extractValue(text, ["Required read set", "必要读取集合"]),
+    resumeCommand: extractValue(text, ["Resume command", "恢复命令"]),
+  };
+}
 function renderAutoStatus(status, { lang = "en" } = {}) {
   if (lang === "zh") {
     return `# 自动模式状态
@@ -240,12 +267,96 @@ function renderAutoOutcome(outcome, { lang = "en" } = {}) {
 `;
 }
+function renderAutoLedger(ledger, { lang = "en" } = {}) { // Render a ledger object as the markdown text of auto-ledger.md; every missing or falsy field is rendered as an empty value.
+  if (lang === "zh") { // the zh output differs from the default only in its title line; section headings and field labels are the same English text
+    return `# 自动运行账本
+## Campaign
+- Campaign id: ${ledger.campaignId || ""}
+- Objective: ${ledger.objective || ""}
+- Active stage: ${ledger.activeStage || ""}
+- Active rung: ${ledger.activeRung || ""}
+## Owner
+- Owner type: ${ledger.ownerType || ""}
+- Owner id: ${ledger.ownerId || ""}
+- Command: ${ledger.command || ""}
+- Watch target: ${ledger.watchTarget || ""}
+- Started at: ${ledger.startedAt || ""}
+- Last observed at: ${ledger.lastObservedAt || ""}
+- Observed state: ${ledger.observedState || ""}
+## Checkpoints
+- Last checkpoint: ${ledger.lastCheckpoint || ""}
+- Checkpoint summary: ${ledger.checkpointSummary || ""}
+- Next transition: ${ledger.nextTransition || ""}
+## Boundaries
+- Continue boundary: ${ledger.continueBoundary || ""}
+- Stop boundary: ${ledger.stopBoundary || ""}
+- Escalation boundary: ${ledger.escalationBoundary || ""}
+## Resume
+- Required read set: ${ledger.requiredReadSet || ""}
+- Resume command: ${ledger.resumeCommand || ""}
+`;
+  }
+  return `# Auto Runtime Ledger
+## Campaign
+- Campaign id: ${ledger.campaignId || ""}
+- Objective: ${ledger.objective || ""}
+- Active stage: ${ledger.activeStage || ""}
+- Active rung: ${ledger.activeRung || ""}
+## Owner
+- Owner type: ${ledger.ownerType || ""}
+- Owner id: ${ledger.ownerId || ""}
+- Command: ${ledger.command || ""}
+- Watch target: ${ledger.watchTarget || ""}
+- Started at: ${ledger.startedAt || ""}
+- Last observed at: ${ledger.lastObservedAt || ""}
+- Observed state: ${ledger.observedState || ""}
+## Checkpoints
+- Last checkpoint: ${ledger.lastCheckpoint || ""}
+- Checkpoint summary: ${ledger.checkpointSummary || ""}
+- Next transition: ${ledger.nextTransition || ""}
+## Boundaries
+- Continue boundary: ${ledger.continueBoundary || ""}
+- Stop boundary: ${ledger.stopBoundary || ""}
+- Escalation boundary: ${ledger.escalationBoundary || ""}
+## Resume
+- Required read set: ${ledger.requiredReadSet || ""}
+- Resume command: ${ledger.resumeCommand || ""}
+`;
+}
 function writeAutoOutcome(targetDir, outcome, { lang = "en" } = {}) { // Render the outcome in the given language and write it to the path contextFile() resolves for "auto-outcome.md".
   const filePath = contextFile(targetDir, "auto-outcome.md");
   fs.mkdirSync(path.dirname(filePath), { recursive: true }); // create the parent directory chain if it does not exist yet
   fs.writeFileSync(filePath, renderAutoOutcome(outcome, { lang }).trimEnd() + "\n"); // strip trailing whitespace, then end the file with a single newline
 }
+function writeAutoLedger(targetDir, ledger, { lang = "en" } = {}) { // Render the ledger in the given language and write it to the path contextFile() resolves for "auto-ledger.md", replacing any previous content.
+  const filePath = contextFile(targetDir, "auto-ledger.md");
+  fs.mkdirSync(path.dirname(filePath), { recursive: true }); // create the parent directory chain if it does not exist yet
+  fs.writeFileSync(filePath, renderAutoLedger(ledger, { lang }).trimEnd() + "\n"); // strip trailing whitespace, then end the file with a single newline
+}
 function resolveRequiredArtifact(targetDir, configuredPath) {
   if (!isMeaningful(configuredPath)) {
     return { relativePath: "", absolutePath: "" };
@@ -258,12 +369,15 @@ function resolveRequiredArtifact(targetDir, configuredPath) {
 }
 module.exports = {
+  parseAutoLedger,
   parseAutoMode,
   parseAutoStatus,
   readWorkflowLanguage,
+  renderAutoLedger,
   renderAutoOutcome,
   renderAutoStatus,
   resolveRequiredArtifact,
+  writeAutoLedger,
   writeAutoOutcome,
   writeAutoStatus,
 };

package/lib/i18n.cjs CHANGED Viewed

@@ -1563,6 +1563,7 @@ const ZH_SKILL_FILES = {
 `# 自动模式契约
 用这个文件定义 \`/lab:auto\` 的有边界自治执行范围。
+把 \`.lab/context/auto-ledger.md\` 当成运行时账本，记录 owner、checkpoint、resume 和 stop 边界。
 ## 目标
@@ -1602,6 +1603,7 @@ const ZH_SKILL_FILES = {
 - Rung 的 \`Command\` 应该绑定真实的长任务命令，由它产出最终实验结果。
 - 短 watcher 只用于查看进度；当真实实验还在运行时，不要把短 watcher 当成 stage 或 rung 的主命令。
 - 当真实实验进程还活着时，只记录进度更新并继续等待。
+- 当 loop 处于运行态时，把当前 owner、命令和 watch target 写进 \`.lab/context/auto-ledger.md\`。
 - Run command:
 - Iterate command:
 - Review command:
@@ -1634,6 +1636,43 @@ const ZH_SKILL_FILES = {
 - Stop conditions:
 - Escalation conditions:
 - Canonical promotion writeback: update \`.lab/context/data-decisions.md\`、\`.lab/context/decisions.md\` 和 \`.lab/context/workflow-state.md\`，然后刷新 \`state.md\` 等派生视图。
+`,
+  [path.join(".lab", "context", "auto-ledger.md")]:
+`# 自动运行账本
+## Campaign
+- Campaign id:
+- Objective:
+- Active stage:
+- Active rung:
+## Owner
+- Owner type:
+- Owner id:
+- Command:
+- Watch target:
+- Started at:
+- Last observed at:
+- Observed state:
+## Checkpoints
+- Last checkpoint:
+- Checkpoint summary:
+- Next transition:
+## Boundaries
+- Continue boundary:
+- Stop boundary:
+- Escalation boundary:
+## Resume
+- Required read set:
+- Resume command:
 `,
   [path.join(".lab", "context", "auto-outcome.md")]:
 `# 自动结果
@@ -2129,7 +2168,7 @@ ZH_CONTENT[path.join(".lab", ".managed", "templates", "framing.md")] = `# 论文
 ZH_CONTENT[path.join(".codex", "prompts", "lab.md")] = codexPrompt(
   "查看 /lab 研究工作流总览并选择合适阶段",
   "workflow question 或 stage choice",
-  "# `/lab` for Codex\n\n`/lab` 是严格的研究工作流命令族。每次都使用同一套仓库工件和阶段边界。\n\n## 子命令\n\n- `/lab:idea`\n  先做两轮脑暴和两轮文献检索，再定义问题与 failure case、对比最接近前作，并输出带 approval gate 的 source-backed recommendation。\n\n- `/lab:data`\n  把已批准的 idea 转成数据集与 benchmark 方案，记录数据集年份、使用过该数据集的论文、下载来源、许可或访问限制，以及 classic-public、recent-strong-public、claim-specific 三类 benchmark 的纳入理由，和 canonical baselines、strong historical baselines、recent strong public methods、closest prior work 四类对比方法的纳入理由。\n\n- `/lab:auto`\n  在不改变 mission、framing 和核心 claims 的前提下，读取 eval-protocol 与 auto-mode 契约并自动编排 `run`、`iterate`、`review`、`report`，必要时扩展数据集、benchmark 和 comparison methods，并在满足升格策略时自动升级 primary package。启动前必须选定 autonomy level、声明 terminal goal，并显式写清 primary gate、secondary guard、promotion condition、stop reason 和 escalation reason，再批准契约。\n\n- `/lab:framing`\n  通过审计当前领域与相邻领域的术语，锁定 paper-facing 的方法名、模块名、论文题目和 contribution bullets，并在 section 起草前保留 approval gate。\n\n- `/lab:spec`\n  把已批准的 idea 转成 `.lab/changes/<change-id>/` 下的一个 lab change 目录，并在其中写出 `proposal`、`design`、`spec`、`tasks`。\n\n- `/lab:run`\n  执行最小有意义验证运行，登记 run，并生成第一版标准化评估摘要。\n\n- `/lab:iterate`\n  在冻结 mission、阈值、verification commands 与 `completion_promise` 的前提下执行有边界的实验迭代。\n\n- `/lab:review`\n  以 reviewer mode 审查文档或结果，先给短摘要，再输出 findings、fatal flaws、fix priority 和 residual risks。\n\n- `/lab:report`\n  从 runs 和 iterations 工件生成最终研究报告。\n\n- `/lab:write`\n  使用已安装 `lab` skill 下 vendored 的 paper-writing references，把稳定 report 工件转成论文 section。\n\n## 调度规则\n\n- 始终使用 `skills/lab/SKILL.md` 作为工作流合同。\n- 用户显式调用 `/lab:<stage>` 时，要立刻执行该 stage，而不是只推荐别的 `/lab` stage。\n- 先给简洁的阶段摘要；只要 stage 合同要求受管工件，就应立刻落盘，再回报输出路径和下一步。\n- 如果歧义会影响结论，一次只问一个问题；如果有多条可行路径，先给 2-3 个方案再收敛。\n- `/lab:spec` 前应已有经批准的数据集与 benchmark 方案。\n- `/lab:run`、`/lab:iterate`、`/lab:auto`、`/lab:report` 都应遵循 `.lab/context/eval-protocol.md`。\n- `.lab/context/eval-protocol.md` 不只定义主指标和主表，也应定义指标释义、实验阶梯，以及指标和对比实现的来源。\n- `/lab:auto` 只编排已批准边界内的执行阶段，不替代手动的 idea/data/framing/spec 决策。\n- `/lab:write` 前必须已有经批准的 `/lab:framing` 工件。\n\n## 如何输入 
`/lab:auto`\n\n## `/lab:auto` 层级指南\n\n- `L1`：适合安全验证、一轮 bounded 真实运行，或简单 report 刷新。\n- `L2`：默认推荐级别，适合冻结核心边界内的常规实验迭代。\n- `L3`：激进 campaign 级别，只在你明确想做更大范围探索和可选写作时使用。\n- 如果不确定，默认推荐 `L2`。\n- 如果用户输入没写级别，或者把级别和 `paper layer`、`phase`、`table` 混用了，就应先停下来，要求用户明确选 `L1/L2/L3`。\n\n- 把 `Autonomy level L1/L2/L3` 视为执行权限级别，不要和论文里的 layer、phase、table 编号混用。\n- 把 `paper layer`、`phase`、`table` 视为实验目标。例如 `paper layer 3` 或 `Phase 1` 不是 `Autonomy level L3`。\n- 一条好的 `/lab:auto` 输入应至少说清：objective、自治级别、terminal goal、scope、allowed modifications。\n- 如果 workflow language 是中文，摘要、清单条目、任务标签和进度更新都应使用中文，除非文件路径、代码标识符或字面指标名必须保持原样。\n- 示例：`/lab:auto 自治级别 L2。目标：推进 paper layer 3。终止条件：完成 bounded protocol、测试、最小实现和一轮小规模结果。允许修改：配置、数据接入、评估脚本。`\n"
+  "# `/lab` for Codex\n\n`/lab` 是严格的研究工作流命令族。每次都使用同一套仓库工件和阶段边界。\n\n## 子命令\n\n- `/lab:idea`\n  先做两轮脑暴和两轮文献检索，再定义问题与 failure case、对比最接近前作，并输出带 approval gate 的 source-backed recommendation。\n\n- `/lab:data`\n  把已批准的 idea 转成数据集与 benchmark 方案，记录数据集年份、使用过该数据集的论文、下载来源、许可或访问限制，以及 classic-public、recent-strong-public、claim-specific 三类 benchmark 的纳入理由，和 canonical baselines、strong historical baselines、recent strong public methods、closest prior work 四类对比方法的纳入理由。\n\n- `/lab:auto`\n  在不改变 mission、framing 和核心 claims 的前提下，读取 eval-protocol 与 auto-mode 契约并自动编排 `run`、`iterate`、`review`、`report`，必要时扩展数据集、benchmark 和 comparison methods，并在满足升格策略时自动升级 primary package。启动前必须选定 autonomy level、声明 terminal goal，并显式写清 primary gate、secondary guard、promotion condition、stop reason 和 escalation reason，再批准契约。\n\n- `/lab:framing`\n  通过审计当前领域与相邻领域的术语，锁定 paper-facing 的方法名、模块名、论文题目和 contribution bullets，并在 section 起草前保留 approval gate。\n\n- `/lab:spec`\n  把已批准的 idea 转成 `.lab/changes/<change-id>/` 下的一个 lab change 目录，并在其中写出 `proposal`、`design`、`spec`、`tasks`。\n\n- `/lab:run`\n  执行最小有意义验证运行，登记 run，并生成第一版标准化评估摘要。\n\n- `/lab:iterate`\n  在冻结 mission、阈值、verification commands 与 `completion_promise` 的前提下执行有边界的实验迭代。\n\n- `/lab:review`\n  以 reviewer mode 审查文档或结果，先给短摘要，再输出 findings、fatal flaws、fix priority 和 residual risks。\n\n- `/lab:report`\n  从 runs 和 iterations 工件生成最终研究报告。\n\n- `/lab:write`\n  使用已安装 `lab` skill 下 vendored 的 paper-writing references，把稳定 report 工件转成论文 section。\n\n## 调度规则\n\n- 始终使用 `skills/lab/SKILL.md` 作为工作流合同。\n- 用户显式调用 `/lab:<stage>` 时，要立刻执行该 stage，而不是只推荐别的 `/lab` stage。\n- 先给简洁的阶段摘要；只要 stage 合同要求受管工件，就应立刻落盘，再回报输出路径和下一步。\n- 如果歧义会影响结论，一次只问一个问题；如果有多条可行路径，先给 2-3 个方案再收敛。\n- `/lab:spec` 前应已有经批准的数据集与 benchmark 方案。\n- `/lab:run`、`/lab:iterate`、`/lab:auto`、`/lab:report` 都应遵循 `.lab/context/eval-protocol.md`。\n- `.lab/context/eval-protocol.md` 不只定义主指标和主表，也应定义指标释义、实验阶梯，以及指标和对比实现的来源。\n- `/lab:auto` 只编排已批准边界内的执行阶段，不替代手动的 idea/data/framing/spec 决策。\n- `/lab:write` 前必须已有经批准的 `/lab:framing` 工件。\n\n## 如何输入 
`/lab:auto`\n\n## `/lab:auto` 层级指南\n\n- `L1`：适合安全验证、一轮 bounded 真实运行，或简单 report 刷新。\n- `L2`：默认推荐级别，适合冻结核心边界内的常规实验迭代。\n- `L3`：激进 campaign 级别，只在你明确想做更大范围探索和可选写作时使用。\n- 如果不确定，默认推荐 `L2`。\n- 如果用户输入没写级别，或者把级别和 `paper layer`、`phase`、`table` 混用了，就应先停下来，要求用户明确选 `L1/L2/L3`。\n- 真正的 `/lab:auto` 首个可见输出必须是 `Auto preflight`。\n- 这个首个可见输出必须展示已读取文件，以及 `Autonomy level`、`Allowed stages`、`Terminal goal`、`Primary gate`、`Secondary guard`。\n- 如果无法从 `.lab/context/eval-protocol.md`、`.lab/context/auto-mode.md`、`.lab/context/auto-status.md` 和 `.lab/context/auto-outcome.md` 完成 preflight，就必须停下，而不是假装 loop 已经 armed。\n\n- 把 `Autonomy level L1/L2/L3` 视为执行权限级别，不要和论文里的 layer、phase、table 编号混用。\n- 把 `paper layer`、`phase`、`table` 视为实验目标。例如 `paper layer 3` 或 `Phase 1` 不是 `Autonomy level L3`。\n- 一条好的 `/lab:auto` 输入应至少说清：objective、自治级别、terminal goal、scope、allowed modifications。\n- 如果 workflow language 是中文，摘要、清单条目、任务标签和进度更新都应使用中文，除非文件路径、代码标识符或字面指标名必须保持原样。\n- 示例：`/lab:auto 自治级别 L2。目标：推进 paper layer 3。终止条件：完成 bounded protocol、测试、最小实现和一轮小规模结果。允许修改：配置、数据接入、评估脚本。`\n"
 );
 ZH_CONTENT[path.join(".codex", "prompts", "lab-data.md")] = codexPrompt(
@@ -2141,14 +2180,14 @@ ZH_CONTENT[path.join(".codex", "prompts", "lab-data.md")] = codexPrompt(
 ZH_CONTENT[path.join(".codex", "prompts", "lab-auto.md")] = codexPrompt(
   "在已批准边界内编排自动实验循环",
   "auto mode objective",
-  "使用已安装的 `lab` 技能：`.codex/skills/lab/SKILL.md`。\n\n立刻针对用户当前给出的参数执行 `/lab:auto`，不要只推荐别的 `/lab` 阶段。只有在缺少阻塞性前提时，才明确指出缺什么，并且一次最多追问一个问题。\n\n本命令运行 `/lab:auto` 阶段。它必须读取 `.lab/context/eval-protocol.md`、`.lab/context/auto-mode.md`、`.lab/context/auto-status.md` 与 `.lab/context/auto-outcome.md`，先确认 autonomy level、approval status、terminal goal schema，以及 primary gate、secondary guard、promotion condition、stop reason、escalation reason，再把 eval-protocol 里的指标释义、主表计划、来源约束与结构化实验阶梯当作执行依据，在不修改 mission、framing 和核心 claims 的前提下编排已批准的 `run`、`iterate`、`review`、`report`，轮询长任务完成情况；如果声明了 rung，就保持会话活着并按 rung 转移继续推进。\n如果仓库的 workflow language 是中文，摘要、清单条目、任务标签和进度更新都必须使用中文，除非某个文件路径、代码标识符或字面指标名必须保持原样。\n把 `Layer 3`、`Phase 1`、`Table 2` 这类表达视为论文范围目标；只有显式写成 `Autonomy level L3` 或 `自治级别 L3` 时，才把它当成执行权限级别。\n不要用 `sleep 30`、单次 `pgrep` 或一次性的 `metrics.json` 探针来代替真实长任务命令；当真实实验进程还活着时，只允许发进度更新并继续等待。"
+  "使用已安装的 `lab` 技能：`.codex/skills/lab/SKILL.md`。\n\n立刻针对用户当前给出的参数执行 `/lab:auto`，不要只推荐别的 `/lab` 阶段。只有在缺少阻塞性前提时，才明确指出缺什么，并且一次最多追问一个问题。\n\n本命令运行 `/lab:auto` 阶段。它必须读取 `.lab/context/eval-protocol.md`、`.lab/context/auto-mode.md`、`.lab/context/auto-status.md`、`.lab/context/auto-ledger.md` 与 `.lab/context/auto-outcome.md`，先确认 autonomy level、approval status、terminal goal schema，以及 primary gate、secondary guard、promotion condition、stop reason、escalation reason，再把 eval-protocol 里的指标释义、主表计划、来源约束与结构化实验阶梯当作执行依据，在不修改 mission、framing 和核心 claims 的前提下编排已批准的 `run`、`iterate`、`review`、`report`，轮询长任务完成情况；如果声明了 rung，就保持会话活着并按 rung 转移继续推进。\n首个可见输出块必须是 `Auto preflight`。这个块必须列出已读取文件，并回显 `Autonomy level`、`Approval status`、`Allowed stages`、`Terminal goal`、`Primary gate` 和 `Secondary guard`，然后才能进入执行摘要或动作计划。\n如果 preflight 所需字段缺失、过期或彼此冲突，就必须在执行前停下，并明确指出到底是哪一个字段阻止了 loop 启动。\n当 loop 活着时，必须把当前 owner、观察状态、checkpoint 摘要、继续边界、停止边界和恢复读取集合写进 `.lab/context/auto-ledger.md`。\n如果仓库的 workflow language 是中文，摘要、清单条目、任务标签和进度更新都必须使用中文，除非某个文件路径、代码标识符或字面指标名必须保持原样。\n把 `Layer 3`、`Phase 1`、`Table 2` 这类表达视为论文范围目标；只有显式写成 `Autonomy level L3` 或 `自治级别 L3` 时，才把它当成执行权限级别。\n不要用 `sleep 30`、单次 `pgrep` 或一次性的 `metrics.json` 探针来代替真实长任务命令；当真实实验进程还活着时，只允许发进度更新并继续等待。"
 );
 ZH_CONTENT[path.join(".claude", "commands", "lab.md")] = claudeCommand(
   "lab",
   "查看 /lab 研究工作流总览并选择合适阶段",
   "[stage] [target]",
-  "# `/lab` for Claude\n\n`/lab` 是 Claude Code 里的 lab 工作流分发入口。调用方式有两种：\n\n- `/lab <stage> ...`\n- `/lab-idea`、`/lab-data`、`/lab-auto`、`/lab-framing`、`/lab-spec`、`/lab-run`、`/lab-iterate`、`/lab-review`、`/lab-report`、`/lab-write`\n\n## 阶段别名\n\n- `/lab idea ...` 或 `/lab-idea`\n- `/lab data ...` 或 `/lab-data`\n- `/lab auto ...` 或 `/lab-auto`\n- `/lab framing ...` 或 `/lab-framing`\n- `/lab spec ...` 或 `/lab-spec`\n- `/lab run ...` 或 `/lab-run`\n- `/lab iterate ...` 或 `/lab-iterate`\n- `/lab review ...` 或 `/lab-review`\n- `/lab report ...` 或 `/lab-report`\n- `/lab write ...` 或 `/lab-write`\n\n## 调度规则\n\n- 始终使用 `skills/lab/SKILL.md` 作为工作流合同。\n- 用户显式调用 `/lab <stage> ...` 或 `/lab-<stage>` 时，要立刻执行该 stage，而不是只推荐别的阶段。\n- 先给简洁的阶段摘要；只要 stage 合同要求受管工件，就应立刻落盘，再回报输出路径和下一步。\n- 如果歧义会影响结论，一次只问一个问题；如果有多条可行路径，先给 2-3 个方案再收敛。\n- `spec` 前应已有经批准的数据集与 benchmark 方案。\n- `run`、`iterate`、`auto`、`report` 都应遵循 `.lab/context/eval-protocol.md`。\n- `auto` 只编排已批准边界内的执行阶段，不替代手动的 idea/data/framing/spec 决策。\n- `write` 前必须已有经批准的 `framing` 工件。\n\n## 如何输入 `/lab auto`\n\n## `/lab auto` 层级指南\n\n- `L1`：适合安全验证、一轮 bounded 真实运行，或简单 report 刷新。\n- `L2`：默认推荐级别，适合冻结核心边界内的常规实验迭代。\n- `L3`：激进 campaign 级别，只在你明确想做更大范围探索和可选写作时使用。\n- 如果不确定，默认推荐 `L2`。\n- 如果用户输入没写级别，或者把级别和 `paper layer`、`phase`、`table` 混用了，就应先停下来，要求用户明确选 `L1/L2/L3`。\n\n- 把 `Autonomy level L1/L2/L3` 视为执行权限级别，不要和论文里的 layer、phase、table 编号混用。\n- 把 `paper layer`、`phase`、`table` 视为实验目标。例如 `paper layer 3` 或 `Phase 1` 不是 `Autonomy level L3`。\n- 一条好的 `/lab auto` 输入应至少说清：objective、自治级别、terminal goal、scope、allowed modifications。\n- 如果 workflow language 是中文，摘要、清单条目、任务标签和进度更新都应使用中文，除非文件路径、代码标识符或字面指标名必须保持原样。\n- 示例：`/lab auto 自治级别 L2。目标：推进 paper layer 3。终止条件：完成 bounded protocol、测试、最小实现和一轮小规模结果。允许修改：配置、数据接入、评估脚本。`\n"
+  "# `/lab` for Claude\n\n`/lab` 是 Claude Code 里的 lab 工作流分发入口。调用方式有两种：\n\n- `/lab <stage> ...`\n- `/lab-idea`、`/lab-data`、`/lab-auto`、`/lab-framing`、`/lab-spec`、`/lab-run`、`/lab-iterate`、`/lab-review`、`/lab-report`、`/lab-write`\n\n## 阶段别名\n\n- `/lab idea ...` 或 `/lab-idea`\n- `/lab data ...` 或 `/lab-data`\n- `/lab auto ...` 或 `/lab-auto`\n- `/lab framing ...` 或 `/lab-framing`\n- `/lab spec ...` 或 `/lab-spec`\n- `/lab run ...` 或 `/lab-run`\n- `/lab iterate ...` 或 `/lab-iterate`\n- `/lab review ...` 或 `/lab-review`\n- `/lab report ...` 或 `/lab-report`\n- `/lab write ...` 或 `/lab-write`\n\n## 调度规则\n\n- 始终使用 `skills/lab/SKILL.md` 作为工作流合同。\n- 用户显式调用 `/lab <stage> ...` 或 `/lab-<stage>` 时，要立刻执行该 stage，而不是只推荐别的阶段。\n- 先给简洁的阶段摘要；只要 stage 合同要求受管工件，就应立刻落盘，再回报输出路径和下一步。\n- 如果歧义会影响结论，一次只问一个问题；如果有多条可行路径，先给 2-3 个方案再收敛。\n- `spec` 前应已有经批准的数据集与 benchmark 方案。\n- `run`、`iterate`、`auto`、`report` 都应遵循 `.lab/context/eval-protocol.md`。\n- `auto` 只编排已批准边界内的执行阶段，不替代手动的 idea/data/framing/spec 决策。\n- `write` 前必须已有经批准的 `framing` 工件。\n\n## 如何输入 `/lab auto`\n\n## `/lab auto` 层级指南\n\n- `L1`：适合安全验证、一轮 bounded 真实运行，或简单 report 刷新。\n- `L2`：默认推荐级别，适合冻结核心边界内的常规实验迭代。\n- `L3`：激进 campaign 级别，只在你明确想做更大范围探索和可选写作时使用。\n- 如果不确定，默认推荐 `L2`。\n- 如果用户输入没写级别，或者把级别和 `paper layer`、`phase`、`table` 混用了，就应先停下来，要求用户明确选 `L1/L2/L3`。\n- 真正的 `/lab auto` 首个可见输出必须是 `Auto preflight`。\n- 这个首个可见输出必须展示已读取文件，以及 `Autonomy level`、`Allowed stages`、`Terminal goal`、`Primary gate`、`Secondary guard`。\n- 如果无法从 `.lab/context/eval-protocol.md`、`.lab/context/auto-mode.md`、`.lab/context/auto-status.md` 和 `.lab/context/auto-outcome.md` 完成 preflight，就必须停下，而不是假装 loop 已经 armed。\n\n- 把 `Autonomy level L1/L2/L3` 视为执行权限级别，不要和论文里的 layer、phase、table 编号混用。\n- 把 `paper layer`、`phase`、`table` 视为实验目标。例如 `paper layer 3` 或 `Phase 1` 不是 `Autonomy level L3`。\n- 一条好的 `/lab auto` 输入应至少说清：objective、自治级别、terminal goal、scope、allowed modifications。\n- 如果 workflow language 是中文，摘要、清单条目、任务标签和进度更新都应使用中文，除非文件路径、代码标识符或字面指标名必须保持原样。\n- 示例：`/lab auto 自治级别 L2。目标：推进 paper layer 
3。终止条件：完成 bounded protocol、测试、最小实现和一轮小规模结果。允许修改：配置、数据接入、评估脚本。`\n"
 );
 ZH_CONTENT[path.join(".claude", "commands", "lab-data.md")] = claudeCommand(
@@ -2162,7 +2201,7 @@ ZH_CONTENT[path.join(".claude", "commands", "lab-auto.md")] = claudeCommand(
   "lab-auto",
   "在已批准边界内编排自动实验循环",
   "auto mode objective",
-  "使用已安装的 `lab` 技能：`.claude/skills/lab/SKILL.md`。\n\n立刻针对用户当前给出的参数执行 `auto` 阶段，不要只推荐别的 lab 阶段。只有在缺少阻塞性前提时，才明确指出缺什么，并且一次最多追问一个问题。\n\n本命令运行 lab workflow 的 `auto` 阶段。它必须读取 `.lab/context/eval-protocol.md`、`.lab/context/auto-mode.md`、`.lab/context/auto-status.md` 与 `.lab/context/auto-outcome.md`，先确认 autonomy level、approval status、terminal goal schema，以及 primary gate、secondary guard、promotion condition、stop reason、escalation reason，再把 eval-protocol 里的指标释义、主表计划、来源约束与结构化实验阶梯当作执行依据，在不修改 mission、framing 和核心 claims 的前提下编排已批准的 `run`、`iterate`、`review`、`report`，轮询长任务完成情况；如果声明了 rung，就保持会话活着并按 rung 转移继续推进。\n如果仓库的 workflow language 是中文，摘要、清单条目、任务标签和进度更新都必须使用中文，除非某个文件路径、代码标识符或字面指标名必须保持原样。\n把 `Layer 3`、`Phase 1`、`Table 2` 这类表达视为论文范围目标；只有显式写成 `Autonomy level L3` 或 `自治级别 L3` 时，才把它当成执行权限级别。\n不要用 `sleep 30`、单次 `pgrep` 或一次性的 `metrics.json` 探针来代替真实长任务命令；当真实实验进程还活着时，只允许发进度更新并继续等待。"
+  "使用已安装的 `lab` 技能：`.claude/skills/lab/SKILL.md`。\n\n立刻针对用户当前给出的参数执行 `auto` 阶段，不要只推荐别的 lab 阶段。只有在缺少阻塞性前提时，才明确指出缺什么，并且一次最多追问一个问题。\n\n本命令运行 lab workflow 的 `auto` 阶段。它必须读取 `.lab/context/eval-protocol.md`、`.lab/context/auto-mode.md`、`.lab/context/auto-status.md`、`.lab/context/auto-ledger.md` 与 `.lab/context/auto-outcome.md`，先确认 autonomy level、approval status、terminal goal schema，以及 primary gate、secondary guard、promotion condition、stop reason、escalation reason，再把 eval-protocol 里的指标释义、主表计划、来源约束与结构化实验阶梯当作执行依据，在不修改 mission、framing 和核心 claims 的前提下编排已批准的 `run`、`iterate`、`review`、`report`，轮询长任务完成情况；如果声明了 rung，就保持会话活着并按 rung 转移继续推进。\n首个可见输出块必须是 `Auto preflight`。这个块必须列出已读取文件，并回显 `Autonomy level`、`Approval status`、`Allowed stages`、`Terminal goal`、`Primary gate` 和 `Secondary guard`，然后才能进入执行摘要或动作计划。\n如果 preflight 所需字段缺失、过期或彼此冲突，就必须在执行前停下，并明确指出到底是哪一个字段阻止了 loop 启动。\n当 loop 活着时，必须把当前 owner、观察状态、checkpoint 摘要、继续边界、停止边界和恢复读取集合写进 `.lab/context/auto-ledger.md`。\n如果仓库的 workflow language 是中文，摘要、清单条目、任务标签和进度更新都必须使用中文，除非某个文件路径、代码标识符或字面指标名必须保持原样。\n把 `Layer 3`、`Phase 1`、`Table 2` 这类表达视为论文范围目标；只有显式写成 `Autonomy level L3` 或 `自治级别 L3` 时，才把它当成执行权限级别。\n不要用 `sleep 30`、单次 `pgrep` 或一次性的 `metrics.json` 探针来代替真实长任务命令；当真实实验进程还活着时，只允许发进度更新并继续等待。"
 );
 const zhRecipeQuickPathLine =

package/lib/install.cjs CHANGED Viewed

@@ -44,6 +44,7 @@ const PROJECT_OWNED_LOCALIZED_PATHS = [
   path.join(".lab", "context", "eval-protocol.md"),
   path.join(".lab", "context", "auto-mode.md"),
   path.join(".lab", "context", "auto-status.md"),
+  path.join(".lab", "context", "auto-ledger.md"),
   path.join(".lab", "context", "auto-outcome.md"),
   path.join(".lab", "context", "terminology-lock.md"),
   path.join(".lab", "context", "summary.md"),

package/package-assets/claude/commands/lab-auto.md CHANGED Viewed

@@ -7,7 +7,9 @@ argument-hint: autonomous campaign target
 Use the installed `lab` skill at `.claude/skills/lab/SKILL.md`.
 Execute the requested `/lab-auto` command against the user's argument now. Do not only recommend another lab stage. If a blocking prerequisite is missing, say exactly what is missing and ask at most one clarifying question.
-This command runs the `auto` stage of the lab workflow. It must read `.lab/context/eval-protocol.md`, `.lab/context/auto-mode.md`, `.lab/context/auto-status.md`, and `.lab/context/auto-outcome.md`, enforce the declared terminal goal schema, make the primary gate, secondary guard, promotion condition, stop reason, and escalation reason explicit, orchestrate approved run, iterate, review, and report stages inside that contract, poll long-running work until completion or stop conditions, and write progress plus the final outcome back into `.lab/context/auto-status.md` and `.lab/context/auto-outcome.md`.
+This command runs the `auto` stage of the lab workflow. It must read `.lab/context/eval-protocol.md`, `.lab/context/auto-mode.md`, `.lab/context/auto-status.md`, `.lab/context/auto-ledger.md`, and `.lab/context/auto-outcome.md`, enforce the declared terminal goal schema, make the primary gate, secondary guard, promotion condition, stop reason, and escalation reason explicit, orchestrate approved run, iterate, review, and report stages inside that contract, poll long-running work until completion or stop conditions, and write live owner state plus progress and the final outcome back into `.lab/context/auto-status.md`, `.lab/context/auto-ledger.md`, and `.lab/context/auto-outcome.md`.
+The first visible block must be `Auto preflight`. That first visible block must list the files read and echo `Autonomy level`, `Approval status`, `Allowed stages`, `Terminal goal`, `Primary gate`, and `Secondary guard` before any execution summary or action plan.
+If the preflight block cannot be completed because any required field is missing, stale, or inconsistent, stop before execution and say exactly which field blocked arming the loop.
 When the repository workflow language is Chinese, summaries, checklist items, task labels, and progress updates should be written in Chinese unless a literal identifier must stay unchanged.
 Treat `Layer 3`, `Phase 1`, or `Table 2` as paper-scope targets. Treat `Autonomy level L3` as the execution permission level.
 Do not replace the real long-running experiment command with a short watcher such as `sleep 30`, `pgrep`, or a one-shot `metrics.json` probe. While the real experiment process is still alive, emit only a progress update and keep waiting.

package/package-assets/claude/commands/lab.md CHANGED Viewed

@@ -70,6 +70,10 @@ Use the same repository artifacts and stage boundaries every time.
 - `L3` is the aggressive campaign level. Use it only when you explicitly want broad exploration, larger search space changes, and optional manuscript-writing work.
 - If you are unsure, choose `L2`.
 - If the request omits the level or mixes it with a paper layer, phase, or table target, `/lab auto` should stop and ask for an explicit autonomy level before arming the loop.
+- The first visible output of a real `/lab auto` run must be `Auto preflight`.
+- That first visible output must show files read plus `Autonomy level`, `Approval status`, `Allowed stages`, `Terminal goal`, `Primary gate`, and `Secondary guard`.
+- If the preflight block cannot be completed from `.lab/context/eval-protocol.md`, `.lab/context/auto-mode.md`, `.lab/context/auto-status.md`, `.lab/context/auto-ledger.md`, and `.lab/context/auto-outcome.md`, `/lab auto` should stop instead of acting like the loop is armed.
+- While the loop is alive, `/lab auto` should keep `.lab/context/auto-ledger.md` updated with the active owner, observed state, and resume boundary.
 - Treat `Autonomy level L1/L2/L3` as the execution privilege level, not as a paper layer, phase, or table number.
 - Treat `paper layer`, `phase`, and `table` as experiment targets. For example, `paper layer 3` or `Phase 1` should not be interpreted as `Autonomy level L3`.

package/package-assets/codex/prompts/lab-auto.md CHANGED Viewed

@@ -6,7 +6,9 @@ argument-hint: autonomous campaign target
 Use the installed `lab` skill at `.codex/skills/lab/SKILL.md`.
 Execute the requested `/lab:auto` stage against the user's argument now. Do not only recommend another lab stage. If a blocking prerequisite is missing, say exactly what is missing and ask at most one clarifying question.
-This command runs the `/lab:auto` stage. It must read `.lab/context/eval-protocol.md`, `.lab/context/auto-mode.md`, `.lab/context/auto-status.md`, and `.lab/context/auto-outcome.md`, enforce the declared terminal goal schema, make the primary gate, secondary guard, promotion condition, stop reason, and escalation reason explicit, orchestrate approved run, iterate, review, and report stages inside that contract, poll long-running work until completion or stop conditions, and write progress plus the final outcome back into `.lab/context/auto-status.md` and `.lab/context/auto-outcome.md`.
+This command runs the `/lab:auto` stage. It must read `.lab/context/eval-protocol.md`, `.lab/context/auto-mode.md`, `.lab/context/auto-status.md`, `.lab/context/auto-ledger.md`, and `.lab/context/auto-outcome.md`, enforce the declared terminal goal schema, make the primary gate, secondary guard, promotion condition, stop reason, and escalation reason explicit, orchestrate approved run, iterate, review, and report stages inside that contract, poll long-running work until completion or stop conditions, and write live owner state plus progress and the final outcome back into `.lab/context/auto-status.md`, `.lab/context/auto-ledger.md`, and `.lab/context/auto-outcome.md`.
+The first visible block must be `Auto preflight`. That first visible block must list the files read and echo `Autonomy level`, `Approval status`, `Allowed stages`, `Terminal goal`, `Primary gate`, and `Secondary guard` before any execution summary or action plan.
+If the preflight block cannot be completed because any required field is missing, stale, or inconsistent, stop before execution and say exactly which field blocked arming the loop.
 When the repository workflow language is Chinese, summaries, checklist items, task labels, and progress updates should be written in Chinese unless a literal identifier must stay unchanged.
 Treat `Layer 3`, `Phase 1`, or `Table 2` as paper-scope targets. Treat `Autonomy level L3` as the execution permission level.
 Do not replace the real long-running experiment command with a short watcher such as `sleep 30`, `pgrep`, or a one-shot `metrics.json` probe. While the real experiment process is still alive, emit only a progress update and keep waiting.

package/package-assets/codex/prompts/lab.md CHANGED Viewed

@@ -64,6 +64,10 @@ argument-hint: workflow question or stage choice
 - `L3` is the aggressive campaign level. Use it only when you explicitly want broad exploration, larger search space changes, and optional manuscript-writing work.
 - If you are unsure, choose `L2`.
 - If the request omits the level or mixes it with a paper layer, phase, or table target, `/lab:auto` should stop and ask for an explicit autonomy level before arming the loop.
+- The first visible output of a real `/lab:auto` run must be `Auto preflight`.
+- That first visible output must show files read plus `Autonomy level`, `Approval status`, `Allowed stages`, `Terminal goal`, `Primary gate`, and `Secondary guard`.
+- If the preflight block cannot be completed from `.lab/context/eval-protocol.md`, `.lab/context/auto-mode.md`, `.lab/context/auto-status.md`, `.lab/context/auto-ledger.md`, and `.lab/context/auto-outcome.md`, `/lab:auto` should stop instead of acting like the loop is armed.
+- While the loop is alive, `/lab:auto` should keep `.lab/context/auto-ledger.md` updated with the active owner, observed state, and resume boundary.
 - Treat `Autonomy level L1/L2/L3` as the execution privilege level, not as a paper layer, phase, or table number.
 - Treat `paper layer`, `phase`, and `table` as experiment targets. For example, `paper layer 3` or `Phase 1` should not be interpreted as `Autonomy level L3`.

package/package-assets/shared/lab/context/auto-ledger.md ADDED Viewed

@@ -0,0 +1,35 @@
+# Auto Runtime Ledger
+## Campaign
+- Campaign id:
+- Objective:
+- Active stage:
+- Active rung:
+## Owner
+- Owner type:
+- Owner id:
+- Command:
+- Watch target:
+- Started at:
+- Last observed at:
+- Observed state:
+## Checkpoints
+- Last checkpoint:
+- Checkpoint summary:
+- Next transition:
+## Boundaries
+- Continue boundary:
+- Stop boundary:
+- Escalation boundary:
+## Resume
+- Required read set:
+- Resume command:

package/package-assets/shared/lab/context/auto-mode.md CHANGED Viewed

@@ -3,6 +3,7 @@
 Use this file to define the bounded autonomous execution envelope for `/lab:auto`.
 Pair it with `.lab/context/eval-protocol.md`, which defines the paper-facing metrics, tables, gates, and benchmark ladder that auto mode should optimize against.
 If `eval-protocol.md` declares structured rung entries, auto mode follows those rung transitions first and uses the stage commands here as per-stage fallbacks.
+Use `.lab/context/auto-ledger.md` as the live runtime ledger for ownership, checkpoints, resume, and stop boundaries.
 ## Objective
@@ -42,6 +43,7 @@ If `eval-protocol.md` declares structured rung entries, auto mode follows those
 - Rung `Command` should be the real long-running command that owns the experiment result.
 - A short watcher is only a progress probe. Do not use a short watcher as the stage or rung command when the real experiment is still running.
 - While the real experiment process is still alive, only record a progress update and keep waiting.
+- Record the active owner, command, and watch target in `.lab/context/auto-ledger.md` while the loop is alive.
 - Run command:
 - Iterate command:
 - Review command:

package/package-assets/shared/skills/lab/SKILL.md CHANGED Viewed

@@ -108,10 +108,13 @@ Use this skill when the user invokes `/lab:*` or asks for the structured researc
 ### `/lab:auto`
 - Use this stage to orchestrate approved execution stages with bounded autonomy.
-- Read `.lab/config/workflow.json`, `.lab/context/mission.md`, `.lab/context/state.md`, `.lab/context/workflow-state.md`, `.lab/context/decisions.md`, `.lab/context/data-decisions.md`, `.lab/context/evidence-index.md`, `.lab/context/terminology-lock.md`, `.lab/context/auto-mode.md`, and `.lab/context/auto-status.md` before acting.
-- Treat `.lab/context/auto-mode.md` as the control contract and `.lab/context/auto-status.md` as the live state file.
+- Read `.lab/config/workflow.json`, `.lab/context/mission.md`, `.lab/context/state.md`, `.lab/context/workflow-state.md`, `.lab/context/decisions.md`, `.lab/context/data-decisions.md`, `.lab/context/evidence-index.md`, `.lab/context/terminology-lock.md`, `.lab/context/auto-mode.md`, `.lab/context/auto-status.md`, and `.lab/context/auto-ledger.md` before acting.
+- Treat `.lab/context/auto-mode.md` as the control contract, `.lab/context/auto-status.md` as the live summary, and `.lab/context/auto-ledger.md` as the runtime ledger.
 - Require `.lab/context/auto-mode.md` to expose `Primary gate`, `Secondary guard`, `Promotion condition`, `Stop reason`, and `Escalation reason` before execution.
 - Require `Autonomy level` and `Approval status` in `.lab/context/auto-mode.md` before execution.
+- Start every `/lab:auto` run with a visible `Auto preflight` summary that reports files read plus `Autonomy level`, `Approval status`, `Allowed stages`, `Terminal goal`, `Primary gate`, and `Secondary guard`.
+- If any required preflight field is missing or inconsistent, stop before any loop action. Do not present a fake auto summary as if the loop were armed.
+- Keep `.lab/context/auto-ledger.md` updated with the active owner, observed state, and resume boundary while the loop is live.
 - Treat `L1` as safe-run validation, `L2` as bounded iteration, and `L3` as aggressive campaign mode.
 - Surface the level guide every time `/lab:auto` starts, and make the detailed guide mandatory when the user omits the level or mixes it with a paper layer, phase, or table target.
 - Reuse `/lab:run`, `/lab:iterate`, `/lab:review`, `/lab:report`, and optional `/lab:write` instead of inventing a second workflow.

package/package-assets/shared/skills/lab/stages/auto.md CHANGED Viewed

@@ -2,6 +2,7 @@
 ## Required Output
+- visible `Auto preflight` summary
 - Auto Mode Contract
 - approved auto-mode contract
 - live auto-status state
@@ -21,6 +22,7 @@
 - `.lab/context/terminology-lock.md`
 - `.lab/context/auto-mode.md`
 - `.lab/context/auto-status.md`
+- `.lab/context/auto-ledger.md`
 - `.lab/context/auto-outcome.md`
 ## Context Write Set
@@ -35,6 +37,7 @@
 - `.lab/context/summary.md`
 - `.lab/context/session-brief.md`
 - `.lab/context/auto-status.md`
+- `.lab/context/auto-ledger.md`
 - `.lab/context/auto-outcome.md`
 ## Boundary Rules
@@ -47,6 +50,7 @@
 - Treat `Sanity and Alternative-Explanation Checks` as the anomaly gate for automation. When a rung yields all-null outputs, suspiciously identical runs, no-op deltas, or impl/result mismatches, pause promotion logic until implementation reality checks, alternative explanations, and at least one cross-check are recorded.
 - Treat paper-template selection as an explicit write-time gate, not as a silent fallback, when the loop is about to create `.tex` deliverables for the first time.
 - Treat `.lab/context/auto-mode.md` as a visible control plane. The contract should make the primary gate, secondary guard, promotion condition, stop reason, and escalation reason explicit before execution starts.
+- Treat `.lab/context/auto-ledger.md` as the live runtime ledger for owner identity, observed state, checkpoint progress, continue boundary, stop boundary, escalation boundary, and resume read set.
 - The contract must declare `Autonomy level` and `Approval status`, and execution starts only when approval is explicitly set to `approved`.
 - The contract must also declare a concrete terminal goal:
   - `rounds`
@@ -71,7 +75,22 @@
 - Keep a poll-based waiting loop instead of sleeping blindly.
 - Do not treat a short watcher such as `sleep 30`, a one-shot `pgrep`, or a single `metrics.json` probe as the rung command when the real experiment is still running.
 - Bind each rung to the real long-running command or process that owns the experiment result.
+- Record the active owner as one of:
+  - `local-process`
+  - `local-runner`
+  - `remote-runner`
+- Every nonterminal `/lab:auto` state must remain resumable from `.lab/context/auto-ledger.md` plus `.lab/context/auto-status.md`.
+- Start every real `/lab:auto` run with a visible `Auto preflight` block before any execution summary or action plan. That first visible output should list:
+  - files read
+  - `Autonomy level`
+  - `Approval status`
+  - `Allowed stages`
+  - `Terminal goal`
+  - `Primary gate`
+  - `Secondary guard`
+- If any of those preflight fields are missing, stale, or inconsistent, stop before execution and report the blocking field directly.
 - Always write a canonical `.lab/context/auto-outcome.md` when the run completes, stops, or fails.
+- Always keep `.lab/context/auto-ledger.md` in sync with the current active owner while the loop is live.
 - Keep handoff wording stable across auto outcomes and downstream report or write handoffs: record completed work, frozen scope, allowed next action, required read set for the next owner, and the accept or revise or reject boundary.
 - When the evaluation protocol declares structured ladder rungs, execute them as a foreground rung state machine:
   - each rung must declare `Stage`, `Goal`, `Command`, `Watch`, `Gate`, `On pass`, `On fail`, and `On stop`
@@ -93,15 +112,16 @@
 ## Minimum Procedure
 1. Validate the auto-mode contract
-2. Confirm the approved autonomy level matches the requested stage envelope
-3. Set or refresh auto-status
-4. Choose the next allowed `/lab` stage or structured ladder rung
-5. Launch the bounded action
-6. Poll for process completion, checkpoint movement, or summary generation while keeping the session alive
-7. Evaluate the declared rung gate and transition to the next rung when structured ladder mode is active
-8. Evaluate the declared terminal goal semantics at the correct boundary
-9. Evaluate the primary gate, secondary guard, promotion condition, stop reason, and escalation reason at the correct boundary
-10. Write auto-outcome and decide continue, promote, stop, or escalate
+2. Emit the visible `Auto preflight` summary
+3. Confirm the approved autonomy level matches the requested stage envelope
+4. Set or refresh auto-status and auto-ledger
+5. Choose the next allowed `/lab` stage or structured ladder rung
+6. Launch the bounded action
+7. Poll for process completion, checkpoint movement, or summary generation while keeping the session alive
+8. Evaluate the declared rung gate and transition to the next rung when structured ladder mode is active
+9. Evaluate the declared terminal goal semantics at the correct boundary
+10. Evaluate the primary gate, secondary guard, promotion condition, stop reason, and escalation reason at the correct boundary
+11. Write auto-outcome and decide continue, promote, stop, or escalate
 ## Interaction Contract
@@ -113,6 +133,7 @@
 - If you are unsure, choose `L2`.
 - After the `Auto preflight` block, give a concise summary of the objective, the frozen core, and the next automatic stage.
+- The first visible output must be `Auto preflight`, not a result summary or stage recommendation.
 - Always surface the level guide before execution.
 - If the contract is incomplete, ask one clarifying question at a time.
 - If multiple next actions are credible, present 2-3 bounded options with trade-offs before arming a long run.

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "superlab",
-  "version": "0.1.41",
+  "version": "0.1.43",
   "description": "Strict /lab research workflow installer for Codex and Claude",
   "keywords": [
     "codex",