superlab 0.1.47 → 0.1.48
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/auto_contracts.cjs +18 -0
- package/lib/auto_runner.cjs +9 -1
- package/lib/i18n.cjs +61 -12
- package/package-assets/claude/commands/lab/auto.md +11 -0
- package/package-assets/claude/commands/lab-auto.md +11 -0
- package/package-assets/claude/commands/lab.md +16 -6
- package/package-assets/claude/commands/lab:auto.md +11 -0
- package/package-assets/claude/commands/lab/357/274/232auto.md +11 -0
- package/package-assets/codex/prompts/lab/auto.md +11 -0
- package/package-assets/codex/prompts/lab-auto.md +11 -0
- package/package-assets/codex/prompts/lab.md +16 -6
- package/package-assets/codex/prompts/lab:auto.md +11 -0
- package/package-assets/codex/prompts/lab/357/274/232auto.md +11 -0
- package/package-assets/shared/skills/lab/SKILL.md +11 -2
- package/package-assets/shared/skills/lab/stages/auto.md +14 -9
- package/package.json +1 -1
package/lib/auto_contracts.cjs
CHANGED
|
@@ -99,6 +99,23 @@ function normalizeRequestedAutoContract(requested = {}) {
|
|
|
99
99
|
};
|
|
100
100
|
}
|
|
101
101
|
|
|
102
|
+
/**
 * Choose the autonomy level to use, in priority order:
 *   1. the explicitly requested level,
 *   2. the caller-supplied current level,
 *   3. the literal default "l2".
 *
 * Each candidate is passed through normalizeScalar() and accepted only when
 * isMeaningful() returns a truthy value. Both helpers are defined outside this
 * excerpt, so their exact normalization / emptiness rules are not visible here.
 *
 * NOTE(review): this function does not check the result against a fixed set of
 * levels; whatever normalizeScalar() returns is passed through — confirm that
 * callers validate the value if that matters.
 *
 * @param {object} [options] - Optional bag; omitting it uses both defaults.
 * @param {string} [options.requestedAutonomyLevel=""] - Level asked for by the request (presumably a string — verify against callers).
 * @param {string} [options.currentAutonomyLevel=""] - Level already in effect, used as fallback.
 * @returns {string} The first meaningful normalized candidate, otherwise "l2".
 */
function resolveRequestedAutonomyLevel({
|
|
103
|
+
requestedAutonomyLevel = "",
|
|
104
|
+
currentAutonomyLevel = "",
|
|
105
|
+
} = {}) {
|
|
106
|
+
// `|| ""` turns any falsy input (null, undefined, 0, "") into an empty string before normalizing.
const requested = normalizeScalar(requestedAutonomyLevel || "");
|
|
107
|
+
if (isMeaningful(requested)) {
|
|
108
|
+
// An explicit request always wins over the current level.
return requested;
|
|
109
|
+
}
|
|
110
|
+
|
|
111
|
+
// No usable request: fall back to the level supplied as "current".
const current = normalizeScalar(currentAutonomyLevel || "");
|
|
112
|
+
if (isMeaningful(current)) {
|
|
113
|
+
return current;
|
|
114
|
+
}
|
|
115
|
+
|
|
116
|
+
// Neither candidate was meaningful: use the hard-coded default level.
return "l2";
|
|
117
|
+
}
|
|
118
|
+
|
|
102
119
|
function sameStageSet(left, right) {
|
|
103
120
|
if (left.length !== right.length) {
|
|
104
121
|
return false;
|
|
@@ -530,6 +547,7 @@ module.exports = {
|
|
|
530
547
|
inferCampaignKind,
|
|
531
548
|
isLocalProcessAlive,
|
|
532
549
|
normalizeRequestedAutoContract,
|
|
550
|
+
resolveRequestedAutonomyLevel,
|
|
533
551
|
resolveFrozenCoreEntries,
|
|
534
552
|
resolveStageCommand,
|
|
535
553
|
snapshotFrozenCore,
|
package/lib/auto_runner.cjs
CHANGED
|
@@ -15,6 +15,7 @@ const {
|
|
|
15
15
|
inferCampaignKind,
|
|
16
16
|
isLocalProcessAlive,
|
|
17
17
|
normalizeRequestedAutoContract,
|
|
18
|
+
resolveRequestedAutonomyLevel,
|
|
18
19
|
resolveStageCommand,
|
|
19
20
|
snapshotFrozenCore,
|
|
20
21
|
snapshotPaths,
|
|
@@ -164,7 +165,10 @@ function buildRolledOverAutoMode(mode, requested, now) {
|
|
|
164
165
|
campaignKind,
|
|
165
166
|
campaignStartedAt: now.toISOString(),
|
|
166
167
|
objective: requested.objective || mode.objective,
|
|
167
|
-
autonomyLevel:
|
|
168
|
+
autonomyLevel: resolveRequestedAutonomyLevel({
|
|
169
|
+
requestedAutonomyLevel: requested.autonomyLevel,
|
|
170
|
+
currentAutonomyLevel: mode.autonomyLevel,
|
|
171
|
+
}),
|
|
168
172
|
approvalStatus: "draft",
|
|
169
173
|
allowedStages,
|
|
170
174
|
successCriteria: "",
|
|
@@ -489,6 +493,10 @@ async function startAutoMode({ targetDir, now = new Date(), requestedContract =
|
|
|
489
493
|
}
|
|
490
494
|
if (fit.classification === "hard-mismatch") {
|
|
491
495
|
const normalizedRequest = normalizeRequestedAutoContract(requestedContract);
|
|
496
|
+
normalizedRequest.autonomyLevel = resolveRequestedAutonomyLevel({
|
|
497
|
+
requestedAutonomyLevel: normalizedRequest.autonomyLevel,
|
|
498
|
+
currentAutonomyLevel: mode.autonomyLevel,
|
|
499
|
+
});
|
|
492
500
|
const { newMode, archivedPaths } = rolloverAutoCampaign({
|
|
493
501
|
targetDir,
|
|
494
502
|
mode,
|
package/lib/i18n.cjs
CHANGED
|
@@ -2168,7 +2168,7 @@ ZH_CONTENT[path.join(".lab", ".managed", "templates", "framing.md")] = `# 论文
|
|
|
2168
2168
|
ZH_CONTENT[path.join(".codex", "prompts", "lab.md")] = codexPrompt(
|
|
2169
2169
|
"查看 /lab 研究工作流总览并选择合适阶段",
|
|
2170
2170
|
"workflow question 或 stage choice",
|
|
2171
|
-
"# `/lab` for Codex\n\n`/lab` 是严格的研究工作流命令族。每次都使用同一套仓库工件和阶段边界。\n\n## 子命令\n\n- `/lab:idea`\n 先做两轮脑暴和两轮文献检索,再定义问题与 failure case、对比最接近前作,并输出带 approval gate 的 source-backed recommendation。\n\n- `/lab:data`\n 把已批准的 idea 转成数据集与 benchmark 方案,记录数据集年份、使用过该数据集的论文、下载来源、许可或访问限制,以及 classic-public、recent-strong-public、claim-specific 三类 benchmark 的纳入理由,和 canonical baselines、strong historical baselines、recent strong public methods、closest prior work 四类对比方法的纳入理由。\n\n- `/lab:auto`\n 在不改变 mission、framing 和核心 claims 的前提下,读取 eval-protocol 与 auto-mode 契约并自动编排 `run`、`iterate`、`review`、`report`,必要时扩展数据集、benchmark 和 comparison methods,并在满足升格策略时自动升级 primary package。启动前必须选定 autonomy level、声明 terminal goal,并显式写清 primary gate、secondary guard、promotion condition、stop reason 和 escalation reason,再批准契约。\n\n- `/lab:framing`\n 通过审计当前领域与相邻领域的术语,锁定 paper-facing 的方法名、模块名、论文题目和 contribution bullets,并在 section 起草前保留 approval gate。\n\n- `/lab:spec`\n 把已批准的 idea 转成 `.lab/changes/<change-id>/` 下的一个 lab change 目录,并在其中写出 `proposal`、`design`、`spec`、`tasks`。\n\n- `/lab:run`\n 执行最小有意义验证运行,登记 run,并生成第一版标准化评估摘要。\n\n- `/lab:iterate`\n 在冻结 mission、阈值、verification commands 与 `completion_promise` 的前提下执行有边界的实验迭代。\n\n- `/lab:review`\n 以 reviewer mode 审查文档或结果,先给短摘要,再输出 findings、fatal flaws、fix priority 和 residual risks。\n\n- `/lab:report`\n 从 runs 和 iterations 工件生成最终研究报告。\n\n- `/lab:write`\n 使用已安装 `lab` skill 下 vendored 的 paper-writing references,把稳定 report 工件转成论文 section。\n\n## 调度规则\n\n- 始终使用 `skills/lab/SKILL.md` 作为工作流合同。\n- 用户显式调用 `/lab:<stage>` 时,要立刻执行该 stage,而不是只推荐别的 `/lab` stage。\n- 先给简洁的阶段摘要;只要 stage 合同要求受管工件,就应立刻落盘,再回报输出路径和下一步。\n- 如果歧义会影响结论,一次只问一个问题;如果有多条可行路径,先给 2-3 个方案再收敛。\n- `/lab:spec` 前应已有经批准的数据集与 benchmark 方案。\n- `/lab:run`、`/lab:iterate`、`/lab:auto`、`/lab:report` 都应遵循 `.lab/context/eval-protocol.md`。\n- `.lab/context/eval-protocol.md` 不只定义主指标和主表,也应定义指标释义、实验阶梯,以及指标和对比实现的来源。\n- `/lab:auto` 只编排已批准边界内的执行阶段,不替代手动的 idea/data/framing/spec 决策。\n- `/lab:write` 前必须已有经批准的 `/lab:framing` 工件。\n\n## 如何输入 `/lab:auto`\n\n## 
`/lab:auto` 层级指南\n\n- `L1`:适合安全验证、一轮 bounded 真实运行,或简单 report 刷新。\n- `L2`:默认推荐级别,适合冻结核心边界内的常规实验迭代。\n- `L3
|
|
2171
|
+
"# `/lab` for Codex\n\n`/lab` 是严格的研究工作流命令族。每次都使用同一套仓库工件和阶段边界。\n\n## 子命令\n\n- `/lab:idea`\n 先做两轮脑暴和两轮文献检索,再定义问题与 failure case、对比最接近前作,并输出带 approval gate 的 source-backed recommendation。\n\n- `/lab:data`\n 把已批准的 idea 转成数据集与 benchmark 方案,记录数据集年份、使用过该数据集的论文、下载来源、许可或访问限制,以及 classic-public、recent-strong-public、claim-specific 三类 benchmark 的纳入理由,和 canonical baselines、strong historical baselines、recent strong public methods、closest prior work 四类对比方法的纳入理由。\n\n- `/lab:auto`\n 在不改变 mission、framing 和核心 claims 的前提下,读取 eval-protocol 与 auto-mode 契约并自动编排 `run`、`iterate`、`review`、`report`,必要时扩展数据集、benchmark 和 comparison methods,并在满足升格策略时自动升级 primary package。启动前必须选定 autonomy level、声明 terminal goal,并显式写清 primary gate、secondary guard、promotion condition、stop reason 和 escalation reason,再批准契约。\n\n- `/lab:framing`\n 通过审计当前领域与相邻领域的术语,锁定 paper-facing 的方法名、模块名、论文题目和 contribution bullets,并在 section 起草前保留 approval gate。\n\n- `/lab:spec`\n 把已批准的 idea 转成 `.lab/changes/<change-id>/` 下的一个 lab change 目录,并在其中写出 `proposal`、`design`、`spec`、`tasks`。\n\n- `/lab:run`\n 执行最小有意义验证运行,登记 run,并生成第一版标准化评估摘要。\n\n- `/lab:iterate`\n 在冻结 mission、阈值、verification commands 与 `completion_promise` 的前提下执行有边界的实验迭代。\n\n- `/lab:review`\n 以 reviewer mode 审查文档或结果,先给短摘要,再输出 findings、fatal flaws、fix priority 和 residual risks。\n\n- `/lab:report`\n 从 runs 和 iterations 工件生成最终研究报告。\n\n- `/lab:write`\n 使用已安装 `lab` skill 下 vendored 的 paper-writing references,把稳定 report 工件转成论文 section。\n\n## 调度规则\n\n- 始终使用 `skills/lab/SKILL.md` 作为工作流合同。\n- 用户显式调用 `/lab:<stage>` 时,要立刻执行该 stage,而不是只推荐别的 `/lab` stage。\n- 先给简洁的阶段摘要;只要 stage 合同要求受管工件,就应立刻落盘,再回报输出路径和下一步。\n- 如果歧义会影响结论,一次只问一个问题;如果有多条可行路径,先给 2-3 个方案再收敛。\n- `/lab:spec` 前应已有经批准的数据集与 benchmark 方案。\n- `/lab:run`、`/lab:iterate`、`/lab:auto`、`/lab:report` 都应遵循 `.lab/context/eval-protocol.md`。\n- `.lab/context/eval-protocol.md` 不只定义主指标和主表,也应定义指标释义、实验阶梯,以及指标和对比实现的来源。\n- `/lab:auto` 只编排已批准边界内的执行阶段,不替代手动的 idea/data/framing/spec 决策。\n- `/lab:write` 前必须已有经批准的 `/lab:framing` 工件。\n\n## 如何输入 `/lab:auto`\n\n## 
`/lab:auto` 层级指南\n\n- 继续当前活动:`/lab-auto: 继续`。\n- 以最高执行级别继续当前活动:`/lab-auto: L3,继续`。\n- `L1`:适合安全验证、一轮 bounded 真实运行,或简单 report 刷新。\n- `L2`:默认推荐级别,适合冻结核心边界内的常规实验迭代。\n- `L3`:继续推进直到命中边界的级别;适合你明确想让 auto 在已批准 envelope 内默认继续,而不是为常规实现分歧停下。\n- 如果用户没写级别,默认按 `L2` 处理。\n- 接受短写:`L1`、`L2`、`L3`,以及小写 `l1/l2/l3`;`最高级别`、`最高自治` 也按 `L3` 处理。\n- 如果用户只写 `继续`,且当前已有 active 或可恢复 campaign,就直接继承当前 campaign 的级别。\n- 只有当级别本身真的有歧义时,才停下来追问,例如 `第三层`、`phase 3`、`table 3`。\n- 已批准的 `L2` 和 `L3` 执行 campaign 默认进入执行模式,而不是重新打开 brainstorming 或 spec-review。\n- 在执行模式里,不要把常规实现路径选择、helper script、路径修正、数据集适配、同 family 候选切换或普通自检重新路由到 brainstorming、spec review,或 reviewer/explorer 子智能体。\n- 只有当用户明确要求设计或审阅帮助、contract fit 需要新 campaign,或 escalation condition 明确要求独立复核时,才从执行模式切到设计或 reviewer 模式。\n- 真正的 `/lab:auto` 首个可见输出必须是 `Auto preflight`。\n- 这个首个可见输出必须展示已读取文件,以及 `Autonomy level`、`Allowed stages`、`Terminal goal`、`Primary gate`、`Secondary guard`。\n- 如果无法从 `.lab/context/eval-protocol.md`、`.lab/context/auto-mode.md`、`.lab/context/auto-status.md` 和 `.lab/context/auto-outcome.md` 完成 preflight,就必须停下,而不是假装 loop 已经 armed。\n\n- 把 `Autonomy level L1/L2/L3` 视为执行权限级别,不要和论文里的 layer、phase、table 编号混用。\n- 把 `paper layer`、`phase`、`table` 视为实验目标。例如 `paper layer 3` 或 `Phase 1` 不是 `Autonomy level L3`。\n- 一条好的 `/lab:auto` 输入应至少说清:objective、terminal goal、scope、allowed modifications;只有在你想覆盖默认或当前 campaign 时才需要额外写自治级别。\n- 如果 workflow language 是中文,摘要、清单条目、任务标签和进度更新都应使用中文,除非文件路径、代码标识符或字面指标名必须保持原样。\n- 示例:`/lab:auto: 继续`\n- 示例:`/lab:auto: L3,继续`\n- 示例:`/lab:auto 目标:推进 paper layer 3。终止条件:完成 bounded protocol、测试、最小实现和一轮小规模结果。允许修改:配置、数据接入、评估脚本。`\n"
|
|
2172
2172
|
);
|
|
2173
2173
|
|
|
2174
2174
|
ZH_CONTENT[path.join(".codex", "prompts", "lab-data.md")] = codexPrompt(
|
|
@@ -2180,14 +2180,14 @@ ZH_CONTENT[path.join(".codex", "prompts", "lab-data.md")] = codexPrompt(
|
|
|
2180
2180
|
ZH_CONTENT[path.join(".codex", "prompts", "lab-auto.md")] = codexPrompt(
|
|
2181
2181
|
"在已批准边界内编排自动实验循环",
|
|
2182
2182
|
"auto mode objective",
|
|
2183
|
-
"使用已安装的 `lab` 技能:`.codex/skills/lab/SKILL.md`。\n\n立刻针对用户当前给出的参数执行 `/lab:auto`,不要只推荐别的 `/lab` 阶段。只有在缺少阻塞性前提时,才明确指出缺什么,并且一次最多追问一个问题。\n\n先把用户请求规范化成可交给 CLI 的 auto contract 字段:objective、autonomy level、campaign kind、allowed stages,以及任何不改变范围就能明确的 terminal-goal 提示。\n你的第一步执行动作必须是对当前项目运行 `superlab auto start`,而不是自己直接改写 `.lab/context/auto-mode.md`、`.lab/context/auto-status.md`、`.lab/context/auto-ledger.md` 或 `.lab/context/auto-outcome.md`。\n把规范化后的字段通过 CLI 参数传下去,包括 `--objective`、`--campaign-kind`、`--allowed-stages`,以及在用户已明确或已隐含时传 `--autonomy-level`。\nCLI 返回后的 runtime 结果才是事实来源。如果 CLI 报 rollover、conflict、缺字段,或已经成功启动 campaign,就如实回报,不要绕过 CLI 自己做 prompt 侧写回。\n\n本命令运行 `/lab:auto` 阶段。它必须读取 `.lab/context/eval-protocol.md`、`.lab/context/auto-mode.md`、`.lab/context/auto-status.md`、`.lab/context/auto-ledger.md` 与 `.lab/context/auto-outcome.md`,先确认 autonomy level、approval status、terminal goal schema,以及 primary gate、secondary guard、promotion condition、stop reason、escalation reason,再把 eval-protocol 里的指标释义、主表计划、来源约束与结构化实验阶梯当作执行依据,在不修改 mission、framing 和核心 claims 的前提下编排已批准的 `run`、`iterate`、`review`、`report`,轮询长任务完成情况;如果声明了 rung,就保持会话活着并按 rung 转移继续推进。\n首个可见输出块必须是 `Auto preflight`。这个块必须列出已读取文件,并回显 `Autonomy level`、`Approval status`、`Allowed stages`、`Terminal goal`、`Primary gate` 和 `Secondary guard`,然后才能进入执行摘要或动作计划。\n如果 preflight 所需字段缺失、过期或彼此冲突,就必须在执行前停下,并明确指出到底是哪一个字段阻止了 loop 启动。\n当 loop 活着时,必须把当前 owner、观察状态、checkpoint 摘要、继续边界、停止边界和恢复读取集合写进 `.lab/context/auto-ledger.md`。\n如果仓库的 workflow language 是中文,摘要、清单条目、任务标签和进度更新都必须使用中文,除非某个文件路径、代码标识符或字面指标名必须保持原样。\n把 `Layer 3`、`Phase 1`、`Table 2` 这类表达视为论文范围目标;只有显式写成 `Autonomy level L3` 或 `自治级别 L3` 时,才把它当成执行权限级别。\n不要用 `sleep 30`、单次 `pgrep` 或一次性的 `metrics.json` 探针来代替真实长任务命令;当真实实验进程还活着时,只允许发进度更新并继续等待。"
|
|
2183
|
+
"使用已安装的 `lab` 技能:`.codex/skills/lab/SKILL.md`。\n\n继续当前活动:`/lab-auto: 继续`。\n以最高执行级别继续当前活动:`/lab-auto: L3,继续`。\n\n立刻针对用户当前给出的参数执行 `/lab:auto`,不要只推荐别的 `/lab` 阶段。只有在缺少阻塞性前提时,才明确指出缺什么,并且一次最多追问一个问题。\n\n先把用户请求规范化成可交给 CLI 的 auto contract 字段:objective、autonomy level、campaign kind、allowed stages,以及任何不改变范围就能明确的 terminal-goal 提示。\n如果用户没写级别,默认按 `L2` 处理;接受 `L1/L2/L3`、`l1/l2/l3` 这类短写,`最高级别`、`最高自治` 也按 `L3` 处理。\n如果用户只写了 `继续`,且当前已有 active 或可恢复 campaign,就直接继承当前 campaign 的级别,而不是要求用户重复写。\n如果你想沿用 runtime 里已存的 campaign 级别继续,就直接写 `/lab-auto: 继续`。\n只有当级别本身真的有歧义时,才停下来追问,例如 `第三层`、`phase 3`、`table 3`。\n已批准的 `L2` 和 `L3` 执行 campaign 默认进入执行模式。\n在执行模式里,不要进入 brainstorming,不要进入 spec review,也不要为了常规实现路径选择、helper script、路径修正、数据集适配、同 family 候选切换或普通自检而生成 reviewer、explorer 或其他子智能体循环。\n只有当用户明确要求设计或审阅帮助、contract fit 需要新 campaign,或 contract 的 escalation condition 明确要求独立复核时,才从执行模式切到设计或 reviewer 模式。\n你的第一步执行动作必须是对当前项目运行 `superlab auto start`,而不是自己直接改写 `.lab/context/auto-mode.md`、`.lab/context/auto-status.md`、`.lab/context/auto-ledger.md` 或 `.lab/context/auto-outcome.md`。\n把规范化后的字段通过 CLI 参数传下去,包括 `--objective`、`--campaign-kind`、`--allowed-stages`,以及在用户已明确或已隐含时传 `--autonomy-level`。\nCLI 返回后的 runtime 结果才是事实来源。如果 CLI 报 rollover、conflict、缺字段,或已经成功启动 campaign,就如实回报,不要绕过 CLI 自己做 prompt 侧写回。\n\n本命令运行 `/lab:auto` 阶段。它必须读取 `.lab/context/eval-protocol.md`、`.lab/context/auto-mode.md`、`.lab/context/auto-status.md`、`.lab/context/auto-ledger.md` 与 `.lab/context/auto-outcome.md`,先确认 autonomy level、approval status、terminal goal schema,以及 primary gate、secondary guard、promotion condition、stop reason、escalation reason,再把 eval-protocol 里的指标释义、主表计划、来源约束与结构化实验阶梯当作执行依据,在不修改 mission、framing 和核心 claims 的前提下编排已批准的 `run`、`iterate`、`review`、`report`,轮询长任务完成情况;如果声明了 rung,就保持会话活着并按 rung 转移继续推进。\n首个可见输出块必须是 `Auto preflight`。这个块必须列出已读取文件,并回显 `Autonomy level`、`Approval status`、`Allowed stages`、`Terminal goal`、`Primary gate` 和 `Secondary guard`,然后才能进入执行摘要或动作计划。\n如果 preflight 所需字段缺失、过期或彼此冲突,就必须在执行前停下,并明确指出到底是哪一个字段阻止了 loop 启动。\n当 loop 活着时,必须把当前 
owner、观察状态、checkpoint 摘要、继续边界、停止边界和恢复读取集合写进 `.lab/context/auto-ledger.md`。\n如果仓库的 workflow language 是中文,摘要、清单条目、任务标签和进度更新都必须使用中文,除非某个文件路径、代码标识符或字面指标名必须保持原样。\n把 `Layer 3`、`Phase 1`、`Table 2` 这类表达视为论文范围目标;只有显式写成 `Autonomy level L3` 或 `自治级别 L3` 时,才把它当成执行权限级别。\n不要用 `sleep 30`、单次 `pgrep` 或一次性的 `metrics.json` 探针来代替真实长任务命令;当真实实验进程还活着时,只允许发进度更新并继续等待。"
|
|
2184
2184
|
);
|
|
2185
2185
|
|
|
2186
2186
|
ZH_CONTENT[path.join(".claude", "commands", "lab.md")] = claudeCommand(
|
|
2187
2187
|
"lab",
|
|
2188
2188
|
"查看 /lab 研究工作流总览并选择合适阶段",
|
|
2189
2189
|
"[stage] [target]",
|
|
2190
|
-
"# `/lab` for Claude\n\n`/lab` 是 Claude Code 里的 lab 工作流分发入口。调用方式有两种:\n\n- `/lab <stage> ...`\n- `/lab-idea`、`/lab-data`、`/lab-auto`、`/lab-framing`、`/lab-spec`、`/lab-run`、`/lab-iterate`、`/lab-review`、`/lab-report`、`/lab-write`\n\n## 阶段别名\n\n- `/lab idea ...` 或 `/lab-idea`\n- `/lab data ...` 或 `/lab-data`\n- `/lab auto ...` 或 `/lab-auto`\n- `/lab framing ...` 或 `/lab-framing`\n- `/lab spec ...` 或 `/lab-spec`\n- `/lab run ...` 或 `/lab-run`\n- `/lab iterate ...` 或 `/lab-iterate`\n- `/lab review ...` 或 `/lab-review`\n- `/lab report ...` 或 `/lab-report`\n- `/lab write ...` 或 `/lab-write`\n\n## 调度规则\n\n- 始终使用 `skills/lab/SKILL.md` 作为工作流合同。\n- 用户显式调用 `/lab <stage> ...` 或 `/lab-<stage>` 时,要立刻执行该 stage,而不是只推荐别的阶段。\n- 先给简洁的阶段摘要;只要 stage 合同要求受管工件,就应立刻落盘,再回报输出路径和下一步。\n- 如果歧义会影响结论,一次只问一个问题;如果有多条可行路径,先给 2-3 个方案再收敛。\n- `spec` 前应已有经批准的数据集与 benchmark 方案。\n- `run`、`iterate`、`auto`、`report` 都应遵循 `.lab/context/eval-protocol.md`。\n- `auto` 只编排已批准边界内的执行阶段,不替代手动的 idea/data/framing/spec 决策。\n- `write` 前必须已有经批准的 `framing` 工件。\n\n## 如何输入 `/lab auto`\n\n## `/lab auto` 层级指南\n\n- `L1`:适合安全验证、一轮 bounded 真实运行,或简单 report 刷新。\n- `L2`:默认推荐级别,适合冻结核心边界内的常规实验迭代。\n- `L3
|
|
2190
|
+
"# `/lab` for Claude\n\n`/lab` 是 Claude Code 里的 lab 工作流分发入口。调用方式有两种:\n\n- `/lab <stage> ...`\n- `/lab-idea`、`/lab-data`、`/lab-auto`、`/lab-framing`、`/lab-spec`、`/lab-run`、`/lab-iterate`、`/lab-review`、`/lab-report`、`/lab-write`\n\n## 阶段别名\n\n- `/lab idea ...` 或 `/lab-idea`\n- `/lab data ...` 或 `/lab-data`\n- `/lab auto ...` 或 `/lab-auto`\n- `/lab framing ...` 或 `/lab-framing`\n- `/lab spec ...` 或 `/lab-spec`\n- `/lab run ...` 或 `/lab-run`\n- `/lab iterate ...` 或 `/lab-iterate`\n- `/lab review ...` 或 `/lab-review`\n- `/lab report ...` 或 `/lab-report`\n- `/lab write ...` 或 `/lab-write`\n\n## 调度规则\n\n- 始终使用 `skills/lab/SKILL.md` 作为工作流合同。\n- 用户显式调用 `/lab <stage> ...` 或 `/lab-<stage>` 时,要立刻执行该 stage,而不是只推荐别的阶段。\n- 先给简洁的阶段摘要;只要 stage 合同要求受管工件,就应立刻落盘,再回报输出路径和下一步。\n- 如果歧义会影响结论,一次只问一个问题;如果有多条可行路径,先给 2-3 个方案再收敛。\n- `spec` 前应已有经批准的数据集与 benchmark 方案。\n- `run`、`iterate`、`auto`、`report` 都应遵循 `.lab/context/eval-protocol.md`。\n- `auto` 只编排已批准边界内的执行阶段,不替代手动的 idea/data/framing/spec 决策。\n- `write` 前必须已有经批准的 `framing` 工件。\n\n## 如何输入 `/lab auto`\n\n## `/lab auto` 层级指南\n\n- 继续当前活动:`/lab-auto: 继续`。\n- 以最高执行级别继续当前活动:`/lab-auto: L3,继续`。\n- `L1`:适合安全验证、一轮 bounded 真实运行,或简单 report 刷新。\n- `L2`:默认推荐级别,适合冻结核心边界内的常规实验迭代。\n- `L3`:继续推进直到命中边界的级别;适合你明确想让 auto 在已批准 envelope 内默认继续,而不是为常规实现分歧停下。\n- 如果用户没写级别,默认按 `L2` 处理。\n- 接受短写:`L1`、`L2`、`L3`,以及小写 `l1/l2/l3`;`最高级别`、`最高自治` 也按 `L3` 处理。\n- 如果用户只写 `继续`,且当前已有 active 或可恢复 campaign,就直接继承当前 campaign 的级别。\n- 只有当级别本身真的有歧义时,才停下来追问,例如 `第三层`、`phase 3`、`table 3`。\n- 已批准的 `L2` 和 `L3` 执行 campaign 默认进入执行模式,而不是重新打开 brainstorming 或 spec-review。\n- 在执行模式里,不要把常规实现路径选择、helper script、路径修正、数据集适配、同 family 候选切换或普通自检重新路由到 brainstorming、spec review,或 reviewer/explorer 子智能体。\n- 只有当用户明确要求设计或审阅帮助、contract fit 需要新 campaign,或 escalation condition 明确要求独立复核时,才从执行模式切到设计或 reviewer 模式。\n- 真正的 `/lab auto` 首个可见输出必须是 `Auto preflight`。\n- 这个首个可见输出必须展示已读取文件,以及 `Autonomy level`、`Allowed stages`、`Terminal goal`、`Primary gate`、`Secondary guard`。\n- 如果无法从 
`.lab/context/eval-protocol.md`、`.lab/context/auto-mode.md`、`.lab/context/auto-status.md` 和 `.lab/context/auto-outcome.md` 完成 preflight,就必须停下,而不是假装 loop 已经 armed。\n\n- 把 `Autonomy level L1/L2/L3` 视为执行权限级别,不要和论文里的 layer、phase、table 编号混用。\n- 把 `paper layer`、`phase`、`table` 视为实验目标。例如 `paper layer 3` 或 `Phase 1` 不是 `Autonomy level L3`。\n- 一条好的 `/lab auto` 输入应至少说清:objective、terminal goal、scope、allowed modifications;只有在你想覆盖默认或当前 campaign 时才需要额外写自治级别。\n- 如果 workflow language 是中文,摘要、清单条目、任务标签和进度更新都应使用中文,除非文件路径、代码标识符或字面指标名必须保持原样。\n- 示例:`/lab-auto: 继续`\n- 示例:`/lab-auto: L3,继续`\n- 示例:`/lab auto 目标:推进 paper layer 3。终止条件:完成 bounded protocol、测试、最小实现和一轮小规模结果。允许修改:配置、数据接入、评估脚本。`\n"
|
|
2191
2191
|
);
|
|
2192
2192
|
|
|
2193
2193
|
ZH_CONTENT[path.join(".claude", "commands", "lab-data.md")] = claudeCommand(
|
|
@@ -2201,7 +2201,40 @@ ZH_CONTENT[path.join(".claude", "commands", "lab-auto.md")] = claudeCommand(
|
|
|
2201
2201
|
"lab-auto",
|
|
2202
2202
|
"在已批准边界内编排自动实验循环",
|
|
2203
2203
|
"auto mode objective",
|
|
2204
|
-
"使用已安装的 `lab` 技能:`.claude/skills/lab/SKILL.md`。\n\n立刻针对用户当前给出的参数执行 `auto` 阶段,不要只推荐别的 lab 阶段。只有在缺少阻塞性前提时,才明确指出缺什么,并且一次最多追问一个问题。\n\n先把用户请求规范化成可交给 CLI 的 auto contract 字段:objective、autonomy level、campaign kind、allowed stages,以及任何不改变范围就能明确的 terminal-goal 提示。\n你的第一步执行动作必须是对当前项目运行 `superlab auto start`,而不是自己直接改写 `.lab/context/auto-mode.md`、`.lab/context/auto-status.md`、`.lab/context/auto-ledger.md` 或 `.lab/context/auto-outcome.md`。\n把规范化后的字段通过 CLI 参数传下去,包括 `--objective`、`--campaign-kind`、`--allowed-stages`,以及在用户已明确或已隐含时传 `--autonomy-level`。\nCLI 返回后的 runtime 结果才是事实来源。如果 CLI 报 rollover、conflict、缺字段,或已经成功启动 campaign,就如实回报,不要绕过 CLI 自己做 prompt 侧写回。\n\n本命令运行 lab workflow 的 `auto` 阶段。它必须读取 `.lab/context/eval-protocol.md`、`.lab/context/auto-mode.md`、`.lab/context/auto-status.md`、`.lab/context/auto-ledger.md` 与 `.lab/context/auto-outcome.md`,先确认 autonomy level、approval status、terminal goal schema,以及 primary gate、secondary guard、promotion condition、stop reason、escalation reason,再把 eval-protocol 里的指标释义、主表计划、来源约束与结构化实验阶梯当作执行依据,在不修改 mission、framing 和核心 claims 的前提下编排已批准的 `run`、`iterate`、`review`、`report`,轮询长任务完成情况;如果声明了 rung,就保持会话活着并按 rung 转移继续推进。\n首个可见输出块必须是 `Auto preflight`。这个块必须列出已读取文件,并回显 `Autonomy level`、`Approval status`、`Allowed stages`、`Terminal goal`、`Primary gate` 和 `Secondary guard`,然后才能进入执行摘要或动作计划。\n如果 preflight 所需字段缺失、过期或彼此冲突,就必须在执行前停下,并明确指出到底是哪一个字段阻止了 loop 启动。\n当 loop 活着时,必须把当前 owner、观察状态、checkpoint 摘要、继续边界、停止边界和恢复读取集合写进 `.lab/context/auto-ledger.md`。\n如果仓库的 workflow language 是中文,摘要、清单条目、任务标签和进度更新都必须使用中文,除非某个文件路径、代码标识符或字面指标名必须保持原样。\n把 `Layer 3`、`Phase 1`、`Table 2` 这类表达视为论文范围目标;只有显式写成 `Autonomy level L3` 或 `自治级别 L3` 时,才把它当成执行权限级别。\n不要用 `sleep 30`、单次 `pgrep` 或一次性的 `metrics.json` 探针来代替真实长任务命令;当真实实验进程还活着时,只允许发进度更新并继续等待。"
|
|
2204
|
+
"使用已安装的 `lab` 技能:`.claude/skills/lab/SKILL.md`。\n\n继续当前活动:`/lab-auto: 继续`。\n以最高执行级别继续当前活动:`/lab-auto: L3,继续`。\n\n立刻针对用户当前给出的参数执行 `auto` 阶段,不要只推荐别的 lab 阶段。只有在缺少阻塞性前提时,才明确指出缺什么,并且一次最多追问一个问题。\n\n先把用户请求规范化成可交给 CLI 的 auto contract 字段:objective、autonomy level、campaign kind、allowed stages,以及任何不改变范围就能明确的 terminal-goal 提示。\n如果用户没写级别,默认按 `L2` 处理;接受 `L1/L2/L3`、`l1/l2/l3` 这类短写,`最高级别`、`最高自治` 也按 `L3` 处理。\n如果用户只写了 `继续`,且当前已有 active 或可恢复 campaign,就直接继承当前 campaign 的级别,而不是要求用户重复写。\n如果你想沿用 runtime 里已存的 campaign 级别继续,就直接写 `/lab-auto: 继续`。\n只有当级别本身真的有歧义时,才停下来追问,例如 `第三层`、`phase 3`、`table 3`。\n已批准的 `L2` 和 `L3` 执行 campaign 默认进入执行模式。\n在执行模式里,不要进入 brainstorming,不要进入 spec review,也不要为了常规实现路径选择、helper script、路径修正、数据集适配、同 family 候选切换或普通自检而生成 reviewer、explorer 或其他子智能体循环。\n只有当用户明确要求设计或审阅帮助、contract fit 需要新 campaign,或 contract 的 escalation condition 明确要求独立复核时,才从执行模式切到设计或 reviewer 模式。\n你的第一步执行动作必须是对当前项目运行 `superlab auto start`,而不是自己直接改写 `.lab/context/auto-mode.md`、`.lab/context/auto-status.md`、`.lab/context/auto-ledger.md` 或 `.lab/context/auto-outcome.md`。\n把规范化后的字段通过 CLI 参数传下去,包括 `--objective`、`--campaign-kind`、`--allowed-stages`,以及在用户已明确或已隐含时传 `--autonomy-level`。\nCLI 返回后的 runtime 结果才是事实来源。如果 CLI 报 rollover、conflict、缺字段,或已经成功启动 campaign,就如实回报,不要绕过 CLI 自己做 prompt 侧写回。\n\n本命令运行 lab workflow 的 `auto` 阶段。它必须读取 `.lab/context/eval-protocol.md`、`.lab/context/auto-mode.md`、`.lab/context/auto-status.md`、`.lab/context/auto-ledger.md` 与 `.lab/context/auto-outcome.md`,先确认 autonomy level、approval status、terminal goal schema,以及 primary gate、secondary guard、promotion condition、stop reason、escalation reason,再把 eval-protocol 里的指标释义、主表计划、来源约束与结构化实验阶梯当作执行依据,在不修改 mission、framing 和核心 claims 的前提下编排已批准的 `run`、`iterate`、`review`、`report`,轮询长任务完成情况;如果声明了 rung,就保持会话活着并按 rung 转移继续推进。\n首个可见输出块必须是 `Auto preflight`。这个块必须列出已读取文件,并回显 `Autonomy level`、`Approval status`、`Allowed stages`、`Terminal goal`、`Primary gate` 和 `Secondary guard`,然后才能进入执行摘要或动作计划。\n如果 preflight 所需字段缺失、过期或彼此冲突,就必须在执行前停下,并明确指出到底是哪一个字段阻止了 loop 启动。\n当 loop 
活着时,必须把当前 owner、观察状态、checkpoint 摘要、继续边界、停止边界和恢复读取集合写进 `.lab/context/auto-ledger.md`。\n如果仓库的 workflow language 是中文,摘要、清单条目、任务标签和进度更新都必须使用中文,除非某个文件路径、代码标识符或字面指标名必须保持原样。\n把 `Layer 3`、`Phase 1`、`Table 2` 这类表达视为论文范围目标;只有显式写成 `Autonomy level L3` 或 `自治级别 L3` 时,才把它当成执行权限级别。\n不要用 `sleep 30`、单次 `pgrep` 或一次性的 `metrics.json` 探针来代替真实长任务命令;当真实实验进程还活着时,只允许发进度更新并继续等待。"
|
|
2205
|
+
);
|
|
2206
|
+
|
|
2207
|
+
// Shared sentence stating that an explicit auto request outranks broader
// brainstorming / spec-review skill paths. The Codex wording names `/lab:auto`;
// the Claude wording below names `/lab auto`.
const zhAutoPriorityCodexLine =
|
|
2208
|
+
"显式的 `/lab:auto` 或 `/lab-auto` 请求,其优先级高于 brainstorming、spec review 这类更宽的创作或审阅技能路径。";
|
|
2209
|
+
const zhAutoPriorityClaudeLine =
|
|
2210
|
+
"显式的 `/lab auto` 或 `/lab-auto` 请求,其优先级高于 brainstorming、spec review 这类更宽的创作或审阅技能路径。";
|
|
2211
|
+
|
|
2212
|
+
// Each statement below re-assigns an existing ZH_CONTENT entry with the priority
// sentence inserted directly in front of the "execution mode" anchor sentence.
// String.prototype.replace with a string pattern rewrites only the first
// occurrence and silently returns the input unchanged when the anchor is absent,
// so a wording change in the anchor would drop the insertion without any error.
// NOTE(review): assumes the codexPrompt()/claudeCommand() wrappers keep the body
// text verbatim so the anchor still matches — confirm.
ZH_CONTENT[path.join(".codex", "prompts", "lab.md")] = ZH_CONTENT[
|
|
2213
|
+
path.join(".codex", "prompts", "lab.md")
|
|
2214
|
+
].replace(
|
|
2215
|
+
"- 已批准的 `L2` 和 `L3` 执行 campaign 默认进入执行模式,而不是重新打开 brainstorming 或 spec-review。",
|
|
2216
|
+
`- ${zhAutoPriorityCodexLine}\n- 已批准的 \`L2\` 和 \`L3\` 执行 campaign 默认进入执行模式,而不是重新打开 brainstorming 或 spec-review。`
|
|
2217
|
+
);
|
|
2218
|
+
|
|
2219
|
+
// Codex lab-auto prompt: plain sentence anchor (no leading "- " bullet).
ZH_CONTENT[path.join(".codex", "prompts", "lab-auto.md")] = ZH_CONTENT[
|
|
2220
|
+
path.join(".codex", "prompts", "lab-auto.md")
|
|
2221
|
+
].replace(
|
|
2222
|
+
"已批准的 `L2` 和 `L3` 执行 campaign 默认进入执行模式。",
|
|
2223
|
+
`${zhAutoPriorityCodexLine}\n已批准的 \`L2\` 和 \`L3\` 执行 campaign 默认进入执行模式。`
|
|
2224
|
+
);
|
|
2225
|
+
|
|
2226
|
+
// Claude lab overview: same bullet anchor as the Codex overview, Claude wording inserted.
ZH_CONTENT[path.join(".claude", "commands", "lab.md")] = ZH_CONTENT[
|
|
2227
|
+
path.join(".claude", "commands", "lab.md")
|
|
2228
|
+
].replace(
|
|
2229
|
+
"- 已批准的 `L2` 和 `L3` 执行 campaign 默认进入执行模式,而不是重新打开 brainstorming 或 spec-review。",
|
|
2230
|
+
`- ${zhAutoPriorityClaudeLine}\n- 已批准的 \`L2\` 和 \`L3\` 执行 campaign 默认进入执行模式,而不是重新打开 brainstorming 或 spec-review。`
|
|
2231
|
+
);
|
|
2232
|
+
|
|
2233
|
+
// Claude lab-auto command: plain sentence anchor, Claude wording inserted.
ZH_CONTENT[path.join(".claude", "commands", "lab-auto.md")] = ZH_CONTENT[
|
|
2234
|
+
path.join(".claude", "commands", "lab-auto.md")
|
|
2235
|
+
].replace(
|
|
2236
|
+
"已批准的 `L2` 和 `L3` 执行 campaign 默认进入执行模式。",
|
|
2237
|
+
`${zhAutoPriorityClaudeLine}\n已批准的 \`L2\` 和 \`L3\` 执行 campaign 默认进入执行模式。`
|
|
2205
2238
|
);
|
|
2206
2239
|
|
|
2207
2240
|
const zhRecipeQuickPathLine =
|
|
@@ -2763,7 +2796,7 @@ ZH_CONTENT[path.join(".codex", "skills", "lab", "stages", "auto.md")] = `# \`/la
|
|
|
2763
2796
|
- 级别含义固定为:
|
|
2764
2797
|
- \`L1\`:safe run,只允许 \`run\`、\`review\`、\`report\`
|
|
2765
2798
|
- \`L2\`:bounded iteration,允许 \`run\`、\`iterate\`、\`review\`、\`report\`
|
|
2766
|
-
- \`L3
|
|
2799
|
+
- \`L3\`:在已批准 envelope 内持续推进直到命中边界;framing 已批准时才可额外编排 \`write\`
|
|
2767
2800
|
- 默认只编排 \`run\`、\`iterate\`、\`review\`、\`report\`;只有 framing 已批准时才可选 \`write\`。
|
|
2768
2801
|
- 不要自动修改 mission、paper-facing framing 或核心 claims。
|
|
2769
2802
|
- 可以在 exploration envelope 内增加数据集、benchmark 和 comparison methods。
|
|
@@ -2797,10 +2830,16 @@ ZH_CONTENT[path.join(".codex", "skills", "lab", "stages", "auto.md")] = `# \`/la
|
|
|
2797
2830
|
|
|
2798
2831
|
## 层级指南
|
|
2799
2832
|
|
|
2833
|
+
- 继续当前活动:\`/lab-auto: 继续\`。
|
|
2834
|
+
- 以最高执行级别继续当前活动:\`/lab-auto: L3,继续\`。
|
|
2800
2835
|
- \`L1\` = safe validation
|
|
2801
2836
|
- \`L2\` = 默认推荐的 bounded iteration
|
|
2802
|
-
- \`L3\` =
|
|
2837
|
+
- \`L3\` = 在已批准 envelope 内持续推进直到命中边界
|
|
2803
2838
|
- 如果不确定,默认推荐 \`L2\`。
|
|
2839
|
+
- 显式的 \`/lab:auto\` 或 \`/lab-auto\` 请求,其优先级高于 brainstorming、spec review 这类更宽的创作或审阅技能路径。
|
|
2840
|
+
- 已批准的 \`L2\` 和 \`L3\` 执行 campaign 默认进入执行模式,而不是重新打开 brainstorming 或 spec-review。
|
|
2841
|
+
- 在执行模式里,不要把常规实现路径选择、helper script、路径修正、数据集适配、同 family 候选切换或普通自检重新路由到 brainstorming、spec review,或 reviewer/explorer 子智能体。
|
|
2842
|
+
- 只有当用户明确要求设计或审阅帮助、contract fit 需要新 campaign,或 escalation condition 明确要求独立复核时,才从执行模式切到设计或 reviewer 模式。
|
|
2804
2843
|
|
|
2805
2844
|
- 开始前先简洁说明:objective、frozen core 和下一自动阶段。
|
|
2806
2845
|
- 如果契约本身不完整,一次只追问一个问题。
|
|
@@ -2808,12 +2847,11 @@ ZH_CONTENT[path.join(".codex", "skills", "lab", "stages", "auto.md")] = `# \`/la
|
|
|
2808
2847
|
- 只有当下一步会离开已批准的 exploration envelope、超出选定 autonomy level,或实质改变 frozen core 时,才保留人工 approval gate。
|
|
2809
2848
|
- 每次进入 \`/lab:auto\` 都要先给出这份层级指南。
|
|
2810
2849
|
- 先做输入归一化:把 \`Autonomy level L1/L2/L3\` 视为执行权限级别,把 \`Layer 3\`、\`Phase 1\`、\`Table 2\` 视为论文范围目标。
|
|
2811
|
-
-
|
|
2812
|
-
|
|
2813
|
-
|
|
2814
|
-
|
|
2815
|
-
|
|
2816
|
-
- 给完这版详细说明后,再追问一个明确的 \`L1/L2/L3\` 选择;在用户明确选级别前不要启动循环。
|
|
2850
|
+
- 如果用户没有写自治级别,默认按 \`L2\` 处理,不要为了“只差一个级别”先停下来。
|
|
2851
|
+
- 接受 \`L1/L2/L3\`、\`l1/l2/l3\` 这类短写,\`最高级别\`、\`最高自治\` 也按 \`L3\` 处理。
|
|
2852
|
+
- 如果用户只写 \`继续\`,且当前已有 active 或可恢复 campaign,就直接继承当前 campaign 的级别。
|
|
2853
|
+
- 只有当级别本身真的有歧义时,才停下来追问,例如 \`第三层\`、\`phase 3\`、\`table 3\`。
|
|
2854
|
+
- 如果用户显式调用 \`/lab:auto\` 或 \`/lab-auto\`,就保持在 auto 执行路径里;只要请求仍在已批准 execution envelope 内,即使目标听起来像 feature selection、baseline selection、离散化或 candidate sweep,也不要重新路由到 brainstorming 或 spec review。
|
|
2817
2855
|
- 如果用户同时提了论文层、实验 phase 和自治级别,先用一句话重述:objective、自治级别、terminal goal、scope、allowed modifications。
|
|
2818
2856
|
- 如果 workflow language 是中文,摘要、清单条目、任务标签和进度更新都应使用中文,除非文件路径、代码标识符或字面指标名必须保持原样。
|
|
2819
2857
|
- 当循环进入 \`report\` 时,要主动给出用户可读的白话总结,解释主指标、次级指标和主表作用;不要等用户额外发一句“解释这些指标”。
|
|
@@ -2839,6 +2877,17 @@ ZH_CONTENT[path.join(".codex", "skills", "lab", "stages", "auto.md")] = `# \`/la
|
|
|
2839
2877
|
- 当真实实验进程还活着时,只允许发进度更新并继续等待,不能把这一 rung 当作已经完成。
|
|
2840
2878
|
`;
|
|
2841
2879
|
|
|
2880
|
+
// Alias the Claude stage guide to the Codex one (same string value).
// NOTE(review): the identical assignment appears again right after the replace
// chain below, and that later one is what picks up any changes made by the
// chain — this earlier copy looks redundant; confirm before removing.
ZH_CONTENT[path.join(".claude", "skills", "lab", "stages", "auto.md")] =
|
|
2881
|
+
ZH_CONTENT[path.join(".codex", "skills", "lab", "stages", "auto.md")];
|
|
2882
|
+
// Post-process the Codex stage guide with two chained first-match string replaces.
// NOTE(review): as rendered here, the search strings are double-quoted and use
// `\\` before each backtick, which yields a literal backslash followed by a
// backtick. The stage guide is built from a template literal where the same
// text is written with escaped backticks, i.e. bare backticks at runtime, so
// these patterns appear never to match and both replaces are no-ops.
// The first replacement string also uses `\\n` (literal backslash + "n") where
// the second uses `\n` (a real newline).
// The template already contains both inserted sentences directly, so fixing
// the escapes would duplicate them — verify intent before changing anything.
ZH_CONTENT[path.join(".codex", "skills", "lab", "stages", "auto.md")] = ZH_CONTENT[
|
|
2883
|
+
path.join(".codex", "skills", "lab", "stages", "auto.md")
|
|
2884
|
+
].replace(
|
|
2885
|
+
"- 已批准的 \\`L2\\` 和 \\`L3\\` 执行 campaign 默认进入执行模式,而不是重新打开 brainstorming 或 spec-review。",
|
|
2886
|
+
"- 显式的 \\`/lab:auto\\` 或 \\`/lab-auto\\` 请求,其优先级高于 brainstorming、spec review 这类更宽的创作或审阅技能路径。\\n- 已批准的 \\`L2\\` 和 \\`L3\\` 执行 campaign 默认进入执行模式,而不是重新打开 brainstorming 或 spec-review。"
|
|
2887
|
+
).replace(
|
|
2888
|
+
"- 只有当级别本身真的有歧义时,才停下来追问,例如 \\`第三层\\`、\\`phase 3\\`、\\`table 3\\`。",
|
|
2889
|
+
"- 只有当级别本身真的有歧义时,才停下来追问,例如 \\`第三层\\`、\\`phase 3\\`、\\`table 3\\`。\n- 如果用户显式调用 \\`/lab:auto\\` 或 \\`/lab-auto\\`,就保持在 auto 执行路径里;只要请求仍在已批准 execution envelope 内,即使目标听起来像 feature selection、baseline selection、离散化或 candidate sweep,也不要重新路由到 brainstorming 或 spec review。"
|
|
2890
|
+
);
|
|
2842
2891
|
ZH_CONTENT[path.join(".claude", "skills", "lab", "stages", "auto.md")] =
|
|
2843
2892
|
ZH_CONTENT[path.join(".codex", "skills", "lab", "stages", "auto.md")];
|
|
2844
2893
|
ZH_CONTENT[path.join(".claude", "skills", "lab", "stages", "report.md")] =
|
|
@@ -6,8 +6,19 @@ argument-hint: autonomous campaign target
|
|
|
6
6
|
|
|
7
7
|
Use the installed `lab` skill at `.claude/skills/lab/SKILL.md`.
|
|
8
8
|
|
|
9
|
+
Quick continue: `/lab-auto: 继续`.
|
|
10
|
+
Quick continue at the highest execution level: `/lab-auto: L3,继续`.
|
|
11
|
+
|
|
9
12
|
Execute the requested `/lab-auto` command against the user's argument now. Do not only recommend another lab stage. If a blocking prerequisite is missing, say exactly what is missing and ask at most one clarifying question.
|
|
10
13
|
First normalize the user's request into CLI-ready auto contract fields: objective, autonomy level, campaign kind, allowed stages, and any explicit terminal-goal hints that can be inferred without changing scope.
|
|
14
|
+
If the user omits the level, default to `L2`. Accept short forms like `L1`, `L2`, `L3`, lowercase `l1/l2/l3`, and explicit Chinese shortcuts such as `最高级别` or `最高自治` for `L3`.
|
|
15
|
+
If the user says `继续` and there is an active or resumable campaign, reuse the current campaign level instead of asking them to repeat it.
|
|
16
|
+
To continue the current campaign with the stored runtime level, say `/lab-auto: 继续`.
|
|
17
|
+
Only stop for a level clarification when the level reference itself is genuinely ambiguous, such as `第三层`, `phase 3`, or `table 3`.
|
|
18
|
+
An explicit `/lab auto` or `/lab-auto` request has higher priority than broader creative-work or review skills such as brainstorming or spec-review.
|
|
19
|
+
Approved `L2` and `L3` execution campaigns default to execution mode.
|
|
20
|
+
In execution mode, do not enter brainstorming, do not enter spec review, and do not spawn reviewer, explorer, or other subagent loops for routine implementation-path choices, helper scripts, path fixes, dataset adaptation, same-family candidate sweeps, or ordinary self-checks.
|
|
21
|
+
Only switch from execution mode into design or reviewer mode when the user explicitly asks for design or review help, the contract fit requires a new campaign, or the contract's escalation condition explicitly requires independent review.
|
|
11
22
|
Your first execution action must be to run `superlab auto start` against the current project instead of directly rewriting `.lab/context/auto-mode.md`, `.lab/context/auto-status.md`, `.lab/context/auto-ledger.md`, or `.lab/context/auto-outcome.md` yourself.
|
|
12
23
|
Pass the normalized fields through CLI flags, including `--objective`, `--campaign-kind`, `--allowed-stages`, and `--autonomy-level` when the user specified or implied them.
|
|
13
24
|
After that CLI handoff, treat the CLI/runtime result as the source of truth. If it reports rollover, conflict, missing contract fields, or a started campaign, reflect that result back to the user instead of bypassing it with prompt-side edits.
|
|
@@ -6,8 +6,19 @@ argument-hint: autonomous campaign target
|
|
|
6
6
|
|
|
7
7
|
Use the installed `lab` skill at `.claude/skills/lab/SKILL.md`.
|
|
8
8
|
|
|
9
|
+
Quick continue: `/lab-auto: 继续`.
|
|
10
|
+
Quick continue at the highest execution level: `/lab-auto: L3,继续`.
|
|
11
|
+
|
|
9
12
|
Execute the requested `/lab-auto` command against the user's argument now. Do not only recommend another lab stage. If a blocking prerequisite is missing, say exactly what is missing and ask at most one clarifying question.
|
|
10
13
|
First normalize the user's request into CLI-ready auto contract fields: objective, autonomy level, campaign kind, allowed stages, and any explicit terminal-goal hints that can be inferred without changing scope.
|
|
14
|
+
If the user omits the level and is not continuing an existing campaign, default to `L2`. Accept short forms like `L1`, `L2`, `L3`, lowercase `l1/l2/l3`, and explicit Chinese shortcuts such as `最高级别` or `最高自治` for `L3`.
|
|
15
|
+
If the user says `继续` and there is an active or resumable campaign, reuse the current campaign level instead of asking them to repeat it.
|
|
16
|
+
To continue the current campaign with the stored runtime level, say `/lab-auto: 继续`.
|
|
17
|
+
Only stop for a level clarification when the level reference itself is genuinely ambiguous, such as `第三层`, `phase 3`, or `table 3`.
|
|
18
|
+
An explicit `/lab auto` or `/lab-auto` request has higher priority than broader creative-work or review skills such as brainstorming or spec-review.
|
|
19
|
+
Approved `L2` and `L3` execution campaigns default to execution mode.
|
|
20
|
+
In execution mode, do not enter brainstorming, do not enter spec review, and do not spawn reviewer, explorer, or other subagent loops for routine implementation-path choices, helper scripts, path fixes, dataset adaptation, same-family candidate sweeps, or ordinary self-checks.
|
|
21
|
+
Only switch from execution mode into design or reviewer mode when the user explicitly asks for design or review help, the contract fit requires a new campaign, or the contract's escalation condition explicitly requires independent review.
|
|
11
22
|
Your first execution action must be to run `superlab auto start` against the current project instead of directly rewriting `.lab/context/auto-mode.md`, `.lab/context/auto-status.md`, `.lab/context/auto-ledger.md`, or `.lab/context/auto-outcome.md` yourself.
|
|
12
23
|
Pass the normalized fields through CLI flags, including `--objective`, `--campaign-kind`, `--allowed-stages`, and `--autonomy-level` when the user specified or implied them.
|
|
13
24
|
After that CLI handoff, treat the CLI/runtime result as the source of truth. If it reports rollover, conflict, missing contract fields, or a started campaign, reflect that result back to the user instead of bypassing it with prompt-side edits.
|
|
@@ -81,11 +81,19 @@ Treat all of these as equivalent stage requests:
|
|
|
81
81
|
|
|
82
82
|
## Level Guide for `/lab auto`
|
|
83
83
|
|
|
84
|
+
- Continue the current campaign: `/lab-auto: 继续`.
|
|
85
|
+
- Continue the current campaign at the highest execution level: `/lab-auto: L3,继续`.
|
|
84
86
|
- `L1` is the safe validation level. Use it for a smoke run, one bounded real run, or a simple review/report refresh when you do not want automatic iteration.
|
|
85
87
|
- `L2` is the default recommended level. Use it for bounded experiment iteration inside a frozen core when you want auto to keep running until a gate, stop condition, or terminal goal is hit.
|
|
86
|
-
- `L3` is the
|
|
87
|
-
- If
|
|
88
|
-
-
|
|
88
|
+
- `L3` is the continue-until-boundary level. Use it when you want auto to keep pushing inside the approved envelope without stopping for routine implementation-path choices.
|
|
89
|
+
- If the request omits the level and is not continuing an existing campaign, default to `L2`.
|
|
90
|
+
- Accept short level forms such as `L1`, `L2`, `L3`, lowercase `l1/l2/l3`, and explicit Chinese shortcuts like `最高级别` or `最高自治` for `L3`.
|
|
91
|
+
- If the user says `继续` and there is an active or resumable campaign, reuse the current campaign level instead of asking them to repeat it.
|
|
92
|
+
- Only stop and ask when a level reference is genuinely ambiguous, such as `第三层`, `phase 3`, or `table 3`.
|
|
93
|
+
- An explicit `/lab auto` or `/lab-auto` request has higher priority than broader creative-work or review skills such as brainstorming or spec-review.
|
|
94
|
+
- Approved `L2` and `L3` execution campaigns default to execution mode instead of reopening brainstorming or spec-review loops.
|
|
95
|
+
- In execution mode, do not route routine implementation-path choices, helper scripts, path fixes, dataset adaptation, same-family candidate sweeps, or ordinary self-checks into brainstorming, spec review, or reviewer/explorer subagents.
|
|
96
|
+
- Only switch from execution mode into design or reviewer mode when the user explicitly asks for design or review help, the contract fit requires a new campaign, or the contract's escalation condition explicitly requires independent review.
|
|
89
97
|
- The first visible output of a real `/lab auto` run must be `Auto preflight`.
|
|
90
98
|
- That first visible output must show files read plus `Autonomy level`, `Allowed stages`, `Terminal goal`, `Primary gate`, and `Secondary guard`.
|
|
91
99
|
- If the preflight block cannot be completed from `.lab/context/eval-protocol.md`, `.lab/context/auto-mode.md`, `.lab/context/auto-status.md`, `.lab/context/auto-ledger.md`, and `.lab/context/auto-outcome.md`, `/lab auto` should stop instead of acting like the loop is armed.
|
|
@@ -95,10 +103,12 @@ Treat all of these as equivalent stage requests:
|
|
|
95
103
|
- Treat `paper layer`, `phase`, and `table` as experiment targets. For example, `paper layer 3` or `Phase 1` should not be interpreted as `Autonomy level L3`.
|
|
96
104
|
- A good `/lab auto` request should name:
|
|
97
105
|
- the objective
|
|
98
|
-
- the autonomy level
|
|
106
|
+
- the autonomy level, when you want to override the default (`L2`) or the current campaign's level
|
|
99
107
|
- the terminal goal
|
|
100
108
|
- the scope or phase to advance
|
|
101
109
|
- the allowed modifications
|
|
102
110
|
- If the repository workflow language is Chinese, summaries, checklist items, task labels, and progress updates should be written in Chinese unless a code identifier or file path must stay literal.
|
|
103
|
-
- Good
|
|
104
|
-
- `/lab
|
|
111
|
+
- Good examples:
|
|
112
|
+
- `/lab-auto: 继续`
|
|
113
|
+
- `/lab-auto: L3,继续`
|
|
114
|
+
- `/lab auto Objective: advance paper layer 3 through one bounded protocol improvement. Terminal goal: task-completion. Scope: bounded protocol, tests, one minimal implementation, and one small run. Allowed modifications: configuration, evaluation script, and data-loading logic only.`
|
|
@@ -6,8 +6,19 @@ argument-hint: autonomous campaign target
|
|
|
6
6
|
|
|
7
7
|
Use the installed `lab` skill at `.claude/skills/lab/SKILL.md`.
|
|
8
8
|
|
|
9
|
+
Quick continue: `/lab-auto: 继续`.
|
|
10
|
+
Quick continue at the highest execution level: `/lab-auto: L3,继续`.
|
|
11
|
+
|
|
9
12
|
Execute the requested `/lab-auto` command against the user's argument now. Do not only recommend another lab stage. If a blocking prerequisite is missing, say exactly what is missing and ask at most one clarifying question.
|
|
10
13
|
First normalize the user's request into CLI-ready auto contract fields: objective, autonomy level, campaign kind, allowed stages, and any explicit terminal-goal hints that can be inferred without changing scope.
|
|
14
|
+
If the user omits the level and is not continuing an existing campaign, default to `L2`. Accept short forms like `L1`, `L2`, `L3`, lowercase `l1/l2/l3`, and explicit Chinese shortcuts such as `最高级别` or `最高自治` for `L3`.
|
|
15
|
+
If the user says `继续` and there is an active or resumable campaign, reuse the current campaign level instead of asking them to repeat it.
|
|
16
|
+
To continue the current campaign with the stored runtime level, say `/lab-auto: 继续`.
|
|
17
|
+
Only stop for a level clarification when the level reference itself is genuinely ambiguous, such as `第三层`, `phase 3`, or `table 3`.
|
|
18
|
+
An explicit `/lab auto` or `/lab-auto` request has higher priority than broader creative-work or review skills such as brainstorming or spec-review.
|
|
19
|
+
Approved `L2` and `L3` execution campaigns default to execution mode.
|
|
20
|
+
In execution mode, do not enter brainstorming, do not enter spec review, and do not spawn reviewer, explorer, or other subagent loops for routine implementation-path choices, helper scripts, path fixes, dataset adaptation, same-family candidate sweeps, or ordinary self-checks.
|
|
21
|
+
Only switch from execution mode into design or reviewer mode when the user explicitly asks for design or review help, the contract fit requires a new campaign, or the contract's escalation condition explicitly requires independent review.
|
|
11
22
|
Your first execution action must be to run `superlab auto start` against the current project instead of directly rewriting `.lab/context/auto-mode.md`, `.lab/context/auto-status.md`, `.lab/context/auto-ledger.md`, or `.lab/context/auto-outcome.md` yourself.
|
|
12
23
|
Pass the normalized fields through CLI flags, including `--objective`, `--campaign-kind`, `--allowed-stages`, and `--autonomy-level` when the user specified or implied them.
|
|
13
24
|
After that CLI handoff, treat the CLI/runtime result as the source of truth. If it reports rollover, conflict, missing contract fields, or a started campaign, reflect that result back to the user instead of bypassing it with prompt-side edits.
|
|
@@ -6,8 +6,19 @@ argument-hint: autonomous campaign target
|
|
|
6
6
|
|
|
7
7
|
Use the installed `lab` skill at `.claude/skills/lab/SKILL.md`.
|
|
8
8
|
|
|
9
|
+
Quick continue: `/lab-auto: 继续`.
|
|
10
|
+
Quick continue at the highest execution level: `/lab-auto: L3,继续`.
|
|
11
|
+
|
|
9
12
|
Execute the requested `/lab-auto` command against the user's argument now. Do not only recommend another lab stage. If a blocking prerequisite is missing, say exactly what is missing and ask at most one clarifying question.
|
|
10
13
|
First normalize the user's request into CLI-ready auto contract fields: objective, autonomy level, campaign kind, allowed stages, and any explicit terminal-goal hints that can be inferred without changing scope.
|
|
14
|
+
If the user omits the level and is not continuing an existing campaign, default to `L2`. Accept short forms like `L1`, `L2`, `L3`, lowercase `l1/l2/l3`, and explicit Chinese shortcuts such as `最高级别` or `最高自治` for `L3`.
|
|
15
|
+
If the user says `继续` and there is an active or resumable campaign, reuse the current campaign level instead of asking them to repeat it.
|
|
16
|
+
To continue the current campaign with the stored runtime level, say `/lab-auto: 继续`.
|
|
17
|
+
Only stop for a level clarification when the level reference itself is genuinely ambiguous, such as `第三层`, `phase 3`, or `table 3`.
|
|
18
|
+
An explicit `/lab auto` or `/lab-auto` request has higher priority than broader creative-work or review skills such as brainstorming or spec-review.
|
|
19
|
+
Approved `L2` and `L3` execution campaigns default to execution mode.
|
|
20
|
+
In execution mode, do not enter brainstorming, do not enter spec review, and do not spawn reviewer, explorer, or other subagent loops for routine implementation-path choices, helper scripts, path fixes, dataset adaptation, same-family candidate sweeps, or ordinary self-checks.
|
|
21
|
+
Only switch from execution mode into design or reviewer mode when the user explicitly asks for design or review help, the contract fit requires a new campaign, or the contract's escalation condition explicitly requires independent review.
|
|
11
22
|
Your first execution action must be to run `superlab auto start` against the current project instead of directly rewriting `.lab/context/auto-mode.md`, `.lab/context/auto-status.md`, `.lab/context/auto-ledger.md`, or `.lab/context/auto-outcome.md` yourself.
|
|
12
23
|
Pass the normalized fields through CLI flags, including `--objective`, `--campaign-kind`, `--allowed-stages`, and `--autonomy-level` when the user specified or implied them.
|
|
13
24
|
After that CLI handoff, treat the CLI/runtime result as the source of truth. If it reports rollover, conflict, missing contract fields, or a started campaign, reflect that result back to the user instead of bypassing it with prompt-side edits.
|
|
@@ -5,8 +5,19 @@ argument-hint: autonomous campaign target
|
|
|
5
5
|
|
|
6
6
|
Use the installed `lab` skill at `.codex/skills/lab/SKILL.md`.
|
|
7
7
|
|
|
8
|
+
Quick continue: `/lab-auto: 继续`.
|
|
9
|
+
Quick continue at the highest execution level: `/lab-auto: L3,继续`.
|
|
10
|
+
|
|
8
11
|
Execute the requested `/lab:auto` stage against the user's argument now. Do not only recommend another lab stage. If a blocking prerequisite is missing, say exactly what is missing and ask at most one clarifying question.
|
|
9
12
|
First normalize the user's request into CLI-ready auto contract fields: objective, autonomy level, campaign kind, allowed stages, and any explicit terminal-goal hints that can be inferred without changing scope.
|
|
13
|
+
If the user omits the level and is not continuing an existing campaign, default to `L2`. Accept short forms like `L1`, `L2`, `L3`, lowercase `l1/l2/l3`, and explicit Chinese shortcuts such as `最高级别` or `最高自治` for `L3`.
|
|
14
|
+
If the user says `继续` and there is an active or resumable campaign, reuse the current campaign level instead of asking them to repeat it.
|
|
15
|
+
To continue the current campaign with the stored runtime level, say `/lab-auto: 继续`.
|
|
16
|
+
Only stop for a level clarification when the level reference itself is genuinely ambiguous, such as `第三层`, `phase 3`, or `table 3`.
|
|
17
|
+
An explicit `/lab:auto` or `/lab-auto` request has higher priority than broader creative-work or review skills such as brainstorming or spec-review.
|
|
18
|
+
Approved `L2` and `L3` execution campaigns default to execution mode.
|
|
19
|
+
In execution mode, do not enter brainstorming, do not enter spec review, and do not spawn reviewer, explorer, or other subagent loops for routine implementation-path choices, helper scripts, path fixes, dataset adaptation, same-family candidate sweeps, or ordinary self-checks.
|
|
20
|
+
Only switch from execution mode into design or reviewer mode when the user explicitly asks for design or review help, the contract fit requires a new campaign, or the contract's escalation condition explicitly requires independent review.
|
|
10
21
|
Your first execution action must be to run `superlab auto start` against the current project instead of directly rewriting `.lab/context/auto-mode.md`, `.lab/context/auto-status.md`, `.lab/context/auto-ledger.md`, or `.lab/context/auto-outcome.md` yourself.
|
|
11
22
|
Pass the normalized fields through CLI flags, including `--objective`, `--campaign-kind`, `--allowed-stages`, and `--autonomy-level` when the user specified or implied them.
|
|
12
23
|
After that CLI handoff, treat the CLI/runtime result as the source of truth. If it reports rollover, conflict, missing contract fields, or a started campaign, reflect that result back to the user instead of bypassing it with prompt-side edits.
|
|
@@ -5,8 +5,19 @@ argument-hint: autonomous campaign target
|
|
|
5
5
|
|
|
6
6
|
Use the installed `lab` skill at `.codex/skills/lab/SKILL.md`.
|
|
7
7
|
|
|
8
|
+
Quick continue: `/lab-auto: 继续`.
|
|
9
|
+
Quick continue at the highest execution level: `/lab-auto: L3,继续`.
|
|
10
|
+
|
|
8
11
|
Execute the requested `/lab:auto` stage against the user's argument now. Do not only recommend another lab stage. If a blocking prerequisite is missing, say exactly what is missing and ask at most one clarifying question.
|
|
9
12
|
First normalize the user's request into CLI-ready auto contract fields: objective, autonomy level, campaign kind, allowed stages, and any explicit terminal-goal hints that can be inferred without changing scope.
|
|
13
|
+
If the user omits the level and is not continuing an existing campaign, default to `L2`. Accept short forms like `L1`, `L2`, `L3`, lowercase `l1/l2/l3`, and explicit Chinese shortcuts such as `最高级别` or `最高自治` for `L3`.
|
|
14
|
+
If the user says `继续` and there is an active or resumable campaign, reuse the current campaign level instead of asking them to repeat it.
|
|
15
|
+
To continue the current campaign with the stored runtime level, say `/lab-auto: 继续`.
|
|
16
|
+
Only stop for a level clarification when the level reference itself is genuinely ambiguous, such as `第三层`, `phase 3`, or `table 3`.
|
|
17
|
+
An explicit `/lab:auto` or `/lab-auto` request has higher priority than broader creative-work or review skills such as brainstorming or spec-review.
|
|
18
|
+
Approved `L2` and `L3` execution campaigns default to execution mode.
|
|
19
|
+
In execution mode, do not enter brainstorming, do not enter spec review, and do not spawn reviewer, explorer, or other subagent loops for routine implementation-path choices, helper scripts, path fixes, dataset adaptation, same-family candidate sweeps, or ordinary self-checks.
|
|
20
|
+
Only switch from execution mode into design or reviewer mode when the user explicitly asks for design or review help, the contract fit requires a new campaign, or the contract's escalation condition explicitly requires independent review.
|
|
10
21
|
Your first execution action must be to run `superlab auto start` against the current project instead of directly rewriting `.lab/context/auto-mode.md`, `.lab/context/auto-status.md`, `.lab/context/auto-ledger.md`, or `.lab/context/auto-outcome.md` yourself.
|
|
11
22
|
Pass the normalized fields through CLI flags, including `--objective`, `--campaign-kind`, `--allowed-stages`, and `--autonomy-level` when the user specified or implied them.
|
|
12
23
|
After that CLI handoff, treat the CLI/runtime result as the source of truth. If it reports rollover, conflict, missing contract fields, or a started campaign, reflect that result back to the user instead of bypassing it with prompt-side edits.
|
|
@@ -75,11 +75,19 @@ Treat all of these as equivalent stage requests:
|
|
|
75
75
|
|
|
76
76
|
## Level Guide for `/lab:auto`
|
|
77
77
|
|
|
78
|
+
- Continue the current campaign: `/lab-auto: 继续`.
|
|
79
|
+
- Continue the current campaign at the highest execution level: `/lab-auto: L3,继续`.
|
|
78
80
|
- `L1` is the safe validation level. Use it for a smoke run, one bounded real run, or a simple review/report refresh when you do not want automatic iteration.
|
|
79
81
|
- `L2` is the default recommended level. Use it for bounded experiment iteration inside a frozen core when you want auto to keep running until a gate, stop condition, or terminal goal is hit.
|
|
80
|
-
- `L3` is the
|
|
81
|
-
- If
|
|
82
|
-
-
|
|
82
|
+
- `L3` is the continue-until-boundary level. Use it when you want auto to keep pushing inside the approved envelope without stopping for routine implementation-path choices.
|
|
83
|
+
- If the request omits the level and is not continuing an existing campaign, default to `L2`.
|
|
84
|
+
- Accept short level forms such as `L1`, `L2`, `L3`, lowercase `l1/l2/l3`, and explicit Chinese shortcuts like `最高级别` or `最高自治` for `L3`.
|
|
85
|
+
- If the user says `继续` and there is an active or resumable campaign, reuse the current campaign level instead of asking them to repeat it.
|
|
86
|
+
- Only stop and ask when a level reference is genuinely ambiguous, such as `第三层`, `phase 3`, or `table 3`.
|
|
87
|
+
- An explicit `/lab:auto` or `/lab-auto` request has higher priority than broader creative-work or review skills such as brainstorming or spec-review.
|
|
88
|
+
- Approved `L2` and `L3` execution campaigns default to execution mode instead of reopening brainstorming or spec-review loops.
|
|
89
|
+
- In execution mode, do not route routine implementation-path choices, helper scripts, path fixes, dataset adaptation, same-family candidate sweeps, or ordinary self-checks into brainstorming, spec review, or reviewer/explorer subagents.
|
|
90
|
+
- Only switch from execution mode into design or reviewer mode when the user explicitly asks for design or review help, the contract fit requires a new campaign, or the contract's escalation condition explicitly requires independent review.
|
|
83
91
|
- The first visible output of a real `/lab:auto` run must be `Auto preflight`.
|
|
84
92
|
- That first visible output must show files read plus `Autonomy level`, `Allowed stages`, `Terminal goal`, `Primary gate`, and `Secondary guard`.
|
|
85
93
|
- If the preflight block cannot be completed from `.lab/context/eval-protocol.md`, `.lab/context/auto-mode.md`, `.lab/context/auto-status.md`, `.lab/context/auto-ledger.md`, and `.lab/context/auto-outcome.md`, `/lab:auto` should stop instead of acting like the loop is armed.
|
|
@@ -89,10 +97,12 @@ Treat all of these as equivalent stage requests:
|
|
|
89
97
|
- Treat `paper layer`, `phase`, and `table` as experiment targets. For example, `paper layer 3` or `Phase 1` should not be interpreted as `Autonomy level L3`.
|
|
90
98
|
- A good `/lab:auto` request should name:
|
|
91
99
|
- the objective
|
|
92
|
-
- the autonomy level
|
|
100
|
+
- the autonomy level, when you want to override the default (`L2`) or the current campaign's level
|
|
93
101
|
- the terminal goal
|
|
94
102
|
- the scope or phase to advance
|
|
95
103
|
- the allowed modifications
|
|
96
104
|
- If the repository workflow language is Chinese, summaries, checklist items, task labels, and progress updates should be written in Chinese unless a code identifier or file path must stay literal.
|
|
97
|
-
- Good
|
|
98
|
-
- `/lab:auto
|
|
105
|
+
- Good examples:
|
|
106
|
+
- `/lab:auto: 继续`
|
|
107
|
+
- `/lab:auto: L3,继续`
|
|
108
|
+
- `/lab:auto Objective: advance paper layer 3 through one bounded protocol improvement. Terminal goal: task-completion. Scope: bounded protocol, tests, one minimal implementation, and one small run. Allowed modifications: configuration, evaluation script, and data-loading logic only.`
|
|
@@ -5,8 +5,19 @@ argument-hint: autonomous campaign target
|
|
|
5
5
|
|
|
6
6
|
Use the installed `lab` skill at `.codex/skills/lab/SKILL.md`.
|
|
7
7
|
|
|
8
|
+
Quick continue: `/lab-auto: 继续`.
|
|
9
|
+
Quick continue at the highest execution level: `/lab-auto: L3,继续`.
|
|
10
|
+
|
|
8
11
|
Execute the requested `/lab:auto` stage against the user's argument now. Do not only recommend another lab stage. If a blocking prerequisite is missing, say exactly what is missing and ask at most one clarifying question.
|
|
9
12
|
First normalize the user's request into CLI-ready auto contract fields: objective, autonomy level, campaign kind, allowed stages, and any explicit terminal-goal hints that can be inferred without changing scope.
|
|
13
|
+
If the user omits the level and is not continuing an existing campaign, default to `L2`. Accept short forms like `L1`, `L2`, `L3`, lowercase `l1/l2/l3`, and explicit Chinese shortcuts such as `最高级别` or `最高自治` for `L3`.
|
|
14
|
+
If the user says `继续` and there is an active or resumable campaign, reuse the current campaign level instead of asking them to repeat it.
|
|
15
|
+
To continue the current campaign with the stored runtime level, say `/lab-auto: 继续`.
|
|
16
|
+
Only stop for a level clarification when the level reference itself is genuinely ambiguous, such as `第三层`, `phase 3`, or `table 3`.
|
|
17
|
+
An explicit `/lab:auto` or `/lab-auto` request has higher priority than broader creative-work or review skills such as brainstorming or spec-review.
|
|
18
|
+
Approved `L2` and `L3` execution campaigns default to execution mode.
|
|
19
|
+
In execution mode, do not enter brainstorming, do not enter spec review, and do not spawn reviewer, explorer, or other subagent loops for routine implementation-path choices, helper scripts, path fixes, dataset adaptation, same-family candidate sweeps, or ordinary self-checks.
|
|
20
|
+
Only switch from execution mode into design or reviewer mode when the user explicitly asks for design or review help, the contract fit requires a new campaign, or the contract's escalation condition explicitly requires independent review.
|
|
10
21
|
Your first execution action must be to run `superlab auto start` against the current project instead of directly rewriting `.lab/context/auto-mode.md`, `.lab/context/auto-status.md`, `.lab/context/auto-ledger.md`, or `.lab/context/auto-outcome.md` yourself.
|
|
11
22
|
Pass the normalized fields through CLI flags, including `--objective`, `--campaign-kind`, `--allowed-stages`, and `--autonomy-level` when the user specified or implied them.
|
|
12
23
|
After that CLI handoff, treat the CLI/runtime result as the source of truth. If it reports rollover, conflict, missing contract fields, or a started campaign, reflect that result back to the user instead of bypassing it with prompt-side edits.
|
|
@@ -5,8 +5,19 @@ argument-hint: autonomous campaign target
|
|
|
5
5
|
|
|
6
6
|
Use the installed `lab` skill at `.codex/skills/lab/SKILL.md`.
|
|
7
7
|
|
|
8
|
+
Quick continue: `/lab-auto: 继续`.
|
|
9
|
+
Quick continue at the highest execution level: `/lab-auto: L3,继续`.
|
|
10
|
+
|
|
8
11
|
Execute the requested `/lab:auto` stage against the user's argument now. Do not only recommend another lab stage. If a blocking prerequisite is missing, say exactly what is missing and ask at most one clarifying question.
|
|
9
12
|
First normalize the user's request into CLI-ready auto contract fields: objective, autonomy level, campaign kind, allowed stages, and any explicit terminal-goal hints that can be inferred without changing scope.
|
|
13
|
+
If the user omits the level and is not continuing an existing campaign, default to `L2`. Accept short forms like `L1`, `L2`, `L3`, lowercase `l1/l2/l3`, and explicit Chinese shortcuts such as `最高级别` or `最高自治` for `L3`.
|
|
14
|
+
If the user says `继续` and there is an active or resumable campaign, reuse the current campaign level instead of asking them to repeat it.
|
|
15
|
+
To continue the current campaign with the stored runtime level, say `/lab-auto: 继续`.
|
|
16
|
+
Only stop for a level clarification when the level reference itself is genuinely ambiguous, such as `第三层`, `phase 3`, or `table 3`.
|
|
17
|
+
An explicit `/lab:auto` or `/lab-auto` request has higher priority than broader creative-work or review skills such as brainstorming or spec-review.
|
|
18
|
+
Approved `L2` and `L3` execution campaigns default to execution mode.
|
|
19
|
+
In execution mode, do not enter brainstorming, do not enter spec review, and do not spawn reviewer, explorer, or other subagent loops for routine implementation-path choices, helper scripts, path fixes, dataset adaptation, same-family candidate sweeps, or ordinary self-checks.
|
|
20
|
+
Only switch from execution mode into design or reviewer mode when the user explicitly asks for design or review help, the contract fit requires a new campaign, or the contract's escalation condition explicitly requires independent review.
|
|
10
21
|
Your first execution action must be to run `superlab auto start` against the current project instead of directly rewriting `.lab/context/auto-mode.md`, `.lab/context/auto-status.md`, `.lab/context/auto-ledger.md`, or `.lab/context/auto-outcome.md` yourself.
|
|
11
22
|
Pass the normalized fields through CLI flags, including `--objective`, `--campaign-kind`, `--allowed-stages`, and `--autonomy-level` when the user specified or implied them.
|
|
12
23
|
After that CLI handoff, treat the CLI/runtime result as the source of truth. If it reports rollover, conflict, missing contract fields, or a started campaign, reflect that result back to the user instead of bypassing it with prompt-side edits.
|
|
@@ -115,8 +115,17 @@ Use this skill when the user invokes `/lab:*` or asks for the structured researc
|
|
|
115
115
|
- Start every `/lab:auto` run with a visible `Auto preflight` summary that reports files read plus `Autonomy level`, `Approval status`, `Allowed stages`, `Terminal goal`, `Primary gate`, and `Secondary guard`.
|
|
116
116
|
- If any required preflight field is missing or inconsistent, stop before any loop action. Do not present a fake auto summary as if the loop were armed.
|
|
117
117
|
- Keep `.lab/context/auto-ledger.md` updated with the active owner, observed state, and resume boundary while the loop is live.
|
|
118
|
-
- Treat `L1` as safe-run validation, `L2` as bounded iteration, and `L3` as
|
|
119
|
-
- Surface the level guide every time `/lab:auto` starts
|
|
118
|
+
- Treat `L1` as safe-run validation, `L2` as bounded iteration, and `L3` as continue-until-boundary execution inside the approved envelope.
|
|
119
|
+
- Surface the level guide every time `/lab:auto` starts.
|
|
120
|
+
- If the user omits the level and is not continuing an existing campaign, default to `L2`.
|
|
121
|
+
- Accept short forms such as `L1`, `L2`, `L3`, lowercase `l1/l2/l3`, and explicit Chinese shortcuts like `最高级别` or `最高自治` for `L3`.
|
|
122
|
+
- If the user says `继续` and there is an active or resumable campaign, reuse the current campaign level.
|
|
123
|
+
- Only require a clarification when the level reference itself is genuinely ambiguous, such as `第三层`, `phase 3`, or `table 3`.
|
|
124
|
+
- Treat an explicit `/lab:auto` or `/lab-auto` request as higher priority than broader creative-work or review skills such as brainstorming or spec-review.
|
|
125
|
+
- Treat approved `L2` and `L3` campaigns as execution mode by default.
|
|
126
|
+
- In execution mode, do not reopen brainstorming, do not reopen spec review, and do not spawn reviewer, explorer, or other subagent loops for routine implementation-path choices, helper scripts, path fixes, dataset adaptation, same-family candidate sweeps, or ordinary self-checks.
|
|
127
|
+
- Only switch from execution mode into design or reviewer mode when the user explicitly asks for design or review help, the contract fit requires a new campaign, or the contract's escalation condition explicitly requires independent review.
|
|
128
|
+
- If an explicit `/lab:auto` request stays inside the approved execution envelope, keep it in the auto execution path even when it mentions design-like tasks such as feature selection, baseline selection, discretization, or candidate sweeps.
|
|
120
129
|
- Reuse `/lab:run`, `/lab:iterate`, `/lab:review`, `/lab:report`, and optional `/lab:write` instead of inventing a second workflow.
|
|
121
130
|
- Do not automatically change the research mission, paper-facing framing, or core claims.
|
|
122
131
|
- You may add exploratory datasets, benchmarks, and comparison methods inside the approved exploration envelope.
|
|
@@ -66,7 +66,11 @@
|
|
|
66
66
|
- Recommended level meanings:
|
|
67
67
|
- `L1`: safe run validation over `run`, `review`, and `report`
|
|
68
68
|
- `L2`: bounded iteration over `run`, `iterate`, `review`, and `report`
|
|
69
|
-
|
|
69
|
+
- `L3`: continue-until-boundary execution over the approved envelope; may also include `write` when framing is already approved
|
|
70
|
+
- An explicit `/lab:auto` or `/lab-auto` request has higher priority than broader creative-work or review skills such as brainstorming or spec-review.
|
|
71
|
+
- Approved `L2` and `L3` execution campaigns default to execution mode.
|
|
72
|
+
- In execution mode, do not reopen brainstorming, do not reopen spec review, and do not spawn reviewer, explorer, or other subagent loops for routine implementation-path choices, helper scripts, path fixes, dataset adaptation, same-family candidate sweeps, or ordinary self-checks.
|
|
73
|
+
- Only switch from execution mode into design or reviewer mode when the user explicitly asks for design or review help, the contract fit requires a new campaign, or the contract's escalation condition explicitly requires independent review.
|
|
70
74
|
- Default allowed stages are `run`, `iterate`, `review`, and `report`. Only include `write` when framing is already approved and manuscript drafting is within scope.
|
|
71
75
|
- Do not automatically change the research mission, paper-facing framing, or core claims.
|
|
72
76
|
- You may add exploratory datasets, benchmarks, and comparison methods inside the exploration envelope.
|
|
@@ -127,9 +131,11 @@
|
|
|
127
131
|
|
|
128
132
|
## Level Guide
|
|
129
133
|
|
|
134
|
+
- Continue the current campaign: `/lab-auto: 继续`.
|
|
135
|
+
- Continue the current campaign at the highest execution level: `/lab-auto: L3,继续`.
|
|
130
136
|
- `L1` = safe validation
|
|
131
137
|
- `L2` = default recommended bounded iteration
|
|
132
|
-
- `L3` =
|
|
138
|
+
- `L3` = continue-until-boundary execution inside the approved envelope
|
|
133
139
|
- If you are unsure, choose `L2`.
|
|
134
140
|
|
|
135
141
|
- Start with a concise summary of the objective, the frozen core, and the next automatic stage.
|
|
@@ -144,15 +150,14 @@
|
|
|
144
150
|
- Normalize ambiguous user requests before arming the loop.
|
|
145
151
|
- Treat `Autonomy level L1/L2/L3` as execution privilege only.
|
|
146
152
|
- Treat `Layer`, `Phase`, and `Table` references as paper-structure or experiment-scope targets, not as autonomy levels.
|
|
147
|
-
- If the user does not name an autonomy level,
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
- Ask for one explicit level choice before arming the loop after that detailed guide.
|
|
153
|
+
- If the user does not name an autonomy level, default to `L2` instead of stopping for a level-only clarification.
|
|
154
|
+
- Accept short level forms such as `L1`, `L2`, `L3`, lowercase `l1/l2/l3`, and explicit Chinese shortcuts like `最高级别` or `最高自治` for `L3`.
|
|
155
|
+
- If the user says `继续` and there is an active or resumable campaign, reuse the current campaign level instead of asking them to repeat it.
|
|
156
|
+
- Only stop for a level clarification when the level reference itself is genuinely ambiguous, such as `第三层`, `phase 3`, or `table 3`.
|
|
157
|
+
- If the user explicitly invokes `/lab:auto` or `/lab-auto` and the requested work stays inside the approved execution envelope, keep the request in the auto execution path even when the objective sounds design-heavy, such as feature selection, baseline selection, discretization, or candidate sweeps.
|
|
153
158
|
- Example:
|
|
154
159
|
- `Layer 3` means a paper layer or experiment target.
|
|
155
|
-
- `Autonomy level L3` means the
|
|
160
|
+
- `Autonomy level L3` means the continue-until-boundary permission envelope.
|
|
156
161
|
- If the user mixes framework work and experiment work in one request, restate a normalized contract with:
|
|
157
162
|
- objective
|
|
158
163
|
- autonomy level
|