npm - superlab - Versions diffs - 0.1.74 → 0.1.75 - Mend

superlab 0.1.74 → 0.1.75

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10)

package/lib/i18n.cjs +26 -0
package/package-assets/claude/commands/lab.md +1 -0
package/package-assets/codex/prompts/lab/auto.md +1 -0
package/package-assets/codex/prompts/lab-auto.md +1 -0
package/package-assets/codex/prompts/lab.md +1 -0
package/package-assets/codex/prompts/lab:auto.md +1 -0
package/package-assets/codex/prompts/lab：auto.md +1 -0
package/package-assets/shared/skills/lab/SKILL.md +1 -0
package/package-assets/shared/skills/lab/stages/auto.md +10 -0
package/package.json +1 -1

package/lib/i18n.cjs CHANGED Viewed

@@ -2703,6 +2703,8 @@ const zhAutoPriorityCodexLine =
   "显式的 `/lab:auto` 或 `/lab-auto` 请求，其优先级高于 brainstorming、spec review 这类更宽的创作或审阅技能路径。";
 const zhAutoPriorityClaudeLine =
   "显式的 `/lab auto` 或 `/lab-auto` 请求，其优先级高于 brainstorming、spec review 这类更宽的创作或审阅技能路径。";
+const zhAutoVisibleCloseoutLine =
+  "最终可见收尾必须直接消费已通过校验的 stage report：展示请求交付物或目标的状态、核心说明表的关键行、证据路径、验证命令和验证结果、已知缺口，以及下一步动作和原因。不能只用“已完成”“已推送”或流水账命令日志结束。";
 ZH_CONTENT[path.join(".codex", "prompts", "lab.md")] = ZH_CONTENT[
   path.join(".codex", "prompts", "lab.md")
@@ -2716,6 +2718,9 @@ ZH_CONTENT[path.join(".codex", "prompts", "lab-auto.md")] = ZH_CONTENT[
 ].replace(
   "已批准的 `L2` 和 `L3` 执行 campaign 默认进入执行模式。",
   `${zhAutoPriorityCodexLine}\n已批准的 \`L2\` 和 \`L3\` 执行 campaign 默认进入执行模式。`
+).replace(
+  "不要用 `sleep 30`、单次 `pgrep` 或一次性的 `metrics.json` 探针来代替真实长任务命令；当真实实验进程还活着时，只允许在出现有意义变化时发进度更新，并继续等待。没有新变化时，也只按保活节奏汇报，不要让用户触发下一次轮询。",
+  "不要用 `sleep 30`、单次 `pgrep` 或一次性的 `metrics.json` 探针来代替真实长任务命令；当真实实验进程还活着时，只允许在出现有意义变化时发进度更新，并继续等待。没有新变化时，也只按保活节奏汇报，不要让用户触发下一次轮询。\n\n" + zhAutoVisibleCloseoutLine
 );
 ZH_CONTENT[path.join(".claude", "commands", "lab.md")] = ZH_CONTENT[
@@ -2730,6 +2735,9 @@ ZH_CONTENT[path.join(".claude", "commands", "lab-auto.md")] = ZH_CONTENT[
 ].replace(
   "已批准的 `L2` 和 `L3` 执行 campaign 默认进入执行模式。",
   `${zhAutoPriorityClaudeLine}\n已批准的 \`L2\` 和 \`L3\` 执行 campaign 默认进入执行模式。`
+).replace(
+  "不要用 `sleep 30`、单次 `pgrep` 或一次性的 `metrics.json` 探针来代替真实长任务命令；当真实实验进程还活着时，只允许在出现有意义变化时发进度更新，并继续等待。没有新变化时，也只按保活节奏汇报，不要让用户触发下一次轮询。",
+  "不要用 `sleep 30`、单次 `pgrep` 或一次性的 `metrics.json` 探针来代替真实长任务命令；当真实实验进程还活着时，只允许在出现有意义变化时发进度更新，并继续等待。没有新变化时，也只按保活节奏汇报，不要让用户触发下一次轮询。\n\n" + zhAutoVisibleCloseoutLine
 );
 const zhRecipeQuickPathLine =
@@ -2761,6 +2769,12 @@ ZH_CONTENT[path.join(".claude", "commands", "lab.md")] = ZH_CONTENT[
   "- 用户只要显式调用某个 stage，无论写成 `/lab:<stage>`、`/lab: <stage>`、`/lab <stage>`、`/lab-<stage>` 还是 `/lab：<stage>`，都要立刻执行该 stage，而不是只推荐别的阶段。\n- 如果输入看起来像 stage 请求，但又不属于上述受支持写法，就必须停下并要求用户用精确的 stage 名重述，而不是自己猜。\n"
 );
+for (const rootPromptKey of [path.join(".codex", "prompts", "lab.md"), path.join(".claude", "commands", "lab.md")]) {
+  if (ZH_CONTENT[rootPromptKey] && !ZH_CONTENT[rootPromptKey].includes("最终可见收尾")) {
+    ZH_CONTENT[rootPromptKey] += `\n\n${zhAutoVisibleCloseoutLine}\n`;
+  }
+}
 ZH_CONTENT[path.join(".codex", "skills", "lab", "SKILL.md")] = `---
 name: lab
 description: 严格研究工作流，覆盖 idea、data、auto、framing、spec、run、iterate、review、report 和 paper-writing。
@@ -3405,6 +3419,18 @@ ZH_CONTENT[path.join(".codex", "skills", "lab", "stages", "auto.md")] = ZH_CONTE
   "- 只有当级别本身真的有歧义时，才停下来追问，例如 \\`第三层\\`、\\`phase 3\\`、\\`table 3\\`。",
   "- 只有当级别本身真的有歧义时，才停下来追问，例如 \\`第三层\\`、\\`phase 3\\`、\\`table 3\\`。\n- 如果用户显式调用 \\`/lab:auto\\` 或 \\`/lab-auto\\`，就保持在 auto 执行路径里；只要请求仍在已批准 execution envelope 内，即使目标听起来像 feature selection、baseline selection、离散化或 candidate sweep，也不要重新路由到 brainstorming 或 spec review。"
 );
+const zhAutoStageVisibleCloseout = `
+## 最终可见收尾
+- 最终可见收尾必须在 stage report 校验通过后给出，不能只写“已完成”“已推送”或命令流水账。
+- 最终可见收尾必须直接来自已校验的阶段报告，而不是另起一套临场叙述。
+- 最终可见收尾至少包含：请求交付物或目标及状态、核心说明表关键行、证据路径、验证命令和验证结果、已知缺口、下一步动作和为什么这样做。
+- 如果说“已完成”，也必须同时写明仍然存在的 handoff 边界，例如 PDF 编译、版面检查、外部审批、预算耗尽、冻结核心风险或环境缺失。
+`;
+if (!ZH_CONTENT[path.join(".codex", "skills", "lab", "stages", "auto.md")].includes("最终可见收尾")) {
+  ZH_CONTENT[path.join(".codex", "skills", "lab", "stages", "auto.md")] += zhAutoStageVisibleCloseout;
+}
 ZH_CONTENT[path.join(".claude", "skills", "lab", "stages", "auto.md")] =
   ZH_CONTENT[path.join(".codex", "skills", "lab", "stages", "auto.md")];
 ZH_CONTENT[path.join(".claude", "skills", "lab", "stages", "report.md")] =

package/package-assets/claude/commands/lab.md CHANGED Viewed

@@ -100,6 +100,7 @@ Treat all of these as equivalent stage requests:
 - While the loop is alive, `/lab auto` should keep `.lab/context/auto-ledger.md` updated with the active owner, observed state, and resume boundary.
 - Separate internal polling from user-facing progress reports.
 - While the loop is healthy, `/lab auto` should report to the user only on a meaningful change or at the keepalive cadence recorded in the current contract or runtime state, and it should not ask the user to trigger the next poll.
+- Final visible closeout must consume the validated stage report: show requested deliverable statuses, Core Explanation Table rows, evidence paths, validation/verification commands and results, known gaps, and the next action. Do not end with only "done", "pushed", or a chronological command log.
 - Treat `Autonomy level L1/L2/L3` as the execution privilege level, not as a paper layer, phase, or table number.
 - Treat `paper layer`, `phase`, and `table` as experiment targets. For example, `paper layer 3` or `Phase 1` should not be interpreted as `Autonomy level L3`.

package/package-assets/codex/prompts/lab/auto.md CHANGED Viewed

@@ -27,3 +27,4 @@ If the preflight block cannot be completed because any required field is missing
 When the repository workflow language is Chinese, summaries, checklist items, task labels, and progress updates should be written in Chinese unless a literal identifier must stay unchanged.
 Treat `Layer 3`, `Phase 1`, or `Table 2` as paper-scope targets. Treat `Autonomy level L3` as the execution permission level.
 Do not replace the real long-running experiment command with a short watcher such as `sleep 30`, `pgrep`, or a one-shot `metrics.json` probe. While the real experiment process is still alive, emit only a progress update and keep waiting.
+Final visible closeout is mandatory when `/lab:auto` reaches stop, failure, escalation, or handoff. After validating the stage report, the final answer must consume that report directly: list the requested deliverables or objectives with status, summarize the Core Explanation Table rows, provide evidence paths, show validation/verification commands and validation results, name known gaps or commands that could not run, and state the next action plus why it is appropriate. Do not end with only `done`, `pushed`, `completed`, or a chronological command log.

package/package-assets/codex/prompts/lab-auto.md CHANGED Viewed

@@ -27,3 +27,4 @@ If the preflight block cannot be completed because any required field is missing
 When the repository workflow language is Chinese, summaries, checklist items, task labels, and progress updates should be written in Chinese unless a literal identifier must stay unchanged.
 Treat `Layer 3`, `Phase 1`, or `Table 2` as paper-scope targets. Treat `Autonomy level L3` as the execution permission level.
 Do not replace the real long-running experiment command with a short watcher such as `sleep 30`, `pgrep`, or a one-shot `metrics.json` probe. While the real experiment process is still alive, emit only a progress update and keep waiting.
+Final visible closeout is mandatory when `/lab:auto` reaches stop, failure, escalation, or handoff. After validating the stage report, the final answer must consume that report directly: list the requested deliverables or objectives with status, summarize the Core Explanation Table rows, provide evidence paths, show validation/verification commands and validation results, name known gaps or commands that could not run, and state the next action plus why it is appropriate. Do not end with only `done`, `pushed`, `completed`, or a chronological command log.

package/package-assets/codex/prompts/lab.md CHANGED Viewed

@@ -94,6 +94,7 @@ Treat all of these as equivalent stage requests:
 - While the loop is alive, `/lab:auto` should keep `.lab/context/auto-ledger.md` updated with the active owner, observed state, and resume boundary.
 - Separate internal polling from user-facing progress reports.
 - While the loop is healthy, `/lab:auto` should report to the user only on a meaningful change or at the keepalive cadence recorded in the current contract or runtime state, and it should not ask the user to trigger the next poll.
+- Final visible closeout must consume the validated stage report: show requested deliverable statuses, Core Explanation Table rows, evidence paths, validation/verification commands and results, known gaps, and the next action. Do not end with only "done", "pushed", or a chronological command log.
 - Treat `Autonomy level L1/L2/L3` as the execution privilege level, not as a paper layer, phase, or table number.
 - Treat `paper layer`, `phase`, and `table` as experiment targets. For example, `paper layer 3` or `Phase 1` should not be interpreted as `Autonomy level L3`.

package/package-assets/codex/prompts/lab:auto.md CHANGED Viewed

@@ -27,3 +27,4 @@ If the preflight block cannot be completed because any required field is missing
 When the repository workflow language is Chinese, summaries, checklist items, task labels, and progress updates should be written in Chinese unless a literal identifier must stay unchanged.
 Treat `Layer 3`, `Phase 1`, or `Table 2` as paper-scope targets. Treat `Autonomy level L3` as the execution permission level.
 Do not replace the real long-running experiment command with a short watcher such as `sleep 30`, `pgrep`, or a one-shot `metrics.json` probe. While the real experiment process is still alive, emit only a progress update and keep waiting.
+Final visible closeout is mandatory when `/lab:auto` reaches stop, failure, escalation, or handoff. After validating the stage report, the final answer must consume that report directly: list the requested deliverables or objectives with status, summarize the Core Explanation Table rows, provide evidence paths, show validation/verification commands and validation results, name known gaps or commands that could not run, and state the next action plus why it is appropriate. Do not end with only `done`, `pushed`, `completed`, or a chronological command log.

package/package-assets/codex/prompts/lab：auto.md CHANGED Viewed

@@ -27,3 +27,4 @@ If the preflight block cannot be completed because any required field is missing
 When the repository workflow language is Chinese, summaries, checklist items, task labels, and progress updates should be written in Chinese unless a literal identifier must stay unchanged.
 Treat `Layer 3`, `Phase 1`, or `Table 2` as paper-scope targets. Treat `Autonomy level L3` as the execution permission level.
 Do not replace the real long-running experiment command with a short watcher such as `sleep 30`, `pgrep`, or a one-shot `metrics.json` probe. While the real experiment process is still alive, emit only a progress update and keep waiting.
+Final visible closeout is mandatory when `/lab:auto` reaches stop, failure, escalation, or handoff. After validating the stage report, the final answer must consume that report directly: list the requested deliverables or objectives with status, summarize the Core Explanation Table rows, provide evidence paths, show validation/verification commands and validation results, name known gaps or commands that could not run, and state the next action plus why it is appropriate. Do not end with only `done`, `pushed`, `completed`, or a chronological command log.

package/package-assets/shared/skills/lab/SKILL.md CHANGED Viewed

@@ -49,6 +49,7 @@ Use this skill when the user invokes `/lab:*` or asks for the structured researc
 - If the stage says improvement is needed, do not choose `stop` unless the next action states a concrete terminal boundary such as budget exhaustion, frozen-core risk, safety or integrity failure, impossible target, or a required approval boundary. Otherwise choose `continue`, `revise`, `rerun`, or `escalate`.
 - Stage reports are closeout and handoff artifacts, not a new user command and not a replacement for stage-specific artifacts such as idea memos, iteration reports, final reports, or write-iteration records.
 - Run `.lab/.managed/scripts/validate_stage_report.py --stage-report <stage-report> --stage <stage>` before claiming the stage is complete, and include the stage-report path plus validation result in the final user-facing summary.
+- For `/lab:auto`, the final user-facing answer must visibly consume the validated stage report: summarize requested deliverable statuses, Core Explanation Table rows, evidence paths, validation/verification commands and results, known gaps, and the next action. A chat-only chronological result list is not a valid closeout.
 - Final paper output should default to LaTeX, and its manuscript language should be decided separately from the workflow language.
 - Separate sourced facts from model-generated hypotheses.
 - Preserve failed runs, failed ideas, and limitations.

package/package-assets/shared/skills/lab/stages/auto.md CHANGED Viewed

@@ -230,3 +230,13 @@
 - Fill the `Core Explanation Table` in plain language: background, why now, what ran, how the loop ran, what worked, what did not work, what was verified, what remains unverified, what needs improvement and why, how to improve and why, key evidence, and the continue/stop/revise/rerun/escalate/handoff decision.
 - If the table says improvement is needed, the next action may be `stop` only when a terminal boundary is explicitly named; otherwise choose `continue`, `revise`, `rerun`, or `escalate`.
 - Run `.lab/.managed/scripts/validate_stage_report.py --stage-report <stage-report> --stage auto` and include the report path plus validation result in the final user-facing summary.
+- Final visible closeout is mandatory after validation. Do not end `/lab:auto` with only "done", "pushed", "completed", or a chronological command log.
+- The final visible closeout must be derived from the validated stage report, not from a separate improvised narrative.
+- The final visible closeout must include:
+  - the user's requested deliverables or objectives and their status: completed, repaired, failed-gate, not promoted, blocked, or handoff
+  - the key Core Explanation Table rows: what was done, how it was done, what worked, what did not work, what was verified, what remains unverified, whether improvement is needed and why, how to improve and why
+  - evidence paths and primary artifacts
+  - validation/verification commands and validation result, including commands that could not run
+  - known gaps or compile/runtime limitations
+  - next action and why that action is appropriate
+- If the final answer says the work is "completed", it must still name any remaining handoff boundary such as PDF compile, layout check, external approval, budget exhaustion, frozen-core risk, or missing environment.

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "superlab",
-  "version": "0.1.74",
+  "version": "0.1.75",
   "description": "Strict /lab research workflow installer for Codex and Claude",
   "keywords": [
     "codex",