npm - superlab - Versions diffs - 0.1.69 → 0.1.71 - Mend

superlab 0.1.69 → 0.1.71

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (25) hide show

package/lib/i18n.cjs CHANGED Viewed

@@ -887,6 +887,60 @@ const ZH_SKILL_FILES = {
 - [ ] 标准化并验证评估摘要。
 - [ ] 执行有边界的 iteration rounds。
 - [ ] 产出 final report。
+`,
+  [path.join(".lab", ".managed", "templates", "stage-report.md")]:
+`# 阶段报告
+## 规则预检
+- Rule source file:
+- Rule source revision:
+- Project version:
+- Resolved stage:
+- Resolved mode:
+- Resolved target:
+- Preflight stamp:
+- Override reason, if any:
+## 阶段身份
+- Stage:
+- Target:
+- Date:
+- Status:
+- Primary artifact:
+- Next owner:
+## 核心说明表
+| 问题 | 白话回答 |
+|---|---|
+| 这是什么阶段？ |  |
+| 背景是什么？ |  |
+| 为什么现在要做？ |  |
+| 这轮具体做了什么？ |  |
+| 怎么做的？ |  |
+| 结果好的地方是什么？ |  |
+| 结果坏的地方是什么？ |  |
+| 这验证了什么？ |  |
+| 还没有验证什么？ |  |
+| 是否需要改进？为什么？ |  |
+| 下一步怎么改？为什么这样改？ |  |
+| 关键证据在哪里？ |  |
+| 现在应该继续、停止、重做还是升级？ |  |
+## 证据与工件
+- 主工件：
+- 支撑工件：
+- 验证命令：
+- 已知缺口：
+## 下一步动作
+- 决策：continue / stop / revise / rerun / escalate / handoff
+- 具体下一步：
+- 为什么这样做：
 `,
   [path.join(".lab", ".managed", "templates", "iteration-report.md")]:
 `# 迭代报告
@@ -3020,6 +3074,41 @@ ZH_CONTENT[path.join(".claude", "skills", "lab", "stages", "auto.md")] =
 ZH_CONTENT[path.join(".claude", "skills", "lab", "stages", "report.md")] =
   ZH_CONTENT[path.join(".codex", "skills", "lab", "stages", "report.md")];
+// Chinese "stage report closeout" section that is appended to the per-stage
+// skill documents by the loop below.
+const zhStageReportCloseout = `
+## 阶段报告收尾
+- 阶段进入最终交接、停止、失败或升级前，必须用 \`.lab/.managed/templates/stage-report.md\` 写或更新一个 \`.lab/stage-reports/<date>--<stage>--<target>.md\`。
+- \`核心说明表\` 必须用白话写清背景、为什么现在做、做了什么、怎么做、好结果、坏结果、验证了什么、还没验证什么、是否需要改进及原因、下一步怎么改及原因、关键证据，以及 continue/stop/revise/rerun/escalate/handoff 决策。
+- 阶段报告是收尾和交接工件，不是新命令，也不能替代 idea、iteration report、final report 或 write iteration 等阶段专属工件。
+- 收尾前运行 \`.lab/.managed/scripts/validate_stage_report.py --stage-report <stage-report> --stage <stage>\`，并在最终给用户的总结里给出阶段报告路径和校验结果。
+`;
+// Append the closeout section to every listed stage document under both
+// platform roots. An entry is skipped when it is missing from ZH_CONTENT or
+// already mentions validate_stage_report.py, so the section is not added twice.
+for (const platformRoot of [".codex", ".claude"]) {
+  for (const stage of ["idea", "data", "auto", "framing", "spec", "run", "iterate", "review", "report", "write"]) {
+    const key = path.join(platformRoot, "skills", "lab", "stages", `${stage}.md`);
+    if (ZH_CONTENT[key] && !ZH_CONTENT[key].includes("validate_stage_report.py")) {
+      ZH_CONTENT[key] += zhStageReportCloseout;
+    }
+  }
+}
+// Chinese global stage-report rules that are spliced into each platform's
+// top-level SKILL.md by the loop below.
+const zhStageReportGlobalRules =
+  "- 每个 `/lab:*` 阶段进入最终交接前，都必须在 `.lab/stage-reports/` 下写或更新一个白话阶段报告，并通过 `.lab/.managed/scripts/validate_stage_report.py` 校验。\n" +
+  "- 阶段报告必须包含核心说明表，讲清背景、为什么做、做了什么、怎么做、好坏结果、验证与未验证边界、是否需要改进及原因、下一步怎么改及原因、关键证据和继续/停止/修订/重跑/升级/交接决策。\n" +
+  "- 阶段报告只是统一收尾和交接层，不新增用户命令，也不替代各阶段自己的受管工件。\n";
+// Insert the global rules directly after the "installed rules take priority"
+// bullet. Entries that are missing or already mention validate_stage_report.py
+// are skipped.
+// NOTE(review): replace() with a string pattern only touches the first
+// occurrence and returns the input unchanged when the anchor sentence is not
+// found, so a reworded anchor would silently drop these rules.
+for (const platformRoot of [".codex", ".claude"]) {
+  const key = path.join(platformRoot, "skills", "lab", "SKILL.md");
+  if (ZH_CONTENT[key] && !ZH_CONTENT[key].includes("validate_stage_report.py")) {
+    ZH_CONTENT[key] = ZH_CONTENT[key].replace(
+      "- 项目里已安装的规则优先于模型记忆；如果记忆里的旧做法和 `.lab/.managed/rule-manifest.json` 记录的规则冲突，以项目里安装的规则为准。\n",
+      "- 项目里已安装的规则优先于模型记忆；如果记忆里的旧做法和 `.lab/.managed/rule-manifest.json` 记录的规则冲突，以项目里安装的规则为准。\n" +
+        zhStageReportGlobalRules
+    );
+  }
+}
 function getLocalizedContent(relativePath, lang) {
   if (lang !== "zh") {
     return null;

package/lib/install.cjs CHANGED Viewed

@@ -674,6 +674,7 @@ function localizeInstalledAssets(targetDir, lang, { newlyCreatedProjectOwnedPath
     path.join(".lab", ".managed", "templates", "design.md"),
     path.join(".lab", ".managed", "templates", "spec.md"),
     path.join(".lab", ".managed", "templates", "tasks.md"),
+    path.join(".lab", ".managed", "templates", "stage-report.md"),
     path.join(".lab", ".managed", "templates", "iteration-report.md"),
     path.join(".lab", ".managed", "templates", "review-checklist.md"),
     path.join(".lab", ".managed", "templates", "final-report.md"),

package/package-assets/shared/lab/.managed/scripts/validate_collaborator_report.py CHANGED Viewed

@@ -51,6 +51,21 @@ SOURCE_SECTION_PATH_MARKERS = (
 SOURCE_SECTION_CITATION_MARKERS = ("Citation:", "引用：")
 SOURCE_SECTION_ROLE_MARKERS = ("What it established:", "What it does:", "What it measures:", "做了什么：", "衡量什么：")
 SOURCE_SECTION_LIMITATION_MARKERS = ("Limitation", "局限")
+# Maps each computation detail that the Metric Guide section must explain to the
+# field labels (English and Chinese) accepted as supplying it. The keys are the
+# names reported back to the author when a detail is missing.
+METRIC_GUIDE_DETAIL_MARKERS = {
+    "evaluation target": ("Evaluation target:", "What is evaluated:", "评估对象：", "评估什么："),
+    "test-set prediction": ("Test-set prediction used:", "Prediction used:", "测试集预测：", "预测量："),
+    "ranking or grouping": ("Ranking or grouping step:", "Ranking step:", "Grouping step:", "排序或分组：", "排序步骤：", "分组步骤："),
+    "calculation sketch": (
+        "Aggregation / calculation sketch:",
+        "Calculation sketch:",
+        "Approximate calculation:",
+        "大致计算：",
+        "近似公式：",
+        "聚合方式：",
+    ),
+    "direction and scale": ("Direction and scale:", "Metric direction:", "方向与尺度：", "方向：", "越高/越低："),
+    "comparability boundary": ("Comparability boundary:", "What not to compare:", "可比性边界：", "不能比较："),
+}
 def parse_args():
@@ -99,6 +114,35 @@ def validate_source_sections(text: str, label: str) -> list[str]:
     return issues
+def has_marker_with_value(body: str, markers: tuple[str, ...]) -> bool:
+    """Tell whether any line of ``body`` carries one of ``markers`` followed by real content.
+
+    The text after the first occurrence of a marker on a line is the field value.
+    Empty values and the placeholder tokens below do not count; scanning continues
+    until a filled value is found.
+    """
+    placeholders = {"-", "—", "TODO", "TBD", "待补", "待定"}
+    for raw_line in body.splitlines():
+        candidate = raw_line.strip()
+        for marker in markers:
+            _, found, remainder = candidate.partition(marker)
+            if not found:
+                continue
+            remainder = remainder.strip()
+            if remainder and remainder not in placeholders:
+                return True
+    return False
+def validate_metric_guide_detail(text: str, label: str) -> list[str]:
+    """Check that the Metric Guide section fills in every required computation detail.
+
+    Returns an empty list when the section body is empty or every detail in
+    METRIC_GUIDE_DETAIL_MARKERS has a filled field; otherwise returns a single
+    issue naming the missing details in dictionary order.
+    """
+    guide_body = extract_section_body(text, REPORT_REQUIRED_SECTIONS["Metric Guide"])
+    if not guide_body:
+        return []
+    absent_details = []
+    for detail_name, markers in METRIC_GUIDE_DETAIL_MARKERS.items():
+        if has_marker_with_value(guide_body, markers):
+            continue
+        absent_details.append(detail_name)
+    if absent_details:
+        return [
+            f"{label} section 'Metric Guide' must explain metric computation details: "
+            f"{', '.join(absent_details)}"
+        ]
+    return []
 def validate(path_str: str, required_sections: dict[str, list[str]], label: str) -> list[str]:
     path = Path(path_str)
     if not path.exists():
@@ -108,7 +152,7 @@ def validate(path_str: str, required_sections: dict[str, list[str]], label: str)
     if missing:
         return [f"{label} is missing required sections: {', '.join(missing)}"]
     if label == "report.md":
-        return validate_source_sections(text, label)
+        return validate_source_sections(text, label) + validate_metric_guide_detail(text, label)
     return []

package/package-assets/shared/lab/.managed/scripts/validate_manuscript_delivery.py CHANGED Viewed

@@ -38,6 +38,7 @@ REQUIRED_TABLE_NOTE_MARKERS = (
     "% Important caveat:",
 )
 WIDTH_CONTROL_NOTE_MARKER = "% Width control:"
+WIDE_PLAIN_TABULAR_COLUMN_LIMIT = 7
 TABLE_ABBREVIATION_EXCEPTIONS = {"TODO", "TBD"}
 PLACEHOLDER_TABLE_NOTE_PREFIXES = (
     "explain ",
@@ -97,6 +98,109 @@ def contains_any(text: str, needles: tuple[str, ...]) -> bool:
     return any(needle.lower() in lowered for needle in needles)
+def read_braced_group(text: str, start: int) -> tuple[str, int] | None:
+    """Read the balanced ``{...}`` group that begins at ``text[start]``.
+
+    Returns ``(content, next_index)`` where ``content`` is the text between the
+    outer braces and ``next_index`` is the position just past the closing brace.
+    Returns ``None`` when ``start`` is out of range, ``text[start]`` is not ``{``,
+    or the group is never closed.
+    """
+    if start >= len(text) or text[start] != "{":
+        return None
+    depth = 0
+    content_start = start + 1
+    # Every "{" and "}" is counted, including ones preceded by a backslash.
+    for index in range(start, len(text)):
+        char = text[index]
+        if char == "{":
+            depth += 1
+        elif char == "}":
+            depth -= 1
+            if depth == 0:
+                return text[content_start:index], index + 1
+    return None
+def skip_whitespace(text: str, index: int) -> int:
+    """Return the first position at or after ``index`` that is not whitespace.
+
+    The result is ``index`` itself when it is already past the end of ``text``.
+    """
+    position = index
+    text_length = len(text)
+    while position < text_length:
+        if not text[position].isspace():
+            break
+        position += 1
+    return position
+def extract_plain_tabular_specs(text: str) -> list[str]:
+    """Collect the column specification of every plain ``tabular`` environment.
+
+    Only ``\\begin{tabular}`` is matched; ``tabularx`` and ``tabular*`` have a
+    different environment name and are not returned. The optional vertical
+    position argument (``[t]``, ``[b]`` or ``[c]``) between the environment name
+    and the column specification is skipped, so ``\\begin{tabular}[t]{lcc}``
+    yields ``"lcc"`` instead of being ignored.
+
+    Returns the specifications in document order. An occurrence without a
+    balanced ``{...}`` specification is skipped.
+    """
+    specs: list[str] = []
+    needle = r"\begin{tabular}"
+    search_from = 0
+    while True:
+        index = text.find(needle, search_from)
+        if index == -1:
+            return specs
+        spec_start = skip_whitespace(text, index + len(needle))
+        # Step over an optional position argument before the column spec.
+        if spec_start < len(text) and text[spec_start] == "[":
+            closing = text.find("]", spec_start)
+            if closing != -1 and text[spec_start + 1:closing].strip() in {"t", "b", "c"}:
+                spec_start = skip_whitespace(text, closing + 1)
+        group = read_braced_group(text, spec_start)
+        if group is not None:
+            specs.append(group[0])
+            search_from = group[1]
+        else:
+            # No readable spec here; resume after the environment name.
+            search_from = index + len(needle)
+def count_column_spec(spec: str) -> tuple[int, bool]:
+    """Count the columns declared by a tabular column specification.
+
+    Returns ``(count, has_width_aware_column)``. The flag is true when the
+    specification contains an ``X``, ``p``, ``m`` or ``b`` column, including
+    inside a ``*{n}{...}`` repetition. Characters that match no rule below are
+    skipped without being counted.
+    """
+    count = 0
+    has_width_aware_column = False
+    index = 0
+    while index < len(spec):
+        char = spec[index]
+        # l / c / r each declare one column.
+        if char in "lcr":
+            count += 1
+            index += 1
+            continue
+        # X declares one column and counts as width-aware.
+        if char == "X":
+            count += 1
+            has_width_aware_column = True
+            index += 1
+            continue
+        # p / m / b declare one width-aware column; skip the braced argument that follows.
+        if char in "pmb":
+            count += 1
+            has_width_aware_column = True
+            index = skip_whitespace(spec, index + 1)
+            if index < len(spec) and spec[index] == "{":
+                group = read_braced_group(spec, index)
+                index = group[1] if group is not None else index + 1
+            continue
+        # *{n}{spec}: count the nested spec n times.
+        if char == "*":
+            index = skip_whitespace(spec, index + 1)
+            repeat_group = read_braced_group(spec, index)
+            if repeat_group is None:
+                # index is already past the "*", so the loop still advances.
+                continue
+            repeat_text, index = repeat_group
+            index = skip_whitespace(spec, index)
+            repeated_spec_group = read_braced_group(spec, index)
+            if repeated_spec_group is None:
+                continue
+            repeated_spec, index = repeated_spec_group
+            try:
+                repeat_count = int(repeat_text.strip())
+            except ValueError:
+                # A repeat count that is not an integer literal is treated as one repetition.
+                repeat_count = 1
+            nested_count, nested_width_aware = count_column_spec(repeated_spec)
+            count += repeat_count * nested_count
+            has_width_aware_column = has_width_aware_column or nested_width_aware
+            continue
+        # @ ! < > take a braced argument that is skipped without adding a column.
+        if char in "@!<>":
+            index = skip_whitespace(spec, index + 1)
+            if index < len(spec) and spec[index] == "{":
+                group = read_braced_group(spec, index)
+                index = group[1] if group is not None else index + 1
+            continue
+        # Anything else is skipped one character at a time.
+        index += 1
+    return count, has_width_aware_column
+def has_width_control_command(text: str) -> bool:
+    """Report whether ``text`` contains any of the recognised width-control commands."""
+    width_control_tokens = (
+        r"\begin{tabularx}",
+        r"\begin{tabular*}",
+        r"\resizebox{",
+        r"\setlength{\tabcolsep}",
+    )
+    for token in width_control_tokens:
+        if token in text:
+            return True
+    return False
 def find_workflow_config(start_path: Path) -> Path | None:
     search_roots = [start_path, *start_path.parents]
     for root in search_roots:
@@ -315,6 +419,18 @@ def check_table_file(path: Path, issues: list[str], label: str):
             continue
         if value < 3.0:
             issues.append(f"{label} sets \\tabcolsep below the safe range for paper-facing main tables")
+    for spec in extract_plain_tabular_specs(text):
+        column_count, has_width_aware_column = count_column_spec(spec)
+        if (
+            column_count >= WIDE_PLAIN_TABULAR_COLUMN_LIMIT
+            and not has_width_aware_column
+            and not has_width_control_command(text)
+        ):
+            issues.append(
+                f"{label} uses a wide plain tabular layout ({column_count} columns) without a width-aware strategy; "
+                "use tabularx or p columns, split the table, move secondary metrics to appendix, "
+                "or document last-resort width control"
+            )
 def check_figure_file(path: Path, issues: list[str], label: str):

package/package-assets/shared/lab/.managed/scripts/validate_section_draft.py CHANGED Viewed

@@ -241,6 +241,22 @@ INTERNAL_EXPERIMENT_PROVENANCE_PHRASES = (
     "调参运行",
     "调参轮次",
 )
+# Regular expressions (searched case-insensitively) that flag internal experiment
+# planning language in reader-facing prose: holdout expansion, small-batch gates,
+# payload freezing, API budgets and overfitting triage, in English and Chinese.
+INTERNAL_EXPERIMENT_PLANNING_PATTERNS = (
+    r"current\s+[\d.]+\s+only\s+shows?.*need(?:s|ed)?\s+(?:a\s+)?(?:new\s+)?holdout",
+    r"(?:new|additional)\s+holdout\s+(?:and|or)\s+(?:more\s+)?natural(?:ized)?\s+(?:payload|attack|statement)",
+    r"(?:small[- ]batch|pilot[- ]batch).*(?:gate|gating)",
+    r"(?:freeze|freezing).*(?:payload|attack statement|trigger)",
+    # Word boundaries keep "api" inside ordinary words such as "rapidly" from
+    # matching when the patterns are searched case-insensitively.
+    r"\bAPIs?\b.*(?:budget|cost|scale)",
+    r"新增\s*(?:holdout|外部|样本|实验).*验证",
+    r"还需要\s*新增.*验证",
+    r"后文.*边界",
+    r"当前\s*[\d.]+\s*只能说明.*不能外推.*(?:还需要|需要)",
+    r"小批量.*(?:门控|gate)",
+    r"(?:冻结|固定).*(?:payload|载荷|攻击语句|触发语句)",
+    r"(?:不能|不得).*边跑边调",
+    r"API\s*(?:规模|预算|成本)",
+    r"(?:按设计|设计上).*(?:失败|不通过).*(?:过拟合|调参)",
+)
 INTERNAL_CONFIG_LABEL_PATTERN = re.compile(
     r"\b[a-z]{1,4}\d+(?:[-_][a-z]?\d+(?:\.\d+)?){1,4}\b",
     flags=re.IGNORECASE,
@@ -265,6 +281,10 @@ def check_common_section_gate_risks(text: str, issues: list[str]):
         issues.append(
             "reader-facing prose appears to contain internal experiment provenance or tuning/config labels; move run provenance to workflow notes or map it to paper-facing diagnostic terminology"
         )
+    if any(re.search(pattern, prose_text, flags=re.IGNORECASE) for pattern in INTERNAL_EXPERIMENT_PLANNING_PATTERNS):
+        issues.append(
+            "reader-facing prose appears to contain internal experiment planning or holdout-expansion rationale; keep plans, gates, payload-freezing notes, and future validation logistics in workflow artifacts instead of the manuscript"
+        )
     if contains_any(
         prose_text,
         (

package/package-assets/shared/lab/.managed/scripts/validate_stage_report.py ADDED Viewed

@@ -0,0 +1,301 @@
+#!/usr/bin/env python3
+import argparse
+import re
+import sys
+from pathlib import Path
+# Required level-2 headings. Each entry lists the accepted heading regexes
+# (English, then Chinese), matched per line with re.MULTILINE.
+REQUIRED_SECTIONS = {
+    "Rule Preflight": [r"^##\s+Rule Preflight\s*$", r"^##\s+规则预检\s*$"],
+    "Stage Identity": [r"^##\s+Stage Identity\s*$", r"^##\s+阶段身份\s*$"],
+    "Core Explanation Table": [r"^##\s+Core Explanation Table\s*$", r"^##\s+核心说明表\s*$"],
+    "Evidence And Artifacts": [r"^##\s+Evidence And Artifacts\s*$", r"^##\s+证据与工件\s*$"],
+    "Next Action": [r"^##\s+Next Action\s*$", r"^##\s+下一步动作\s*$"],
+}
+# Required rows of the Core Explanation Table. Each value lists substrings that
+# identify the row when found in the normalized question cell.
+REQUIRED_CORE_ROWS = {
+    "stage": ("这是什么阶段", "what stage is this", "stage"),
+    "background": ("背景是什么", "background"),
+    "why_now": ("为什么现在要做", "why now", "why this stage ran"),
+    "what_done": ("这轮具体做了什么", "what this stage did", "what did this stage do"),
+    "how_done": ("怎么做的", "how it was done", "how was it done"),
+    "worked": ("结果好的地方是什么", "what worked"),
+    "did_not_work": ("结果坏的地方是什么", "what did not work", "negative result"),
+    "verifies": ("这验证了什么", "what this verifies", "what was verified"),
+    "unverified": ("还没有验证什么", "what remains unverified", "not yet verified"),
+    "improve_why": ("是否需要改进", "need improvement", "what needs improvement"),
+    "how_improve": ("下一步怎么改", "how to improve"),
+    "evidence": ("关键证据在哪里", "key evidence", "evidence"),
+    "decision": ("现在应该继续", "continue, stop", "decision"),
+}
+# Values treated as "not filled in". Compared after lowercasing, whitespace
+# collapsing and stripping of surrounding punctuation.
+PLACEHOLDER_VALUES = {
+    "",
+    "-",
+    "--",
+    "—",
+    "todo",
+    "tbd",
+    "n/a",
+    "na",
+    "none",
+    "待补",
+    "待定",
+    "无",
+}
+# Answers that are filled in but carry no explanation.
+SHALLOW_VALUES = {
+    "done",
+    "ok",
+    "pass",
+    "passed",
+    "符合预期",
+    "已完成",
+    "继续优化",
+    "继续推进",
+    "没有问题",
+}
+# Regexes for internal or assistant-style meta language; searched
+# case-insensitively across the whole report.
+INTERNAL_META_PATTERNS = [
+    r"用户说",
+    r"我来解释",
+    r"我会",
+    r"我已经",
+    r"你要求",
+    r"\bagent\b",
+    r"\bsubagent\b",
+    r"\bprompt\b",
+    r"提示词",
+    r"按.*技能",
+    r"service-style",
+    r"AI-assistant",
+]
+# Substrings whose presence is taken as evidence that a reason was given.
+WHY_MARKERS = (
+    "because",
+    "so that",
+    "therefore",
+    "reason",
+    "why",
+    "因为",
+    "所以",
+    "因此",
+    "原因",
+    "以便",
+    "用于",
+    "避免",
+)
+def parse_args():
+    """Parse the command line: a required report path and an optional expected stage."""
+    cli = argparse.ArgumentParser(description="Validate a plain-language lab stage report.")
+    cli.add_argument(
+        "--stage-report",
+        required=True,
+        help="Path to the stage report markdown file.",
+    )
+    cli.add_argument(
+        "--stage",
+        default="",
+        help="Expected lab stage name, such as run, auto, or write.",
+    )
+    return cli.parse_args()
+def normalize(text: str) -> str:
+    """Trim ``text``, lowercase it, and collapse each whitespace run to one space."""
+    trimmed_lower = text.strip().lower()
+    return re.sub(r"\s+", " ", trimmed_lower)
+def extract_section(text: str, patterns: list[str]) -> str:
+    """Return the stripped body under the first heading matched by ``patterns``.
+
+    Patterns are tried in order. The body runs from the end of the matched
+    heading to the next ``## `` heading, or to the end of ``text``. Returns an
+    empty string when no pattern matches.
+    """
+    heading = None
+    for pattern in patterns:
+        heading = re.search(pattern, text, flags=re.MULTILINE)
+        if heading:
+            break
+    if heading is None:
+        return ""
+    body_start = heading.end()
+    following = re.search(r"^##\s+", text[body_start:], flags=re.MULTILINE)
+    body_end = len(text) if following is None else body_start + following.start()
+    return text[body_start:body_end].strip()
+def find_missing_sections(text: str) -> list[str]:
+    """List the required sections for which none of the accepted headings occurs in ``text``."""
+    def heading_present(patterns: list[str]) -> bool:
+        return any(re.search(pattern, text, flags=re.MULTILINE) for pattern in patterns)
+    return [name for name, patterns in REQUIRED_SECTIONS.items() if not heading_present(patterns)]
+def parse_core_table_rows(text: str) -> dict[str, str]:
+    """Parse the Core Explanation Table into ``{normalized question: answer}``.
+
+    Only lines that start with ``|`` and contain at least three pipes are read.
+    Rows whose first cell normalizes to ``question``, ``---`` or the empty
+    string are skipped; a repeated question keeps its last answer.
+    """
+    table_body = extract_section(text, REQUIRED_SECTIONS["Core Explanation Table"])
+    parsed: dict[str, str] = {}
+    for raw_line in table_body.splitlines():
+        line = raw_line.strip()
+        is_table_row = line.startswith("|") and line.count("|") >= 3
+        if not is_table_row:
+            continue
+        cells = [cell.strip() for cell in line.strip("|").split("|")]
+        if len(cells) < 2:
+            continue
+        question, answer = normalize(cells[0]), cells[1].strip()
+        if question not in {"question", "---", ""}:
+            parsed[question] = answer
+    return parsed
+def find_row_value(rows: dict[str, str], markers: tuple[str, ...]) -> str | None:
+    """Return the answer of the first row whose question contains any normalized marker.
+
+    Rows are examined in insertion order; ``None`` means no row matched.
+    """
+    needles = tuple(normalize(marker) for marker in markers)
+    return next(
+        (
+            answer
+            for question, answer in rows.items()
+            if any(needle in question for needle in needles)
+        ),
+        None,
+    )
+def is_blank_or_placeholder(value: str | None) -> bool:
+    """True for ``None`` and for values that reduce to an entry of PLACEHOLDER_VALUES.
+
+    The value is normalized and stripped of surrounding spaces and ASCII or
+    full-width punctuation before the lookup.
+    """
+    if value is not None:
+        return normalize(value).strip(" .:;，。；：") in PLACEHOLDER_VALUES
+    return True
+def has_marker_with_value(body: str, markers: tuple[str, ...]) -> bool:
+    """Tell whether any line of ``body`` carries one of ``markers`` with a filled value.
+
+    The text after the first occurrence of a marker on a line is the field value;
+    blank and placeholder values do not count. Every line and every marker is
+    examined, so an empty first occurrence (for example an untouched English
+    label above a filled Chinese one) no longer hides a later filled field.
+
+    Returns ``True`` as soon as one filled value is found, otherwise ``False``.
+    """
+    for line in body.splitlines():
+        stripped = line.strip()
+        for marker in markers:
+            if marker not in stripped:
+                continue
+            value = stripped.split(marker, 1)[1].strip()
+            # Keep scanning on a blank value instead of giving up at the first match.
+            if not is_blank_or_placeholder(value):
+                return True
+    return False
+def is_shallow(value: str | None) -> bool:
+    """True when ``value`` is ``None``, under eight characters once cleaned, or a stock phrase.
+
+    Cleaning normalizes the value and strips surrounding spaces and ASCII or
+    full-width punctuation; stock phrases are those in SHALLOW_VALUES.
+    """
+    if value is None:
+        return True
+    cleaned = normalize(value).strip(" .:;，。；：")
+    if cleaned in SHALLOW_VALUES:
+        return True
+    return len(cleaned) < 8
+def has_why(value: str) -> bool:
+    """Report whether the normalized ``value`` contains any entry of WHY_MARKERS."""
+    haystack = normalize(value)
+    for marker in WHY_MARKERS:
+        if marker in haystack:
+            return True
+    return False
+def validate_core_table(text: str) -> list[str]:
+    """Validate the Core Explanation Table and return the issues found.
+
+    Three checks run in order: every required row has a non-placeholder answer,
+    five key rows are not shallow, and the two improvement rows give a reason.
+    """
+    rows = parse_core_table_rows(text)
+
+    def answer_for(row_name: str) -> str | None:
+        return find_row_value(rows, REQUIRED_CORE_ROWS[row_name])
+
+    problems = []
+    unanswered = [
+        row_name
+        for row_name in REQUIRED_CORE_ROWS
+        if is_blank_or_placeholder(answer_for(row_name))
+    ]
+    if unanswered:
+        problems.append(f"Core Explanation Table is missing non-empty answers for: {', '.join(unanswered)}")
+    depth_checked_rows = ("did_not_work", "verifies", "improve_why", "how_improve", "decision")
+    for row_name in depth_checked_rows:
+        if is_shallow(answer_for(row_name)):
+            problems.append(f"Core Explanation Table row '{row_name}' is too shallow")
+    reason_checked_rows = ("improve_why", "how_improve")
+    for row_name in reason_checked_rows:
+        answer = answer_for(row_name)
+        # Empty answers are already reported by the checks above.
+        if answer and not has_why(answer):
+            problems.append(f"Core Explanation Table row '{row_name}' must include a reason, not only an action")
+    return problems
+def validate_evidence_section(text: str) -> list[str]:
+    """Validate the Evidence And Artifacts section and return the issues found.
+
+    An empty section yields a single issue. Otherwise each of the four fields
+    must appear under its English or Chinese label and carry a filled value.
+    """
+    body = extract_section(text, REQUIRED_SECTIONS["Evidence And Artifacts"])
+    if not body:
+        return ["Evidence And Artifacts section is empty"]
+    field_labels = (
+        ("Primary artifact:", "主工件："),
+        ("Supporting artifacts:", "支撑工件："),
+        ("Validation commands:", "验证命令："),
+        ("Known gaps:", "已知缺口："),
+    )
+    findings = []
+    for labels in field_labels:
+        if all(label not in body for label in labels):
+            findings.append(f"Evidence And Artifacts is missing '{labels[0]}'")
+        elif not has_marker_with_value(body, labels):
+            findings.append(f"Evidence And Artifacts field '{labels[0]}' must have a non-empty value")
+    return findings
+def validate_rule_preflight(text: str) -> list[str]:
+    """Validate the Rule Preflight section and return the issues found.
+
+    Each required label must be present in the section and be followed by a
+    filled value.
+    """
+    body = extract_section(text, REQUIRED_SECTIONS["Rule Preflight"])
+    required_labels = (
+        "Rule source file:",
+        "Rule source revision:",
+        "Project version:",
+        "Resolved stage:",
+        "Resolved mode:",
+        "Resolved target:",
+        "Preflight stamp:",
+    )
+    findings = []
+    for label in required_labels:
+        if label not in body:
+            findings.append(f"Rule Preflight is missing '{label}'")
+        elif not has_marker_with_value(body, (label,)):
+            findings.append(f"Rule Preflight field '{label}' must have a non-empty value")
+    return findings
+def validate_next_action(text: str) -> list[str]:
+    """Validate the Next Action section and return at most one issue.
+
+    The decision and the reason are judged on what the author wrote, not on the
+    template scaffolding. The known template labels ("Decision:", "Why this next
+    step:", and their Chinese equivalents) are removed from the start of a line,
+    and the untouched option menu is ignored. Without this, an unfilled English
+    template passes: the menu names every decision and the label contains "why".
+
+    A filled "why" field counts as a reason even without an explicit reason
+    word. Lines that do not start with a template label are judged as written.
+    """
+    body = extract_section(text, REQUIRED_SECTIONS["Next Action"])
+    if is_shallow(body):
+        return ["Next Action section must state a concrete decision and next step"]
+    allowed = ("continue", "stop", "revise", "rerun", "escalate", "handoff", "继续", "停止", "修订", "重跑", "升级", "交接")
+    template_labels = (
+        "decision:",
+        "concrete next step:",
+        "why this next step:",
+        "决策：",
+        "具体下一步：",
+        "为什么这样做：",
+    )
+    why_labels = ("why this next step:", "为什么这样做：")
+    # The menu shipped in the template; left as-is it is not a decision.
+    option_menu = normalize("continue / stop / revise / rerun / escalate / handoff")
+    why_field_filled = False
+    written = []
+    for raw_line in body.splitlines():
+        entry = raw_line.strip().lstrip("-*").strip()
+        if not entry:
+            continue
+        lowered_entry = entry.lower()
+        label = next((candidate for candidate in template_labels if lowered_entry.startswith(candidate)), None)
+        if label is None:
+            # Free-form line: keep it whole.
+            written.append(entry)
+            continue
+        value = entry[len(label):].strip()
+        if is_blank_or_placeholder(value) or normalize(value) == option_menu:
+            continue
+        written.append(value)
+        if label in why_labels:
+            why_field_filled = True
+    written_text = " ".join(written)
+    if not any(marker in normalize(written_text) for marker in allowed):
+        return ["Next Action must choose continue, stop, revise, rerun, escalate, or handoff"]
+    if not (why_field_filled or has_why(written_text)):
+        return ["Next Action must include why the next step is appropriate"]
+    return []
+def validate_stage_identity(text: str, expected_stage: str) -> list[str]:
+    """Check that the Stage Identity section names the expected stage.
+
+    The stage must appear as a standalone token, case-insensitively, so ``run``
+    is not satisfied by ``rerun`` and ``auto`` is not satisfied by ``automatic``.
+    Punctuation such as ``:``, ``/``, ``-`` or ``_`` still delimits a token, so
+    ``/lab:run`` matches ``run``.
+
+    Returns an empty list when ``expected_stage`` is empty or the stage is
+    found; otherwise a single issue.
+    """
+    if not expected_stage:
+        return []
+    body = extract_section(text, REQUIRED_SECTIONS["Stage Identity"])
+    # ASCII-only lookarounds: Python's \b would treat adjacent CJK characters as
+    # word characters and reject "阶段run".
+    stage_token = re.compile(
+        rf"(?<![A-Za-z0-9]){re.escape(expected_stage)}(?![A-Za-z0-9])",
+        flags=re.IGNORECASE,
+    )
+    if stage_token.search(body) is None:
+        return [f"Stage Identity must mention expected stage '{expected_stage}'"]
+    return []
+def validate_internal_meta(text: str) -> list[str]:
+    """Return one issue per INTERNAL_META_PATTERNS entry found in ``text`` (case-insensitive)."""
+    return [
+        f"stage report contains internal or service-style meta language: {pattern}"
+        for pattern in INTERNAL_META_PATTERNS
+        if re.search(pattern, text, flags=re.IGNORECASE)
+    ]
+def validate(path: Path, expected_stage: str = "") -> list[str]:
+    """Run every stage-report check on the file at ``path`` and return the issues.
+
+    A missing file yields a single issue. If required sections are missing,
+    only that problem is reported and the per-section checks are skipped. The
+    meta-language scan always runs on a file that exists.
+    """
+    if not path.exists():
+        return [f"stage report does not exist: {path}"]
+    text = path.read_text(encoding="utf-8")
+    missing_sections = find_missing_sections(text)
+    if missing_sections:
+        issues = [f"stage report is missing required sections: {', '.join(missing_sections)}"]
+    else:
+        issues = [
+            *validate_rule_preflight(text),
+            *validate_stage_identity(text, expected_stage),
+            *validate_core_table(text),
+            *validate_evidence_section(text),
+            *validate_next_action(text),
+        ]
+    issues.extend(validate_internal_meta(text))
+    return issues
+def main():
+    """Validate the report named on the command line.
+
+    Prints each issue to stderr and returns 1, or prints a success line to
+    stdout and returns 0.
+    """
+    args = parse_args()
+    issues = validate(Path(args.stage_report), args.stage)
+    if not issues:
+        print("stage report is valid")
+        return 0
+    for issue in issues:
+        print(issue, file=sys.stderr)
+    return 1
+if __name__ == "__main__":
+    raise SystemExit(main())

package/package-assets/shared/lab/.managed/templates/final-report.md CHANGED Viewed

@@ -51,6 +51,12 @@
 - Primary metric plain-language explanation:
 - Secondary metric plain-language explanation:
 - Health or support metrics and why they are not the main claim:
+- Evaluation target:
+- Test-set prediction used:
+- Ranking or grouping step:
+- Aggregation / calculation sketch:
+- Direction and scale:
+- Comparability boundary:
 ## Background Sources

package/package-assets/shared/lab/.managed/templates/main-tables.md CHANGED Viewed

@@ -17,6 +17,12 @@
 - Primary metric plain-language explanation:
 - Secondary metric plain-language explanation:
 - Health or support metrics and how to read them:
+- Evaluation target:
+- Test-set prediction used:
+- Ranking or grouping step:
+- Aggregation / calculation sketch:
+- Direction and scale:
+- Comparability boundary:
 ## Final Performance Summary

package/package-assets/shared/lab/.managed/templates/paper-table.tex CHANGED Viewed

@@ -2,18 +2,18 @@
 \caption{One-sentence message of the table and the evaluation protocol.}
 \label{tab:placeholder}
 \centering
-\begin{tabular}{lcc}
+\begin{tabularx}{\linewidth}{>{\raggedright\arraybackslash}Xcc}
 \toprule
 Method & Metric 1 $\uparrow$ & Metric 2 $\uparrow$ \\
 \midrule
 Ours & 0.0000 & 0.0000 \\
 Baseline & 0.0000 & 0.0000 \\
 \bottomrule
-\end{tabular}
+\end{tabularx}
 % Rows: explain what each row represents.
 % Columns: explain what each column represents and its direction.
 % Metric definitions: expand local abbreviations, units, denominators, or event conditions.
 % Comparison scope: explain which setting, split, attack family, or benchmark scope this table covers.
 % Important caveat: state any omitted metrics, zero-valued metrics, or appendix-only reporting decision.
-% Width control: first shorten headers, move secondary metrics out of the main table, and reduce or split columns; only then adjust \setlength{\tabcolsep}{...} conservatively or use \resizebox{\linewidth}{!}{...} as a documented last resort.
+% Width control: default to bounded columns with tabularx or p{...}; first shorten headers, move secondary metrics out of the main table, and reduce or split columns; only then adjust \setlength{\tabcolsep}{...} conservatively or use \resizebox{\linewidth}{!}{...} as a documented last resort.
 \end{table}

package/package-assets/shared/lab/.managed/templates/paper.tex CHANGED Viewed

@@ -4,6 +4,8 @@
 \usepackage{hyperref}
 \usepackage{graphicx}
 \usepackage{booktabs}
+\usepackage{array}
+\usepackage{tabularx}
 \title{Paper Title}
 \author{Author Name}

package/package-assets/shared/lab/.managed/templates/stage-report.md ADDED Viewed

@@ -0,0 +1,52 @@
+# Stage Report
+## Rule Preflight
+- Rule source file:
+- Rule source revision:
+- Project version:
+- Resolved stage:
+- Resolved mode:
+- Resolved target:
+- Preflight stamp:
+- Override reason, if any:
+## Stage Identity
+- Stage:
+- Target:
+- Date:
+- Status:
+- Primary artifact:
+- Next owner:
+## Core Explanation Table
+| Question | Plain Answer |
+|---|---|
+| 这是什么阶段？ |  |
+| 背景是什么？ |  |
+| 为什么现在要做？ |  |
+| 这轮具体做了什么？ |  |
+| 怎么做的？ |  |
+| 结果好的地方是什么？ |  |
+| 结果坏的地方是什么？ |  |
+| 这验证了什么？ |  |
+| 还没有验证什么？ |  |
+| 是否需要改进？为什么？ |  |
+| 下一步怎么改？为什么这样改？ |  |
+| 关键证据在哪里？ |  |
+| 现在应该继续、停止、重做还是升级？ |  |
+## Evidence And Artifacts
+- Primary artifact:
+- Supporting artifacts:
+- Validation commands:
+- Known gaps:
+## Next Action
+- Decision: continue / stop / revise / rerun / escalate / handoff
+- Concrete next step:
+- Why this next step:

package/package-assets/shared/lab/.managed/templates/write-iteration.md CHANGED Viewed

@@ -86,6 +86,9 @@
 - Were all abbreviations expanded at local first mention:
 - Did each main table include a local table note:
 - Can a reader interpret rows and columns without chasing Method:
+- Table width audit:
+- Did any main table use a wide plain `tabular` layout:
+- If width control was needed, was the table first shortened, split, moved partly to appendix, or converted to `tabularx` / bounded columns before using `\tabcolsep` or `\resizebox`:
 - If this section used canonical short names before their defining section, was a local naming bridge added:
 - Did model and ablation labels stay canonical instead of drifting into narrative aliases:
@@ -141,6 +144,7 @@
 - Did the round avoid copying reference wording, claims, metrics, captions, or conclusions:
 - Did final prose avoid service-style or AI-assistant meta language:
 - Did final prose avoid workflow-only placeholder language:
+- Did final prose avoid internal experiment planning, future-holdout logistics, gates, payload-freezing notes, API-budget notes, and automation triage language:
 - Validator command and result:
 ## Decision

package/package-assets/shared/skills/lab/SKILL.md CHANGED Viewed

@@ -42,6 +42,10 @@ Use this skill when the user invokes `/lab:*` or asks for the structured researc
 - Generate the `Rule Preflight` block from `.lab/.managed/rule-manifest.json` with the managed preflight renderer instead of handwriting it from memory.
 - Treat missing, stale, or contradictory `Rule Preflight` data as a stage-contract failure.
 - Project-installed rules take priority over model memory. If remembered patterns conflict with the installed rule source, follow the installed source recorded in `.lab/.managed/rule-manifest.json`.
+- Before a `/lab:*` stage reaches a final handoff, write or update one plain-language stage report under `.lab/stage-reports/` from `.lab/.managed/templates/stage-report.md`.
+- The stage report must include a filled `Core Explanation Table` that answers, in workflow language and plain language: background, why now, what was done, how it was done, what worked, what did not work, what was verified, what remains unverified, whether improvement is needed and why, how to improve and why, key evidence, and the continue/stop/revise/rerun/escalate/handoff decision.
+- Stage reports are closeout and handoff artifacts, not a new user command and not a replacement for stage-specific artifacts such as idea memos, iteration reports, final reports, or write-iteration records.
+- Run `.lab/.managed/scripts/validate_stage_report.py --stage-report <stage-report> --stage <stage>` before claiming the stage is complete, and include the stage-report path plus validation result in the final user-facing summary.
 - Final paper output should default to LaTeX, and its manuscript language should be decided separately from the workflow language.
 - Separate sourced facts from model-generated hypotheses.
 - Preserve failed runs, failed ideas, and limitations.
@@ -210,6 +214,7 @@ Use this skill when the user invokes `/lab:*` or asks for the structured researc
 - Read `.lab/context/mission.md`, `.lab/context/state.md`, `.lab/context/workflow-state.md`, `.lab/context/decisions.md`, `.lab/context/evidence-index.md`, and `.lab/context/data-decisions.md` before drafting.
 - Read `.lab/context/eval-protocol.md` before choosing tables, thresholds, or final result framing.
 - Keep metric definitions, comparison semantics, and implementation references anchored to the approved evaluation protocol instead of re-deriving them during reporting.
+- In `report.md`, explain each primary metric with a computation guide: what is evaluated, which test-set predictions or scores are used, whether examples are sorted, grouped, bucketed, or paired, how the value is aggregated or approximately calculated, what direction and scale mean, and what cannot be compared across datasets, splits, or implementations.
 - Aggregate them with `.lab/.managed/scripts/summarize_iterations.py`.
 - Write the final document with `.lab/.managed/templates/final-report.md`, the managed table summary with `.lab/.managed/templates/main-tables.md`, and the internal handoff with `.lab/.managed/templates/artifact-status.md`.
 - Keep failed attempts and limitations visible.
@@ -272,10 +277,12 @@ Use this skill when the user invokes `/lab:*` or asks for the structured researc
 - Use the same metric names across Method, Experiments, captions, table headers, table notes, and result summaries; remove forbidden aliases from reader-facing LaTeX instead of letting legacy metric names drift.
 - Run `.lab/.managed/scripts/validate_metric_glossary.py` in metric-bearing draft, final-draft, or export rounds and record the result in the latest write iteration artifact.
 - Do not treat `\resizebox{\linewidth}{!}{...}` as the default main-table fit strategy.
-- Fit paper-facing main tables by redesign first: shorten headers, move secondary metrics out of the main table, reduce or split columns, then adjust `\tabcolsep` conservatively; only use `\resizebox` as a last resort and document why.
+- Wide plain `tabular` layouts with many columns are not manuscript-ready by default; prefer `tabularx` or bounded `p{...}` columns for text-heavy or multi-metric tables.
+- Fit paper-facing main tables by redesign first: shorten headers, move secondary metrics out of the main table, reduce or split columns, prefer `tabularx` or bounded columns, then adjust `\tabcolsep` conservatively; only use `\resizebox` as a last resort and document why.
 - Keep `\tabcolsep` adjustments conservative and avoid shrinking below a roughly readable floor for paper-facing main tables.
 - Do not rely on `\scriptsize` or `\tiny` as the default way to make a main table fit.
 - Keep internal identifiers, tuning-run labels, probe names, config strings, rerun ids, and package labels out of prose unless they are mapped once for the reader and then moved back out of prose.
+- Keep internal experiment planning out of manuscript prose: future holdout expansion, small-batch gates, payload freezing, API budgets, automation decisions, and overfitting triage logic belong in lab artifacts, not paper-facing sections.
 - Do not rely on unexplained jargon density as a substitute for academic tone.
 - Bind each claim to evidence from `report`, iteration reports, or normalized summaries.
 - Use the write-stage contract in `.codex/skills/lab/stages/write.md` or `.claude/skills/lab/stages/write.md` as the single source of truth for template choice, paper-plan requirements, section-specific references, validator calls, asset coverage, and final manuscript gates.
@@ -314,6 +321,7 @@ Use this skill when the user invokes `/lab:*` or asks for the structured researc
 - No auto start without an explicit autonomy level and `Approval status: approved`.
 - No final report without validated normalized results.
 - No paper-writing round without stable report artifacts, an approved framing artifact, evidence links, and LaTeX manuscript output.
+- No stage-final handoff without a validated plain-language stage report.
 - No final-draft or export round without passing section-quality, claim-safety, and manuscript-delivery validation.
 - No final-draft or export round with mismatched `workflow_language` and `paper_language` unless the latest write iteration records the language decision audit that justified the final manuscript language and the persisted workflow-language paper-layer path.

package/package-assets/shared/skills/lab/references/paper-writing/section-style-policies.md CHANGED Viewed

@@ -122,6 +122,7 @@ These are paper-facing defaults. They are not project-specific branding rules.
 - Self-evaluations such as "结果也很清楚", "the defense results are very clear", or "the table is self-explanatory".
 - Layout-process commentary in scientific prose, such as "由于表列较多，这里采用页宽自适应排版" or "we use page-width adaptive layout here".
 - Claims that a table "proves" something when the evidence only supports a bounded empirical result.
+- Internal experiment-planning prose, such as "还需要新增 holdout", "小批量门控", "冻结 payload", "不能边跑边调", "API 规模估计", or "if all scores are 1.0000, treat it as overfitting".
 - Service-style or AI-assistant meta language such as "用户说", "按你的要求", "我来解释", "let me explain", or "as requested by the user".
 - Workflow-only placeholder language such as "图的意图", "资产意图", "占位符", "workflow-language", or "sync this wording".

package/package-assets/shared/skills/lab/stages/auto.md CHANGED Viewed

@@ -194,3 +194,9 @@
 - If the user chooses to convert, persist `paper_language_finalization_decision: convert-to-paper-language`
 - While the real experiment process is still alive, emit only a progress update and keep waiting. Do not present a terminal summary for that rung until the process exits or the rung hits an explicit stop boundary.
 - While the loop is healthy, do not ask the user to trigger the next poll. Keep polling until a meaningful change, keepalive boundary, stop boundary, escalation boundary, or terminal boundary is reached.
+## Stage Report Closeout
+- At every stop, failure, escalation, or final handoff, write or update `.lab/stage-reports/<date>--auto--<target>.md` from `.lab/.managed/templates/stage-report.md`.
+- Fill the `Core Explanation Table` in plain language: background, why now, what ran, how the loop ran, what worked, what did not work, what was verified, what remains unverified, what needs improvement and why, how to improve and why, key evidence, and the continue/stop/revise/rerun/escalate/handoff decision.
+- Run `.lab/.managed/scripts/validate_stage_report.py --stage-report <stage-report> --stage auto` and include the report path plus validation result in the final user-facing summary.

package/package-assets/shared/skills/lab/stages/data.md CHANGED Viewed

@@ -66,3 +66,9 @@
 6. Recommended approved dataset package
 7. Risks and exclusions
 8. Approval gate
+## Stage Report Closeout
+- Before final handoff, write or update `.lab/stage-reports/<date>--data--<target>.md` from `.lab/.managed/templates/stage-report.md`.
+- Fill the `Core Explanation Table` in plain language: background, why now, what changed, how the dataset package was chosen, what worked, what did not work, what was verified, what remains unverified, what needs improvement and why, how to improve and why, key evidence, and the continue/stop/revise/rerun/escalate/handoff decision.
+- Run `.lab/.managed/scripts/validate_stage_report.py --stage-report <stage-report> --stage data` and include the report path plus validation result in the final user-facing summary.

package/package-assets/shared/skills/lab/stages/framing.md CHANGED Viewed

@@ -69,3 +69,9 @@
 5. Recommended framing pack
 6. Forbidden claims and wording
 7. Approval gate
+## Stage Report Closeout
+- Before final handoff, write or update `.lab/stage-reports/<date>--framing--<target>.md` from `.lab/.managed/templates/stage-report.md`.
+- Fill the `Core Explanation Table` in plain language: background, why now, what naming or framing changed, how it was checked, what worked, what did not work, what was verified, what remains unverified, what needs improvement and why, how to improve and why, key evidence, and the continue/stop/revise/rerun/escalate/handoff decision.
+- Run `.lab/.managed/scripts/validate_stage_report.py --stage-report <stage-report> --stage framing` and include the report path plus validation result in the final user-facing summary.

package/package-assets/shared/skills/lab/stages/idea.md CHANGED Viewed

@@ -119,6 +119,12 @@
 28. Minimum viable experiment
 29. Idea source log aligned with the two literature sweeps
+## Stage Report Closeout
+- Before final handoff, write or update `.lab/stage-reports/<date>--idea--<target>.md` from `.lab/.managed/templates/stage-report.md`.
+- Fill the `Core Explanation Table` in plain language: background, why now, what idea work was done, how sources and brainstorm passes were used, what worked, what did not work, what was verified, what remains unverified, what needs improvement and why, how to improve and why, key evidence, and the continue/stop/revise/rerun/escalate/handoff decision.
+- Run `.lab/.managed/scripts/validate_stage_report.py --stage-report <stage-report> --stage idea` and include the report path plus validation result in the final user-facing summary.
 ## Writing Standard
 - Keep the problem statement short, concrete, and easy to scan.

package/package-assets/shared/skills/lab/stages/iterate.md CHANGED Viewed

@@ -78,3 +78,9 @@ If the loop stops without success, record:
 - If the next move depends on an unresolved assumption, ask one clarifying question at a time.
 - If more than one next hypothesis is credible, present 2-3 approaches with trade-offs and recommend the next bounded experiment before changing the mission state.
 - Keep an approval gate when a proposed change would alter the frozen mission instead of only changing the implementation hypothesis.
+## Stage Report Closeout
+- Before final handoff, write or update `.lab/stage-reports/<date>--iterate--<target>.md` from `.lab/.managed/templates/stage-report.md`.
+- Fill the `Core Explanation Table` in plain language: background, why now, what rounds ran, how the loop evaluated them, what worked, what did not work, what was verified, what remains unverified, what needs improvement and why, how to improve and why, key evidence, and the continue/stop/revise/rerun/escalate/handoff decision.
+- Run `.lab/.managed/scripts/validate_stage_report.py --stage-report <stage-report> --stage iterate` and include the report path plus validation result in the final user-facing summary.

package/package-assets/shared/skills/lab/stages/report.md CHANGED Viewed

@@ -10,6 +10,7 @@
 - method overview
 - selected metrics summary
 - plain-language metric guide
+- metric computation guide that explains what is evaluated, which test-set predictions or scores are used, whether examples are sorted, grouped, bucketed, or paired, how values are aggregated or approximately calculated, metric direction and scale, and comparability boundaries across datasets, splits, or implementations
 - background sources
 - method and baseline sources
 - metric sources
@@ -52,6 +53,8 @@
 - Do not restate metric definitions, baseline behavior, or comparison implementations from memory; use the approved evaluation protocol and its recorded sources.
 - Carry the approved `Primary metrics`, `Secondary metrics`, and `Required terminal evidence` into both the report and the managed main-tables artifact.
 - Explain the selected primary and secondary metrics in plain language for the user: what each metric measures, whether higher or lower is better, and whether it is a main result metric or only a health/support metric.
+- For every primary metric, also explain enough of the computation for a collaborator to reproduce the idea without reading code: what is evaluated, which test-set predictions or scores are used, whether the examples are sorted, bucketed, grouped, or paired, how the resulting values are aggregated or approximately calculated, what direction and scale mean, and which comparisons are invalid across datasets, splits, or metric implementations.
+- If a metric depends on ranking, the report must name the ranking score and the order. If it depends on a contrast, the report must name the compared conditions or groups. If it depends on an average, rate, area, threshold crossing, or recovery amount, the report must give a simple calculation sketch.
 - If coverage, completeness, confidence, or similar health metrics appear, explicitly say that they describe experimental reliability rather than the main scientific effect.
 - Pull the core background references, method or baseline references, and metric references out of the approved evaluation protocol instead of hiding them in `.lab/context/*`.
 - Treat `report.md` as an external-review-ready memo. Source sections must not rely on local file paths or internal provenance notes; they must give a few human-readable anchor references instead.
@@ -87,3 +90,9 @@
 - If a missing assumption would change report interpretation, ask one clarifying question at a time.
 - If there are multiple defensible report framings, present 2-3 approaches with trade-offs and recommend the most evidence-faithful framing before writing.
 - Keep an approval gate when the reporting frame would materially affect what the paper later claims.
+## Stage Report Closeout
+- Before final handoff, write or update `.lab/stage-reports/<date>--report--<target>.md` from `.lab/.managed/templates/stage-report.md`.
+- Fill the `Core Explanation Table` in plain language: background, why now, what report artifacts were produced, how evidence was carried forward, what worked, what did not work, what was verified, what remains unverified, what needs improvement and why, how to improve and why, key evidence, and the continue/stop/revise/rerun/escalate/handoff decision.
+- Run `.lab/.managed/scripts/validate_stage_report.py --stage-report <stage-report> --stage report` and include the report path plus validation result in the final user-facing summary.

package/package-assets/shared/skills/lab/stages/review.md CHANGED Viewed

@@ -58,3 +58,9 @@
 - If there are multiple legitimate review framings, present 2-3 approaches with trade-offs and recommend the strictest useful framing.
 - Do not use brainstorming to soften critique; once scope is clear, stay in reviewer mode and deliver findings directly.
 - Call out the strongest remaining alternative explanation and the strongest boundary risk when either one could materially narrow the claim.
+## Stage Report Closeout
+- Before final handoff, write or update `.lab/stage-reports/<date>--review--<target>.md` from `.lab/.managed/templates/stage-report.md`.
+- Fill the `Core Explanation Table` in plain language: background, why now, what was reviewed, how the review was performed, what worked, what did not work, what was verified, what remains unverified, what needs improvement and why, how to improve and why, key evidence, and the continue/stop/revise/rerun/escalate/handoff decision.
+- Run `.lab/.managed/scripts/validate_stage_report.py --stage-report <stage-report> --stage review` and include the report path plus validation result in the final user-facing summary.

package/package-assets/shared/skills/lab/stages/run.md CHANGED Viewed

@@ -55,3 +55,9 @@
 - If the next run depends on an unresolved assumption, ask one clarifying question at a time.
 - If there are multiple defensible tiny-run options, present 2-3 approaches with trade-offs and recommend the cheapest informative run.
 - Only ask for approval when choosing a run path would materially spend more time or compute than the default smallest experiment.
+## Stage Report Closeout
+- Before final handoff, write or update `.lab/stage-reports/<date>--run--<target>.md` from `.lab/.managed/templates/stage-report.md`.
+- Fill the `Core Explanation Table` in plain language: background, why now, what ran, how it ran, what worked, what did not work, what was verified, what remains unverified, what needs improvement and why, how to improve and why, key evidence, and the continue/stop/revise/rerun/escalate/handoff decision.
+- Run `.lab/.managed/scripts/validate_stage_report.py --stage-report <stage-report> --stage run` and include the report path plus validation result in the final user-facing summary.

package/package-assets/shared/skills/lab/stages/spec.md CHANGED Viewed

@@ -72,3 +72,9 @@
 - evaluation normalization
 - bounded iteration
 - final report
+## Stage Report Closeout
+- Before final handoff, write or update `.lab/stage-reports/<date>--spec--<target>.md` from `.lab/.managed/templates/stage-report.md`.
+- Fill the `Core Explanation Table` in plain language: background, why now, what change artifacts were created, how the spec was structured, what worked, what did not work, what was verified, what remains unverified, what needs improvement and why, how to improve and why, key evidence, and the continue/stop/revise/rerun/escalate/handoff decision.
+- Run `.lab/.managed/scripts/validate_stage_report.py --stage-report <stage-report> --stage spec` and include the report path plus validation result in the final user-facing summary.

package/package-assets/shared/skills/lab/stages/write.md CHANGED Viewed

@@ -165,6 +165,8 @@ Do not enter prose polish until the current section has passed the reference-con
 - Do not use labels containing `_` or `-` in reader-facing prose.
 - Keep internal identifiers, config keys, and experiment package labels out of reader-facing prose unless they are mapped once for the reader and then moved back out of prose.
 - Keep run provenance such as tuning-run labels, probe names, internal config strings, rerun ids, and package labels out of reader-facing prose. If the evidence is useful, rewrite it as a bounded paper-facing diagnostic or move the raw provenance to workflow notes or appendix metadata.
+- Keep internal experiment planning out of reader-facing prose. Do not write paper sentences that explain future holdout expansion, small-batch gates, payload freezing, API budgets, overfitting triage rules such as "if all scores are 1.0000, treat it as overfitting", or why another automation round is needed.
+- When an experiment boundary matters, report only the scientific scope already supported by the evidence. Put the operational plan for collecting new attacks, new papers, new markers, or additional holdout cases into `.lab/changes/`, `.lab/iterations/`, or report artifacts, not into manuscript sections.
 - Do not use unexplained terminology density as a substitute for academic tone.
 - Keep service-style or AI-assistant meta language out of manuscript prose. Phrases such as "用户说", "按你的要求", "我来解释", "下面我", "this version", or "as requested by the user" belong in workflow notes, not in paper-facing sections, captions, table notes, or analysis assets.
 - Keep workflow-only placeholder language out of manuscript prose. Phrases such as "图的意图", "资产意图", "占位符", "workflow-language", "translation layer", or "sync this wording" belong in authoring artifacts, not in reader-facing LaTeX.
@@ -178,10 +180,12 @@ Do not enter prose polish until the current section has passed the reference-con
 - If a metric's denominator, event condition, score scale, or comparison scope differs by setting, define a separate entry or explicitly scope the metric in `.lab/writing/metric-glossary.md`.
 - Deprecated or forbidden metric aliases must be removed from reader-facing LaTeX instead of explained away locally.
 - Do not treat `\resizebox{\linewidth}{!}{...}` as the default way to fit a main table.
-- Main-table width control should follow this order: shorten headers while preserving local explanations, move secondary metrics to appendix-only, reduce or split columns, adjust `\tabcolsep` conservatively, and only then consider `\resizebox` as a last resort.
+- Wide plain `tabular` layouts with many columns are not manuscript-ready by default; final/export validation should force a width-aware table design instead of silently accepting likely overfull tables.
+- Main-table width control should follow this order: shorten headers while preserving local explanations, move secondary metrics to appendix-only, reduce or split columns, prefer `tabularx` or bounded `p{...}` columns, adjust `\tabcolsep` conservatively, and only then consider `\resizebox` as a last resort.
 - When `\tabcolsep` is adjusted for a paper-facing main table, keep it in a safe range and avoid shrinking below roughly `3pt`; prefer `4pt` or `5pt` when a small reduction is enough.
 - Do not use `\scriptsize` or `\tiny` as the default main-table fit strategy. If a table only fits after aggressive font shrinking, redesign the table instead of forcing it into the page.
 - If a paper-facing main table uses `\resizebox` or non-default width control, explain the width-control rationale in the same table note.
+- Prefer `tabularx` for paper-facing main tables whose first column or text-heavy columns need bounded line wrapping; use plain `tabular` only for compact tables with a small column count.
 - Every main table should have a short table-introduction sentence before it and a short interpretation sentence after it so the reader knows what question the table answers and how to read the result.
 - Build the paper asset plan before prose when the section carries introduction, experimental, method, related-work, or conclusion claims:
   - record the asset coverage targets and gaps for the current paper
@@ -221,6 +225,7 @@ Do not enter prose polish until the current section has passed the reference-con
 - Table assets must also include a local table note that explains row meaning, column meaning, metric definitions, comparison scope, and any important caveat.
 - The local table note must contain real reader-facing explanations, not the default template phrases such as "explain what each row represents" or "expand local abbreviations".
 - Table assets must not rely on aggressive width hacks by default; if width control is still needed after table redesign, document it locally and keep it readable.
+- Table assets with seven or more columns should be split, moved partly to the appendix, or written with width-aware columns such as `tabularx` or `p{...}` instead of a plain `tabular` layout.
 - Figure placeholders may record what the final figure should show and why the reader needs it in authoring comments, the paper plan, or the write-iteration artifact, but the caption itself must remain paper-facing and must not contain "Figure intent", "图的意图", "asset intent", "占位符", or similar workflow language.
 - Core asset coverage for a paper-facing final draft should include a problem-setting or teaser figure, a method overview figure, a results overview figure, a main-results table, an ablation table, and one additional analysis asset.
 - Keep `.lab/writing/plan.md` synchronized with the current table plan, figure plan, citation plan, and section-to-asset map whenever manuscript assets change.
@@ -298,3 +303,9 @@ Do not enter prose polish until the current section has passed the reference-con
 - If the user asks to continue tightening the same section, default to a section-level acceptance review first instead of another immediate prose-polish pass.
 - Only recommend another tighten/compress/polish pass after the current section has passed the section-level acceptance gate.
 - If the round introduces or revises key terms, abbreviations, metrics, or mechanism names, include a short terminology note in the final user-facing response that says the full form, approved short form if any, what each term is, and why it matters here, and point to `.lab/writing/terminology-glossary.md` plus the write iteration artifact for the full terminology audit.
+## Stage Report Closeout
+- Before final handoff, write or update `.lab/stage-reports/<date>--write--<target>.md` from `.lab/.managed/templates/stage-report.md`.
+- Fill the `Core Explanation Table` in plain language: background, why now, what section or asset changed, how evidence and writing rules were applied, what worked, what did not work, what was verified, what remains unverified, what needs improvement and why, how to improve and why, key evidence, and the continue/stop/revise/rerun/escalate/handoff decision.
+- Run `.lab/.managed/scripts/validate_stage_report.py --stage-report <stage-report> --stage write` and include the report path plus validation result in the final user-facing summary.

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "superlab",
-  "version": "0.1.69",
+  "version": "0.1.71",
   "description": "Strict /lab research workflow installer for Codex and Claude",
   "keywords": [
     "codex",