npm - superlab - Versions diffs - 0.1.18 → 0.1.20 - Mend

superlab 0.1.18 → 0.1.20

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (14) hide show

package/bin/superlab.cjs +8 -0
package/lib/auto_runner.cjs +7 -5
package/lib/context.cjs +626 -0
package/lib/i18n.cjs +61 -0
package/package-assets/shared/lab/.managed/templates/final-report.md +38 -0
package/package-assets/shared/lab/context/eval-protocol.md +1 -0
package/package-assets/shared/lab/context/mission.md +2 -0
package/package-assets/shared/skills/lab/SKILL.md +8 -3
package/package-assets/shared/skills/lab/stages/auto.md +4 -0
package/package-assets/shared/skills/lab/stages/idea.md +3 -0
package/package-assets/shared/skills/lab/stages/iterate.md +2 -0
package/package-assets/shared/skills/lab/stages/report.md +15 -0
package/package-assets/shared/skills/lab/stages/run.md +3 -0
package/package.json +1 -1

package/bin/superlab.cjs CHANGED Viewed

@@ -12,6 +12,8 @@ const {
 } = require("../lib/install.cjs");
 const {
   archiveContext,
+  collaboratorEvalIssues,
+  collaboratorMissionIssues,
   pruneContext,
   refreshContext,
 } = require("../lib/context.cjs");
@@ -788,6 +790,8 @@ function printDoctor(options) {
   const templateIssues = validatePaperTemplateRoot(options.targetDir, config);
   const dataDecisionIssues = validateDataDecisions(options.targetDir);
   const evalProtocolIssues = validateEvalProtocol(options.targetDir);
+  const missionContextIssues = collaboratorMissionIssues(options.targetDir);
+  const collaboratorProtocolIssues = collaboratorEvalIssues(options.targetDir);
   const rootIssues = validateProjectRoots(options.targetDir, config);
   const autoStatus = getAutoStatus({ targetDir: options.targetDir });
   const autoIssues = autoStatus.issues;
@@ -806,6 +810,8 @@ function printDoctor(options) {
     templateIssues.length > 0 ||
     dataDecisionIssues.length > 0 ||
     evalProtocolIssues.length > 0 ||
+    missionContextIssues.length > 0 ||
+    collaboratorProtocolIssues.length > 0 ||
     rootIssues.length > 0 ||
     autoIssues.length > 0
   ) {
@@ -820,6 +826,8 @@ function printDoctor(options) {
       templateIssues,
       dataDecisionIssues,
       evalProtocolIssues,
+      missionContextIssues,
+      collaboratorProtocolIssues,
       rootIssues,
       autoIssues
     );

package/lib/auto_runner.cjs CHANGED Viewed

@@ -253,7 +253,7 @@ async function startAutoMode({ targetDir, now = new Date() }) {
   const maxFailures = parseInteger(mode.maxFailures, 0);
   const maxIterations = parseInteger(mode.maxIterations, 1);
   const requiredArtifact = resolveRequiredArtifact(targetDir, mode.requiredTerminalArtifact);
-  const frozenCoreSnapshot = snapshotFrozenCore(targetDir, mode.frozenCore);
+  let frozenCoreSnapshot = snapshotFrozenCore(targetDir, mode.frozenCore);
   const { loopStages, finalStages } = splitAutoStages(mode.allowedStages);
   const executedStages = [];
   let failureCount = 0;
@@ -363,13 +363,14 @@ async function startAutoMode({ targetDir, now = new Date() }) {
           nextRung,
           decision: rungId ? `completed rung ${rungId}` : `completed stage ${stage}`,
         });
-        refreshContext({ targetDir });
         const frozenCoreChanges = detectFrozenCoreChanges(frozenCoreSnapshot);
         if (frozenCoreChanges.length > 0) {
           failAutoMode(`frozen core changed: ${frozenCoreChanges.join(", ")}`);
         }
+        refreshContext({ targetDir });
+        frozenCoreSnapshot = snapshotFrozenCore(targetDir, mode.frozenCore);
         const stopCheck = await runCheckCommand({
           targetDir,
           label: `stop check after ${rungId || stage}`,
@@ -437,12 +438,13 @@ async function startAutoMode({ targetDir, now = new Date() }) {
       decision: `promotion policy matched after ${label}`,
     });
     promotionApplied = true;
-    refreshContext({ targetDir });
-    verifyPromotionWriteback(targetDir, promotionSnapshot);
     const frozenCoreChangesAfterPromotion = detectFrozenCoreChanges(frozenCoreSnapshot);
     if (frozenCoreChangesAfterPromotion.length > 0) {
       failAutoMode(`frozen core changed: ${frozenCoreChangesAfterPromotion.join(", ")}`);
     }
+    refreshContext({ targetDir });
+    verifyPromotionWriteback(targetDir, promotionSnapshot);
+    frozenCoreSnapshot = snapshotFrozenCore(targetDir, mode.frozenCore);
   };
   if (evalProtocol.experimentRungs.length > 0) {

package/lib/context.cjs CHANGED Viewed

@@ -2,6 +2,69 @@ const fs = require("node:fs");
 const path = require("node:path");
 const { parseEvalProtocol } = require("./eval_protocol.cjs");
+const PLACEHOLDER_VALUES = new Set(["", "tbd", "none", "待补充", "无"]); // Values that isMeaningful() treats as "not filled in" (compared after trim + lower-casing).
+const MISSION_COLLABORATOR_FIELDS = [ // mission.md fields checked by collaboratorMissionIssues(); each entry pairs a display name with its English/Chinese labels.
+  { name: "One-sentence problem", labels: ["One-sentence problem", "一句话问题"] },
+  { name: "Why it matters", labels: ["Why it matters", "为什么重要"] },
+  { name: "Primary metric", labels: ["Primary metric", "主指标"] },
+  { name: "Success threshold", labels: ["Success threshold", "成功阈值"] },
+  { name: "Dataset or benchmark scope", labels: ["Dataset or benchmark scope", "数据集或 benchmark 范围"] },
+  { name: "Approved direction", labels: ["Approved direction", "已批准方向"] },
+];
+const EVAL_COLLABORATOR_FIELDS = [ // eval-protocol.md fields checked by collaboratorEvalIssues(); same { name, labels } shape as above.
+  { name: "Primary evaluation objective", labels: ["Primary evaluation objective", "主评估目标"] },
+  { name: "Primary metrics", labels: ["Primary metrics", "主指标"] },
+  { name: "Secondary metrics", labels: ["Secondary metrics", "次级指标"] },
+  { name: "Table plan", labels: ["Table plan", "主表计划"] },
+  { name: "Metric glossary", labels: ["Metric glossary", "指标释义"] },
+  { name: "Background sources", labels: ["Background sources", "背景来源"] },
+  {
+    name: "Method and baseline source papers",
+    labels: ["Method and baseline source papers", "方法与基线来源论文"],
+  },
+  {
+    name: "Method and baseline implementation source",
+    labels: ["Method and baseline implementation source", "方法与基线实现来源"],
+  },
+  { name: "Metric source papers", labels: ["Metric source papers", "指标来源论文"] },
+  { name: "Required output artifacts", labels: ["Required output artifacts", "必要输出工件"] },
+];
+const REPORT_FIELDS = { // Label lists looked up by key in extractReportValue() when reading values out of the report text.
+  problem: ["Research problem in plain language", "研究问题白话解释", "研究问题"],
+  whyItMatters: ["Why this problem matters", "为什么这个问题重要"],
+  setting: ["What setting or workflow this report is actually about", "这份报告实际对应的场景或流程"],
+  primaryMetrics: ["Primary metrics", "主指标"],
+  secondaryMetrics: ["Secondary metrics", "次级指标"],
+  requiredTerminalEvidence: ["Required terminal evidence", "必要终局证据"],
+  metricGuidePrimary: ["Primary metric plain-language explanation", "主指标白话解释"],
+  metricGuideSecondary: ["Secondary metric plain-language explanation", "次级指标白话解释"],
+  metricGuideSupport: [
+    "Health or support metrics and why they are not the main claim",
+    "健康度或支持性指标以及它们为什么不是主 claim",
+  ],
+  backgroundSources: ["Most important background papers or benchmark references", "最重要的背景论文或 benchmark 参考"],
+  backgroundAnchors: ["Why these are the right background anchors", "为什么这些是合适的背景锚点"],
+  methodBasis: ["Our method source or implementation basis", "我们的方法来源或实现基础"],
+  baselineSourcePapers: ["Baseline and comparison source papers", "基线与对比方法来源论文"],
+  baselineImplementationSources: [
+    "Baseline and comparison implementation sources",
+    "基线与对比方法实现来源",
+  ],
+  metricSourcePapers: ["Metric source papers", "指标来源论文"],
+  metricImplementationSource: ["Metric implementation source", "指标实现来源"],
+  metricDeviation: ["Deviation from original implementation", "与原始实现的偏差"],
+  datasets: ["Datasets", "数据集"],
+  baselines: ["Baselines", "基线"],
+  metrics: ["Metrics", "指标"],
+  finalPerformanceSummary: ["Final performance summary", "最终表现总结"],
+  tableCoverage: ["Table coverage", "表格覆盖范围"],
+};
+const TERMINOLOGY_FIELDS = { // Label lists for terminology-lock.md; methodName and contributionBullets are read in buildContextSnapshot().
+  methodName: ["Method name", "方法名"],
+  shortName: ["Short name or acronym", "简称或缩写"], // NOTE(review): no reader of shortName is visible in this diff — confirm it is used elsewhere.
+  contributionBullets: ["Contribution bullets", "贡献 bullets", "Contribution bullets："], // Third entry carries a full-width colon; presumably to match labels typed with "：" — verify against extractValue().
+};
 function contextFile(targetDir, name) {
   return path.join(targetDir, ".lab", "context", name);
 }
@@ -58,6 +121,278 @@ function joinNonEmpty(parts, separator = "; ") {
   return parts.filter(Boolean).join(separator);
 }
/**
 * Reports whether a context value carries real content.
 *
 * The value is trimmed and lower-cased, then looked up in PLACEHOLDER_VALUES;
 * anything found there (empty string, "tbd", "none", ...) is not meaningful.
 *
 * @param {unknown} value - Raw field value. Falsy values count as empty.
 * @returns {boolean} True when the value is not a known placeholder.
 */
function isMeaningful(value) {
  // Truthy non-strings (numbers, arrays) used to crash on `.trim()`; coerce
  // them to text instead so one odd field cannot abort a whole refresh.
  const text = value ? String(value) : "";
  return !PLACEHOLDER_VALUES.has(text.trim().toLowerCase());
}
/**
 * Loads `.lab/config/workflow.json` from the project, best-effort.
 *
 * @param {string} targetDir - Project root that contains the `.lab` folder.
 * @returns {object} The parsed configuration object, or `{}` when the file is
 *   missing, unreadable, malformed, or does not contain a JSON object.
 */
function readWorkflowConfig(targetDir) {
  const configPath = path.join(targetDir, ".lab", "config", "workflow.json");
  let parsed;
  try {
    parsed = JSON.parse(fs.readFileSync(configPath, "utf8"));
  } catch {
    // Deliberate best-effort: a missing or broken config means "use defaults".
    return {};
  }
  // JSON.parse happily returns null, arrays, or primitives; callers read
  // properties off the result, so only a plain object is acceptable.
  const isObject = parsed !== null && typeof parsed === "object" && !Array.isArray(parsed);
  return isObject ? parsed : {};
}
/**
 * Turns a configured, possibly relative path into an absolute one.
 *
 * @param {string} targetDir - Base directory used for relative paths.
 * @param {unknown} configuredPath - Path taken from configuration.
 * @returns {string} The resolved path, or "" when `configuredPath` is not a
 *   non-empty string.
 */
function resolveProjectPath(targetDir, configuredPath) {
  const isUsable = typeof configuredPath === "string" && configuredPath.length > 0;
  return isUsable ? path.resolve(targetDir, configuredPath) : "";
}
/**
 * Computes where the collaborator-facing deliverables live.
 *
 * The root comes from `deliverables_root` in the workflow config and falls
 * back to `docs/research` under the project.
 *
 * @param {string} targetDir - Project root.
 * @returns {{deliverablesRoot: string, reportPath: string, mainTablesPath: string}}
 *   Absolute root plus the paths of `report.md` and `main-tables.md` inside it.
 */
function getCollaboratorDeliverablePaths(targetDir) {
  const config = readWorkflowConfig(targetDir) || {};
  // A truthy non-string setting (e.g. a number) resolves to "", which used to
  // make report.md / main-tables.md relative to the process CWD. Fall back to
  // the default root whenever the configured one cannot be resolved.
  const deliverablesRoot =
    resolveProjectPath(targetDir, config.deliverables_root) ||
    resolveProjectPath(targetDir, "docs/research");
  return {
    deliverablesRoot,
    reportPath: path.join(deliverablesRoot, "report.md"),
    mainTablesPath: path.join(deliverablesRoot, "main-tables.md"),
  };
}
/**
 * Tells whether the project already has a collaborator-facing deliverable.
 *
 * @param {string} targetDir - Project root.
 * @returns {boolean} True when `report.md` or `main-tables.md` exists under
 *   the deliverables root.
 */
function hasCollaboratorFacingDeliverables(targetDir) {
  const deliverables = getCollaboratorDeliverablePaths(targetDir);
  // Checked in the same order as before; `some` stops at the first hit.
  return [deliverables.reportPath, deliverables.mainTablesPath].some((candidate) =>
    fs.existsSync(candidate)
  );
}
/**
 * Lists the fields whose value in `text` is absent or still a placeholder.
 *
 * @param {string} text - Document to inspect.
 * @param {Array<{name: string, labels: string[]}>} fields - Field definitions.
 * @returns {string[]} Names of the fields that are not meaningfully filled in,
 *   in the order they appear in `fields`.
 */
function missingCollaboratorFields(text, fields) {
  const missingNames = [];
  for (const field of fields) {
    const currentValue = extractValue(text, field.labels);
    if (!isMeaningful(currentValue)) {
      missingNames.push(field.name);
    }
  }
  return missingNames;
}
/**
 * Checks whether mission.md is complete enough for collaborator-facing reports.
 *
 * Nothing is reported when the project has no collaborator-facing deliverables
 * yet or when mission.md cannot be read.
 *
 * @param {string} targetDir - Project root.
 * @returns {string[]} Empty, or a single message naming the unfilled fields.
 */
function collaboratorMissionIssues(targetDir) {
  const missionText = hasCollaboratorFacingDeliverables(targetDir)
    ? readFileIfExists(contextFile(targetDir, "mission.md"))
    : "";
  if (!missionText) {
    return [];
  }
  const gaps = missingCollaboratorFields(missionText, MISSION_COLLABORATOR_FIELDS);
  if (gaps.length === 0) {
    return [];
  }
  return [`mission context is still skeletal for collaborator-facing reporting: ${gaps.join(", ")}`];
}
/**
 * Checks whether eval-protocol.md is complete enough for collaborator-facing
 * reports.
 *
 * Nothing is reported when the project has no collaborator-facing deliverables
 * yet or when eval-protocol.md cannot be read.
 *
 * @param {string} targetDir - Project root.
 * @returns {string[]} Empty, or a single message naming the unfilled fields.
 */
function collaboratorEvalIssues(targetDir) {
  const protocolText = hasCollaboratorFacingDeliverables(targetDir)
    ? readFileIfExists(contextFile(targetDir, "eval-protocol.md"))
    : "";
  if (!protocolText) {
    return [];
  }
  const gaps = missingCollaboratorFields(protocolText, EVAL_COLLABORATOR_FIELDS);
  if (gaps.length === 0) {
    return [];
  }
  return [`evaluation protocol is still skeletal for collaborator-facing reporting: ${gaps.join(", ")}`];
}
/**
 * Reads one logical field out of the report text.
 *
 * @param {string} reportText - Contents of the report.
 * @param {string} key - Key into REPORT_FIELDS selecting the labels to try.
 * @returns {*} Whatever `extractValue` returns for those labels; an unknown
 *   key is looked up with an empty label list.
 */
function extractReportValue(reportText, key) {
  const knownLabels = REPORT_FIELDS[key];
  const labels = knownLabels ? knownLabels : [];
  return extractValue(reportText, labels);
}
/**
 * Picks the first meaningful value, preferring what is already on file.
 *
 * @param {*} existingValue - Current value; kept whenever it is meaningful.
 * @param {...*} candidates - Fallbacks, tried in order.
 * @returns {*} The first value accepted by `isMeaningful`, or "" when none is.
 */
function mergePreferred(existingValue, ...candidates) {
  const ranked = [existingValue, ...candidates];
  const winner = ranked.find((value) => isMeaningful(value));
  // `find` yields undefined when nothing qualifies; callers expect "".
  return winner === undefined ? "" : winner;
}
/**
 * Returns the first entry of a delimited metric list.
 *
 * Entries may be separated by ASCII `;` / `,` or by the full-width `；`, `，`
 * and `、` commonly used in Chinese-language context files.
 *
 * @param {unknown} metrics - Delimited metric list; non-strings count as empty.
 * @returns {string} The first non-blank entry, trimmed, or "" if there is none.
 */
function firstMetric(metrics) {
  if (typeof metrics !== "string") {
    // Previously a truthy non-string (e.g. an array) crashed on `.split`.
    return "";
  }
  const [first = ""] = metrics
    .split(/[;,；，、]/)
    .map((entry) => entry.trim())
    .filter(Boolean);
  return first;
}
+function buildMissionContextText(lang, fields) { // Renders the mission.md text from `fields`; every empty field falls back to a placeholder ("待补充" or "TBD").
+  if (lang === "zh") { // Chinese template only for the exact language code "zh"; every other value gets the English template below.
+    return `# 研究主线
+## 核心问题
+- 一句话问题: ${fields.problem || "待补充"}
+- 为什么重要: ${fields.whyItMatters || "待补充"}
+- 目标失败场景: ${fields.targetFailureCase || "待补充"}
+## 成功标准
+- 主指标: ${fields.primaryMetric || "待补充"}
+- 成功阈值: ${fields.successThreshold || "待补充"}
+- 必须对比的 baseline: ${fields.requiredBaselineComparison || "待补充"}
+- 最小证据要求: ${fields.minimumEvidenceRequirement || "待补充"}
+## 冻结边界
+- 数据集或 benchmark 范围: ${fields.datasetScope || "待补充"}
+- 切分策略: ${fields.splitPolicy || "待补充"}
+- 评估协议: ${fields.evaluationProtocol || "待补充"}
+- 硬约束: ${fields.hardConstraints || "待补充"}
+## 当前状态
+- 已批准方向: ${fields.approvedDirection || "待补充"}
+- 当前 owner 或会话: ${fields.currentOwner || "待补充"}
+- 最近一次允许更新 mission 的阶段: ${fields.latestStage || "待补充"}
+- 回填来源: ${fields.hydrationProvenance || "待补充"}
+- 协作者可读状态: ${fields.collaboratorReadyStatus || "待补充"}
+`;
+  } // End of the Chinese branch.
+  return `# Research Mission
+## Core Problem
+- One-sentence problem: ${fields.problem || "TBD"}
+- Why it matters: ${fields.whyItMatters || "TBD"}
+- Target failure case: ${fields.targetFailureCase || "TBD"}
+## Success Criteria
+- Primary metric: ${fields.primaryMetric || "TBD"}
+- Success threshold: ${fields.successThreshold || "TBD"}
+- Required baseline comparison: ${fields.requiredBaselineComparison || "TBD"}
+- Minimum evidence requirement: ${fields.minimumEvidenceRequirement || "TBD"}
+## Frozen Boundaries
+- Dataset or benchmark scope: ${fields.datasetScope || "TBD"}
+- Split policy: ${fields.splitPolicy || "TBD"}
+- Evaluation protocol: ${fields.evaluationProtocol || "TBD"}
+- Hard constraints: ${fields.hardConstraints || "TBD"}
+## Current Status
+- Approved direction: ${fields.approvedDirection || "TBD"}
+- Current owner or session: ${fields.currentOwner || "TBD"}
+- Latest stage to update this mission: ${fields.latestStage || "TBD"}
+- Hydration provenance: ${fields.hydrationProvenance || "TBD"}
+- Collaborator-ready status: ${fields.collaboratorReadyStatus || "TBD"}
+`;
+} // Returns a string; nothing is written to disk here.
+function buildEvalProtocolText(lang, fields, rungs) { // Renders the eval-protocol.md text from `fields`, followed by one "### Rung" block per entry of `rungs`.
+  const rungBlocks = (rungs || []) // A missing `rungs` argument is treated as an empty list.
+    .map((rung) => { // Each rung becomes a Markdown block; empty rung properties fall back to a placeholder.
+      if (lang === "zh") { // Chinese labels only for the exact language code "zh".
+        return `### Rung: ${rung.id}
+- 阶段: ${rung.stage || "待补充"}
+- 目标: ${rung.goal || "待补充"}
+- 命令: ${rung.command || "待补充"}
+- 监视目标: ${rung.watch || "待补充"}
+- gate 命令: ${rung.gate || "待补充"}
+- 通过后: ${rung.onPass || "待补充"}
+- 失败后: ${rung.onFail || "待补充"}
+- 停止后: ${rung.onStop || "待补充"}`;
+      } // End of the Chinese rung block.
+      return `### Rung: ${rung.id}
+- Stage: ${rung.stage || "TBD"}
+- Goal: ${rung.goal || "TBD"}
+- Command: ${rung.command || "TBD"}
+- Watch: ${rung.watch || "TBD"}
+- Gate: ${rung.gate || "TBD"}
+- On pass: ${rung.onPass || "TBD"}
+- On fail: ${rung.onFail || "TBD"}
+- On stop: ${rung.onStop || "TBD"}`;
+    }) // End of the per-rung renderer.
+    .join("\n\n"); // Rung blocks are separated by one blank line.
+  if (lang === "zh") { // Chinese document template; the English one follows.
+    return `# 评估协议
+用这份文件定义 \`/lab:run\`、\`/lab:iterate\`、\`/lab:auto\` 和 \`/lab:report\` 共用的论文导向评估目标、主表计划、gate 与 benchmark ladder。
+## 主评估目标
+- 主评估目标: ${fields.primaryEvaluationObjective || "待补充"}
+- 主指标: ${fields.primaryMetrics || "待补充"}
+- 次级指标: ${fields.secondaryMetrics || "待补充"}
+- 必要终局证据: ${fields.requiredTerminalEvidence || "待补充"}
+## 主表计划
+- 主表计划: ${fields.tablePlan || "待补充"}
+- 每张表必须支撑的 claims: ${fields.requiredClaimsPerTable || "待补充"}
+## 指标释义
+- 指标释义: ${fields.metricGlossary || "待补充"}
+- 回填来源: ${fields.hydrationProvenance || "待补充"}
+- 背景来源: ${fields.backgroundSources || "待补充"}
+- 方法与基线来源论文: ${fields.methodAndBaselineSourcePapers || "待补充"}
+- 方法与基线实现来源: ${fields.methodAndBaselineImplementationSource || "待补充"}
+- 指标来源论文: ${fields.metricSourcePapers || "待补充"}
+- 指标实现来源: ${fields.metricImplementationSource || "待补充"}
+- 对比方法来源论文: ${fields.comparisonSourcePapers || "待补充"}
+- 对比方法实现来源: ${fields.comparisonImplementationSource || "待补充"}
+- 与原始实现的偏差: ${fields.deviationFromOriginalImplementation || "待补充"}
+## Gate Ladder
+- 实验阶梯: ${fields.experimentLadder || "待补充"}
+- benchmark 阶梯: ${fields.benchmarkLadder || "待补充"}
+- 对比方法 gate: ${fields.comparisonGate || "待补充"}
+- 升格 gate: ${fields.promotionGate || "待补充"}
+- 最小样本量: ${fields.minimumSampleSizes || "待补充"}
+- 必要输出工件: ${fields.requiredOutputArtifacts || "待补充"}
+${rungBlocks}
+`;
+  } // End of the Chinese branch.
+  return `# Evaluation Protocol
+Use this file to define the paper-facing evaluation target, table plan, gates, and benchmark ladder shared by \`/lab:run\`, \`/lab:iterate\`, \`/lab:auto\`, and \`/lab:report\`.
+## Primary Evaluation Objective
+- Primary evaluation objective: ${fields.primaryEvaluationObjective || "TBD"}
+- Primary metrics: ${fields.primaryMetrics || "TBD"}
+- Secondary metrics: ${fields.secondaryMetrics || "TBD"}
+- Required terminal evidence: ${fields.requiredTerminalEvidence || "TBD"}
+## Table Plan
+- Table plan: ${fields.tablePlan || "TBD"}
+- Required claims per table: ${fields.requiredClaimsPerTable || "TBD"}
+## Metric Glossary
+- Metric glossary: ${fields.metricGlossary || "TBD"}
+- Hydration provenance: ${fields.hydrationProvenance || "TBD"}
+- Background sources: ${fields.backgroundSources || "TBD"}
+- Method and baseline source papers: ${fields.methodAndBaselineSourcePapers || "TBD"}
+- Method and baseline implementation source: ${fields.methodAndBaselineImplementationSource || "TBD"}
+- Metric source papers: ${fields.metricSourcePapers || "TBD"}
+- Metric implementation source: ${fields.metricImplementationSource || "TBD"}
+- Comparison source papers: ${fields.comparisonSourcePapers || "TBD"}
+- Comparison implementation source: ${fields.comparisonImplementationSource || "TBD"}
+- Deviation from original implementation: ${fields.deviationFromOriginalImplementation || "TBD"}
+## Gate Ladder
+- Experiment ladder: ${fields.experimentLadder || "TBD"}
+- Benchmark ladder: ${fields.benchmarkLadder || "TBD"}
+- Comparison gate: ${fields.comparisonGate || "TBD"}
+- Promotion gate: ${fields.promotionGate || "TBD"}
+- Minimum sample sizes: ${fields.minimumSampleSizes || "TBD"}
+- Required output artifacts: ${fields.requiredOutputArtifacts || "TBD"}
+${rungBlocks}
+`;
+} // Returns a string; nothing is written to disk here.
 function extractClaim(text) {
   const blocks = text
     .split(/\n(?=\d+\.\s)/)
@@ -76,6 +411,264 @@ function labelValue(text, englishLabels, chineseLabels = []) {
   return extractValue(text, [...englishLabels, ...chineseLabels]);
 }
/**
 * Lists the artifacts that can back-fill mission and evaluation context.
 *
 * @param {string} targetDir - Project root.
 * @returns {string[]} Paths of the deliverables that exist (relative to
 *   `targetDir`) followed by the `.lab/context` files that have content, in a
 *   fixed order: report, main tables, data decisions, state, evidence index.
 */
function collectHydrationSources(targetDir) {
  const { reportPath, mainTablesPath } = getCollaboratorDeliverablePaths(targetDir);
  const sources = [];
  for (const deliverable of [reportPath, mainTablesPath]) {
    if (fs.existsSync(deliverable)) {
      sources.push(path.relative(targetDir, deliverable));
    }
  }
  for (const contextName of ["data-decisions.md", "state.md", "evidence-index.md"]) {
    if (readFileIfExists(contextFile(targetDir, contextName))) {
      sources.push(`.lab/context/${contextName}`);
    }
  }
  // Mirrors the original filter: never report an empty path.
  return sources.filter(Boolean);
}
+function hydrateMissionContext(targetDir) { // Back-fills mission.md from the report and other context files; returns true only when mission.md was rewritten.
+  if (!hasCollaboratorFacingDeliverables(targetDir)) { // No report.md or main-tables.md yet: nothing to hydrate from.
+    return false;
+  }
+  const lang = readWorkflowLanguage(targetDir);
+  const missionText = readFileIfExists(contextFile(targetDir, "mission.md"));
+  const stateText = readFileIfExists(contextFile(targetDir, "state.md"));
+  const evidenceText = readFileIfExists(contextFile(targetDir, "evidence-index.md"));
+  const dataDecisions = readFileIfExists(contextFile(targetDir, "data-decisions.md"));
+  const reportText = readFileIfExists(getCollaboratorDeliverablePaths(targetDir).reportPath);
+  const evalProtocol = parseEvalProtocol(targetDir);
+  const hydrationSources = collectHydrationSources(targetDir);
+  const fields = { // Each field keeps the current mission.md value when it is meaningful (see mergePreferred) and otherwise takes the first meaningful fallback.
+    problem: mergePreferred(
+      extractValue(missionText, ["One-sentence problem", "一句话问题"]),
+      extractReportValue(reportText, "problem"),
+      extractValue(stateText, ["Current objective", "当前目标", "Current objective"]) // NOTE(review): "Current objective" is listed twice — looks redundant; confirm against extractValue().
+    ),
+    whyItMatters: mergePreferred(
+      extractValue(missionText, ["Why it matters", "为什么重要"]),
+      extractReportValue(reportText, "whyItMatters")
+    ),
+    targetFailureCase: extractValue(missionText, ["Target failure case", "目标失败场景"]), // Carried over from mission.md only; no fallback source.
+    primaryMetric: mergePreferred(
+      extractValue(missionText, ["Primary metric", "主指标"]),
+      evalProtocol.primaryMetrics,
+      extractReportValue(reportText, "primaryMetrics"),
+      firstMetric(evalProtocol.primaryMetrics) // NOTE(review): appears unreachable — the full primaryMetrics string is already tried two candidates earlier; confirm the intended order.
+    ),
+    successThreshold: mergePreferred(
+      extractValue(missionText, ["Success threshold", "成功阈值"]),
+      extractValue(readFileIfExists(contextFile(targetDir, "auto-outcome.md")), ["Terminal goal target", "终止目标目标值"])
+    ),
+    requiredBaselineComparison: extractValue(
+      missionText,
+      ["Required baseline comparison", "必须对比的 baseline"]
+    ),
+    minimumEvidenceRequirement: extractValue(
+      missionText,
+      ["Minimum evidence requirement", "最小证据要求"]
+    ),
+    datasetScope: mergePreferred(
+      extractValue(missionText, ["Dataset or benchmark scope", "数据集或 benchmark 范围"]),
+      extractValue(dataDecisions, ["Approved dataset package", "Approved datasets", "已批准数据集包", "已批准数据集"]),
+      extractReportValue(reportText, "datasets")
+    ),
+    splitPolicy: extractValue(missionText, ["Split policy", "切分策略"]),
+    evaluationProtocol: mergePreferred(
+      extractValue(missionText, ["Evaluation protocol", "评估协议"]),
+      ".lab/context/eval-protocol.md" // Fallback is a pointer to the protocol file rather than copied content.
+    ),
+    hardConstraints: extractValue(missionText, ["Hard constraints", "硬约束"]),
+    approvedDirection: mergePreferred(
+      extractValue(missionText, ["Approved direction", "已批准方向"]),
+      extractClaim(evidenceText)
+    ),
+    currentOwner: extractValue(missionText, ["Current owner or session", "当前 owner 或会话"]),
+    latestStage: mergePreferred(
+      extractValue(missionText, ["Latest stage to update this mission", "最近一次允许更新 mission 的阶段"]),
+      extractValue(stateText, ["Active stage", "当前阶段", "Stage"])
+    ),
+  };
+  const before = missionText; // Text currently on disk, kept for the change check below.
+  fields.hydrationProvenance = hydrationSources.length > 0 ? hydrationSources.join("; ") : "";
+  fields.collaboratorReadyStatus = // Decided by rendering once without a status and checking the required collaborator fields on that draft.
+    missingCollaboratorFields(buildMissionContextText(lang, fields), MISSION_COLLABORATOR_FIELDS).length === 0
+      ? "hydrated"
+      : "artifact-anchored interim";
+  const nextText = buildMissionContextText(lang, fields); // Final render, now including the status line.
+  if (nextText !== before) { // Write only when the rendered text differs from what was read.
+    writeContextFile(targetDir, "mission.md", nextText);
+    return true;
+  }
+  return false;
+}
+function hydrateEvalProtocol(targetDir) { // Back-fills eval-protocol.md from mission.md, the report and data decisions; returns true only when the file was rewritten.
+  if (!hasCollaboratorFacingDeliverables(targetDir)) { // No report.md or main-tables.md yet: nothing to hydrate from.
+    return false;
+  }
+  const lang = readWorkflowLanguage(targetDir);
+  const protocol = parseEvalProtocol(targetDir);
+  const missionText = readFileIfExists(contextFile(targetDir, "mission.md"));
+  const reportText = readFileIfExists(getCollaboratorDeliverablePaths(targetDir).reportPath);
+  const dataDecisions = readFileIfExists(contextFile(targetDir, "data-decisions.md"));
+  const hydrationSources = collectHydrationSources(targetDir);
+  const { reportPath, mainTablesPath } = getCollaboratorDeliverablePaths(targetDir); // NOTE(review): second lookup of the same paths as three lines above; could share one call.
+  const requiredOutputs = [ // Default artifact list: whichever deliverables exist, plus the evidence index.
+    fs.existsSync(reportPath) ? path.relative(targetDir, reportPath) : "",
+    fs.existsSync(mainTablesPath) ? path.relative(targetDir, mainTablesPath) : "",
+    ".lab/context/evidence-index.md",
+  ]
+    .filter(Boolean)
+    .join(", ");
+  const fields = { // Parsed protocol values win when meaningful (see mergePreferred); otherwise the first meaningful fallback is used.
+    primaryEvaluationObjective: mergePreferred(
+      protocol.primaryEvaluationObjective,
+      extractValue(missionText, ["One-sentence problem", "一句话问题"]),
+      extractReportValue(reportText, "problem")
+    ),
+    primaryMetrics: mergePreferred(
+      protocol.primaryMetrics,
+      extractValue(missionText, ["Primary metric", "主指标"]),
+      extractReportValue(reportText, "primaryMetrics")
+    ),
+    secondaryMetrics: mergePreferred(
+      protocol.secondaryMetrics,
+      extractReportValue(reportText, "secondaryMetrics")
+    ),
+    requiredTerminalEvidence: mergePreferred(
+      protocol.requiredTerminalEvidence,
+      extractReportValue(reportText, "requiredTerminalEvidence"),
+      requiredOutputs
+    ),
+    tablePlan: mergePreferred(
+      protocol.tablePlan,
+      fs.existsSync(mainTablesPath) ? `See ${path.relative(targetDir, mainTablesPath)}` : "" // Points at main-tables.md instead of copying its content.
+    ),
+    requiredClaimsPerTable: protocol.requiredClaimsPerTable, // Passed through unchanged; no fallback source.
+    metricGlossary: mergePreferred(
+      protocol.metricGlossary,
+      joinNonEmpty( // Glossary fallback: the report's metric explanations joined with " | ".
+        [
+          extractReportValue(reportText, "metricGuidePrimary"),
+          extractReportValue(reportText, "metricGuideSecondary"),
+          extractReportValue(reportText, "metricGuideSupport"),
+        ],
+        " | "
+      )
+    ),
+    backgroundSources: mergePreferred(
+      protocol.backgroundSources,
+      extractReportValue(reportText, "backgroundSources"),
+      extractValue(dataDecisions, ["Papers that used the approved datasets", "使用过已批准数据集的论文", "使用过该数据集的论文"])
+    ),
+    methodAndBaselineSourcePapers: mergePreferred(
+      protocol.methodAndBaselineSourcePapers,
+      extractReportValue(reportText, "baselineSourcePapers")
+    ),
+    methodAndBaselineImplementationSource: mergePreferred(
+      protocol.methodAndBaselineImplementationSource,
+      extractReportValue(reportText, "baselineImplementationSources")
+    ),
+    metricSourcePapers: mergePreferred(
+      protocol.metricSourcePapers,
+      extractReportValue(reportText, "metricSourcePapers")
+    ),
+    metricImplementationSource: mergePreferred(
+      protocol.metricImplementationSource,
+      extractReportValue(reportText, "metricImplementationSource")
+    ),
+    comparisonSourcePapers: mergePreferred(
+      protocol.comparisonSourcePapers,
+      extractReportValue(reportText, "baselineSourcePapers") // Same report field as methodAndBaselineSourcePapers above.
+    ),
+    comparisonImplementationSource: mergePreferred(
+      protocol.comparisonImplementationSource,
+      extractReportValue(reportText, "baselineImplementationSources") // Same report field as methodAndBaselineImplementationSource above.
+    ),
+    deviationFromOriginalImplementation: mergePreferred(
+      protocol.deviationFromOriginalImplementation,
+      extractReportValue(reportText, "metricDeviation")
+    ),
+    benchmarkLadder: protocol.benchmarkLadder, // Gate-ladder values below are passed through from the parsed protocol.
+    experimentLadder: protocol.experimentLadder,
+    comparisonGate: protocol.comparisonGate,
+    promotionGate: protocol.promotionGate,
+    minimumSampleSizes: protocol.minimumSampleSizes,
+    requiredOutputArtifacts: mergePreferred(protocol.requiredOutputArtifacts, requiredOutputs),
+    hydrationProvenance: hydrationSources.length > 0 ? hydrationSources.join("; ") : "",
+  };
+  const nextText = buildEvalProtocolText(lang, fields, protocol.experimentRungs);
+  if (nextText !== protocol.text) { // Write only when the rendered text differs; presumably protocol.text is the raw file content — verify in eval_protocol.cjs.
+    writeContextFile(targetDir, "eval-protocol.md", nextText);
+    return true;
+  }
+  return false;
+}
/**
 * Summarises whether mission and evaluation context are complete enough for a
 * collaborator-facing report.
 *
 * @param {string} targetDir - Project root.
 * @returns {{mode: string, readiness: string, reason: string, issues: string[]}}
 *   "collaborator-ready" when neither check reports an issue, otherwise
 *   "artifact-anchored interim" with the issues joined into `reason`.
 */
function getCollaboratorReportStatus(targetDir) {
  // Mission is checked before the evaluation protocol, as before.
  const issues = [...collaboratorMissionIssues(targetDir), ...collaboratorEvalIssues(targetDir)];
  if (issues.length === 0) {
    return {
      mode: "collaborator-ready",
      readiness: "ready",
      reason: "canonical mission and evaluation context are complete enough for collaborator-facing reporting",
      issues: [],
    };
  }
  return {
    mode: "artifact-anchored interim",
    readiness: "hydrated but incomplete",
    reason: issues.join(" | "),
    issues,
  };
}
/**
 * Inserts or replaces a Markdown section identified by its heading line.
 *
 * The section starts at the first line equal to `heading` (ignoring trailing
 * whitespace) and runs until the next heading of the same or a higher rank —
 * and always stops at a `#` or `##` heading — or until the end of the text.
 * Deeper sub-headings inside it are replaced together with it. When no such
 * heading line exists, the section is appended after a blank line.
 *
 * Calling this twice with the same arguments yields the same text, so callers
 * can compare input and output to detect changes.
 *
 * @param {string} text - Current document.
 * @param {string} heading - Full heading line, e.g. "## Report Status".
 * @param {string[]} bodyLines - Lines placed under the heading.
 * @returns {string} The updated document.
 */
function upsertSection(text, heading, bodyLines) {
  const sectionText = `${heading}\n\n${bodyLines.join("\n")}`.trimEnd();
  const wantedHeading = heading.trimEnd();
  const headingLevel = (/^#+/.exec(wantedHeading) || [""])[0].length;
  // Never swallow a following `#`/`##` section, whatever rank we were given.
  const stopLevel = Math.max(headingLevel, 2);

  // Work line by line: JavaScript regexes have no `\Z` end-of-input anchor, and
  // plain string splicing keeps `$&`-style sequences in the body literal.
  const lines = text.split("\n");
  const start = lines.findIndex((line) => line.trimEnd() === wantedHeading);
  if (start === -1) {
    const base = text.trimEnd();
    return base ? `${base}\n\n${sectionText}\n` : `${sectionText}\n`;
  }

  let end = lines.length;
  for (let index = start + 1; index < lines.length; index += 1) {
    const match = /^(#{1,6})(?:\s|$)/.exec(lines[index]);
    if (match && match[1].length <= stopLevel) {
      end = index;
      break;
    }
  }

  const prefix = start > 0 ? `${lines.slice(0, start).join("\n")}\n` : "";
  if (end === lines.length) {
    return `${prefix}${sectionText}\n`;
  }
  // Keep a blank line so the following heading stays on its own line.
  return `${prefix}${sectionText}\n\n${lines.slice(end).join("\n")}`;
}
/**
 * Writes the current report status into the "Report Status" section of
 * report.md.
 *
 * The section is a level-2 heading when the report begins with a level-1
 * title line, and a level-1 heading otherwise.
 *
 * @param {string} targetDir - Project root.
 * @param {{mode: string, readiness: string, reason: string}} status - Status
 *   to record.
 * @returns {boolean} True when report.md was rewritten; false when the file
 *   does not exist or already carries the same section.
 */
function syncCollaboratorReportStatus(targetDir, status) {
  const { reportPath } = getCollaboratorDeliverablePaths(targetDir);
  if (!fs.existsSync(reportPath)) {
    return false;
  }
  const current = fs.readFileSync(reportPath, "utf8");
  const startsWithTitle = /^#\s/.test(current);
  const heading = startsWithTitle ? "## Report Status" : "# Report Status";
  const statusLines = [
    `- Report mode: ${status.mode}`,
    `- Canonical context readiness: ${status.readiness}`,
    `- Why the current mode is appropriate: ${status.reason}`,
  ];
  const updated = upsertSection(current, heading, statusLines);
  if (updated === current) {
    return false;
  }
  // Normalise to exactly one trailing newline on write.
  fs.writeFileSync(reportPath, updated.trimEnd() + "\n");
  return true;
}
/**
 * Runs the full hydration pass: mission, then evaluation protocol, then the
 * status section of the report.
 *
 * @param {string} targetDir - Project root.
 * @returns {{missionChanged: boolean, evalChanged: boolean, reportChanged: boolean, reportStatus: object}}
 *   Which files were rewritten, plus the status computed after hydration.
 */
function hydrateCanonicalContext(targetDir) {
  // Property initialisers run top to bottom, so the status is computed only
  // after both hydration steps have had a chance to rewrite their files.
  const outcome = {
    missionChanged: hydrateMissionContext(targetDir),
    evalChanged: hydrateEvalProtocol(targetDir),
    reportChanged: false,
    reportStatus: getCollaboratorReportStatus(targetDir),
  };
  outcome.reportChanged = syncCollaboratorReportStatus(targetDir, outcome.reportStatus);
  return outcome;
}
 function renderSummary(lang, data) {
   if (lang === "zh") {
     return `# 研究摘要
@@ -95,6 +688,11 @@ function renderSummary(lang, data) {
 - Auto stop reason: ${data.autoStopReason || "待补充"}
 - Auto final artifact: ${data.autoFinalArtifact || "待补充"}
 - Auto final rung: ${data.autoFinalRung || "待补充"}
+- Collaborator report mode: ${data.reportMode || "待补充"}
+- Canonical context readiness: ${data.reportReadiness || "待补充"}
+- Why this report mode is active: ${data.reportModeReason || "待补充"}
+- Method name: ${data.methodName || "待补充"}
+- Contribution bullets: ${data.contributionBullets || "待补充"}
 - Eval objective: ${data.evalObjective || "待补充"}
 - Primary metrics: ${data.evalPrimaryMetrics || "待补充"}
 - Secondary metrics: ${data.evalSecondaryMetrics || "待补充"}
@@ -153,6 +751,11 @@ function renderSummary(lang, data) {
 - Auto stop reason: ${data.autoStopReason || "TBD"}
 - Auto final artifact: ${data.autoFinalArtifact || "TBD"}
 - Auto final rung: ${data.autoFinalRung || "TBD"}
+- Collaborator report mode: ${data.reportMode || "TBD"}
+- Canonical context readiness: ${data.reportReadiness || "TBD"}
+- Why this report mode is active: ${data.reportModeReason || "TBD"}
+- Method name: ${data.methodName || "TBD"}
+- Contribution bullets: ${data.contributionBullets || "TBD"}
 - Eval objective: ${data.evalObjective || "TBD"}
 - Primary metrics: ${data.evalPrimaryMetrics || "TBD"}
 - Secondary metrics: ${data.evalSecondaryMetrics || "TBD"}
@@ -266,6 +869,11 @@ ${data.problem || "待补充"}
 - Auto stop reason: ${data.autoStopReason || "待补充"}
 - Auto final artifact: ${data.autoFinalArtifact || "待补充"}
 - Auto final rung: ${data.autoFinalRung || "待补充"}
+- Collaborator report mode: ${data.reportMode || "待补充"}
+- Canonical context readiness: ${data.reportReadiness || "待补充"}
+- Why this report mode is active: ${data.reportModeReason || "待补充"}
+- Method name: ${data.methodName || "待补充"}
+- Contribution bullets: ${data.contributionBullets || "待补充"}
 - Eval objective: ${data.evalObjective || "待补充"}
 - Primary metrics: ${data.evalPrimaryMetrics || "待补充"}
 - Secondary metrics: ${data.evalSecondaryMetrics || "待补充"}
@@ -335,6 +943,11 @@ ${data.problem || "TBD"}
 - Auto stop reason: ${data.autoStopReason || "TBD"}
 - Auto final artifact: ${data.autoFinalArtifact || "TBD"}
 - Auto final rung: ${data.autoFinalRung || "TBD"}
+- Collaborator report mode: ${data.reportMode || "TBD"}
+- Canonical context readiness: ${data.reportReadiness || "TBD"}
+- Why this report mode is active: ${data.reportModeReason || "TBD"}
+- Method name: ${data.methodName || "TBD"}
+- Contribution bullets: ${data.contributionBullets || "TBD"}
 - Eval objective: ${data.evalObjective || "TBD"}
 - Primary metrics: ${data.evalPrimaryMetrics || "TBD"}
 - Secondary metrics: ${data.evalSecondaryMetrics || "TBD"}
@@ -381,11 +994,13 @@ ${data.problem || "TBD"}
 }
 function buildContextSnapshot(targetDir) {
+  const reportStatus = getCollaboratorReportStatus(targetDir);
   const mission = readFileIfExists(contextFile(targetDir, "mission.md"));
   const state = readFileIfExists(contextFile(targetDir, "state.md"));
   const evidence = readFileIfExists(contextFile(targetDir, "evidence-index.md"));
   const questions = readFileIfExists(contextFile(targetDir, "open-questions.md"));
   const dataDecisions = readFileIfExists(contextFile(targetDir, "data-decisions.md"));
+  const terminologyLock = readFileIfExists(contextFile(targetDir, "terminology-lock.md"));
   const autoMode = readFileIfExists(contextFile(targetDir, "auto-mode.md"));
   const autoStatus = readFileIfExists(contextFile(targetDir, "auto-status.md"));
   const autoOutcome = readFileIfExists(contextFile(targetDir, "auto-outcome.md"));
@@ -592,6 +1207,11 @@ function buildContextSnapshot(targetDir) {
     autoStopReason: extractValue(autoOutcome, ["Stop reason", "停止原因"]),
     autoFinalArtifact: extractValue(autoOutcome, ["Final artifact", "最终工件"]),
     autoFinalRung: extractValue(autoOutcome, ["Final rung", "最终 rung"]),
+    reportMode: reportStatus.mode,
+    reportReadiness: reportStatus.readiness,
+    reportModeReason: reportStatus.reason,
+    methodName: extractValue(terminologyLock, TERMINOLOGY_FIELDS.methodName),
+    contributionBullets: extractValue(terminologyLock, TERMINOLOGY_FIELDS.contributionBullets),
     evalObjective: evalProtocol.primaryEvaluationObjective,
     evalPrimaryMetrics: evalProtocol.primaryMetrics,
     evalSecondaryMetrics: evalProtocol.secondaryMetrics,
@@ -619,6 +1239,7 @@ function writeContextFile(targetDir, name, content) {
 }
 function refreshContext({ targetDir }) {
+  hydrateCanonicalContext(targetDir);
   const lang = readWorkflowLanguage(targetDir);
   const snapshot = buildContextSnapshot(targetDir);
   writeContextFile(targetDir, "summary.md", renderSummary(lang, snapshot));
@@ -695,6 +1316,11 @@ function archiveContext({ targetDir, now = new Date() }) {
 module.exports = {
   archiveContext,
+  collaboratorEvalIssues,
+  collaboratorMissionIssues,
+  getCollaboratorReportStatus,
+  hasCollaboratorFacingDeliverables,
+  hydrateCanonicalContext,
   pruneContext,
   refreshContext,
 };

package/lib/i18n.cjs CHANGED Viewed

@@ -289,7 +289,11 @@ const ZH_SKILL_FILES = {
 ## 必要输出
+- 报告状态：collaborator-ready 或 artifact-anchored interim
 - 给用户看的总结
+- 问题与背景的白话说明
+- 数据集场景说明
+- 贡献总结
 - 方法概述
 - 选定指标摘要
 - 指标白话释义
@@ -301,6 +305,7 @@ const ZH_SKILL_FILES = {
 - 失败尝试
 - 局限性
 - 下一步
+- 单独列出的工件状态，而不是混进已验证结果
 ## 上下文读取
@@ -312,6 +317,8 @@ const ZH_SKILL_FILES = {
 ## 上下文写回
+- \`.lab/context/mission.md\`
+- \`.lab/context/eval-protocol.md\`
 - \`.lab/context/state.md\`
 - \`.lab/context/evidence-index.md\`
@@ -324,7 +331,15 @@ const ZH_SKILL_FILES = {
 - 必须把已批准的主指标、次级指标和必要终局证据明确写进 \`report.md\` 与受管的 \`main-tables.md\`。
 - 必须用白话解释选定的主指标和次级指标：每个指标在衡量什么、越高还是越低更好、它是主结果指标还是健康度/支持性指标。
 - 如果出现 coverage、completeness、confidence 或类似健康度指标，必须明确说明这类指标回答的是“实验是否跑稳、证据是否完整”，而不是主要科学效应本身。
+- 要把最关键的背景来源、方法/基线来源和指标来源直接写进报告，不要把它们藏在 \`.lab/context/*\` 里。
+- 如果 \`.lab/context/terminology-lock.md\` 里已经冻结了方法名和 contribution bullets，就必须把它们带进报告。
+- 方法概述必须用协作者能读懂的话说明：我们的方法大致怎么做、相对 closest prior work 或 strongest baseline 改了什么、这些 prior 方法各自做了什么，以及它们为什么在当前 claim 下仍然不够。
+- 只保留少量最关键的 prior work/baseline 锚点；每个锚点都要用一句话交代它做了什么和它的局限。
+- 在起草报告前，先检查 \`.lab/context/mission.md\` 和 \`.lab/context/eval-protocol.md\` 是否仍是模板空壳。
+- 如果 canonical context 还是空壳，要先根据 frozen result artifacts、data-decisions、evidence-index 和已批准上下文回填“最小可信版本”，再写报告。
+- 如果回填后仍缺少协作者可读所需的关键字段，就必须把输出降级成 \`artifact-anchored interim report\`，不能冒充最终协作者报告。
 - 如果报告依赖了对原始指标或原始实现的偏差，必须明确写出这个偏差。
+- workflow 工件状态、rerun id 或 LaTeX 骨架状态不能混进“已验证主结果”；这些内容必须单列到工件状态部分。
 - 如果 workflow language 是中文，\`report.md\` 和 \`<deliverables_root>/main-tables.md\` 也应使用中文，除非文件路径、代码标识符或字面指标名必须保持原样。
 - 解释优先保守，不要写成营销文案。
 - 要给 \`/lab:write\` 留下清晰 handoff，尤其是 section draft 可以直接引用的证据链接。
@@ -334,6 +349,7 @@ const ZH_SKILL_FILES = {
 - 开始前先简洁说明：campaign outcome、选定的主指标和次级指标、最强已支撑 claim、最大的报告风险。
 - 当该阶段由 \`/lab:auto\` 进入时，要主动给出用户可读的白话总结，不要等用户再追问“这些指标是什么意思”或“这些表怎么看”。
 - 把 \`report.md\` 当作给用户看的工件，而不是内部 dump。术语第一次出现时就解释；先讲结论，再讲术语。
+- 把 contribution bullets 当作协作者可读的最终主张摘要，而不是内部 TODO；每条都必须和当前证据边界对齐。
 - 如果某个未决前提会改变报告解释，一次只问一个问题。
 - 如果存在多种报告 framing，先给 2-3 个方案、trade-offs 和推荐项，优先最忠于证据的 framing。
 - 如果某种 framing 会实质影响后续论文 claim，要保留 approval gate。
@@ -691,6 +707,12 @@ const ZH_SKILL_FILES = {
   [path.join(".lab", ".managed", "templates", "final-report.md")]:
 `# 最终报告
+## 报告状态
+- 报告模式：collaborator-ready 或 artifact-anchored interim
+- canonical context 完整度：
+- 为什么当前只能用这个模式：
 ## 给用户看的总结
 - 一句话结论：
@@ -698,6 +720,33 @@ const ZH_SKILL_FILES = {
 - 还没有被证明的内容：
 - 当前最大报告风险：
+## 问题与背景
+- 这项研究在解决什么问题：
+- 为什么这个问题重要：
+- 当前报告到底覆盖了什么 setting 或 workflow：
+## 数据集场景说明
+- 数据集或 benchmark 1 代表什么真实场景：
+- 数据集或 benchmark 2 代表什么真实场景：
+- 数据集或 benchmark 3 代表什么真实场景：
+## 贡献总结
+- Contribution bullets：
+- 当前证据最强的贡献：
+- 仍需要更强证据的贡献：
+## 方法概述
+- 已批准的方法名：
+- 方法白话总结：
+- 相比 prior work 这套方法改变了什么：
+- 最相关的 prior work 或 baseline 锚点：
+- 这些 prior 方法各自做了什么：
+- 为什么这些 prior 方法在这里仍然不够：
 ## 选定指标
 - 主指标：
@@ -740,6 +789,11 @@ const ZH_SKILL_FILES = {
 - 最终表现摘要：
 - 主表覆盖情况：
+## 工件状态
+- 已就绪的交付物或工作流工件：
+- 这些工件状态为什么不是科学结论：
 ## 主要结果
 - 主要发现 1：
@@ -953,6 +1007,7 @@ const ZH_SKILL_FILES = {
 - 一句话问题：
 - 为什么重要：
 - 目标失败场景：
+- 回填来源：
 ## 成功标准
@@ -973,6 +1028,7 @@ const ZH_SKILL_FILES = {
 - 已批准方向：
 - 当前 owner 或会话：
 - 最近更新该 mission 的 stage：
+- 协作者可读状态：
 `,
   [path.join(".lab", "context", "state.md")]:
 `# 工作流状态
@@ -1966,6 +2022,7 @@ ZH_CONTENT[path.join(".lab", "context", "eval-protocol.md")] = `# 评估协议
 ## 指标释义
 - 指标释义：
+- 回填来源：
 - 背景来源：
 - 方法与基线来源论文：
 - 方法与基线实现来源：
@@ -2021,10 +2078,12 @@ ZH_CONTENT[path.join(".codex", "skills", "lab", "stages", "auto.md")] = `# \`/la
 ## 上下文写回
+- \`.lab/context/mission.md\`
 - \`.lab/context/state.md\`
 - \`.lab/context/decisions.md\`
 - \`.lab/context/data-decisions.md\`
 - \`.lab/context/evidence-index.md\`
+- \`.lab/context/eval-protocol.md\`
 - \`.lab/context/summary.md\`
 - \`.lab/context/session-brief.md\`
 - \`.lab/context/auto-status.md\`
@@ -2053,6 +2112,8 @@ ZH_CONTENT[path.join(".codex", "skills", "lab", "stages", "auto.md")] = `# \`/la
   - \`review\` 更新规范审查上下文
   - \`report\` 写出 \`<deliverables_root>/report.md\`
   - \`write\` 写出 \`<deliverables_root>/paper/\` 下的 LaTeX 产物
+- 如果即将进入 \`report\`，而 \`.lab/context/mission.md\` 或 \`.lab/context/eval-protocol.md\` 仍是模板空壳，就先根据冻结工件和已批准上下文回填最小可信版本。
+- 如果回填后仍缺少协作者可读所需的关键字段，就必须强制生成 \`artifact-anchored interim report\`，不能冒充最终协作者报告。
 - promotion 成功后，必须写回 \`data-decisions.md\`、\`decisions.md\`、\`state.md\` 和 \`session-brief.md\`。
 - 如果某个指标或对比 claim 在评估协议里没有带来源的定义，就不能拿它做 stop 或 promotion 判断。

package/package-assets/shared/lab/.managed/templates/final-report.md CHANGED Viewed

@@ -1,5 +1,11 @@
 # Final Report
+## Report Status
+- Report mode: collaborator-ready or artifact-anchored interim
+- Canonical context readiness:
+- Why the current mode is appropriate:
 ## Reader Summary
 - One-sentence conclusion:
@@ -7,6 +13,33 @@
 - What is still unproven:
 - Biggest reporting risk:
+## Problem and Background
+- Research problem in plain language:
+- Why this problem matters:
+- What setting or workflow this report is actually about:
+## Dataset Scene Notes
+- Dataset or benchmark 1 and what real-world setting it represents:
+- Dataset or benchmark 2 and what real-world setting it represents:
+- Dataset or benchmark 3 and what real-world setting it represents:
+## Contribution Summary
+- Contribution bullets:
+- Strongest supported contribution:
+- Contributions that still need stronger evidence:
+## Method Overview
+- Approved method name:
+- Plain-language method summary:
+- What this method changes relative to prior work:
+- Most relevant prior work or baseline anchors:
+- What those prior methods do:
+- Why those prior methods are still insufficient here:
 ## Selected Metrics
 - Primary metrics:
@@ -56,6 +89,11 @@
 - Final performance summary:
 - Table coverage:
+## Artifact Status
+- Deliverables or workflow artifacts that are ready:
+- Artifact status notes that are not scientific findings:
 ## Main Results
 Summarize validated iteration outcomes.

package/package-assets/shared/lab/context/eval-protocol.md CHANGED Viewed

@@ -17,6 +17,7 @@ Use this file to define the paper-facing evaluation objective, table plan, gates
 ## Metric Glossary
 - Metric glossary:
+- Hydration provenance:
 - Background sources:
 - Method and baseline source papers:
 - Method and baseline implementation source:

package/package-assets/shared/lab/context/mission.md CHANGED Viewed

@@ -5,6 +5,7 @@
 - One-sentence problem:
 - Why it matters:
 - Target failure case:
+- Hydration provenance:
 ## Success Criteria
@@ -25,3 +26,4 @@
 - Approved direction:
 - Current owner or session:
 - Last stage that updated this mission:
+- Collaborator-ready status:

package/package-assets/shared/skills/lab/SKILL.md CHANGED Viewed

@@ -46,6 +46,7 @@ Use this skill when the user invokes `/lab:*` or asks for the structured researc
 - Keep an explicit approval gate before `/lab:spec`.
 - Write idea artifacts with the template in `.lab/.managed/templates/idea.md`.
 - Update `.lab/context/mission.md`, `.lab/context/decisions.md`, and `.lab/context/open-questions.md` after convergence.
+- Do not leave `.lab/context/mission.md` as a template shell once the problem statement and approved direction are known.
 - Do not implement code in this stage.
 ### `/lab:data`
@@ -111,7 +112,8 @@ Use this skill when the user invokes `/lab:*` or asks for the structured researc
 - Normalize the result with `.lab/.managed/scripts/eval_report.py`.
 - Validate normalized output with `.lab/.managed/scripts/validate_results.py`.
 - Read `.lab/context/eval-protocol.md` before choosing the smallest run so the first experiment already targets the approved tables, metrics, and gates.
-- Update `.lab/context/state.md` and `.lab/context/evidence-index.md` after the run.
+- Update `.lab/context/state.md`, `.lab/context/evidence-index.md`, and `.lab/context/eval-protocol.md` after the run.
+- If the evaluation protocol is still skeletal, initialize the smallest trustworthy source-backed version before treating the run as the protocol anchor.
 ### `/lab:iterate`
@@ -131,7 +133,8 @@ Use this skill when the user invokes `/lab:*` or asks for the structured researc
 - Keep metric definitions, baseline behavior, and comparison implementations anchored to the source-backed evaluation protocol before changing thresholds, gates, or ladder transitions.
 - Switch to diagnostic mode if risk increases for two consecutive rounds.
 - Write round reports with `.lab/.managed/templates/iteration-report.md`.
-- Update `.lab/context/state.md`, `.lab/context/decisions.md`, `.lab/context/evidence-index.md`, and `.lab/context/open-questions.md` each round as needed.
+- Update `.lab/context/state.md`, `.lab/context/decisions.md`, `.lab/context/evidence-index.md`, `.lab/context/open-questions.md`, and `.lab/context/eval-protocol.md` each round as needed.
+- Keep `.lab/context/eval-protocol.md` synchronized with accepted ladder changes, benchmark scope, and source-backed implementation deviations.
 - Stop at threshold success or iteration cap, and record blockers plus next-best actions when the campaign ends without success.
 ### `/lab:review`
@@ -153,7 +156,9 @@ Use this skill when the user invokes `/lab:*` or asks for the structured researc
 - Aggregate them with `.lab/.managed/scripts/summarize_iterations.py`.
 - Write the final document with `.lab/.managed/templates/final-report.md` and the managed table summary with `.lab/.managed/templates/main-tables.md`.
 - Keep failed attempts and limitations visible.
-- Update `.lab/context/state.md` and `.lab/context/evidence-index.md` with report-level handoff notes.
+- Update `.lab/context/mission.md`, `.lab/context/eval-protocol.md`, `.lab/context/state.md`, and `.lab/context/evidence-index.md` with report-level handoff notes.
+- If canonical context is still skeletal, hydrate the smallest trustworthy version from frozen artifacts before finalizing the report.
+- If collaborator-critical fields remain missing after hydration, downgrade to an `artifact-anchored interim report` instead of presenting a final collaborator-ready report.
 ### `/lab:write`

package/package-assets/shared/skills/lab/stages/auto.md CHANGED Viewed

@@ -24,6 +24,8 @@
 ## Context Write Set
+- `.lab/context/mission.md`
+- `.lab/context/eval-protocol.md`
 - `.lab/context/state.md`
 - `.lab/context/decisions.md`
 - `.lab/context/data-decisions.md`
@@ -63,6 +65,8 @@
   - keep the session alive while the current rung is running
   - write the current rung, watch target, and next rung to `.lab/context/auto-status.md`
 - Reuse the existing `/lab:run`, `/lab:iterate`, `/lab:review`, `/lab:report`, and optional `/lab:write` contracts instead of inventing a parallel workflow.
+- If the loop is about to reach `report` while `.lab/context/mission.md` or `.lab/context/eval-protocol.md` is still skeletal, hydrate the smallest trustworthy canonical version from frozen artifacts and approved context before drafting the report.
+- If hydration still leaves collaborator-critical fields blank, force `report` to emit an `artifact-anchored interim report` instead of a collaborator-ready final report.
 - Enforce stage contracts, not just exit codes:
   - `run` and `iterate` must change persistent outputs under `results_root`
   - `review` must update canonical review context

package/package-assets/shared/skills/lab/stages/idea.md CHANGED Viewed

@@ -3,6 +3,7 @@
 ## Required Output
 - one-sentence problem statement
+- why the problem matters in plain language
 - failure case
 - idea classification
 - contribution category
@@ -18,6 +19,7 @@
 - critique before convergence
 - minimum viable experiment
 - explicit approval gate before `/lab:spec`
+- canonical mission context updated with the approved problem, importance, failure case, and direction
 ## Evidence Discipline
@@ -60,3 +62,4 @@
 - State why the target problem matters before talking about the method.
 - Compare against existing methods explicitly, not by vague novelty language.
 - The three meaningful points should each fit in one direct sentence.
+- Do not leave `.lab/context/mission.md` as an empty template after convergence; write the approved problem, why it matters, the current benchmark scope, and the approved direction back into canonical context.

package/package-assets/shared/skills/lab/stages/iterate.md CHANGED Viewed

@@ -29,6 +29,7 @@ Declare and keep fixed:
 - `.lab/context/decisions.md`
 - `.lab/context/evidence-index.md`
 - `.lab/context/open-questions.md`
+- `.lab/context/eval-protocol.md`
 ## Per-Round Output
@@ -62,6 +63,7 @@ If the loop stops without success, record:
 - Do not accumulate long-lived results under `.lab/changes/<change-id>/runs`.
 - Do not change metric definitions, baseline semantics, or comparison implementations unless the approved evaluation protocol records both their sources and any deviations.
 - When you change ladders, sample sizes, or promotion gates, keep the resulting logic anchored to the source-backed evaluation protocol instead of ad-hoc chat reasoning.
+- Keep `.lab/context/eval-protocol.md` synchronized with the active benchmark scope, ladder gates, source-backed metric definitions, and any accepted implementation deviations instead of leaving it as a stale template.
 ## Interaction Contract

package/package-assets/shared/skills/lab/stages/report.md CHANGED Viewed

@@ -2,7 +2,11 @@
 ## Required Output
+- report status: collaborator-ready or artifact-anchored interim
 - reader summary for the user
+- problem and background in plain language
+- dataset scene notes in plain language
+- contribution summary
 - method overview
 - selected metrics summary
 - plain-language metric guide
@@ -17,6 +21,7 @@
 - failed attempts
 - limitations
 - next steps
+- artifact status kept separate from validated findings
 ## Context Read Set
@@ -30,6 +35,8 @@
 ## Context Write Set
+- `.lab/context/mission.md`
+- `.lab/context/eval-protocol.md`
 - `.lab/context/state.md`
 - `.lab/context/evidence-index.md`
@@ -43,8 +50,15 @@
 - Explain the selected primary and secondary metrics in plain language for the user: what each metric measures, whether higher or lower is better, and whether it is a main result metric or only a health/support metric.
 - If coverage, completeness, confidence, or similar health metrics appear, explicitly say that they describe experimental reliability rather than the main scientific effect.
 - Pull the core background references, method or baseline references, and metric references out of the approved evaluation protocol instead of hiding them in `.lab/context/*`.
+- Pull the approved method name and contribution bullets out of `.lab/context/terminology-lock.md` when that framing context exists; do not silently drop them from the collaborator-facing report.
+- Explain the method overview in collaborator language: what the method roughly does, what changed relative to the closest prior work or strongest baseline, what those prior methods do, and why they remain insufficient for the approved claim.
+- When citing prior work or baselines in the method overview, include only the few anchor references a collaborator needs, and summarize their role and limitation in one short line each.
 - Report only the few references a collaborator needs to orient themselves quickly; do not turn `report.md` into a full bibliography dump.
 - If the report depends on a deviation from an original metric or implementation, state that deviation explicitly instead of smoothing it over.
+- Before drafting the report, inspect `.lab/context/mission.md` and `.lab/context/eval-protocol.md` for skeletal template fields.
+- If either canonical context file is still skeletal, hydrate the smallest trustworthy version from frozen result artifacts, dataset decisions, evidence-index, and prior approved context, and write that back before finalizing the report.
+- If collaborator-critical fields still remain missing after hydration, downgrade the output to an `artifact-anchored interim report` instead of presenting it as a final collaborator-ready report.
+- Do not mix workflow deliverable status, rerun ids, or manuscript skeleton status into validated scientific findings; keep those in a separate artifact-status section.
 - If `.lab/config/workflow.json` sets the workflow language to Chinese, write `report.md` and `<deliverables_root>/main-tables.md` in Chinese unless a file path, code identifier, or literal metric name must remain unchanged.
 - Prefer conservative interpretation over marketing language.
 - Leave a clear handoff path into `/lab:write` with evidence links that section drafts can cite.
@@ -54,6 +68,7 @@
 - Start with a concise summary of the campaign outcome, the selected primary and secondary metrics, the strongest supported claim, and the biggest reporting risk.
 - Proactively deliver a user-readable plain-language summary when the stage is reached from `/lab:auto`; do not wait for a separate follow-up request asking what the metrics or tables mean.
 - Treat `report.md` as a user-facing artifact rather than an internal dump. Prefer plain-language explanations before jargon, and explain each metric the first time it matters.
+- Treat contribution bullets as collaborator-facing claim summaries, not as internal TODOs; tie each one to the current evidence boundary.
 - If a missing assumption would change report interpretation, ask one clarifying question at a time.
 - If there are multiple defensible report framings, present 2-3 approaches with trade-offs and recommend the most evidence-faithful framing before writing.
 - Keep an approval gate when the reporting frame would materially affect what the paper later claims.

package/package-assets/shared/skills/lab/stages/run.md CHANGED Viewed

@@ -6,6 +6,7 @@
 - run registry entry
 - normalized evaluation summary
 - validation result for the normalized summary
+- canonical evaluation context initialized or refined when the active protocol is still skeletal
 ## Context Read Set
@@ -19,6 +20,7 @@
 - `.lab/context/state.md`
 - `.lab/context/evidence-index.md`
+- `.lab/context/eval-protocol.md`
 ## Constraints
@@ -26,6 +28,7 @@
 - Fail fast on data, environment, or metric wiring problems.
 - Tie the run to the approved evaluation protocol, not just an ad-hoc chat goal.
 - Do not invent metric definitions, baseline behavior, or comparison implementations from memory; anchor them to the approved evaluation protocol and its recorded sources.
+- If `.lab/context/eval-protocol.md` is still skeletal, write the smallest trustworthy version of the current evaluation objective, metric set, ladder, and source-backed implementation notes before treating the run as the new protocol anchor.
 - Record the exact launch command and output location.
 - Write durable run outputs, logs, and checkpoints under `results_root`.
 - Write figures or plots under `figures_root`.

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "superlab",
-  "version": "0.1.18",
+  "version": "0.1.20",
   "description": "Strict /lab research workflow installer for Codex and Claude",
   "keywords": [
     "codex",