npm - superlab - Versions diffs - 0.1.19 → 0.1.21 - Mend

superlab 0.1.19 → 0.1.21

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (6) hide show

package/bin/superlab.cjs +0 -0
package/lib/context.cjs +78 -1
package/lib/i18n.cjs +22 -0
package/package-assets/shared/lab/.managed/templates/final-report.md +15 -0
package/package-assets/shared/skills/lab/stages/report.md +6 -0
package/package.json +1 -1

package/bin/superlab.cjs CHANGED Viewed

File without changes

package/lib/context.cjs CHANGED Viewed

@@ -29,6 +29,35 @@ const EVAL_COLLABORATOR_FIELDS = [
   { name: "Metric source papers", labels: ["Metric source papers", "指标来源论文"] },
   { name: "Required output artifacts", labels: ["Required output artifacts", "必要输出工件"] },
 ];
+const REPORT_REQUIRED_SECTIONS = [
+  { name: "Report Status", patterns: [/^##\s+Report Status\s*$/m, /^##\s+报告状态\s*$/m] },
+  { name: "Reader Summary", patterns: [/^##\s+Reader Summary\s*$/m, /^##\s+给用户看的总结\s*$/m] },
+  { name: "Problem and Background", patterns: [/^##\s+Problem and Background\s*$/m, /^##\s+问题与背景\s*$/m] },
+  { name: "Dataset Scene Notes", patterns: [/^##\s+Dataset Scene Notes\s*$/m, /^##\s+数据集场景说明\s*$/m] },
+  { name: "Contribution Summary", patterns: [/^##\s+Contribution Summary\s*$/m, /^##\s+贡献总结\s*$/m] },
+  { name: "Method Overview", patterns: [/^##\s+Method Overview\s*$/m, /^##\s+方法概述\s*$/m] },
+  { name: "Selected Metrics", patterns: [/^##\s+Selected Metrics\s*$/m, /^##\s+选定指标\s*$/m] },
+  { name: "Metric Guide", patterns: [/^##\s+Metric Guide\s*$/m, /^##\s+指标白话释义\s*$/m] },
+  { name: "Background Sources", patterns: [/^##\s+Background Sources\s*$/m, /^##\s+背景来源\s*$/m] },
+  {
+    name: "Method and Baseline Sources",
+    patterns: [/^##\s+Method and Baseline Sources\s*$/m, /^##\s+方法与基线来源\s*$/m],
+  },
+  { name: "Metric Sources", patterns: [/^##\s+Metric Sources\s*$/m, /^##\s+指标来源\s*$/m] },
+];
+const MAIN_TABLES_REQUIRED_SECTIONS = [
+  { name: "Reader Summary", patterns: [/^##\s+Reader Summary\s*$/m, /^##\s+给用户看的总结\s*$/m] },
+  { name: "Selected Metrics", patterns: [/^##\s+Selected Metrics\s*$/m, /^##\s+选定指标\s*$/m] },
+  { name: "Metric Guide", patterns: [/^##\s+Metric Guide\s*$/m, /^##\s+指标白话释义\s*$/m] },
+  {
+    name: "Final Performance Summary",
+    patterns: [/^##\s+Final Performance Summary\s*$/m, /^##\s+最终表现摘要\s*$/m],
+  },
+  {
+    name: "How to Read These Tables",
+    patterns: [/^##\s+How to Read These Tables\s*$/m, /^##\s+怎么读这些表\s*$/m],
+  },
+];
 const REPORT_FIELDS = {
   problem: ["Research problem in plain language", "研究问题白话解释", "研究问题"],
   whyItMatters: ["Why this problem matters", "为什么这个问题重要"],
@@ -59,6 +88,11 @@ const REPORT_FIELDS = {
   finalPerformanceSummary: ["Final performance summary", "最终表现总结"],
   tableCoverage: ["Table coverage", "表格覆盖范围"],
 };
+const TERMINOLOGY_FIELDS = {
+  methodName: ["Method name", "方法名"],
+  shortName: ["Short name or acronym", "简称或缩写"],
+  contributionBullets: ["Contribution bullets", "贡献 bullets", "Contribution bullets："],
+};
 function contextFile(targetDir, name) {
   return path.join(targetDir, ".lab", "context", name);
@@ -186,6 +220,36 @@ function collaboratorEvalIssues(targetDir) {
     : [];
 }
+function missingRequiredSections(text, sections) {
+  if (!text) {
+    return sections.map((section) => section.name);
+  }
+  return sections
+    .filter((section) => !section.patterns.some((pattern) => pattern.test(text)))
+    .map((section) => section.name);
+}
+function collaboratorReportIssues(targetDir) {
+  if (!hasCollaboratorFacingDeliverables(targetDir)) {
+    return [];
+  }
+  const { reportPath, mainTablesPath } = getCollaboratorDeliverablePaths(targetDir);
+  const issues = [];
+  if (fs.existsSync(reportPath)) {
+    const missing = missingRequiredSections(readFileIfExists(reportPath), REPORT_REQUIRED_SECTIONS);
+    if (missing.length > 0) {
+      issues.push(`report.md is missing required collaborator-facing sections: ${missing.join(", ")}`);
+    }
+  }
+  if (fs.existsSync(mainTablesPath)) {
+    const missing = missingRequiredSections(readFileIfExists(mainTablesPath), MAIN_TABLES_REQUIRED_SECTIONS);
+    if (missing.length > 0) {
+      issues.push(`main-tables.md is missing required collaborator-facing sections: ${missing.join(", ")}`);
+    }
+  }
+  return issues;
+}
 function extractReportValue(reportText, key) {
   return extractValue(reportText, REPORT_FIELDS[key] || []);
 }
@@ -606,7 +670,8 @@ function hydrateEvalProtocol(targetDir) {
 function getCollaboratorReportStatus(targetDir) {
   const missionIssues = collaboratorMissionIssues(targetDir);
   const evalIssues = collaboratorEvalIssues(targetDir);
-  const issues = missionIssues.concat(evalIssues);
+  const reportIssues = collaboratorReportIssues(targetDir);
+  const issues = missionIssues.concat(evalIssues, reportIssues);
   if (issues.length > 0) {
     return {
       mode: "artifact-anchored interim",
@@ -686,6 +751,8 @@ function renderSummary(lang, data) {
 - Collaborator report mode: ${data.reportMode || "待补充"}
 - Canonical context readiness: ${data.reportReadiness || "待补充"}
 - Why this report mode is active: ${data.reportModeReason || "待补充"}
+- Method name: ${data.methodName || "待补充"}
+- Contribution bullets: ${data.contributionBullets || "待补充"}
 - Eval objective: ${data.evalObjective || "待补充"}
 - Primary metrics: ${data.evalPrimaryMetrics || "待补充"}
 - Secondary metrics: ${data.evalSecondaryMetrics || "待补充"}
@@ -747,6 +814,8 @@ function renderSummary(lang, data) {
 - Collaborator report mode: ${data.reportMode || "TBD"}
 - Canonical context readiness: ${data.reportReadiness || "TBD"}
 - Why this report mode is active: ${data.reportModeReason || "TBD"}
+- Method name: ${data.methodName || "TBD"}
+- Contribution bullets: ${data.contributionBullets || "TBD"}
 - Eval objective: ${data.evalObjective || "TBD"}
 - Primary metrics: ${data.evalPrimaryMetrics || "TBD"}
 - Secondary metrics: ${data.evalSecondaryMetrics || "TBD"}
@@ -863,6 +932,8 @@ ${data.problem || "待补充"}
 - Collaborator report mode: ${data.reportMode || "待补充"}
 - Canonical context readiness: ${data.reportReadiness || "待补充"}
 - Why this report mode is active: ${data.reportModeReason || "待补充"}
+- Method name: ${data.methodName || "待补充"}
+- Contribution bullets: ${data.contributionBullets || "待补充"}
 - Eval objective: ${data.evalObjective || "待补充"}
 - Primary metrics: ${data.evalPrimaryMetrics || "待补充"}
 - Secondary metrics: ${data.evalSecondaryMetrics || "待补充"}
@@ -935,6 +1006,8 @@ ${data.problem || "TBD"}
 - Collaborator report mode: ${data.reportMode || "TBD"}
 - Canonical context readiness: ${data.reportReadiness || "TBD"}
 - Why this report mode is active: ${data.reportModeReason || "TBD"}
+- Method name: ${data.methodName || "TBD"}
+- Contribution bullets: ${data.contributionBullets || "TBD"}
 - Eval objective: ${data.evalObjective || "TBD"}
 - Primary metrics: ${data.evalPrimaryMetrics || "TBD"}
 - Secondary metrics: ${data.evalSecondaryMetrics || "TBD"}
@@ -987,6 +1060,7 @@ function buildContextSnapshot(targetDir) {
   const evidence = readFileIfExists(contextFile(targetDir, "evidence-index.md"));
   const questions = readFileIfExists(contextFile(targetDir, "open-questions.md"));
   const dataDecisions = readFileIfExists(contextFile(targetDir, "data-decisions.md"));
+  const terminologyLock = readFileIfExists(contextFile(targetDir, "terminology-lock.md"));
   const autoMode = readFileIfExists(contextFile(targetDir, "auto-mode.md"));
   const autoStatus = readFileIfExists(contextFile(targetDir, "auto-status.md"));
   const autoOutcome = readFileIfExists(contextFile(targetDir, "auto-outcome.md"));
@@ -1196,6 +1270,8 @@ function buildContextSnapshot(targetDir) {
     reportMode: reportStatus.mode,
     reportReadiness: reportStatus.readiness,
     reportModeReason: reportStatus.reason,
+    methodName: extractValue(terminologyLock, TERMINOLOGY_FIELDS.methodName),
+    contributionBullets: extractValue(terminologyLock, TERMINOLOGY_FIELDS.contributionBullets),
     evalObjective: evalProtocol.primaryEvaluationObjective,
     evalPrimaryMetrics: evalProtocol.primaryMetrics,
     evalSecondaryMetrics: evalProtocol.secondaryMetrics,
@@ -1302,6 +1378,7 @@ module.exports = {
   archiveContext,
   collaboratorEvalIssues,
   collaboratorMissionIssues,
+  collaboratorReportIssues,
   getCollaboratorReportStatus,
   hasCollaboratorFacingDeliverables,
   hydrateCanonicalContext,

package/lib/i18n.cjs CHANGED Viewed

@@ -293,6 +293,7 @@ const ZH_SKILL_FILES = {
 - 给用户看的总结
 - 问题与背景的白话说明
 - 数据集场景说明
+- 贡献总结
 - 方法概述
 - 选定指标摘要
 - 指标白话释义
@@ -330,9 +331,14 @@ const ZH_SKILL_FILES = {
 - 必须把已批准的主指标、次级指标和必要终局证据明确写进 \`report.md\` 与受管的 \`main-tables.md\`。
 - 必须用白话解释选定的主指标和次级指标：每个指标在衡量什么、越高还是越低更好、它是主结果指标还是健康度/支持性指标。
 - 如果出现 coverage、completeness、confidence 或类似健康度指标，必须明确说明这类指标回答的是“实验是否跑稳、证据是否完整”，而不是主要科学效应本身。
+- 要把最关键的背景来源、方法/基线来源和指标来源直接写进报告，不要把它们藏在 \`.lab/context/*\` 里。
+- 如果 \`.lab/context/terminology-lock.md\` 里已经冻结了方法名和 contribution bullets，就必须把它们带进报告。
+- 方法概述必须用协作者能读懂的话说明：我们的方法大致怎么做、相对 closest prior work 或 strongest baseline 改了什么、这些 prior 方法各自做了什么，以及它们为什么在当前 claim 下仍然不够。
+- 只保留少量最关键的 prior work/baseline 锚点；每个锚点都要用一句话交代它做了什么和它的局限。
 - 在起草报告前，先检查 \`.lab/context/mission.md\` 和 \`.lab/context/eval-protocol.md\` 是否仍是模板空壳。
 - 如果 canonical context 还是空壳，要先根据 frozen result artifacts、data-decisions、evidence-index 和已批准上下文回填“最小可信版本”，再写报告。
 - 如果回填后仍缺少协作者可读所需的关键字段，就必须把输出降级成 \`artifact-anchored interim report\`，不能冒充最终协作者报告。
+- 如果现有的 \`report.md\` 或 \`main-tables.md\` 缺少受管模板要求的协作者可读章节，也必须视为报告缺陷；rerun 需要补齐这些缺失块，不能直接宣称“正文无变化”或把这次 rerun 当成 no-op。
 - 如果报告依赖了对原始指标或原始实现的偏差，必须明确写出这个偏差。
 - workflow 工件状态、rerun id 或 LaTeX 骨架状态不能混进“已验证主结果”；这些内容必须单列到工件状态部分。
 - 如果 workflow language 是中文，\`report.md\` 和 \`<deliverables_root>/main-tables.md\` 也应使用中文，除非文件路径、代码标识符或字面指标名必须保持原样。
@@ -344,6 +350,7 @@ const ZH_SKILL_FILES = {
 - 开始前先简洁说明：campaign outcome、选定的主指标和次级指标、最强已支撑 claim、最大的报告风险。
 - 当该阶段由 \`/lab:auto\` 进入时，要主动给出用户可读的白话总结，不要等用户再追问“这些指标是什么意思”或“这些表怎么看”。
 - 把 \`report.md\` 当作给用户看的工件，而不是内部 dump。术语第一次出现时就解释；先讲结论，再讲术语。
+- 把 contribution bullets 当作协作者可读的最终主张摘要，而不是内部 TODO；每条都必须和当前证据边界对齐。
 - 如果某个未决前提会改变报告解释，一次只问一个问题。
 - 如果存在多种报告 framing，先给 2-3 个方案、trade-offs 和推荐项，优先最忠于证据的 framing。
 - 如果某种 framing 会实质影响后续论文 claim，要保留 approval gate。
@@ -726,6 +733,21 @@ const ZH_SKILL_FILES = {
 - 数据集或 benchmark 2 代表什么真实场景：
 - 数据集或 benchmark 3 代表什么真实场景：
+## 贡献总结
+- Contribution bullets：
+- 当前证据最强的贡献：
+- 仍需要更强证据的贡献：
+## 方法概述
+- 已批准的方法名：
+- 方法白话总结：
+- 相比 prior work 这套方法改变了什么：
+- 最相关的 prior work 或 baseline 锚点：
+- 这些 prior 方法各自做了什么：
+- 为什么这些 prior 方法在这里仍然不够：
 ## 选定指标
 - 主指标：

package/package-assets/shared/lab/.managed/templates/final-report.md CHANGED Viewed

@@ -25,6 +25,21 @@
 - Dataset or benchmark 2 and what real-world setting it represents:
 - Dataset or benchmark 3 and what real-world setting it represents:
+## Contribution Summary
+- Contribution bullets:
+- Strongest supported contribution:
+- Contributions that still need stronger evidence:
+## Method Overview
+- Approved method name:
+- Plain-language method summary:
+- What this method changes relative to prior work:
+- Most relevant prior work or baseline anchors:
+- What those prior methods do:
+- Why those prior methods are still insufficient here:
 ## Selected Metrics
 - Primary metrics:

package/package-assets/shared/skills/lab/stages/report.md CHANGED Viewed

@@ -6,6 +6,7 @@
 - reader summary for the user
 - problem and background in plain language
 - dataset scene notes in plain language
+- contribution summary
 - method overview
 - selected metrics summary
 - plain-language metric guide
@@ -49,11 +50,15 @@
 - Explain the selected primary and secondary metrics in plain language for the user: what each metric measures, whether higher or lower is better, and whether it is a main result metric or only a health/support metric.
 - If coverage, completeness, confidence, or similar health metrics appear, explicitly say that they describe experimental reliability rather than the main scientific effect.
 - Pull the core background references, method or baseline references, and metric references out of the approved evaluation protocol instead of hiding them in `.lab/context/*`.
+- Pull the approved method name and contribution bullets out of `.lab/context/terminology-lock.md` when that framing context exists; do not silently drop them from the collaborator-facing report.
+- Explain the method overview in collaborator language: what the method roughly does, what changed relative to the closest prior work or strongest baseline, what those prior methods do, and why they remain insufficient for the approved claim.
+- When citing prior work or baselines in the method overview, include only the few anchor references a collaborator needs, and summarize their role and limitation in one short line each.
 - Report only the few references a collaborator needs to orient themselves quickly; do not turn `report.md` into a full bibliography dump.
 - If the report depends on a deviation from an original metric or implementation, state that deviation explicitly instead of smoothing it over.
 - Before drafting the report, inspect `.lab/context/mission.md` and `.lab/context/eval-protocol.md` for skeletal template fields.
 - If either canonical context file is still skeletal, hydrate the smallest trustworthy version from frozen result artifacts, dataset decisions, evidence-index, and prior approved context, and write that back before finalizing the report.
 - If collaborator-critical fields still remain missing after hydration, downgrade the output to an `artifact-anchored interim report` instead of presenting it as a final collaborator-ready report.
+- If the existing `report.md` or `main-tables.md` is missing required collaborator-facing sections from the managed templates, treat that as a report deficiency. A rerun must repair the missing sections instead of declaring "no content change" or treating the rerun as a no-op.
 - Do not mix workflow deliverable status, rerun ids, or manuscript skeleton status into validated scientific findings; keep those in a separate artifact-status section.
 - If `.lab/config/workflow.json` sets the workflow language to Chinese, write `report.md` and `<deliverables_root>/main-tables.md` in Chinese unless a file path, code identifier, or literal metric name must remain unchanged.
 - Prefer conservative interpretation over marketing language.
@@ -64,6 +69,7 @@
 - Start with a concise summary of the campaign outcome, the selected primary and secondary metrics, the strongest supported claim, and the biggest reporting risk.
 - Proactively deliver a user-readable plain-language summary when the stage is reached from `/lab:auto`; do not wait for a separate follow-up request asking what the metrics or tables mean.
 - Treat `report.md` as a user-facing artifact rather than an internal dump. Prefer plain-language explanations before jargon, and explain each metric the first time it matters.
+- Treat contribution bullets as collaborator-facing claim summaries, not as internal TODOs; tie each one to the current evidence boundary.
 - If a missing assumption would change report interpretation, ask one clarifying question at a time.
 - If there are multiple defensible report framings, present 2-3 approaches with trade-offs and recommend the most evidence-faithful framing before writing.
 - Keep an approval gate when the reporting frame would materially affect what the paper later claims.

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "superlab",
-  "version": "0.1.19",
+  "version": "0.1.21",
   "description": "Strict /lab research workflow installer for Codex and Claude",
   "keywords": [
     "codex",