npm - superlab - Version diff - 0.1.17 → 0.1.18 - Mend

superlab 0.1.17 → 0.1.18

This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.

Files changed (9)

package/lib/auto_runner.cjs +6 -0
package/lib/auto_state.cjs +6 -0
package/lib/context.cjs +15 -0
package/lib/eval_protocol.cjs +15 -0
package/lib/i18n.cjs +20 -0
package/package-assets/shared/lab/.managed/templates/final-report.md +17 -0
package/package-assets/shared/lab/context/eval-protocol.md +3 -0
package/package-assets/shared/skills/lab/stages/report.md +5 -0
package/package.json +1 -1

package/lib/auto_runner.cjs CHANGED Viewed

@@ -270,6 +270,9 @@ async function startAutoMode({ targetDir, now = new Date() }) {
     requiredTerminalEvidence: evalProtocol.requiredTerminalEvidence,
     experimentLadder: evalProtocol.experimentLadder,
     metricGlossary: evalProtocol.metricGlossary,
+    backgroundSources: evalProtocol.backgroundSources,
+    methodAndBaselineSourcePapers: evalProtocol.methodAndBaselineSourcePapers,
+    methodAndBaselineImplementationSource: evalProtocol.methodAndBaselineImplementationSource,
     metricSourcePapers: evalProtocol.metricSourcePapers,
     metricImplementationSource: evalProtocol.metricImplementationSource,
     comparisonSourcePapers: evalProtocol.comparisonSourcePapers,
@@ -755,6 +758,9 @@ function stopAutoMode({ targetDir, now = new Date() }) {
     requiredTerminalEvidence: evalProtocol.requiredTerminalEvidence,
     experimentLadder: evalProtocol.experimentLadder,
     metricGlossary: evalProtocol.metricGlossary,
+    backgroundSources: evalProtocol.backgroundSources,
+    methodAndBaselineSourcePapers: evalProtocol.methodAndBaselineSourcePapers,
+    methodAndBaselineImplementationSource: evalProtocol.methodAndBaselineImplementationSource,
     metricSourcePapers: evalProtocol.metricSourcePapers,
     metricImplementationSource: evalProtocol.metricImplementationSource,
     comparisonSourcePapers: evalProtocol.comparisonSourcePapers,

package/lib/auto_state.cjs CHANGED Viewed

@@ -146,6 +146,9 @@ function renderAutoOutcome(outcome, { lang = "en" } = {}) {
 - 必要终局证据: ${outcome.requiredTerminalEvidence || ""}
 - 实验阶梯: ${outcome.experimentLadder || ""}
 - 指标释义: ${outcome.metricGlossary || ""}
+- 背景来源: ${outcome.backgroundSources || ""}
+- 方法与基线来源论文: ${outcome.methodAndBaselineSourcePapers || ""}
+- 方法与基线实现来源: ${outcome.methodAndBaselineImplementationSource || ""}
 - 指标来源论文: ${outcome.metricSourcePapers || ""}
 - 指标实现来源: ${outcome.metricImplementationSource || ""}
 - 对比方法来源论文: ${outcome.comparisonSourcePapers || ""}
@@ -180,6 +183,9 @@ function renderAutoOutcome(outcome, { lang = "en" } = {}) {
 - Required terminal evidence: ${outcome.requiredTerminalEvidence || ""}
 - Experiment ladder: ${outcome.experimentLadder || ""}
 - Metric glossary: ${outcome.metricGlossary || ""}
+- Background sources: ${outcome.backgroundSources || ""}
+- Method and baseline source papers: ${outcome.methodAndBaselineSourcePapers || ""}
+- Method and baseline implementation source: ${outcome.methodAndBaselineImplementationSource || ""}
 - Metric source papers: ${outcome.metricSourcePapers || ""}
 - Metric implementation source: ${outcome.metricImplementationSource || ""}
 - Comparison source papers: ${outcome.comparisonSourcePapers || ""}

package/lib/context.cjs CHANGED Viewed

@@ -101,6 +101,9 @@ function renderSummary(lang, data) {
 - Required terminal evidence: ${data.evalRequiredTerminalEvidence || "待补充"}
 - Table plan: ${data.evalTablePlan || "待补充"}
 - Metric glossary: ${data.evalMetricGlossary || "待补充"}
+- Background sources: ${data.evalBackgroundSources || "待补充"}
+- Method and baseline source papers: ${data.evalMethodAndBaselineSourcePapers || "待补充"}
+- Method and baseline implementation source: ${data.evalMethodAndBaselineImplementationSource || "待补充"}
 - Metric source papers: ${data.evalMetricSourcePapers || "待补充"}
 - Metric implementation source: ${data.evalMetricImplementationSource || "待补充"}
 - Comparison source papers: ${data.evalComparisonSourcePapers || "待补充"}
@@ -156,6 +159,9 @@ function renderSummary(lang, data) {
 - Required terminal evidence: ${data.evalRequiredTerminalEvidence || "TBD"}
 - Table plan: ${data.evalTablePlan || "TBD"}
 - Metric glossary: ${data.evalMetricGlossary || "TBD"}
+- Background sources: ${data.evalBackgroundSources || "TBD"}
+- Method and baseline source papers: ${data.evalMethodAndBaselineSourcePapers || "TBD"}
+- Method and baseline implementation source: ${data.evalMethodAndBaselineImplementationSource || "TBD"}
 - Metric source papers: ${data.evalMetricSourcePapers || "TBD"}
 - Metric implementation source: ${data.evalMetricImplementationSource || "TBD"}
 - Comparison source papers: ${data.evalComparisonSourcePapers || "TBD"}
@@ -266,6 +272,9 @@ ${data.problem || "待补充"}
 - Required terminal evidence: ${data.evalRequiredTerminalEvidence || "待补充"}
 - Table plan: ${data.evalTablePlan || "待补充"}
 - Metric glossary: ${data.evalMetricGlossary || "待补充"}
+- Background sources: ${data.evalBackgroundSources || "待补充"}
+- Method and baseline source papers: ${data.evalMethodAndBaselineSourcePapers || "待补充"}
+- Method and baseline implementation source: ${data.evalMethodAndBaselineImplementationSource || "待补充"}
 - Metric source papers: ${data.evalMetricSourcePapers || "待补充"}
 - Metric implementation source: ${data.evalMetricImplementationSource || "待补充"}
 - Comparison source papers: ${data.evalComparisonSourcePapers || "待补充"}
@@ -332,6 +341,9 @@ ${data.problem || "TBD"}
 - Required terminal evidence: ${data.evalRequiredTerminalEvidence || "TBD"}
 - Table plan: ${data.evalTablePlan || "TBD"}
 - Metric glossary: ${data.evalMetricGlossary || "TBD"}
+- Background sources: ${data.evalBackgroundSources || "TBD"}
+- Method and baseline source papers: ${data.evalMethodAndBaselineSourcePapers || "TBD"}
+- Method and baseline implementation source: ${data.evalMethodAndBaselineImplementationSource || "TBD"}
 - Metric source papers: ${data.evalMetricSourcePapers || "TBD"}
 - Metric implementation source: ${data.evalMetricImplementationSource || "TBD"}
 - Comparison source papers: ${data.evalComparisonSourcePapers || "TBD"}
@@ -586,6 +598,9 @@ function buildContextSnapshot(targetDir) {
     evalRequiredTerminalEvidence: evalProtocol.requiredTerminalEvidence,
     evalTablePlan: evalProtocol.tablePlan,
     evalMetricGlossary: evalProtocol.metricGlossary,
+    evalBackgroundSources: evalProtocol.backgroundSources,
+    evalMethodAndBaselineSourcePapers: evalProtocol.methodAndBaselineSourcePapers,
+    evalMethodAndBaselineImplementationSource: evalProtocol.methodAndBaselineImplementationSource,
     evalMetricSourcePapers: evalProtocol.metricSourcePapers,
     evalMetricImplementationSource: evalProtocol.metricImplementationSource,
     evalComparisonSourcePapers: evalProtocol.comparisonSourcePapers,

package/lib/eval_protocol.cjs CHANGED Viewed

@@ -39,6 +39,21 @@ const EVAL_PROTOCOL_FIELDS = [
     key: "metricGlossary",
     labels: ["Metric glossary", "指标释义"],
   },
+  {
+    name: "Background sources",
+    key: "backgroundSources",
+    labels: ["Background sources", "背景来源"],
+  },
+  {
+    name: "Method and baseline source papers",
+    key: "methodAndBaselineSourcePapers",
+    labels: ["Method and baseline source papers", "方法与基线来源论文"],
+  },
+  {
+    name: "Method and baseline implementation source",
+    key: "methodAndBaselineImplementationSource",
+    labels: ["Method and baseline implementation source", "方法与基线实现来源"],
+  },
   {
     name: "Metric source papers",
     key: "metricSourcePapers",

package/lib/i18n.cjs CHANGED Viewed

@@ -710,6 +710,23 @@ const ZH_SKILL_FILES = {
 - 次级指标在衡量什么：
 - 健康度/支持性指标在衡量什么，为什么它们不是主结论：
+## 背景来源
+- 最关键的背景论文或 benchmark 参考：
+- 为什么这些来源足以锚定当前问题：
+## 方法与基线来源
+- 我们的方法来源或实现基础：
+- baseline 与 comparison 的来源论文：
+- baseline 与 comparison 的实现来源：
+## 指标来源
+- 指标来源论文：
+- 指标实现来源：
+- 与原始实现的偏差：
 ## 怎么看主表
 - Table 1 负责回答什么：
@@ -1949,6 +1966,9 @@ ZH_CONTENT[path.join(".lab", "context", "eval-protocol.md")] = `# 评估协议
 ## 指标释义
 - 指标释义：
+- 背景来源：
+- 方法与基线来源论文：
+- 方法与基线实现来源：
 - 指标来源论文：
 - 指标实现来源：
 - 对比方法来源论文：

package/package-assets/shared/lab/.managed/templates/final-report.md CHANGED Viewed

@@ -19,6 +19,23 @@
 - Secondary metric plain-language explanation:
 - Health or support metrics and why they are not the main claim:
+## Background Sources
+- Most important background papers or benchmark references:
+- Why these are the right background anchors:
+## Method and Baseline Sources
+- Our method source or implementation basis:
+- Baseline and comparison source papers:
+- Baseline and comparison implementation sources:
+## Metric Sources
+- Metric source papers:
+- Metric implementation source:
+- Deviation from original implementation:
 ## Experiment Setup
 - Datasets:

package/package-assets/shared/lab/context/eval-protocol.md CHANGED Viewed

@@ -17,6 +17,9 @@ Use this file to define the paper-facing evaluation objective, table plan, gates
 ## Metric Glossary
 - Metric glossary:
+- Background sources:
+- Method and baseline source papers:
+- Method and baseline implementation source:
 - Metric source papers:
 - Metric implementation source:
 - Comparison source papers:

package/package-assets/shared/skills/lab/stages/report.md CHANGED Viewed

@@ -6,6 +6,9 @@
 - method overview
 - selected metrics summary
 - plain-language metric guide
+- background sources
+- method and baseline sources
+- metric sources
 - experiment setup
 - validated main results
 - managed main tables artifact under `<deliverables_root>/main-tables.md`
@@ -39,6 +42,8 @@
 - Carry the approved `Primary metrics`, `Secondary metrics`, and `Required terminal evidence` into both the report and the managed main-tables artifact.
 - Explain the selected primary and secondary metrics in plain language for the user: what each metric measures, whether higher or lower is better, and whether it is a main result metric or only a health/support metric.
 - If coverage, completeness, confidence, or similar health metrics appear, explicitly say that they describe experimental reliability rather than the main scientific effect.
+- Pull the core background references, method or baseline references, and metric references out of the approved evaluation protocol instead of hiding them in `.lab/context/*`.
+- Report only the few references a collaborator needs to orient themselves quickly; do not turn `report.md` into a full bibliography dump.
 - If the report depends on a deviation from an original metric or implementation, state that deviation explicitly instead of smoothing it over.
 - If `.lab/config/workflow.json` sets the workflow language to Chinese, write `report.md` and `<deliverables_root>/main-tables.md` in Chinese unless a file path, code identifier, or literal metric name must remain unchanged.
 - Prefer conservative interpretation over marketing language.

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "superlab",
-  "version": "0.1.17",
+  "version": "0.1.18",
   "description": "Strict /lab research workflow installer for Codex and Claude",
   "keywords": [
     "codex",