superlab 0.1.16 → 0.1.18

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between package versions as they appear in their respective public registries.
@@ -270,6 +270,9 @@ async function startAutoMode({ targetDir, now = new Date() }) {
270
270
  requiredTerminalEvidence: evalProtocol.requiredTerminalEvidence,
271
271
  experimentLadder: evalProtocol.experimentLadder,
272
272
  metricGlossary: evalProtocol.metricGlossary,
273
+ backgroundSources: evalProtocol.backgroundSources,
274
+ methodAndBaselineSourcePapers: evalProtocol.methodAndBaselineSourcePapers,
275
+ methodAndBaselineImplementationSource: evalProtocol.methodAndBaselineImplementationSource,
273
276
  metricSourcePapers: evalProtocol.metricSourcePapers,
274
277
  metricImplementationSource: evalProtocol.metricImplementationSource,
275
278
  comparisonSourcePapers: evalProtocol.comparisonSourcePapers,
@@ -755,6 +758,9 @@ function stopAutoMode({ targetDir, now = new Date() }) {
755
758
  requiredTerminalEvidence: evalProtocol.requiredTerminalEvidence,
756
759
  experimentLadder: evalProtocol.experimentLadder,
757
760
  metricGlossary: evalProtocol.metricGlossary,
761
+ backgroundSources: evalProtocol.backgroundSources,
762
+ methodAndBaselineSourcePapers: evalProtocol.methodAndBaselineSourcePapers,
763
+ methodAndBaselineImplementationSource: evalProtocol.methodAndBaselineImplementationSource,
758
764
  metricSourcePapers: evalProtocol.metricSourcePapers,
759
765
  metricImplementationSource: evalProtocol.metricImplementationSource,
760
766
  comparisonSourcePapers: evalProtocol.comparisonSourcePapers,
@@ -146,6 +146,9 @@ function renderAutoOutcome(outcome, { lang = "en" } = {}) {
146
146
  - 必要终局证据: ${outcome.requiredTerminalEvidence || ""}
147
147
  - 实验阶梯: ${outcome.experimentLadder || ""}
148
148
  - 指标释义: ${outcome.metricGlossary || ""}
149
+ - 背景来源: ${outcome.backgroundSources || ""}
150
+ - 方法与基线来源论文: ${outcome.methodAndBaselineSourcePapers || ""}
151
+ - 方法与基线实现来源: ${outcome.methodAndBaselineImplementationSource || ""}
149
152
  - 指标来源论文: ${outcome.metricSourcePapers || ""}
150
153
  - 指标实现来源: ${outcome.metricImplementationSource || ""}
151
154
  - 对比方法来源论文: ${outcome.comparisonSourcePapers || ""}
@@ -180,6 +183,9 @@ function renderAutoOutcome(outcome, { lang = "en" } = {}) {
180
183
  - Required terminal evidence: ${outcome.requiredTerminalEvidence || ""}
181
184
  - Experiment ladder: ${outcome.experimentLadder || ""}
182
185
  - Metric glossary: ${outcome.metricGlossary || ""}
186
+ - Background sources: ${outcome.backgroundSources || ""}
187
+ - Method and baseline source papers: ${outcome.methodAndBaselineSourcePapers || ""}
188
+ - Method and baseline implementation source: ${outcome.methodAndBaselineImplementationSource || ""}
183
189
  - Metric source papers: ${outcome.metricSourcePapers || ""}
184
190
  - Metric implementation source: ${outcome.metricImplementationSource || ""}
185
191
  - Comparison source papers: ${outcome.comparisonSourcePapers || ""}
package/lib/context.cjs CHANGED
@@ -101,6 +101,9 @@ function renderSummary(lang, data) {
101
101
  - Required terminal evidence: ${data.evalRequiredTerminalEvidence || "待补充"}
102
102
  - Table plan: ${data.evalTablePlan || "待补充"}
103
103
  - Metric glossary: ${data.evalMetricGlossary || "待补充"}
104
+ - Background sources: ${data.evalBackgroundSources || "待补充"}
105
+ - Method and baseline source papers: ${data.evalMethodAndBaselineSourcePapers || "待补充"}
106
+ - Method and baseline implementation source: ${data.evalMethodAndBaselineImplementationSource || "待补充"}
104
107
  - Metric source papers: ${data.evalMetricSourcePapers || "待补充"}
105
108
  - Metric implementation source: ${data.evalMetricImplementationSource || "待补充"}
106
109
  - Comparison source papers: ${data.evalComparisonSourcePapers || "待补充"}
@@ -156,6 +159,9 @@ function renderSummary(lang, data) {
156
159
  - Required terminal evidence: ${data.evalRequiredTerminalEvidence || "TBD"}
157
160
  - Table plan: ${data.evalTablePlan || "TBD"}
158
161
  - Metric glossary: ${data.evalMetricGlossary || "TBD"}
162
+ - Background sources: ${data.evalBackgroundSources || "TBD"}
163
+ - Method and baseline source papers: ${data.evalMethodAndBaselineSourcePapers || "TBD"}
164
+ - Method and baseline implementation source: ${data.evalMethodAndBaselineImplementationSource || "TBD"}
159
165
  - Metric source papers: ${data.evalMetricSourcePapers || "TBD"}
160
166
  - Metric implementation source: ${data.evalMetricImplementationSource || "TBD"}
161
167
  - Comparison source papers: ${data.evalComparisonSourcePapers || "TBD"}
@@ -266,6 +272,9 @@ ${data.problem || "待补充"}
266
272
  - Required terminal evidence: ${data.evalRequiredTerminalEvidence || "待补充"}
267
273
  - Table plan: ${data.evalTablePlan || "待补充"}
268
274
  - Metric glossary: ${data.evalMetricGlossary || "待补充"}
275
+ - Background sources: ${data.evalBackgroundSources || "待补充"}
276
+ - Method and baseline source papers: ${data.evalMethodAndBaselineSourcePapers || "待补充"}
277
+ - Method and baseline implementation source: ${data.evalMethodAndBaselineImplementationSource || "待补充"}
269
278
  - Metric source papers: ${data.evalMetricSourcePapers || "待补充"}
270
279
  - Metric implementation source: ${data.evalMetricImplementationSource || "待补充"}
271
280
  - Comparison source papers: ${data.evalComparisonSourcePapers || "待补充"}
@@ -332,6 +341,9 @@ ${data.problem || "TBD"}
332
341
  - Required terminal evidence: ${data.evalRequiredTerminalEvidence || "TBD"}
333
342
  - Table plan: ${data.evalTablePlan || "TBD"}
334
343
  - Metric glossary: ${data.evalMetricGlossary || "TBD"}
344
+ - Background sources: ${data.evalBackgroundSources || "TBD"}
345
+ - Method and baseline source papers: ${data.evalMethodAndBaselineSourcePapers || "TBD"}
346
+ - Method and baseline implementation source: ${data.evalMethodAndBaselineImplementationSource || "TBD"}
335
347
  - Metric source papers: ${data.evalMetricSourcePapers || "TBD"}
336
348
  - Metric implementation source: ${data.evalMetricImplementationSource || "TBD"}
337
349
  - Comparison source papers: ${data.evalComparisonSourcePapers || "TBD"}
@@ -586,6 +598,9 @@ function buildContextSnapshot(targetDir) {
586
598
  evalRequiredTerminalEvidence: evalProtocol.requiredTerminalEvidence,
587
599
  evalTablePlan: evalProtocol.tablePlan,
588
600
  evalMetricGlossary: evalProtocol.metricGlossary,
601
+ evalBackgroundSources: evalProtocol.backgroundSources,
602
+ evalMethodAndBaselineSourcePapers: evalProtocol.methodAndBaselineSourcePapers,
603
+ evalMethodAndBaselineImplementationSource: evalProtocol.methodAndBaselineImplementationSource,
589
604
  evalMetricSourcePapers: evalProtocol.metricSourcePapers,
590
605
  evalMetricImplementationSource: evalProtocol.metricImplementationSource,
591
606
  evalComparisonSourcePapers: evalProtocol.comparisonSourcePapers,
@@ -39,6 +39,21 @@ const EVAL_PROTOCOL_FIELDS = [
39
39
  key: "metricGlossary",
40
40
  labels: ["Metric glossary", "指标释义"],
41
41
  },
42
+ {
43
+ name: "Background sources",
44
+ key: "backgroundSources",
45
+ labels: ["Background sources", "背景来源"],
46
+ },
47
+ {
48
+ name: "Method and baseline source papers",
49
+ key: "methodAndBaselineSourcePapers",
50
+ labels: ["Method and baseline source papers", "方法与基线来源论文"],
51
+ },
52
+ {
53
+ name: "Method and baseline implementation source",
54
+ key: "methodAndBaselineImplementationSource",
55
+ labels: ["Method and baseline implementation source", "方法与基线实现来源"],
56
+ },
42
57
  {
43
58
  name: "Metric source papers",
44
59
  key: "metricSourcePapers",
package/lib/i18n.cjs CHANGED
@@ -55,7 +55,7 @@ const ZH_CONTENT = {
55
55
  [path.join(".codex", "prompts", "lab-report.md")]: codexPrompt(
56
56
  "基于验证后的迭代工件生成最终报告",
57
57
  "report context",
58
- "使用已安装的 `lab` 技能:`.codex/skills/lab/SKILL.md`。\n\n立刻针对用户当前给出的参数执行 `/lab:report`,不要只推荐别的 `/lab` 阶段。只有在缺少阻塞性前提时,才明确指出缺什么,并且一次最多追问一个问题。\n\n本命令运行 `/lab:report` 阶段。它必须汇总标准化摘要、保留失败尝试和局限,并生成最终实验报告。"
58
+ "使用已安装的 `lab` 技能:`.codex/skills/lab/SKILL.md`。\n\n立刻针对用户当前给出的参数执行 `/lab:report`,不要只推荐别的 `/lab` 阶段。只有在缺少阻塞性前提时,才明确指出缺什么,并且一次最多追问一个问题。\n\n本命令运行 `/lab:report` 阶段。它必须生成给用户直接阅读的最终实验报告和受管的 `main-tables.md`,明确写出主指标、次级指标和必要终局证据,并用白话解释这些指标分别衡量什么、哪些只是健康度或支持性指标、以及每张主表到底证明了什么和没证明什么。"
59
59
  ),
60
60
  [path.join(".codex", "prompts", "lab-write.md")]: codexPrompt(
61
61
  "把验证过的研究工件转成论文 section,并按小步方式修订",
@@ -102,7 +102,7 @@ const ZH_CONTENT = {
102
102
  "lab-report",
103
103
  "基于验证后的迭代工件生成最终报告",
104
104
  "report context",
105
- "使用已安装的 `lab` 技能:`.claude/skills/lab/SKILL.md`。\n\n立刻针对用户当前给出的参数执行 `report` 阶段,不要只推荐别的 lab 阶段。只有在缺少阻塞性前提时,才明确指出缺什么,并且一次最多追问一个问题。\n\n本命令运行 lab workflow 的 `report` 阶段。它必须汇总标准化摘要、保留失败尝试和局限,并生成最终实验报告。"
105
+ "使用已安装的 `lab` 技能:`.claude/skills/lab/SKILL.md`。\n\n立刻针对用户当前给出的参数执行 `report` 阶段,不要只推荐别的 lab 阶段。只有在缺少阻塞性前提时,才明确指出缺什么,并且一次最多追问一个问题。\n\n本命令运行 lab workflow 的 `report` 阶段。它必须生成给用户直接阅读的最终实验报告和受管的 `main-tables.md`,明确写出主指标、次级指标和必要终局证据,并用白话解释这些指标分别衡量什么、哪些只是健康度或支持性指标、以及每张主表到底证明了什么和没证明什么。"
106
106
  ),
107
107
  [path.join(".claude", "commands", "lab-write.md")]: claudeCommand(
108
108
  "lab-write",
@@ -289,11 +289,14 @@ const ZH_SKILL_FILES = {
289
289
 
290
290
  ## 必要输出
291
291
 
292
+ - 给用户看的总结
292
293
  - 方法概述
293
294
  - 选定指标摘要
295
+ - 指标白话释义
294
296
  - 实验设置
295
297
  - 已验证主结果
296
298
  - 位于 \`<deliverables_root>/main-tables.md\` 的受管主表工件
299
+ - 怎么看主表的阅读指引
297
300
  - 消融
298
301
  - 失败尝试
299
302
  - 局限性
@@ -319,6 +322,8 @@ const ZH_SKILL_FILES = {
319
322
  - 主表结构、gate 和最终结果 framing 必须对齐已批准的评估协议。
320
323
  - 不要凭记忆重述指标定义、baseline 行为或对比方法实现;直接引用评估协议里记录的来源。
321
324
  - 必须把已批准的主指标、次级指标和必要终局证据明确写进 \`report.md\` 与受管的 \`main-tables.md\`。
325
+ - 必须用白话解释选定的主指标和次级指标:每个指标在衡量什么、越高还是越低更好、它是主结果指标还是健康度/支持性指标。
326
+ - 如果出现 coverage、completeness、confidence 或类似健康度指标,必须明确说明这类指标回答的是“实验是否跑稳、证据是否完整”,而不是主要科学效应本身。
322
327
  - 如果报告依赖了对原始指标或原始实现的偏差,必须明确写出这个偏差。
323
328
  - 如果 workflow language 是中文,\`report.md\` 和 \`<deliverables_root>/main-tables.md\` 也应使用中文,除非文件路径、代码标识符或字面指标名必须保持原样。
324
329
  - 解释优先保守,不要写成营销文案。
@@ -327,6 +332,8 @@ const ZH_SKILL_FILES = {
327
332
  ## 交互约束
328
333
 
329
334
  - 开始前先简洁说明:campaign outcome、选定的主指标和次级指标、最强已支撑 claim、最大的报告风险。
335
+ - 当该阶段由 \`/lab:auto\` 进入时,要主动给出用户可读的白话总结,不要等用户再追问“这些指标是什么意思”或“这些表怎么看”。
336
+ - 把 \`report.md\` 当作给用户看的工件,而不是内部 dump。术语第一次出现时就解释;先讲结论,再讲术语。
330
337
  - 如果某个未决前提会改变报告解释,一次只问一个问题。
331
338
  - 如果存在多种报告 framing,先给 2-3 个方案、trade-offs 和推荐项,优先最忠于证据的 framing。
332
339
  - 如果某种 framing 会实质影响后续论文 claim,要保留 approval gate。
@@ -684,10 +691,12 @@ const ZH_SKILL_FILES = {
684
691
  [path.join(".lab", ".managed", "templates", "final-report.md")]:
685
692
  `# 最终报告
686
693
 
687
- ## 目标
694
+ ## 给用户看的总结
688
695
 
689
- - 本轮研究目标:
690
- - 是否达标:
696
+ - 一句话结论:
697
+ - 已经被验证的内容:
698
+ - 还没有被证明的内容:
699
+ - 当前最大报告风险:
691
700
 
692
701
  ## 选定指标
693
702
 
@@ -695,6 +704,36 @@ const ZH_SKILL_FILES = {
695
704
  - 次级指标:
696
705
  - 必要终局证据:
697
706
 
707
+ ## 指标白话释义
708
+
709
+ - 主指标在衡量什么:
710
+ - 次级指标在衡量什么:
711
+ - 健康度/支持性指标在衡量什么,为什么它们不是主结论:
712
+
713
+ ## 背景来源
714
+
715
+ - 最关键的背景论文或 benchmark 参考:
716
+ - 为什么这些来源足以锚定当前问题:
717
+
718
+ ## 方法与基线来源
719
+
720
+ - 我们的方法来源或实现基础:
721
+ - baseline 与 comparison 的来源论文:
722
+ - baseline 与 comparison 的实现来源:
723
+
724
+ ## 指标来源
725
+
726
+ - 指标来源论文:
727
+ - 指标实现来源:
728
+ - 与原始实现的偏差:
729
+
730
+ ## 怎么看主表
731
+
732
+ - Table 1 负责回答什么:
733
+ - Table 2 负责回答什么:
734
+ - Table 3 负责回答什么:
735
+ - Table 4 负责回答什么:
736
+
698
737
  ## 主表工件
699
738
 
700
739
  - 受管主表路径:\`<deliverables_root>/main-tables.md\`
@@ -719,18 +758,37 @@ const ZH_SKILL_FILES = {
719
758
  [path.join(".lab", ".managed", "templates", "main-tables.md")]:
720
759
  `# 主表工件
721
760
 
761
+ ## 给用户看的总结
762
+
763
+ - 用户可直接复述的结论:
764
+ - 这些主表证明了什么:
765
+ - 这些主表还不能证明什么:
766
+
722
767
  ## 选定指标
723
768
 
724
769
  - 主指标:
725
770
  - 次级指标:
726
771
  - 必要终局证据:
727
772
 
773
+ ## 指标白话释义
774
+
775
+ - 主指标在衡量什么:
776
+ - 次级指标在衡量什么:
777
+ - 健康度/支持性指标该怎么读:
778
+
728
779
  ## 最终表现摘要
729
780
 
730
781
  - 主要结果摘要:
731
782
  - 最重要数字:
732
783
  - 报告边界:
733
784
 
785
+ ## 怎么看这些表
786
+
787
+ - Table 1 负责回答什么:
788
+ - Table 2 负责回答什么:
789
+ - Table 3 负责回答什么:
790
+ - Table 4 负责回答什么:
791
+
734
792
  ## Table 1
735
793
 
736
794
  - 作用:
@@ -1908,6 +1966,9 @@ ZH_CONTENT[path.join(".lab", "context", "eval-protocol.md")] = `# 评估协议
1908
1966
  ## 指标释义
1909
1967
 
1910
1968
  - 指标释义:
1969
+ - 背景来源:
1970
+ - 方法与基线来源论文:
1971
+ - 方法与基线实现来源:
1911
1972
  - 指标来源论文:
1912
1973
  - 指标实现来源:
1913
1974
  - 对比方法来源论文:
@@ -2015,6 +2076,7 @@ ZH_CONTENT[path.join(".codex", "skills", "lab", "stages", "auto.md")] = `# \`/la
2015
2076
  - 先做输入归一化:把 \`Autonomy level L1/L2/L3\` 视为执行权限级别,把 \`Layer 3\`、\`Phase 1\`、\`Table 2\` 视为论文范围目标。
2016
2077
  - 如果用户同时提了论文层、实验 phase 和自治级别,先用一句话重述:objective、自治级别、terminal goal、scope、allowed modifications。
2017
2078
  - 如果 workflow language 是中文,摘要、清单条目、任务标签和进度更新都应使用中文,除非文件路径、代码标识符或字面指标名必须保持原样。
2079
+ - 当循环进入 \`report\` 时,要主动给出用户可读的白话总结,解释主指标、次级指标和主表作用;不要等用户额外发一句“解释这些指标”。
2018
2080
  - 当循环即将进入 \`write\`,且 \`paper_template_root\` 为空时:
2019
2081
  - 如果 \`paper_template_decision\` 是 \`unconfirmed\`,必须先追问一次:继续使用默认 scaffold,还是先接入模板目录
2020
2082
  - 如果用户选择默认 scaffold,就持久化 \`paper_template_decision: default-scaffold\`
@@ -2027,6 +2089,8 @@ ZH_CONTENT[path.join(".codex", "skills", "lab", "stages", "auto.md")] = `# \`/la
2027
2089
 
2028
2090
  ZH_CONTENT[path.join(".claude", "skills", "lab", "stages", "auto.md")] =
2029
2091
  ZH_CONTENT[path.join(".codex", "skills", "lab", "stages", "auto.md")];
2092
+ ZH_CONTENT[path.join(".claude", "skills", "lab", "stages", "report.md")] =
2093
+ ZH_CONTENT[path.join(".codex", "skills", "lab", "stages", "report.md")];
2030
2094
 
2031
2095
  function getLocalizedContent(relativePath, lang) {
2032
2096
  if (lang !== "zh") {
@@ -7,4 +7,4 @@ argument-hint: report context
7
7
  Use the installed `lab` skill at `.claude/skills/lab/SKILL.md`.
8
8
 
9
9
  Execute the requested `/lab-report` command against the user's argument now. Do not only recommend another lab stage. If a blocking prerequisite is missing, say exactly what is missing and ask at most one clarifying question.
10
- This command runs the `report` stage of the lab workflow. Follow the installed skill, stage guide, and the project assets under `.lab/`.
10
+ This command runs the `report` stage of the lab workflow. It must produce a user-facing final report plus the managed `main-tables.md` artifact, explicitly carry the approved primary and secondary metrics forward, explain the selected metrics in plain language, say which metrics are only health or support metrics, and explain what each main table proves or does not prove.
@@ -6,4 +6,4 @@ argument-hint: report context
6
6
  Use the installed `lab` skill at `.codex/skills/lab/SKILL.md`.
7
7
 
8
8
  Execute the requested `/lab:report` stage against the user's argument now. Do not only recommend another lab stage. If a blocking prerequisite is missing, say exactly what is missing and ask at most one clarifying question.
9
- This command runs the `/lab:report` stage. Follow the installed skill, stage guide, and the project assets under `.lab/`.
9
+ This command runs the `/lab:report` stage. It must produce a user-facing final report plus the managed `main-tables.md` artifact, explicitly carry the approved primary and secondary metrics forward, explain the selected metrics in plain language, say which metrics are only health or support metrics, and explain what each main table proves or does not prove.
@@ -1,8 +1,11 @@
1
1
  # Final Report
2
2
 
3
- ## Overview
3
+ ## Reader Summary
4
4
 
5
- Summarize the method and overall outcome.
5
+ - One-sentence conclusion:
6
+ - What is validated:
7
+ - What is still unproven:
8
+ - Biggest reporting risk:
6
9
 
7
10
  ## Selected Metrics
8
11
 
@@ -10,6 +13,29 @@ Summarize the method and overall outcome.
10
13
  - Secondary metrics:
11
14
  - Required terminal evidence:
12
15
 
16
+ ## Metric Guide
17
+
18
+ - Primary metric plain-language explanation:
19
+ - Secondary metric plain-language explanation:
20
+ - Health or support metrics and why they are not the main claim:
21
+
22
+ ## Background Sources
23
+
24
+ - Most important background papers or benchmark references:
25
+ - Why these are the right background anchors:
26
+
27
+ ## Method and Baseline Sources
28
+
29
+ - Our method source or implementation basis:
30
+ - Baseline and comparison source papers:
31
+ - Baseline and comparison implementation sources:
32
+
33
+ ## Metric Sources
34
+
35
+ - Metric source papers:
36
+ - Metric implementation source:
37
+ - Deviation from original implementation:
38
+
13
39
  ## Experiment Setup
14
40
 
15
41
  - Datasets:
@@ -17,6 +43,13 @@ Summarize the method and overall outcome.
17
43
  - Baselines:
18
44
  - Metrics:
19
45
 
46
+ ## How to Read the Main Tables
47
+
48
+ - Table 1 is for:
49
+ - Table 2 is for:
50
+ - Table 3 is for:
51
+ - Table 4 is for:
52
+
20
53
  ## Main Tables
21
54
 
22
55
  - Managed main tables artifact: `<deliverables_root>/main-tables.md`
@@ -1,17 +1,36 @@
1
1
  # Main Tables
2
2
 
3
+ ## Reader Summary
4
+
5
+ - User-facing takeaway:
6
+ - What the tables prove:
7
+ - What the tables do not yet prove:
8
+
3
9
  ## Selected Metrics
4
10
 
5
11
  - Primary metrics:
6
12
  - Secondary metrics:
7
13
  - Required terminal evidence:
8
14
 
15
+ ## Metric Guide
16
+
17
+ - Primary metric plain-language explanation:
18
+ - Secondary metric plain-language explanation:
19
+ - Health or support metrics and how to read them:
20
+
9
21
  ## Final Performance Summary
10
22
 
11
23
  - Main result summary:
12
24
  - Most important numbers:
13
25
  - Reporting caveat:
14
26
 
27
+ ## How to Read These Tables
28
+
29
+ - Table 1 is for:
30
+ - Table 2 is for:
31
+ - Table 3 is for:
32
+ - Table 4 is for:
33
+
15
34
  ## Table 1
16
35
 
17
36
  - Purpose:
@@ -17,6 +17,9 @@ Use this file to define the paper-facing evaluation objective, table plan, gates
17
17
  ## Metric Glossary
18
18
 
19
19
  - Metric glossary:
20
+ - Background sources:
21
+ - Method and baseline source papers:
22
+ - Method and baseline implementation source:
20
23
  - Metric source papers:
21
24
  - Metric implementation source:
22
25
  - Comparison source papers:
@@ -108,6 +108,7 @@
108
108
  - Then ask at most one clarifying question if a blocking field is still missing.
109
109
  - If `.lab/config/workflow.json` sets the workflow language to Chinese, write summaries, options, checklist items, task labels, and progress updates in Chinese unless a file path, code identifier, or literal metric name must remain unchanged.
110
110
  - When the loop reaches `report`, apply the same workflow-language rule to `report.md` and the managed `main-tables.md` artifact.
111
+ - When the loop reaches `report`, proactively deliver a user-facing plain-language summary of the selected metrics, what they mean, what the tables prove, and what remains unproven. Do not wait for a separate user request asking for interpretation.
111
112
  - When the loop is about to enter `write` and `paper_template_root` is empty:
112
113
  - if `paper_template_decision` is `unconfirmed`, ask one explicit question: continue with the default scaffold or attach a template directory first
113
114
  - if the user chooses the default scaffold, persist `paper_template_decision: default-scaffold`
@@ -2,11 +2,17 @@
2
2
 
3
3
  ## Required Output
4
4
 
5
+ - reader summary for the user
5
6
  - method overview
6
7
  - selected metrics summary
8
+ - plain-language metric guide
9
+ - background sources
10
+ - method and baseline sources
11
+ - metric sources
7
12
  - experiment setup
8
13
  - validated main results
9
14
  - managed main tables artifact under `<deliverables_root>/main-tables.md`
15
+ - how-to-read-the-tables guide
10
16
  - ablations
11
17
  - failed attempts
12
18
  - limitations
@@ -34,6 +40,10 @@
34
40
  - Structure tables, gates, and main claims against the approved evaluation protocol.
35
41
  - Do not restate metric definitions, baseline behavior, or comparison implementations from memory; use the approved evaluation protocol and its recorded sources.
36
42
  - Carry the approved `Primary metrics`, `Secondary metrics`, and `Required terminal evidence` into both the report and the managed main-tables artifact.
43
+ - Explain the selected primary and secondary metrics in plain language for the user: what each metric measures, whether higher or lower is better, and whether it is a main result metric or only a health/support metric.
44
+ - If coverage, completeness, confidence, or similar health metrics appear, explicitly say that they describe experimental reliability rather than the main scientific effect.
45
+ - Pull the core background references, method or baseline references, and metric references out of the approved evaluation protocol instead of hiding them in `.lab/context/*`.
46
+ - Report only the few references a collaborator needs to orient themselves quickly; do not turn `report.md` into a full bibliography dump.
37
47
  - If the report depends on a deviation from an original metric or implementation, state that deviation explicitly instead of smoothing it over.
38
48
  - If `.lab/config/workflow.json` sets the workflow language to Chinese, write `report.md` and `<deliverables_root>/main-tables.md` in Chinese unless a file path, code identifier, or literal metric name must remain unchanged.
39
49
  - Prefer conservative interpretation over marketing language.
@@ -42,6 +52,8 @@
42
52
  ## Interaction Contract
43
53
 
44
54
  - Start with a concise summary of the campaign outcome, the selected primary and secondary metrics, the strongest supported claim, and the biggest reporting risk.
55
+ - Proactively deliver a user-readable plain-language summary when the stage is reached from `/lab:auto`; do not wait for a separate follow-up request asking what the metrics or tables mean.
56
+ - Treat `report.md` as a user-facing artifact rather than an internal dump. Prefer plain-language explanations before jargon, and explain each metric the first time it matters.
45
57
  - If a missing assumption would change report interpretation, ask one clarifying question at a time.
46
58
  - If there are multiple defensible report framings, present 2-3 approaches with trade-offs and recommend the most evidence-faithful framing before writing.
47
59
  - Keep an approval gate when the reporting frame would materially affect what the paper later claims.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "superlab",
3
- "version": "0.1.16",
3
+ "version": "0.1.18",
4
4
  "description": "Strict /lab research workflow installer for Codex and Claude",
5
5
  "keywords": [
6
6
  "codex",