superlab 0.1.17 → 0.1.18

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
@@ -270,6 +270,9 @@ async function startAutoMode({ targetDir, now = new Date() }) {
270
270
  requiredTerminalEvidence: evalProtocol.requiredTerminalEvidence,
271
271
  experimentLadder: evalProtocol.experimentLadder,
272
272
  metricGlossary: evalProtocol.metricGlossary,
273
+ backgroundSources: evalProtocol.backgroundSources,
274
+ methodAndBaselineSourcePapers: evalProtocol.methodAndBaselineSourcePapers,
275
+ methodAndBaselineImplementationSource: evalProtocol.methodAndBaselineImplementationSource,
273
276
  metricSourcePapers: evalProtocol.metricSourcePapers,
274
277
  metricImplementationSource: evalProtocol.metricImplementationSource,
275
278
  comparisonSourcePapers: evalProtocol.comparisonSourcePapers,
@@ -755,6 +758,9 @@ function stopAutoMode({ targetDir, now = new Date() }) {
755
758
  requiredTerminalEvidence: evalProtocol.requiredTerminalEvidence,
756
759
  experimentLadder: evalProtocol.experimentLadder,
757
760
  metricGlossary: evalProtocol.metricGlossary,
761
+ backgroundSources: evalProtocol.backgroundSources,
762
+ methodAndBaselineSourcePapers: evalProtocol.methodAndBaselineSourcePapers,
763
+ methodAndBaselineImplementationSource: evalProtocol.methodAndBaselineImplementationSource,
758
764
  metricSourcePapers: evalProtocol.metricSourcePapers,
759
765
  metricImplementationSource: evalProtocol.metricImplementationSource,
760
766
  comparisonSourcePapers: evalProtocol.comparisonSourcePapers,
@@ -146,6 +146,9 @@ function renderAutoOutcome(outcome, { lang = "en" } = {}) {
146
146
  - 必要终局证据: ${outcome.requiredTerminalEvidence || ""}
147
147
  - 实验阶梯: ${outcome.experimentLadder || ""}
148
148
  - 指标释义: ${outcome.metricGlossary || ""}
149
+ - 背景来源: ${outcome.backgroundSources || ""}
150
+ - 方法与基线来源论文: ${outcome.methodAndBaselineSourcePapers || ""}
151
+ - 方法与基线实现来源: ${outcome.methodAndBaselineImplementationSource || ""}
149
152
  - 指标来源论文: ${outcome.metricSourcePapers || ""}
150
153
  - 指标实现来源: ${outcome.metricImplementationSource || ""}
151
154
  - 对比方法来源论文: ${outcome.comparisonSourcePapers || ""}
@@ -180,6 +183,9 @@ function renderAutoOutcome(outcome, { lang = "en" } = {}) {
180
183
  - Required terminal evidence: ${outcome.requiredTerminalEvidence || ""}
181
184
  - Experiment ladder: ${outcome.experimentLadder || ""}
182
185
  - Metric glossary: ${outcome.metricGlossary || ""}
186
+ - Background sources: ${outcome.backgroundSources || ""}
187
+ - Method and baseline source papers: ${outcome.methodAndBaselineSourcePapers || ""}
188
+ - Method and baseline implementation source: ${outcome.methodAndBaselineImplementationSource || ""}
183
189
  - Metric source papers: ${outcome.metricSourcePapers || ""}
184
190
  - Metric implementation source: ${outcome.metricImplementationSource || ""}
185
191
  - Comparison source papers: ${outcome.comparisonSourcePapers || ""}
package/lib/context.cjs CHANGED
@@ -101,6 +101,9 @@ function renderSummary(lang, data) {
101
101
  - Required terminal evidence: ${data.evalRequiredTerminalEvidence || "待补充"}
102
102
  - Table plan: ${data.evalTablePlan || "待补充"}
103
103
  - Metric glossary: ${data.evalMetricGlossary || "待补充"}
104
+ - Background sources: ${data.evalBackgroundSources || "待补充"}
105
+ - Method and baseline source papers: ${data.evalMethodAndBaselineSourcePapers || "待补充"}
106
+ - Method and baseline implementation source: ${data.evalMethodAndBaselineImplementationSource || "待补充"}
104
107
  - Metric source papers: ${data.evalMetricSourcePapers || "待补充"}
105
108
  - Metric implementation source: ${data.evalMetricImplementationSource || "待补充"}
106
109
  - Comparison source papers: ${data.evalComparisonSourcePapers || "待补充"}
@@ -156,6 +159,9 @@ function renderSummary(lang, data) {
156
159
  - Required terminal evidence: ${data.evalRequiredTerminalEvidence || "TBD"}
157
160
  - Table plan: ${data.evalTablePlan || "TBD"}
158
161
  - Metric glossary: ${data.evalMetricGlossary || "TBD"}
162
+ - Background sources: ${data.evalBackgroundSources || "TBD"}
163
+ - Method and baseline source papers: ${data.evalMethodAndBaselineSourcePapers || "TBD"}
164
+ - Method and baseline implementation source: ${data.evalMethodAndBaselineImplementationSource || "TBD"}
159
165
  - Metric source papers: ${data.evalMetricSourcePapers || "TBD"}
160
166
  - Metric implementation source: ${data.evalMetricImplementationSource || "TBD"}
161
167
  - Comparison source papers: ${data.evalComparisonSourcePapers || "TBD"}
@@ -266,6 +272,9 @@ ${data.problem || "待补充"}
266
272
  - Required terminal evidence: ${data.evalRequiredTerminalEvidence || "待补充"}
267
273
  - Table plan: ${data.evalTablePlan || "待补充"}
268
274
  - Metric glossary: ${data.evalMetricGlossary || "待补充"}
275
+ - Background sources: ${data.evalBackgroundSources || "待补充"}
276
+ - Method and baseline source papers: ${data.evalMethodAndBaselineSourcePapers || "待补充"}
277
+ - Method and baseline implementation source: ${data.evalMethodAndBaselineImplementationSource || "待补充"}
269
278
  - Metric source papers: ${data.evalMetricSourcePapers || "待补充"}
270
279
  - Metric implementation source: ${data.evalMetricImplementationSource || "待补充"}
271
280
  - Comparison source papers: ${data.evalComparisonSourcePapers || "待补充"}
@@ -332,6 +341,9 @@ ${data.problem || "TBD"}
332
341
  - Required terminal evidence: ${data.evalRequiredTerminalEvidence || "TBD"}
333
342
  - Table plan: ${data.evalTablePlan || "TBD"}
334
343
  - Metric glossary: ${data.evalMetricGlossary || "TBD"}
344
+ - Background sources: ${data.evalBackgroundSources || "TBD"}
345
+ - Method and baseline source papers: ${data.evalMethodAndBaselineSourcePapers || "TBD"}
346
+ - Method and baseline implementation source: ${data.evalMethodAndBaselineImplementationSource || "TBD"}
335
347
  - Metric source papers: ${data.evalMetricSourcePapers || "TBD"}
336
348
  - Metric implementation source: ${data.evalMetricImplementationSource || "TBD"}
337
349
  - Comparison source papers: ${data.evalComparisonSourcePapers || "TBD"}
@@ -586,6 +598,9 @@ function buildContextSnapshot(targetDir) {
586
598
  evalRequiredTerminalEvidence: evalProtocol.requiredTerminalEvidence,
587
599
  evalTablePlan: evalProtocol.tablePlan,
588
600
  evalMetricGlossary: evalProtocol.metricGlossary,
601
+ evalBackgroundSources: evalProtocol.backgroundSources,
602
+ evalMethodAndBaselineSourcePapers: evalProtocol.methodAndBaselineSourcePapers,
603
+ evalMethodAndBaselineImplementationSource: evalProtocol.methodAndBaselineImplementationSource,
589
604
  evalMetricSourcePapers: evalProtocol.metricSourcePapers,
590
605
  evalMetricImplementationSource: evalProtocol.metricImplementationSource,
591
606
  evalComparisonSourcePapers: evalProtocol.comparisonSourcePapers,
@@ -39,6 +39,21 @@ const EVAL_PROTOCOL_FIELDS = [
39
39
  key: "metricGlossary",
40
40
  labels: ["Metric glossary", "指标释义"],
41
41
  },
42
+ {
43
+ name: "Background sources",
44
+ key: "backgroundSources",
45
+ labels: ["Background sources", "背景来源"],
46
+ },
47
+ {
48
+ name: "Method and baseline source papers",
49
+ key: "methodAndBaselineSourcePapers",
50
+ labels: ["Method and baseline source papers", "方法与基线来源论文"],
51
+ },
52
+ {
53
+ name: "Method and baseline implementation source",
54
+ key: "methodAndBaselineImplementationSource",
55
+ labels: ["Method and baseline implementation source", "方法与基线实现来源"],
56
+ },
42
57
  {
43
58
  name: "Metric source papers",
44
59
  key: "metricSourcePapers",
package/lib/i18n.cjs CHANGED
@@ -710,6 +710,23 @@ const ZH_SKILL_FILES = {
710
710
  - 次级指标在衡量什么:
711
711
  - 健康度/支持性指标在衡量什么,为什么它们不是主结论:
712
712
 
713
+ ## 背景来源
714
+
715
+ - 最关键的背景论文或 benchmark 参考:
716
+ - 为什么这些来源足以锚定当前问题:
717
+
718
+ ## 方法与基线来源
719
+
720
+ - 我们的方法来源或实现基础:
721
+ - baseline 与 comparison 的来源论文:
722
+ - baseline 与 comparison 的实现来源:
723
+
724
+ ## 指标来源
725
+
726
+ - 指标来源论文:
727
+ - 指标实现来源:
728
+ - 与原始实现的偏差:
729
+
713
730
  ## 怎么看主表
714
731
 
715
732
  - Table 1 负责回答什么:
@@ -1949,6 +1966,9 @@ ZH_CONTENT[path.join(".lab", "context", "eval-protocol.md")] = `# 评估协议
1949
1966
  ## 指标释义
1950
1967
 
1951
1968
  - 指标释义:
1969
+ - 背景来源:
1970
+ - 方法与基线来源论文:
1971
+ - 方法与基线实现来源:
1952
1972
  - 指标来源论文:
1953
1973
  - 指标实现来源:
1954
1974
  - 对比方法来源论文:
@@ -19,6 +19,23 @@
19
19
  - Secondary metric plain-language explanation:
20
20
  - Health or support metrics and why they are not the main claim:
21
21
 
22
+ ## Background Sources
23
+
24
+ - Most important background papers or benchmark references:
25
+ - Why these are the right background anchors:
26
+
27
+ ## Method and Baseline Sources
28
+
29
+ - Our method source or implementation basis:
30
+ - Baseline and comparison source papers:
31
+ - Baseline and comparison implementation sources:
32
+
33
+ ## Metric Sources
34
+
35
+ - Metric source papers:
36
+ - Metric implementation source:
37
+ - Deviation from original implementation:
38
+
22
39
  ## Experiment Setup
23
40
 
24
41
  - Datasets:
@@ -17,6 +17,9 @@ Use this file to define the paper-facing evaluation objective, table plan, gates
17
17
  ## Metric Glossary
18
18
 
19
19
  - Metric glossary:
20
+ - Background sources:
21
+ - Method and baseline source papers:
22
+ - Method and baseline implementation source:
20
23
  - Metric source papers:
21
24
  - Metric implementation source:
22
25
  - Comparison source papers:
@@ -6,6 +6,9 @@
6
6
  - method overview
7
7
  - selected metrics summary
8
8
  - plain-language metric guide
9
+ - background sources
10
+ - method and baseline sources
11
+ - metric sources
9
12
  - experiment setup
10
13
  - validated main results
11
14
  - managed main tables artifact under `<deliverables_root>/main-tables.md`
@@ -39,6 +42,8 @@
39
42
  - Carry the approved `Primary metrics`, `Secondary metrics`, and `Required terminal evidence` into both the report and the managed main-tables artifact.
40
43
  - Explain the selected primary and secondary metrics in plain language for the user: what each metric measures, whether higher or lower is better, and whether it is a main result metric or only a health/support metric.
41
44
  - If coverage, completeness, confidence, or similar health metrics appear, explicitly say that they describe experimental reliability rather than the main scientific effect.
45
+ - Pull the core background references, method or baseline references, and metric references out of the approved evaluation protocol instead of hiding them in `.lab/context/*`.
46
+ - Report only the few references a collaborator needs to orient themselves quickly; do not turn `report.md` into a full bibliography dump.
42
47
  - If the report depends on a deviation from an original metric or implementation, state that deviation explicitly instead of smoothing it over.
43
48
  - If `.lab/config/workflow.json` sets the workflow language to Chinese, write `report.md` and `<deliverables_root>/main-tables.md` in Chinese unless a file path, code identifier, or literal metric name must remain unchanged.
44
49
  - Prefer conservative interpretation over marketing language.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "superlab",
3
- "version": "0.1.17",
3
+ "version": "0.1.18",
4
4
  "description": "Strict /lab research workflow installer for Codex and Claude",
5
5
  "keywords": [
6
6
  "codex",