superlab 0.1.19 → 0.1.20

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
package/bin/superlab.cjs CHANGED
File without changes
package/lib/context.cjs CHANGED
@@ -59,6 +59,11 @@ const REPORT_FIELDS = {
59
59
  finalPerformanceSummary: ["Final performance summary", "最终表现总结"],
60
60
  tableCoverage: ["Table coverage", "表格覆盖范围"],
61
61
  };
62
+ const TERMINOLOGY_FIELDS = {
63
+ methodName: ["Method name", "方法名"],
64
+ shortName: ["Short name or acronym", "简称或缩写"],
65
+ contributionBullets: ["Contribution bullets", "贡献 bullets", "Contribution bullets:"],
66
+ };
62
67
 
63
68
  function contextFile(targetDir, name) {
64
69
  return path.join(targetDir, ".lab", "context", name);
@@ -686,6 +691,8 @@ function renderSummary(lang, data) {
686
691
  - Collaborator report mode: ${data.reportMode || "待补充"}
687
692
  - Canonical context readiness: ${data.reportReadiness || "待补充"}
688
693
  - Why this report mode is active: ${data.reportModeReason || "待补充"}
694
+ - Method name: ${data.methodName || "待补充"}
695
+ - Contribution bullets: ${data.contributionBullets || "待补充"}
689
696
  - Eval objective: ${data.evalObjective || "待补充"}
690
697
  - Primary metrics: ${data.evalPrimaryMetrics || "待补充"}
691
698
  - Secondary metrics: ${data.evalSecondaryMetrics || "待补充"}
@@ -747,6 +754,8 @@ function renderSummary(lang, data) {
747
754
  - Collaborator report mode: ${data.reportMode || "TBD"}
748
755
  - Canonical context readiness: ${data.reportReadiness || "TBD"}
749
756
  - Why this report mode is active: ${data.reportModeReason || "TBD"}
757
+ - Method name: ${data.methodName || "TBD"}
758
+ - Contribution bullets: ${data.contributionBullets || "TBD"}
750
759
  - Eval objective: ${data.evalObjective || "TBD"}
751
760
  - Primary metrics: ${data.evalPrimaryMetrics || "TBD"}
752
761
  - Secondary metrics: ${data.evalSecondaryMetrics || "TBD"}
@@ -863,6 +872,8 @@ ${data.problem || "待补充"}
863
872
  - Collaborator report mode: ${data.reportMode || "待补充"}
864
873
  - Canonical context readiness: ${data.reportReadiness || "待补充"}
865
874
  - Why this report mode is active: ${data.reportModeReason || "待补充"}
875
+ - Method name: ${data.methodName || "待补充"}
876
+ - Contribution bullets: ${data.contributionBullets || "待补充"}
866
877
  - Eval objective: ${data.evalObjective || "待补充"}
867
878
  - Primary metrics: ${data.evalPrimaryMetrics || "待补充"}
868
879
  - Secondary metrics: ${data.evalSecondaryMetrics || "待补充"}
@@ -935,6 +946,8 @@ ${data.problem || "TBD"}
935
946
  - Collaborator report mode: ${data.reportMode || "TBD"}
936
947
  - Canonical context readiness: ${data.reportReadiness || "TBD"}
937
948
  - Why this report mode is active: ${data.reportModeReason || "TBD"}
949
+ - Method name: ${data.methodName || "TBD"}
950
+ - Contribution bullets: ${data.contributionBullets || "TBD"}
938
951
  - Eval objective: ${data.evalObjective || "TBD"}
939
952
  - Primary metrics: ${data.evalPrimaryMetrics || "TBD"}
940
953
  - Secondary metrics: ${data.evalSecondaryMetrics || "TBD"}
@@ -987,6 +1000,7 @@ function buildContextSnapshot(targetDir) {
987
1000
  const evidence = readFileIfExists(contextFile(targetDir, "evidence-index.md"));
988
1001
  const questions = readFileIfExists(contextFile(targetDir, "open-questions.md"));
989
1002
  const dataDecisions = readFileIfExists(contextFile(targetDir, "data-decisions.md"));
1003
+ const terminologyLock = readFileIfExists(contextFile(targetDir, "terminology-lock.md"));
990
1004
  const autoMode = readFileIfExists(contextFile(targetDir, "auto-mode.md"));
991
1005
  const autoStatus = readFileIfExists(contextFile(targetDir, "auto-status.md"));
992
1006
  const autoOutcome = readFileIfExists(contextFile(targetDir, "auto-outcome.md"));
@@ -1196,6 +1210,8 @@ function buildContextSnapshot(targetDir) {
1196
1210
  reportMode: reportStatus.mode,
1197
1211
  reportReadiness: reportStatus.readiness,
1198
1212
  reportModeReason: reportStatus.reason,
1213
+ methodName: extractValue(terminologyLock, TERMINOLOGY_FIELDS.methodName),
1214
+ contributionBullets: extractValue(terminologyLock, TERMINOLOGY_FIELDS.contributionBullets),
1199
1215
  evalObjective: evalProtocol.primaryEvaluationObjective,
1200
1216
  evalPrimaryMetrics: evalProtocol.primaryMetrics,
1201
1217
  evalSecondaryMetrics: evalProtocol.secondaryMetrics,
package/lib/i18n.cjs CHANGED
@@ -293,6 +293,7 @@ const ZH_SKILL_FILES = {
293
293
  - 给用户看的总结
294
294
  - 问题与背景的白话说明
295
295
  - 数据集场景说明
296
+ - 贡献总结
296
297
  - 方法概述
297
298
  - 选定指标摘要
298
299
  - 指标白话释义
@@ -330,6 +331,10 @@ const ZH_SKILL_FILES = {
330
331
  - 必须把已批准的主指标、次级指标和必要终局证据明确写进 \`report.md\` 与受管的 \`main-tables.md\`。
331
332
  - 必须用白话解释选定的主指标和次级指标:每个指标在衡量什么、越高还是越低更好、它是主结果指标还是健康度/支持性指标。
332
333
  - 如果出现 coverage、completeness、confidence 或类似健康度指标,必须明确说明这类指标回答的是“实验是否跑稳、证据是否完整”,而不是主要科学效应本身。
334
+ - 要把最关键的背景来源、方法/基线来源和指标来源直接写进报告,不要把它们藏在 \`.lab/context/*\` 里。
335
+ - 如果 \`.lab/context/terminology-lock.md\` 里已经冻结了方法名和 contribution bullets,就必须把它们带进报告。
336
+ - 方法概述必须用协作者能读懂的话说明:我们的方法大致怎么做、相对 closest prior work 或 strongest baseline 改了什么、这些 prior 方法各自做了什么,以及它们为什么在当前 claim 下仍然不够。
337
+ - 只保留少量最关键的 prior work/baseline 锚点;每个锚点都要用一句话交代它做了什么和它的局限。
333
338
  - 在起草报告前,先检查 \`.lab/context/mission.md\` 和 \`.lab/context/eval-protocol.md\` 是否仍是模板空壳。
334
339
  - 如果 canonical context 还是空壳,要先根据 frozen result artifacts、data-decisions、evidence-index 和已批准上下文回填“最小可信版本”,再写报告。
335
340
  - 如果回填后仍缺少协作者可读所需的关键字段,就必须把输出降级成 \`artifact-anchored interim report\`,不能冒充最终协作者报告。
@@ -344,6 +349,7 @@ const ZH_SKILL_FILES = {
344
349
  - 开始前先简洁说明:campaign outcome、选定的主指标和次级指标、最强已支撑 claim、最大的报告风险。
345
350
  - 当该阶段由 \`/lab:auto\` 进入时,要主动给出用户可读的白话总结,不要等用户再追问“这些指标是什么意思”或“这些表怎么看”。
346
351
  - 把 \`report.md\` 当作给用户看的工件,而不是内部 dump。术语第一次出现时就解释;先讲结论,再讲术语。
352
+ - 把 contribution bullets 当作协作者可读的最终主张摘要,而不是内部 TODO;每条都必须和当前证据边界对齐。
347
353
  - 如果某个未决前提会改变报告解释,一次只问一个问题。
348
354
  - 如果存在多种报告 framing,先给 2-3 个方案、trade-offs 和推荐项,优先最忠于证据的 framing。
349
355
  - 如果某种 framing 会实质影响后续论文 claim,要保留 approval gate。
@@ -726,6 +732,21 @@ const ZH_SKILL_FILES = {
726
732
  - 数据集或 benchmark 2 代表什么真实场景:
727
733
  - 数据集或 benchmark 3 代表什么真实场景:
728
734
 
735
+ ## 贡献总结
736
+
737
+ - Contribution bullets:
738
+ - 当前证据最强的贡献:
739
+ - 仍需要更强证据的贡献:
740
+
741
+ ## 方法概述
742
+
743
+ - 已批准的方法名:
744
+ - 方法白话总结:
745
+ - 相比 prior work 这套方法改变了什么:
746
+ - 最相关的 prior work 或 baseline 锚点:
747
+ - 这些 prior 方法各自做了什么:
748
+ - 为什么这些 prior 方法在这里仍然不够:
749
+
729
750
  ## 选定指标
730
751
 
731
752
  - 主指标:
@@ -25,6 +25,21 @@
25
25
  - Dataset or benchmark 2 and what real-world setting it represents:
26
26
  - Dataset or benchmark 3 and what real-world setting it represents:
27
27
 
28
+ ## Contribution Summary
29
+
30
+ - Contribution bullets:
31
+ - Strongest supported contribution:
32
+ - Contributions that still need stronger evidence:
33
+
34
+ ## Method Overview
35
+
36
+ - Approved method name:
37
+ - Plain-language method summary:
38
+ - What this method changes relative to prior work:
39
+ - Most relevant prior work or baseline anchors:
40
+ - What those prior methods do:
41
+ - Why those prior methods are still insufficient here:
42
+
28
43
  ## Selected Metrics
29
44
 
30
45
  - Primary metrics:
@@ -6,6 +6,7 @@
6
6
  - reader summary for the user
7
7
  - problem and background in plain language
8
8
  - dataset scene notes in plain language
9
+ - contribution summary
9
10
  - method overview
10
11
  - selected metrics summary
11
12
  - plain-language metric guide
@@ -49,6 +50,9 @@
49
50
  - Explain the selected primary and secondary metrics in plain language for the user: what each metric measures, whether higher or lower is better, and whether it is a main result metric or only a health/support metric.
50
51
  - If coverage, completeness, confidence, or similar health metrics appear, explicitly say that they describe experimental reliability rather than the main scientific effect.
51
52
  - Pull the core background references, method or baseline references, and metric references out of the approved evaluation protocol instead of hiding them in `.lab/context/*`.
53
+ - Pull the approved method name and contribution bullets out of `.lab/context/terminology-lock.md` when that framing context exists; do not silently drop them from the collaborator-facing report.
54
+ - Explain the method overview in collaborator language: what the method roughly does, what changed relative to the closest prior work or strongest baseline, what those prior methods do, and why they remain insufficient for the approved claim.
55
+ - When citing prior work or baselines in the method overview, include only the few anchor references a collaborator needs, and summarize their role and limitation in one short line each.
52
56
  - Report only the few references a collaborator needs to orient themselves quickly; do not turn `report.md` into a full bibliography dump.
53
57
  - If the report depends on a deviation from an original metric or implementation, state that deviation explicitly instead of smoothing it over.
54
58
  - Before drafting the report, inspect `.lab/context/mission.md` and `.lab/context/eval-protocol.md` for skeletal template fields.
@@ -64,6 +68,7 @@
64
68
  - Start with a concise summary of the campaign outcome, the selected primary and secondary metrics, the strongest supported claim, and the biggest reporting risk.
65
69
  - Proactively deliver a user-readable plain-language summary when the stage is reached from `/lab:auto`; do not wait for a separate follow-up request asking what the metrics or tables mean.
66
70
  - Treat `report.md` as a user-facing artifact rather than an internal dump. Prefer plain-language explanations before jargon, and explain each metric the first time it matters.
71
+ - Treat contribution bullets as collaborator-facing claim summaries, not as internal TODOs; tie each one to the current evidence boundary.
67
72
  - If a missing assumption would change report interpretation, ask one clarifying question at a time.
68
73
  - If there are multiple defensible report framings, present 2-3 approaches with trade-offs and recommend the most evidence-faithful framing before writing.
69
74
  - Keep an approval gate when the reporting frame would materially affect what the paper later claims.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "superlab",
3
- "version": "0.1.19",
3
+ "version": "0.1.20",
4
4
  "description": "Strict /lab research workflow installer for Codex and Claude",
5
5
  "keywords": [
6
6
  "codex",