superlab 0.1.19 → 0.1.21
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/bin/superlab.cjs
CHANGED
|
File without changes
|
package/lib/context.cjs
CHANGED
|
@@ -29,6 +29,35 @@ const EVAL_COLLABORATOR_FIELDS = [
|
|
|
29
29
|
{ name: "Metric source papers", labels: ["Metric source papers", "指标来源论文"] },
|
|
30
30
|
{ name: "Required output artifacts", labels: ["Required output artifacts", "必要输出工件"] },
|
|
31
31
|
];
|
|
32
|
+
// Level-2 Markdown headings that report.md is checked for. Each entry pairs
// the section name used in issue messages with the heading patterns that
// count as that section (English heading first, Chinese heading second).
const REPORT_REQUIRED_SECTIONS = [
  {
    name: "Report Status",
    patterns: [/^##\s+Report Status\s*$/m, /^##\s+报告状态\s*$/m],
  },
  {
    name: "Reader Summary",
    patterns: [/^##\s+Reader Summary\s*$/m, /^##\s+给用户看的总结\s*$/m],
  },
  {
    name: "Problem and Background",
    patterns: [/^##\s+Problem and Background\s*$/m, /^##\s+问题与背景\s*$/m],
  },
  {
    name: "Dataset Scene Notes",
    patterns: [/^##\s+Dataset Scene Notes\s*$/m, /^##\s+数据集场景说明\s*$/m],
  },
  {
    name: "Contribution Summary",
    patterns: [/^##\s+Contribution Summary\s*$/m, /^##\s+贡献总结\s*$/m],
  },
  {
    name: "Method Overview",
    patterns: [/^##\s+Method Overview\s*$/m, /^##\s+方法概述\s*$/m],
  },
  {
    name: "Selected Metrics",
    patterns: [/^##\s+Selected Metrics\s*$/m, /^##\s+选定指标\s*$/m],
  },
  {
    name: "Metric Guide",
    patterns: [/^##\s+Metric Guide\s*$/m, /^##\s+指标白话释义\s*$/m],
  },
  {
    name: "Background Sources",
    patterns: [/^##\s+Background Sources\s*$/m, /^##\s+背景来源\s*$/m],
  },
  {
    name: "Method and Baseline Sources",
    patterns: [/^##\s+Method and Baseline Sources\s*$/m, /^##\s+方法与基线来源\s*$/m],
  },
  {
    name: "Metric Sources",
    patterns: [/^##\s+Metric Sources\s*$/m, /^##\s+指标来源\s*$/m],
  },
];
|
|
48
|
+
// Level-2 Markdown headings that main-tables.md is checked for. Each entry
// pairs the section name used in issue messages with the heading patterns
// that count as that section (English heading first, Chinese heading second).
const MAIN_TABLES_REQUIRED_SECTIONS = [
  {
    name: "Reader Summary",
    patterns: [/^##\s+Reader Summary\s*$/m, /^##\s+给用户看的总结\s*$/m],
  },
  {
    name: "Selected Metrics",
    patterns: [/^##\s+Selected Metrics\s*$/m, /^##\s+选定指标\s*$/m],
  },
  {
    name: "Metric Guide",
    patterns: [/^##\s+Metric Guide\s*$/m, /^##\s+指标白话释义\s*$/m],
  },
  {
    name: "Final Performance Summary",
    patterns: [/^##\s+Final Performance Summary\s*$/m, /^##\s+最终表现摘要\s*$/m],
  },
  {
    name: "How to Read These Tables",
    patterns: [/^##\s+How to Read These Tables\s*$/m, /^##\s+怎么读这些表\s*$/m],
  },
];
|
|
32
61
|
const REPORT_FIELDS = {
|
|
33
62
|
problem: ["Research problem in plain language", "研究问题白话解释", "研究问题"],
|
|
34
63
|
whyItMatters: ["Why this problem matters", "为什么这个问题重要"],
|
|
@@ -59,6 +88,11 @@ const REPORT_FIELDS = {
|
|
|
59
88
|
finalPerformanceSummary: ["Final performance summary", "最终表现总结"],
|
|
60
89
|
tableCoverage: ["Table coverage", "表格覆盖范围"],
|
|
61
90
|
};
|
|
91
|
+
// Accepted labels for each terminology field, keyed by field. Each list mixes
// English and Chinese spellings of the same label.
const TERMINOLOGY_FIELDS = {
  methodName: [
    "Method name",
    "方法名",
  ],
  shortName: [
    "Short name or acronym",
    "简称或缩写",
  ],
  contributionBullets: [
    "Contribution bullets",
    "贡献 bullets",
    "Contribution bullets:",
  ],
};
|
|
62
96
|
|
|
63
97
|
function contextFile(targetDir, name) {
|
|
64
98
|
return path.join(targetDir, ".lab", "context", name);
|
|
@@ -186,6 +220,36 @@ function collaboratorEvalIssues(targetDir) {
|
|
|
186
220
|
: [];
|
|
187
221
|
}
|
|
188
222
|
|
|
223
|
+
/**
 * Lists the required sections that cannot be found in a document.
 *
 * A section counts as present when at least one of its `patterns` matches
 * somewhere in `text`.
 *
 * @param {string} text - Document contents. Any falsy value (empty string,
 *   null, undefined) is treated as a document containing no sections.
 * @param {Array<{name: string, patterns: RegExp[]}>} sections - Required
 *   sections, in the order they should be reported.
 * @returns {string[]} Names of the sections with no matching pattern, in
 *   the same order as `sections`.
 */
function missingRequiredSections(text, sections) {
  if (!text) {
    return sections.map((section) => section.name);
  }
  // Coerce once so non-string input behaves as it did with RegExp#test.
  const haystack = String(text);
  // String#search neither reads nor advances `lastIndex`, so a pattern that
  // carries the `g` or `y` flag gives the same answer on every call.
  // RegExp#test would resume from the previous match position and could
  // report a present section as missing on a repeated check.
  const isPresent = (section) => section.patterns.some((pattern) => haystack.search(pattern) !== -1);
  return sections.filter((section) => !isPresent(section)).map((section) => section.name);
}
|
|
231
|
+
|
|
232
|
+
/**
 * Reports collaborator-facing deliverables that lack required sections.
 *
 * Returns an empty list when `hasCollaboratorFacingDeliverables(targetDir)`
 * is false. Otherwise report.md is checked against REPORT_REQUIRED_SECTIONS
 * and main-tables.md against MAIN_TABLES_REQUIRED_SECTIONS, in that order;
 * a deliverable whose path does not exist on disk is skipped.
 *
 * @param {string} targetDir - Directory passed through to the deliverable helpers.
 * @returns {string[]} One message per deliverable that has missing sections.
 */
function collaboratorReportIssues(targetDir) {
  if (!hasCollaboratorFacingDeliverables(targetDir)) {
    return [];
  }
  const deliverablePaths = getCollaboratorDeliverablePaths(targetDir);
  // One row per deliverable; the message prefixes stay as whole literals so
  // they remain searchable.
  const checks = [
    {
      filePath: deliverablePaths.reportPath,
      required: REPORT_REQUIRED_SECTIONS,
      prefix: "report.md is missing required collaborator-facing sections: ",
    },
    {
      filePath: deliverablePaths.mainTablesPath,
      required: MAIN_TABLES_REQUIRED_SECTIONS,
      prefix: "main-tables.md is missing required collaborator-facing sections: ",
    },
  ];
  const problems = [];
  for (const { filePath, required, prefix } of checks) {
    if (!fs.existsSync(filePath)) {
      continue;
    }
    const absent = missingRequiredSections(readFileIfExists(filePath), required);
    if (absent.length > 0) {
      problems.push(`${prefix}${absent.join(", ")}`);
    }
  }
  return problems;
}
|
|
252
|
+
|
|
189
253
|
/**
 * Extracts one report field by key.
 *
 * Looks up the labels registered for `key` in REPORT_FIELDS (an empty list
 * when the key has no entry) and passes them to `extractValue`.
 *
 * @param {string} reportText - Report contents handed to `extractValue`.
 * @param {string} key - Property name in REPORT_FIELDS.
 * @returns {*} Whatever `extractValue` returns for those labels.
 */
function extractReportValue(reportText, key) {
  const labels = REPORT_FIELDS[key] || [];
  return extractValue(reportText, labels);
}
|
|
@@ -606,7 +670,8 @@ function hydrateEvalProtocol(targetDir) {
|
|
|
606
670
|
function getCollaboratorReportStatus(targetDir) {
|
|
607
671
|
const missionIssues = collaboratorMissionIssues(targetDir);
|
|
608
672
|
const evalIssues = collaboratorEvalIssues(targetDir);
|
|
609
|
-
const
|
|
673
|
+
const reportIssues = collaboratorReportIssues(targetDir);
|
|
674
|
+
const issues = missionIssues.concat(evalIssues, reportIssues);
|
|
610
675
|
if (issues.length > 0) {
|
|
611
676
|
return {
|
|
612
677
|
mode: "artifact-anchored interim",
|
|
@@ -686,6 +751,8 @@ function renderSummary(lang, data) {
|
|
|
686
751
|
- Collaborator report mode: ${data.reportMode || "待补充"}
|
|
687
752
|
- Canonical context readiness: ${data.reportReadiness || "待补充"}
|
|
688
753
|
- Why this report mode is active: ${data.reportModeReason || "待补充"}
|
|
754
|
+
- Method name: ${data.methodName || "待补充"}
|
|
755
|
+
- Contribution bullets: ${data.contributionBullets || "待补充"}
|
|
689
756
|
- Eval objective: ${data.evalObjective || "待补充"}
|
|
690
757
|
- Primary metrics: ${data.evalPrimaryMetrics || "待补充"}
|
|
691
758
|
- Secondary metrics: ${data.evalSecondaryMetrics || "待补充"}
|
|
@@ -747,6 +814,8 @@ function renderSummary(lang, data) {
|
|
|
747
814
|
- Collaborator report mode: ${data.reportMode || "TBD"}
|
|
748
815
|
- Canonical context readiness: ${data.reportReadiness || "TBD"}
|
|
749
816
|
- Why this report mode is active: ${data.reportModeReason || "TBD"}
|
|
817
|
+
- Method name: ${data.methodName || "TBD"}
|
|
818
|
+
- Contribution bullets: ${data.contributionBullets || "TBD"}
|
|
750
819
|
- Eval objective: ${data.evalObjective || "TBD"}
|
|
751
820
|
- Primary metrics: ${data.evalPrimaryMetrics || "TBD"}
|
|
752
821
|
- Secondary metrics: ${data.evalSecondaryMetrics || "TBD"}
|
|
@@ -863,6 +932,8 @@ ${data.problem || "待补充"}
|
|
|
863
932
|
- Collaborator report mode: ${data.reportMode || "待补充"}
|
|
864
933
|
- Canonical context readiness: ${data.reportReadiness || "待补充"}
|
|
865
934
|
- Why this report mode is active: ${data.reportModeReason || "待补充"}
|
|
935
|
+
- Method name: ${data.methodName || "待补充"}
|
|
936
|
+
- Contribution bullets: ${data.contributionBullets || "待补充"}
|
|
866
937
|
- Eval objective: ${data.evalObjective || "待补充"}
|
|
867
938
|
- Primary metrics: ${data.evalPrimaryMetrics || "待补充"}
|
|
868
939
|
- Secondary metrics: ${data.evalSecondaryMetrics || "待补充"}
|
|
@@ -935,6 +1006,8 @@ ${data.problem || "TBD"}
|
|
|
935
1006
|
- Collaborator report mode: ${data.reportMode || "TBD"}
|
|
936
1007
|
- Canonical context readiness: ${data.reportReadiness || "TBD"}
|
|
937
1008
|
- Why this report mode is active: ${data.reportModeReason || "TBD"}
|
|
1009
|
+
- Method name: ${data.methodName || "TBD"}
|
|
1010
|
+
- Contribution bullets: ${data.contributionBullets || "TBD"}
|
|
938
1011
|
- Eval objective: ${data.evalObjective || "TBD"}
|
|
939
1012
|
- Primary metrics: ${data.evalPrimaryMetrics || "TBD"}
|
|
940
1013
|
- Secondary metrics: ${data.evalSecondaryMetrics || "TBD"}
|
|
@@ -987,6 +1060,7 @@ function buildContextSnapshot(targetDir) {
|
|
|
987
1060
|
const evidence = readFileIfExists(contextFile(targetDir, "evidence-index.md"));
|
|
988
1061
|
const questions = readFileIfExists(contextFile(targetDir, "open-questions.md"));
|
|
989
1062
|
const dataDecisions = readFileIfExists(contextFile(targetDir, "data-decisions.md"));
|
|
1063
|
+
const terminologyLock = readFileIfExists(contextFile(targetDir, "terminology-lock.md"));
|
|
990
1064
|
const autoMode = readFileIfExists(contextFile(targetDir, "auto-mode.md"));
|
|
991
1065
|
const autoStatus = readFileIfExists(contextFile(targetDir, "auto-status.md"));
|
|
992
1066
|
const autoOutcome = readFileIfExists(contextFile(targetDir, "auto-outcome.md"));
|
|
@@ -1196,6 +1270,8 @@ function buildContextSnapshot(targetDir) {
|
|
|
1196
1270
|
reportMode: reportStatus.mode,
|
|
1197
1271
|
reportReadiness: reportStatus.readiness,
|
|
1198
1272
|
reportModeReason: reportStatus.reason,
|
|
1273
|
+
methodName: extractValue(terminologyLock, TERMINOLOGY_FIELDS.methodName),
|
|
1274
|
+
contributionBullets: extractValue(terminologyLock, TERMINOLOGY_FIELDS.contributionBullets),
|
|
1199
1275
|
evalObjective: evalProtocol.primaryEvaluationObjective,
|
|
1200
1276
|
evalPrimaryMetrics: evalProtocol.primaryMetrics,
|
|
1201
1277
|
evalSecondaryMetrics: evalProtocol.secondaryMetrics,
|
|
@@ -1302,6 +1378,7 @@ module.exports = {
|
|
|
1302
1378
|
archiveContext,
|
|
1303
1379
|
collaboratorEvalIssues,
|
|
1304
1380
|
collaboratorMissionIssues,
|
|
1381
|
+
collaboratorReportIssues,
|
|
1305
1382
|
getCollaboratorReportStatus,
|
|
1306
1383
|
hasCollaboratorFacingDeliverables,
|
|
1307
1384
|
hydrateCanonicalContext,
|
package/lib/i18n.cjs
CHANGED
|
@@ -293,6 +293,7 @@ const ZH_SKILL_FILES = {
|
|
|
293
293
|
- 给用户看的总结
|
|
294
294
|
- 问题与背景的白话说明
|
|
295
295
|
- 数据集场景说明
|
|
296
|
+
- 贡献总结
|
|
296
297
|
- 方法概述
|
|
297
298
|
- 选定指标摘要
|
|
298
299
|
- 指标白话释义
|
|
@@ -330,9 +331,14 @@ const ZH_SKILL_FILES = {
|
|
|
330
331
|
- 必须把已批准的主指标、次级指标和必要终局证据明确写进 \`report.md\` 与受管的 \`main-tables.md\`。
|
|
331
332
|
- 必须用白话解释选定的主指标和次级指标:每个指标在衡量什么、越高还是越低更好、它是主结果指标还是健康度/支持性指标。
|
|
332
333
|
- 如果出现 coverage、completeness、confidence 或类似健康度指标,必须明确说明这类指标回答的是“实验是否跑稳、证据是否完整”,而不是主要科学效应本身。
|
|
334
|
+
- 要把最关键的背景来源、方法/基线来源和指标来源直接写进报告,不要把它们藏在 \`.lab/context/*\` 里。
|
|
335
|
+
- 如果 \`.lab/context/terminology-lock.md\` 里已经冻结了方法名和 contribution bullets,就必须把它们带进报告。
|
|
336
|
+
- 方法概述必须用协作者能读懂的话说明:我们的方法大致怎么做、相对 closest prior work 或 strongest baseline 改了什么、这些 prior 方法各自做了什么,以及它们为什么在当前 claim 下仍然不够。
|
|
337
|
+
- 只保留少量最关键的 prior work/baseline 锚点;每个锚点都要用一句话交代它做了什么和它的局限。
|
|
333
338
|
- 在起草报告前,先检查 \`.lab/context/mission.md\` 和 \`.lab/context/eval-protocol.md\` 是否仍是模板空壳。
|
|
334
339
|
- 如果 canonical context 还是空壳,要先根据 frozen result artifacts、data-decisions、evidence-index 和已批准上下文回填“最小可信版本”,再写报告。
|
|
335
340
|
- 如果回填后仍缺少协作者可读所需的关键字段,就必须把输出降级成 \`artifact-anchored interim report\`,不能冒充最终协作者报告。
|
|
341
|
+
- 如果现有的 \`report.md\` 或 \`main-tables.md\` 缺少受管模板要求的协作者可读章节,也必须视为报告缺陷;rerun 需要补齐这些缺失块,不能直接宣称“正文无变化”或把这次 rerun 当成 no-op。
|
|
336
342
|
- 如果报告依赖了对原始指标或原始实现的偏差,必须明确写出这个偏差。
|
|
337
343
|
- workflow 工件状态、rerun id 或 LaTeX 骨架状态不能混进“已验证主结果”;这些内容必须单列到工件状态部分。
|
|
338
344
|
- 如果 workflow language 是中文,\`report.md\` 和 \`<deliverables_root>/main-tables.md\` 也应使用中文,除非文件路径、代码标识符或字面指标名必须保持原样。
|
|
@@ -344,6 +350,7 @@ const ZH_SKILL_FILES = {
|
|
|
344
350
|
- 开始前先简洁说明:campaign outcome、选定的主指标和次级指标、最强已支撑 claim、最大的报告风险。
|
|
345
351
|
- 当该阶段由 \`/lab:auto\` 进入时,要主动给出用户可读的白话总结,不要等用户再追问“这些指标是什么意思”或“这些表怎么看”。
|
|
346
352
|
- 把 \`report.md\` 当作给用户看的工件,而不是内部 dump。术语第一次出现时就解释;先讲结论,再讲术语。
|
|
353
|
+
- 把 contribution bullets 当作协作者可读的最终主张摘要,而不是内部 TODO;每条都必须和当前证据边界对齐。
|
|
347
354
|
- 如果某个未决前提会改变报告解释,一次只问一个问题。
|
|
348
355
|
- 如果存在多种报告 framing,先给 2-3 个方案、trade-offs 和推荐项,优先最忠于证据的 framing。
|
|
349
356
|
- 如果某种 framing 会实质影响后续论文 claim,要保留 approval gate。
|
|
@@ -726,6 +733,21 @@ const ZH_SKILL_FILES = {
|
|
|
726
733
|
- 数据集或 benchmark 2 代表什么真实场景:
|
|
727
734
|
- 数据集或 benchmark 3 代表什么真实场景:
|
|
728
735
|
|
|
736
|
+
## 贡献总结
|
|
737
|
+
|
|
738
|
+
- Contribution bullets:
|
|
739
|
+
- 当前证据最强的贡献:
|
|
740
|
+
- 仍需要更强证据的贡献:
|
|
741
|
+
|
|
742
|
+
## 方法概述
|
|
743
|
+
|
|
744
|
+
- 已批准的方法名:
|
|
745
|
+
- 方法白话总结:
|
|
746
|
+
- 相比 prior work 这套方法改变了什么:
|
|
747
|
+
- 最相关的 prior work 或 baseline 锚点:
|
|
748
|
+
- 这些 prior 方法各自做了什么:
|
|
749
|
+
- 为什么这些 prior 方法在这里仍然不够:
|
|
750
|
+
|
|
729
751
|
## 选定指标
|
|
730
752
|
|
|
731
753
|
- 主指标:
|
|
@@ -25,6 +25,21 @@
|
|
|
25
25
|
- Dataset or benchmark 2 and what real-world setting it represents:
|
|
26
26
|
- Dataset or benchmark 3 and what real-world setting it represents:
|
|
27
27
|
|
|
28
|
+
## Contribution Summary
|
|
29
|
+
|
|
30
|
+
- Contribution bullets:
|
|
31
|
+
- Strongest supported contribution:
|
|
32
|
+
- Contributions that still need stronger evidence:
|
|
33
|
+
|
|
34
|
+
## Method Overview
|
|
35
|
+
|
|
36
|
+
- Approved method name:
|
|
37
|
+
- Plain-language method summary:
|
|
38
|
+
- What this method changes relative to prior work:
|
|
39
|
+
- Most relevant prior work or baseline anchors:
|
|
40
|
+
- What those prior methods do:
|
|
41
|
+
- Why those prior methods are still insufficient here:
|
|
42
|
+
|
|
28
43
|
## Selected Metrics
|
|
29
44
|
|
|
30
45
|
- Primary metrics:
|
|
@@ -6,6 +6,7 @@
|
|
|
6
6
|
- reader summary for the user
|
|
7
7
|
- problem and background in plain language
|
|
8
8
|
- dataset scene notes in plain language
|
|
9
|
+
- contribution summary
|
|
9
10
|
- method overview
|
|
10
11
|
- selected metrics summary
|
|
11
12
|
- plain-language metric guide
|
|
@@ -49,11 +50,15 @@
|
|
|
49
50
|
- Explain the selected primary and secondary metrics in plain language for the user: what each metric measures, whether higher or lower is better, and whether it is a main result metric or only a health/support metric.
|
|
50
51
|
- If coverage, completeness, confidence, or similar health metrics appear, explicitly say that they describe experimental reliability rather than the main scientific effect.
|
|
51
52
|
- Pull the core background references, method or baseline references, and metric references out of the approved evaluation protocol and into the report itself instead of leaving them hidden in `.lab/context/*`.
|
|
53
|
+
- Pull the approved method name and contribution bullets out of `.lab/context/terminology-lock.md` when that framing context exists; do not silently drop them from the collaborator-facing report.
|
|
54
|
+
- Explain the method overview in collaborator language: what the method roughly does, what changed relative to the closest prior work or strongest baseline, what those prior methods do, and why they remain insufficient for the approved claim.
|
|
55
|
+
- When citing prior work or baselines in the method overview, include only the few anchor references a collaborator needs, and summarize their role and limitation in one short line each.
|
|
52
56
|
- Report only the few references a collaborator needs to orient themselves quickly; do not turn `report.md` into a full bibliography dump.
|
|
53
57
|
- If the report depends on a deviation from an original metric or implementation, state that deviation explicitly instead of smoothing it over.
|
|
54
58
|
- Before drafting the report, inspect `.lab/context/mission.md` and `.lab/context/eval-protocol.md` for skeletal template fields.
|
|
55
59
|
- If either canonical context file is still skeletal, hydrate the smallest trustworthy version from frozen result artifacts, dataset decisions, evidence-index, and prior approved context, and write that back before finalizing the report.
|
|
56
60
|
- If collaborator-critical fields still remain missing after hydration, downgrade the output to an `artifact-anchored interim report` instead of presenting it as a final collaborator-ready report.
|
|
61
|
+
- If the existing `report.md` or `main-tables.md` is missing required collaborator-facing sections from the managed templates, treat that as a report deficiency. A rerun must repair the missing sections instead of declaring "no content change" or treating the rerun as a no-op.
|
|
57
62
|
- Do not mix workflow deliverable status, rerun ids, or manuscript skeleton status into validated scientific findings; keep those in a separate artifact-status section.
|
|
58
63
|
- If `.lab/config/workflow.json` sets the workflow language to Chinese, write `report.md` and `<deliverables_root>/main-tables.md` in Chinese unless a file path, code identifier, or literal metric name must remain unchanged.
|
|
59
64
|
- Prefer conservative interpretation over marketing language.
|
|
@@ -64,6 +69,7 @@
|
|
|
64
69
|
- Start with a concise summary of the campaign outcome, the selected primary and secondary metrics, the strongest supported claim, and the biggest reporting risk.
|
|
65
70
|
- Proactively deliver a user-readable plain-language summary when the stage is reached from `/lab:auto`; do not wait for a separate follow-up request asking what the metrics or tables mean.
|
|
66
71
|
- Treat `report.md` as a user-facing artifact rather than an internal dump. Prefer plain-language explanations before jargon, and explain each metric the first time it matters.
|
|
72
|
+
- Treat contribution bullets as collaborator-facing claim summaries, not as internal TODOs; tie each one to the current evidence boundary.
|
|
67
73
|
- If a missing assumption would change report interpretation, ask one clarifying question at a time.
|
|
68
74
|
- If there are multiple defensible report framings, present 2-3 approaches with trade-offs and recommend the most evidence-faithful framing before writing.
|
|
69
75
|
- Keep an approval gate when the reporting frame would materially affect what the paper later claims.
|