superlab 0.1.14 → 0.1.16
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between those versions as they appear in their respective public registries.
- package/README.md +22 -13
- package/README.zh-CN.md +22 -13
- package/bin/superlab.cjs +38 -0
- package/lib/auto_contracts.cjs +7 -3
- package/lib/auto_runner.cjs +33 -52
- package/lib/auto_state.cjs +27 -21
- package/lib/context.cjs +15 -0
- package/lib/i18n.cjs +150 -78
- package/lib/install.cjs +14 -10
- package/package-assets/claude/commands/lab-auto.md +13 -0
- package/package-assets/claude/commands/lab-data.md +10 -0
- package/package-assets/claude/commands/lab-framing.md +10 -0
- package/package-assets/claude/commands/lab-idea.md +10 -0
- package/package-assets/claude/commands/lab-iterate.md +10 -0
- package/package-assets/claude/commands/lab-report.md +10 -0
- package/package-assets/claude/commands/lab-review.md +10 -0
- package/package-assets/claude/commands/lab-run.md +10 -0
- package/package-assets/claude/commands/lab-spec.md +10 -0
- package/package-assets/claude/commands/lab-write.md +10 -0
- package/package-assets/claude/commands/lab.md +32 -27
- package/package-assets/codex/prompts/lab-write.md +1 -1
- package/package-assets/codex/prompts/lab.md +1 -0
- package/package-assets/shared/lab/.managed/templates/final-report.md +12 -0
- package/package-assets/shared/lab/.managed/templates/main-tables.md +37 -0
- package/package-assets/shared/lab/config/workflow.json +3 -1
- package/package-assets/shared/lab/context/auto-outcome.md +3 -0
- package/package-assets/shared/skills/lab/SKILL.md +6 -2
- package/package-assets/shared/skills/lab/references/paper-writing/abstract.md +7 -1
- package/package-assets/shared/skills/lab/references/paper-writing/examples/abstract/template-a.md +21 -0
- package/package-assets/shared/skills/lab/references/paper-writing/examples/abstract/template-b.md +34 -0
- package/package-assets/shared/skills/lab/references/paper-writing/examples/abstract/template-c.md +28 -0
- package/package-assets/shared/skills/lab/references/paper-writing/examples/abstract-examples.md +13 -0
- package/package-assets/shared/skills/lab/references/paper-writing/examples/index.md +21 -0
- package/package-assets/shared/skills/lab/references/paper-writing/examples/introduction/novel-task-challenge-decomposition.md +18 -0
- package/package-assets/shared/skills/lab/references/paper-writing/examples/introduction/pipeline-not-recommended-abstract-only.md +30 -0
- package/package-assets/shared/skills/lab/references/paper-writing/examples/introduction/pipeline-version-1-one-contribution-multi-advantages.md +30 -0
- package/package-assets/shared/skills/lab/references/paper-writing/examples/introduction/pipeline-version-2-two-contributions.md +34 -0
- package/package-assets/shared/skills/lab/references/paper-writing/examples/introduction/pipeline-version-3-new-module-on-existing-pipeline.md +18 -0
- package/package-assets/shared/skills/lab/references/paper-writing/examples/introduction/pipeline-version-4-observation-driven.md +16 -0
- package/package-assets/shared/skills/lab/references/paper-writing/examples/introduction/technical-challenge-version-1-existing-task.md +32 -0
- package/package-assets/shared/skills/lab/references/paper-writing/examples/introduction/technical-challenge-version-2-existing-task-insight-backed-by-traditional.md +33 -0
- package/package-assets/shared/skills/lab/references/paper-writing/examples/introduction/technical-challenge-version-3-novel-task.md +21 -0
- package/package-assets/shared/skills/lab/references/paper-writing/examples/introduction/version-1-task-then-application.md +14 -0
- package/package-assets/shared/skills/lab/references/paper-writing/examples/introduction/version-2-application-first.md +10 -0
- package/package-assets/shared/skills/lab/references/paper-writing/examples/introduction/version-3-general-to-specific-setting.md +14 -0
- package/package-assets/shared/skills/lab/references/paper-writing/examples/introduction/version-4-open-with-challenge.md +20 -0
- package/package-assets/shared/skills/lab/references/paper-writing/examples/introduction-examples.md +25 -0
- package/package-assets/shared/skills/lab/references/paper-writing/examples/method/example-of-the-three-elements.md +67 -0
- package/package-assets/shared/skills/lab/references/paper-writing/examples/method/method-writing-common-issues-note.md +10 -0
- package/package-assets/shared/skills/lab/references/paper-writing/examples/method/module-design-instant-ngp.md +55 -0
- package/package-assets/shared/skills/lab/references/paper-writing/examples/method/module-motivation-patterns.md +15 -0
- package/package-assets/shared/skills/lab/references/paper-writing/examples/method/module-triad-neural-body.md +19 -0
- package/package-assets/shared/skills/lab/references/paper-writing/examples/method/neural-body-annotated-figure-text.md +66 -0
- package/package-assets/shared/skills/lab/references/paper-writing/examples/method/overview-template.md +30 -0
- package/package-assets/shared/skills/lab/references/paper-writing/examples/method/pre-writing-questions.md +17 -0
- package/package-assets/shared/skills/lab/references/paper-writing/examples/method/section-skeleton.md +9 -0
- package/package-assets/shared/skills/lab/references/paper-writing/examples/method-examples.md +24 -0
- package/package-assets/shared/skills/lab/references/paper-writing/introduction.md +7 -1
- package/package-assets/shared/skills/lab/references/paper-writing/method.md +6 -2
- package/package-assets/shared/skills/lab/references/paper-writing-integration.md +26 -0
- package/package-assets/shared/skills/lab/stages/auto.md +9 -1
- package/package-assets/shared/skills/lab/stages/report.md +5 -1
- package/package-assets/shared/skills/lab/stages/write.md +16 -1
- package/package.json +1 -1
- package/package-assets/claude/commands/lab/auto.md +0 -14
- package/package-assets/claude/commands/lab/data.md +0 -11
- package/package-assets/claude/commands/lab/framing.md +0 -11
- package/package-assets/claude/commands/lab/idea.md +0 -11
- package/package-assets/claude/commands/lab/iterate.md +0 -11
- package/package-assets/claude/commands/lab/report.md +0 -11
- package/package-assets/claude/commands/lab/review.md +0 -11
- package/package-assets/claude/commands/lab/run.md +0 -11
- package/package-assets/claude/commands/lab/spec.md +0 -11
- package/package-assets/claude/commands/lab/write.md +0 -11
package/README.md
CHANGED
|
@@ -45,7 +45,7 @@ This writes:
|
|
|
45
45
|
|
|
46
46
|
- `.codex/prompts/lab-*.md`
|
|
47
47
|
- `.codex/skills/lab/`
|
|
48
|
-
- `.claude/commands/lab/`
|
|
48
|
+
- `.claude/commands/lab*.md`
|
|
49
49
|
- `.claude/skills/lab/`
|
|
50
50
|
- `AGENTS.md`
|
|
51
51
|
- `CLAUDE.md`
|
|
@@ -179,7 +179,7 @@ superlab auto stop
|
|
|
179
179
|
|
|
180
180
|
- `run` and `iterate` must change persistent outputs under `results_root`
|
|
181
181
|
- `review` must update canonical review context
|
|
182
|
-
- `report` must write `<deliverables_root>/report.md`
|
|
182
|
+
- `report` must write `<deliverables_root>/report.md` and `<deliverables_root>/main-tables.md`
|
|
183
183
|
- `write` must produce LaTeX output under `<deliverables_root>/paper/`
|
|
184
184
|
- a successful promotion must write back into `.lab/context/data-decisions.md`, `.lab/context/decisions.md`, `.lab/context/state.md`, and `.lab/context/session-brief.md`
|
|
185
185
|
- every run must end with `.lab/context/auto-outcome.md`, including why it stopped, whether the terminal goal was reached, and which artifact is the final outcome
|
|
@@ -246,10 +246,15 @@ Stages should follow that file rather than guess language locally.
|
|
|
246
246
|
- Python 3.10+
|
|
247
247
|
- Git
|
|
248
248
|
|
|
249
|
-
`/lab:write` ships with vendored paper-writing references under the installed `lab` skill, so it does not depend on an extra runtime skill installation.
|
|
249
|
+
`/lab:write` ships with vendored paper-writing references and the upstream example bank for `abstract`, `introduction`, and `method` under the installed `lab` skill, so it does not depend on an extra runtime skill installation.
|
|
250
250
|
|
|
251
251
|
## Command Set
|
|
252
252
|
|
|
253
|
+
Codex and Claude use different slash-entry syntax:
|
|
254
|
+
|
|
255
|
+
- Codex: `/lab:idea`, `/lab:auto`, `/lab:write`
|
|
256
|
+
- Claude Code: `/lab idea ...` or `/lab-idea`; `/lab auto ...` or `/lab-auto`; `/lab write ...` or `/lab-write`
|
|
257
|
+
|
|
253
258
|
- `/lab:idea` researches an idea, critiques it, and writes the initial research framing.
|
|
254
259
|
- `/lab:data` turns the approved idea into a dataset and benchmark package with years, paper usage, source audit, download plans, explicit benchmark-role rationale for classic-public, recent-strong-public, and claim-specific benchmarks, and explicit comparison rationale for canonical baselines, strong historical baselines, recent strong public methods, and closest prior work.
|
|
255
260
|
- `/lab:framing` locks paper-facing method names, module names, titles, and contribution wording before drafting.
|
|
@@ -281,19 +286,20 @@ See the source command docs in [commands/codex/lab.md](/Users/zhouhao119/coding/
|
|
|
281
286
|
|
|
282
287
|
## Typical Flow
|
|
283
288
|
|
|
284
|
-
1.
|
|
285
|
-
2.
|
|
286
|
-
3.
|
|
287
|
-
4.
|
|
288
|
-
5.
|
|
289
|
-
6.
|
|
290
|
-
7.
|
|
291
|
-
8.
|
|
292
|
-
9.
|
|
289
|
+
1. In Codex, run `/lab:idea`; in Claude Code, run `/lab idea ...` or `/lab-idea`.
|
|
290
|
+
2. In Codex, run `/lab:data`; in Claude Code, run `/lab data ...` or `/lab-data`.
|
|
291
|
+
3. In Codex, run `/lab:spec`; in Claude Code, run `/lab spec ...` or `/lab-spec`.
|
|
292
|
+
4. In Codex, run `/lab:run`; in Claude Code, run `/lab run ...` or `/lab-run`.
|
|
293
|
+
5. In Codex, run `/lab:iterate`; in Claude Code, run `/lab iterate ...` or `/lab-iterate`.
|
|
294
|
+
6. In Codex, run `/lab:review`; in Claude Code, run `/lab review ...` or `/lab-review`.
|
|
295
|
+
7. In Codex, run `/lab:report`; in Claude Code, run `/lab report ...` or `/lab-report`.
|
|
296
|
+
8. In Codex, run `/lab:framing`; in Claude Code, run `/lab framing ...` or `/lab-framing`.
|
|
297
|
+
9. In Codex, run `/lab:write`; in Claude Code, run `/lab write ...` or `/lab-write`.
|
|
293
298
|
|
|
294
299
|
`/lab:write` writes final manuscript output under the configured `deliverables_root` (default: `docs/research`):
|
|
295
300
|
|
|
296
301
|
- `docs/research/report.md`
|
|
302
|
+
- `docs/research/main-tables.md`
|
|
297
303
|
- `docs/research/paper/main.tex`
|
|
298
304
|
- `docs/research/paper/sections/*.tex`
|
|
299
305
|
|
|
@@ -303,7 +309,10 @@ Internal writing-control artifacts stay under:
|
|
|
303
309
|
- `.lab/writing/plan.md`
|
|
304
310
|
- `.lab/writing/iterations/*.md`
|
|
305
311
|
|
|
306
|
-
If `paper_template_root` is configured, `/lab:write` should inspect that template directory first and align drafting to it.
|
|
312
|
+
If `paper_template_root` is configured, `/lab:write` should inspect that template directory first and align drafting to it.
|
|
313
|
+
If no template is configured, the first manuscript-writing round should ask once whether to continue with the managed default LaTeX scaffold or attach a template directory first.
|
|
314
|
+
If the user approves the default scaffold, persist that choice in `.lab/config/workflow.json` and stop asking on ordinary rounds.
|
|
315
|
+
At the final export or final-draft boundary, if the project is still on the default scaffold and no attached template exists, ask one final reminder question before finalizing.
|
|
307
316
|
|
|
308
317
|
`/lab` treats `.lab/` as the workflow-control layer only. Durable outputs should use natural project roots:
|
|
309
318
|
|
package/README.zh-CN.md
CHANGED
|
@@ -43,7 +43,7 @@ npx github:zhouhaoUCAS/superlab init
|
|
|
43
43
|
|
|
44
44
|
- `.codex/prompts/lab-*.md`
|
|
45
45
|
- `.codex/skills/lab/`
|
|
46
|
-
- `.claude/commands/lab/`
|
|
46
|
+
- `.claude/commands/lab*.md`
|
|
47
47
|
- `.claude/skills/lab/`
|
|
48
48
|
- `AGENTS.md`
|
|
49
49
|
- `CLAUDE.md`
|
|
@@ -177,7 +177,7 @@ superlab auto stop
|
|
|
177
177
|
|
|
178
178
|
- `run` 和 `iterate` 必须更新 `results_root` 下的持久输出
|
|
179
179
|
- `review` 必须更新规范的审查上下文
|
|
180
|
-
- `report` 必须写出 `<deliverables_root>/report.md`
|
|
180
|
+
- `report` 必须写出 `<deliverables_root>/report.md` 和 `<deliverables_root>/main-tables.md`
|
|
181
181
|
- `write` 必须写出 `<deliverables_root>/paper/` 下的 LaTeX 论文产物
|
|
182
182
|
- promotion 成功后必须写回 `.lab/context/data-decisions.md`、`.lab/context/decisions.md`、`.lab/context/state.md` 和 `.lab/context/session-brief.md`
|
|
183
183
|
- 每次运行都必须写出 `.lab/context/auto-outcome.md`,记录为什么停止、是否达到终止目标,以及哪一个工件是最终结果
|
|
@@ -244,10 +244,15 @@ superlab init --lang en
|
|
|
244
244
|
- Python 3.10+
|
|
245
245
|
- Git
|
|
246
246
|
|
|
247
|
-
`/lab:write` 自带 vendored 的 paper-writing
|
|
247
|
+
`/lab:write` 自带 vendored 的 paper-writing 章节参考,以及 `abstract`、`introduction`、`method` 对应的 upstream example bank,不再依赖额外安装一个运行时写作 skill。
|
|
248
248
|
|
|
249
249
|
## 命令集合
|
|
250
250
|
|
|
251
|
+
Codex 和 Claude 的命令入口不一样:
|
|
252
|
+
|
|
253
|
+
- Codex:`/lab:idea`、`/lab:auto`、`/lab:write`
|
|
254
|
+
- Claude Code:`/lab idea ...` 或 `/lab-idea`;`/lab auto ...` 或 `/lab-auto`;`/lab write ...` 或 `/lab-write`
|
|
255
|
+
|
|
251
256
|
- `/lab:idea` 调研 idea、文献、数据集、指标和 baseline,并输出初始方案。
|
|
252
257
|
- `/lab:data` 把已批准的 idea 收敛成数据集与 benchmark 方案,要求记录年份、使用论文、来源审计、下载计划,并明确 classic-public、recent-strong-public、claim-specific 三类 benchmark 的纳入理由,以及 canonical baselines、strong historical baselines、recent strong public methods、closest prior work 四类对比方法的纳入理由。
|
|
253
258
|
- `/lab:framing` 在正式写作前收紧方法名、模块名、论文题目和 contribution wording。
|
|
@@ -266,19 +271,20 @@ superlab init --lang en
|
|
|
266
271
|
## 使用流程
|
|
267
272
|
|
|
268
273
|
1. 在目标项目执行 `superlab init`。
|
|
269
|
-
2. 在 Codex
|
|
270
|
-
3.
|
|
271
|
-
4. 再执行 `/lab:spec`。
|
|
272
|
-
5. 用 `/lab:run` 打通最小实验链路。
|
|
273
|
-
6. 用 `/lab:iterate` 进行多轮迭代。
|
|
274
|
-
7. 在关键节点运行 `/lab:review`。
|
|
275
|
-
8. 最后用 `/lab:report` 产出总报告。
|
|
276
|
-
9. 用 `/lab:framing` 收紧题目、命名和 contribution wording。
|
|
277
|
-
10. 用 `/lab:write` 把稳定结果写成论文各 section。
|
|
274
|
+
2. 在 Codex 中调用 `/lab:idea`;在 Claude Code 中调用 `/lab idea ...` 或 `/lab-idea`。
|
|
275
|
+
3. 经确认后,在 Codex 中执行 `/lab:data`,或在 Claude Code 中执行 `/lab data ...` / `/lab-data`,锁定数据集、下载来源、benchmark 类别覆盖,以及各类对比方法的纳入理由。
|
|
276
|
+
4. 再执行 `/lab:spec`,或在 Claude Code 中执行 `/lab spec ...` / `/lab-spec`。
|
|
277
|
+
5. 用 `/lab:run`,或在 Claude Code 中用 `/lab run ...` / `/lab-run` 打通最小实验链路。
|
|
278
|
+
6. 用 `/lab:iterate`,或在 Claude Code 中用 `/lab iterate ...` / `/lab-iterate` 进行多轮迭代。
|
|
279
|
+
7. 在关键节点运行 `/lab:review`,或在 Claude Code 中运行 `/lab review ...` / `/lab-review`。
|
|
280
|
+
8. 最后用 `/lab:report`,或在 Claude Code 中用 `/lab report ...` / `/lab-report` 产出总报告。
|
|
281
|
+
9. 用 `/lab:framing`,或在 Claude Code 中用 `/lab framing ...` / `/lab-framing` 收紧题目、命名和 contribution wording。
|
|
282
|
+
10. 用 `/lab:write`,或在 Claude Code 中用 `/lab write ...` / `/lab-write` 把稳定结果写成论文各 section。
|
|
278
283
|
|
|
279
284
|
`/lab:write` 会把最终可交付物写到 `deliverables_root` 指定的目录,默认是 `docs/research`:
|
|
280
285
|
|
|
281
286
|
- `docs/research/report.md`
|
|
287
|
+
- `docs/research/main-tables.md`
|
|
282
288
|
- `docs/research/paper/main.tex`
|
|
283
289
|
- `docs/research/paper/sections/*.tex`
|
|
284
290
|
|
|
@@ -288,7 +294,10 @@ superlab init --lang en
|
|
|
288
294
|
- `.lab/writing/plan.md`
|
|
289
295
|
- `.lab/writing/iterations/*.md`
|
|
290
296
|
|
|
291
|
-
如果配置了 `paper_template_root`,`/lab:write`
|
|
297
|
+
如果配置了 `paper_template_root`,`/lab:write` 应先检查该模板目录并按其结构写作。
|
|
298
|
+
如果没有配置模板,第一次进入论文 `.tex` 写作时应先追问一次:继续使用内置默认 LaTeX scaffold,还是先接入模板目录。
|
|
299
|
+
如果用户确认先用默认 scaffold,就把这个决定持久化到 `.lab/config/workflow.json`,后续普通轮次不再重复追问。
|
|
300
|
+
但在最终导出或最终定稿节点,如果项目仍在使用默认 scaffold 且没有接入模板,应再提醒一次,给用户最后切换模板的机会。
|
|
292
301
|
|
|
293
302
|
`/lab` 把 `.lab/` 视为工作流控制层,不是正式结果目录。持久输出应按自然根目录放置:
|
|
294
303
|
|
package/bin/superlab.cjs
CHANGED
|
@@ -603,6 +603,8 @@ function attachPaperTemplate({ targetDir, templatePath }) {
|
|
|
603
603
|
}
|
|
604
604
|
|
|
605
605
|
config.paper_template_root = storedProjectPath(targetDir, normalizedTemplatePath);
|
|
606
|
+
config.paper_template_decision = "attached-template";
|
|
607
|
+
config.paper_template_final_reminder_acknowledged = true;
|
|
606
608
|
writeWorkflowConfig(targetDir, config);
|
|
607
609
|
return {
|
|
608
610
|
storedPath: config.paper_template_root,
|
|
@@ -660,6 +662,33 @@ function validateWorkflowConfig(config) {
|
|
|
660
662
|
issues.push("paper_template_root must not point inside .lab");
|
|
661
663
|
}
|
|
662
664
|
}
|
|
665
|
+
const validTemplateDecisions = new Set(["unconfirmed", "default-scaffold", "attached-template"]);
|
|
666
|
+
if (
|
|
667
|
+
config.paper_template_decision !== undefined &&
|
|
668
|
+
!validTemplateDecisions.has(config.paper_template_decision)
|
|
669
|
+
) {
|
|
670
|
+
issues.push("invalid paper_template_decision");
|
|
671
|
+
}
|
|
672
|
+
if (
|
|
673
|
+
config.paper_template_final_reminder_acknowledged !== undefined &&
|
|
674
|
+
typeof config.paper_template_final_reminder_acknowledged !== "boolean"
|
|
675
|
+
) {
|
|
676
|
+
issues.push("invalid paper_template_final_reminder_acknowledged");
|
|
677
|
+
}
|
|
678
|
+
if (
|
|
679
|
+
typeof config.paper_template_root === "string" &&
|
|
680
|
+
config.paper_template_root.trim() !== "" &&
|
|
681
|
+
config.paper_template_decision === "default-scaffold"
|
|
682
|
+
) {
|
|
683
|
+
issues.push("paper_template_decision conflicts with configured paper_template_root");
|
|
684
|
+
}
|
|
685
|
+
if (
|
|
686
|
+
typeof config.paper_template_root === "string" &&
|
|
687
|
+
config.paper_template_root.trim() === "" &&
|
|
688
|
+
config.paper_template_decision === "attached-template"
|
|
689
|
+
) {
|
|
690
|
+
issues.push("paper_template_decision requires a configured paper_template_root");
|
|
691
|
+
}
|
|
663
692
|
return issues;
|
|
664
693
|
}
|
|
665
694
|
|
|
@@ -922,6 +951,15 @@ async function main() {
|
|
|
922
951
|
console.log(`stages executed: ${result.executedStages.join(", ")}`);
|
|
923
952
|
console.log(`goal type: ${result.outcome.goalType}`);
|
|
924
953
|
console.log(`goal target: ${result.outcome.goalTarget}`);
|
|
954
|
+
if (result.outcome.primaryMetrics) {
|
|
955
|
+
console.log(`primary metrics: ${result.outcome.primaryMetrics}`);
|
|
956
|
+
}
|
|
957
|
+
if (result.outcome.secondaryMetrics) {
|
|
958
|
+
console.log(`secondary metrics: ${result.outcome.secondaryMetrics}`);
|
|
959
|
+
}
|
|
960
|
+
if (result.outcome.requiredTerminalEvidence) {
|
|
961
|
+
console.log(`required terminal evidence: ${result.outcome.requiredTerminalEvidence}`);
|
|
962
|
+
}
|
|
925
963
|
if (result.outcome.experimentLadder) {
|
|
926
964
|
console.log(`experiment ladder: ${result.outcome.experimentLadder}`);
|
|
927
965
|
}
|
package/lib/auto_contracts.cjs
CHANGED
|
@@ -285,7 +285,10 @@ function stageContractSnapshot(targetDir, stage) {
|
|
|
285
285
|
run: [resultsRoot],
|
|
286
286
|
iterate: [resultsRoot],
|
|
287
287
|
review: REVIEW_CONTEXT_FILES.map((relativePath) => path.resolve(targetDir, relativePath)),
|
|
288
|
-
report: [
|
|
288
|
+
report: [
|
|
289
|
+
path.join(deliverablesRoot, "report.md"),
|
|
290
|
+
path.join(deliverablesRoot, "main-tables.md"),
|
|
291
|
+
],
|
|
289
292
|
write: [
|
|
290
293
|
path.join(deliverablesRoot, "paper", "main.tex"),
|
|
291
294
|
path.join(deliverablesRoot, "paper", "sections"),
|
|
@@ -322,8 +325,9 @@ function verifyStageContract({ stage, snapshot }) {
|
|
|
322
325
|
}
|
|
323
326
|
|
|
324
327
|
if (stage === "report") {
|
|
325
|
-
|
|
326
|
-
|
|
328
|
+
const missing = Array.from(snapshot.keys()).filter((absolutePath) => !changedPaths.includes(absolutePath));
|
|
329
|
+
if (missing.length > 0) {
|
|
330
|
+
throw new Error("report stage did not produce the deliverable report.md and main-tables.md under deliverables_root");
|
|
327
331
|
}
|
|
328
332
|
return;
|
|
329
333
|
}
|
package/lib/auto_runner.cjs
CHANGED
|
@@ -264,6 +264,18 @@ async function startAutoMode({ targetDir, now = new Date() }) {
|
|
|
264
264
|
let promotionApplied = false;
|
|
265
265
|
let stopReason = "";
|
|
266
266
|
let finalRung = "";
|
|
267
|
+
const outcomeProtocolFields = {
|
|
268
|
+
primaryMetrics: evalProtocol.primaryMetrics,
|
|
269
|
+
secondaryMetrics: evalProtocol.secondaryMetrics,
|
|
270
|
+
requiredTerminalEvidence: evalProtocol.requiredTerminalEvidence,
|
|
271
|
+
experimentLadder: evalProtocol.experimentLadder,
|
|
272
|
+
metricGlossary: evalProtocol.metricGlossary,
|
|
273
|
+
metricSourcePapers: evalProtocol.metricSourcePapers,
|
|
274
|
+
metricImplementationSource: evalProtocol.metricImplementationSource,
|
|
275
|
+
comparisonSourcePapers: evalProtocol.comparisonSourcePapers,
|
|
276
|
+
comparisonImplementationSource: evalProtocol.comparisonImplementationSource,
|
|
277
|
+
deviationFromOriginalImplementation: evalProtocol.deviationFromOriginalImplementation,
|
|
278
|
+
};
|
|
267
279
|
|
|
268
280
|
const writeRunningStatus = (overrides = {}) => {
|
|
269
281
|
currentStatus = {
|
|
@@ -287,13 +299,7 @@ async function startAutoMode({ targetDir, now = new Date() }) {
|
|
|
287
299
|
targetDir,
|
|
288
300
|
{
|
|
289
301
|
objective: mode.objective,
|
|
290
|
-
|
|
291
|
-
metricGlossary: evalProtocol.metricGlossary,
|
|
292
|
-
metricSourcePapers: evalProtocol.metricSourcePapers,
|
|
293
|
-
metricImplementationSource: evalProtocol.metricImplementationSource,
|
|
294
|
-
comparisonSourcePapers: evalProtocol.comparisonSourcePapers,
|
|
295
|
-
comparisonImplementationSource: evalProtocol.comparisonImplementationSource,
|
|
296
|
-
deviationFromOriginalImplementation: evalProtocol.deviationFromOriginalImplementation,
|
|
302
|
+
...outcomeProtocolFields,
|
|
297
303
|
terminalGoalType: mode.terminalGoalType,
|
|
298
304
|
terminalGoalTarget: mode.terminalGoalTarget,
|
|
299
305
|
requiredTerminalArtifact: mode.requiredTerminalArtifact,
|
|
@@ -597,8 +603,7 @@ async function startAutoMode({ targetDir, now = new Date() }) {
|
|
|
597
603
|
targetDir,
|
|
598
604
|
{
|
|
599
605
|
objective: mode.objective,
|
|
600
|
-
|
|
601
|
-
metricGlossary: evalProtocol.metricGlossary,
|
|
606
|
+
...outcomeProtocolFields,
|
|
602
607
|
terminalGoalType: mode.terminalGoalType,
|
|
603
608
|
terminalGoalTarget: mode.terminalGoalTarget,
|
|
604
609
|
requiredTerminalArtifact: mode.requiredTerminalArtifact,
|
|
@@ -620,14 +625,8 @@ async function startAutoMode({ targetDir, now = new Date() }) {
|
|
|
620
625
|
mode,
|
|
621
626
|
status: currentStatus,
|
|
622
627
|
executedStages,
|
|
623
|
-
|
|
624
|
-
|
|
625
|
-
metricGlossary: evalProtocol.metricGlossary,
|
|
626
|
-
metricSourcePapers: evalProtocol.metricSourcePapers,
|
|
627
|
-
metricImplementationSource: evalProtocol.metricImplementationSource,
|
|
628
|
-
comparisonSourcePapers: evalProtocol.comparisonSourcePapers,
|
|
629
|
-
comparisonImplementationSource: evalProtocol.comparisonImplementationSource,
|
|
630
|
-
deviationFromOriginalImplementation: evalProtocol.deviationFromOriginalImplementation,
|
|
628
|
+
outcome: {
|
|
629
|
+
...outcomeProtocolFields,
|
|
631
630
|
goalType: mode.terminalGoalType,
|
|
632
631
|
goalTarget: mode.terminalGoalTarget,
|
|
633
632
|
goalReached: false,
|
|
@@ -656,13 +655,7 @@ async function startAutoMode({ targetDir, now = new Date() }) {
|
|
|
656
655
|
targetDir,
|
|
657
656
|
{
|
|
658
657
|
objective: mode.objective,
|
|
659
|
-
|
|
660
|
-
metricGlossary: evalProtocol.metricGlossary,
|
|
661
|
-
metricSourcePapers: evalProtocol.metricSourcePapers,
|
|
662
|
-
metricImplementationSource: evalProtocol.metricImplementationSource,
|
|
663
|
-
comparisonSourcePapers: evalProtocol.comparisonSourcePapers,
|
|
664
|
-
comparisonImplementationSource: evalProtocol.comparisonImplementationSource,
|
|
665
|
-
deviationFromOriginalImplementation: evalProtocol.deviationFromOriginalImplementation,
|
|
658
|
+
...outcomeProtocolFields,
|
|
666
659
|
terminalGoalType: mode.terminalGoalType,
|
|
667
660
|
terminalGoalTarget: mode.terminalGoalTarget,
|
|
668
661
|
requiredTerminalArtifact: mode.requiredTerminalArtifact,
|
|
@@ -685,13 +678,7 @@ async function startAutoMode({ targetDir, now = new Date() }) {
|
|
|
685
678
|
status: currentStatus,
|
|
686
679
|
executedStages,
|
|
687
680
|
outcome: {
|
|
688
|
-
|
|
689
|
-
metricGlossary: evalProtocol.metricGlossary,
|
|
690
|
-
metricSourcePapers: evalProtocol.metricSourcePapers,
|
|
691
|
-
metricImplementationSource: evalProtocol.metricImplementationSource,
|
|
692
|
-
comparisonSourcePapers: evalProtocol.comparisonSourcePapers,
|
|
693
|
-
comparisonImplementationSource: evalProtocol.comparisonImplementationSource,
|
|
694
|
-
deviationFromOriginalImplementation: evalProtocol.deviationFromOriginalImplementation,
|
|
681
|
+
...outcomeProtocolFields,
|
|
695
682
|
goalType: mode.terminalGoalType,
|
|
696
683
|
goalTarget: mode.terminalGoalTarget,
|
|
697
684
|
goalReached: false,
|
|
@@ -722,13 +709,7 @@ async function startAutoMode({ targetDir, now = new Date() }) {
|
|
|
722
709
|
targetDir,
|
|
723
710
|
{
|
|
724
711
|
objective: mode.objective,
|
|
725
|
-
|
|
726
|
-
metricGlossary: evalProtocol.metricGlossary,
|
|
727
|
-
metricSourcePapers: evalProtocol.metricSourcePapers,
|
|
728
|
-
metricImplementationSource: evalProtocol.metricImplementationSource,
|
|
729
|
-
comparisonSourcePapers: evalProtocol.comparisonSourcePapers,
|
|
730
|
-
comparisonImplementationSource: evalProtocol.comparisonImplementationSource,
|
|
731
|
-
deviationFromOriginalImplementation: evalProtocol.deviationFromOriginalImplementation,
|
|
712
|
+
...outcomeProtocolFields,
|
|
732
713
|
terminalGoalType: mode.terminalGoalType,
|
|
733
714
|
terminalGoalTarget: mode.terminalGoalTarget,
|
|
734
715
|
requiredTerminalArtifact: mode.requiredTerminalArtifact,
|
|
@@ -751,13 +732,7 @@ async function startAutoMode({ targetDir, now = new Date() }) {
|
|
|
751
732
|
status: currentStatus,
|
|
752
733
|
executedStages,
|
|
753
734
|
outcome: {
|
|
754
|
-
|
|
755
|
-
metricGlossary: evalProtocol.metricGlossary,
|
|
756
|
-
metricSourcePapers: evalProtocol.metricSourcePapers,
|
|
757
|
-
metricImplementationSource: evalProtocol.metricImplementationSource,
|
|
758
|
-
comparisonSourcePapers: evalProtocol.comparisonSourcePapers,
|
|
759
|
-
comparisonImplementationSource: evalProtocol.comparisonImplementationSource,
|
|
760
|
-
deviationFromOriginalImplementation: evalProtocol.deviationFromOriginalImplementation,
|
|
735
|
+
...outcomeProtocolFields,
|
|
761
736
|
goalType: mode.terminalGoalType,
|
|
762
737
|
goalTarget: mode.terminalGoalTarget,
|
|
763
738
|
goalReached: successReached,
|
|
@@ -774,6 +749,18 @@ function stopAutoMode({ targetDir, now = new Date() }) {
|
|
|
774
749
|
const mode = parseAutoMode(targetDir);
|
|
775
750
|
const evalProtocol = parseEvalProtocol(targetDir);
|
|
776
751
|
const lang = readWorkflowLanguage(targetDir);
|
|
752
|
+
const outcomeProtocolFields = {
|
|
753
|
+
primaryMetrics: evalProtocol.primaryMetrics,
|
|
754
|
+
secondaryMetrics: evalProtocol.secondaryMetrics,
|
|
755
|
+
requiredTerminalEvidence: evalProtocol.requiredTerminalEvidence,
|
|
756
|
+
experimentLadder: evalProtocol.experimentLadder,
|
|
757
|
+
metricGlossary: evalProtocol.metricGlossary,
|
|
758
|
+
metricSourcePapers: evalProtocol.metricSourcePapers,
|
|
759
|
+
metricImplementationSource: evalProtocol.metricImplementationSource,
|
|
760
|
+
comparisonSourcePapers: evalProtocol.comparisonSourcePapers,
|
|
761
|
+
comparisonImplementationSource: evalProtocol.comparisonImplementationSource,
|
|
762
|
+
deviationFromOriginalImplementation: evalProtocol.deviationFromOriginalImplementation,
|
|
763
|
+
};
|
|
777
764
|
const status = {
|
|
778
765
|
...existing,
|
|
779
766
|
status: "stopped",
|
|
@@ -785,13 +772,7 @@ function stopAutoMode({ targetDir, now = new Date() }) {
|
|
|
785
772
|
targetDir,
|
|
786
773
|
{
|
|
787
774
|
objective: mode.objective,
|
|
788
|
-
|
|
789
|
-
metricGlossary: evalProtocol.metricGlossary,
|
|
790
|
-
metricSourcePapers: evalProtocol.metricSourcePapers,
|
|
791
|
-
metricImplementationSource: evalProtocol.metricImplementationSource,
|
|
792
|
-
comparisonSourcePapers: evalProtocol.comparisonSourcePapers,
|
|
793
|
-
comparisonImplementationSource: evalProtocol.comparisonImplementationSource,
|
|
794
|
-
deviationFromOriginalImplementation: evalProtocol.deviationFromOriginalImplementation,
|
|
775
|
+
...outcomeProtocolFields,
|
|
795
776
|
terminalGoalType: mode.terminalGoalType,
|
|
796
777
|
terminalGoalTarget: mode.terminalGoalTarget,
|
|
797
778
|
requiredTerminalArtifact: mode.requiredTerminalArtifact,
|
package/lib/auto_state.cjs
CHANGED
|
@@ -140,30 +140,33 @@ function renderAutoOutcome(outcome, { lang = "en" } = {}) {
|
|
|
140
140
|
|
|
141
141
|
## 目标
|
|
142
142
|
|
|
143
|
-
-
|
|
144
|
-
-
|
|
145
|
-
-
|
|
146
|
-
-
|
|
147
|
-
-
|
|
148
|
-
-
|
|
149
|
-
-
|
|
150
|
-
-
|
|
151
|
-
-
|
|
152
|
-
-
|
|
153
|
-
-
|
|
143
|
+
- 目标: ${outcome.objective || ""}
|
|
144
|
+
- 主指标: ${outcome.primaryMetrics || ""}
|
|
145
|
+
- 次级指标: ${outcome.secondaryMetrics || ""}
|
|
146
|
+
- 必要终局证据: ${outcome.requiredTerminalEvidence || ""}
|
|
147
|
+
- 实验阶梯: ${outcome.experimentLadder || ""}
|
|
148
|
+
- 指标释义: ${outcome.metricGlossary || ""}
|
|
149
|
+
- 指标来源论文: ${outcome.metricSourcePapers || ""}
|
|
150
|
+
- 指标实现来源: ${outcome.metricImplementationSource || ""}
|
|
151
|
+
- 对比方法来源论文: ${outcome.comparisonSourcePapers || ""}
|
|
152
|
+
- 对比方法实现来源: ${outcome.comparisonImplementationSource || ""}
|
|
153
|
+
- 与原始实现的偏差: ${outcome.deviationFromOriginalImplementation || ""}
|
|
154
|
+
- 终止目标类型: ${outcome.terminalGoalType || ""}
|
|
155
|
+
- 终止目标目标值: ${outcome.terminalGoalTarget || ""}
|
|
156
|
+
- 必要终止工件: ${outcome.requiredTerminalArtifact || ""}
|
|
154
157
|
|
|
155
158
|
## 结果
|
|
156
159
|
|
|
157
|
-
-
|
|
158
|
-
-
|
|
159
|
-
-
|
|
160
|
-
-
|
|
161
|
-
-
|
|
162
|
-
-
|
|
163
|
-
-
|
|
164
|
-
-
|
|
165
|
-
-
|
|
166
|
-
-
|
|
160
|
+
- 状态: ${outcome.status || ""}
|
|
161
|
+
- 目标是否达成: ${outcome.goalReached ? "是" : "否"}
|
|
162
|
+
- 停止原因: ${outcome.stopReason || ""}
|
|
163
|
+
- 是否已升格: ${outcome.promotionApplied ? "是" : "否"}
|
|
164
|
+
- 最终工件: ${outcome.finalArtifact || ""}
|
|
165
|
+
- 最终 rung: ${outcome.finalRung || ""}
|
|
166
|
+
- 已执行阶段: ${outcome.executedStages || ""}
|
|
167
|
+
- 已完成迭代数: ${outcome.iterationsCompleted || "0"}
|
|
168
|
+
- 开始时间: ${outcome.startedAt || ""}
|
|
169
|
+
- 结束时间: ${outcome.finishedAt || ""}
|
|
167
170
|
`;
|
|
168
171
|
}
|
|
169
172
|
|
|
@@ -172,6 +175,9 @@ function renderAutoOutcome(outcome, { lang = "en" } = {}) {
|
|
|
172
175
|
## Goal
|
|
173
176
|
|
|
174
177
|
- Objective: ${outcome.objective || ""}
|
|
178
|
+
- Primary metrics: ${outcome.primaryMetrics || ""}
|
|
179
|
+
- Secondary metrics: ${outcome.secondaryMetrics || ""}
|
|
180
|
+
- Required terminal evidence: ${outcome.requiredTerminalEvidence || ""}
|
|
175
181
|
- Experiment ladder: ${outcome.experimentLadder || ""}
|
|
176
182
|
- Metric glossary: ${outcome.metricGlossary || ""}
|
|
177
183
|
- Metric source papers: ${outcome.metricSourcePapers || ""}
|
package/lib/context.cjs
CHANGED
|
@@ -96,6 +96,9 @@ function renderSummary(lang, data) {
|
|
|
96
96
|
- Auto final artifact: ${data.autoFinalArtifact || "待补充"}
|
|
97
97
|
- Auto final rung: ${data.autoFinalRung || "待补充"}
|
|
98
98
|
- Eval objective: ${data.evalObjective || "待补充"}
|
|
99
|
+
- Primary metrics: ${data.evalPrimaryMetrics || "待补充"}
|
|
100
|
+
- Secondary metrics: ${data.evalSecondaryMetrics || "待补充"}
|
|
101
|
+
- Required terminal evidence: ${data.evalRequiredTerminalEvidence || "待补充"}
|
|
99
102
|
- Table plan: ${data.evalTablePlan || "待补充"}
|
|
100
103
|
- Metric glossary: ${data.evalMetricGlossary || "待补充"}
|
|
101
104
|
- Metric source papers: ${data.evalMetricSourcePapers || "待补充"}
|
|
@@ -148,6 +151,9 @@ function renderSummary(lang, data) {
|
|
|
148
151
|
- Auto final artifact: ${data.autoFinalArtifact || "TBD"}
|
|
149
152
|
- Auto final rung: ${data.autoFinalRung || "TBD"}
|
|
150
153
|
- Eval objective: ${data.evalObjective || "TBD"}
|
|
154
|
+
- Primary metrics: ${data.evalPrimaryMetrics || "TBD"}
|
|
155
|
+
- Secondary metrics: ${data.evalSecondaryMetrics || "TBD"}
|
|
156
|
+
- Required terminal evidence: ${data.evalRequiredTerminalEvidence || "TBD"}
|
|
151
157
|
- Table plan: ${data.evalTablePlan || "TBD"}
|
|
152
158
|
- Metric glossary: ${data.evalMetricGlossary || "TBD"}
|
|
153
159
|
- Metric source papers: ${data.evalMetricSourcePapers || "TBD"}
|
|
@@ -255,6 +261,9 @@ ${data.problem || "待补充"}
|
|
|
255
261
|
- Auto final artifact: ${data.autoFinalArtifact || "待补充"}
|
|
256
262
|
- Auto final rung: ${data.autoFinalRung || "待补充"}
|
|
257
263
|
- Eval objective: ${data.evalObjective || "待补充"}
|
|
264
|
+
- Primary metrics: ${data.evalPrimaryMetrics || "待补充"}
|
|
265
|
+
- Secondary metrics: ${data.evalSecondaryMetrics || "待补充"}
|
|
266
|
+
- Required terminal evidence: ${data.evalRequiredTerminalEvidence || "待补充"}
|
|
258
267
|
- Table plan: ${data.evalTablePlan || "待补充"}
|
|
259
268
|
- Metric glossary: ${data.evalMetricGlossary || "待补充"}
|
|
260
269
|
- Metric source papers: ${data.evalMetricSourcePapers || "待补充"}
|
|
@@ -318,6 +327,9 @@ ${data.problem || "TBD"}
|
|
|
318
327
|
- Auto final artifact: ${data.autoFinalArtifact || "TBD"}
|
|
319
328
|
- Auto final rung: ${data.autoFinalRung || "TBD"}
|
|
320
329
|
- Eval objective: ${data.evalObjective || "TBD"}
|
|
330
|
+
- Primary metrics: ${data.evalPrimaryMetrics || "TBD"}
|
|
331
|
+
- Secondary metrics: ${data.evalSecondaryMetrics || "TBD"}
|
|
332
|
+
- Required terminal evidence: ${data.evalRequiredTerminalEvidence || "TBD"}
|
|
321
333
|
- Table plan: ${data.evalTablePlan || "TBD"}
|
|
322
334
|
- Metric glossary: ${data.evalMetricGlossary || "TBD"}
|
|
323
335
|
- Metric source papers: ${data.evalMetricSourcePapers || "TBD"}
|
|
@@ -569,6 +581,9 @@ function buildContextSnapshot(targetDir) {
|
|
|
569
581
|
autoFinalArtifact: extractValue(autoOutcome, ["Final artifact", "最终工件"]),
|
|
570
582
|
autoFinalRung: extractValue(autoOutcome, ["Final rung", "最终 rung"]),
|
|
571
583
|
evalObjective: evalProtocol.primaryEvaluationObjective,
|
|
584
|
+
evalPrimaryMetrics: evalProtocol.primaryMetrics,
|
|
585
|
+
evalSecondaryMetrics: evalProtocol.secondaryMetrics,
|
|
586
|
+
evalRequiredTerminalEvidence: evalProtocol.requiredTerminalEvidence,
|
|
572
587
|
evalTablePlan: evalProtocol.tablePlan,
|
|
573
588
|
evalMetricGlossary: evalProtocol.metricGlossary,
|
|
574
589
|
evalMetricSourcePapers: evalProtocol.metricSourcePapers,
|