superlab 0.1.26 → 0.1.28

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. package/README.md +3 -0
  2. package/README.zh-CN.md +3 -0
  3. package/bin/superlab.cjs +11 -0
  4. package/lib/auto_contracts.cjs +1 -1
  5. package/lib/auto_runner.cjs +1 -1
  6. package/lib/context.cjs +30 -198
  7. package/lib/i18n.cjs +229 -19
  8. package/lib/lab_write_contract.json +8 -0
  9. package/package-assets/claude/commands/lab-idea.md +1 -1
  10. package/package-assets/claude/commands/lab-write.md +1 -1
  11. package/package-assets/claude/commands/lab.md +4 -3
  12. package/package-assets/codex/prompts/lab-idea.md +1 -1
  13. package/package-assets/codex/prompts/lab-write.md +1 -1
  14. package/package-assets/codex/prompts/lab.md +4 -3
  15. package/package-assets/shared/lab/.managed/scripts/validate_idea_artifact.py +147 -0
  16. package/package-assets/shared/lab/.managed/scripts/validate_manuscript_delivery.py +50 -4
  17. package/package-assets/shared/lab/.managed/scripts/validate_paper_claims.py +86 -0
  18. package/package-assets/shared/lab/.managed/scripts/validate_paper_plan.py +263 -0
  19. package/package-assets/shared/lab/.managed/scripts/validate_section_draft.py +181 -0
  20. package/package-assets/shared/lab/.managed/templates/idea.md +43 -0
  21. package/package-assets/shared/lab/.managed/templates/paper-plan.md +78 -0
  22. package/package-assets/shared/lab/config/workflow.json +2 -1
  23. package/package-assets/shared/lab/context/auto-mode.md +1 -1
  24. package/package-assets/shared/lab/context/next-action.md +4 -4
  25. package/package-assets/shared/lab/context/session-brief.md +8 -1
  26. package/package-assets/shared/lab/context/summary.md +14 -3
  27. package/package-assets/shared/skills/lab/SKILL.md +17 -16
  28. package/package-assets/shared/skills/lab/references/paper-writing/examples/abstract/template-b.md +2 -2
  29. package/package-assets/shared/skills/lab/references/paper-writing/examples/conclusion/conservative-claim-boundary.md +13 -13
  30. package/package-assets/shared/skills/lab/references/paper-writing/examples/experiments/main-results-and-ablation-latex.md +18 -17
  31. package/package-assets/shared/skills/lab/references/paper-writing/examples/experiments-examples.md +1 -1
  32. package/package-assets/shared/skills/lab/references/paper-writing/examples/index.md +1 -1
  33. package/package-assets/shared/skills/lab/references/paper-writing/examples/introduction/pipeline-version-1-one-contribution-multi-advantages.md +3 -3
  34. package/package-assets/shared/skills/lab/references/paper-writing/examples/introduction/pipeline-version-2-two-contributions.md +1 -1
  35. package/package-assets/shared/skills/lab/references/paper-writing/examples/method/annotated-figure-to-text.md +66 -0
  36. package/package-assets/shared/skills/lab/references/paper-writing/examples/method/example-of-the-three-elements.md +11 -11
  37. package/package-assets/shared/skills/lab/references/paper-writing/examples/method/{module-design-instant-ngp.md → module-design-multiresolution-encoding.md} +1 -1
  38. package/package-assets/shared/skills/lab/references/paper-writing/examples/method/{module-triad-neural-body.md → module-triad-anchored-representation.md} +4 -4
  39. package/package-assets/shared/skills/lab/references/paper-writing/examples/method/overview-template.md +4 -4
  40. package/package-assets/shared/skills/lab/references/paper-writing/examples/method/pre-writing-questions.md +4 -3
  41. package/package-assets/shared/skills/lab/references/paper-writing/examples/method-examples.md +4 -4
  42. package/package-assets/shared/skills/lab/references/paper-writing/examples/related-work/closest-prior-gap-template.md +12 -12
  43. package/package-assets/shared/skills/lab/references/paper-writing/examples/related-work/topic-comparison-template.md +2 -2
  44. package/package-assets/shared/skills/lab/stages/auto.md +6 -2
  45. package/package-assets/shared/skills/lab/stages/data.md +0 -1
  46. package/package-assets/shared/skills/lab/stages/framing.md +0 -1
  47. package/package-assets/shared/skills/lab/stages/idea.md +30 -13
  48. package/package-assets/shared/skills/lab/stages/write.md +28 -4
  49. package/package.json +1 -1
  50. package/package-assets/shared/skills/lab/references/paper-writing/examples/method/neural-body-annotated-figure-text.md +0 -66
@@ -0,0 +1,181 @@
1
+ #!/usr/bin/env python3
2
+ import argparse
3
+ import re
4
+ import sys
5
+ from pathlib import Path
6
+
7
+
8
+ def parse_args():
9
+ parser = argparse.ArgumentParser(
10
+ description="Validate section-specific paper-writing quality expectations."
11
+ )
12
+ parser.add_argument(
13
+ "--section",
14
+ required=True,
15
+ choices=["abstract", "introduction", "related-work", "method", "experiments", "conclusion"],
16
+ help="Section type being validated",
17
+ )
18
+ parser.add_argument("--section-file", required=True, help="Path to the section .tex file")
19
+ parser.add_argument(
20
+ "--mode",
21
+ required=True,
22
+ choices=["draft", "final"],
23
+ help="Draft rounds emit warnings; final rounds fail on missing section structure",
24
+ )
25
+ return parser.parse_args()
26
+
27
+
28
+ def read_text(path: Path) -> str:
29
+ return path.read_text(encoding="utf-8")
30
+
31
+
32
+ def contains_any(text: str, needles: tuple[str, ...]) -> bool:
33
+ lowered = text.lower()
34
+ return any(needle.lower() in lowered for needle in needles)
35
+
36
+
37
+ def check_abstract(text: str, issues: list[str]):
38
+ numbers = re.findall(r"\b\d+(?:\.\d+)?\b", text)
39
+ if len(numbers) > 6:
40
+ issues.append("abstract has overly dense benchmark-number dumping")
41
+ if not contains_any(text, ("challenge", "gap", "however", "difficulty", "难点", "挑战", "不足")):
42
+ issues.append("abstract should state the core challenge or gap")
43
+ if not contains_any(text, ("boundary", "bounded", "limitation", "however", "but", "局限", "边界", "限制")):
44
+ issues.append("abstract should include a bounded result or explicit limitation")
45
+
46
+
47
+ def check_introduction(text: str, issues: list[str]):
48
+ has_problem_figure = contains_any(
49
+ text,
50
+ (
51
+ r"\input{figures/problem-setting}",
52
+ r"\begin{figure}",
53
+ r"\ref{fig:problem-setting}",
54
+ ),
55
+ )
56
+ if not has_problem_figure:
57
+ issues.append("introduction should consume a problem-setting figure")
58
+ if not contains_any(
59
+ text,
60
+ (
61
+ "contribution",
62
+ "contributions",
63
+ "贡献",
64
+ "我们的贡献",
65
+ ),
66
+ ):
67
+ issues.append("introduction should make contributions explicit")
68
+ if not contains_any(
69
+ text,
70
+ (
71
+ "however",
72
+ "prior work",
73
+ "existing methods",
74
+ "falls short",
75
+ "gap",
76
+ "limitation",
77
+ "然而",
78
+ "现有方法",
79
+ "不足",
80
+ ),
81
+ ):
82
+ issues.append("introduction should explain what is missing in prior work")
83
+
84
+
85
+ def check_related_work(text: str, issues: list[str]):
86
+ if r"\cite{" not in text:
87
+ issues.append("related work should include citation-driven comparisons")
88
+
89
+
90
+ def check_method(text: str, issues: list[str]):
91
+ has_method_figure = contains_any(
92
+ text,
93
+ (
94
+ r"\input{figures/method-overview}",
95
+ r"\begin{figure}",
96
+ r"\ref{fig:method-overview}",
97
+ ),
98
+ )
99
+ if not has_method_figure:
100
+ issues.append("method should consume a method-overview figure")
101
+ if not contains_any(text, ("motivation", "motivate", "动机")):
102
+ issues.append("method should explain module motivation")
103
+ if not contains_any(text, ("design", "we first", "we then", "设计")):
104
+ issues.append("method should explain the concrete design")
105
+ if not contains_any(text, ("advantage", "benefit", "improves", "优势", "收益")):
106
+ issues.append("method should explain the technical advantage")
107
+
108
+
109
+ def check_experiments(text: str, issues: list[str]):
110
+ if not contains_any(
111
+ text,
112
+ (
113
+ r"\input{analysis/analysis-asset}",
114
+ r"\input{tables/analysis}",
115
+ r"\ref{fig:analysis",
116
+ r"\ref{tab:analysis",
117
+ ),
118
+ ):
119
+ issues.append("experiments should consume an analysis asset")
120
+ if not contains_any(
121
+ text,
122
+ (
123
+ "represents",
124
+ "corresponds to",
125
+ "setting",
126
+ "scenario",
127
+ "scene",
128
+ "task setting",
129
+ "benchmark family",
130
+ "场景",
131
+ "设定",
132
+ "任务",
133
+ ),
134
+ ):
135
+ issues.append("experiments should include benchmark scene notes")
136
+
137
+
138
+ def check_conclusion(text: str, issues: list[str]):
139
+ if not contains_any(text, ("limitation", "limitations", "bounded", "局限", "限制", "边界")):
140
+ issues.append("conclusion should state at least one limitation or boundary")
141
+ if not contains_any(text, ("future work", "next step", "future direction", "下一步", "未来工作")):
142
+ issues.append("conclusion should state one next step or future direction")
143
+
144
+
145
+ SECTION_CHECKS = {
146
+ "abstract": check_abstract,
147
+ "introduction": check_introduction,
148
+ "related-work": check_related_work,
149
+ "method": check_method,
150
+ "experiments": check_experiments,
151
+ "conclusion": check_conclusion,
152
+ }
153
+
154
+
155
+ def main():
156
+ args = parse_args()
157
+ section_path = Path(args.section_file)
158
+ if not section_path.exists():
159
+ print(f"section file does not exist: {section_path}", file=sys.stderr)
160
+ return 1
161
+
162
+ text = read_text(section_path)
163
+ issues: list[str] = []
164
+ SECTION_CHECKS[args.section](text, issues)
165
+
166
+ if not issues:
167
+ print("section draft is valid")
168
+ return 0
169
+
170
+ if args.mode == "draft":
171
+ for issue in issues:
172
+ print(f"WARNING: {issue}")
173
+ return 0
174
+
175
+ for issue in issues:
176
+ print(issue, file=sys.stderr)
177
+ return 1
178
+
179
+
180
+ if __name__ == "__main__":
181
+ raise SystemExit(main())
@@ -1,9 +1,19 @@
1
1
  # Idea Artifact
2
2
 
3
+ ## Scenario
4
+
5
+ - Real-world setting:
6
+ - Who would care if this problem were solved:
7
+
3
8
  ## One-Sentence Problem
4
9
 
5
10
  State the problem in one short sentence.
6
11
 
12
+ ## Why It Matters
13
+
14
+ - Why this matters in practice:
15
+ - What breaks if we ignore it:
16
+
7
17
  ## Failure Case
8
18
 
9
19
  - Where current methods fail:
@@ -48,6 +58,32 @@ Suggested levels:
48
58
  - Shared assumption:
49
59
  - Why that assumption breaks here:
50
60
 
61
+ ## Literature Scoping Bundle
62
+
63
+ - Default target source count:
64
+ - Actual source count:
65
+ - Closest prior bucket:
66
+ - Recent strong papers:
67
+ - Benchmark or evaluation papers:
68
+ - Survey or taxonomy papers:
69
+ - Adjacent-field papers:
70
+ - If the total is below the default target, why:
71
+
72
+ ## Closest Prior Work Comparison
73
+
74
+ - Prior work 1:
75
+ - Citation:
76
+ - What it does:
77
+ - Why it is relevant here:
78
+ - Limitation for the current problem:
79
+ - Difference from our direction:
80
+ - Prior work 2:
81
+ - Citation:
82
+ - What it does:
83
+ - Why it is relevant here:
84
+ - Limitation for the current problem:
85
+ - Difference from our direction:
86
+
51
87
  ## Why Ours Is Different
52
88
 
53
89
  - Existing methods rely on:
@@ -55,6 +91,11 @@ Suggested levels:
55
91
  - Expected advantage:
56
92
  - Evidence needed to prove the advantage:
57
93
 
94
+ ## Rough Approach
95
+
96
+ - Plain-language description of how this would work:
97
+ - Why this design might resolve the failure case:
98
+
58
99
  ## Three Meaningful Points
59
100
 
60
101
  1. Significance:
@@ -67,6 +108,7 @@ Suggested levels:
67
108
  - Benchmark conventions:
68
109
  - Typical datasets:
69
110
  - Typical metrics:
111
+ - Credible baseline shortlist:
70
112
 
71
113
  ## Candidate Approaches
72
114
 
@@ -90,6 +132,7 @@ Suggested levels:
90
132
  - Primary metric:
91
133
  - Secondary metrics:
92
134
  - Minimum viable experiment:
135
+ - Fastest way to falsify the idea:
93
136
 
94
137
  ## Critique Summary
95
138
 
@@ -24,6 +24,84 @@
24
24
  - Limitation sources:
25
25
  - Claims that still need more evidence:
26
26
 
27
+ ## Asset Coverage Targets
28
+
29
+ - Core asset floor:
30
+ - Required coverage categories:
31
+ - Current planned core assets:
32
+ - Coverage risks or gaps:
33
+
34
+ ## Table Plan
35
+
36
+ - Main results table:
37
+ - Asset file:
38
+ - Section:
39
+ - Message:
40
+ - Evidence:
41
+ - Status:
42
+ - Ablation table:
43
+ - Asset file:
44
+ - Section:
45
+ - Message:
46
+ - Evidence:
47
+ - Status:
48
+
49
+ ## Figure Plan
50
+
51
+ - Problem setting or teaser figure:
52
+ - Asset file:
53
+ - Section:
54
+ - Figure intent:
55
+ - Evidence:
56
+ - Status:
57
+ - Method overview figure:
58
+ - Asset file:
59
+ - Section:
60
+ - Figure intent:
61
+ - Evidence:
62
+ - Status:
63
+ - Results overview figure:
64
+ - Asset file:
65
+ - Section:
66
+ - Figure intent:
67
+ - Evidence:
68
+ - Status:
69
+
70
+ ## Analysis Asset Plan
71
+
72
+ - Analysis asset:
73
+ - Asset file:
74
+ - Asset type:
75
+ - Section:
76
+ - Asset intent:
77
+ - Evidence:
78
+ - Status:
79
+
80
+ ## Citation Plan
81
+
82
+ - Background anchor:
83
+ - Citation or source anchor:
84
+ - Section:
85
+ - Why it matters:
86
+ - Status:
87
+ - Closest prior work:
88
+ - Citation or source anchor:
89
+ - Section:
90
+ - Why it matters:
91
+ - Status:
92
+ - Metric or benchmark source:
93
+ - Citation or source anchor:
94
+ - Section:
95
+ - Why it matters:
96
+ - Status:
97
+
98
+ ## Section-to-Asset Map
99
+
100
+ - Introduction:
101
+ - Method:
102
+ - Experiments:
103
+ - Related Work:
104
+
27
105
  ## Writing Order
28
106
 
29
107
  1. First section target:
@@ -7,5 +7,6 @@
7
7
  "deliverables_root": "docs/research",
8
8
  "paper_template_root": "",
9
9
  "paper_template_decision": "unconfirmed",
10
- "paper_template_final_reminder_acknowledged": false
10
+ "paper_template_final_reminder_acknowledged": false,
11
+ "paper_language_finalization_decision": "unconfirmed"
11
12
  }
@@ -68,4 +68,4 @@ If `eval-protocol.md` declares structured rung entries, auto mode follows those
68
68
 
69
69
  - Stop conditions:
70
70
  - Escalation conditions:
71
- - Canonical promotion writeback: update `.lab/context/data-decisions.md`, `.lab/context/decisions.md`, `.lab/context/state.md`, and `.lab/context/session-brief.md`.
71
+ - Canonical promotion writeback: update `.lab/context/data-decisions.md`, `.lab/context/decisions.md`, `.lab/context/state.md`, and `.lab/context/workflow-state.md`.
@@ -5,15 +5,15 @@
5
5
  - Action:
6
6
  - Success signal:
7
7
 
8
- ## If Success
8
+ ## After Completion
9
9
 
10
10
  - Next action:
11
11
 
12
- ## If Failure
12
+ ## If Blocked
13
13
 
14
14
  - Fallback action:
15
15
 
16
- ## Human Decision Needed
16
+ ## Escalation
17
17
 
18
18
  - Question:
19
- - Why it blocks progress:
19
+ - Escalate when:
@@ -3,6 +3,8 @@
3
3
  ## Active Stage
4
4
 
5
5
  - Stage:
6
+ - Current objective:
7
+ - Immediate next action:
6
8
 
7
9
  ## Mission
8
10
 
@@ -11,10 +13,15 @@ One sentence describing the active research mission.
11
13
  ## Best Current Path
12
14
 
13
15
  - Approved direction:
14
- - Why this is the active path:
16
+ - Strongest supported claim:
15
17
  - Auto mode:
16
18
  - Auto objective:
17
19
  - Auto decision:
20
+ - Collaborator report mode:
21
+ - Canonical context readiness:
22
+ - Method name:
23
+ - Primary metrics:
24
+ - Secondary metrics:
18
25
 
19
26
  ## Main Risk
20
27
 
@@ -1,12 +1,23 @@
1
1
  # Research Summary
2
2
 
3
3
  ## Current Direction
4
-
5
- Summarize the current approved research direction in 5-10 lines.
6
-
4
+ - Mission:
5
+ - Approved direction:
6
+ - Active stage:
7
+ - Current objective:
7
8
  - Auto mode:
8
9
  - Auto objective:
9
10
  - Auto decision:
11
+ - Collaborator report mode:
12
+ - Canonical context readiness:
13
+ - Method name:
14
+ - Contribution bullets:
15
+ - Eval objective:
16
+ - Primary metrics:
17
+ - Secondary metrics:
18
+ - Dataset package:
19
+ - Benchmark role:
20
+ - Comparison suite:
10
21
 
11
22
  ## Strongest Evidence
12
23
 
@@ -34,17 +34,22 @@ Use this skill when the user invokes `/lab:*` or asks for the structured researc
34
34
  ### `/lab:idea`
35
35
 
36
36
  - Search relevant literature, baselines, datasets, and evaluation metrics before proposing a plan.
37
+ - Build a literature-scoping bundle before claiming novelty. The default target is 20 relevant sources unless the field is genuinely too narrow and that exception is written down.
37
38
  - Read `.lab/context/mission.md` and `.lab/context/open-questions.md` before drafting.
39
+ - Read `.lab/config/workflow.json` before drafting and follow its `workflow_language` for idea artifacts.
38
40
  - Ask one clarifying question at a time when critical ambiguity remains.
39
- - State the problem, the failure case, and why the problem matters before proposing solutions.
41
+ - State the scenario, the problem, the failure case, and why the problem matters before proposing solutions.
40
42
  - Classify the idea by contribution category and breakthrough level.
41
43
  - Compare against existing methods explicitly and state why the idea should be better.
44
+ - Include a closest-prior-work comparison and a plain-language description of how the proposed direction would work.
42
45
  - Distinguish sourced evidence from generated innovation claims.
43
46
  - End with three meaningful points that are clear, short, and easy to scan.
44
47
  - Produce 2-3 candidate approaches with trade-offs before recommending one.
45
48
  - Critique the idea before converging on it.
49
+ - Include a minimum viable experiment before approval.
46
50
  - Keep an explicit approval gate before `/lab:spec`.
47
51
  - Write idea artifacts with the template in `.lab/.managed/templates/idea.md`.
52
+ - Run `.lab/.managed/scripts/validate_idea_artifact.py --idea <idea-artifact> --workflow-config .lab/config/workflow.json` before treating the idea as converged.
48
53
  - Update `.lab/context/mission.md`, `.lab/context/decisions.md`, and `.lab/context/open-questions.md` after convergence.
49
54
  - Do not leave `.lab/context/mission.md` as a template shell once the problem statement and approved direction are known.
50
55
  - Do not implement code in this stage.
@@ -91,9 +96,9 @@ Use this skill when the user invokes `/lab:*` or asks for the structured researc
91
96
  - Reuse `/lab:run`, `/lab:iterate`, `/lab:review`, `/lab:report`, and optional `/lab:write` instead of inventing a second workflow.
92
97
  - Do not automatically change the research mission, paper-facing framing, or core claims.
93
98
  - You may add exploratory datasets, benchmarks, and comparison methods inside the approved exploration envelope.
94
- - You may promote an exploratory addition to the primary package only after the promotion policy in `auto-mode.md` is satisfied and the promotion is written back into `.lab/context/data-decisions.md`, `.lab/context/decisions.md`, `.lab/context/state.md`, and `.lab/context/session-brief.md`.
99
+ - You may promote an exploratory addition to the primary package only after the promotion policy in `auto-mode.md` is satisfied and the promotion is written back into `.lab/context/data-decisions.md`, `.lab/context/decisions.md`, `.lab/context/state.md`, and `.lab/context/workflow-state.md`.
95
100
  - Poll long-running commands until they complete, time out, or hit a stop condition.
96
- - Update `.lab/context/auto-status.md`, `.lab/context/state.md`, `.lab/context/workflow-state.md`, `.lab/context/decisions.md`, `.lab/context/data-decisions.md`, `.lab/context/evidence-index.md`, and `.lab/context/session-brief.md` as the campaign advances.
101
+ - Update `.lab/context/auto-status.md`, `.lab/context/state.md`, `.lab/context/workflow-state.md`, `.lab/context/decisions.md`, `.lab/context/data-decisions.md`, and `.lab/context/evidence-index.md` as the campaign advances, then refresh the derived handoff files.
97
102
  - Keep an explicit approval gate when a proposed action would leave the frozen core defined by the auto-mode contract.
98
103
 
99
104
  ### `/lab:spec`
@@ -165,26 +170,21 @@ Use this skill when the user invokes `/lab:*` or asks for the structured researc
165
170
 
166
171
  - Start only after `report` artifacts are stable enough to support paper claims.
167
172
  - Start only after an approved framing artifact exists at `.lab/writing/framing.md`.
168
- - Read `.lab/config/workflow.json` before drafting and enforce its `paper_language` and `paper_format`.
169
- - If `paper_template_root` is empty and `paper_template_decision` is `unconfirmed`, ask once whether to continue with the managed default scaffold or attach a template directory first; persist the answer before drafting `.tex`.
170
- - If the project is still on the default scaffold at a final export or final-draft boundary and `paper_template_final_reminder_acknowledged` is `false`, ask one final reminder question before finalizing.
173
+ - Read `.lab/config/workflow.json` before drafting and enforce its `workflow_language`, `paper_language`, and `paper_format`.
171
174
  - Read `.lab/context/mission.md`, `.lab/context/decisions.md`, `.lab/context/evidence-index.md`, and `.lab/context/data-decisions.md` before drafting.
172
175
  - Write one paper section or one explicit subproblem per round.
176
+ - Ordinary manuscript drafting rounds should follow `workflow_language`.
177
+ - If `workflow_language` and `paper_language` differ, the first final-draft or export round must ask once whether to keep the draft language or convert the final manuscript to `paper_language`, then persist that choice.
173
178
  - Bind each claim to evidence from `report`, iteration reports, or normalized summaries.
174
- - Write planning artifacts with `.lab/.managed/templates/paper-plan.md`, `.lab/.managed/templates/paper-section.md`, and `.lab/.managed/templates/write-iteration.md`.
175
- - Write final manuscript artifacts with `.lab/.managed/templates/paper.tex`, `.lab/.managed/templates/paper-section.tex`, `.lab/.managed/templates/paper-table.tex`, `.lab/.managed/templates/paper-figure.tex`, and `.lab/.managed/templates/paper-references.bib`.
176
- - Use the vendored paper-writing references under `skills/lab/references/paper-writing/`.
177
- - For any section with a bundled example bank, also use the vendored example-bank files under `skills/lab/references/paper-writing/examples/`.
178
- - Load only the current section guide, the matching examples index when one exists, 1-2 matching concrete example files, plus `paper-review.md` and `does-my-writing-flow-source.md`.
179
- - Build a compact mini-outline before prose.
180
- - Build the paper asset plan before prose when the section carries method or experiments claims.
179
+ - Use the write-stage contract in `.codex/skills/lab/stages/write.md` or `.claude/skills/lab/stages/write.md` as the single source of truth for template choice, paper-plan requirements, section-specific references, validator calls, asset coverage, and final manuscript gates.
180
+ - Use the vendored paper-writing references under `skills/lab/references/paper-writing/` and the matching example-bank files under `skills/lab/references/paper-writing/examples/`.
181
+ - Treat `.lab/writing/plan.md` as the write-time source of truth for tables, figures, citations, and asset coverage.
182
+ - Treat section-quality, claim-safety, and manuscript-delivery checks as the canonical acceptance gates for final-draft or export rounds.
181
183
  - For each subsection, explicitly cover motivation, design, and technical advantage when applicable.
182
184
  - Keep terminology stable across rounds and sections.
183
185
  - If a claim is not supported by evidence, weaken or remove it.
184
186
  - Treat tables, figures, citations, and bibliography as core manuscript content rather than optional polish.
185
187
  - Keep paper-facing LaTeX free of absolute local paths, rerun ids, shell transcripts, and internal workflow provenance.
186
- - Materialize real LaTeX tables and figure placeholders instead of leaving all evidence inside prose paragraphs.
187
- - Run `.lab/.managed/scripts/validate_manuscript_delivery.py --paper-dir <deliverables_root>/paper` before accepting a final-draft or export round.
188
188
  - Before finalizing a round, append and answer the five-dimension self-review checklist and revise unresolved items.
189
189
  - Apply paper-writing discipline without changing experimental truth.
190
190
  - If the evidence is insufficient, stop and route back to `review` or `iterate`.
@@ -199,7 +199,8 @@ Use this skill when the user invokes `/lab:*` or asks for the structured researc
199
199
  - No unconstrained auto mode. Every `/lab:auto` campaign must declare allowed stages, stop conditions, and a promotion policy in `.lab/context/auto-mode.md`.
200
200
  - No auto start without an explicit autonomy level and `Approval status: approved`.
201
201
  - No final report without validated normalized results.
202
- - No paper-writing round without stable report artifacts, an approved framing artifact, evidence links, LaTeX manuscript output, and a passing manuscript-delivery validation for final-draft or export rounds.
202
+ - No paper-writing round without stable report artifacts, an approved framing artifact, evidence links, and LaTeX manuscript output.
203
+ - No final-draft or export round without passing section-quality, claim-safety, and manuscript-delivery validation.
203
204
 
204
205
  ## References
205
206
 
@@ -16,7 +16,7 @@
16
16
 
17
17
  % Introduce the technical contribution that implements the insight in one to two sentences (usually mention the technical term/name only, without describing every detailed step. The term should be easy to understand and should not create a jump in reading. This ability is very important for writing a good abstract.)
18
18
  %% Example 1: To do this, we first present a label-efficient depth estimation framework using the internal representations of diffusion models. At the sampling phase, we utilize two guidance techniques to self-condition the generated image using the estimated depth map, the first of which uses pseudo-labeling, and the subsequent one uses a depth-domain diffusion prior.
19
- %% Example 2: To this end, we propose Neural Body, a new human body representation which assumes that the learned neural representations at different frames share the same set of latent codes anchored to a deformable mesh
19
+ %% Example 2: To this end, we propose AnchorField, a structured representation in which different observations share the same set of latent codes anchored to a deformable support.
20
20
 
21
21
  % Introduce the benefits of technical novelty
22
22
  %% Example 2: so that the observations across frames can be naturally integrated. The deformable mesh also provides geometric guidance for the network to learn 3D representations more efficiently.
@@ -29,6 +29,6 @@
29
29
  1. `This paper addresses the challenge of novel view synthesis for a human performer from a very sparse set of camera views.`
30
30
  2. `... representation learning will be ill-posed if the views are highly sparse.`
31
31
  3. `To solve this ill-posed problem, our key idea is to integrate observations over video frames.`
32
- 4. `To this end, we propose Neural Body ...`
32
+ 4. `To this end, we propose AnchorField ...`
33
33
  5. `... observations across frames can be naturally integrated ... provides geometric guidance ...`
34
34
  6. `Experiments show [main result].`
@@ -6,22 +6,22 @@ boundary explicit.
6
6
  ```tex
7
7
  \section{Conclusion}
8
8
 
9
- This paper shows that adding a structured ranking backbone together with a
10
- post-hoc calibration stage improves uplift ranking under the frozen benchmark
11
- protocol. Across the three benchmark families used in this work, the full model
9
+ This paper shows that adding a structured intermediate module together with a
10
+ lightweight adjustment stage improves performance under a fixed evaluation
11
+ protocol. Across the benchmark families used in this work, the full model
12
12
  consistently matches or exceeds the strongest baselines and remains stronger
13
13
  than the key ablated variants. This makes the main claim narrower than a
14
- universal superiority claim but stronger than a single-dataset win.
14
+ universal superiority claim but stronger than a single-setting win.
15
15
 
16
- We do not claim that the current method solves uplift modeling in every domain
17
- or that every design choice helps equally on every benchmark. In particular, the
18
- calibration stage appears beneficial on some datasets and neutral on others,
19
- which means its value should be interpreted as setting-dependent rather than as
20
- a guaranteed gain. That boundary is consistent with recent benchmarking
21
- practice, which argues for claim discipline and protocol-specific interpretation
22
- rather than broad overgeneralization~\cite{carlini2019evaluating}.
16
+ We do not claim that the current method solves the broader problem in every
17
+ domain or that every design choice helps equally on every benchmark. In
18
+ particular, the adjustment stage appears beneficial in some settings and
19
+ neutral in others, which means its value should be interpreted as
20
+ setting-dependent rather than as a guaranteed gain. That boundary is consistent
21
+ with recent benchmarking practice, which argues for claim discipline and
22
+ protocol-specific interpretation rather than broad overgeneralization~\cite{carlini2019evaluating}.
23
23
 
24
24
  The most useful next step is to extend the evaluation to a broader set of
25
- benchmark slices and to test whether the same ranking-versus-calibration split
26
- remains useful when the label distribution shifts more aggressively.
25
+ benchmark slices and to test whether the same backbone-versus-adjustment split
26
+ remains useful when the data distribution shifts more aggressively.
27
27
  ```
@@ -1,6 +1,6 @@
1
1
  # Main Results and Ablation LaTeX Example
2
2
 
3
- This file is a complete paper-facing LaTeX example for the experiments section.
3
+ This file is a complete manuscript-ready LaTeX example for the experiments section.
4
4
  Reuse the structure, caption logic, and prose-to-table linkage. Replace the
5
5
  placeholder methods, metrics, and values with the current project's evidence.
6
6
 
@@ -14,13 +14,13 @@ Source inspiration:
14
14
 
15
15
  ```tex
16
16
  \begin{table}[t]
17
- \caption{Main benchmark results under the frozen evaluation protocol. Higher is better on all metrics.}
17
+ \caption{Main benchmark results under the fixed evaluation protocol. Higher is better on all metrics.}
18
18
  \label{tab:main-results}
19
19
  \centering
20
20
  \resizebox{0.92\linewidth}{!}{
21
21
  \begin{tabular}{lccc}
22
22
  \toprule
23
- Method & AUUC $\uparrow$ & Qini $\uparrow$ & Calibration Error $\downarrow$ \\
23
+ Method & Primary Metric $\uparrow$ & Secondary Metric $\uparrow$ & Error Metric $\downarrow$ \\
24
24
  \midrule
25
25
  Strongest baseline & 0.1421 & 0.0873 & 0.0612 \\
26
26
  Closest prior work & 0.1488 & 0.0915 & 0.0544 \\
@@ -32,7 +32,7 @@ Ours & \textbf{0.1564} & \textbf{0.0987} & \textbf{0.0418} \\
32
32
  ```
33
33
 
34
34
  Table message:
35
- - `Does the proposed method beat the strongest baselines under the frozen protocol?`
35
+ - `Does the proposed method beat the strongest baselines under the fixed evaluation protocol?`
36
36
 
37
37
  ## Ablation Table
38
38
 
@@ -47,7 +47,7 @@ Variant & AUUC $\uparrow$ \\
47
47
  \midrule
48
48
  Ours & \textbf{0.1564} \\
49
49
  w/o structure module & 0.1497 \\
50
- w/o calibration stage & 0.1510 \\
50
+ w/o final adjustment stage & 0.1510 \\
51
51
  w/ shuffled auxiliary signal & 0.1458 \\
52
52
  \bottomrule
53
53
  \end{tabular}
@@ -65,19 +65,20 @@ Table message:
65
65
 
66
66
  Table~\ref{tab:main-results} answers the main ranking question: whether the full
67
67
  method remains stronger than the closest prior work and the strongest practical
68
- baseline under the frozen protocol. Our method achieves the best AUUC and Qini
69
- while also reducing calibration error, which means the gain is not coming from
70
- trading ranking quality against stability.
68
+ baseline under the fixed evaluation protocol. Our method achieves the best
69
+ primary and secondary metrics while also reducing the error metric, which means
70
+ the gain is not coming from trading one objective against stability.
71
71
 
72
72
  Table~\ref{tab:ablations} then asks a narrower mechanism question. Removing the
73
- structure module causes the largest drop, so the main gain is tied to structured
74
- signal modeling rather than to a generic increase in capacity. Removing the
75
- calibration stage leads to a smaller but still visible drop, which supports the
76
- claim that calibration improves final ranking quality without being the sole
77
- driver of the result. The shuffled-signal variant acts as a negative control and
78
- shows that the gain does not survive when the auxiliary information is broken.
73
+ structure module causes the largest drop, so the main gain is tied to explicit
74
+ structure modeling rather than to a generic increase in capacity. Removing the
75
+ final adjustment stage leads to a smaller but still visible drop, which
76
+ supports the claim that the adjustment helps the exposed prediction without
77
+ being the sole driver of the result. The shuffled-signal variant acts as a
78
+ negative control and shows that the gain does not survive when the auxiliary
79
+ information is broken.
79
80
 
80
- One caveat is that the calibration gain may remain neutral on some benchmarks,
81
- so the paper should not overclaim that every component helps equally on every
82
- dataset.
81
+ One caveat is that the final adjustment gain may remain neutral in some
82
+ settings, so the paper should not overclaim that every component helps equally
83
+ on every dataset.
83
84
  ```
@@ -1,6 +1,6 @@
1
1
  # Experiments Example Patterns
2
2
 
3
- Use these examples when turning validated results into paper-facing LaTeX assets.
3
+ Use these examples when turning validated results into manuscript-ready LaTeX assets.
4
4
  The referenced files contain complete LaTeX environments and section-level prose
5
5
  glue, not just checklists.
6
6