superlab 0.1.26 → 0.1.28
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +3 -0
- package/README.zh-CN.md +3 -0
- package/bin/superlab.cjs +11 -0
- package/lib/auto_contracts.cjs +1 -1
- package/lib/auto_runner.cjs +1 -1
- package/lib/context.cjs +30 -198
- package/lib/i18n.cjs +229 -19
- package/lib/lab_write_contract.json +8 -0
- package/package-assets/claude/commands/lab-idea.md +1 -1
- package/package-assets/claude/commands/lab-write.md +1 -1
- package/package-assets/claude/commands/lab.md +4 -3
- package/package-assets/codex/prompts/lab-idea.md +1 -1
- package/package-assets/codex/prompts/lab-write.md +1 -1
- package/package-assets/codex/prompts/lab.md +4 -3
- package/package-assets/shared/lab/.managed/scripts/validate_idea_artifact.py +147 -0
- package/package-assets/shared/lab/.managed/scripts/validate_manuscript_delivery.py +50 -4
- package/package-assets/shared/lab/.managed/scripts/validate_paper_claims.py +86 -0
- package/package-assets/shared/lab/.managed/scripts/validate_paper_plan.py +263 -0
- package/package-assets/shared/lab/.managed/scripts/validate_section_draft.py +181 -0
- package/package-assets/shared/lab/.managed/templates/idea.md +43 -0
- package/package-assets/shared/lab/.managed/templates/paper-plan.md +78 -0
- package/package-assets/shared/lab/config/workflow.json +2 -1
- package/package-assets/shared/lab/context/auto-mode.md +1 -1
- package/package-assets/shared/lab/context/next-action.md +4 -4
- package/package-assets/shared/lab/context/session-brief.md +8 -1
- package/package-assets/shared/lab/context/summary.md +14 -3
- package/package-assets/shared/skills/lab/SKILL.md +17 -16
- package/package-assets/shared/skills/lab/references/paper-writing/examples/abstract/template-b.md +2 -2
- package/package-assets/shared/skills/lab/references/paper-writing/examples/conclusion/conservative-claim-boundary.md +13 -13
- package/package-assets/shared/skills/lab/references/paper-writing/examples/experiments/main-results-and-ablation-latex.md +18 -17
- package/package-assets/shared/skills/lab/references/paper-writing/examples/experiments-examples.md +1 -1
- package/package-assets/shared/skills/lab/references/paper-writing/examples/index.md +1 -1
- package/package-assets/shared/skills/lab/references/paper-writing/examples/introduction/pipeline-version-1-one-contribution-multi-advantages.md +3 -3
- package/package-assets/shared/skills/lab/references/paper-writing/examples/introduction/pipeline-version-2-two-contributions.md +1 -1
- package/package-assets/shared/skills/lab/references/paper-writing/examples/method/annotated-figure-to-text.md +66 -0
- package/package-assets/shared/skills/lab/references/paper-writing/examples/method/example-of-the-three-elements.md +11 -11
- package/package-assets/shared/skills/lab/references/paper-writing/examples/method/{module-design-instant-ngp.md → module-design-multiresolution-encoding.md} +1 -1
- package/package-assets/shared/skills/lab/references/paper-writing/examples/method/{module-triad-neural-body.md → module-triad-anchored-representation.md} +4 -4
- package/package-assets/shared/skills/lab/references/paper-writing/examples/method/overview-template.md +4 -4
- package/package-assets/shared/skills/lab/references/paper-writing/examples/method/pre-writing-questions.md +4 -3
- package/package-assets/shared/skills/lab/references/paper-writing/examples/method-examples.md +4 -4
- package/package-assets/shared/skills/lab/references/paper-writing/examples/related-work/closest-prior-gap-template.md +12 -12
- package/package-assets/shared/skills/lab/references/paper-writing/examples/related-work/topic-comparison-template.md +2 -2
- package/package-assets/shared/skills/lab/stages/auto.md +6 -2
- package/package-assets/shared/skills/lab/stages/data.md +0 -1
- package/package-assets/shared/skills/lab/stages/framing.md +0 -1
- package/package-assets/shared/skills/lab/stages/idea.md +30 -13
- package/package-assets/shared/skills/lab/stages/write.md +28 -4
- package/package.json +1 -1
- package/package-assets/shared/skills/lab/references/paper-writing/examples/method/neural-body-annotated-figure-text.md +0 -66
|
@@ -0,0 +1,181 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
import argparse
|
|
3
|
+
import re
|
|
4
|
+
import sys
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def parse_args():
    """Parse the validator's command-line flags.

    All three flags are mandatory: ``--section`` (one of the six supported
    section names), ``--section-file`` and ``--mode`` (``draft`` or ``final``).

    Returns:
        The ``argparse.Namespace`` built from the process arguments.
    """
    cli = argparse.ArgumentParser(
        description="Validate section-specific paper-writing quality expectations."
    )

    section_names = ["abstract", "introduction", "related-work", "method", "experiments", "conclusion"]
    cli.add_argument(
        "--section",
        choices=section_names,
        required=True,
        help="Section type being validated",
    )

    cli.add_argument("--section-file", help="Path to the section .tex file", required=True)

    run_modes = ["draft", "final"]
    cli.add_argument(
        "--mode",
        choices=run_modes,
        required=True,
        help="Draft rounds emit warnings; final rounds fail on missing section structure",
    )

    parsed = cli.parse_args()
    return parsed
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def read_text(path: Path) -> str:
    """Return the full contents of *path*, decoded as UTF-8."""
    with path.open("r", encoding="utf-8") as handle:
        return handle.read()
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def contains_any(text: str, needles: tuple[str, ...]) -> bool:
    """Report whether *text* contains at least one of *needles*, ignoring case.

    Needles that begin with an ASCII letter or digit must start at a word
    start (the preceding character may not be an ASCII letter or digit), so a
    short cue such as ``"but"`` is no longer satisfied by "contribution" or
    "distribution". Suffixes are still allowed, so ``"limitation"`` keeps
    matching "limitations". Every other needle (LaTeX commands starting with a
    backslash, CJK phrases) is matched as a plain substring.

    Args:
        text: Section text to search.
        needles: Candidate phrases; any single hit is enough.

    Returns:
        True when any needle is found, otherwise False.
    """
    lowered = text.lower()
    for needle in needles:
        target = needle.lower()
        first = target[:1]
        if first.isascii() and first.isalnum():
            # ASCII lookbehind instead of \b: Unicode \w would treat adjacent
            # CJK characters as word characters and hide the boundary.
            if re.search(r"(?<![a-z0-9])" + re.escape(target), lowered):
                return True
        elif target in lowered:
            return True
    return False
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def check_abstract(text: str, issues: list[str]):
    """Append abstract-specific findings to *issues*.

    Three heuristics are applied, in this order:

    1. More than six standalone integer/decimal tokens counts as number dumping.
    2. At least one challenge/gap cue (English or Chinese) must be present.
    3. At least one boundary/limitation cue (English or Chinese) must be present.

    Args:
        text: Raw text of the abstract.
        issues: List that receives one message per failed heuristic (mutated
            in place).
    """
    numeric_tokens = re.findall(r"\b\d+(?:\.\d+)?\b", text)
    if len(numeric_tokens) > 6:
        issues.append("abstract has overly dense benchmark-number dumping")

    # The original tuple listed "challenge" twice; the duplicate is dropped.
    gap_cues = ("challenge", "gap", "however", "difficulty", "难点", "挑战", "不足")
    if not contains_any(text, gap_cues):
        issues.append("abstract should state the core challenge or gap")

    boundary_cues = ("boundary", "bounded", "limitation", "however", "but", "局限", "边界", "限制")
    if not contains_any(text, boundary_cues):
        issues.append("abstract should include a bounded result or explicit limitation")
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def check_introduction(text: str, issues: list[str]):
    """Append introduction-specific findings to *issues*.

    Each expectation pairs a group of cue strings with the message reported
    when none of the cues occurs in *text*. Messages are appended in the order
    listed: figure usage, explicit contributions, gap in prior work.
    """
    expectations = (
        (
            (
                r"\input{figures/problem-setting}",
                r"\begin{figure}",
                r"\ref{fig:problem-setting}",
            ),
            "introduction should consume a problem-setting figure",
        ),
        (
            ("contribution", "contributions", "贡献", "我们的贡献"),
            "introduction should make contributions explicit",
        ),
        (
            (
                "however",
                "prior work",
                "existing methods",
                "falls short",
                "gap",
                "limitation",
                "然而",
                "现有方法",
                "不足",
            ),
            "introduction should explain what is missing in prior work",
        ),
    )
    for cues, complaint in expectations:
        if contains_any(text, cues):
            continue
        issues.append(complaint)
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
# Matches \cite{...} plus common natbib/biblatex variants such as \citep{...},
# \citet[p.~3]{...}, \Citet{...}, \parencite{...} and \textcite{...}.
# \nocite is excluded because it prints no in-text citation.
_CITATION_COMMAND = re.compile(
    r"\\(?!nocite)[A-Za-z]*[Cc]ite[A-Za-z]*\*?\s*(?:\[[^\]]*\]\s*)*\{"
)


def check_related_work(text: str, issues: list[str]):
    """Append a finding to *issues* when the section has no citation command.

    Any text containing the literal ``\\cite{`` is still accepted; in addition
    the natbib/biblatex variants described on ``_CITATION_COMMAND`` now count.

    Args:
        text: Raw LaTeX of the related-work section.
        issues: List that receives the message when no citation is found
            (mutated in place).
    """
    if _CITATION_COMMAND.search(text) is None:
        issues.append("related work should include citation-driven comparisons")
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def check_method(text: str, issues: list[str]):
    """Append method-section findings to *issues*.

    Looks for an overview figure first, then for motivation, design and
    advantage cues; one message is appended for each group with no hit.
    """
    figure_cues = (
        r"\input{figures/method-overview}",
        r"\begin{figure}",
        r"\ref{fig:method-overview}",
    )
    checklist = [
        (figure_cues, "method should consume a method-overview figure"),
        (("motivation", "motivate", "动机"), "method should explain module motivation"),
        (("design", "we first", "we then", "设计"), "method should explain the concrete design"),
        (
            ("advantage", "benefit", "improves", "优势", "收益"),
            "method should explain the technical advantage",
        ),
    ]
    issues.extend(
        complaint for cues, complaint in checklist if not contains_any(text, cues)
    )
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
def check_experiments(text: str, issues: list[str]):
    """Append experiments-section findings to *issues*.

    The section must reference an analysis asset (input or ref) and must carry
    at least one benchmark scene/setting cue; each miss adds one message.
    """
    asset_cues = (
        r"\input{analysis/analysis-asset}",
        r"\input{tables/analysis}",
        r"\ref{fig:analysis",
        r"\ref{tab:analysis",
    )
    scene_cues = (
        "represents",
        "corresponds to",
        "setting",
        "scenario",
        "scene",
        "task setting",
        "benchmark family",
        "场景",
        "设定",
        "任务",
    )

    uses_analysis_asset = contains_any(text, asset_cues)
    if not uses_analysis_asset:
        issues.append("experiments should consume an analysis asset")

    has_scene_notes = contains_any(text, scene_cues)
    if not has_scene_notes:
        issues.append("experiments should include benchmark scene notes")
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
def check_conclusion(text: str, issues: list[str]):
    """Append conclusion findings to *issues*.

    A conclusion is expected to mention a limitation/boundary cue and a
    future-work cue; a message is appended for each one that is absent.
    """
    limitation_cues = ("limitation", "limitations", "bounded", "局限", "限制", "边界")
    outlook_cues = ("future work", "next step", "future direction", "下一步", "未来工作")

    mentions_limitation = contains_any(text, limitation_cues)
    if not mentions_limitation:
        issues.append("conclusion should state at least one limitation or boundary")

    mentions_outlook = contains_any(text, outlook_cues)
    if not mentions_outlook:
        issues.append("conclusion should state one next step or future direction")
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
# Dispatch table: each accepted --section value mapped to its checker function.
SECTION_CHECKS = dict(
    (
        ("abstract", check_abstract),
        ("introduction", check_introduction),
        ("related-work", check_related_work),
        ("method", check_method),
        ("experiments", check_experiments),
        ("conclusion", check_conclusion),
    )
)
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
def main():
    """Validate one section file and return the process exit status.

    Returns:
        0 when no issue is found, or when issues are found in ``draft`` mode
        (they are printed to stdout as ``WARNING:`` lines).
        1 when the section file is missing, is not a regular file, cannot be
        read as UTF-8, or when issues are found in ``final`` mode (they are
        printed to stderr).
    """
    args = parse_args()
    section_path = Path(args.section_file)
    if not section_path.exists():
        print(f"section file does not exist: {section_path}", file=sys.stderr)
        return 1
    # exists() is also true for directories; reject them before reading.
    if not section_path.is_file():
        print(f"section path is not a regular file: {section_path}", file=sys.stderr)
        return 1

    try:
        text = read_text(section_path)
    except (OSError, UnicodeDecodeError) as error:
        # Report unreadable or non-UTF-8 input like every other failure
        # instead of surfacing a traceback.
        print(f"could not read section file {section_path}: {error}", file=sys.stderr)
        return 1

    issues: list[str] = []
    SECTION_CHECKS[args.section](text, issues)

    if not issues:
        print("section draft is valid")
        return 0

    if args.mode == "draft":
        # Draft rounds are advisory: report the issues but do not fail.
        for issue in issues:
            print(f"WARNING: {issue}")
        return 0

    for issue in issues:
        print(issue, file=sys.stderr)
    return 1
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
# Script entry point: main()'s return value becomes the process exit status.
if __name__ == "__main__":
    exit_status = main()
    raise SystemExit(exit_status)
|
|
@@ -1,9 +1,19 @@
|
|
|
1
1
|
# Idea Artifact
|
|
2
2
|
|
|
3
|
+
## Scenario
|
|
4
|
+
|
|
5
|
+
- Real-world setting:
|
|
6
|
+
- Who would care if this problem were solved:
|
|
7
|
+
|
|
3
8
|
## One-Sentence Problem
|
|
4
9
|
|
|
5
10
|
State the problem in one short sentence.
|
|
6
11
|
|
|
12
|
+
## Why It Matters
|
|
13
|
+
|
|
14
|
+
- Why this matters in practice:
|
|
15
|
+
- What breaks if we ignore it:
|
|
16
|
+
|
|
7
17
|
## Failure Case
|
|
8
18
|
|
|
9
19
|
- Where current methods fail:
|
|
@@ -48,6 +58,32 @@ Suggested levels:
|
|
|
48
58
|
- Shared assumption:
|
|
49
59
|
- Why that assumption breaks here:
|
|
50
60
|
|
|
61
|
+
## Literature Scoping Bundle
|
|
62
|
+
|
|
63
|
+
- Default target source count:
|
|
64
|
+
- Actual source count:
|
|
65
|
+
- Closest prior bucket:
|
|
66
|
+
- Recent strong papers:
|
|
67
|
+
- Benchmark or evaluation papers:
|
|
68
|
+
- Survey or taxonomy papers:
|
|
69
|
+
- Adjacent-field papers:
|
|
70
|
+
- If the total is below the default target, why:
|
|
71
|
+
|
|
72
|
+
## Closest Prior Work Comparison
|
|
73
|
+
|
|
74
|
+
- Prior work 1:
|
|
75
|
+
- Citation:
|
|
76
|
+
- What it does:
|
|
77
|
+
- Why it is relevant here:
|
|
78
|
+
- Limitation for the current problem:
|
|
79
|
+
- Difference from our direction:
|
|
80
|
+
- Prior work 2:
|
|
81
|
+
- Citation:
|
|
82
|
+
- What it does:
|
|
83
|
+
- Why it is relevant here:
|
|
84
|
+
- Limitation for the current problem:
|
|
85
|
+
- Difference from our direction:
|
|
86
|
+
|
|
51
87
|
## Why Ours Is Different
|
|
52
88
|
|
|
53
89
|
- Existing methods rely on:
|
|
@@ -55,6 +91,11 @@ Suggested levels:
|
|
|
55
91
|
- Expected advantage:
|
|
56
92
|
- Evidence needed to prove the advantage:
|
|
57
93
|
|
|
94
|
+
## Rough Approach
|
|
95
|
+
|
|
96
|
+
- Plain-language description of how this would work:
|
|
97
|
+
- Why this design might resolve the failure case:
|
|
98
|
+
|
|
58
99
|
## Three Meaningful Points
|
|
59
100
|
|
|
60
101
|
1. Significance:
|
|
@@ -67,6 +108,7 @@ Suggested levels:
|
|
|
67
108
|
- Benchmark conventions:
|
|
68
109
|
- Typical datasets:
|
|
69
110
|
- Typical metrics:
|
|
111
|
+
- Credible baseline shortlist:
|
|
70
112
|
|
|
71
113
|
## Candidate Approaches
|
|
72
114
|
|
|
@@ -90,6 +132,7 @@ Suggested levels:
|
|
|
90
132
|
- Primary metric:
|
|
91
133
|
- Secondary metrics:
|
|
92
134
|
- Minimum viable experiment:
|
|
135
|
+
- Fastest way to falsify the idea:
|
|
93
136
|
|
|
94
137
|
## Critique Summary
|
|
95
138
|
|
|
@@ -24,6 +24,84 @@
|
|
|
24
24
|
- Limitation sources:
|
|
25
25
|
- Claims that still need more evidence:
|
|
26
26
|
|
|
27
|
+
## Asset Coverage Targets
|
|
28
|
+
|
|
29
|
+
- Core asset floor:
|
|
30
|
+
- Required coverage categories:
|
|
31
|
+
- Current planned core assets:
|
|
32
|
+
- Coverage risks or gaps:
|
|
33
|
+
|
|
34
|
+
## Table Plan
|
|
35
|
+
|
|
36
|
+
- Main results table:
|
|
37
|
+
- Asset file:
|
|
38
|
+
- Section:
|
|
39
|
+
- Message:
|
|
40
|
+
- Evidence:
|
|
41
|
+
- Status:
|
|
42
|
+
- Ablation table:
|
|
43
|
+
- Asset file:
|
|
44
|
+
- Section:
|
|
45
|
+
- Message:
|
|
46
|
+
- Evidence:
|
|
47
|
+
- Status:
|
|
48
|
+
|
|
49
|
+
## Figure Plan
|
|
50
|
+
|
|
51
|
+
- Problem setting or teaser figure:
|
|
52
|
+
- Asset file:
|
|
53
|
+
- Section:
|
|
54
|
+
- Figure intent:
|
|
55
|
+
- Evidence:
|
|
56
|
+
- Status:
|
|
57
|
+
- Method overview figure:
|
|
58
|
+
- Asset file:
|
|
59
|
+
- Section:
|
|
60
|
+
- Figure intent:
|
|
61
|
+
- Evidence:
|
|
62
|
+
- Status:
|
|
63
|
+
- Results overview figure:
|
|
64
|
+
- Asset file:
|
|
65
|
+
- Section:
|
|
66
|
+
- Figure intent:
|
|
67
|
+
- Evidence:
|
|
68
|
+
- Status:
|
|
69
|
+
|
|
70
|
+
## Analysis Asset Plan
|
|
71
|
+
|
|
72
|
+
- Analysis asset:
|
|
73
|
+
- Asset file:
|
|
74
|
+
- Asset type:
|
|
75
|
+
- Section:
|
|
76
|
+
- Asset intent:
|
|
77
|
+
- Evidence:
|
|
78
|
+
- Status:
|
|
79
|
+
|
|
80
|
+
## Citation Plan
|
|
81
|
+
|
|
82
|
+
- Background anchor:
|
|
83
|
+
- Citation or source anchor:
|
|
84
|
+
- Section:
|
|
85
|
+
- Why it matters:
|
|
86
|
+
- Status:
|
|
87
|
+
- Closest prior work:
|
|
88
|
+
- Citation or source anchor:
|
|
89
|
+
- Section:
|
|
90
|
+
- Why it matters:
|
|
91
|
+
- Status:
|
|
92
|
+
- Metric or benchmark source:
|
|
93
|
+
- Citation or source anchor:
|
|
94
|
+
- Section:
|
|
95
|
+
- Why it matters:
|
|
96
|
+
- Status:
|
|
97
|
+
|
|
98
|
+
## Section-to-Asset Map
|
|
99
|
+
|
|
100
|
+
- Introduction:
|
|
101
|
+
- Method:
|
|
102
|
+
- Experiments:
|
|
103
|
+
- Related Work:
|
|
104
|
+
|
|
27
105
|
## Writing Order
|
|
28
106
|
|
|
29
107
|
1. First section target:
|
|
@@ -7,5 +7,6 @@
|
|
|
7
7
|
"deliverables_root": "docs/research",
|
|
8
8
|
"paper_template_root": "",
|
|
9
9
|
"paper_template_decision": "unconfirmed",
|
|
10
|
-
"paper_template_final_reminder_acknowledged": false
|
|
10
|
+
"paper_template_final_reminder_acknowledged": false,
|
|
11
|
+
"paper_language_finalization_decision": "unconfirmed"
|
|
11
12
|
}
|
|
@@ -68,4 +68,4 @@ If `eval-protocol.md` declares structured rung entries, auto mode follows those
|
|
|
68
68
|
|
|
69
69
|
- Stop conditions:
|
|
70
70
|
- Escalation conditions:
|
|
71
|
-
- Canonical promotion writeback: update `.lab/context/data-decisions.md`, `.lab/context/decisions.md`, `.lab/context/state.md`, and `.lab/context/
|
|
71
|
+
- Canonical promotion writeback: update `.lab/context/data-decisions.md`, `.lab/context/decisions.md`, `.lab/context/state.md`, and `.lab/context/workflow-state.md`.
|
|
@@ -5,15 +5,15 @@
|
|
|
5
5
|
- Action:
|
|
6
6
|
- Success signal:
|
|
7
7
|
|
|
8
|
-
##
|
|
8
|
+
## After Completion
|
|
9
9
|
|
|
10
10
|
- Next action:
|
|
11
11
|
|
|
12
|
-
## If
|
|
12
|
+
## If Blocked
|
|
13
13
|
|
|
14
14
|
- Fallback action:
|
|
15
15
|
|
|
16
|
-
##
|
|
16
|
+
## Escalation
|
|
17
17
|
|
|
18
18
|
- Question:
|
|
19
|
-
-
|
|
19
|
+
- Escalate when:
|
|
@@ -3,6 +3,8 @@
|
|
|
3
3
|
## Active Stage
|
|
4
4
|
|
|
5
5
|
- Stage:
|
|
6
|
+
- Current objective:
|
|
7
|
+
- Immediate next action:
|
|
6
8
|
|
|
7
9
|
## Mission
|
|
8
10
|
|
|
@@ -11,10 +13,15 @@ One sentence describing the active research mission.
|
|
|
11
13
|
## Best Current Path
|
|
12
14
|
|
|
13
15
|
- Approved direction:
|
|
14
|
-
-
|
|
16
|
+
- Strongest supported claim:
|
|
15
17
|
- Auto mode:
|
|
16
18
|
- Auto objective:
|
|
17
19
|
- Auto decision:
|
|
20
|
+
- Collaborator report mode:
|
|
21
|
+
- Canonical context readiness:
|
|
22
|
+
- Method name:
|
|
23
|
+
- Primary metrics:
|
|
24
|
+
- Secondary metrics:
|
|
18
25
|
|
|
19
26
|
## Main Risk
|
|
20
27
|
|
|
@@ -1,12 +1,23 @@
|
|
|
1
1
|
# Research Summary
|
|
2
2
|
|
|
3
3
|
## Current Direction
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
4
|
+
- Mission:
|
|
5
|
+
- Approved direction:
|
|
6
|
+
- Active stage:
|
|
7
|
+
- Current objective:
|
|
7
8
|
- Auto mode:
|
|
8
9
|
- Auto objective:
|
|
9
10
|
- Auto decision:
|
|
11
|
+
- Collaborator report mode:
|
|
12
|
+
- Canonical context readiness:
|
|
13
|
+
- Method name:
|
|
14
|
+
- Contribution bullets:
|
|
15
|
+
- Eval objective:
|
|
16
|
+
- Primary metrics:
|
|
17
|
+
- Secondary metrics:
|
|
18
|
+
- Dataset package:
|
|
19
|
+
- Benchmark role:
|
|
20
|
+
- Comparison suite:
|
|
10
21
|
|
|
11
22
|
## Strongest Evidence
|
|
12
23
|
|
|
@@ -34,17 +34,22 @@ Use this skill when the user invokes `/lab:*` or asks for the structured researc
|
|
|
34
34
|
### `/lab:idea`
|
|
35
35
|
|
|
36
36
|
- Search relevant literature, baselines, datasets, and evaluation metrics before proposing a plan.
|
|
37
|
+
- Build a literature-scoping bundle before claiming novelty. The default target is 20 relevant sources unless the field is genuinely too narrow and that exception is written down.
|
|
37
38
|
- Read `.lab/context/mission.md` and `.lab/context/open-questions.md` before drafting.
|
|
39
|
+
- Read `.lab/config/workflow.json` before drafting and follow its `workflow_language` for idea artifacts.
|
|
38
40
|
- Ask one clarifying question at a time when critical ambiguity remains.
|
|
39
|
-
- State the problem, the failure case, and why the problem matters before proposing solutions.
|
|
41
|
+
- State the scenario, the problem, the failure case, and why the problem matters before proposing solutions.
|
|
40
42
|
- Classify the idea by contribution category and breakthrough level.
|
|
41
43
|
- Compare against existing methods explicitly and state why the idea should be better.
|
|
44
|
+
- Include a closest-prior-work comparison and a plain-language description of how the proposed direction would work.
|
|
42
45
|
- Distinguish sourced evidence from generated innovation claims.
|
|
43
46
|
- End with three meaningful points that are clear, short, and easy to scan.
|
|
44
47
|
- Produce 2-3 candidate approaches with trade-offs before recommending one.
|
|
45
48
|
- Critique the idea before converging on it.
|
|
49
|
+
- Include a minimum viable experiment before approval.
|
|
46
50
|
- Keep an explicit approval gate before `/lab:spec`.
|
|
47
51
|
- Write idea artifacts with the template in `.lab/.managed/templates/idea.md`.
|
|
52
|
+
- Run `.lab/.managed/scripts/validate_idea_artifact.py --idea <idea-artifact> --workflow-config .lab/config/workflow.json` before treating the idea as converged.
|
|
48
53
|
- Update `.lab/context/mission.md`, `.lab/context/decisions.md`, and `.lab/context/open-questions.md` after convergence.
|
|
49
54
|
- Do not leave `.lab/context/mission.md` as a template shell once the problem statement and approved direction are known.
|
|
50
55
|
- Do not implement code in this stage.
|
|
@@ -91,9 +96,9 @@ Use this skill when the user invokes `/lab:*` or asks for the structured researc
|
|
|
91
96
|
- Reuse `/lab:run`, `/lab:iterate`, `/lab:review`, `/lab:report`, and optional `/lab:write` instead of inventing a second workflow.
|
|
92
97
|
- Do not automatically change the research mission, paper-facing framing, or core claims.
|
|
93
98
|
- You may add exploratory datasets, benchmarks, and comparison methods inside the approved exploration envelope.
|
|
94
|
-
- You may promote an exploratory addition to the primary package only after the promotion policy in `auto-mode.md` is satisfied and the promotion is written back into `.lab/context/data-decisions.md`, `.lab/context/decisions.md`, `.lab/context/state.md`, and `.lab/context/
|
|
99
|
+
- You may promote an exploratory addition to the primary package only after the promotion policy in `auto-mode.md` is satisfied and the promotion is written back into `.lab/context/data-decisions.md`, `.lab/context/decisions.md`, `.lab/context/state.md`, and `.lab/context/workflow-state.md`.
|
|
95
100
|
- Poll long-running commands until they complete, time out, or hit a stop condition.
|
|
96
|
-
- Update `.lab/context/auto-status.md`, `.lab/context/state.md`, `.lab/context/workflow-state.md`, `.lab/context/decisions.md`, `.lab/context/data-decisions.md`, `.lab/context/evidence-index.md
|
|
101
|
+
- Update `.lab/context/auto-status.md`, `.lab/context/state.md`, `.lab/context/workflow-state.md`, `.lab/context/decisions.md`, `.lab/context/data-decisions.md`, and `.lab/context/evidence-index.md` as the campaign advances, then refresh the derived handoff files.
|
|
97
102
|
- Keep an explicit approval gate when a proposed action would leave the frozen core defined by the auto-mode contract.
|
|
98
103
|
|
|
99
104
|
### `/lab:spec`
|
|
@@ -165,26 +170,21 @@ Use this skill when the user invokes `/lab:*` or asks for the structured researc
|
|
|
165
170
|
|
|
166
171
|
- Start only after `report` artifacts are stable enough to support paper claims.
|
|
167
172
|
- Start only after an approved framing artifact exists at `.lab/writing/framing.md`.
|
|
168
|
-
- Read `.lab/config/workflow.json` before drafting and enforce its `paper_language
|
|
169
|
-
- If `paper_template_root` is empty and `paper_template_decision` is `unconfirmed`, ask once whether to continue with the managed default scaffold or attach a template directory first; persist the answer before drafting `.tex`.
|
|
170
|
-
- If the project is still on the default scaffold at a final export or final-draft boundary and `paper_template_final_reminder_acknowledged` is `false`, ask one final reminder question before finalizing.
|
|
173
|
+
- Read `.lab/config/workflow.json` before drafting and enforce its `workflow_language`, `paper_language`, and `paper_format`.
|
|
171
174
|
- Read `.lab/context/mission.md`, `.lab/context/decisions.md`, `.lab/context/evidence-index.md`, and `.lab/context/data-decisions.md` before drafting.
|
|
172
175
|
- Write one paper section or one explicit subproblem per round.
|
|
176
|
+
- Ordinary manuscript drafting rounds should follow `workflow_language`.
|
|
177
|
+
- If `workflow_language` and `paper_language` differ, the first final-draft or export round must ask once whether to keep the draft language or convert the final manuscript to `paper_language`, then persist that choice.
|
|
173
178
|
- Bind each claim to evidence from `report`, iteration reports, or normalized summaries.
|
|
174
|
-
-
|
|
175
|
-
-
|
|
176
|
-
-
|
|
177
|
-
-
|
|
178
|
-
- Load only the current section guide, the matching examples index when one exists, 1-2 matching concrete example files, plus `paper-review.md` and `does-my-writing-flow-source.md`.
|
|
179
|
-
- Build a compact mini-outline before prose.
|
|
180
|
-
- Build the paper asset plan before prose when the section carries method or experiments claims.
|
|
179
|
+
- Use the write-stage contract in `.codex/skills/lab/stages/write.md` or `.claude/skills/lab/stages/write.md` as the single source of truth for template choice, paper-plan requirements, section-specific references, validator calls, asset coverage, and final manuscript gates.
|
|
180
|
+
- Use the vendored paper-writing references under `skills/lab/references/paper-writing/` and the matching example-bank files under `skills/lab/references/paper-writing/examples/`.
|
|
181
|
+
- Treat `.lab/writing/plan.md` as the write-time source of truth for tables, figures, citations, and asset coverage.
|
|
182
|
+
- Treat section-quality, claim-safety, and manuscript-delivery checks as the canonical acceptance gates for final-draft or export rounds.
|
|
181
183
|
- For each subsection, explicitly cover motivation, design, and technical advantage when applicable.
|
|
182
184
|
- Keep terminology stable across rounds and sections.
|
|
183
185
|
- If a claim is not supported by evidence, weaken or remove it.
|
|
184
186
|
- Treat tables, figures, citations, and bibliography as core manuscript content rather than optional polish.
|
|
185
187
|
- Keep paper-facing LaTeX free of absolute local paths, rerun ids, shell transcripts, and internal workflow provenance.
|
|
186
|
-
- Materialize real LaTeX tables and figure placeholders instead of leaving all evidence inside prose paragraphs.
|
|
187
|
-
- Run `.lab/.managed/scripts/validate_manuscript_delivery.py --paper-dir <deliverables_root>/paper` before accepting a final-draft or export round.
|
|
188
188
|
- Before finalizing a round, append and answer the five-dimension self-review checklist and revise unresolved items.
|
|
189
189
|
- Apply paper-writing discipline without changing experimental truth.
|
|
190
190
|
- If the evidence is insufficient, stop and route back to `review` or `iterate`.
|
|
@@ -199,7 +199,8 @@ Use this skill when the user invokes `/lab:*` or asks for the structured researc
|
|
|
199
199
|
- No unconstrained auto mode. Every `/lab:auto` campaign must declare allowed stages, stop conditions, and a promotion policy in `.lab/context/auto-mode.md`.
|
|
200
200
|
- No auto start without an explicit autonomy level and `Approval status: approved`.
|
|
201
201
|
- No final report without validated normalized results.
|
|
202
|
-
- No paper-writing round without stable report artifacts, an approved framing artifact, evidence links, LaTeX manuscript output
|
|
202
|
+
- No paper-writing round without stable report artifacts, an approved framing artifact, evidence links, and LaTeX manuscript output.
|
|
203
|
+
- No final-draft or export round without passing section-quality, claim-safety, and manuscript-delivery validation.
|
|
203
204
|
|
|
204
205
|
## References
|
|
205
206
|
|
package/package-assets/shared/skills/lab/references/paper-writing/examples/abstract/template-b.md
CHANGED
|
@@ -16,7 +16,7 @@
|
|
|
16
16
|
|
|
17
17
|
% Introduce the technical contribution that implements the insight in one to two sentences (usually mention the technical term/name only, without describing every detailed step. The term should be easy to understand and should not create a jump in reading. This ability is very important for writing a good abstract.)
|
|
18
18
|
%% Example 1: To do this, we first present a label-efficient depth estimation framework using the internal representations of diffusion models. At the sampling phase, we utilize two guidance techniques to self-condition the generated image using the estimated depth map, the first of which uses pseudo-labeling, and the subsequent one uses a depth-domain diffusion prior.
|
|
19
|
-
%% Example 2: To this end, we propose
|
|
19
|
+
%% Example 2: To this end, we propose AnchorField, a structured representation in which different observations share the same set of latent codes anchored to a deformable support.
|
|
20
20
|
|
|
21
21
|
% Introduce the benefits of technical novelty
|
|
22
22
|
%% Example 2: so that the observations across frames can be naturally integrated. The deformable mesh also provides geometric guidance for the network to learn 3D representations more efficiently.
|
|
@@ -29,6 +29,6 @@
|
|
|
29
29
|
1. `This paper addresses the challenge of novel view synthesis for a human performer from a very sparse set of camera views.`
|
|
30
30
|
2. `... representation learning will be ill-posed if the views are highly sparse.`
|
|
31
31
|
3. `To solve this ill-posed problem, our key idea is to integrate observations over video frames.`
|
|
32
|
-
4. `To this end, we propose
|
|
32
|
+
4. `To this end, we propose AnchorField ...`
|
|
33
33
|
5. `... observations across frames can be naturally integrated ... provides geometric guidance ...`
|
|
34
34
|
6. `Experiments show [main result].`
|
|
@@ -6,22 +6,22 @@ boundary explicit.
|
|
|
6
6
|
```tex
|
|
7
7
|
\section{Conclusion}
|
|
8
8
|
|
|
9
|
-
This paper shows that adding a structured
|
|
10
|
-
|
|
11
|
-
protocol. Across the
|
|
9
|
+
This paper shows that adding a structured intermediate module together with a
|
|
10
|
+
lightweight adjustment stage improves performance under a fixed evaluation
|
|
11
|
+
protocol. Across the benchmark families used in this work, the full model
|
|
12
12
|
consistently matches or exceeds the strongest baselines and remains stronger
|
|
13
13
|
than the key ablated variants. This makes the main claim narrower than a
|
|
14
|
-
universal superiority claim but stronger than a single-
|
|
14
|
+
universal superiority claim but stronger than a single-setting win.
|
|
15
15
|
|
|
16
|
-
We do not claim that the current method solves
|
|
17
|
-
or that every design choice helps equally on every benchmark. In
|
|
18
|
-
|
|
19
|
-
which means its value should be interpreted as
|
|
20
|
-
a guaranteed gain. That boundary is consistent
|
|
21
|
-
practice, which argues for claim discipline and
|
|
22
|
-
rather than broad overgeneralization~\cite{carlini2019evaluating}.
|
|
16
|
+
We do not claim that the current method solves the broader problem in every
|
|
17
|
+
domain or that every design choice helps equally on every benchmark. In
|
|
18
|
+
particular, the adjustment stage appears beneficial in some settings and
|
|
19
|
+
neutral in others, which means its value should be interpreted as
|
|
20
|
+
setting-dependent rather than as a guaranteed gain. That boundary is consistent
|
|
21
|
+
with recent benchmarking practice, which argues for claim discipline and
|
|
22
|
+
protocol-specific interpretation rather than broad overgeneralization~\cite{carlini2019evaluating}.
|
|
23
23
|
|
|
24
24
|
The most useful next step is to extend the evaluation to a broader set of
|
|
25
|
-
benchmark slices and to test whether the same
|
|
26
|
-
remains useful when the
|
|
25
|
+
benchmark slices and to test whether the same backbone-versus-adjustment split
|
|
26
|
+
remains useful when the data distribution shifts more aggressively.
|
|
27
27
|
```
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# Main Results and Ablation LaTeX Example
|
|
2
2
|
|
|
3
|
-
This file is a complete
|
|
3
|
+
This file is a complete manuscript-ready LaTeX example for the experiments section.
|
|
4
4
|
Reuse the structure, caption logic, and prose-to-table linkage. Replace the
|
|
5
5
|
placeholder methods, metrics, and values with the current project's evidence.
|
|
6
6
|
|
|
@@ -14,13 +14,13 @@ Source inspiration:
|
|
|
14
14
|
|
|
15
15
|
```tex
|
|
16
16
|
\begin{table}[t]
|
|
17
|
-
\caption{Main benchmark results under the
|
|
17
|
+
\caption{Main benchmark results under the fixed evaluation protocol. Higher is better on all metrics.}
|
|
18
18
|
\label{tab:main-results}
|
|
19
19
|
\centering
|
|
20
20
|
\resizebox{0.92\linewidth}{!}{
|
|
21
21
|
\begin{tabular}{lccc}
|
|
22
22
|
\toprule
|
|
23
|
-
Method &
|
|
23
|
+
Method & Primary Metric $\uparrow$ & Secondary Metric $\uparrow$ & Error Metric $\downarrow$ \\
|
|
24
24
|
\midrule
|
|
25
25
|
Strongest baseline & 0.1421 & 0.0873 & 0.0612 \\
|
|
26
26
|
Closest prior work & 0.1488 & 0.0915 & 0.0544 \\
|
|
@@ -32,7 +32,7 @@ Ours & \textbf{0.1564} & \textbf{0.0987} & \textbf{0.0418} \\
|
|
|
32
32
|
```
|
|
33
33
|
|
|
34
34
|
Table message:
|
|
35
|
-
- `Does the proposed method beat the strongest baselines under the
|
|
35
|
+
- `Does the proposed method beat the strongest baselines under the fixed evaluation protocol?`
|
|
36
36
|
|
|
37
37
|
## Ablation Table
|
|
38
38
|
|
|
@@ -47,7 +47,7 @@ Variant & AUUC $\uparrow$ \\
|
|
|
47
47
|
\midrule
|
|
48
48
|
Ours & \textbf{0.1564} \\
|
|
49
49
|
w/o structure module & 0.1497 \\
|
|
50
|
-
w/o
|
|
50
|
+
w/o final adjustment stage & 0.1510 \\
|
|
51
51
|
w/ shuffled auxiliary signal & 0.1458 \\
|
|
52
52
|
\bottomrule
|
|
53
53
|
\end{tabular}
|
|
@@ -65,19 +65,20 @@ Table message:
|
|
|
65
65
|
|
|
66
66
|
Table~\ref{tab:main-results} answers the main ranking question: whether the full
|
|
67
67
|
method remains stronger than the closest prior work and the strongest practical
|
|
68
|
-
baseline under the
|
|
69
|
-
while also reducing
|
|
70
|
-
trading
|
|
68
|
+
baseline under the fixed evaluation protocol. Our method achieves the best
|
|
69
|
+
primary and secondary metrics while also reducing the error metric, which means
|
|
70
|
+
the gain is not coming from trading one objective against stability.
|
|
71
71
|
|
|
72
72
|
Table~\ref{tab:ablations} then asks a narrower mechanism question. Removing the
|
|
73
|
-
structure module causes the largest drop, so the main gain is tied to
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
claim that
|
|
77
|
-
driver of the result. The shuffled-signal variant acts as a
|
|
78
|
-
shows that the gain does not survive when the auxiliary
|
|
73
|
+
structure module causes the largest drop, so the main gain is tied to explicit
|
|
74
|
+
structure modeling rather than to a generic increase in capacity. Removing the
|
|
75
|
+
final adjustment stage leads to a smaller but still visible drop, which
|
|
76
|
+
supports the claim that the adjustment helps the exposed prediction without
|
|
77
|
+
being the sole driver of the result. The shuffled-signal variant acts as a
|
|
78
|
+
negative control and shows that the gain does not survive when the auxiliary
|
|
79
|
+
information is broken.
|
|
79
80
|
|
|
80
|
-
One caveat is that the
|
|
81
|
-
so the paper should not overclaim that every component helps equally
|
|
82
|
-
dataset.
|
|
81
|
+
One caveat is that the final adjustment gain may remain neutral in some
|
|
82
|
+
settings, so the paper should not overclaim that every component helps equally
|
|
83
|
+
on every dataset.
|
|
83
84
|
```
|
package/package-assets/shared/skills/lab/references/paper-writing/examples/experiments-examples.md
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# Experiments Example Patterns
|
|
2
2
|
|
|
3
|
-
Use these examples when turning validated results into
|
|
3
|
+
Use these examples when turning validated results into manuscript-ready LaTeX assets.
|
|
4
4
|
The referenced files contain complete LaTeX environments and section-level prose
|
|
5
5
|
glue, not just checklists.
|
|
6
6
|
|