superlab 0.1.65 → 0.1.67
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/lab_write_contract.json +4 -4
- package/package-assets/claude/commands/lab/write.md +2 -1
- package/package-assets/claude/commands/lab-write.md +2 -1
- package/package-assets/claude/commands/lab:write.md +2 -1
- package/package-assets/claude/commands/lab/357/274/232write.md +2 -1
- package/package-assets/codex/prompts/lab/write.md +2 -1
- package/package-assets/codex/prompts/lab-write.md +2 -1
- package/package-assets/codex/prompts/lab:write.md +2 -1
- package/package-assets/codex/prompts/lab/357/274/232write.md +2 -1
- package/package-assets/shared/lab/.managed/scripts/validate_manuscript_delivery.py +42 -61
- package/package-assets/shared/lab/.managed/scripts/validate_paper_plan.py +2 -0
- package/package-assets/shared/lab/.managed/scripts/validate_reference_consumption.py +230 -0
- package/package-assets/shared/lab/.managed/scripts/validate_section_draft.py +50 -106
- package/package-assets/shared/lab/.managed/templates/paper-figure.tex +2 -1
- package/package-assets/shared/lab/.managed/templates/paper-plan.md +4 -4
- package/package-assets/shared/lab/.managed/templates/reference-consumption-plan.md +34 -0
- package/package-assets/shared/lab/.managed/templates/write-iteration.md +15 -27
- package/package-assets/shared/skills/lab/SKILL.md +3 -0
- package/package-assets/shared/skills/lab/references/paper-writing/examples/experiments/figure-placeholder-and-discussion.md +10 -6
- package/package-assets/shared/skills/lab/references/paper-writing/examples/experiments-examples.md +1 -1
- package/package-assets/shared/skills/lab/references/paper-writing/section-style-policies.md +12 -0
- package/package-assets/shared/skills/lab/stages/write.md +27 -18
- package/package.json +1 -1
- package/package-assets/shared/lab/.managed/scripts/extract_reference_paper_structure.py +0 -910
- package/package-assets/shared/lab/.managed/templates/reference-template-intake.md +0 -40
|
@@ -39,6 +39,29 @@ REQUIRED_TABLE_NOTE_MARKERS = (
|
|
|
39
39
|
)
|
|
40
40
|
WIDTH_CONTROL_NOTE_MARKER = "% Width control:"
|
|
41
41
|
TABLE_ABBREVIATION_EXCEPTIONS = {"TODO", "TBD"}
|
|
42
|
+
SERVICE_STYLE_PHRASES = (
|
|
43
|
+
"user asked",
|
|
44
|
+
"the user asked",
|
|
45
|
+
"as requested by the user",
|
|
46
|
+
"let me explain",
|
|
47
|
+
"i will explain",
|
|
48
|
+
"用户说",
|
|
49
|
+
"用户要求",
|
|
50
|
+
"按你的要求",
|
|
51
|
+
"我来解释",
|
|
52
|
+
"我会说明",
|
|
53
|
+
)
|
|
54
|
+
WORKFLOW_ONLY_MANUSCRIPT_PHRASES = (
|
|
55
|
+
"Figure intent:",
|
|
56
|
+
"Asset intent:",
|
|
57
|
+
"figure intent:",
|
|
58
|
+
"asset intent:",
|
|
59
|
+
"图的意图",
|
|
60
|
+
"资产意图",
|
|
61
|
+
"占位符",
|
|
62
|
+
"workflow-language",
|
|
63
|
+
"translation layer",
|
|
64
|
+
)
|
|
42
65
|
|
|
43
66
|
|
|
44
67
|
def parse_args():
|
|
@@ -53,6 +76,15 @@ def read_text(path: Path) -> str:
|
|
|
53
76
|
return path.read_text(encoding="utf-8")
|
|
54
77
|
|
|
55
78
|
|
|
79
|
+
def strip_latex_comments(text: str) -> str:
    """Return *text* with LaTeX ``%`` line comments removed.

    A ``%`` starts a comment only when it is not escaped. ``\\%`` is a
    literal percent sign and must stay in the paper-facing text, otherwise
    everything after it on the line would be hidden from the phrase checks.
    A ``%`` preceded by an even run of backslashes (for example the line
    break ``\\\\`` followed by ``%``) is still a comment; the backslashes
    are kept and only the comment is dropped.
    """
    # Group 1 captures complete ``\\`` pairs so they survive the substitution.
    return re.sub(r"(?<!\\)((?:\\\\)*)%.*", r"\1", text)
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def contains_any(text: str, needles: tuple[str, ...]) -> bool:
    """Report whether any needle occurs in *text*, ignoring case.

    Matching is plain substring containment after lowercasing both sides.
    An empty *needles* tuple yields ``False``.
    """
    haystack = text.lower()
    for candidate in needles:
        if candidate.lower() in haystack:
            return True
    return False
|
|
86
|
+
|
|
87
|
+
|
|
56
88
|
def find_workflow_config(start_path: Path) -> Path | None:
|
|
57
89
|
search_roots = [start_path, *start_path.parents]
|
|
58
90
|
for root in search_roots:
|
|
@@ -149,54 +181,6 @@ def extract_table_note(text: str) -> str:
|
|
|
149
181
|
return "\n".join(line.strip() for line in text.splitlines() if line.strip().startswith("%"))
|
|
150
182
|
|
|
151
183
|
|
|
152
|
-
def extract_tabular_specs(text: str) -> list[str]:
    """Collect the column specification of every ``\\begin{tabular}`` in *text*.

    An optional ``[...]`` placement argument before the specification is
    skipped. Specifications are returned in document order.
    """
    spec_pattern = re.compile(r"\\begin\{tabular\}(?:\[[^\]]*\])?\{([^}]*)\}")
    return [found.group(1) for found in spec_pattern.finditer(text)]
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
def extract_tabular_bodies(text: str) -> list[str]:
    """Return the content between each ``\\begin{tabular}{...}`` and its ``\\end{tabular}``.

    The match is non-greedy, so each body stops at the first following
    ``\\end{tabular}``. A ``tabular`` without a closing tag is ignored.
    """
    body_pattern = re.compile(
        r"\\begin\{tabular\}(?:\[[^\]]*\])?\{[^}]*\}([\s\S]*?)\\end\{tabular\}"
    )
    # With a single capture group, findall yields that group's text per match.
    return body_pattern.findall(text)
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
def strip_latex_for_table_cell(cell: str) -> str:
    """Reduce a raw LaTeX table cell to its visible text.

    Steps, in order: drop ``%`` comments, unwrap ``\\textbf``/``\\mathbf``/
    ``\\emph`` so their argument survives, blank out any remaining
    ``\\command`` (with an optional ``[...]`` argument), replace ``$`` with
    a space, and trim surrounding whitespace.

    Only an unescaped ``%`` starts a comment. An escaped ``\\%`` is a
    literal percent sign, and treating it as a comment would discard any
    numbers that follow it in the same cell.
    """
    cell = re.sub(r"(?<!\\)%.*", "", cell)
    cell = re.sub(r"\\(?:textbf|mathbf|emph)\{([^}]*)\}", r"\1", cell)
    cell = re.sub(r"\\[A-Za-z@*]+(?:\[[^\]]*\])?", " ", cell)
    cell = cell.replace("$", " ")
    return cell.strip()
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
def check_numeric_precision_consistency(text: str, issues: list[str], label: str):
    """Flag tabular columns whose decimal numbers use mixed precision.

    For every tabular body in *text*, rows are split on ``\\\\``, booktabs
    rule commands are blanked out, and rows without ``&`` are skipped. For
    each cell the number of digits after the decimal point is recorded per
    column. The first tabular with an inconsistent column appends a single
    message (1-based column numbers) to *issues* and ends the check.
    """
    rule_pattern = r"\\(?:toprule|midrule|bottomrule|cmidrule)(?:\{[^}]*\})?"
    decimal_pattern = r"(?<![A-Za-z])[-+]?\d+\.(\d+)"

    for tabular_body in extract_tabular_bodies(text):
        digits_by_column: dict[int, set[int]] = {}
        for raw_row in re.split(r"\\\\", tabular_body):
            cleaned_row = re.sub(rule_pattern, " ", raw_row)
            if "&" not in cleaned_row:
                continue
            for column, raw_cell in enumerate(cleaned_row.split("&")):
                fractions = re.findall(decimal_pattern, strip_latex_for_table_cell(raw_cell))
                if fractions:
                    digits_by_column.setdefault(column, set()).update(
                        len(fraction) for fraction in fractions
                    )

        mixed_columns = [
            str(column + 1)
            for column in sorted(digits_by_column)
            if len(digits_by_column[column]) > 1
        ]
        if mixed_columns:
            issues.append(
                f"{label} has inconsistent numeric precision in column(s): {', '.join(mixed_columns)}"
            )
            # Only the first offending tabular is reported.
            return
|
|
198
|
-
|
|
199
|
-
|
|
200
184
|
def detect_uppercase_abbreviations(text: str) -> set[str]:
|
|
201
185
|
return {
|
|
202
186
|
token
|
|
@@ -258,16 +242,8 @@ def check_table_file(path: Path, issues: list[str], label: str):
|
|
|
258
242
|
issues.append(f"{label} must contain a table environment")
|
|
259
243
|
if r"\caption{" not in text or r"\label{" not in text:
|
|
260
244
|
issues.append(f"{label} must contain both caption and label")
|
|
261
|
-
first_caption = text.find(r"\caption{")
|
|
262
|
-
first_tabular = text.find(r"\begin{tabular}")
|
|
263
|
-
if first_caption != -1 and first_tabular != -1 and first_caption > first_tabular:
|
|
264
|
-
issues.append(f"{label} should place its caption before the tabular body")
|
|
265
245
|
if not all(token in text for token in (r"\toprule", r"\midrule", r"\bottomrule")):
|
|
266
246
|
issues.append(f"{label} must use booktabs structure")
|
|
267
|
-
if any("|" in spec for spec in extract_tabular_specs(text)) or r"\vline" in text:
|
|
268
|
-
issues.append(f"{label} must not use vertical table rules; use booktabs spacing instead")
|
|
269
|
-
if r"\hline" in text or r"\cline" in text:
|
|
270
|
-
issues.append(f"{label} must not mix legacy \\hline/\\cline rules with booktabs tables")
|
|
271
247
|
if not all(marker in text for marker in REQUIRED_TABLE_NOTE_MARKERS):
|
|
272
248
|
issues.append(f"{label} must include a local table note scaffold")
|
|
273
249
|
caption_text = extract_caption(text)
|
|
@@ -290,7 +266,6 @@ def check_table_file(path: Path, issues: list[str], label: str):
|
|
|
290
266
|
continue
|
|
291
267
|
if value < 3.0:
|
|
292
268
|
issues.append(f"{label} sets \\tabcolsep below the safe range for paper-facing main tables")
|
|
293
|
-
check_numeric_precision_consistency(text, issues, label)
|
|
294
269
|
|
|
295
270
|
|
|
296
271
|
def check_figure_file(path: Path, issues: list[str], label: str):
|
|
@@ -309,8 +284,11 @@ def check_figure_file(path: Path, issues: list[str], label: str):
|
|
|
309
284
|
issues.append(f"{label} must contain a figure environment")
|
|
310
285
|
if r"\caption{" not in text or r"\label{" not in text:
|
|
311
286
|
issues.append(f"{label} must contain both caption and label")
|
|
312
|
-
|
|
313
|
-
|
|
287
|
+
paper_facing_text = strip_latex_comments(text)
|
|
288
|
+
if contains_any(paper_facing_text, SERVICE_STYLE_PHRASES):
|
|
289
|
+
issues.append(f"{label} contains service-style or AI-assistant meta language")
|
|
290
|
+
if contains_any(paper_facing_text, WORKFLOW_ONLY_MANUSCRIPT_PHRASES):
|
|
291
|
+
issues.append(f"{label} contains workflow-only placeholder language in paper-facing text")
|
|
314
292
|
|
|
315
293
|
|
|
316
294
|
def check_analysis_asset(path: Path, issues: list[str]):
|
|
@@ -322,8 +300,11 @@ def check_analysis_asset(path: Path, issues: list[str]):
|
|
|
322
300
|
issues.append("analysis/analysis-asset.tex must contain a table or figure environment")
|
|
323
301
|
if r"\caption{" not in text or r"\label{" not in text:
|
|
324
302
|
issues.append("analysis/analysis-asset.tex must contain both caption and label")
|
|
325
|
-
|
|
326
|
-
|
|
303
|
+
paper_facing_text = strip_latex_comments(text)
|
|
304
|
+
if contains_any(paper_facing_text, SERVICE_STYLE_PHRASES):
|
|
305
|
+
issues.append("analysis/analysis-asset.tex contains service-style or AI-assistant meta language")
|
|
306
|
+
if contains_any(paper_facing_text, WORKFLOW_ONLY_MANUSCRIPT_PHRASES):
|
|
307
|
+
issues.append("analysis/analysis-asset.tex contains workflow-only placeholder language in paper-facing text")
|
|
327
308
|
|
|
328
309
|
|
|
329
310
|
def require_section_reference(section_text: str, label: str | None, issues: list[str], section_name: str, asset_name: str):
|
|
@@ -82,6 +82,7 @@ TABLE_DETAIL_FIELDS = (
|
|
|
82
82
|
FIGURE_DETAIL_FIELDS = (
|
|
83
83
|
"Asset file",
|
|
84
84
|
"Section",
|
|
85
|
+
"Figure role",
|
|
85
86
|
"Figure intent",
|
|
86
87
|
"Evidence",
|
|
87
88
|
"Status",
|
|
@@ -95,6 +96,7 @@ ANALYSIS_DETAIL_FIELDS = (
|
|
|
95
96
|
"Asset file",
|
|
96
97
|
"Asset type",
|
|
97
98
|
"Section",
|
|
99
|
+
"Asset role",
|
|
98
100
|
"Asset intent",
|
|
99
101
|
"Evidence",
|
|
100
102
|
"Status",
|
|
@@ -0,0 +1,230 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
import argparse
|
|
5
|
+
import re
|
|
6
|
+
import sys
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
|
|
9
|
+
from paper_topology import find_project_root
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
SECTION_CHOICES = (
|
|
13
|
+
"abstract",
|
|
14
|
+
"introduction",
|
|
15
|
+
"related-work",
|
|
16
|
+
"method",
|
|
17
|
+
"experiments",
|
|
18
|
+
"conclusion",
|
|
19
|
+
)
|
|
20
|
+
|
|
21
|
+
REQUIRED_HEADINGS = (
|
|
22
|
+
"## Sources",
|
|
23
|
+
"## Adopted Structure Slots",
|
|
24
|
+
"## Rejected or Waived Slots",
|
|
25
|
+
"## Section Mapping",
|
|
26
|
+
"## Asset Mapping",
|
|
27
|
+
"## Reuse Boundary",
|
|
28
|
+
)
|
|
29
|
+
|
|
30
|
+
EXPERIMENT_CORE_SLOTS = {
|
|
31
|
+
"dataset_description": ("dataset_description", "dataset statistics", "dataset", "datasets", "数据集"),
|
|
32
|
+
"split_protocol": ("split_protocol", "split protocol", "split", "train/test", "划分", "切分"),
|
|
33
|
+
"baseline_setup": ("baseline_setup", "baseline", "baselines", "comparator", "comparators", "基线", "对比方法"),
|
|
34
|
+
"metric_definition": ("metric_definition", "metric", "metrics", "ranking", "指标", "排序"),
|
|
35
|
+
"main_results": ("main_results", "main result", "main results", "primary comparison", "主结果", "主要结果"),
|
|
36
|
+
}
|
|
37
|
+
|
|
38
|
+
EXPERIMENT_WAIVABLE_SLOTS = {
|
|
39
|
+
"ablation": ("ablation", "component analysis", "消融", "组件分析"),
|
|
40
|
+
"sensitivity": ("sensitivity", "robustness", "trade-off", "敏感性", "稳健性"),
|
|
41
|
+
"implementation_details": ("implementation_details", "implementation", "reproducibility", "实现", "复现"),
|
|
42
|
+
}
|
|
43
|
+
|
|
44
|
+
SERVICE_STYLE_PHRASES = (
|
|
45
|
+
"user asked",
|
|
46
|
+
"the user asked",
|
|
47
|
+
"as requested by the user",
|
|
48
|
+
"i will explain",
|
|
49
|
+
"let me explain",
|
|
50
|
+
"below i",
|
|
51
|
+
"用户说",
|
|
52
|
+
"用户要求",
|
|
53
|
+
"按你的要求",
|
|
54
|
+
"我来解释",
|
|
55
|
+
"我会说明",
|
|
56
|
+
"下面我",
|
|
57
|
+
"这版",
|
|
58
|
+
"已完成",
|
|
59
|
+
"已按",
|
|
60
|
+
)
|
|
61
|
+
|
|
62
|
+
WORKFLOW_ONLY_PHRASES = (
|
|
63
|
+
"figure intent",
|
|
64
|
+
"asset intent",
|
|
65
|
+
"placeholder",
|
|
66
|
+
"workflow-language",
|
|
67
|
+
"review layer",
|
|
68
|
+
"translation layer",
|
|
69
|
+
"图的意图",
|
|
70
|
+
"资产意图",
|
|
71
|
+
"占位符",
|
|
72
|
+
"工作流语言",
|
|
73
|
+
"同步到",
|
|
74
|
+
)
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def parse_args() -> argparse.Namespace:
    """Parse the command line of the reference-consumption validator.

    ``--section``, ``--section-file`` and ``--mode`` are required.
    ``--consumption-plan`` is optional.
    """
    cli = argparse.ArgumentParser(
        description="Validate that reference-paper structure was consumed as structure, not as shallow prose polish."
    )
    argument_specs = (
        ("--section", {"required": True, "choices": SECTION_CHOICES}),
        ("--section-file", {"required": True}),
        (
            "--consumption-plan",
            {"help": "Path to .lab/writing/reference-patterns/consumption-plan/<section>.md"},
        ),
        ("--mode", {"required": True, "choices": ("draft", "final")}),
    )
    for flag, options in argument_specs:
        cli.add_argument(flag, **options)
    return cli.parse_args()
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def read_text(path: Path) -> str:
    """Read the whole file at *path* and return it decoded as UTF-8."""
    with path.open("r", encoding="utf-8") as handle:
        return handle.read()
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
def contains_any(text: str, phrases: tuple[str, ...]) -> bool:
    """Return ``True`` when at least one phrase is a substring of *text*.

    Both *text* and each phrase are lowercased before comparison.
    """
    haystack = text.lower()
    for phrase in phrases:
        if phrase.lower() in haystack:
            return True
    return False
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
def strip_latex_comments(text: str) -> str:
    """Return *text* with LaTeX ``%`` line comments removed.

    A ``%`` starts a comment only when it is not escaped. ``\\%`` is a
    literal percent sign and must stay in the prose, otherwise the rest of
    that line would be hidden from the phrase checks. A ``%`` preceded by
    an even run of backslashes (for example a ``\\\\`` line break followed
    by ``%``) is still a comment; the backslashes are kept and only the
    comment is dropped.
    """
    # Group 1 captures complete ``\\`` pairs so they survive the substitution.
    return re.sub(r"(?<!\\)((?:\\\\)*)%.*", r"\1", text)
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
def infer_consumption_plan(section_file: Path, section: str) -> Path | None:
    """Derive the default consumption-plan path for *section*.

    The project root is looked up from the resolved *section_file* via
    ``find_project_root``. Returns ``None`` when no root is found, otherwise
    ``<root>/.lab/writing/reference-patterns/consumption-plan/<section>.md``.
    The returned path is not checked for existence.
    """
    root = find_project_root(section_file.resolve())
    if root is None:
        return None
    plan_dir = root / ".lab" / "writing" / "reference-patterns" / "consumption-plan"
    return plan_dir / f"{section}.md"
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
def section_block(text: str, heading: str) -> str:
    """Return the body under the level-two Markdown heading named *heading*.

    *heading* may be given with or without a leading ``"## "``. The body
    runs from the heading line up to the next line starting with ``##``
    followed by whitespace, or to the end of *text*. An empty string is
    returned when the heading is not present.
    """
    title = heading.removeprefix("## ").strip()
    pattern = rf"^##\s+{re.escape(title)}\s*$([\s\S]*?)(?=^##\s+|\Z)"
    found = re.search(pattern, text, flags=re.MULTILINE)
    if found is None:
        return ""
    return found.group(1)
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
def nonempty_bullets(block: str) -> list[str]:
    """Return the stripped ``-`` bullet lines of *block* that carry content.

    A line counts as a bullet when, after stripping whitespace, it starts
    with ``-``. Bullets are dropped when nothing follows the leading
    dash(es) (a bare ``-`` or a ``---`` rule) or when the remainder is a
    none-marker (``none``, ``n/a``, ``na``; case and spacing are ignored).
    This keeps an empty placeholder bullet from satisfying a "list at least
    one item" check.
    """
    empty_markers = {"", "none", "n/a", "na"}
    bullets: list[str] = []
    for line in block.splitlines():
        stripped = line.strip()
        if not stripped.startswith("-"):
            continue
        # Compare only the text after the bullet marker, normalised.
        content = stripped.lstrip("-").strip().lower()
        if content in empty_markers:
            continue
        bullets.append(stripped)
    return bullets
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
def line_mentions_any(line: str, aliases: tuple[str, ...]) -> bool:
    """Return ``True`` if *line* contains any alias as a case-insensitive substring."""
    folded_line = line.lower()
    for alias in aliases:
        if alias.lower() in folded_line:
            return True
    return False
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
def slot_is_mapped(plan_text: str, aliases: tuple[str, ...]) -> bool:
    """Report whether a slot is mapped somewhere in the plan.

    The "Adopted Structure Slots", "Section Mapping" and "Asset Mapping"
    sections are searched. A slot counts as mapped when a single line both
    mentions one of *aliases* and contains ``->`` or ``:``.
    """
    mapping_headings = ("Adopted Structure Slots", "Section Mapping", "Asset Mapping")
    combined = "\n".join(section_block(plan_text, heading) for heading in mapping_headings)
    return any(
        line_mentions_any(candidate, aliases) and ("->" in candidate or ":" in candidate)
        for candidate in combined.splitlines()
    )
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
def slot_is_waived(plan_text: str, aliases: tuple[str, ...]) -> bool:
    """Report whether a slot is explicitly waived in the plan.

    Only the "Rejected or Waived Slots" section is inspected. A slot counts
    as waived when one line mentions an alias and also contains a waiver
    token (English or Chinese), compared case-insensitively.
    """
    waiver_tokens = ("waiv", "not used", "not applicable", "不采用", "不适用", "跳过")
    for entry in section_block(plan_text, "Rejected or Waived Slots").splitlines():
        if not line_mentions_any(entry, aliases):
            continue
        folded_entry = entry.lower()
        if any(token in folded_entry for token in waiver_tokens):
            return True
    return False
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
def validate_plan_shape(plan_text: str, issues: list[str]) -> None:
    """Check the overall structure of a reference consumption plan.

    Appends a message to *issues* for each required heading that does not
    occur in *plan_text*, when the "Sources" section has no non-empty
    bullet, and when the "Reuse Boundary" section does not mention every
    required boundary term.
    """
    issues.extend(
        f"reference consumption plan is missing required heading: {heading}"
        for heading in REQUIRED_HEADINGS
        if heading not in plan_text
    )

    if not nonempty_bullets(section_block(plan_text, "Sources")):
        issues.append("reference consumption plan must list at least one reference source")

    boundary_text = section_block(plan_text, "Reuse Boundary").lower()
    boundary_terms = ("structure", "wording", "claims", "metrics", "captions", "conclusions")
    absent_terms = [term for term in boundary_terms if term not in boundary_text]
    if absent_terms:
        issues.append(
            "reuse boundary must explicitly say to reuse structure only and not copy wording, claims, metrics, captions, or conclusions"
        )
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
def validate_experiment_slots(plan_text: str, issues: list[str]) -> None:
    """Check slot coverage for an experiments consumption plan.

    Every core slot must be mapped. Every waivable slot must be either
    mapped or explicitly waived. One message per failing slot is appended
    to *issues*.
    """
    for slot_name, slot_aliases in EXPERIMENT_CORE_SLOTS.items():
        if slot_is_mapped(plan_text, slot_aliases):
            continue
        issues.append(f"experiments reference consumption must map required slot: {slot_name}")

    for slot_name, slot_aliases in EXPERIMENT_WAIVABLE_SLOTS.items():
        handled = slot_is_mapped(plan_text, slot_aliases) or slot_is_waived(plan_text, slot_aliases)
        if not handled:
            issues.append(
                f"experiments reference consumption must map or explicitly waive slot: {slot_name}"
            )
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
def validate_section_text(section_text: str, issues: list[str]) -> None:
    """Scan section prose for language that does not belong in a manuscript.

    LaTeX comments are removed first, so authoring notes in comments are
    not flagged. A message is appended to *issues* for service-style
    phrases and another for workflow-only phrases.
    """
    manuscript_text = strip_latex_comments(section_text)
    phrase_checks = (
        (
            SERVICE_STYLE_PHRASES,
            "section prose contains service-style or AI-assistant meta language; rewrite as paper-facing academic prose",
        ),
        (
            WORKFLOW_ONLY_PHRASES,
            "section prose contains workflow-only placeholder language; move authoring notes out of the manuscript",
        ),
    )
    for phrases, message in phrase_checks:
        if contains_any(manuscript_text, phrases):
            issues.append(message)
|
|
186
|
+
|
|
187
|
+
|
|
188
|
+
def main() -> int:
    """Run the reference-consumption validation and return an exit code.

    Returns 1 when the section file is missing. When the consumption plan
    is missing, or when validation finds issues, draft mode prints
    ``WARNING:`` lines to stdout and returns 0, while final mode prints to
    stderr and returns 1. Returns 0 when no issues are found.
    """
    args = parse_args()
    draft_mode = args.mode == "draft"

    section_path = Path(args.section_file)
    if not section_path.exists():
        print(f"section file does not exist: {section_path}", file=sys.stderr)
        return 1

    # An explicit --consumption-plan wins over the inferred default location.
    if args.consumption_plan:
        plan_path = Path(args.consumption_plan)
    else:
        plan_path = infer_consumption_plan(section_path, args.section)

    if plan_path is None or not plan_path.exists():
        message = (
            "missing reference consumption plan; create "
            f".lab/writing/reference-patterns/consumption-plan/{args.section}.md before reference-guided deep writing"
        )
        if draft_mode:
            print(f"WARNING: {message}")
            return 0
        print(message, file=sys.stderr)
        return 1

    issues: list[str] = []
    plan_text = read_text(plan_path)
    section_text = read_text(section_path)

    validate_plan_shape(plan_text, issues)
    if args.section == "experiments":
        validate_experiment_slots(plan_text, issues)
    validate_section_text(section_text, issues)

    if not issues:
        print("reference consumption is valid")
        return 0

    if draft_mode:
        # Draft runs surface problems without failing the pipeline.
        for issue in issues:
            print(f"WARNING: {issue}")
        return 0

    for issue in issues:
        print(issue, file=sys.stderr)
    return 1
|
|
227
|
+
|
|
228
|
+
|
|
229
|
+
if __name__ == "__main__":
|
|
230
|
+
raise SystemExit(main())
|
|
@@ -198,9 +198,49 @@ SECTION_STYLE_WARNINGS = {
|
|
|
198
198
|
],
|
|
199
199
|
}
|
|
200
200
|
|
|
201
|
+
SERVICE_STYLE_PHRASES = (
|
|
202
|
+
"user asked",
|
|
203
|
+
"the user asked",
|
|
204
|
+
"as requested by the user",
|
|
205
|
+
"let me explain",
|
|
206
|
+
"i will explain",
|
|
207
|
+
"below i",
|
|
208
|
+
"用户说",
|
|
209
|
+
"用户要求",
|
|
210
|
+
"按你的要求",
|
|
211
|
+
"我来解释",
|
|
212
|
+
"我会说明",
|
|
213
|
+
"下面我",
|
|
214
|
+
"这版",
|
|
215
|
+
"已完成",
|
|
216
|
+
"已按",
|
|
217
|
+
)
|
|
218
|
+
|
|
219
|
+
WORKFLOW_ONLY_MANUSCRIPT_PHRASES = (
|
|
220
|
+
"figure intent",
|
|
221
|
+
"asset intent",
|
|
222
|
+
"placeholder",
|
|
223
|
+
"workflow-language",
|
|
224
|
+
"translation layer",
|
|
225
|
+
"review layer",
|
|
226
|
+
"图的意图",
|
|
227
|
+
"资产意图",
|
|
228
|
+
"占位符",
|
|
229
|
+
"工作流语言",
|
|
230
|
+
"同步到",
|
|
231
|
+
)
|
|
232
|
+
|
|
201
233
|
|
|
202
234
|
def check_common_section_gate_risks(text: str, issues: list[str]):
|
|
203
235
|
prose_text = strip_latex_commands(text)
|
|
236
|
+
if contains_any(prose_text, SERVICE_STYLE_PHRASES):
|
|
237
|
+
issues.append(
|
|
238
|
+
"service-style or AI-assistant meta language appears in reader-facing prose; rewrite it as academic manuscript text"
|
|
239
|
+
)
|
|
240
|
+
if contains_any(prose_text, WORKFLOW_ONLY_MANUSCRIPT_PHRASES):
|
|
241
|
+
issues.append(
|
|
242
|
+
"workflow-only placeholder language appears in reader-facing prose; move authoring notes out of the manuscript"
|
|
243
|
+
)
|
|
204
244
|
if re.search(r"\b[a-z0-9]+(?:_[a-z0-9]+)+\b", prose_text):
|
|
205
245
|
issues.append(
|
|
206
246
|
"reader-facing prose appears to contain internal identifier-like tokens; map them once for the reader and move them back out of prose before more polishing"
|
|
@@ -302,6 +342,16 @@ def check_neighbor_asset_files(section: str, section_path: Path, issues: list[st
|
|
|
302
342
|
issues.append(
|
|
303
343
|
f"{section} section is missing the required paper-layer asset file: {asset_path.as_posix()}"
|
|
304
344
|
)
|
|
345
|
+
continue
|
|
346
|
+
asset_text = strip_latex_commands(read_text(asset_path))
|
|
347
|
+
if contains_any(asset_text, SERVICE_STYLE_PHRASES):
|
|
348
|
+
issues.append(
|
|
349
|
+
f"{asset_path.as_posix()} contains service-style or AI-assistant meta language; rewrite it as paper-facing asset text"
|
|
350
|
+
)
|
|
351
|
+
if contains_any(asset_text, WORKFLOW_ONLY_MANUSCRIPT_PHRASES):
|
|
352
|
+
issues.append(
|
|
353
|
+
f"{asset_path.as_posix()} contains workflow-only placeholder language; move authoring notes out of captions and paper-facing asset text"
|
|
354
|
+
)
|
|
305
355
|
|
|
306
356
|
|
|
307
357
|
def check_paper_topology_targeting(section_path: Path, issues: list[str]):
|
|
@@ -504,100 +554,6 @@ def check_method(text: str, issues: list[str]):
|
|
|
504
554
|
issues.append("method should explain the technical advantage")
|
|
505
555
|
|
|
506
556
|
|
|
507
|
-
def has_performance_claim(text: str) -> bool:
    """Return ``True`` when *text* appears to make a performance claim.

    English cue words are matched case-insensitively at the start of a word,
    so inflected forms ("improves", "improvement", "gains") still count,
    but a cue buried inside an unrelated word does not ("gain" inside
    "against" or "again"). Chinese cue words are matched as plain
    substrings because Chinese text has no word separators.
    """
    english_stems = (
        "outperform",
        "improve",
        "gain",
        "better",
        "stronger",
        "superior",
        "state-of-the-art",
        "sota",
        "reduce",
    )
    chinese_terms = ("降低", "提升", "优于", "超过", "更好", "增益")

    lowered = text.lower()
    if any(term in lowered for term in chinese_terms):
        return True
    # The lookbehind rejects matches that begin in the middle of a word.
    stem_pattern = r"(?<![a-z])(?:" + "|".join(re.escape(stem) for stem in english_stems) + ")"
    return re.search(stem_pattern, lowered) is not None
|
|
533
|
-
|
|
534
|
-
|
|
535
|
-
def has_numeric_or_table_evidence(text: str) -> bool:
    """Return ``True`` when *text* carries numeric or table/figure evidence.

    Evidence is any of: a decimal number, a number followed by a unit or
    metric name (``%``, ``\\%``, pp, point(s), AUUC, Qini, AUC, F1), a
    ``\\pm``, a ``\\ref``/``\\autoref``/``\\cref``/``\\Cref`` to a ``tab:`` or
    ``fig:`` label, or a "Table"/"Figure"/"Fig."/"表"/"图" word directly
    followed by ``\\ref{``.

    The percent sign is handled apart from the word-like units because a
    trailing ``\\b`` can never match after ``%`` when a space or punctuation
    follows. The CJK words carry no leading ``\\b`` because adjacent CJK
    characters are all word characters, so a boundary never occurs there.
    """
    if re.search(r"\b\d+\.\d+\b", text):
        return True
    unit_pattern = r"\b\d+(?:\.\d+)?\s*(?:\\?%|(?:pp|points?|AUUC|Qini|AUC|F1)\b)"
    if re.search(unit_pattern, text, flags=re.IGNORECASE):
        return True
    if r"\pm" in text:
        return True
    return bool(
        re.search(r"\\(?:auto|c|C)?ref\{(?:tab|fig):", text)
        or re.search(r"(?:\b(?:Table|Figure|Fig\.)|表|图)~?\\ref\{", text)
    )
|
|
546
|
-
|
|
547
|
-
|
|
548
|
-
def has_generic_comparator_without_anchor(text: str) -> bool:
    """Detect generic comparator wording that is not tied to anything concrete.

    Returns ``False`` when no generic phrase (such as "existing methods" or
    "现有方法") occurs, or when *text* contains a ``\\cite{``, ``\\citet{``
    or ``\\citep{`` command. Otherwise returns ``True`` unless *text*
    contains a comma-separated list of at least two capitalised tokens of
    three or more characters, which is taken as a list of named methods.
    """
    generic_phrases = (
        "previous methods",
        "prior methods",
        "existing methods",
        "several baselines",
        "the baselines",
        "baseline suite",
        "previous work",
        "prior work",
        "现有方法",
        "已有方法",
        "若干基线",
        "基线集合",
    )
    if not contains_any(text, generic_phrases):
        return False

    citation_commands = (r"\cite{", r"\citet{", r"\citep{")
    if any(command in text for command in citation_commands):
        return False

    named_list = re.search(r"\b[A-Z][A-Za-z0-9-]{2,}(?:\s*,\s*[A-Z][A-Za-z0-9-]{2,})+", text)
    return named_list is None
|
|
571
|
-
|
|
572
|
-
|
|
573
|
-
def has_repeated_split_protocol(text: str) -> bool:
    """Return ``True`` when *text* describes a repeated split/seed/run protocol.

    Recognised forms: "<N> [random] splits|seeds|runs", "across [random]
    splits|seeds|runs" (both case-insensitive), and the Chinese
    "重复 <N> 次".

    The Chinese pattern has no leading ``\\b``. CJK characters are word
    characters, so a boundary never occurs between "重复" and a preceding
    CJK character, and the phrase would be missed in running Chinese text.
    """
    return bool(
        re.search(r"\b\d+\s+(?:random\s+)?(?:splits|seeds|runs)\b", text, flags=re.IGNORECASE)
        or re.search(r"\bacross\s+(?:random\s+)?(?:splits|seeds|runs)\b", text, flags=re.IGNORECASE)
        or re.search(r"重复\s*\d+\s*次", text)
    )
|
|
579
|
-
|
|
580
|
-
|
|
581
|
-
def has_variance_report(text: str) -> bool:
    """Return ``True`` when *text* appears to report variance or uncertainty.

    Accepted signals: ``\\pm``; the phrases "standard deviation",
    "confidence interval(s)" and "variance"; the Chinese terms 平均, 标准差,
    置信区间 and 方差; and the short tokens "std", "ci" and "mean"
    (optionally plural). Matching is case-insensitive.

    The short tokens are matched as whole words only. As substrings they
    occur inside many unrelated words ("ci" in "specific" or "precision",
    "mean" in "meaningful"), which would wrongly mark variance as reported.
    """
    lowered = text.lower()
    if r"\pm" in lowered:
        return True
    phrase_signals = (
        "standard deviation",
        "confidence interval",
        "variance",
        "平均",
        "标准差",
        "置信区间",
        "方差",
    )
    if any(signal in lowered for signal in phrase_signals):
        return True
    return re.search(r"\b(?:std|ci|mean)s?\b", lowered) is not None
|
|
599
|
-
|
|
600
|
-
|
|
601
557
|
def check_experiments(text: str, issues: list[str]):
|
|
602
558
|
if not contains_any(
|
|
603
559
|
text,
|
|
@@ -625,18 +581,6 @@ def check_experiments(text: str, issues: list[str]):
|
|
|
625
581
|
),
|
|
626
582
|
):
|
|
627
583
|
issues.append("experiments should include benchmark scene notes")
|
|
628
|
-
if has_performance_claim(text) and not has_numeric_or_table_evidence(text):
|
|
629
|
-
issues.append(
|
|
630
|
-
"experiment performance claims should tie to concrete metric or numeric evidence instead of prose-only claims"
|
|
631
|
-
)
|
|
632
|
-
if has_generic_comparator_without_anchor(text):
|
|
633
|
-
issues.append(
|
|
634
|
-
"experiments use generic comparator names; name the comparator family, table anchor, or citations before more polish"
|
|
635
|
-
)
|
|
636
|
-
if has_repeated_split_protocol(text) and not has_variance_report(text):
|
|
637
|
-
issues.append(
|
|
638
|
-
"repeated split or seed protocol should report variance, confidence intervals, or an explicit variance disposition"
|
|
639
|
-
)
|
|
640
584
|
|
|
641
585
|
|
|
642
586
|
def check_conclusion(text: str, issues: list[str]):
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
\begin{figure}[t]
|
|
2
2
|
\centering
|
|
3
3
|
\fbox{\rule{0pt}{1.2in}\rule{0.9\linewidth}{0pt}}
|
|
4
|
-
|
|
4
|
+
% Authoring note: record what this figure should show and why the reader needs it in the paper plan or write-iteration artifact, not in the caption.
|
|
5
|
+
\caption{Figure title. Replace with a paper-facing caption that states the visual content and the supported claim.}
|
|
5
6
|
\label{fig:placeholder}
|
|
6
7
|
\end{figure}
|
|
@@ -51,19 +51,19 @@
|
|
|
51
51
|
- Problem setting or teaser figure:
|
|
52
52
|
- Asset file:
|
|
53
53
|
- Section:
|
|
54
|
-
- Figure
|
|
54
|
+
- Figure role:
|
|
55
55
|
- Evidence:
|
|
56
56
|
- Status:
|
|
57
57
|
- Method overview figure:
|
|
58
58
|
- Asset file:
|
|
59
59
|
- Section:
|
|
60
|
-
- Figure
|
|
60
|
+
- Figure role:
|
|
61
61
|
- Evidence:
|
|
62
62
|
- Status:
|
|
63
63
|
- Results overview figure:
|
|
64
64
|
- Asset file:
|
|
65
65
|
- Section:
|
|
66
|
-
- Figure
|
|
66
|
+
- Figure role:
|
|
67
67
|
- Evidence:
|
|
68
68
|
- Status:
|
|
69
69
|
|
|
@@ -73,7 +73,7 @@
|
|
|
73
73
|
- Asset file:
|
|
74
74
|
- Asset type:
|
|
75
75
|
- Section:
|
|
76
|
-
- Asset
|
|
76
|
+
- Asset role:
|
|
77
77
|
- Evidence:
|
|
78
78
|
- Status:
|
|
79
79
|
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
# Reference Consumption Plan
|
|
2
|
+
|
|
3
|
+
## Sources
|
|
4
|
+
|
|
5
|
+
- Source:
|
|
6
|
+
|
|
7
|
+
## Adopted Structure Slots
|
|
8
|
+
|
|
9
|
+
- slot_name -> target subsection / paragraph / asset
|
|
10
|
+
|
|
11
|
+
## Rejected or Waived Slots
|
|
12
|
+
|
|
13
|
+
- slot_name -> waiver reason
|
|
14
|
+
|
|
15
|
+
## Section Mapping
|
|
16
|
+
|
|
17
|
+
- slot_name -> current-paper section or subsection
|
|
18
|
+
|
|
19
|
+
## Paragraph Role Mapping
|
|
20
|
+
|
|
21
|
+
- paragraph_role -> current-paper paragraph
|
|
22
|
+
|
|
23
|
+
## Asset Mapping
|
|
24
|
+
|
|
25
|
+
- asset_role -> current-paper figure/table/analysis asset
|
|
26
|
+
|
|
27
|
+
## Reuse Boundary
|
|
28
|
+
|
|
29
|
+
- Reuse structure, section order, paragraph roles, asset function, placement logic, and bridge logic only.
|
|
30
|
+
- Do not copy wording, claims, metrics, captions, or conclusions from reference papers.
|
|
31
|
+
|
|
32
|
+
## Validation
|
|
33
|
+
|
|
34
|
+
- `validate_reference_consumption.py` result:
|