superlab 0.1.66 → 0.1.68
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/i18n.cjs +0 -6
- package/lib/lab_write_contract.json +4 -4
- package/package-assets/claude/commands/lab/write.md +2 -1
- package/package-assets/claude/commands/lab-write.md +2 -1
- package/package-assets/claude/commands/lab:write.md +2 -1
- package/package-assets/claude/commands/lab/357/274/232write.md +2 -1
- package/package-assets/codex/prompts/lab/write.md +2 -1
- package/package-assets/codex/prompts/lab-write.md +2 -1
- package/package-assets/codex/prompts/lab:write.md +2 -1
- package/package-assets/codex/prompts/lab/357/274/232write.md +2 -1
- package/package-assets/shared/lab/.managed/scripts/validate_manuscript_delivery.py +119 -83
- package/package-assets/shared/lab/.managed/scripts/validate_paper_plan.py +2 -0
- package/package-assets/shared/lab/.managed/scripts/validate_reference_consumption.py +348 -0
- package/package-assets/shared/lab/.managed/scripts/validate_section_draft.py +70 -141
- package/package-assets/shared/lab/.managed/templates/paper-figure.tex +2 -1
- package/package-assets/shared/lab/.managed/templates/paper-plan.md +4 -4
- package/package-assets/shared/lab/.managed/templates/reference-consumption-plan.md +41 -0
- package/package-assets/shared/lab/.managed/templates/write-iteration.md +17 -27
- package/package-assets/shared/skills/lab/SKILL.md +6 -1
- package/package-assets/shared/skills/lab/references/paper-writing/examples/experiments/figure-placeholder-and-discussion.md +10 -6
- package/package-assets/shared/skills/lab/references/paper-writing/examples/experiments-examples.md +1 -1
- package/package-assets/shared/skills/lab/references/paper-writing/section-style-policies.md +12 -0
- package/package-assets/shared/skills/lab/stages/write.md +32 -20
- package/package.json +1 -1
- package/package-assets/shared/lab/.managed/scripts/extract_reference_paper_structure.py +0 -1200
- package/package-assets/shared/lab/.managed/templates/reference-template-intake.md +0 -50
|
@@ -39,6 +39,41 @@ REQUIRED_TABLE_NOTE_MARKERS = (
|
|
|
39
39
|
)
|
|
40
40
|
WIDTH_CONTROL_NOTE_MARKER = "% Width control:"
TABLE_ABBREVIATION_EXCEPTIONS = {"TODO", "TBD"}

# Lower-case prefixes: check_table_file lower-cases a table-note value and
# reports it as placeholder text when it starts with one of these.
PLACEHOLDER_TABLE_NOTE_PREFIXES = (
    "explain ", "state ", "expand ", "describe ", "fill ",
    "todo", "tbd",
    "待补", "待定", "说明每",
)

# Phrases matched case-insensitively (via contains_any) against comment-stripped
# asset text to flag service-style / assistant meta language.
SERVICE_STYLE_PHRASES = (
    "user asked", "the user asked", "as requested by the user",
    "let me explain", "i will explain",
    "用户说", "用户要求", "按你的要求", "我来解释", "我会说明",
)

# Workflow-only wording that must not appear in paper-facing text.
# NOTE(review): contains_any lower-cases both sides, so the capitalised and
# lower-case "intent:" entries are equivalent for that caller.
WORKFLOW_ONLY_MANUSCRIPT_PHRASES = (
    "Figure intent:", "Asset intent:",
    "figure intent:", "asset intent:",
    "图的意图", "资产意图", "占位符",
    "workflow-language", "translation layer",
)
|
|
42
77
|
|
|
43
78
|
|
|
44
79
|
def parse_args():
|
|
@@ -53,6 +88,15 @@ def read_text(path: Path) -> str:
|
|
|
53
88
|
return path.read_text(encoding="utf-8")
|
|
54
89
|
|
|
55
90
|
|
|
91
|
+
def strip_latex_comments(text: str) -> str:
    """Return *text* with LaTeX comments removed.

    A ``%`` starts a comment only when it is unescaped, i.e. preceded by an
    even number of backslashes (``\\%`` is a literal percent sign, while
    ``\\\\%`` is a line break followed by a comment).  Everything from an
    unescaped ``%`` to the end of that line is dropped; the newline itself
    is kept so line structure is unchanged.
    """
    # The optional group captures complete ``\\`` pairs directly before the
    # ``%`` so they survive the substitution; the look-behind stops a match
    # from starting in the middle of a backslash run.
    return re.sub(r"(?<!\\)((?:\\\\)*)%.*", r"\1", text)
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
def contains_any(text: str, needles: tuple[str, ...]) -> bool:
    """Return True when any needle occurs in *text*, ignoring case.

    Matching is plain substring containment after lower-casing both sides.
    """
    haystack = text.lower()
    for needle in needles:
        if needle.lower() in haystack:
            return True
    return False
|
|
98
|
+
|
|
99
|
+
|
|
56
100
|
def find_workflow_config(start_path: Path) -> Path | None:
|
|
57
101
|
search_roots = [start_path, *start_path.parents]
|
|
58
102
|
for root in search_roots:
|
|
@@ -67,6 +111,23 @@ def load_workflow_config(path: Path) -> dict:
|
|
|
67
111
|
return json.loads(path.read_text(encoding="utf-8"))
|
|
68
112
|
|
|
69
113
|
|
|
114
|
+
def project_root_from_workflow_config(path: Path) -> Path:
    """Derive the project root from the location of a workflow config file.

    When the file sits in a ``.lab/config`` directory the root is the parent
    of ``.lab``; otherwise the root is the grandparent directory of the file.
    """
    config_dir = path.parent
    container = config_dir.parent
    in_lab_config = config_dir.name == "config" and container.name == ".lab"
    return container.parent if in_lab_config else container
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
def is_workflow_language_layer_target(paper_dir: Path, workflow_config_path: Path, config: dict) -> bool:
    """Tell whether *paper_dir* is the ``paper/workflow-language`` layer directory.

    Returns False when ``workflow_language`` is unset or equals
    ``paper_language`` (which defaults to ``workflow_language``).  Otherwise
    the expected directory is ``<project root>/<deliverables_root>/paper/
    workflow-language`` (``deliverables_root`` defaults to ``docs/research``)
    and the result is whether *paper_dir* resolves to that same path.
    """
    workflow_language = config.get("workflow_language")
    if not workflow_language:
        return False
    paper_language = config.get("paper_language", workflow_language)
    if workflow_language == paper_language:
        return False

    root = project_root_from_workflow_config(workflow_config_path)
    deliverables_root = (root / config.get("deliverables_root", "docs/research")).resolve()
    expected_dir = (deliverables_root / "paper" / "workflow-language").resolve()
    return paper_dir.resolve() == expected_dir
|
|
129
|
+
|
|
130
|
+
|
|
70
131
|
def extract_section_body(text: str, patterns: tuple[str, ...]) -> str:
|
|
71
132
|
for pattern in patterns:
|
|
72
133
|
match = re.search(pattern, text, flags=re.MULTILINE)
|
|
@@ -149,54 +210,6 @@ def extract_table_note(text: str) -> str:
|
|
|
149
210
|
return "\n".join(line.strip() for line in text.splitlines() if line.strip().startswith("%"))
|
|
150
211
|
|
|
151
212
|
|
|
152
|
-
def extract_tabular_specs(text: str) -> list[str]:
    """Return the column specification of every ``\\begin{tabular}`` in *text*.

    An optional ``[...]`` position argument before the spec is skipped.
    """
    spec_pattern = r"\\begin\{tabular\}(?:\[[^\]]*\])?\{([^}]*)\}"
    return [found.group(1) for found in re.finditer(spec_pattern, text)]
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
def extract_tabular_bodies(text: str) -> list[str]:
    """Return the text between each ``\\begin{tabular}{...}`` and its ``\\end{tabular}``."""
    body_pattern = re.compile(
        r"\\begin\{tabular\}(?:\[[^\]]*\])?\{[^}]*\}([\s\S]*?)\\end\{tabular\}"
    )
    # With a single capture group, findall yields exactly the captured bodies.
    return body_pattern.findall(text)
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
def strip_latex_for_table_cell(cell: str) -> str:
    """Reduce one table cell to plain text.

    Steps, in order: drop LaTeX comments, unwrap ``\\textbf`` / ``\\mathbf`` /
    ``\\emph`` around their argument, replace remaining control words (with an
    optional ``[...]`` argument) by a space, replace ``$`` by a space, and
    strip surrounding whitespace.

    Only an unescaped ``%`` starts a comment, so an escaped ``\\%`` no longer
    truncates the rest of the cell; the ``\\%`` sequence itself is kept as is.
    """
    # Preserve complete ``\\`` pairs in front of the comment marker.
    cell = re.sub(r"(?<!\\)((?:\\\\)*)%.*", r"\1", cell)
    cell = re.sub(r"\\(?:textbf|mathbf|emph)\{([^}]*)\}", r"\1", cell)
    cell = re.sub(r"\\[A-Za-z@*]+(?:\[[^\]]*\])?", " ", cell)
    cell = cell.replace("$", " ")
    return cell.strip()
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
def check_numeric_precision_consistency(text: str, issues: list[str], label: str):
    """Report table columns whose decimal numbers use differing precision.

    Each tabular body is split into rows on ``\\\\`` and into cells on ``&``.
    For every column the set of fractional-digit counts seen is collected; a
    column with more than one count is inconsistent.  One issue listing the
    1-based column numbers is appended for the first tabular body that has
    any, and the function then returns without inspecting later bodies.
    """
    rule_pattern = r"\\(?:toprule|midrule|bottomrule|cmidrule)(?:\{[^}]*\})?"
    decimal_pattern = r"(?<![A-Za-z])[-+]?\d+\.(\d+)"
    for body in extract_tabular_bodies(text):
        digits_by_column: dict[int, set[int]] = {}
        for raw_row in re.split(r"\\\\", body):
            row = re.sub(rule_pattern, " ", raw_row)
            if "&" not in row:
                # Rule-only or single-cell lines carry no column data.
                continue
            for column, raw_cell in enumerate(row.split("&")):
                fractions = re.findall(decimal_pattern, strip_latex_for_table_cell(raw_cell))
                if fractions:
                    digits_by_column.setdefault(column, set()).update(
                        len(fraction) for fraction in fractions
                    )
        mixed_columns = [
            str(column + 1)
            for column, digit_counts in sorted(digits_by_column.items())
            if len(digit_counts) > 1
        ]
        if mixed_columns:
            issues.append(
                f"{label} has inconsistent numeric precision in column(s): {', '.join(mixed_columns)}"
            )
            return
|
|
198
|
-
|
|
199
|
-
|
|
200
213
|
def detect_uppercase_abbreviations(text: str) -> set[str]:
|
|
201
214
|
return {
|
|
202
215
|
token
|
|
@@ -217,6 +230,13 @@ def section_references_label(text: str, label: str) -> bool:
|
|
|
217
230
|
return bool(re.search(REF_PATTERN_TEMPLATE % re.escape(label), text))
|
|
218
231
|
|
|
219
232
|
|
|
233
|
+
def section_includes_asset(text: str, asset_path: str) -> bool:
    """Return True when *text* pulls in *asset_path* via a LaTeX include command.

    Recognised commands are ``\\input``, ``\\include`` and ``\\wlpath``; the
    braced argument must equal *asset_path*, optionally followed by ``.tex``.
    """
    target = re.escape(asset_path)
    include_pattern = rf"\\(?:input|include|wlpath)\{{{target}(?:\.tex)?\}}"
    return re.search(include_pattern, text) is not None
|
|
238
|
+
|
|
239
|
+
|
|
220
240
|
def check_exists(path: Path, issues: list[str], label: str):
|
|
221
241
|
if not path.exists():
|
|
222
242
|
issues.append(f"missing required file: {label} ({path})")
|
|
@@ -258,18 +278,23 @@ def check_table_file(path: Path, issues: list[str], label: str):
|
|
|
258
278
|
issues.append(f"{label} must contain a table environment")
|
|
259
279
|
if r"\caption{" not in text or r"\label{" not in text:
|
|
260
280
|
issues.append(f"{label} must contain both caption and label")
|
|
261
|
-
first_caption = text.find(r"\caption{")
|
|
262
|
-
first_tabular = text.find(r"\begin{tabular}")
|
|
263
|
-
if first_caption != -1 and first_tabular != -1 and first_caption > first_tabular:
|
|
264
|
-
issues.append(f"{label} should place its caption before the tabular body")
|
|
265
281
|
if not all(token in text for token in (r"\toprule", r"\midrule", r"\bottomrule")):
|
|
266
282
|
issues.append(f"{label} must use booktabs structure")
|
|
267
|
-
if any("|" in spec for spec in extract_tabular_specs(text)) or r"\vline" in text:
|
|
268
|
-
issues.append(f"{label} must not use vertical table rules; use booktabs spacing instead")
|
|
269
|
-
if r"\hline" in text or r"\cline" in text:
|
|
270
|
-
issues.append(f"{label} must not mix legacy \\hline/\\cline rules with booktabs tables")
|
|
271
283
|
if not all(marker in text for marker in REQUIRED_TABLE_NOTE_MARKERS):
|
|
272
284
|
issues.append(f"{label} must include a local table note scaffold")
|
|
285
|
+
for marker in REQUIRED_TABLE_NOTE_MARKERS:
|
|
286
|
+
for line in text.splitlines():
|
|
287
|
+
stripped = line.strip()
|
|
288
|
+
if not stripped.startswith(marker):
|
|
289
|
+
continue
|
|
290
|
+
value = stripped.removeprefix(marker).strip().lower()
|
|
291
|
+
if (
|
|
292
|
+
not value
|
|
293
|
+
or "placeholder" in value
|
|
294
|
+
or "占位" in value
|
|
295
|
+
or any(value.startswith(prefix) for prefix in PLACEHOLDER_TABLE_NOTE_PREFIXES)
|
|
296
|
+
):
|
|
297
|
+
issues.append(f"{label} contains placeholder table note text for `{marker}`")
|
|
273
298
|
caption_text = extract_caption(text)
|
|
274
299
|
note_text = extract_table_note(text)
|
|
275
300
|
local_context = "\n".join((caption_text, note_text))
|
|
@@ -290,7 +315,6 @@ def check_table_file(path: Path, issues: list[str], label: str):
|
|
|
290
315
|
continue
|
|
291
316
|
if value < 3.0:
|
|
292
317
|
issues.append(f"{label} sets \\tabcolsep below the safe range for paper-facing main tables")
|
|
293
|
-
check_numeric_precision_consistency(text, issues, label)
|
|
294
318
|
|
|
295
319
|
|
|
296
320
|
def check_figure_file(path: Path, issues: list[str], label: str):
|
|
@@ -309,8 +333,11 @@ def check_figure_file(path: Path, issues: list[str], label: str):
|
|
|
309
333
|
issues.append(f"{label} must contain a figure environment")
|
|
310
334
|
if r"\caption{" not in text or r"\label{" not in text:
|
|
311
335
|
issues.append(f"{label} must contain both caption and label")
|
|
312
|
-
|
|
313
|
-
|
|
336
|
+
paper_facing_text = strip_latex_comments(text)
|
|
337
|
+
if contains_any(paper_facing_text, SERVICE_STYLE_PHRASES):
|
|
338
|
+
issues.append(f"{label} contains service-style or AI-assistant meta language")
|
|
339
|
+
if contains_any(paper_facing_text, WORKFLOW_ONLY_MANUSCRIPT_PHRASES):
|
|
340
|
+
issues.append(f"{label} contains workflow-only placeholder language in paper-facing text")
|
|
314
341
|
|
|
315
342
|
|
|
316
343
|
def check_analysis_asset(path: Path, issues: list[str]):
|
|
@@ -322,8 +349,11 @@ def check_analysis_asset(path: Path, issues: list[str]):
|
|
|
322
349
|
issues.append("analysis/analysis-asset.tex must contain a table or figure environment")
|
|
323
350
|
if r"\caption{" not in text or r"\label{" not in text:
|
|
324
351
|
issues.append("analysis/analysis-asset.tex must contain both caption and label")
|
|
325
|
-
|
|
326
|
-
|
|
352
|
+
paper_facing_text = strip_latex_comments(text)
|
|
353
|
+
if contains_any(paper_facing_text, SERVICE_STYLE_PHRASES):
|
|
354
|
+
issues.append("analysis/analysis-asset.tex contains service-style or AI-assistant meta language")
|
|
355
|
+
if contains_any(paper_facing_text, WORKFLOW_ONLY_MANUSCRIPT_PHRASES):
|
|
356
|
+
issues.append("analysis/analysis-asset.tex contains workflow-only placeholder language in paper-facing text")
|
|
327
357
|
|
|
328
358
|
|
|
329
359
|
def require_section_reference(section_text: str, label: str | None, issues: list[str], section_name: str, asset_name: str):
|
|
@@ -340,10 +370,9 @@ def check_introduction_section(paper_dir: Path, issues: list[str]):
|
|
|
340
370
|
return
|
|
341
371
|
text = read_text(introduction)
|
|
342
372
|
has_problem_figure = any(
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
r"\
|
|
346
|
-
r"\begin{figure}",
|
|
373
|
+
(
|
|
374
|
+
section_includes_asset(text, "figures/problem-setting"),
|
|
375
|
+
r"\begin{figure}" in text,
|
|
347
376
|
)
|
|
348
377
|
)
|
|
349
378
|
if not has_problem_figure:
|
|
@@ -357,25 +386,22 @@ def check_experiments_section(paper_dir: Path, issues: list[str]):
|
|
|
357
386
|
return
|
|
358
387
|
text = read_text(experiments)
|
|
359
388
|
has_table = any(
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
r"\
|
|
364
|
-
r"\begin{table}",
|
|
389
|
+
(
|
|
390
|
+
section_includes_asset(text, "tables/main-results"),
|
|
391
|
+
section_includes_asset(text, "tables/ablations"),
|
|
392
|
+
r"\begin{table}" in text,
|
|
365
393
|
)
|
|
366
394
|
)
|
|
367
395
|
has_figure = any(
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
r"\
|
|
371
|
-
r"\begin{figure}",
|
|
396
|
+
(
|
|
397
|
+
section_includes_asset(text, "figures/results-overview"),
|
|
398
|
+
r"\begin{figure}" in text,
|
|
372
399
|
)
|
|
373
400
|
)
|
|
374
401
|
has_analysis = any(
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
r"\input{tables/analysis}",
|
|
402
|
+
(
|
|
403
|
+
section_includes_asset(text, "analysis/analysis-asset"),
|
|
404
|
+
section_includes_asset(text, "tables/analysis"),
|
|
379
405
|
)
|
|
380
406
|
)
|
|
381
407
|
if not has_table:
|
|
@@ -429,10 +455,9 @@ def check_method_section(paper_dir: Path, issues: list[str]):
|
|
|
429
455
|
return
|
|
430
456
|
text = read_text(method)
|
|
431
457
|
has_figure = any(
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
r"\
|
|
435
|
-
r"\begin{figure}",
|
|
458
|
+
(
|
|
459
|
+
section_includes_asset(text, "figures/method-overview"),
|
|
460
|
+
r"\begin{figure}" in text,
|
|
436
461
|
)
|
|
437
462
|
)
|
|
438
463
|
if not has_figure:
|
|
@@ -462,6 +487,17 @@ def check_language_layers(paper_dir: Path, issues: list[str]):
|
|
|
462
487
|
if not workflow_language or workflow_language == paper_language:
|
|
463
488
|
return
|
|
464
489
|
|
|
490
|
+
if is_workflow_language_layer_target(paper_dir, workflow_config, config):
|
|
491
|
+
sections_dir = paper_dir / "sections"
|
|
492
|
+
section_text = "\n".join(
|
|
493
|
+
read_text(path) for path in sorted(sections_dir.glob("*.tex")) if path.is_file()
|
|
494
|
+
)
|
|
495
|
+
if section_text and not text_looks_like_language(section_text, workflow_language):
|
|
496
|
+
issues.append(
|
|
497
|
+
f"workflow-language paper-layer sections should follow workflow_language={workflow_language}"
|
|
498
|
+
)
|
|
499
|
+
return
|
|
500
|
+
|
|
465
501
|
if finalization_decision == "unconfirmed":
|
|
466
502
|
issues.append(
|
|
467
503
|
"workflow_language and paper_language differ; confirm paper_language_finalization_decision before finalizing the manuscript"
|
|
@@ -657,7 +693,7 @@ def check_latest_write_iteration_preflight(paper_dir: Path, issues: list[str]):
|
|
|
657
693
|
if workflow_config is None:
|
|
658
694
|
return
|
|
659
695
|
|
|
660
|
-
project_root = workflow_config
|
|
696
|
+
project_root = project_root_from_workflow_config(workflow_config)
|
|
661
697
|
manifest_path = project_root / ".lab" / ".managed" / "rule-manifest.json"
|
|
662
698
|
if not manifest_path.exists():
|
|
663
699
|
return
|
|
@@ -82,6 +82,7 @@ TABLE_DETAIL_FIELDS = (
|
|
|
82
82
|
FIGURE_DETAIL_FIELDS = (
|
|
83
83
|
"Asset file",
|
|
84
84
|
"Section",
|
|
85
|
+
"Figure role",
|
|
85
86
|
"Figure intent",
|
|
86
87
|
"Evidence",
|
|
87
88
|
"Status",
|
|
@@ -95,6 +96,7 @@ ANALYSIS_DETAIL_FIELDS = (
|
|
|
95
96
|
"Asset file",
|
|
96
97
|
"Asset type",
|
|
97
98
|
"Section",
|
|
99
|
+
"Asset role",
|
|
98
100
|
"Asset intent",
|
|
99
101
|
"Evidence",
|
|
100
102
|
"Status",
|
|
@@ -0,0 +1,348 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
import argparse
|
|
5
|
+
import re
|
|
6
|
+
import sys
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
|
|
9
|
+
from paper_topology import find_project_root
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
# Accepted values for the --section command-line option.
SECTION_CHOICES = (
    "abstract", "introduction", "related-work",
    "method", "experiments", "conclusion",
)

# Headings a consumption plan must contain; validate_plan_shape reports
# each one that is missing from the plan text.
REQUIRED_HEADINGS = (
    "## Sources",
    "## Adopted Structure Slots", "## Rejected or Waived Slots",
    "## Section Mapping", "## Paragraph Role Mapping", "## Asset Mapping",
    "## Reuse Boundary",
)
|
|
30
|
+
|
|
31
|
+
# Slots an experiments plan must map.  Each value lists the aliases that are
# searched (case-insensitively) in the plan's mapping blocks.
EXPERIMENT_CORE_SLOTS = dict(
    dataset_description=("dataset_description", "dataset statistics", "dataset", "datasets", "数据集"),
    split_protocol=("split_protocol", "split protocol", "split", "train/test", "划分", "切分"),
    baseline_setup=("baseline_setup", "baseline", "baselines", "comparator", "comparators", "基线", "对比方法"),
    metric_definition=("metric_definition", "metric", "metrics", "ranking", "指标", "排序"),
    main_results=("main_results", "main result", "main results", "primary comparison", "主结果", "主要结果"),
)

# Slots an experiments plan must either map or explicitly waive.
EXPERIMENT_WAIVABLE_SLOTS = dict(
    ablation=("ablation", "component analysis", "消融", "组件分析"),
    sensitivity=("sensitivity", "robustness", "trade-off", "敏感性", "稳健性"),
    implementation_details=("implementation_details", "implementation", "reproducibility", "实现", "复现"),
)
|
|
44
|
+
|
|
45
|
+
# Per-slot substrings looked for (case-insensitively) in the comment-stripped
# section text to decide whether a mapped slot is actually realized there.
# NOTE(review): matching is plain substring containment, so short aliases
# such as "test" or "table" also match inside longer words — confirm this
# looseness is intended.
EXPERIMENT_TEXT_ALIASES = {
    "dataset_description": (
        "dataset", "datasets", "benchmark", "benchmarks", "cohort", "population",
        "数据集", "基准", "样本",
    ),
    "split_protocol": (
        "split", "splits", "train", "test", "training", "testing", "random", "seed",
        "划分", "切分", "训练", "测试",
    ),
    "baseline_setup": (
        "baseline", "baselines", "comparator", "comparators",
        "prior method", "compared", "comparison",
        "基线", "对比方法", "比较",
    ),
    "metric_definition": (
        "metric", "metrics", "auuc", "qini", "primary metric", "ranking", "higher is better",
        "指标", "排序", "越高越好",
    ),
    "main_results": (
        "main result", "main results", "primary comparison", "table", "result", "results",
        "主结果", "主要结果", "结果", "表",
    ),
    "ablation": (
        "ablation", "ablations", "component", "variant",
        "消融", "组件", "变体",
    ),
    "sensitivity": (
        "sensitivity", "robustness", "stability", "diagnostic", "trade-off",
        "敏感性", "稳健性", "稳定性", "诊断",
    ),
    "implementation_details": (
        "implementation", "training", "hyperparameter", "reproducibility",
        "实现", "训练", "复现",
    ),
}
|
|
137
|
+
|
|
138
|
+
# Phrases that mark service-style / assistant meta language; matched
# case-insensitively against comment-stripped section prose.
SERVICE_STYLE_PHRASES = (
    "user asked", "the user asked", "as requested by the user",
    "i will explain", "let me explain", "below i",
    "用户说", "用户要求", "按你的要求",
    "我来解释", "我会说明", "下面我",
    "这版", "已完成", "已按",
)

# Workflow-only authoring vocabulary that must not appear in section prose.
WORKFLOW_ONLY_PHRASES = (
    "figure intent", "asset intent", "placeholder",
    "workflow-language", "review layer", "translation layer",
    "图的意图", "资产意图", "占位符",
    "工作流语言", "同步到",
)
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
def parse_args() -> argparse.Namespace:
    """Parse this script's command-line options.

    ``--section``, ``--section-file`` and ``--mode`` are required;
    ``--consumption-plan`` is optional.
    """
    cli = argparse.ArgumentParser(
        description="Validate that reference-paper structure was consumed as structure, not as shallow prose polish."
    )
    cli.add_argument("--section", choices=SECTION_CHOICES, required=True)
    cli.add_argument("--section-file", required=True)
    cli.add_argument(
        "--consumption-plan",
        help="Path to .lab/writing/reference-patterns/consumption-plan/<section>.md",
    )
    cli.add_argument("--mode", choices=("draft", "final"), required=True)
    return cli.parse_args()
|
|
180
|
+
|
|
181
|
+
|
|
182
|
+
def read_text(path: Path) -> str:
    """Return the full contents of *path* decoded as UTF-8."""
    with path.open(encoding="utf-8") as handle:
        return handle.read()
|
|
184
|
+
|
|
185
|
+
|
|
186
|
+
def contains_any(text: str, phrases: tuple[str, ...]) -> bool:
    """Return True when at least one phrase is a substring of *text*, ignoring case."""
    haystack = text.lower()
    for phrase in phrases:
        if phrase.lower() in haystack:
            return True
    return False
|
|
189
|
+
|
|
190
|
+
|
|
191
|
+
def strip_latex_comments(text: str) -> str:
    """Return *text* with LaTeX comments removed.

    A ``%`` starts a comment only when it is unescaped, i.e. preceded by an
    even number of backslashes (``\\%`` is a literal percent sign, while
    ``\\\\%`` is a line break followed by a comment).  Everything from an
    unescaped ``%`` to the end of that line is dropped; the newline itself
    is kept so line structure is unchanged.
    """
    # The optional group captures complete ``\\`` pairs directly before the
    # ``%`` so they survive the substitution; the look-behind stops a match
    # from starting in the middle of a backslash run.
    return re.sub(r"(?<!\\)((?:\\\\)*)%.*", r"\1", text)
|
|
193
|
+
|
|
194
|
+
|
|
195
|
+
def infer_consumption_plan(section_file: Path, section: str) -> Path | None:
    """Build the default consumption-plan path for *section*.

    The project root is looked up from the resolved *section_file* via
    ``find_project_root``.  Returns None when no root is found, otherwise
    ``<root>/.lab/writing/reference-patterns/consumption-plan/<section>.md``
    (the file is not checked for existence here).
    """
    root = find_project_root(section_file.resolve())
    if root is None:
        return None
    plan_dir = root / ".lab" / "writing" / "reference-patterns" / "consumption-plan"
    return plan_dir / f"{section}.md"
|
|
200
|
+
|
|
201
|
+
|
|
202
|
+
def section_block(text: str, heading: str) -> str:
    """Return the body under a level-2 markdown heading, or ``""`` if absent.

    *heading* may be given with or without its leading ``"## "``.  The body
    runs from the end of the heading line up to the next line that starts
    with ``##`` followed by whitespace, or to the end of *text*.
    """
    title = re.escape(heading.removeprefix("## ").strip())
    block_pattern = rf"^##\s+{title}\s*$([\s\S]*?)(?=^##\s+|\Z)"
    found = re.search(block_pattern, text, flags=re.MULTILINE)
    if found is None:
        return ""
    return found.group(1)
|
|
209
|
+
|
|
210
|
+
|
|
211
|
+
def nonempty_bullets(block: str) -> list[str]:
    """Return the stripped bullet lines of *block* that carry real content.

    A bullet is a line whose stripped form starts with ``-``.  Bullets are
    skipped when nothing follows the leading dash(es) — an empty ``-`` item
    or a ``---`` rule — or when the remaining text is one of the "nothing
    here" markers ``none``, ``n/a`` or ``na`` (case-insensitive, regardless
    of the spacing after the dash).
    """
    empty_markers = {"none", "n/a", "na"}
    bullets: list[str] = []
    for line in block.splitlines():
        stripped = line.strip()
        if not stripped.startswith("-"):
            continue
        # Text after the bullet marker; empty for "-", "- " and "---".
        content = stripped.lstrip("-").strip()
        if not content or content.lower() in empty_markers:
            continue
        bullets.append(stripped)
    return bullets
|
|
217
|
+
|
|
218
|
+
|
|
219
|
+
def line_mentions_any(line: str, aliases: tuple[str, ...]) -> bool:
    """Return True when *line* contains any alias as a substring, ignoring case."""
    lowered_line = line.lower()
    for alias in aliases:
        if alias.lower() in lowered_line:
            return True
    return False
|
|
222
|
+
|
|
223
|
+
|
|
224
|
+
def slot_is_mapped(plan_text: str, aliases: tuple[str, ...]) -> bool:
    """Return True when the plan maps a slot identified by *aliases*.

    Only the "Adopted Structure Slots", "Section Mapping" and "Asset Mapping"
    blocks are searched.  A line counts as a mapping when it mentions one of
    the aliases and also contains ``->`` or ``:``.
    """
    mapping_headings = ("Adopted Structure Slots", "Section Mapping", "Asset Mapping")
    combined = "\n".join(section_block(plan_text, heading) for heading in mapping_headings)
    return any(
        ("->" in line or ":" in line) and line_mentions_any(line, aliases)
        for line in combined.splitlines()
    )
|
|
236
|
+
|
|
237
|
+
|
|
238
|
+
def slot_is_waived(plan_text: str, aliases: tuple[str, ...]) -> bool:
    """Return True when the plan explicitly waives a slot identified by *aliases*.

    Only the "Rejected or Waived Slots" block is searched.  A line counts as
    a waiver when it mentions one of the aliases and its lower-cased text
    contains one of the waiver tokens below.
    """
    waiver_tokens = ("waiv", "not used", "not applicable", "不采用", "不适用", "跳过")
    for line in section_block(plan_text, "Rejected or Waived Slots").splitlines():
        if not line_mentions_any(line, aliases):
            continue
        lowered = line.lower()
        if any(token in lowered for token in waiver_tokens):
            return True
    return False
|
|
245
|
+
|
|
246
|
+
|
|
247
|
+
def validate_plan_shape(plan_text: str, issues: list[str]) -> None:
    """Check the overall shape of a consumption plan, appending problems to *issues*.

    Three checks run in order: every entry of ``REQUIRED_HEADINGS`` occurs in
    the plan text, the "Sources" block has at least one non-empty bullet, and
    the "Reuse Boundary" block mentions every required boundary term.
    """
    issues.extend(
        f"reference consumption plan is missing required heading: {heading}"
        for heading in REQUIRED_HEADINGS
        if heading not in plan_text
    )

    if not nonempty_bullets(section_block(plan_text, "Sources")):
        issues.append("reference consumption plan must list at least one reference source")

    boundary_text = section_block(plan_text, "Reuse Boundary").lower()
    boundary_terms = ("structure", "wording", "claims", "metrics", "captions", "conclusions")
    if any(term not in boundary_text for term in boundary_terms):
        issues.append(
            "reuse boundary must explicitly say to reuse structure only and not copy wording, claims, metrics, captions, or conclusions"
        )
|
|
262
|
+
|
|
263
|
+
|
|
264
|
+
def validate_experiment_slots(plan_text: str, issues: list[str]) -> None:
    """Check experiment slot coverage in the plan, appending problems to *issues*.

    Every core slot must be mapped; every waivable slot must be either mapped
    or explicitly waived.
    """
    for slot, aliases in EXPERIMENT_CORE_SLOTS.items():
        if slot_is_mapped(plan_text, aliases):
            continue
        issues.append(f"experiments reference consumption must map required slot: {slot}")

    for slot, aliases in EXPERIMENT_WAIVABLE_SLOTS.items():
        if slot_is_mapped(plan_text, aliases) or slot_is_waived(plan_text, aliases):
            continue
        issues.append(f"experiments reference consumption must map or explicitly waive slot: {slot}")
|
|
272
|
+
|
|
273
|
+
|
|
274
|
+
def mapped_experiment_slots(plan_text: str) -> list[str]:
    """Return the names of all experiment slots (core first, then waivable) mapped in the plan."""
    every_slot = EXPERIMENT_CORE_SLOTS | EXPERIMENT_WAIVABLE_SLOTS
    return [
        slot
        for slot, aliases in every_slot.items()
        if slot_is_mapped(plan_text, aliases)
    ]
|
|
280
|
+
|
|
281
|
+
|
|
282
|
+
def validate_experiment_section_realizes_slots(plan_text: str, section_text: str, issues: list[str]) -> None:
    """Check that the experiments section realizes what the plan maps.

    Appends to *issues* when the section has no ``\\subsection{`` or
    ``\\paragraph{`` anchor, and once for every mapped slot none of whose
    text aliases occurs in the section.

    Both checks look at the comment-stripped section text, so a commented-out
    ``% \\subsection{...}`` line does not count as a structural anchor.
    """
    visible_text = strip_latex_comments(section_text)
    paper_text = visible_text.lower()

    # NOTE(review): the message speaks of "multiple" mapped slots, but the
    # check runs regardless of how many slots the plan maps — confirm intent.
    if not re.search(r"\\subsection\{|\\paragraph\{", visible_text):
        issues.append(
            "experiments reference consumption mapped multiple structure slots, but the section has no subsection or paragraph anchors"
        )

    for slot in mapped_experiment_slots(plan_text):
        aliases = EXPERIMENT_TEXT_ALIASES.get(slot, ())
        if aliases and not contains_any(paper_text, aliases):
            issues.append(
                f"mapped slot `{slot}` is not realized in the section text; expand the current section instead of recording a shallow consumption plan"
            )
|
|
295
|
+
|
|
296
|
+
|
|
297
|
+
def validate_section_text(section_text: str, issues: list[str]) -> None:
    """Flag meta language in the section prose, appending problems to *issues*.

    LaTeX comments are stripped first; the remaining text is then checked
    against the service-style phrases and the workflow-only phrases.
    """
    prose = strip_latex_comments(section_text)
    phrase_checks = (
        (
            SERVICE_STYLE_PHRASES,
            "section prose contains service-style or AI-assistant meta language; rewrite as paper-facing academic prose",
        ),
        (
            WORKFLOW_ONLY_PHRASES,
            "section prose contains workflow-only placeholder language; move authoring notes out of the manuscript",
        ),
    )
    for phrases, message in phrase_checks:
        if contains_any(prose, phrases):
            issues.append(message)
|
|
303
|
+
|
|
304
|
+
|
|
305
|
+
def main() -> int:
    """Run the reference-consumption validation and return the exit status.

    Returns 1 when the section file is missing.  A missing consumption plan
    and any validation issues are warnings on stdout with status 0 in
    ``draft`` mode, and errors on stderr with status 1 in ``final`` mode.
    Returns 0 when no issues are found.
    """
    args = parse_args()
    draft_mode = args.mode == "draft"

    section_path = Path(args.section_file)
    if not section_path.exists():
        print(f"section file does not exist: {section_path}", file=sys.stderr)
        return 1

    # An explicit --consumption-plan wins; otherwise derive the default path.
    if args.consumption_plan:
        plan_path = Path(args.consumption_plan)
    else:
        plan_path = infer_consumption_plan(section_path, args.section)

    if plan_path is None or not plan_path.exists():
        message = (
            "missing reference consumption plan; create "
            f".lab/writing/reference-patterns/consumption-plan/{args.section}.md before reference-guided deep writing"
        )
        if draft_mode:
            print(f"WARNING: {message}")
            return 0
        print(message, file=sys.stderr)
        return 1

    issues: list[str] = []
    plan_text = read_text(plan_path)
    section_text = read_text(section_path)

    validate_plan_shape(plan_text, issues)
    if args.section == "experiments":
        validate_experiment_slots(plan_text, issues)
        validate_experiment_section_realizes_slots(plan_text, section_text, issues)
    validate_section_text(section_text, issues)

    if not issues:
        print("reference consumption is valid")
        return 0

    for issue in issues:
        if draft_mode:
            print(f"WARNING: {issue}")
        else:
            print(issue, file=sys.stderr)
    return 0 if draft_mode else 1
|
|
345
|
+
|
|
346
|
+
|
|
347
|
+
# Script entry point: exit with the status code returned by main().
if __name__ == "__main__":
    sys.exit(main())
|