superlab 0.1.63 → 0.1.65

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (28) hide show
  1. package/lib/auto_state.cjs +3 -0
  2. package/lib/i18n.cjs +6 -4
  3. package/lib/install.cjs +1 -1
  4. package/lib/lab_write_contract.json +4 -4
  5. package/lib/rule_preflight.cjs +49 -1
  6. package/package-assets/claude/commands/lab/write.md +1 -1
  7. package/package-assets/claude/commands/lab-write.md +1 -1
  8. package/package-assets/claude/commands/lab:write.md +1 -1
  9. package/package-assets/claude/commands/lab：write.md +1 -1
  10. package/package-assets/codex/prompts/lab/write.md +1 -1
  11. package/package-assets/codex/prompts/lab-write.md +1 -1
  12. package/package-assets/codex/prompts/lab:write.md +1 -1
  13. package/package-assets/codex/prompts/lab：write.md +1 -1
  14. package/package-assets/shared/lab/.managed/scripts/extract_reference_paper_structure.py +910 -0
  15. package/package-assets/shared/lab/.managed/scripts/paper_topology.py +91 -0
  16. package/package-assets/shared/lab/.managed/scripts/render_rule_preflight.py +115 -0
  17. package/package-assets/shared/lab/.managed/scripts/validate_manuscript_delivery.py +59 -0
  18. package/package-assets/shared/lab/.managed/scripts/validate_paper_topology.py +83 -0
  19. package/package-assets/shared/lab/.managed/scripts/validate_rule_preflight.py +183 -0
  20. package/package-assets/shared/lab/.managed/scripts/validate_section_draft.py +134 -12
  21. package/package-assets/shared/lab/.managed/templates/iteration-report.md +1 -0
  22. package/package-assets/shared/lab/.managed/templates/reference-template-intake.md +40 -0
  23. package/package-assets/shared/lab/.managed/templates/write-iteration.md +28 -0
  24. package/package-assets/shared/lab/context/auto-status.md +1 -0
  25. package/package-assets/shared/skills/lab/SKILL.md +2 -0
  26. package/package-assets/shared/skills/lab/stages/auto.md +1 -1
  27. package/package-assets/shared/skills/lab/stages/write.md +21 -3
  28. package/package.json +1 -1
@@ -10,6 +10,7 @@ from paper_topology import (
10
10
  load_workflow_config,
11
11
  resolve_paper_topology,
12
12
  )
13
+ from validate_paper_topology import validate_topology_artifacts
13
14
  from validate_rule_preflight import validate_rule_preflight
14
15
 
15
16
 
@@ -424,6 +425,13 @@ def check_write_rule_preflight(section_path: Path, issues: list[str]):
424
425
  issues.extend(validate_rule_preflight(latest_iteration, "write", project_root=project_root))
425
426
 
426
427
 
428
+ def check_active_paper_topology(section_path: Path, issues: list[str]):
429
+ project_root = find_project_root(section_path)
430
+ if project_root is None:
431
+ return
432
+ issues.extend(validate_topology_artifacts(project_root))
433
+
434
+
427
435
  def check_abstract(text: str, issues: list[str]):
428
436
  numbers = re.findall(r"\b\d+(?:\.\d+)?\b", text)
429
437
  if len(numbers) > 6:
@@ -496,6 +504,100 @@ def check_method(text: str, issues: list[str]):
496
504
  issues.append("method should explain the technical advantage")
497
505
 
498
506
 
507
+ def has_performance_claim(text: str) -> bool:
508
+ return contains_any(
509
+ text,
510
+ (
511
+ "outperform",
512
+ "outperforms",
513
+ "improve",
514
+ "improves",
515
+ "improved",
516
+ "gain",
517
+ "gains",
518
+ "better",
519
+ "stronger",
520
+ "superior",
521
+ "state-of-the-art",
522
+ "sota",
523
+ "reduce",
524
+ "reduces",
525
+ "降低",
526
+ "提升",
527
+ "优于",
528
+ "超过",
529
+ "更好",
530
+ "增益",
531
+ ),
532
+ )
533
+
534
+
535
+ def has_numeric_or_table_evidence(text: str) -> bool:
536
+ if re.search(r"\b\d+\.\d+\b", text):
537
+ return True
538
+ if re.search(r"\b\d+(?:\.\d+)?\s*(?:%|pp|points?|AUUC|Qini|AUC|F1)\b", text, flags=re.IGNORECASE):
539
+ return True
540
+ if r"\pm" in text:
541
+ return True
542
+ return bool(
543
+ re.search(r"\\(?:auto|c|C)?ref\{(?:tab|fig):", text)
544
+ or re.search(r"\b(?:Table|Figure|Fig\.|表|图)~?\\ref\{", text)
545
+ )
546
+
547
+
548
+ def has_generic_comparator_without_anchor(text: str) -> bool:
549
+ generic_comparator = contains_any(
550
+ text,
551
+ (
552
+ "previous methods",
553
+ "prior methods",
554
+ "existing methods",
555
+ "several baselines",
556
+ "the baselines",
557
+ "baseline suite",
558
+ "previous work",
559
+ "prior work",
560
+ "现有方法",
561
+ "已有方法",
562
+ "若干基线",
563
+ "基线集合",
564
+ ),
565
+ )
566
+ if not generic_comparator:
567
+ return False
568
+ if r"\cite{" in text or r"\citet{" in text or r"\citep{" in text:
569
+ return False
570
+ return not bool(re.search(r"\b[A-Z][A-Za-z0-9-]{2,}(?:\s*,\s*[A-Z][A-Za-z0-9-]{2,})+", text))
571
+
572
+
573
+ def has_repeated_split_protocol(text: str) -> bool:
574
+ return bool(
575
+ re.search(r"\b\d+\s+(?:random\s+)?(?:splits|seeds|runs)\b", text, flags=re.IGNORECASE)
576
+ or re.search(r"\bacross\s+(?:random\s+)?(?:splits|seeds|runs)\b", text, flags=re.IGNORECASE)
577
+ or re.search(r"\b重复\s*\d+\s*次", text)
578
+ )
579
+
580
+
581
+ def has_variance_report(text: str) -> bool:
582
+ return contains_any(
583
+ text,
584
+ (
585
+ r"\pm",
586
+ "standard deviation",
587
+ "std",
588
+ "confidence interval",
589
+ "confidence intervals",
590
+ "ci",
591
+ "variance",
592
+ "mean",
593
+ "平均",
594
+ "标准差",
595
+ "置信区间",
596
+ "方差",
597
+ ),
598
+ )
599
+
600
+
499
601
  def check_experiments(text: str, issues: list[str]):
500
602
  if not contains_any(
501
603
  text,
@@ -523,6 +625,18 @@ def check_experiments(text: str, issues: list[str]):
523
625
  ),
524
626
  ):
525
627
  issues.append("experiments should include benchmark scene notes")
628
+ if has_performance_claim(text) and not has_numeric_or_table_evidence(text):
629
+ issues.append(
630
+ "experiment performance claims should tie to concrete metric or numeric evidence instead of prose-only claims"
631
+ )
632
+ if has_generic_comparator_without_anchor(text):
633
+ issues.append(
634
+ "experiments use generic comparator names; name the comparator family, table anchor, or citations before more polish"
635
+ )
636
+ if has_repeated_split_protocol(text) and not has_variance_report(text):
637
+ issues.append(
638
+ "repeated split or seed protocol should report variance, confidence intervals, or an explicit variance disposition"
639
+ )
526
640
 
527
641
 
528
642
  def check_conclusion(text: str, issues: list[str]):
@@ -550,25 +664,33 @@ def main():
550
664
  return 1
551
665
 
552
666
  text = read_text(section_path)
553
- issues: list[str] = []
554
- check_common_section_gate_risks(text, issues)
555
- check_write_rule_preflight(section_path, issues)
556
- check_paper_topology_targeting(section_path, issues)
557
- check_workflow_language_targeting(section_path, issues)
558
- check_section_style_policy(text, args.section, issues)
559
- SECTION_CHECKS[args.section](text, issues)
560
- check_neighbor_asset_files(args.section, section_path, issues)
561
-
562
- if not issues:
667
+ blocking_issues: list[str] = []
668
+ warning_issues: list[str] = []
669
+ check_write_rule_preflight(section_path, blocking_issues)
670
+ check_active_paper_topology(section_path, blocking_issues)
671
+ check_paper_topology_targeting(section_path, blocking_issues)
672
+ check_workflow_language_targeting(section_path, blocking_issues)
673
+ check_common_section_gate_risks(text, warning_issues)
674
+ check_section_style_policy(text, args.section, warning_issues)
675
+ SECTION_CHECKS[args.section](text, warning_issues)
676
+ check_neighbor_asset_files(args.section, section_path, warning_issues)
677
+
678
+ if not blocking_issues and not warning_issues:
563
679
  print("section draft is valid")
564
680
  return 0
565
681
 
566
682
  if args.mode == "draft":
567
- for issue in issues:
683
+ if blocking_issues:
684
+ for issue in blocking_issues:
685
+ print(issue, file=sys.stderr)
686
+ for issue in warning_issues:
687
+ print(f"WARNING: {issue}")
688
+ return 1
689
+ for issue in warning_issues:
568
690
  print(f"WARNING: {issue}")
569
691
  return 0
570
692
 
571
- for issue in issues:
693
+ for issue in [*blocking_issues, *warning_issues]:
572
694
  print(issue, file=sys.stderr)
573
695
  return 1
574
696
 
@@ -8,6 +8,7 @@
8
8
  - Resolved stage:
9
9
  - Resolved mode:
10
10
  - Resolved target:
11
+ - Preflight stamp:
11
12
  - Override reason, if any:
12
13
 
13
14
  ## Round
@@ -0,0 +1,40 @@
1
+ # Reference Template Intake
2
+
3
+ ## Purpose
4
+
5
+ - Help `/lab:write` reproduce mature paper structure from multiple reference templates.
6
+ - Extract section slots, paragraph roles, and table/figure functions.
7
+ - Reuse structure and logic only; do not copy wording, claims, metrics, or conclusions.
8
+
9
+ ## Sources
10
+
11
+ - Source paths or URLs:
12
+ - Extraction command:
13
+ - Output root:
14
+
15
+ ## Section Templates
16
+
17
+ - Abstract:
18
+ - Introduction:
19
+ - Related work:
20
+ - Method:
21
+ - Experiments:
22
+ - Conclusion:
23
+
24
+ ## Visual/Table Templates
25
+
26
+ - Main result tables:
27
+ - Ablation tables:
28
+ - Dataset/protocol tables:
29
+ - Method overview figures:
30
+ - Result or trade-off figures:
31
+ - Analysis/sensitivity figures:
32
+
33
+ ## Write Handoff
34
+
35
+ - Aggregate template playbook:
36
+ - Section template selected for this round:
37
+ - Visual/table template selected for this round:
38
+ - Multi-template reproduction plan:
39
+ - Current-paper evidence that will fill the template:
40
+ - Structure-only reuse boundary:
@@ -8,6 +8,7 @@
8
8
  - Resolved stage:
9
9
  - Resolved mode:
10
10
  - Resolved target:
11
+ - Preflight stamp:
11
12
  - Override reason, if any:
12
13
 
13
14
  ## Round
@@ -66,6 +67,33 @@
66
67
  - Any discouraged move kept and why:
67
68
  - Any banned move found:
68
69
 
70
+ ## Review Issue Bundle
71
+
72
+ - Issue bundle path:
73
+ - New issues:
74
+ - Resolved issues:
75
+ - Open issues:
76
+ - Quote-backed findings recorded:
77
+ - Script-backed findings separated from judgment-backed findings:
78
+
79
+ ## Re-Audit Status
80
+
81
+ - Previous issue bundle compared:
82
+ - Fully addressed root causes:
83
+ - Partially addressed root causes:
84
+ - Not addressed root causes:
85
+ - New root causes:
86
+ - Which root-cause issues block further prose polish:
87
+
88
+ ## Reference Template Intake
89
+
90
+ - Reference sources used:
91
+ - Aggregate template playbook:
92
+ - Section templates consulted:
93
+ - Visual/table templates consulted:
94
+ - Multi-template reproduction plan:
95
+ - Structure-only reuse boundary:
96
+
69
97
  ## Table Semantics
70
98
 
71
99
  - Metrics promised in Method:
@@ -8,6 +8,7 @@
8
8
  - Resolved stage:
9
9
  - Resolved mode:
10
10
  - Resolved target:
11
+ - Preflight stamp:
11
12
  - Override reason, if any:
12
13
 
13
14
  ## Runtime State
@@ -37,7 +37,9 @@ Use this skill when the user invokes `/lab:*` or asks for the structured researc
37
37
  - `Resolved stage`
38
38
  - `Resolved mode`
39
39
  - `Resolved target`
40
+ - `Preflight stamp`
40
41
  - `Override reason, if any`
42
+ - Generate the `Rule Preflight` block from `.lab/.managed/rule-manifest.json` with the managed preflight renderer instead of handwriting it from memory.
41
43
  - Treat missing, stale, or contradictory `Rule Preflight` data as a stage-contract failure.
42
44
  - Project-installed rules take priority over model memory. If remembered patterns conflict with the installed rule source, follow the installed source recorded in `.lab/.managed/rule-manifest.json`.
43
45
  - Final paper output should default to LaTeX, and its manuscript language should be decided separately from the workflow language.
@@ -29,7 +29,7 @@
29
29
  ## Rule Preflight
30
30
 
31
31
  - Read `.lab/.managed/rule-manifest.json` before arming auto mode.
32
- - The visible `Auto preflight` summary must also record the installed rule source file, rule source revision, project version, resolved stage, resolved mode, resolved target, and any override reason.
32
+ - The visible `Auto preflight` summary must also record the installed rule source file, rule source revision, project version, resolved stage, resolved mode, resolved target, a machine-generated preflight stamp, and any override reason.
33
33
  - Keep the same `Rule Preflight` fields in `.lab/context/auto-status.md` while the campaign is live.
34
34
  - If the installed auto rule and the current campaign behavior disagree, stop and fix the contract or record a valid override reason before launching the loop.
35
35
 
@@ -33,9 +33,10 @@
33
33
  ## Rule Preflight
34
34
 
35
35
  - Read `.lab/.managed/rule-manifest.json` before drafting.
36
- - Record a `Rule Preflight` block in the write-iteration artifact before revising prose.
37
- - The `Rule Preflight` block must record the installed rule source file, rule source revision, project version, resolved stage, resolved mode, resolved target, and any override reason.
36
+ - Write the `Rule Preflight` block with `.lab/.managed/scripts/render_rule_preflight.py` before revising prose; do not hand-fill it from memory.
37
+ - The `Rule Preflight` block must record the installed rule source file, rule source revision, project version, resolved stage, resolved mode, resolved target, a machine-generated preflight stamp, and any override reason.
38
38
  - If the installed write rule and the current round behavior disagree, fix the targeting or record a valid override reason before further editing.
39
+ - In draft mode, rule-preflight mismatches and paper-topology mismatches are blockers, not polish warnings.
39
40
 
40
41
  ## Context Write Set
41
42
 
@@ -112,6 +113,13 @@ Run these on every round:
112
113
  - Load only the current section guide. Do not load every section guide at once.
113
114
  - Reuse example-bank structure, paragraph roles, sentence logic, and paper-facing LaTeX asset patterns when examples are bundled, but never copy wording verbatim.
114
115
  - Treat example cites and example file names as writing references, not as evidence for the current paper.
116
+ - When the user provides local PDFs, PDF URLs, HTML pages, or reference papers while invoking `/lab:write`, run `.lab/.managed/scripts/extract_reference_paper_structure.py --output-dir .lab/writing/reference-patterns <sources...>` before drafting unless an up-to-date `.lab/writing/reference-patterns/aggregate-template-playbook.md` already covers those exact sources.
117
+ - Treat reference-paper intake as an internal write capability, not a separate user command. The user should still only need `/lab:write`; do not ask them to learn another workflow.
118
+ - The purpose of reference-paper intake is to help `/lab:write` reproduce mature multi-template writing structure: section slots, paragraph roles, argument sequence, table and figure functions, placement logic, and bridge sentences.
119
+ - Use at least two compatible reference templates when available. If only one reference is available, mark it as a single-template pattern and avoid treating it as a universal standard.
120
+ - For every reference table or figure, extract what reader question it answers, which section/subsection it supports, why it is placed there, what the prose before it should do, and what the prose after it should explain.
121
+ - When drafting from reference templates, reproduce structure and logic only. Do not copy wording, claims, metrics, baselines, data, captions, or conclusions from reference papers.
122
+ - Before drafting a section from reference templates, read `.lab/writing/reference-patterns/aggregate-template-playbook.md`, the matching file under `.lab/writing/reference-patterns/section-templates/`, and the matching visual/table template under `.lab/writing/reference-patterns/visual-templates/` when the section uses tables or figures.
115
123
  - Build a compact mini-outline before prose.
116
124
  - Academic readability standards are the same in `workflow_language` and `paper_language`; changing languages must not lower external-reader clarity.
117
125
  - If the current round introduces or revises key terms, abbreviations, metric names, mechanism names, or system labels, explain them at first mention by briefly stating what they are and why they matter here.
@@ -125,6 +133,10 @@ Run these on every round:
125
133
  - Before any additional tighten, compress, or polish pass on the same section, run a section-level acceptance gate first.
126
134
  - The section-level acceptance gate is passed only when canonical naming consistency, adjacent-section consistency, claim, metric, and ranking consistency with the current evidence, local clarity, local concision, and section-style compliance are all explicitly checked and no unresolved blocker remains.
127
135
  - If the current section still contains a banned expression or banned rhetorical move from `section-style-policies.md`, the round has not passed the section-level acceptance gate.
136
+ - If reviewer notes, validator warnings, or prior write rounds produced issues, record them as a review issue bundle in the write-iteration artifact before further polishing.
137
+ - Review issue bundles should separate script-backed findings from judgment-backed findings, preserve the source quote or local pointer when available, and track whether each issue is new, resolved, open, or superseded.
138
+ - Before continuing prose polish after a review issue bundle exists, run a re-audit pass that compares the current draft against previous root causes and records fully addressed, partially addressed, not addressed, and newly introduced root causes.
139
+ - Do not answer a review issue by merely changing wording around it. Fix the underlying section structure, evidence support, terminology definition, or asset/table linkage that caused the issue.
128
140
  - If the current round changes the paper's canonical experiment or evaluation protocol (for example split ratio, train/test size, seed or split count, benchmark set, or main-table evaluation contract), treat it as a canonical protocol replacement unless the user explicitly scopes it as supplementary or appendix-only.
129
141
  - A canonical protocol replacement requires a paper-wide impact audit before more polishing: identify stale sections and assets across Abstract, Introduction, Method, Experiments, Conclusion, tables, figures, analysis assets, and `.lab/writing/plan.md`, then update the plan and highest-impact stale targets first.
130
142
  - When a paper-wide impact audit is still open, default the next write action to the highest-impact canonical stale section or asset instead of polishing the same section again.
@@ -156,12 +168,13 @@ Run these on every round:
156
168
  - record what each figure or analysis asset should show and why the reader needs it
157
169
  - record which citation anchors must appear in the section and why each anchor matters
158
170
  - Before drafting `introduction`, `method`, `experiments`, `related work`, or `conclusion`, run `.lab/.managed/scripts/validate_paper_plan.py --paper-plan .lab/writing/plan.md`.
159
- - Before drafting `introduction`, `method`, `experiments`, `related work`, or `conclusion`, also run `.lab/.managed/scripts/validate_paper_topology.py --project-root .` so plan/context files cannot keep presenting legacy layers as the active paper topology.
171
+ - Before drafting `introduction`, `method`, `experiments`, `related work`, or `conclusion`, also run `.lab/.managed/scripts/validate_paper_topology.py --project-root .` so plan/context files cannot keep presenting legacy layers as the active paper topology. If that validator fails, do not mark the topology as repaired and do not continue section polish.
160
172
  - When the repository workflow config is available, the paper-plan validator also checks that `.lab/writing/plan.md` stays in `workflow_language` instead of silently drifting into another language.
161
173
  - If the paper-plan validator fails, stop and fill `.lab/writing/plan.md` first instead of drafting prose.
162
174
  - During ordinary draft rounds, run `.lab/.managed/scripts/validate_section_draft.py --section <section> --section-file <section-file> --mode draft` and `.lab/.managed/scripts/validate_paper_claims.py --section-file <section-file> --mode draft` after revising the active section.
163
175
  - Treat draft-round output from the section and claim validators as warnings that must be recorded and addressed in the write-iteration artifact, not as immediate stop conditions.
164
176
  - If the active section already lives under a paper-layer `sections/` directory, the draft section validator should also warn when the neighboring required figure or analysis placeholder files are still missing from that same paper layer.
177
+ - For experiment sections, treat prose-only performance claims, unnamed generic comparator phrases, repeated split/seed protocols without variance disposition, and result paragraphs without concrete metric/table anchors as section warnings that must be fixed before more prose-only polishing.
165
178
  - For each subsection, explicitly include motivation, design, and technical advantage when applicable.
166
179
  - Avoid a writing style that reads like incremental patching of a naive baseline.
167
180
  - Keep terminology stable across the full paper.
@@ -182,6 +195,9 @@ Run these on every round:
182
195
  - `<deliverables_root>/paper/analysis/analysis-asset.tex`
183
196
  - Table assets must use paper-facing LaTeX structure with `booktabs`, caption, label, and consistent precision.
184
197
  - Table assets must also include a local table note that explains row meaning, column meaning, metric definitions, comparison scope, and any important caveat.
198
+ - Table assets must avoid vertical rules, `\hline`, and `\cline`; use `booktabs` rules and whitespace instead.
199
+ - Table captions should appear before the tabular body so the table can be read top-down in manuscript order.
200
+ - Numeric precision should be consistent within each metric column unless the table note explains a deliberate exception.
185
201
  - Table assets must not rely on aggressive width hacks by default; if width control is still needed after table redesign, document it locally and keep it readable.
186
202
  - Figure placeholders must explain what the final figure should show and why the reader needs it.
187
203
  - Core asset coverage for a paper-facing final draft should include a problem-setting or teaser figure, a method overview figure, a results overview figure, a main-results table, an ablation table, and one additional analysis asset.
@@ -199,6 +215,8 @@ Run these on every round:
199
215
  - When a round introduces or revises key terms, include a compact terminology note in the user-facing round summary and record the terminology-clarity self-check in the write-iteration artifact.
200
216
  - Record the section-level acceptance gate in the write-iteration artifact before recommending further tightening on the same section.
201
217
  - Record section-style policy compliance, any retained discouraged move, and any banned move found in the write-iteration artifact.
218
+ - Record the review issue bundle and re-audit status in the write-iteration artifact whenever the round follows reviewer notes, validator warnings, or prior failed writing rounds.
219
+ - Record the reference template intake in the write-iteration artifact whenever the round uses PDFs, URLs, or `.lab/writing/reference-patterns/` artifacts: sources used, aggregate playbook path, section templates consulted, visual/table templates consulted, multi-template reproduction plan, and structure-only reuse boundary.
202
220
  - Record the round target layer in the write-iteration artifact as `canonical manuscript`, `workflow-language paper layer`, or `both`.
203
221
  - If workflow-language was active and the round still targeted the canonical manuscript, record why canonical-only writing was acceptable in the write-iteration artifact.
204
222
  - If both layers were edited, record why the cross-language sync was required and whether it was explicitly requested by the user or required by final-draft/export finalization.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "superlab",
3
- "version": "0.1.63",
3
+ "version": "0.1.65",
4
4
  "description": "Strict /lab research workflow installer for Codex and Claude",
5
5
  "keywords": [
6
6
  "codex",