npm - superlab - Versions diffs - 0.1.63 → 0.1.65 - Mend

superlab 0.1.63 → 0.1.65

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (28) hide show

package/package-assets/shared/lab/.managed/scripts/validate_section_draft.py CHANGED Viewed

@@ -10,6 +10,7 @@ from paper_topology import (
     load_workflow_config,
     resolve_paper_topology,
 )
+from validate_paper_topology import validate_topology_artifacts
 from validate_rule_preflight import validate_rule_preflight
@@ -424,6 +425,13 @@ def check_write_rule_preflight(section_path: Path, issues: list[str]):
     issues.extend(validate_rule_preflight(latest_iteration, "write", project_root=project_root))
+def check_active_paper_topology(section_path: Path, issues: list[str]):
+    """Append active paper-topology validation findings to ``issues``.
+
+    The project root is looked up from ``section_path`` with
+    ``find_project_root``. When no root is found the check is skipped and
+    ``issues`` is left untouched; otherwise everything returned by
+    ``validate_topology_artifacts(project_root)`` is appended in place.
+    """
+    project_root = find_project_root(section_path)
+    if project_root is None:
+        # No project root to validate against, so there is nothing to report.
+        return
+    issues.extend(validate_topology_artifacts(project_root))
 def check_abstract(text: str, issues: list[str]):
     numbers = re.findall(r"\b\d+(?:\.\d+)?\b", text)
     if len(numbers) > 6:
@@ -496,6 +504,100 @@ def check_method(text: str, issues: list[str]):
         issues.append("method should explain the technical advantage")
+def has_performance_claim(text: str) -> bool:
+    """Return whether ``text`` contains any performance-claim keyword.
+
+    Delegates to ``contains_any`` with a fixed list of English and Chinese
+    comparative terms (outperform, improve, gain, better, reduce, and their
+    Chinese counterparts).
+    """
+    # NOTE(review): case handling and substring-vs-word matching are decided by
+    # ``contains_any``, which is not visible here. If it is a plain substring
+    # test, the inflected forms below are redundant and "gain" would also fire
+    # on words such as "against" -- confirm against the helper.
+    return contains_any(
+        text,
+        (
+            "outperform",
+            "outperforms",
+            "improve",
+            "improves",
+            "improved",
+            "gain",
+            "gains",
+            "better",
+            "stronger",
+            "superior",
+            "state-of-the-art",
+            "sota",
+            "reduce",
+            "reduces",
+            "降低",
+            "提升",
+            "优于",
+            "超过",
+            "更好",
+            "增益",
+        ),
+    )
+def has_numeric_or_table_evidence(text: str) -> bool:
+    """Return whether ``text`` carries numeric or table/figure evidence.
+
+    Evidence is any one of: a decimal number, a number followed by a percent
+    sign or a metric unit, the LaTeX plus-minus macro, or a cross-reference
+    to a table or figure.
+    """
+    # A decimal value such as 0.83 counts as evidence on its own.
+    if re.search(r"\b\d+\.\d+\b", text):
+        return True
+    # A number followed by a unit. The percent sign is kept outside the
+    # trailing ``\b``: "%" is not a word character, so a boundary after it only
+    # exists when a letter or digit follows immediately, and "5% " or "5%."
+    # would never match. The optional backslash accepts the LaTeX-escaped form.
+    if re.search(
+        r"\b\d+(?:\.\d+)?\s*(?:\\?%|(?:pp|points?|AUUC|Qini|AUC|F1)\b)",
+        text,
+        flags=re.IGNORECASE,
+    ):
+        return True
+    if r"\pm" in text:
+        return True
+    # Cross-references: a ref-family macro whose label starts with tab:/fig:,
+    # or a Table/Figure word followed by a ref macro. The CJK words carry no
+    # ``\b`` because ideographs are word characters, so no boundary exists
+    # between two adjacent ideographs in running Chinese text. A tie or
+    # ordinary whitespace may separate the word from the macro.
+    return bool(
+        re.search(r"\\(?:auto|c|C)?ref\{(?:tab|fig):", text)
+        or re.search(r"(?:\b(?:Table|Figure|Fig\.)|表|图)[~\s]*\\ref\{", text)
+    )
+def has_generic_comparator_without_anchor(text: str) -> bool:
+    """Return whether ``text`` names its comparators only generically.
+
+    True when a generic comparator phrase is present and the text offers
+    neither a citation macro nor a comma-separated list of capitalised names
+    to anchor it.
+    """
+    generic_comparator = contains_any(
+        text,
+        (
+            "previous methods",
+            "prior methods",
+            "existing methods",
+            "several baselines",
+            "the baselines",
+            "baseline suite",
+            "previous work",
+            "prior work",
+            "现有方法",
+            "已有方法",
+            "若干基线",
+            "基线集合",
+        ),
+    )
+    if not generic_comparator:
+        return False
+    # Any in-text cite-family macro anchors the comparator. A pattern replaces
+    # the three literal prefixes so that optional arguments and other variants
+    # also count, e.g. \citep[see][]{key}, \citet*{key}, \citeauthor{key},
+    # \parencite{key}. \nocite is excluded because it prints nothing in the text.
+    if re.search(r"\\(?!nocite)[A-Za-z]*[Cc]ite[A-Za-z]*\*?\s*(?:\[[^\]]*\]\s*)*\{", text):
+        return False
+    # A comma-separated run of capitalised names (for example
+    # "DragonNet, CEVAE") is accepted as naming the comparators explicitly.
+    return not bool(re.search(r"\b[A-Z][A-Za-z0-9-]{2,}(?:\s*,\s*[A-Z][A-Za-z0-9-]{2,})+", text))
+def has_repeated_split_protocol(text: str) -> bool:
+    """Return whether ``text`` describes a repeated split, seed, or run protocol.
+
+    Recognises "<N> [random] splits/seeds/runs", "across [random]
+    splits/seeds/runs" (both case-insensitive), and the Chinese phrasing for
+    "repeated <N> times".
+    """
+    return bool(
+        re.search(r"\b\d+\s+(?:random\s+)?(?:splits|seeds|runs)\b", text, flags=re.IGNORECASE)
+        or re.search(r"\bacross\s+(?:random\s+)?(?:splits|seeds|runs)\b", text, flags=re.IGNORECASE)
+        # No ``\b`` in front of the CJK phrase: ideographs are word characters,
+        # so a boundary never exists when another ideograph precedes it, which
+        # is the normal case in running Chinese text.
+        or re.search(r"重复\s*\d+\s*次", text)
+    )
+def has_variance_report(text: str) -> bool:
+    """Return whether ``text`` contains any variance-reporting keyword.
+
+    Delegates to ``contains_any`` with a fixed list of English and Chinese
+    terms for spread and uncertainty, plus the LaTeX plus-minus macro.
+    """
+    # NOTE(review): matching semantics live in ``contains_any``, which is not
+    # visible here. If it is a plain substring test, the short needles "ci",
+    # "std" and "mean" would also fire inside ordinary words ("precision",
+    # "specific", "meaningful"), which would make this check pass for almost
+    # any experiments text -- confirm against the helper.
+    return contains_any(
+        text,
+        (
+            r"\pm",
+            "standard deviation",
+            "std",
+            "confidence interval",
+            "confidence intervals",
+            "ci",
+            "variance",
+            "mean",
+            "平均",
+            "标准差",
+            "置信区间",
+            "方差",
+        ),
+    )
 def check_experiments(text: str, issues: list[str]):
     if not contains_any(
         text,
@@ -523,6 +625,18 @@ def check_experiments(text: str, issues: list[str]):
         ),
     ):
         issues.append("experiments should include benchmark scene notes")
+    if has_performance_claim(text) and not has_numeric_or_table_evidence(text):
+        issues.append(
+            "experiment performance claims should tie to concrete metric or numeric evidence instead of prose-only claims"
+        )
+    if has_generic_comparator_without_anchor(text):
+        issues.append(
+            "experiments use generic comparator names; name the comparator family, table anchor, or citations before more polish"
+        )
+    if has_repeated_split_protocol(text) and not has_variance_report(text):
+        issues.append(
+            "repeated split or seed protocol should report variance, confidence intervals, or an explicit variance disposition"
+        )
 def check_conclusion(text: str, issues: list[str]):
@@ -550,25 +664,33 @@ def main():
         return 1
     text = read_text(section_path)
-    issues: list[str] = []
-    check_common_section_gate_risks(text, issues)
-    check_write_rule_preflight(section_path, issues)
-    check_paper_topology_targeting(section_path, issues)
-    check_workflow_language_targeting(section_path, issues)
-    check_section_style_policy(text, args.section, issues)
-    SECTION_CHECKS[args.section](text, issues)
-    check_neighbor_asset_files(args.section, section_path, issues)
-    if not issues:
+    blocking_issues: list[str] = []
+    warning_issues: list[str] = []
+    check_write_rule_preflight(section_path, blocking_issues)
+    check_active_paper_topology(section_path, blocking_issues)
+    check_paper_topology_targeting(section_path, blocking_issues)
+    check_workflow_language_targeting(section_path, blocking_issues)
+    check_common_section_gate_risks(text, warning_issues)
+    check_section_style_policy(text, args.section, warning_issues)
+    SECTION_CHECKS[args.section](text, warning_issues)
+    check_neighbor_asset_files(args.section, section_path, warning_issues)
+    if not blocking_issues and not warning_issues:
         print("section draft is valid")
         return 0
     if args.mode == "draft":
-        for issue in issues:
+        if blocking_issues:
+            for issue in blocking_issues:
+                print(issue, file=sys.stderr)
+            for issue in warning_issues:
+                print(f"WARNING: {issue}")
+            return 1
+        for issue in warning_issues:
             print(f"WARNING: {issue}")
         return 0
-    for issue in issues:
+    for issue in [*blocking_issues, *warning_issues]:
         print(issue, file=sys.stderr)
     return 1

package/package-assets/shared/lab/.managed/templates/iteration-report.md CHANGED Viewed

@@ -8,6 +8,7 @@
 - Resolved stage:
 - Resolved mode:
 - Resolved target:
+- Preflight stamp:
 - Override reason, if any:
 ## Round

package/package-assets/shared/lab/.managed/templates/reference-template-intake.md ADDED Viewed

@@ -0,0 +1,40 @@
+# Reference Template Intake
+## Purpose
+- Help `/lab:write` reproduce mature paper structure from multiple reference templates.
+- Extract section slots, paragraph roles, and table/figure functions.
+- Reuse structure and logic only; do not copy wording, claims, metrics, or conclusions.
+## Sources
+- Source paths or URLs:
+- Extraction command:
+- Output root:
+## Section Templates
+- Abstract:
+- Introduction:
+- Related work:
+- Method:
+- Experiments:
+- Conclusion:
+## Visual/Table Templates
+- Main result tables:
+- Ablation tables:
+- Dataset/protocol tables:
+- Method overview figures:
+- Result or trade-off figures:
+- Analysis/sensitivity figures:
+## Write Handoff
+- Aggregate template playbook:
+- Section template selected for this round:
+- Visual/table template selected for this round:
+- Multi-template reproduction plan:
+- Current-paper evidence that will fill the template:
+- Structure-only reuse boundary:

package/package-assets/shared/lab/.managed/templates/write-iteration.md CHANGED Viewed

@@ -8,6 +8,7 @@
 - Resolved stage:
 - Resolved mode:
 - Resolved target:
+- Preflight stamp:
 - Override reason, if any:
 ## Round
@@ -66,6 +67,33 @@
 - Any discouraged move kept and why:
 - Any banned move found:
+## Review Issue Bundle
+- Issue bundle path:
+- New issues:
+- Resolved issues:
+- Open issues:
+- Quote-backed findings recorded:
+- Script-backed findings separated from judgment-backed findings:
+## Re-Audit Status
+- Previous issue bundle compared:
+- Fully addressed root causes:
+- Partially addressed root causes:
+- Not addressed root causes:
+- New root causes:
+- Which root-cause issues block further prose polish:
+## Reference Template Intake
+- Reference sources used:
+- Aggregate template playbook:
+- Section templates consulted:
+- Visual/table templates consulted:
+- Multi-template reproduction plan:
+- Structure-only reuse boundary:
 ## Table Semantics
 - Metrics promised in Method:

package/package-assets/shared/lab/context/auto-status.md CHANGED Viewed

@@ -8,6 +8,7 @@
 - Resolved stage:
 - Resolved mode:
 - Resolved target:
+- Preflight stamp:
 - Override reason, if any:
 ## Runtime State

package/package-assets/shared/skills/lab/SKILL.md CHANGED Viewed

@@ -37,7 +37,9 @@ Use this skill when the user invokes `/lab:*` or asks for the structured researc
   - `Resolved stage`
   - `Resolved mode`
   - `Resolved target`
+  - `Preflight stamp`
   - `Override reason, if any`
+- Generate the `Rule Preflight` block from `.lab/.managed/rule-manifest.json` with the managed preflight renderer instead of handwriting it from memory.
 - Treat missing, stale, or contradictory `Rule Preflight` data as a stage-contract failure.
 - Project-installed rules take priority over model memory. If remembered patterns conflict with the installed rule source, follow the installed source recorded in `.lab/.managed/rule-manifest.json`.
 - Final paper output should default to LaTeX, and its manuscript language should be decided separately from the workflow language.

package/package-assets/shared/skills/lab/stages/auto.md CHANGED Viewed

@@ -29,7 +29,7 @@
 ## Rule Preflight
 - Read `.lab/.managed/rule-manifest.json` before arming auto mode.
-- The visible `Auto preflight` summary must also record the installed rule source file, rule source revision, project version, resolved stage, resolved mode, resolved target, and any override reason.
+- The visible `Auto preflight` summary must also record the installed rule source file, rule source revision, project version, resolved stage, resolved mode, resolved target, a machine-generated preflight stamp, and any override reason.
 - Keep the same `Rule Preflight` fields in `.lab/context/auto-status.md` while the campaign is live.
 - If the installed auto rule and the current campaign behavior disagree, stop and fix the contract or record a valid override reason before launching the loop.

package/package-assets/shared/skills/lab/stages/write.md CHANGED Viewed

@@ -33,9 +33,10 @@
 ## Rule Preflight
 - Read `.lab/.managed/rule-manifest.json` before drafting.
-- Record a `Rule Preflight` block in the write-iteration artifact before revising prose.
-- The `Rule Preflight` block must record the installed rule source file, rule source revision, project version, resolved stage, resolved mode, resolved target, and any override reason.
+- Write the `Rule Preflight` block with `.lab/.managed/scripts/render_rule_preflight.py` before revising prose; do not hand-fill it from memory.
+- The `Rule Preflight` block must record the installed rule source file, rule source revision, project version, resolved stage, resolved mode, resolved target, a machine-generated preflight stamp, and any override reason.
 - If the installed write rule and the current round behavior disagree, fix the targeting or record a valid override reason before further editing.
+- In draft mode, rule-preflight mismatches and paper-topology mismatches are blockers, not polish warnings.
 ## Context Write Set
@@ -112,6 +113,13 @@ Run these on every round:
 - Load only the current section guide. Do not load every section guide at once.
 - Reuse example-bank structure, paragraph roles, sentence logic, and paper-facing LaTeX asset patterns when examples are bundled, but never copy wording verbatim.
 - Treat example cites and example file names as writing references, not as evidence for the current paper.
+- When the user provides local PDFs, PDF URLs, HTML pages, or reference papers while invoking `/lab:write`, run `.lab/.managed/scripts/extract_reference_paper_structure.py --output-dir .lab/writing/reference-patterns <sources...>` before drafting unless an up-to-date `.lab/writing/reference-patterns/aggregate-template-playbook.md` already covers those exact sources.
+- Treat reference-paper intake as an internal write capability, not a separate user command. The user should still only need `/lab:write`; do not ask them to learn another workflow.
+- The purpose of reference-paper intake is to help `/lab:write` reproduce mature multi-template writing structure: section slots, paragraph roles, argument sequence, table and figure functions, placement logic, and bridge sentences.
+- Use at least two compatible reference templates when available. If only one reference is available, mark it as a single-template pattern and avoid treating it as a universal standard.
+- For every reference table or figure, extract what reader question it answers, which section/subsection it supports, why it is placed there, what the prose before it should do, and what the prose after it should explain.
+- When drafting from reference templates, reproduce structure and logic only. Do not copy wording, claims, metrics, baselines, data, captions, or conclusions from reference papers.
+- Before drafting a section from reference templates, read `.lab/writing/reference-patterns/aggregate-template-playbook.md`, the matching file under `.lab/writing/reference-patterns/section-templates/`, and the matching visual/table template under `.lab/writing/reference-patterns/visual-templates/` when the section uses tables or figures.
 - Build a compact mini-outline before prose.
 - Academic readability standards are the same in `workflow_language` and `paper_language`; changing languages must not lower external-reader clarity.
 - If the current round introduces or revises key terms, abbreviations, metric names, mechanism names, or system labels, explain them at first mention by briefly stating what they are and why they matter here.
@@ -125,6 +133,10 @@ Run these on every round:
 - Before any additional tighten, compress, or polish pass on the same section, run a section-level acceptance gate first.
 - The section-level acceptance gate is passed only when canonical naming consistency, adjacent-section consistency, claim, metric, and ranking consistency with the current evidence, local clarity, local concision, and section-style compliance are all explicitly checked and no unresolved blocker remains.
 - If the current section still contains a banned expression or banned rhetorical move from `section-style-policies.md`, the round has not passed the section-level acceptance gate.
+- If reviewer notes, validator warnings, or prior write rounds produced issues, record them as a review issue bundle in the write-iteration artifact before further polishing.
+- Review issue bundles should separate script-backed findings from judgment-backed findings, preserve the source quote or local pointer when available, and track whether each issue is new, resolved, open, or superseded.
+- Before continuing prose polish after a review issue bundle exists, run a re-audit pass that compares the current draft against previous root causes and records fully addressed, partially addressed, not addressed, and newly introduced root causes.
+- Do not answer a review issue by merely changing wording around it. Fix the underlying section structure, evidence support, terminology definition, or asset/table linkage that caused the issue.
 - If the current round changes the paper's canonical experiment or evaluation protocol (for example split ratio, train/test size, seed or split count, benchmark set, or main-table evaluation contract), treat it as a canonical protocol replacement unless the user explicitly scopes it as supplementary or appendix-only.
 - A canonical protocol replacement requires a paper-wide impact audit before more polishing: identify stale sections and assets across Abstract, Introduction, Method, Experiments, Conclusion, tables, figures, analysis assets, and `.lab/writing/plan.md`, then update the plan and highest-impact stale targets first.
 - When a paper-wide impact audit is still open, default the next write action to the highest-impact canonical stale section or asset instead of polishing the same section again.
@@ -156,12 +168,13 @@ Run these on every round:
   - record what each figure or analysis asset should show and why the reader needs it
   - record which citation anchors must appear in the section and why each anchor matters
 - Before drafting `introduction`, `method`, `experiments`, `related work`, or `conclusion`, run `.lab/.managed/scripts/validate_paper_plan.py --paper-plan .lab/writing/plan.md`.
-- Before drafting `introduction`, `method`, `experiments`, `related work`, or `conclusion`, also run `.lab/.managed/scripts/validate_paper_topology.py --project-root .` so plan/context files cannot keep presenting legacy layers as the active paper topology.
+- Before drafting `introduction`, `method`, `experiments`, `related work`, or `conclusion`, also run `.lab/.managed/scripts/validate_paper_topology.py --project-root .` so plan/context files cannot keep presenting legacy layers as the active paper topology. If that validator fails, do not mark the topology as repaired and do not continue section polish.
 - When the repository workflow config is available, the paper-plan validator also checks that `.lab/writing/plan.md` stays in `workflow_language` instead of silently drifting into another language.
 - If the paper-plan validator fails, stop and fill `.lab/writing/plan.md` first instead of drafting prose.
 - During ordinary draft rounds, run `.lab/.managed/scripts/validate_section_draft.py --section <section> --section-file <section-file> --mode draft` and `.lab/.managed/scripts/validate_paper_claims.py --section-file <section-file> --mode draft` after revising the active section.
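 - Treat draft-round output from the section and claim validators as warnings that must be recorded and addressed in the write-iteration artifact, not as immediate stop conditions; the exception is rule-preflight and paper-topology mismatches, which block the round even in draft mode.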
 - If the active section already lives under a paper-layer `sections/` directory, the draft section validator should also warn when the neighboring required figure or analysis placeholder files are still missing from that same paper layer.
+- For experiment sections, treat prose-only performance claims, unnamed generic comparator phrases, repeated split/seed protocols without variance disposition, and result paragraphs without concrete metric/table anchors as section warnings that must be fixed before more prose-only polishing.
 - For each subsection, explicitly include motivation, design, and technical advantage when applicable.
 - Avoid a writing style that reads like incremental patching of a naive baseline.
 - Keep terminology stable across the full paper.
@@ -182,6 +195,9 @@ Run these on every round:
   - `<deliverables_root>/paper/analysis/analysis-asset.tex`
 - Table assets must use paper-facing LaTeX structure with `booktabs`, caption, label, and consistent precision.
 - Table assets must also include a local table note that explains row meaning, column meaning, metric definitions, comparison scope, and any important caveat.
+- Table assets must avoid vertical rules, `\hline`, and `\cline`; use `booktabs` rules and whitespace instead.
+- Table captions should appear before the tabular body so the table can be read top-down in manuscript order.
+- Numeric precision should be consistent within each metric column unless the table note explains a deliberate exception.
 - Table assets must not rely on aggressive width hacks by default; if width control is still needed after table redesign, document it locally and keep it readable.
 - Figure placeholders must explain what the final figure should show and why the reader needs it.
 - Core asset coverage for a paper-facing final draft should include a problem-setting or teaser figure, a method overview figure, a results overview figure, a main-results table, an ablation table, and one additional analysis asset.
@@ -199,6 +215,8 @@ Run these on every round:
 - When a round introduces or revises key terms, include a compact terminology note in the user-facing round summary and record the terminology-clarity self-check in the write-iteration artifact.
 - Record the section-level acceptance gate in the write-iteration artifact before recommending further tightening on the same section.
 - Record section-style policy compliance, any retained discouraged move, and any banned move found in the write-iteration artifact.
+- Record the review issue bundle and re-audit status in the write-iteration artifact whenever the round follows reviewer notes, validator warnings, or prior failed writing rounds.
+- Record the reference template intake in the write-iteration artifact whenever the round uses PDFs, URLs, or `.lab/writing/reference-patterns/` artifacts: sources used, aggregate playbook path, section templates consulted, visual/table templates consulted, multi-template reproduction plan, and structure-only reuse boundary.
 - Record the round target layer in the write-iteration artifact as `canonical manuscript`, `workflow-language paper layer`, or `both`.
 - If workflow-language was active and the round still targeted the canonical manuscript, record why canonical-only writing was acceptable in the write-iteration artifact.
 - If both layers were edited, record why the cross-language sync was required and whether it was explicitly requested by the user or required by final-draft/export finalization.

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "superlab",
-  "version": "0.1.63",
+  "version": "0.1.65",
   "description": "Strict /lab research workflow installer for Codex and Claude",
   "keywords": [
     "codex",