npm - superlab - Versions diffs - 0.1.66 → 0.1.68 - Mend

superlab 0.1.66 → 0.1.68

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (26)

package/package-assets/shared/lab/.managed/scripts/validate_section_draft.py CHANGED Viewed

@@ -91,14 +91,6 @@ def has_meaningful_field_value(value: str) -> bool:
     return normalized not in {"", "-", "n/a", "na", "none", "no", "not applicable", "null", "false"}
-def latest_write_iteration(project_root: Path) -> Path | None:
-    iteration_dir = project_root / ".lab" / "writing" / "iterations"
-    if not iteration_dir.exists():
-        return None
-    iteration_files = sorted(iteration_dir.glob("*.md"))
-    return iteration_files[-1] if iteration_files else None
 SECTION_STYLE_WARNINGS = {
     "abstract": [
         (
@@ -206,13 +198,73 @@ SECTION_STYLE_WARNINGS = {
     ],
 }
+SERVICE_STYLE_PHRASES = (
+    "user asked",
+    "the user asked",
+    "as requested by the user",
+    "let me explain",
+    "i will explain",
+    "below i",
+    "用户说",
+    "用户要求",
+    "按你的要求",
+    "我来解释",
+    "我会说明",
+    "下面我",
+    "这版",
+    "已完成",
+    "已按",
+)
+WORKFLOW_ONLY_MANUSCRIPT_PHRASES = (
+    "figure intent",
+    "asset intent",
+    "placeholder",
+    "workflow-language",
+    "translation layer",
+    "review layer",
+    "图的意图",
+    "资产意图",
+    "占位符",
+    "工作流语言",
+    "同步到",
+)
+INTERNAL_EXPERIMENT_PROVENANCE_PHRASES = (
+    "tuning run",
+    "tuning runs",
+    "historical probe",
+    "rank-margin probe",
+    "rerun id",
+    "run id",
+    "实验包",
+    "历史 probe",
+    "调参运行",
+    "调参轮次",
+)
+INTERNAL_CONFIG_LABEL_PATTERN = re.compile(
+    r"\b[a-z]{1,4}\d+(?:[-_][a-z]?\d+(?:\.\d+)?){1,4}\b",
+    flags=re.IGNORECASE,
+)
 def check_common_section_gate_risks(text: str, issues: list[str]):
     prose_text = strip_latex_commands(text)
+    if contains_any(prose_text, SERVICE_STYLE_PHRASES):
+        issues.append(
+            "service-style or AI-assistant meta language appears in reader-facing prose; rewrite it as academic manuscript text"
+        )
+    if contains_any(prose_text, WORKFLOW_ONLY_MANUSCRIPT_PHRASES):
+        issues.append(
+            "workflow-only placeholder language appears in reader-facing prose; move authoring notes out of the manuscript"
+        )
     if re.search(r"\b[a-z0-9]+(?:_[a-z0-9]+)+\b", prose_text):
         issues.append(
             "reader-facing prose appears to contain internal identifier-like tokens; map them once for the reader and move them back out of prose before more polishing"
         )
+    if contains_any(prose_text, INTERNAL_EXPERIMENT_PROVENANCE_PHRASES) or INTERNAL_CONFIG_LABEL_PATTERN.search(prose_text):
+        issues.append(
+            "reader-facing prose appears to contain internal experiment provenance or tuning/config labels; move run provenance to workflow notes or map it to paper-facing diagnostic terminology"
+        )
     if contains_any(
         prose_text,
         (
@@ -310,6 +362,16 @@ def check_neighbor_asset_files(section: str, section_path: Path, issues: list[st
             issues.append(
                 f"{section} section is missing the required paper-layer asset file: {asset_path.as_posix()}"
             )
+            continue
+        asset_text = strip_latex_commands(read_text(asset_path))
+        if contains_any(asset_text, SERVICE_STYLE_PHRASES):
+            issues.append(
+                f"{asset_path.as_posix()} contains service-style or AI-assistant meta language; rewrite it as paper-facing asset text"
+            )
+        if contains_any(asset_text, WORKFLOW_ONLY_MANUSCRIPT_PHRASES):
+            issues.append(
+                f"{asset_path.as_posix()} contains workflow-only placeholder language; move authoring notes out of captions and paper-facing asset text"
+            )
 def check_paper_topology_targeting(section_path: Path, issues: list[str]):
@@ -440,32 +502,6 @@ def check_active_paper_topology(section_path: Path, issues: list[str]):
     issues.extend(validate_topology_artifacts(project_root))
-def check_reference_template_intake(section_path: Path, issues: list[str]):
-    project_root = find_project_root(section_path)
-    if project_root is None:
-        return
-    reference_root = project_root / ".lab" / "writing" / "reference-patterns"
-    aggregate_playbook = reference_root / "aggregate-template-playbook.md"
-    legacy_notes_root = project_root / ".lab" / "writing" / "pdf-structure-notes"
-    has_legacy_notes = legacy_notes_root.exists() and any(legacy_notes_root.glob("*"))
-    latest_iteration = latest_write_iteration(project_root)
-    reference_sources = ""
-    if latest_iteration:
-        iteration_text = read_text(latest_iteration)
-        reference_sources = extract_markdown_field(
-            iteration_text,
-            "Reference Template Intake",
-            "Reference sources used:",
-        )
-    if (has_legacy_notes or has_meaningful_field_value(reference_sources)) and not aggregate_playbook.exists():
-        issues.append(
-            "reference papers appear to be used without .lab/writing/reference-patterns/aggregate-template-playbook.md; run extract_reference_paper_structure.py and use structured section/visual templates instead of legacy pdf-structure-notes"
-        )
 def check_abstract(text: str, issues: list[str]):
     numbers = re.findall(r"\b\d+(?:\.\d+)?\b", text)
     if len(numbers) > 6:
@@ -538,100 +574,6 @@ def check_method(text: str, issues: list[str]):
         issues.append("method should explain the technical advantage")
-def has_performance_claim(text: str) -> bool:
-    return contains_any(
-        text,
-        (
-            "outperform",
-            "outperforms",
-            "improve",
-            "improves",
-            "improved",
-            "gain",
-            "gains",
-            "better",
-            "stronger",
-            "superior",
-            "state-of-the-art",
-            "sota",
-            "reduce",
-            "reduces",
-            "降低",
-            "提升",
-            "优于",
-            "超过",
-            "更好",
-            "增益",
-        ),
-    )
-def has_numeric_or_table_evidence(text: str) -> bool:
-    if re.search(r"\b\d+\.\d+\b", text):
-        return True
-    if re.search(r"\b\d+(?:\.\d+)?\s*(?:%|pp|points?|AUUC|Qini|AUC|F1)\b", text, flags=re.IGNORECASE):
-        return True
-    if r"\pm" in text:
-        return True
-    return bool(
-        re.search(r"\\(?:auto|c|C)?ref\{(?:tab|fig):", text)
-        or re.search(r"\b(?:Table|Figure|Fig\.|表|图)~?\\ref\{", text)
-    )
-def has_generic_comparator_without_anchor(text: str) -> bool:
-    generic_comparator = contains_any(
-        text,
-        (
-            "previous methods",
-            "prior methods",
-            "existing methods",
-            "several baselines",
-            "the baselines",
-            "baseline suite",
-            "previous work",
-            "prior work",
-            "现有方法",
-            "已有方法",
-            "若干基线",
-            "基线集合",
-        ),
-    )
-    if not generic_comparator:
-        return False
-    if r"\cite{" in text or r"\citet{" in text or r"\citep{" in text:
-        return False
-    return not bool(re.search(r"\b[A-Z][A-Za-z0-9-]{2,}(?:\s*,\s*[A-Z][A-Za-z0-9-]{2,})+", text))
-def has_repeated_split_protocol(text: str) -> bool:
-    return bool(
-        re.search(r"\b\d+\s+(?:random\s+)?(?:splits|seeds|runs)\b", text, flags=re.IGNORECASE)
-        or re.search(r"\bacross\s+(?:random\s+)?(?:splits|seeds|runs)\b", text, flags=re.IGNORECASE)
-        or re.search(r"\b重复\s*\d+\s*次", text)
-    )
-def has_variance_report(text: str) -> bool:
-    return contains_any(
-        text,
-        (
-            r"\pm",
-            "standard deviation",
-            "std",
-            "confidence interval",
-            "confidence intervals",
-            "ci",
-            "variance",
-            "mean",
-            "平均",
-            "标准差",
-            "置信区间",
-            "方差",
-        ),
-    )
 def check_experiments(text: str, issues: list[str]):
     if not contains_any(
         text,
@@ -659,18 +601,6 @@ def check_experiments(text: str, issues: list[str]):
         ),
     ):
         issues.append("experiments should include benchmark scene notes")
-    if has_performance_claim(text) and not has_numeric_or_table_evidence(text):
-        issues.append(
-            "experiment performance claims should tie to concrete metric or numeric evidence instead of prose-only claims"
-        )
-    if has_generic_comparator_without_anchor(text):
-        issues.append(
-            "experiments use generic comparator names; name the comparator family, table anchor, or citations before more polish"
-        )
-    if has_repeated_split_protocol(text) and not has_variance_report(text):
-        issues.append(
-            "repeated split or seed protocol should report variance, confidence intervals, or an explicit variance disposition"
-        )
 def check_conclusion(text: str, issues: list[str]):
@@ -706,7 +636,6 @@ def main():
     check_workflow_language_targeting(section_path, blocking_issues)
     check_common_section_gate_risks(text, warning_issues)
     check_section_style_policy(text, args.section, warning_issues)
-    check_reference_template_intake(section_path, warning_issues)
     SECTION_CHECKS[args.section](text, warning_issues)
     check_neighbor_asset_files(args.section, section_path, warning_issues)

package/package-assets/shared/lab/.managed/templates/paper-figure.tex CHANGED Viewed

@@ -1,6 +1,7 @@
 \begin{figure}[t]
 \centering
 \fbox{\rule{0pt}{1.2in}\rule{0.9\linewidth}{0pt}}
-\caption{Figure title. Figure intent: explain what this figure should show and why the reader needs it.}
+% Authoring note: record what this figure should show and why the reader needs it in the paper plan or write-iteration artifact, not in the caption.
+\caption{Figure title. Replace with a paper-facing caption that states the visual content and the supported claim.}
 \label{fig:placeholder}
 \end{figure}

package/package-assets/shared/lab/.managed/templates/paper-plan.md CHANGED Viewed

@@ -51,19 +51,19 @@
 - Problem setting or teaser figure:
   - Asset file:
   - Section:
-  - Figure intent:
+  - Figure role:
   - Evidence:
   - Status:
 - Method overview figure:
   - Asset file:
   - Section:
-  - Figure intent:
+  - Figure role:
   - Evidence:
   - Status:
 - Results overview figure:
   - Asset file:
   - Section:
-  - Figure intent:
+  - Figure role:
   - Evidence:
   - Status:
@@ -73,7 +73,7 @@
   - Asset file:
   - Asset type:
   - Section:
-  - Asset intent:
+  - Asset role:
   - Evidence:
   - Status:

package/package-assets/shared/lab/.managed/templates/reference-consumption-plan.md ADDED Viewed

@@ -0,0 +1,41 @@
+# Reference Consumption Plan
+## Sources
+- Source:
+## Adopted Structure Slots
+- slot_name -> target subsection / paragraph / asset
+## Rejected or Waived Slots
+- slot_name -> waiver reason
+## Section Mapping
+- slot_name -> current-paper section or subsection
+## Paragraph Role Mapping
+- paragraph_role -> current-paper paragraph
+- paragraph_role -> current-paper local bridge / transition sentence
+## Asset Mapping
+- asset_role -> current-paper figure/table/analysis asset
+## Section Realization Check
+- Adopted slots that are visibly realized in the current section:
+- Adopted slots that still need prose, subsection, paragraph, or asset support:
+- Reason any dense paragraph was kept instead of splitting into reader-facing anchors:
+## Reuse Boundary
+- Reuse structure, section order, paragraph roles, asset function, placement logic, and bridge logic only.
+- Do not copy wording, claims, metrics, captions, or conclusions from reference papers.
+## Validation
+- `validate_reference_consumption.py` result:

package/package-assets/shared/lab/.managed/templates/write-iteration.md CHANGED Viewed

@@ -67,33 +67,6 @@
 - Any discouraged move kept and why:
 - Any banned move found:
-## Review Issue Bundle
-- Issue bundle path:
-- New issues:
-- Resolved issues:
-- Open issues:
-- Quote-backed findings recorded:
-- Script-backed findings separated from judgment-backed findings:
-## Re-Audit Status
-- Previous issue bundle compared:
-- Fully addressed root causes:
-- Partially addressed root causes:
-- Not addressed root causes:
-- New root causes:
-- Which root-cause issues block further prose polish:
-## Reference Template Intake
-- Reference sources used:
-- Aggregate template playbook:
-- Section templates consulted:
-- Visual/table templates consulted:
-- Multi-template reproduction plan:
-- Structure-only reuse boundary:
 ## Table Semantics
 - Metrics promised in Method:
@@ -142,6 +115,23 @@
 - Was workflow-language paper layer included in the exported/pushed bundle:
 - If workflow-language was omitted, why was canonical-only export acceptable:
+## Reference Structure Consumption
+- Was reference-guided deep writing triggered:
+- Reference sources used:
+- Reference consumption plan path:
+- Were section/subsection slots mapped before prose:
+- Were paragraph roles mapped before prose:
+- Were table/figure roles mapped before prose:
+- Were adopted slots visibly realized in the section, not only in the plan:
+- Did any adopted experiment slot remain collapsed into an overly dense paragraph:
+- Which reference slots were adopted:
+- Which reference slots were waived and why:
+- Did the round avoid copying reference wording, claims, metrics, captions, or conclusions:
+- Did final prose avoid service-style or AI-assistant meta language:
+- Did final prose avoid workflow-only placeholder language:
+- Validator command and result:
 ## Decision
 - Continue or stop:

package/package-assets/shared/skills/lab/SKILL.md CHANGED Viewed

@@ -246,6 +246,10 @@ Use this skill when the user invokes `/lab:*` or asks for the structured researc
 - Keep one canonical natural-language paper-facing name per concept.
 - Once a paper-facing model or ablation label is chosen, reuse the canonical label instead of replacing it with a narrative alias in later prose, tables, or captions.
 - Before drafting or polishing, check the current section block in `skills/lab/references/paper-writing/section-style-policies.md` and follow its encouraged, discouraged, and banned expression lists.
+- When the user provides reference PDFs, paper URLs, local reference-paper paths, or asks to write by reference, stay within `/lab:write` but switch to reference-guided deep writing: extract structure, map section/subsection slots, paragraph roles, and table/figure roles to the current paper, record the mapping, and only then draft prose.
+- The reference-consumption plan is not sufficient by itself. The current section must visibly realize the adopted structure slots through subsection or paragraph anchors, table/figure placement, local bridges, and reader-facing prose.
+- Reference-guided writing may reuse structure, paragraph roles, asset placement, and bridge logic, but must not copy reference wording, claims, metrics, captions, or conclusions.
+- Keep service-style, AI-assistant meta language, and workflow-only placeholder language out of manuscript prose, captions, table notes, and paper-facing analysis assets.
 - Before any additional tighten, compress, or polish pass on the same section, run a section-level acceptance gate first.
 - The section-level acceptance gate must explicitly check canonical naming consistency, adjacent-section consistency, claim/metric/ranking consistency with evidence, local clarity, local concision, and section-style compliance.
 - If the current section still contains a banned expression or banned rhetorical move from the section-style policy, the round has not passed the section-level acceptance gate.
@@ -262,12 +266,13 @@ Use this skill when the user invokes `/lab:*` or asks for the structured researc
 - Main tables must be locally self-contained: a reader should be able to understand row meaning, column meaning, metric direction, and any relevant unit or denominator from the table title, table note, and adjacent prose without chasing the Method section.
 - If Method or Experiments prose promises a metric family, the main table set must either expose that metric family directly or mark the missing items as appendix-only and explain why.
 - Short table headers are allowed, but any abbreviation in a paper-facing table must be expanded locally in the same table.
+- Local table notes must be filled with real reader-facing explanations; default template text such as "explain what each row represents" or "expand local abbreviations" is still incomplete.
 - If a metric is measured but omitted because it is zero everywhere, redundant, or appendix-only, state that decision explicitly in the table note instead of silently dropping it.
 - Do not treat `\resizebox{\linewidth}{!}{...}` as the default main-table fit strategy.
 - Fit paper-facing main tables by redesign first: shorten headers, move secondary metrics out of the main table, reduce or split columns, then adjust `\tabcolsep` conservatively; only use `\resizebox` as a last resort and document why.
 - Keep `\tabcolsep` adjustments conservative and avoid shrinking below a roughly readable floor for paper-facing main tables.
 - Do not rely on `\scriptsize` or `\tiny` as the default way to make a main table fit.
-- Keep internal identifiers out of prose unless they are mapped once for the reader and then moved back out of prose.
+- Keep internal identifiers, tuning-run labels, probe names, config strings, rerun ids, and package labels out of prose unless they are mapped once for the reader and then moved back out of prose.
 - Do not rely on unexplained jargon density as a substitute for academic tone.
 - Bind each claim to evidence from `report`, iteration reports, or normalized summaries.
 - Use the write-stage contract in `.codex/skills/lab/stages/write.md` or `.claude/skills/lab/stages/write.md` as the single source of truth for template choice, paper-plan requirements, section-specific references, validator calls, asset coverage, and final manuscript gates.

package/package-assets/shared/skills/lab/references/paper-writing/examples/experiments/figure-placeholder-and-discussion.md CHANGED Viewed

@@ -10,9 +10,11 @@ attachment.
 \begin{figure}[t]
 \centering
 \fbox{\rule{0pt}{1.55in}\rule{0.92\linewidth}{0pt}}
-\caption{Method overview. Figure intent: show the full pipeline, highlight the
-boundary between the structured scoring module and the post-hoc calibration
-stage, and make the train-time versus inference-time data flow easy to inspect.}
+% Authoring note: the final visual should show the full pipeline, highlight the
+% module boundary, and make train-time versus inference-time data flow visible.
+\caption{Method overview of the proposed model. The diagram separates the
+structured scoring module from the post-hoc calibration stage and indicates the
+data flow used during training and inference.}
 \label{fig:method-overview}
 \end{figure}
 ```
@@ -23,9 +25,11 @@ stage, and make the train-time versus inference-time data flow easy to inspect.}
 \begin{figure}[t]
 \centering
 \fbox{\rule{0pt}{1.55in}\rule{0.92\linewidth}{0pt}}
-\caption{Benchmark-level results overview. Figure intent: summarize the trend
-across datasets, show error bars or confidence intervals, and reveal whether the
-main gain is stable or dominated by one benchmark.}
+% Authoring note: the final visual should summarize cross-dataset trends and
+% uncertainty without introducing a claim absent from the result tables.
+\caption{Benchmark-level results overview. The plot summarizes the primary
+metric across datasets and shows whether the reported gain is stable across the
+evaluated settings.}
 \label{fig:results-overview}
 \end{figure}
 ```

package/package-assets/shared/skills/lab/references/paper-writing/examples/experiments-examples.md CHANGED Viewed

@@ -8,7 +8,7 @@ glue, not just checklists.
 1. Main results should live in a real `table` environment.
 2. Ablations should live in a separate `table` environment.
-3. Method and experiments should each have at least one figure placeholder with an explicit `Figure intent`.
+3. Method and experiments should each have at least one figure placeholder with a reader-ready caption and any authoring notes kept outside the caption.
 4. Captions should explain the table or figure message briefly; longer interpretation belongs in prose.
 ## Example Files

package/package-assets/shared/skills/lab/references/paper-writing/section-style-policies.md CHANGED Viewed

@@ -30,6 +30,8 @@ These are paper-facing defaults. They are not project-specific branding rules.
 - Roadmap prose such as "In this paper, we first..., then..., finally...".
 - Reviewer-facing instructions such as "the reader can see" or "as shown clearly below".
 - Unbounded superiority claims such as "universally", "always", or "in every setting".
+- Service-style or AI-assistant meta language such as "用户说", "按你的要求", "我来解释", "let me explain", or "as requested by the user".
+- Workflow-only placeholder language such as "图的意图", "资产意图", "占位符", "workflow-language", or "sync this wording".
 ## Introduction
@@ -51,6 +53,8 @@ These are paper-facing defaults. They are not project-specific branding rules.
 - Empty macro-importance claims such as "this problem is increasingly critical" with no concrete consequence.
 - Marketing-style first-claim language such as "revolutionary", "game-changing", or "unprecedented" without evidence.
 - Paragraphs that only praise the paper instead of stating the research gap.
+- Service-style or AI-assistant meta language such as "用户说", "按你的要求", "我来解释", "let me explain", or "as requested by the user".
+- Workflow-only placeholder language such as "图的意图", "资产意图", "占位符", "workflow-language", or "sync this wording".
 ## Related Work
@@ -71,6 +75,8 @@ These are paper-facing defaults. They are not project-specific branding rules.
 - Laundry-list paragraphs that only say "X does..., Y does..., Z does..." with no comparison.
 - Claims that related work is weak or obsolete without specifying the missing capability.
 - Hiding the closest prior work behind broad category language.
+- Service-style or AI-assistant meta language such as "用户说", "按你的要求", "我来解释", "let me explain", or "as requested by the user".
+- Workflow-only placeholder language such as "图的意图", "资产意图", "占位符", "workflow-language", or "sync this wording".
 ## Method
@@ -92,6 +98,8 @@ These are paper-facing defaults. They are not project-specific branding rules.
 - Marketing-style or self-promotional wording such as "elegant", "powerful", "dramatically stronger", or "significantly outperforms prior methods" when used as prose decoration rather than evidence-backed result reporting.
 - Explaining the method by saying it is "better", "stronger", or "more advanced" without saying how it works.
 - Introducing new narrative aliases for canonical model or ablation labels after they have already been locked.
+- Service-style or AI-assistant meta language such as "用户说", "按你的要求", "我来解释", "let me explain", or "as requested by the user".
+- Workflow-only placeholder language such as "图的意图", "资产意图", "占位符", "workflow-language", or "sync this wording".
 ## Experiments
@@ -114,6 +122,8 @@ These are paper-facing defaults. They are not project-specific branding rules.
 - Self-evaluations such as "结果也很清楚", "the defense results are very clear", or "the table is self-explanatory".
 - Layout-process commentary in scientific prose, such as "由于表列较多，这里采用页宽自适应排版" or "we use page-width adaptive layout here".
 - Claims that a table "proves" something when the evidence only supports a bounded empirical result.
+- Service-style or AI-assistant meta language such as "用户说", "按你的要求", "我来解释", "let me explain", or "as requested by the user".
+- Workflow-only placeholder language such as "图的意图", "资产意图", "占位符", "workflow-language", or "sync this wording".
 ## Conclusion
@@ -134,3 +144,5 @@ These are paper-facing defaults. They are not project-specific branding rules.
 - Introducing new evidence, new experiments, or new mechanism claims.
 - Expanding the paper's scope beyond what the experiments support.
 - Ending with generic hype such as "this opens a new era" or "this will broadly transform the field".
+- Service-style or AI-assistant meta language such as "用户说", "按你的要求", "我来解释", "let me explain", or "as requested by the user".
+- Workflow-only placeholder language such as "图的意图", "资产意图", "占位符", "workflow-language", or "sync this wording".