superlab 0.1.26 → 0.1.28

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. package/README.md +3 -0
  2. package/README.zh-CN.md +3 -0
  3. package/bin/superlab.cjs +11 -0
  4. package/lib/auto_contracts.cjs +1 -1
  5. package/lib/auto_runner.cjs +1 -1
  6. package/lib/context.cjs +30 -198
  7. package/lib/i18n.cjs +229 -19
  8. package/lib/lab_write_contract.json +8 -0
  9. package/package-assets/claude/commands/lab-idea.md +1 -1
  10. package/package-assets/claude/commands/lab-write.md +1 -1
  11. package/package-assets/claude/commands/lab.md +4 -3
  12. package/package-assets/codex/prompts/lab-idea.md +1 -1
  13. package/package-assets/codex/prompts/lab-write.md +1 -1
  14. package/package-assets/codex/prompts/lab.md +4 -3
  15. package/package-assets/shared/lab/.managed/scripts/validate_idea_artifact.py +147 -0
  16. package/package-assets/shared/lab/.managed/scripts/validate_manuscript_delivery.py +50 -4
  17. package/package-assets/shared/lab/.managed/scripts/validate_paper_claims.py +86 -0
  18. package/package-assets/shared/lab/.managed/scripts/validate_paper_plan.py +263 -0
  19. package/package-assets/shared/lab/.managed/scripts/validate_section_draft.py +181 -0
  20. package/package-assets/shared/lab/.managed/templates/idea.md +43 -0
  21. package/package-assets/shared/lab/.managed/templates/paper-plan.md +78 -0
  22. package/package-assets/shared/lab/config/workflow.json +2 -1
  23. package/package-assets/shared/lab/context/auto-mode.md +1 -1
  24. package/package-assets/shared/lab/context/next-action.md +4 -4
  25. package/package-assets/shared/lab/context/session-brief.md +8 -1
  26. package/package-assets/shared/lab/context/summary.md +14 -3
  27. package/package-assets/shared/skills/lab/SKILL.md +17 -16
  28. package/package-assets/shared/skills/lab/references/paper-writing/examples/abstract/template-b.md +2 -2
  29. package/package-assets/shared/skills/lab/references/paper-writing/examples/conclusion/conservative-claim-boundary.md +13 -13
  30. package/package-assets/shared/skills/lab/references/paper-writing/examples/experiments/main-results-and-ablation-latex.md +18 -17
  31. package/package-assets/shared/skills/lab/references/paper-writing/examples/experiments-examples.md +1 -1
  32. package/package-assets/shared/skills/lab/references/paper-writing/examples/index.md +1 -1
  33. package/package-assets/shared/skills/lab/references/paper-writing/examples/introduction/pipeline-version-1-one-contribution-multi-advantages.md +3 -3
  34. package/package-assets/shared/skills/lab/references/paper-writing/examples/introduction/pipeline-version-2-two-contributions.md +1 -1
  35. package/package-assets/shared/skills/lab/references/paper-writing/examples/method/annotated-figure-to-text.md +66 -0
  36. package/package-assets/shared/skills/lab/references/paper-writing/examples/method/example-of-the-three-elements.md +11 -11
  37. package/package-assets/shared/skills/lab/references/paper-writing/examples/method/{module-design-instant-ngp.md → module-design-multiresolution-encoding.md} +1 -1
  38. package/package-assets/shared/skills/lab/references/paper-writing/examples/method/{module-triad-neural-body.md → module-triad-anchored-representation.md} +4 -4
  39. package/package-assets/shared/skills/lab/references/paper-writing/examples/method/overview-template.md +4 -4
  40. package/package-assets/shared/skills/lab/references/paper-writing/examples/method/pre-writing-questions.md +4 -3
  41. package/package-assets/shared/skills/lab/references/paper-writing/examples/method-examples.md +4 -4
  42. package/package-assets/shared/skills/lab/references/paper-writing/examples/related-work/closest-prior-gap-template.md +12 -12
  43. package/package-assets/shared/skills/lab/references/paper-writing/examples/related-work/topic-comparison-template.md +2 -2
  44. package/package-assets/shared/skills/lab/stages/auto.md +6 -2
  45. package/package-assets/shared/skills/lab/stages/data.md +0 -1
  46. package/package-assets/shared/skills/lab/stages/framing.md +0 -1
  47. package/package-assets/shared/skills/lab/stages/idea.md +30 -13
  48. package/package-assets/shared/skills/lab/stages/write.md +28 -4
  49. package/package.json +1 -1
  50. package/package-assets/shared/skills/lab/references/paper-writing/examples/method/neural-body-annotated-figure-text.md +0 -66
@@ -0,0 +1,181 @@
1
+ #!/usr/bin/env python3
2
+ import argparse
3
+ import re
4
+ import sys
5
+ from pathlib import Path
6
+
7
+
8
+ def parse_args():
9
+ parser = argparse.ArgumentParser(
10
+ description="Validate section-specific paper-writing quality expectations."
11
+ )
12
+ parser.add_argument(
13
+ "--section",
14
+ required=True,
15
+ choices=["abstract", "introduction", "related-work", "method", "experiments", "conclusion"],
16
+ help="Section type being validated",
17
+ )
18
+ parser.add_argument("--section-file", required=True, help="Path to the section .tex file")
19
+ parser.add_argument(
20
+ "--mode",
21
+ required=True,
22
+ choices=["draft", "final"],
23
+ help="Draft rounds emit warnings; final rounds fail on missing section structure",
24
+ )
25
+ return parser.parse_args()
26
+
27
+
28
+ def read_text(path: Path) -> str:
29
+ return path.read_text(encoding="utf-8")
30
+
31
+
32
+ def contains_any(text: str, needles: tuple[str, ...]) -> bool:
33
+ lowered = text.lower()
34
+ return any(needle.lower() in lowered for needle in needles)
35
+
36
+
37
+ def check_abstract(text: str, issues: list[str]):
38
+ numbers = re.findall(r"\b\d+(?:\.\d+)?\b", text)
39
+ if len(numbers) > 6:
40
+ issues.append("abstract has overly dense benchmark-number dumping")
41
+ if not contains_any(text, ("challenge", "gap", "however", "difficulty", "难点", "挑战", "不足")):
42
+ issues.append("abstract should state the core challenge or gap")
43
+ if not contains_any(text, ("boundary", "bounded", "limitation", "however", "but", "局限", "边界", "限制")):
44
+ issues.append("abstract should include a bounded result or explicit limitation")
45
+
46
+
47
+ def check_introduction(text: str, issues: list[str]):
48
+ has_problem_figure = contains_any(
49
+ text,
50
+ (
51
+ r"\input{figures/problem-setting}",
52
+ r"\begin{figure}",
53
+ r"\ref{fig:problem-setting}",
54
+ ),
55
+ )
56
+ if not has_problem_figure:
57
+ issues.append("introduction should consume a problem-setting figure")
58
+ if not contains_any(
59
+ text,
60
+ (
61
+ "contribution",
62
+ "contributions",
63
+ "贡献",
64
+ "我们的贡献",
65
+ ),
66
+ ):
67
+ issues.append("introduction should make contributions explicit")
68
+ if not contains_any(
69
+ text,
70
+ (
71
+ "however",
72
+ "prior work",
73
+ "existing methods",
74
+ "falls short",
75
+ "gap",
76
+ "limitation",
77
+ "然而",
78
+ "现有方法",
79
+ "不足",
80
+ ),
81
+ ):
82
+ issues.append("introduction should explain what is missing in prior work")
83
+
84
+
85
+ def check_related_work(text: str, issues: list[str]):
86
+ if r"\cite{" not in text:
87
+ issues.append("related work should include citation-driven comparisons")
88
+
89
+
90
+ def check_method(text: str, issues: list[str]):
91
+ has_method_figure = contains_any(
92
+ text,
93
+ (
94
+ r"\input{figures/method-overview}",
95
+ r"\begin{figure}",
96
+ r"\ref{fig:method-overview}",
97
+ ),
98
+ )
99
+ if not has_method_figure:
100
+ issues.append("method should consume a method-overview figure")
101
+ if not contains_any(text, ("motivation", "motivate", "动机")):
102
+ issues.append("method should explain module motivation")
103
+ if not contains_any(text, ("design", "we first", "we then", "设计")):
104
+ issues.append("method should explain the concrete design")
105
+ if not contains_any(text, ("advantage", "benefit", "improves", "优势", "收益")):
106
+ issues.append("method should explain the technical advantage")
107
+
108
+
109
+ def check_experiments(text: str, issues: list[str]):
110
+ if not contains_any(
111
+ text,
112
+ (
113
+ r"\input{analysis/analysis-asset}",
114
+ r"\input{tables/analysis}",
115
+ r"\ref{fig:analysis",
116
+ r"\ref{tab:analysis",
117
+ ),
118
+ ):
119
+ issues.append("experiments should consume an analysis asset")
120
+ if not contains_any(
121
+ text,
122
+ (
123
+ "represents",
124
+ "corresponds to",
125
+ "setting",
126
+ "scenario",
127
+ "scene",
128
+ "task setting",
129
+ "benchmark family",
130
+ "场景",
131
+ "设定",
132
+ "任务",
133
+ ),
134
+ ):
135
+ issues.append("experiments should include benchmark scene notes")
136
+
137
+
138
+ def check_conclusion(text: str, issues: list[str]):
139
+ if not contains_any(text, ("limitation", "limitations", "bounded", "局限", "限制", "边界")):
140
+ issues.append("conclusion should state at least one limitation or boundary")
141
+ if not contains_any(text, ("future work", "next step", "future direction", "下一步", "未来工作")):
142
+ issues.append("conclusion should state one next step or future direction")
143
+
144
+
145
+ SECTION_CHECKS = {
146
+ "abstract": check_abstract,
147
+ "introduction": check_introduction,
148
+ "related-work": check_related_work,
149
+ "method": check_method,
150
+ "experiments": check_experiments,
151
+ "conclusion": check_conclusion,
152
+ }
153
+
154
+
155
+ def main():
156
+ args = parse_args()
157
+ section_path = Path(args.section_file)
158
+ if not section_path.exists():
159
+ print(f"section file does not exist: {section_path}", file=sys.stderr)
160
+ return 1
161
+
162
+ text = read_text(section_path)
163
+ issues: list[str] = []
164
+ SECTION_CHECKS[args.section](text, issues)
165
+
166
+ if not issues:
167
+ print("section draft is valid")
168
+ return 0
169
+
170
+ if args.mode == "draft":
171
+ for issue in issues:
172
+ print(f"WARNING: {issue}")
173
+ return 0
174
+
175
+ for issue in issues:
176
+ print(issue, file=sys.stderr)
177
+ return 1
178
+
179
+
180
+ if __name__ == "__main__":
181
+ raise SystemExit(main())
@@ -1,9 +1,19 @@
1
1
  # Idea Artifact
2
2
 
3
+ ## Scenario
4
+
5
+ - Real-world setting:
6
+ - Who would care if this problem were solved:
7
+
3
8
  ## One-Sentence Problem
4
9
 
5
10
  State the problem in one short sentence.
6
11
 
12
+ ## Why It Matters
13
+
14
+ - Why this matters in practice:
15
+ - What breaks if we ignore it:
16
+
7
17
  ## Failure Case
8
18
 
9
19
  - Where current methods fail:
@@ -48,6 +58,32 @@ Suggested levels:
48
58
  - Shared assumption:
49
59
  - Why that assumption breaks here:
50
60
 
61
+ ## Literature Scoping Bundle
62
+
63
+ - Default target source count:
64
+ - Actual source count:
65
+ - Closest prior bucket:
66
+ - Recent strong papers:
67
+ - Benchmark or evaluation papers:
68
+ - Survey or taxonomy papers:
69
+ - Adjacent-field papers:
70
+ - If the total is below the default target, why:
71
+
72
+ ## Closest Prior Work Comparison
73
+
74
+ - Prior work 1:
75
+ - Citation:
76
+ - What it does:
77
+ - Why it is relevant here:
78
+ - Limitation for the current problem:
79
+ - Difference from our direction:
80
+ - Prior work 2:
81
+ - Citation:
82
+ - What it does:
83
+ - Why it is relevant here:
84
+ - Limitation for the current problem:
85
+ - Difference from our direction:
86
+
51
87
  ## Why Ours Is Different
52
88
 
53
89
  - Existing methods rely on:
@@ -55,6 +91,11 @@ Suggested levels:
55
91
  - Expected advantage:
56
92
  - Evidence needed to prove the advantage:
57
93
 
94
+ ## Rough Approach
95
+
96
+ - Plain-language description of how this would work:
97
+ - Why this design might resolve the failure case:
98
+
58
99
  ## Three Meaningful Points
59
100
 
60
101
  1. Significance:
@@ -67,6 +108,7 @@ Suggested levels:
67
108
  - Benchmark conventions:
68
109
  - Typical datasets:
69
110
  - Typical metrics:
111
+ - Credible baseline shortlist:
70
112
 
71
113
  ## Candidate Approaches
72
114
 
@@ -90,6 +132,7 @@ Suggested levels:
90
132
  - Primary metric:
91
133
  - Secondary metrics:
92
134
  - Minimum viable experiment:
135
+ - Fastest way to falsify the idea:
93
136
 
94
137
  ## Critique Summary
95
138
 
@@ -24,6 +24,84 @@
24
24
  - Limitation sources:
25
25
  - Claims that still need more evidence:
26
26
 
27
+ ## Asset Coverage Targets
28
+
29
+ - Core asset floor:
30
+ - Required coverage categories:
31
+ - Current planned core assets:
32
+ - Coverage risks or gaps:
33
+
34
+ ## Table Plan
35
+
36
+ - Main results table:
37
+ - Asset file:
38
+ - Section:
39
+ - Message:
40
+ - Evidence:
41
+ - Status:
42
+ - Ablation table:
43
+ - Asset file:
44
+ - Section:
45
+ - Message:
46
+ - Evidence:
47
+ - Status:
48
+
49
+ ## Figure Plan
50
+
51
+ - Problem setting or teaser figure:
52
+ - Asset file:
53
+ - Section:
54
+ - Figure intent:
55
+ - Evidence:
56
+ - Status:
57
+ - Method overview figure:
58
+ - Asset file:
59
+ - Section:
60
+ - Figure intent:
61
+ - Evidence:
62
+ - Status:
63
+ - Results overview figure:
64
+ - Asset file:
65
+ - Section:
66
+ - Figure intent:
67
+ - Evidence:
68
+ - Status:
69
+
70
+ ## Analysis Asset Plan
71
+
72
+ - Analysis asset:
73
+ - Asset file:
74
+ - Asset type:
75
+ - Section:
76
+ - Asset intent:
77
+ - Evidence:
78
+ - Status:
79
+
80
+ ## Citation Plan
81
+
82
+ - Background anchor:
83
+ - Citation or source anchor:
84
+ - Section:
85
+ - Why it matters:
86
+ - Status:
87
+ - Closest prior work:
88
+ - Citation or source anchor:
89
+ - Section:
90
+ - Why it matters:
91
+ - Status:
92
+ - Metric or benchmark source:
93
+ - Citation or source anchor:
94
+ - Section:
95
+ - Why it matters:
96
+ - Status:
97
+
98
+ ## Section-to-Asset Map
99
+
100
+ - Introduction:
101
+ - Method:
102
+ - Experiments:
103
+ - Related Work:
104
+
27
105
  ## Writing Order
28
106
 
29
107
  1. First section target:
@@ -7,5 +7,6 @@
7
7
  "deliverables_root": "docs/research",
8
8
  "paper_template_root": "",
9
9
  "paper_template_decision": "unconfirmed",
10
- "paper_template_final_reminder_acknowledged": false
10
+ "paper_template_final_reminder_acknowledged": false,
11
+ "paper_language_finalization_decision": "unconfirmed"
11
12
  }
@@ -68,4 +68,4 @@ If `eval-protocol.md` declares structured rung entries, auto mode follows those
68
68
 
69
69
  - Stop conditions:
70
70
  - Escalation conditions:
71
- - Canonical promotion writeback: update `.lab/context/data-decisions.md`, `.lab/context/decisions.md`, `.lab/context/state.md`, and `.lab/context/session-brief.md`.
71
+ - Canonical promotion writeback: update `.lab/context/data-decisions.md`, `.lab/context/decisions.md`, `.lab/context/state.md`, and `.lab/context/workflow-state.md`.
@@ -5,15 +5,15 @@
5
5
  - Action:
6
6
  - Success signal:
7
7
 
8
- ## If Success
8
+ ## After Completion
9
9
 
10
10
  - Next action:
11
11
 
12
- ## If Failure
12
+ ## If Blocked
13
13
 
14
14
  - Fallback action:
15
15
 
16
- ## Human Decision Needed
16
+ ## Escalation
17
17
 
18
18
  - Question:
19
- - Why it blocks progress:
19
+ - Escalate when:
@@ -3,6 +3,8 @@
3
3
  ## Active Stage
4
4
 
5
5
  - Stage:
6
+ - Current objective:
7
+ - Immediate next action:
6
8
 
7
9
  ## Mission
8
10
 
@@ -11,10 +13,15 @@ One sentence describing the active research mission.
11
13
  ## Best Current Path
12
14
 
13
15
  - Approved direction:
14
- - Why this is the active path:
16
+ - Strongest supported claim:
15
17
  - Auto mode:
16
18
  - Auto objective:
17
19
  - Auto decision:
20
+ - Collaborator report mode:
21
+ - Canonical context readiness:
22
+ - Method name:
23
+ - Primary metrics:
24
+ - Secondary metrics:
18
25
 
19
26
  ## Main Risk
20
27
 
@@ -1,12 +1,23 @@
1
1
  # Research Summary
2
2
 
3
3
  ## Current Direction
4
-
5
- Summarize the current approved research direction in 5-10 lines.
6
-
4
+ - Mission:
5
+ - Approved direction:
6
+ - Active stage:
7
+ - Current objective:
7
8
  - Auto mode:
8
9
  - Auto objective:
9
10
  - Auto decision:
11
+ - Collaborator report mode:
12
+ - Canonical context readiness:
13
+ - Method name:
14
+ - Contribution bullets:
15
+ - Eval objective:
16
+ - Primary metrics:
17
+ - Secondary metrics:
18
+ - Dataset package:
19
+ - Benchmark role:
20
+ - Comparison suite:
10
21
 
11
22
  ## Strongest Evidence
12
23
 
@@ -34,17 +34,22 @@ Use this skill when the user invokes `/lab:*` or asks for the structured researc
34
34
  ### `/lab:idea`
35
35
 
36
36
  - Search relevant literature, baselines, datasets, and evaluation metrics before proposing a plan.
37
+ - Build a literature-scoping bundle before claiming novelty. The default target is 20 relevant sources unless the field is genuinely too narrow and that exception is written down.
37
38
  - Read `.lab/context/mission.md` and `.lab/context/open-questions.md` before drafting.
39
+ - Read `.lab/config/workflow.json` before drafting and follow its `workflow_language` for idea artifacts.
38
40
  - Ask one clarifying question at a time when critical ambiguity remains.
39
- - State the problem, the failure case, and why the problem matters before proposing solutions.
41
+ - State the scenario, the problem, the failure case, and why the problem matters before proposing solutions.
40
42
  - Classify the idea by contribution category and breakthrough level.
41
43
  - Compare against existing methods explicitly and state why the idea should be better.
44
+ - Include a closest-prior-work comparison and a plain-language description of how the proposed direction would work.
42
45
  - Distinguish sourced evidence from generated innovation claims.
43
46
  - End with three meaningful points that are clear, short, and easy to scan.
44
47
  - Produce 2-3 candidate approaches with trade-offs before recommending one.
45
48
  - Critique the idea before converging on it.
49
+ - Include a minimum viable experiment before approval.
46
50
  - Keep an explicit approval gate before `/lab:spec`.
47
51
  - Write idea artifacts with the template in `.lab/.managed/templates/idea.md`.
52
+ - Run `.lab/.managed/scripts/validate_idea_artifact.py --idea <idea-artifact> --workflow-config .lab/config/workflow.json` before treating the idea as converged.
48
53
  - Update `.lab/context/mission.md`, `.lab/context/decisions.md`, and `.lab/context/open-questions.md` after convergence.
49
54
  - Do not leave `.lab/context/mission.md` as a template shell once the problem statement and approved direction are known.
50
55
  - Do not implement code in this stage.
@@ -91,9 +96,9 @@ Use this skill when the user invokes `/lab:*` or asks for the structured researc
91
96
  - Reuse `/lab:run`, `/lab:iterate`, `/lab:review`, `/lab:report`, and optional `/lab:write` instead of inventing a second workflow.
92
97
  - Do not automatically change the research mission, paper-facing framing, or core claims.
93
98
  - You may add exploratory datasets, benchmarks, and comparison methods inside the approved exploration envelope.
94
- - You may promote an exploratory addition to the primary package only after the promotion policy in `auto-mode.md` is satisfied and the promotion is written back into `.lab/context/data-decisions.md`, `.lab/context/decisions.md`, `.lab/context/state.md`, and `.lab/context/session-brief.md`.
99
+ - You may promote an exploratory addition to the primary package only after the promotion policy in `auto-mode.md` is satisfied and the promotion is written back into `.lab/context/data-decisions.md`, `.lab/context/decisions.md`, `.lab/context/state.md`, and `.lab/context/workflow-state.md`.
95
100
  - Poll long-running commands until they complete, time out, or hit a stop condition.
96
- - Update `.lab/context/auto-status.md`, `.lab/context/state.md`, `.lab/context/workflow-state.md`, `.lab/context/decisions.md`, `.lab/context/data-decisions.md`, `.lab/context/evidence-index.md`, and `.lab/context/session-brief.md` as the campaign advances.
101
+ - Update `.lab/context/auto-status.md`, `.lab/context/state.md`, `.lab/context/workflow-state.md`, `.lab/context/decisions.md`, `.lab/context/data-decisions.md`, and `.lab/context/evidence-index.md` as the campaign advances, then refresh the derived handoff files.
97
102
  - Keep an explicit approval gate when a proposed action would leave the frozen core defined by the auto-mode contract.
98
103
 
99
104
  ### `/lab:spec`
@@ -165,26 +170,21 @@ Use this skill when the user invokes `/lab:*` or asks for the structured researc
165
170
 
166
171
  - Start only after `report` artifacts are stable enough to support paper claims.
167
172
  - Start only after an approved framing artifact exists at `.lab/writing/framing.md`.
168
- - Read `.lab/config/workflow.json` before drafting and enforce its `paper_language` and `paper_format`.
169
- - If `paper_template_root` is empty and `paper_template_decision` is `unconfirmed`, ask once whether to continue with the managed default scaffold or attach a template directory first; persist the answer before drafting `.tex`.
170
- - If the project is still on the default scaffold at a final export or final-draft boundary and `paper_template_final_reminder_acknowledged` is `false`, ask one final reminder question before finalizing.
173
+ - Read `.lab/config/workflow.json` before drafting and enforce its `workflow_language`, `paper_language`, and `paper_format`.
171
174
  - Read `.lab/context/mission.md`, `.lab/context/decisions.md`, `.lab/context/evidence-index.md`, and `.lab/context/data-decisions.md` before drafting.
172
175
  - Write one paper section or one explicit subproblem per round.
176
+ - Ordinary manuscript drafting rounds should follow `workflow_language`.
177
+ - If `workflow_language` and `paper_language` differ, the first final-draft or export round must ask once whether to keep the draft language or convert the final manuscript to `paper_language`, then persist that choice.
173
178
  - Bind each claim to evidence from `report`, iteration reports, or normalized summaries.
174
- - Write planning artifacts with `.lab/.managed/templates/paper-plan.md`, `.lab/.managed/templates/paper-section.md`, and `.lab/.managed/templates/write-iteration.md`.
175
- - Write final manuscript artifacts with `.lab/.managed/templates/paper.tex`, `.lab/.managed/templates/paper-section.tex`, `.lab/.managed/templates/paper-table.tex`, `.lab/.managed/templates/paper-figure.tex`, and `.lab/.managed/templates/paper-references.bib`.
176
- - Use the vendored paper-writing references under `skills/lab/references/paper-writing/`.
177
- - For any section with a bundled example bank, also use the vendored example-bank files under `skills/lab/references/paper-writing/examples/`.
178
- - Load only the current section guide, the matching examples index when one exists, 1-2 matching concrete example files, plus `paper-review.md` and `does-my-writing-flow-source.md`.
179
- - Build a compact mini-outline before prose.
180
- - Build the paper asset plan before prose when the section carries method or experiments claims.
179
+ - Use the write-stage contract in `.codex/skills/lab/stages/write.md` or `.claude/skills/lab/stages/write.md` as the single source of truth for template choice, paper-plan requirements, section-specific references, validator calls, asset coverage, and final manuscript gates.
180
+ - Use the vendored paper-writing references under `skills/lab/references/paper-writing/` and the matching example-bank files under `skills/lab/references/paper-writing/examples/`.
181
+ - Treat `.lab/writing/plan.md` as the write-time source of truth for tables, figures, citations, and asset coverage.
182
+ - Treat section-quality, claim-safety, and manuscript-delivery checks as the canonical acceptance gates for final-draft or export rounds.
181
183
  - For each subsection, explicitly cover motivation, design, and technical advantage when applicable.
182
184
  - Keep terminology stable across rounds and sections.
183
185
  - If a claim is not supported by evidence, weaken or remove it.
184
186
  - Treat tables, figures, citations, and bibliography as core manuscript content rather than optional polish.
185
187
  - Keep paper-facing LaTeX free of absolute local paths, rerun ids, shell transcripts, and internal workflow provenance.
186
- - Materialize real LaTeX tables and figure placeholders instead of leaving all evidence inside prose paragraphs.
187
- - Run `.lab/.managed/scripts/validate_manuscript_delivery.py --paper-dir <deliverables_root>/paper` before accepting a final-draft or export round.
188
188
  - Before finalizing a round, append and answer the five-dimension self-review checklist and revise unresolved items.
189
189
  - Apply paper-writing discipline without changing experimental truth.
190
190
  - If the evidence is insufficient, stop and route back to `review` or `iterate`.
@@ -199,7 +199,8 @@ Use this skill when the user invokes `/lab:*` or asks for the structured researc
199
199
  - No unconstrained auto mode. Every `/lab:auto` campaign must declare allowed stages, stop conditions, and a promotion policy in `.lab/context/auto-mode.md`.
200
200
  - No auto start without an explicit autonomy level and `Approval status: approved`.
201
201
  - No final report without validated normalized results.
202
- - No paper-writing round without stable report artifacts, an approved framing artifact, evidence links, LaTeX manuscript output, and a passing manuscript-delivery validation for final-draft or export rounds.
202
+ - No paper-writing round without stable report artifacts, an approved framing artifact, evidence links, and LaTeX manuscript output.
203
+ - No final-draft or export round without passing section-quality, claim-safety, and manuscript-delivery validation.
203
204
 
204
205
  ## References
205
206
 
@@ -16,7 +16,7 @@
16
16
 
17
17
  % Introduce the technical contribution that implements the insight in one to two sentences (usually mention the technical term/name only, without describing every detailed step. The term should be easy to understand and should not create a jump in reading. This ability is very important for writing a good abstract.)
18
18
  %% Example 1: To do this, we first present a label-efficient depth estimation framework using the internal representations of diffusion models. At the sampling phase, we utilize two guidance techniques to self-condition the generated image using the estimated depth map, the first of which uses pseudo-labeling, and the subsequent one uses a depth-domain diffusion prior.
19
- %% Example 2: To this end, we propose Neural Body, a new human body representation which assumes that the learned neural representations at different frames share the same set of latent codes anchored to a deformable mesh
19
+ %% Example 2: To this end, we propose AnchorField, a structured representation in which different observations share the same set of latent codes anchored to a deformable support.
20
20
 
21
21
  % Introduce the benefits of technical novelty
22
22
  %% Example 2: so that the observations across frames can be naturally integrated. The deformable mesh also provides geometric guidance for the network to learn 3D representations more efficiently.
@@ -29,6 +29,6 @@
29
29
  1. `This paper addresses the challenge of novel view synthesis for a human performer from a very sparse set of camera views.`
30
30
  2. `... representation learning will be ill-posed if the views are highly sparse.`
31
31
  3. `To solve this ill-posed problem, our key idea is to integrate observations over video frames.`
32
- 4. `To this end, we propose Neural Body ...`
32
+ 4. `To this end, we propose AnchorField ...`
33
33
  5. `... observations across frames can be naturally integrated ... provides geometric guidance ...`
34
34
  6. `Experiments show [main result].`
@@ -6,22 +6,22 @@ boundary explicit.
6
6
  ```tex
7
7
  \section{Conclusion}
8
8
 
9
- This paper shows that adding a structured ranking backbone together with a
10
- post-hoc calibration stage improves uplift ranking under the frozen benchmark
11
- protocol. Across the three benchmark families used in this work, the full model
9
+ This paper shows that adding a structured intermediate module together with a
10
+ lightweight adjustment stage improves performance under a fixed evaluation
11
+ protocol. Across the benchmark families used in this work, the full model
12
12
  consistently matches or exceeds the strongest baselines and remains stronger
13
13
  than the key ablated variants. This makes the main claim narrower than a
14
- universal superiority claim but stronger than a single-dataset win.
14
+ universal superiority claim but stronger than a single-setting win.
15
15
 
16
- We do not claim that the current method solves uplift modeling in every domain
17
- or that every design choice helps equally on every benchmark. In particular, the
18
- calibration stage appears beneficial on some datasets and neutral on others,
19
- which means its value should be interpreted as setting-dependent rather than as
20
- a guaranteed gain. That boundary is consistent with recent benchmarking
21
- practice, which argues for claim discipline and protocol-specific interpretation
22
- rather than broad overgeneralization~\cite{carlini2019evaluating}.
16
+ We do not claim that the current method solves the broader problem in every
17
+ domain or that every design choice helps equally on every benchmark. In
18
+ particular, the adjustment stage appears beneficial in some settings and
19
+ neutral in others, which means its value should be interpreted as
20
+ setting-dependent rather than as a guaranteed gain. That boundary is consistent
21
+ with recent benchmarking practice, which argues for claim discipline and
22
+ protocol-specific interpretation rather than broad overgeneralization~\cite{carlini2019evaluating}.
23
23
 
24
24
  The most useful next step is to extend the evaluation to a broader set of
25
- benchmark slices and to test whether the same ranking-versus-calibration split
26
- remains useful when the label distribution shifts more aggressively.
25
+ benchmark slices and to test whether the same backbone-versus-adjustment split
26
+ remains useful when the data distribution shifts more aggressively.
27
27
  ```
@@ -1,6 +1,6 @@
1
1
  # Main Results and Ablation LaTeX Example
2
2
 
3
- This file is a complete paper-facing LaTeX example for the experiments section.
3
+ This file is a complete manuscript-ready LaTeX example for the experiments section.
4
4
  Reuse the structure, caption logic, and prose-to-table linkage. Replace the
5
5
  placeholder methods, metrics, and values with the current project's evidence.
6
6
 
@@ -14,13 +14,13 @@ Source inspiration:
14
14
 
15
15
  ```tex
16
16
  \begin{table}[t]
17
- \caption{Main benchmark results under the frozen evaluation protocol. Higher is better on all metrics.}
17
+ \caption{Main benchmark results under the fixed evaluation protocol. Higher is better on all metrics.}
18
18
  \label{tab:main-results}
19
19
  \centering
20
20
  \resizebox{0.92\linewidth}{!}{
21
21
  \begin{tabular}{lccc}
22
22
  \toprule
23
- Method & AUUC $\uparrow$ & Qini $\uparrow$ & Calibration Error $\downarrow$ \\
23
+ Method & Primary Metric $\uparrow$ & Secondary Metric $\uparrow$ & Error Metric $\downarrow$ \\
24
24
  \midrule
25
25
  Strongest baseline & 0.1421 & 0.0873 & 0.0612 \\
26
26
  Closest prior work & 0.1488 & 0.0915 & 0.0544 \\
@@ -32,7 +32,7 @@ Ours & \textbf{0.1564} & \textbf{0.0987} & \textbf{0.0418} \\
32
32
  ```
33
33
 
34
34
  Table message:
35
- - `Does the proposed method beat the strongest baselines under the frozen protocol?`
35
+ - `Does the proposed method beat the strongest baselines under the fixed evaluation protocol?`
36
36
 
37
37
  ## Ablation Table
38
38
 
@@ -47,7 +47,7 @@ Variant & AUUC $\uparrow$ \\
47
47
  \midrule
48
48
  Ours & \textbf{0.1564} \\
49
49
  w/o structure module & 0.1497 \\
50
- w/o calibration stage & 0.1510 \\
50
+ w/o final adjustment stage & 0.1510 \\
51
51
  w/ shuffled auxiliary signal & 0.1458 \\
52
52
  \bottomrule
53
53
  \end{tabular}
@@ -65,19 +65,20 @@ Table message:
65
65
 
66
66
  Table~\ref{tab:main-results} answers the main ranking question: whether the full
67
67
  method remains stronger than the closest prior work and the strongest practical
68
- baseline under the frozen protocol. Our method achieves the best AUUC and Qini
69
- while also reducing calibration error, which means the gain is not coming from
70
- trading ranking quality against stability.
68
+ baseline under the fixed evaluation protocol. Our method achieves the best
69
+ primary and secondary metrics while also reducing the error metric, which means
70
+ the gain is not coming from trading one objective against stability.
71
71
 
72
72
  Table~\ref{tab:ablations} then asks a narrower mechanism question. Removing the
73
- structure module causes the largest drop, so the main gain is tied to structured
74
- signal modeling rather than to a generic increase in capacity. Removing the
75
- calibration stage leads to a smaller but still visible drop, which supports the
76
- claim that calibration improves final ranking quality without being the sole
77
- driver of the result. The shuffled-signal variant acts as a negative control and
78
- shows that the gain does not survive when the auxiliary information is broken.
73
+ structure module causes the largest drop, so the main gain is tied to explicit
74
+ structure modeling rather than to a generic increase in capacity. Removing the
75
+ final adjustment stage leads to a smaller but still visible drop, which
76
+ supports the claim that the adjustment helps the exposed prediction without
77
+ being the sole driver of the result. The shuffled-signal variant acts as a
78
+ negative control and shows that the gain does not survive when the auxiliary
79
+ information is broken.
79
80
 
80
- One caveat is that the calibration gain may remain neutral on some benchmarks,
81
- so the paper should not overclaim that every component helps equally on every
82
- dataset.
81
+ One caveat is that the final adjustment gain may remain neutral in some
82
+ settings, so the paper should not overclaim that every component helps equally
83
+ on every dataset.
83
84
  ```
@@ -1,6 +1,6 @@
1
1
  # Experiments Example Patterns
2
2
 
3
- Use these examples when turning validated results into paper-facing LaTeX assets.
3
+ Use these examples when turning validated results into manuscript-ready LaTeX assets.
4
4
  The referenced files contain complete LaTeX environments and section-level prose
5
5
  glue, not just checklists.
6
6