npm - superlab - Versions diffs - 0.1.79 → 0.1.80 - Mend

superlab 0.1.79 → 0.1.80

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (4) hide show

package/package-assets/shared/lab/.managed/scripts/validate_section_draft.py CHANGED Viewed

@@ -43,6 +43,11 @@ def contains_any(text: str, needles: tuple[str, ...]) -> bool:
     return any(needle.lower() in lowered for needle in needles)
def count_phrase_hits(text: str, needles: tuple[str, ...]) -> int:
    """Count case-insensitive, non-overlapping phrase occurrences in *text*.

    Each stretch of text contributes at most one hit, even when several
    needles match it. Summing ``str.count`` per needle would count one
    occurrence of "serves only to show" twice when "only to show" is also a
    needle, and one "scores" twice when "score" is also a needle, which lets
    a single phrase satisfy a ``>= 2`` threshold.

    Args:
        text: The text to scan.
        needles: Phrases to look for. Empty strings and duplicates
            (after lowercasing) are ignored.

    Returns:
        The number of non-overlapping matches of any needle in *text*.
    """
    lowered = text.lower()
    # Longest needle first: regex alternation takes the first alternative
    # that matches at a position, so the most specific phrase claims the span
    # and shorter needles nested inside it are not counted again.
    ordered = sorted(
        {needle.lower() for needle in needles if needle},
        key=lambda phrase: (-len(phrase), phrase),
    )
    if not ordered:
        return 0
    pattern = re.compile("|".join(re.escape(phrase) for phrase in ordered))
    return sum(1 for _ in pattern.finditer(lowered))
 def strip_latex_commands(text: str) -> str:
     text = re.sub(r"%.*", " ", text)
     text = re.sub(r"\\[A-Za-z@*]+(?:\[[^\]]*\])?", " ", text)
@@ -229,6 +234,51 @@ WORKFLOW_ONLY_MANUSCRIPT_PHRASES = (
     "工作流语言",
     "同步到",
 )
+OVER_DEFENSIVE_BOUNDARY_PHRASES = (
+    "not intended as",
+    "not intended to be",
+    "not meant as",
+    "not meant to be",
+    "not a general",
+    "not a deployable",
+    "not a production",
+    "should not be viewed as",
+    "should not be read as",
+    "only to show",
+    "only to illustrate",
+    "only to demonstrate",
+    "only used to show",
+    "serves only to show",
+    "仅用于说明",
+    "仅用于展示",
+    "仅用于证明",
+    "只是为了说明",
+    "不应被视为",
+    "不应视为",
+    "不是通用",
+    "不是可部署",
+    "不是真实场景",
+    "不作为",
+)
+RESULT_LOG_SIGNAL_PHRASES = (
+    "auuc",
+    "qini",
+    "auc",
+    "accuracy",
+    "f1",
+    "score",
+    "scores",
+    "point gain",
+    "point gains",
+    "points",
+    "baseline",
+    "baselines",
+    "提升",
+    "百分点",
+    "得分",
+    "分差",
+    "基线",
+)
 INTERNAL_EXPERIMENT_PROVENANCE_PHRASES = (
     "tuning run",
     "tuning runs",
@@ -351,6 +401,14 @@ def has_diagnostic_interpretation(text: str) -> bool:
     )
def has_result_log_numeric_dump(text: str, min_numbers: int = 8, min_signal_hits: int = 2) -> bool:
    """Report whether *text* looks like a dense, result-log style number dump.

    The text is first passed through ``strip_latex_commands``. It is flagged
    only when it contains at least ``min_numbers`` numeric tokens (integers
    or decimals, optionally followed by ``%``) and at least
    ``min_signal_hits`` hits from ``RESULT_LOG_SIGNAL_PHRASES``.

    Args:
        text: Raw section text.
        min_numbers: Minimum count of numeric tokens required.
        min_signal_hits: Minimum count of result-signal phrase hits required.

    Returns:
        True when both thresholds are met, otherwise False.
    """
    stripped = strip_latex_commands(text)
    numeric_token_count = sum(1 for _ in re.finditer(r"\b\d+(?:\.\d+)?%?\b", stripped))
    enough_numbers = numeric_token_count >= min_numbers
    # Short-circuit keeps the phrase scan from running when numbers are sparse.
    return enough_numbers and count_phrase_hits(stripped, RESULT_LOG_SIGNAL_PHRASES) >= min_signal_hits
 def check_common_section_gate_risks(text: str, issues: list[str]):
     prose_text = strip_latex_commands(text)
     if ISOLATED_INSIGHT_HEADING_PATTERN.search(text):
@@ -435,6 +493,36 @@ def check_common_section_gate_risks(text: str, issues: list[str]):
         )
def check_boundary_and_density_risks(section: str, text: str, issues: list[str]):
    """Append boundary-dumping and numeric-density warnings for *section*.

    Two independent checks run:

    * Boundary dumping: for the abstract, introduction, method and
      experiments sections, a message is appended when the LaTeX-stripped
      text has two or more hits from ``OVER_DEFENSIVE_BOUNDARY_PHRASES``.
    * Numeric density: for the introduction (default thresholds) and the
      method section (``min_numbers=6``), a message is appended when
      ``has_result_log_numeric_dump`` flags the raw text.

    Args:
        section: Section name being validated.
        text: Raw section text.
        issues: List that receives warning messages; mutated in place.
    """
    boundary_messages = {
        "abstract": "abstract section contains over-defensive boundary dumping; keep at most one brief boundary sentence and spend the abstract on problem, gap, method, and result",
        "introduction": "introduction section contains over-defensive boundary dumping; state the gap and mechanism first, then keep only one brief boundary if it is needed",
        "method": "method section contains over-defensive boundary dumping; explain what the method does and move repeated scaffold-defense language to one brief boundary sentence or the limitations section",
        "experiments": "experiments section contains over-defensive boundary dumping; report the attack or evaluation outcome directly and keep only the minimum boundary needed to scope the evidence",
    }
    # Per-section keyword overrides for the density detector plus the message to emit.
    density_rules = {
        "introduction": (
            {},
            "introduction section contains result-log style numeric dumping; keep only the one or two numbers needed to motivate the contribution and move dense benchmark values to experiments",
        ),
        "method": (
            {"min_numbers": 6, "min_signal_hits": 2},
            "method section contains result-log style numeric dumping; keep method on mechanism and move dense numeric comparisons to experiments",
        ),
    }

    stripped = strip_latex_commands(text)
    hit_count = count_phrase_hits(stripped, OVER_DEFENSIVE_BOUNDARY_PHRASES)
    if hit_count >= 2 and section in boundary_messages:
        issues.append(boundary_messages[section])

    if section in density_rules:
        overrides, density_message = density_rules[section]
        if has_result_log_numeric_dump(text, **overrides):
            issues.append(density_message)
 def check_section_style_policy(text: str, section: str, issues: list[str]):
     prose_text = strip_latex_commands(text)
     for message, needles in SECTION_STYLE_WARNINGS.get(section, []):
@@ -484,6 +572,11 @@ def check_neighbor_asset_files(section: str, section_path: Path, issues: list[st
             issues.append(
                 f"{asset_path.as_posix()} contains workflow-only placeholder language; move authoring notes out of captions and paper-facing asset text"
             )
+        boundary_hits = count_phrase_hits(asset_text, OVER_DEFENSIVE_BOUNDARY_PHRASES)
+        if boundary_hits >= 2:
+            issues.append(
+                f"{asset_path.as_posix()} contains over-defensive boundary dumping in a paper-facing asset; keep captions and local notes focused on what the figure or table shows"
+            )
 def check_paper_topology_targeting(section_path: Path, issues: list[str]):
@@ -774,6 +867,7 @@ def main():
     check_paper_topology_targeting(section_path, blocking_issues)
     check_workflow_language_targeting(section_path, blocking_issues)
     check_common_section_gate_risks(text, warning_issues)
+    check_boundary_and_density_risks(args.section, text, warning_issues)
     check_section_style_policy(text, args.section, warning_issues)
     SECTION_CHECKS[args.section](text, warning_issues)
     check_neighbor_asset_files(args.section, section_path, warning_issues)

package/package-assets/shared/skills/lab/references/paper-writing/section-style-policies.md CHANGED Viewed

@@ -26,11 +26,14 @@ These are paper-facing defaults. They are not project-specific branding rules.
 - Long setup of benchmark details.
 - Contribution lists that read like the introduction.
 - Excessive future-work framing.
+- More than one defensive boundary sentence.
+- Internal scaffold names before the reader-facing concept is clear.
 **Banned expressions / moves**
 - Roadmap prose such as "In this paper, we first..., then..., finally...".
 - Reviewer-facing instructions such as "the reader can see" or "as shown clearly below".
 - Unbounded superiority claims such as "universally", "always", or "in every setting".
+- Over-defensive boundary dumping that keeps explaining what the paper is not instead of what it shows.
 - Service-style or AI-assistant meta language such as "用户说", "按你的要求", "我来解释", "let me explain", or "as requested by the user".
 - Workflow-only placeholder language such as "图的意图", "资产意图", "占位符", "workflow-language", or "sync this wording".
 - Standalone insight headings such as "Our Insights" when the insight is not woven into the abstract's challenge and contribution arc.
@@ -51,12 +54,15 @@ These are paper-facing defaults. They are not project-specific branding rules.
 - Repeating "important" or "significant" without a concrete consequence.
 - Opening with generic field hype.
 - Listing contributions before the gap is clear.
+- Result-log introductions that dump many benchmark values before the gap is clear.
+- Repeating the same boundary defense that will already appear in Method, Experiments, or Limitations.
 **Banned expressions / moves**
 - Empty macro-importance claims such as "this problem is increasingly critical" with no concrete consequence.
 - Marketing-style first-claim language such as "revolutionary", "game-changing", or "unprecedented" without evidence.
 - Paragraphs that only praise the paper instead of stating the research gap.
 - Standalone "Our Insights" sections; the insight should be part of the motivation and gap logic.
+- Over-defensive boundary dumping that explains what the system is not before the reader understands the problem, gap, and contribution.
 - Service-style or AI-assistant meta language such as "用户说", "按你的要求", "我来解释", "let me explain", or "as requested by the user".
 - Workflow-only placeholder language such as "图的意图", "资产意图", "占位符", "workflow-language", or "sync this wording".
@@ -98,12 +104,15 @@ These are paper-facing defaults. They are not project-specific branding rules.
 - Long implementation detail lists that belong in appendix or setup.
 - Repeating model names without explaining their role.
 - Overusing novelty language instead of mechanism explanation.
+- Defensive paragraphs that keep explaining what the method is not instead of what mechanism it implements.
+- Dense result numbers that belong in Experiments rather than Method.
 **Banned expressions / moves**
 - Marketing-style or self-promotional wording such as "elegant", "powerful", "dramatically stronger", or "significantly outperforms prior methods" when used as prose decoration rather than evidence-backed result reporting.
 - Explaining the method by saying it is "better", "stronger", or "more advanced" without saying how it works.
 - Method subsections that read like API documentation without explaining which mechanism or insight requires the design.
 - Introducing new narrative aliases for canonical model or ablation labels after they have already been locked.
+- Over-defensive boundary dumping that keeps justifying what the method is not or where it is not deployable instead of explaining how the mechanism works.
 - Service-style or AI-assistant meta language such as "用户说", "按你的要求", "我来解释", "let me explain", or "as requested by the user".
 - Workflow-only placeholder language such as "图的意图", "资产意图", "占位符", "workflow-language", or "sync this wording".
@@ -124,6 +133,8 @@ These are paper-facing defaults. They are not project-specific branding rules.
 - Re-explaining the same metric in every paragraph.
 - Paragraphs that only restate the table without synthesis.
 - Result paragraphs that say only "higher/lower/better" without explaining what the pattern teaches.
+- Repeating the same boundary defense after every table or figure.
+- Captions or table notes that carry more defense than result interpretation.
 **Banned expressions / moves**
 - Meta-reader guidance such as "这样读者可以……", "the reader can first...", or "this table lets the reader...".
@@ -131,6 +142,7 @@ These are paper-facing defaults. They are not project-specific branding rules.
 - Layout-process commentary in scientific prose, such as "由于表列较多，这里采用页宽自适应排版" or "we use page-width adaptive layout here".
 - Claims that a table "proves" something when the evidence only supports a bounded empirical result.
 - Internal experiment-planning prose, such as "还需要新增 holdout", "小批量门控", "冻结 payload", "不能边跑边调", "API 规模估计", or "if all scores are 1.0000, treat it as overfitting".
+- Over-defensive boundary dumping that spends more sentences disclaiming the setup than reporting what was attacked, measured, recovered, or bounded.
 - Service-style or AI-assistant meta language such as "用户说", "按你的要求", "我来解释", "let me explain", or "as requested by the user".
 - Workflow-only placeholder language such as "图的意图", "资产意图", "占位符", "workflow-language", or "sync this wording".

package/package-assets/shared/skills/lab/stages/write.md CHANGED Viewed

@@ -154,6 +154,7 @@ Do not enter prose polish until the current section has passed the reference-con
 - Build a compact mini-outline before prose.
 - When reference-guided deep-write is triggered, build the reference consumption plan before the mini-outline so the outline is based on mapped section slots rather than generic prose flow.
 - Academic readability standards are the same in `workflow_language` and `paper_language`; changing languages must not lower external-reader clarity.
+- Prefer concept first, implementation label second. If an internal scaffold, expert, oracle, parser, or module nickname is not central to the reader's understanding, state the reader-facing concept first and introduce the implementation label only if later reuse justifies it.
 - If the current round introduces or revises key terms, abbreviations, metric names, mechanism names, or system labels, explain them at first mention by briefly stating what they are and why they matter here.
 - First mention should use the full form. If a short form or acronym will be reused later, define it at first mention as `Full Form (Short Form)` before switching to the short form.
 - Apply the same first-mention rule to table headers, table captions, table notes, and figure captions or labels; if a term or abbreviation first appears in a table, expand it locally in that table.
@@ -169,6 +170,8 @@ Do not enter prose polish until the current section has passed the reference-con
 - In Experiments, interpret results diagnostically: say which part of the insight each result, ablation, robustness check, or failure case supports, weakens, or bounds. Do not only read numbers from a table.
 - In Conclusion, state the broader principle or action implication implied by the evidence, then state the boundary. Do not introduce a new insight there.
 - Avoid paper-facing headings such as `Our Insights` or `核心洞见`; if a heading is needed, use normal section roles such as motivation, analysis, ablation, or discussion and let the insight appear in the prose.
+- Keep boundary statements sparse. One brief boundary sentence in Abstract, one brief scope sentence in Experiments, and one fuller limitation in Conclusion is the default pattern; do not repeat the same defense across sections unless the evidence scope genuinely changes.
+- If a paragraph outside Experiments reads like a result log, cut it back to the one or two numbers needed for motivation and move the dense benchmark values to Experiments or tables.
 - Nontrivial section work must use three separated revision passes instead of one all-purpose rewrite:
   - Logic pass: check the paragraph role, claim chain, premise-to-conclusion transition, evidence dependency, and whether the section naturally follows from adjacent sections. Use `section-question-bank.md` to force explicit answers about section purpose. Do not polish wording in this pass.
   - Theory / field pass: after the logic pass is clean, check concept use, field terminology, metric definitions, citation anchors, and whether the chosen framework actually fits the claim. Run `argument-stress-test.md` here, including the weakest-link test, reverse-claim test, and strongest alternative explanation check. Do not treat fluent language as proof that the theory is right.
@@ -198,6 +201,7 @@ Do not enter prose polish until the current section has passed the reference-con
 - Keep run provenance such as tuning-run labels, probe names, internal config strings, rerun ids, and package labels out of reader-facing prose. If the evidence is useful, rewrite it as a bounded paper-facing diagnostic or move the raw provenance to workflow notes or appendix metadata.
 - Keep internal experiment planning out of reader-facing prose. Do not write paper sentences that explain future holdout expansion, small-batch gates, payload freezing, API budget, "if all scores are 1.0000 then treat as overfitting", or why a next automation round is needed.
 - When an experiment boundary matters, report only the scientific scope already supported by the evidence. Put the operational plan for collecting new attacks, new papers, new markers, or additional holdout cases into `.lab/changes/`, `.lab/iterations/`, or report artifacts, not into manuscript sections.
+- Do not let Method or Experiments be dominated by negative-definition prose such as what the system is not, what it is not meant to replace, or where it is not deployable. After one clear scope sentence, return to mechanism or evidence.
 - Do not use unexplained terminology density as a substitute for academic tone.
 - Keep service-style or AI-assistant meta language out of manuscript prose. Phrases such as "用户说", "按你的要求", "我来解释", "下面我", "this version", or "as requested by the user" belong in workflow notes, not in paper-facing sections, captions, table notes, or analysis assets.
 - Keep workflow-only placeholder language out of manuscript prose. Phrases such as "图的意图", "资产意图", "占位符", "workflow-language", "translation layer", or "sync this wording" belong in authoring artifacts, not in reader-facing LaTeX.
@@ -257,6 +261,7 @@ Do not enter prose polish until the current section has passed the reference-con
 - Table assets must use paper-facing LaTeX structure with `booktabs`, caption, label, and consistent precision.
 - Table assets must also include a local table note that explains row meaning, column meaning, metric definitions, comparison scope, and any important caveat.
 - The local table note must contain real reader-facing explanations, not the default template phrases such as "explain what each row represents" or "expand local abbreviations".
+- Captions and table notes should explain what the asset shows, not defend the workflow. Do not use captions to carry repeated boundary disclaimers that belong in section prose or Limitations.
 - Table assets must not rely on aggressive width hacks by default; if width control is still needed after table redesign, document it locally and keep it readable.
 - Table assets with seven or more columns should be split, moved partly to appendix, or written with width-aware columns such as `tabularx` or `p{...}` instead of a plain `tabular` layout.
 - Figure placeholders may record what the final figure should show and why the reader needs it in authoring comments, the paper plan, or the write-iteration artifact, but the caption itself must remain paper-facing and must not contain "Figure intent", "图的意图", "asset intent", "占位符", or similar workflow language.

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "superlab",
-  "version": "0.1.79",
+  "version": "0.1.80",
   "description": "Strict /lab research workflow installer for Codex and Claude",
   "keywords": [
     "codex",