superlab 0.1.79 → 0.1.80

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -43,6 +43,11 @@ def contains_any(text: str, needles: tuple[str, ...]) -> bool:
43
43
  return any(needle.lower() in lowered for needle in needles)
44
44
 
45
45
 
46
def count_phrase_hits(text: str, needles: tuple[str, ...]) -> int:
    """Count case-insensitive, non-overlapping phrase occurrences in ``text``.

    Each stretch of text is attributed to at most one needle. When one needle
    contains another (for example "serves only to show" and "only to show",
    or "scores" and "score"), a single occurrence of the longer phrase counts
    once instead of once per matching needle.

    Args:
        text: The text to scan.
        needles: Phrases to look for. Empty strings and duplicates (after
            lowercasing) are ignored.

    Returns:
        The number of non-overlapping matches; 0 when ``text`` is empty or
        there is no usable needle.
    """
    # Deduplicate after lowercasing and drop empty needles: "".count-style
    # matching of an empty phrase would hit at every position.
    # Longest-first ordering makes the alternation prefer the most specific
    # phrase at any given start position; the secondary key keeps the
    # generated pattern deterministic.
    candidates = sorted(
        {needle.lower() for needle in needles if needle},
        key=lambda phrase: (-len(phrase), phrase),
    )
    if not candidates:
        return 0
    # re.escape keeps phrases literal even if they contain regex metacharacters.
    pattern = re.compile("|".join(re.escape(phrase) for phrase in candidates))
    # finditer yields non-overlapping matches scanning left to right.
    return sum(1 for _ in pattern.finditer(text.lower()))
49
+
50
+
46
51
  def strip_latex_commands(text: str) -> str:
47
52
  text = re.sub(r"%.*", " ", text)
48
53
  text = re.sub(r"\\[A-Za-z@*]+(?:\[[^\]]*\])?", " ", text)
@@ -229,6 +234,51 @@ WORKFLOW_ONLY_MANUSCRIPT_PHRASES = (
229
234
  "工作流语言",
230
235
  "同步到",
231
236
  )
237
# Negative-definition / disclaimer phrases. The section and asset checks feed
# this tuple to count_phrase_hits and warn once two or more hits are found.
OVER_DEFENSIVE_BOUNDARY_PHRASES = (
    # English phrasings.
    (
        "not intended as",
        "not intended to be",
        "not meant as",
        "not meant to be",
        "not a general",
        "not a deployable",
        "not a production",
        "should not be viewed as",
        "should not be read as",
        "only to show",
        "only to illustrate",
        "only to demonstrate",
        "only used to show",
        "serves only to show",
    )
    # Chinese phrasings.
    + (
        "仅用于说明",
        "仅用于展示",
        "仅用于证明",
        "只是为了说明",
        "不应被视为",
        "不应视为",
        "不是通用",
        "不是可部署",
        "不是真实场景",
        "不作为",
    )
)
263
# Metric and comparison vocabulary. has_result_log_numeric_dump counts hits
# from this tuple as its second condition, after the numeric-token threshold.
RESULT_LOG_SIGNAL_PHRASES = (
    # English terms.
    (
        "auuc",
        "qini",
        "auc",
        "accuracy",
        "f1",
        "score",
        "scores",
        "point gain",
        "point gains",
        "points",
        "baseline",
        "baselines",
    )
    # Chinese terms.
    + (
        "提升",
        "百分点",
        "得分",
        "分差",
        "基线",
    )
)
232
282
  INTERNAL_EXPERIMENT_PROVENANCE_PHRASES = (
233
283
  "tuning run",
234
284
  "tuning runs",
@@ -351,6 +401,14 @@ def has_diagnostic_interpretation(text: str) -> bool:
351
401
  )
352
402
 
353
403
 
404
def has_result_log_numeric_dump(text: str, min_numbers: int = 8, min_signal_hits: int = 2) -> bool:
    """Report whether ``text`` reads like a dense dump of benchmark numbers.

    Both conditions are evaluated on the text after LaTeX stripping: it must
    contain at least ``min_numbers`` numeric tokens and at least
    ``min_signal_hits`` hits from ``RESULT_LOG_SIGNAL_PHRASES``.

    Args:
        text: Raw section text, possibly containing LaTeX markup.
        min_numbers: Minimum count of numeric tokens (integers or decimals).
        min_signal_hits: Minimum phrase-hit count reported by
            ``count_phrase_hits`` for the result-log vocabulary.

    Returns:
        True when both thresholds are met, otherwise False.
    """
    # strip_latex_commands starts by deleting everything from any "%" to the
    # end of the line. An escaped literal percent ("92.3\%") would therefore
    # erase the rest of the line, including the numbers and metric words this
    # check needs to see. Blank out "\%" first; the lookbehind leaves "\\%"
    # (a line break followed by a real comment) to be handled as a comment.
    text_without_escaped_percent = re.sub(r"(?<!\\)\\%", " ", text)
    prose_text = strip_latex_commands(text_without_escaped_percent)
    numbers = re.findall(r"\b\d+(?:\.\d+)?%?\b", prose_text)
    # Cheap numeric threshold first; phrase counting only runs when needed.
    if len(numbers) < min_numbers:
        return False
    return count_phrase_hits(prose_text, RESULT_LOG_SIGNAL_PHRASES) >= min_signal_hits
410
+
411
+
354
412
  def check_common_section_gate_risks(text: str, issues: list[str]):
355
413
  prose_text = strip_latex_commands(text)
356
414
  if ISOLATED_INSIGHT_HEADING_PATTERN.search(text):
@@ -435,6 +493,36 @@ def check_common_section_gate_risks(text: str, issues: list[str]):
435
493
  )
436
494
 
437
495
 
496
def check_boundary_and_density_risks(section: str, text: str, issues: list[str]):
    """Append warnings for disclaimer-heavy or number-heavy section prose.

    Two independent checks run:

    * Boundary dumping: for the abstract, introduction, method, and
      experiments sections, a warning is appended when the LaTeX-stripped
      text yields two or more ``OVER_DEFENSIVE_BOUNDARY_PHRASES`` hits.
    * Result-log dumping: for introduction (default thresholds) and method
      (``min_numbers=6``, ``min_signal_hits=2``), a warning is appended when
      ``has_result_log_numeric_dump`` returns True for the raw text.

    Args:
        section: Name of the section being checked.
        text: Raw section text.
        issues: List that receives warning messages; mutated in place.
    """
    boundary_messages = {
        "abstract": (
            "abstract section contains over-defensive boundary dumping; keep at most one brief boundary sentence and spend the abstract on problem, gap, method, and result"
        ),
        "introduction": (
            "introduction section contains over-defensive boundary dumping; state the gap and mechanism first, then keep only one brief boundary if it is needed"
        ),
        "method": (
            "method section contains over-defensive boundary dumping; explain what the method does and move repeated scaffold-defense language to one brief boundary sentence or the limitations section"
        ),
        "experiments": (
            "experiments section contains over-defensive boundary dumping; report the attack or evaluation outcome directly and keep only the minimum boundary needed to scope the evidence"
        ),
    }

    # The hit count is computed for every section, as the branching version
    # did, even though only the sections listed above can produce a warning.
    hit_total = count_phrase_hits(strip_latex_commands(text), OVER_DEFENSIVE_BOUNDARY_PHRASES)
    boundary_message = boundary_messages.get(section)
    if boundary_message is not None and hit_total >= 2:
        issues.append(boundary_message)

    # The density check receives the raw text; the helper strips LaTeX itself.
    if section == "introduction":
        if has_result_log_numeric_dump(text):
            issues.append(
                "introduction section contains result-log style numeric dumping; keep only the one or two numbers needed to motivate the contribution and move dense benchmark values to experiments"
            )
    elif section == "method":
        if has_result_log_numeric_dump(text, min_numbers=6, min_signal_hits=2):
            issues.append(
                "method section contains result-log style numeric dumping; keep method on mechanism and move dense numeric comparisons to experiments"
            )
524
+
525
+
438
526
  def check_section_style_policy(text: str, section: str, issues: list[str]):
439
527
  prose_text = strip_latex_commands(text)
440
528
  for message, needles in SECTION_STYLE_WARNINGS.get(section, []):
@@ -484,6 +572,11 @@ def check_neighbor_asset_files(section: str, section_path: Path, issues: list[st
484
572
  issues.append(
485
573
  f"{asset_path.as_posix()} contains workflow-only placeholder language; move authoring notes out of captions and paper-facing asset text"
486
574
  )
575
+ boundary_hits = count_phrase_hits(asset_text, OVER_DEFENSIVE_BOUNDARY_PHRASES)
576
+ if boundary_hits >= 2:
577
+ issues.append(
578
+ f"{asset_path.as_posix()} contains over-defensive boundary dumping in a paper-facing asset; keep captions and local notes focused on what the figure or table shows"
579
+ )
487
580
 
488
581
 
489
582
  def check_paper_topology_targeting(section_path: Path, issues: list[str]):
@@ -774,6 +867,7 @@ def main():
774
867
  check_paper_topology_targeting(section_path, blocking_issues)
775
868
  check_workflow_language_targeting(section_path, blocking_issues)
776
869
  check_common_section_gate_risks(text, warning_issues)
870
+ check_boundary_and_density_risks(args.section, text, warning_issues)
777
871
  check_section_style_policy(text, args.section, warning_issues)
778
872
  SECTION_CHECKS[args.section](text, warning_issues)
779
873
  check_neighbor_asset_files(args.section, section_path, warning_issues)
@@ -26,11 +26,14 @@ These are paper-facing defaults. They are not project-specific branding rules.
26
26
  - Long setup of benchmark details.
27
27
  - Contribution lists that read like the introduction.
28
28
  - Excessive future-work framing.
29
+ - More than one defensive boundary sentence.
30
+ - Internal scaffold names before the reader-facing concept is clear.
29
31
 
30
32
  **Banned expressions / moves**
31
33
  - Roadmap prose such as "In this paper, we first..., then..., finally...".
32
34
  - Reviewer-facing instructions such as "the reader can see" or "as shown clearly below".
33
35
  - Unbounded superiority claims such as "universally", "always", or "in every setting".
36
+ - Over-defensive boundary dumping that keeps explaining what the paper is not instead of what it shows.
34
37
  - Service-style or AI-assistant meta language such as "用户说", "按你的要求", "我来解释", "let me explain", or "as requested by the user".
35
38
  - Workflow-only placeholder language such as "图的意图", "资产意图", "占位符", "workflow-language", or "sync this wording".
36
39
  - Standalone insight headings such as "Our Insights" when the insight is not woven into the abstract's challenge and contribution arc.
@@ -51,12 +54,15 @@ These are paper-facing defaults. They are not project-specific branding rules.
51
54
  - Repeating "important" or "significant" without a concrete consequence.
52
55
  - Opening with generic field hype.
53
56
  - Listing contributions before the gap is clear.
57
+ - Result-log introductions that dump many benchmark values before the gap is clear.
58
+ - Repeating the same boundary defense that will already appear in Method, Experiments, or Limitations.
54
59
 
55
60
  **Banned expressions / moves**
56
61
  - Empty macro-importance claims such as "this problem is increasingly critical" with no concrete consequence.
57
62
  - Marketing-style first-claim language such as "revolutionary", "game-changing", or "unprecedented" without evidence.
58
63
  - Paragraphs that only praise the paper instead of stating the research gap.
59
64
  - Standalone "Our Insights" sections; the insight should be part of the motivation and gap logic.
65
+ - Over-defensive boundary dumping that explains what the system is not before the reader understands the problem, gap, and contribution.
60
66
  - Service-style or AI-assistant meta language such as "用户说", "按你的要求", "我来解释", "let me explain", or "as requested by the user".
61
67
  - Workflow-only placeholder language such as "图的意图", "资产意图", "占位符", "workflow-language", or "sync this wording".
62
68
 
@@ -98,12 +104,15 @@ These are paper-facing defaults. They are not project-specific branding rules.
98
104
  - Long implementation detail lists that belong in appendix or setup.
99
105
  - Repeating model names without explaining their role.
100
106
  - Overusing novelty language instead of mechanism explanation.
107
+ - Defensive paragraphs that keep explaining what the method is not instead of what mechanism it implements.
108
+ - Dense result numbers that belong in Experiments rather than Method.
101
109
 
102
110
  **Banned expressions / moves**
103
111
  - Marketing-style or self-promotional wording such as "elegant", "powerful", "dramatically stronger", or "significantly outperforms prior methods" when used as prose decoration rather than evidence-backed result reporting.
104
112
  - Explaining the method by saying it is "better", "stronger", or "more advanced" without saying how it works.
105
113
  - Method subsections that read like API documentation without explaining which mechanism or insight requires the design.
106
114
  - Introducing new narrative aliases for canonical model or ablation labels after they have already been locked.
115
+ - Over-defensive boundary dumping that keeps justifying what the method is not or where it is not deployable instead of explaining how the mechanism works.
107
116
  - Service-style or AI-assistant meta language such as "用户说", "按你的要求", "我来解释", "let me explain", or "as requested by the user".
108
117
  - Workflow-only placeholder language such as "图的意图", "资产意图", "占位符", "workflow-language", or "sync this wording".
109
118
 
@@ -124,6 +133,8 @@ These are paper-facing defaults. They are not project-specific branding rules.
124
133
  - Re-explaining the same metric in every paragraph.
125
134
  - Paragraphs that only restate the table without synthesis.
126
135
  - Result paragraphs that say only "higher/lower/better" without explaining what the pattern teaches.
136
+ - Repeating the same boundary defense after every table or figure.
137
+ - Captions or table notes that carry more defense than result interpretation.
127
138
 
128
139
  **Banned expressions / moves**
129
140
  - Meta-reader guidance such as "这样读者可以……", "the reader can first...", or "this table lets the reader...".
@@ -131,6 +142,7 @@ These are paper-facing defaults. They are not project-specific branding rules.
131
142
  - Layout-process commentary in scientific prose, such as "由于表列较多,这里采用页宽自适应排版" or "we use page-width adaptive layout here".
132
143
  - Claims that a table "proves" something when the evidence only supports a bounded empirical result.
133
144
  - Internal experiment-planning prose, such as "还需要新增 holdout", "小批量门控", "冻结 payload", "不能边跑边调", "API 规模估计", or "if all scores are 1.0000, treat it as overfitting".
145
+ - Over-defensive boundary dumping that spends more sentences disclaiming the setup than reporting what was attacked, measured, recovered, or bounded.
134
146
  - Service-style or AI-assistant meta language such as "用户说", "按你的要求", "我来解释", "let me explain", or "as requested by the user".
135
147
  - Workflow-only placeholder language such as "图的意图", "资产意图", "占位符", "workflow-language", or "sync this wording".
136
148
 
@@ -154,6 +154,7 @@ Do not enter prose polish until the current section has passed the reference-con
154
154
  - Build a compact mini-outline before prose.
155
155
  - When reference-guided deep-write is triggered, build the reference consumption plan before the mini-outline so the outline is based on mapped section slots rather than generic prose flow.
156
156
  - Academic readability standards are the same in `workflow_language` and `paper_language`; changing languages must not lower external-reader clarity.
157
+ - Prefer concept first, implementation label second. If an internal scaffold, expert, oracle, parser, or module nickname is not central to the reader's understanding, state the reader-facing concept first and introduce the implementation label only if later reuse justifies it.
157
158
  - If the current round introduces or revises key terms, abbreviations, metric names, mechanism names, or system labels, explain them at first mention by briefly stating what they are and why they matter here.
158
159
  - First mention should use the full form. If a short form or acronym will be reused later, define it at first mention as `Full Form (Short Form)` before switching to the short form.
159
160
  - Apply the same first-mention rule to table headers, table captions, table notes, and figure captions or labels; if a term or abbreviation first appears in a table, expand it locally in that table.
@@ -169,6 +170,8 @@ Do not enter prose polish until the current section has passed the reference-con
169
170
  - In Experiments, interpret results diagnostically: say which part of the insight each result, ablation, robustness check, or failure case supports, weakens, or bounds. Do not only read numbers from a table.
170
171
  - In Conclusion, state the broader principle or action implication implied by the evidence, then state the boundary. Do not introduce a new insight there.
171
172
  - Avoid paper-facing headings such as `Our Insights` or `核心洞见`; if a heading is needed, use normal section roles such as motivation, analysis, ablation, or discussion and let the insight appear in the prose.
173
+ - Keep boundary statements sparse. One brief boundary sentence in Abstract, one brief scope sentence in Experiments, and one fuller limitation in Conclusion is the default pattern; do not repeat the same defense across sections unless the evidence scope genuinely changes.
174
+ - If a paragraph outside Experiments reads like a result log, cut it back to the one or two numbers needed for motivation and move the dense benchmark values to Experiments or tables.
172
175
  - Nontrivial section work must use three separated revision passes instead of one all-purpose rewrite:
173
176
  - Logic pass: check the paragraph role, claim chain, premise-to-conclusion transition, evidence dependency, and whether the section naturally follows from adjacent sections. Use `section-question-bank.md` to force explicit answers about section purpose. Do not polish wording in this pass.
174
177
  - Theory / field pass: after the logic pass is clean, check concept use, field terminology, metric definitions, citation anchors, and whether the chosen framework actually fits the claim. Run `argument-stress-test.md` here, including the weakest-link test, reverse-claim test, and strongest alternative explanation check. Do not treat fluent language as proof that the theory is right.
@@ -198,6 +201,7 @@ Do not enter prose polish until the current section has passed the reference-con
198
201
  - Keep run provenance such as tuning-run labels, probe names, internal config strings, rerun ids, and package labels out of reader-facing prose. If the evidence is useful, rewrite it as a bounded paper-facing diagnostic or move the raw provenance to workflow notes or appendix metadata.
199
202
  - Keep internal experiment planning out of reader-facing prose. Do not write paper sentences that explain future holdout expansion, small-batch gates, payload freezing, API budget, "if all scores are 1.0000 then treat as overfitting", or why a next automation round is needed.
200
203
  - When an experiment boundary matters, report only the scientific scope already supported by the evidence. Put the operational plan for collecting new attacks, new papers, new markers, or additional holdout cases into `.lab/changes/`, `.lab/iterations/`, or report artifacts, not into manuscript sections.
204
+ - Do not let Method or Experiments be dominated by negative-definition prose such as what the system is not, what it is not meant to replace, or where it is not deployable. After one clear scope sentence, return to mechanism or evidence.
201
205
  - Do not use unexplained terminology density as a substitute for academic tone.
202
206
  - Keep service-style or AI-assistant meta language out of manuscript prose. Phrases such as "用户说", "按你的要求", "我来解释", "下面我", "this version", or "as requested by the user" belong in workflow notes, not in paper-facing sections, captions, table notes, or analysis assets.
203
207
  - Keep workflow-only placeholder language out of manuscript prose. Phrases such as "图的意图", "资产意图", "占位符", "workflow-language", "translation layer", or "sync this wording" belong in authoring artifacts, not in reader-facing LaTeX.
@@ -257,6 +261,7 @@ Do not enter prose polish until the current section has passed the reference-con
257
261
  - Table assets must use paper-facing LaTeX structure with `booktabs`, caption, label, and consistent precision.
258
262
  - Table assets must also include a local table note that explains row meaning, column meaning, metric definitions, comparison scope, and any important caveat.
259
263
  - The local table note must contain real reader-facing explanations, not the default template phrases such as "explain what each row represents" or "expand local abbreviations".
264
+ - Captions and table notes should explain what the asset shows, not defend the workflow. Do not use captions to carry repeated boundary disclaimers that belong in section prose or Limitations.
260
265
  - Table assets must not rely on aggressive width hacks by default; if width control is still needed after table redesign, document it locally and keep it readable.
261
266
  - Table assets with seven or more columns should be split, moved partly to appendix, or written with width-aware columns such as `tabularx` or `p{...}` instead of a plain `tabular` layout.
262
267
  - Figure placeholders may record what the final figure should show and why the reader needs it in authoring comments, the paper plan, or the write-iteration artifact, but the caption itself must remain paper-facing and must not contain "Figure intent", "图的意图", "asset intent", "占位符", or similar workflow language.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "superlab",
3
- "version": "0.1.79",
3
+ "version": "0.1.80",
4
4
  "description": "Strict /lab research workflow installer for Codex and Claude",
5
5
  "keywords": [
6
6
  "codex",