superlab 0.1.78 → 0.1.80
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package-assets/shared/lab/.managed/scripts/validate_section_draft.py +94 -0
- package/package-assets/shared/lab/.managed/templates/rebuttal-panel.md +18 -6
- package/package-assets/shared/skills/lab/references/paper-writing/argument-stress-test.md +34 -0
- package/package-assets/shared/skills/lab/references/paper-writing/section-question-bank.md +59 -0
- package/package-assets/shared/skills/lab/references/paper-writing/section-style-policies.md +12 -0
- package/package-assets/shared/skills/lab/references/paper-writing/writing-anti-patterns.md +33 -0
- package/package-assets/shared/skills/lab/references/rebuttal-mode.md +22 -0
- package/package-assets/shared/skills/lab/stages/review.md +8 -4
- package/package-assets/shared/skills/lab/stages/write.md +11 -3
- package/package.json +1 -1
|
@@ -43,6 +43,11 @@ def contains_any(text: str, needles: tuple[str, ...]) -> bool:
|
|
|
43
43
|
return any(needle.lower() in lowered for needle in needles)
|
|
44
44
|
|
|
45
45
|
|
|
46
|
+
def count_phrase_hits(text: str, needles: tuple[str, ...]) -> int:
    """Count case-insensitive, non-overlapping phrase occurrences in ``text``.

    Each stretch of text contributes at most one hit, and when several
    needles could match at the same position the longest one wins.  This
    keeps a phrase that contains another needle (for example
    ``"serves only to show"`` vs. ``"only to show"``, or ``"scores"`` vs.
    ``"score"``) from being counted twice, which would otherwise let a single
    mention satisfy a ``>= 2`` threshold in the callers.

    Args:
        text: Text to scan.
        needles: Phrases to look for; matched as plain substrings after
            lower-casing both sides.  Empty and duplicate needles are ignored.

    Returns:
        The number of non-overlapping matches of any needle in ``text``.
    """
    # De-duplicate, drop empty needles (they would match at every position),
    # and order longest-first so the alternation prefers the most specific phrase.
    unique_needles = sorted(
        {needle.lower() for needle in needles if needle},
        key=lambda needle: (-len(needle), needle),
    )
    if not unique_needles:
        return 0
    # re.escape keeps needles literal; finditer yields non-overlapping matches.
    pattern = "|".join(re.escape(needle) for needle in unique_needles)
    return sum(1 for _ in re.finditer(pattern, text.lower()))
|
|
49
|
+
|
|
50
|
+
|
|
46
51
|
def strip_latex_commands(text: str) -> str:
|
|
47
52
|
text = re.sub(r"%.*", " ", text)
|
|
48
53
|
text = re.sub(r"\\[A-Za-z@*]+(?:\[[^\]]*\])?", " ", text)
|
|
@@ -229,6 +234,51 @@ WORKFLOW_ONLY_MANUSCRIPT_PHRASES = (
|
|
|
229
234
|
"工作流语言",
|
|
230
235
|
"同步到",
|
|
231
236
|
)
|
|
237
|
+
# English and Chinese phrases that signal "what this work is not" boundary
# language.  They are counted with count_phrase_hits (lower-cased substring
# matching) by check_boundary_and_density_risks and check_neighbor_asset_files,
# both of which warn once the count reaches 2.
OVER_DEFENSIVE_BOUNDARY_PHRASES = (
    "not intended as",
    "not intended to be",
    "not meant as",
    "not meant to be",
    "not a general",
    "not a deployable",
    "not a production",
    "should not be viewed as",
    "should not be read as",
    "only to show",
    "only to illustrate",
    "only to demonstrate",
    "only used to show",
    "serves only to show",
    "仅用于说明",
    "仅用于展示",
    "仅用于证明",
    "只是为了说明",
    "不应被视为",
    "不应视为",
    "不是通用",
    "不是可部署",
    "不是真实场景",
    "不作为",
)
|
|
263
|
+
# English and Chinese metric / comparison vocabulary used by
# has_result_log_numeric_dump: a section is flagged as a "result log" only when
# it has enough numeric tokens AND enough hits from this list.  Entries are
# matched as lower-cased substrings via count_phrase_hits.
RESULT_LOG_SIGNAL_PHRASES = (
    "auuc",
    "qini",
    "auc",
    "accuracy",
    "f1",
    "score",
    "scores",
    "point gain",
    "point gains",
    "points",
    "baseline",
    "baselines",
    "提升",
    "百分点",
    "得分",
    "分差",
    "基线",
)
|
|
232
282
|
INTERNAL_EXPERIMENT_PROVENANCE_PHRASES = (
|
|
233
283
|
"tuning run",
|
|
234
284
|
"tuning runs",
|
|
@@ -351,6 +401,14 @@ def has_diagnostic_interpretation(text: str) -> bool:
|
|
|
351
401
|
)
|
|
352
402
|
|
|
353
403
|
|
|
404
|
+
def has_result_log_numeric_dump(text: str, min_numbers: int = 8, min_signal_hits: int = 2) -> bool:
    """Report whether ``text`` looks like a dense dump of benchmark numbers.

    The text is first passed through ``strip_latex_commands``.  It is flagged
    only when it contains at least ``min_numbers`` numeric tokens (integers or
    decimals, optionally followed by ``%``) and at least ``min_signal_hits``
    occurrences of ``RESULT_LOG_SIGNAL_PHRASES``.

    Args:
        text: Raw section text.
        min_numbers: Minimum count of numeric tokens required.
        min_signal_hits: Minimum count of result-vocabulary hits required.

    Returns:
        True when both thresholds are met, otherwise False.
    """
    cleaned = strip_latex_commands(text)
    numeric_tokens = re.findall(r"\b\d+(?:\.\d+)?%?\b", cleaned)
    has_enough_numbers = len(numeric_tokens) >= min_numbers
    # Short-circuit: the phrase scan only runs once the numeric threshold is met.
    return has_enough_numbers and count_phrase_hits(cleaned, RESULT_LOG_SIGNAL_PHRASES) >= min_signal_hits
|
|
410
|
+
|
|
411
|
+
|
|
354
412
|
def check_common_section_gate_risks(text: str, issues: list[str]):
|
|
355
413
|
prose_text = strip_latex_commands(text)
|
|
356
414
|
if ISOLATED_INSIGHT_HEADING_PATTERN.search(text):
|
|
@@ -435,6 +493,36 @@ def check_common_section_gate_risks(text: str, issues: list[str]):
|
|
|
435
493
|
)
|
|
436
494
|
|
|
437
495
|
|
|
496
|
+
def check_boundary_and_density_risks(section: str, text: str, issues: list[str]):
    """Append warnings for over-defensive boundary language and numeric dumps.

    Two independent checks are applied:

    * Boundary dumping: for the abstract, introduction, method, and
      experiments sections, warn when the LaTeX-stripped text contains two or
      more hits from ``OVER_DEFENSIVE_BOUNDARY_PHRASES``.
    * Result-log density: for the introduction (default thresholds) and the
      method section (``min_numbers=6``), warn when
      ``has_result_log_numeric_dump`` flags the text.

    Args:
        section: Name of the section being validated.
        text: Raw section text.
        issues: List that warning messages are appended to in place.
    """
    boundary_messages = {
        "abstract": "abstract section contains over-defensive boundary dumping; keep at most one brief boundary sentence and spend the abstract on problem, gap, method, and result",
        "introduction": "introduction section contains over-defensive boundary dumping; state the gap and mechanism first, then keep only one brief boundary if it is needed",
        "method": "method section contains over-defensive boundary dumping; explain what the method does and move repeated scaffold-defense language to one brief boundary sentence or the limitations section",
        "experiments": "experiments section contains over-defensive boundary dumping; report the attack or evaluation outcome directly and keep only the minimum boundary needed to scope the evidence",
    }

    stripped = strip_latex_commands(text)
    defensive_count = count_phrase_hits(stripped, OVER_DEFENSIVE_BOUNDARY_PHRASES)
    boundary_message = boundary_messages.get(section)
    if boundary_message is not None and defensive_count >= 2:
        issues.append(boundary_message)

    # The density check receives the raw text; has_result_log_numeric_dump
    # strips LaTeX itself.
    if section == "introduction":
        if has_result_log_numeric_dump(text):
            issues.append(
                "introduction section contains result-log style numeric dumping; keep only the one or two numbers needed to motivate the contribution and move dense benchmark values to experiments"
            )
    elif section == "method":
        if has_result_log_numeric_dump(text, min_numbers=6, min_signal_hits=2):
            issues.append(
                "method section contains result-log style numeric dumping; keep method on mechanism and move dense numeric comparisons to experiments"
            )
|
|
524
|
+
|
|
525
|
+
|
|
438
526
|
def check_section_style_policy(text: str, section: str, issues: list[str]):
|
|
439
527
|
prose_text = strip_latex_commands(text)
|
|
440
528
|
for message, needles in SECTION_STYLE_WARNINGS.get(section, []):
|
|
@@ -484,6 +572,11 @@ def check_neighbor_asset_files(section: str, section_path: Path, issues: list[st
|
|
|
484
572
|
issues.append(
|
|
485
573
|
f"{asset_path.as_posix()} contains workflow-only placeholder language; move authoring notes out of captions and paper-facing asset text"
|
|
486
574
|
)
|
|
575
|
+
boundary_hits = count_phrase_hits(asset_text, OVER_DEFENSIVE_BOUNDARY_PHRASES)
|
|
576
|
+
if boundary_hits >= 2:
|
|
577
|
+
issues.append(
|
|
578
|
+
f"{asset_path.as_posix()} contains over-defensive boundary dumping in a paper-facing asset; keep captions and local notes focused on what the figure or table shows"
|
|
579
|
+
)
|
|
487
580
|
|
|
488
581
|
|
|
489
582
|
def check_paper_topology_targeting(section_path: Path, issues: list[str]):
|
|
@@ -774,6 +867,7 @@ def main():
|
|
|
774
867
|
check_paper_topology_targeting(section_path, blocking_issues)
|
|
775
868
|
check_workflow_language_targeting(section_path, blocking_issues)
|
|
776
869
|
check_common_section_gate_risks(text, warning_issues)
|
|
870
|
+
check_boundary_and_density_risks(args.section, text, warning_issues)
|
|
777
871
|
check_section_style_policy(text, args.section, warning_issues)
|
|
778
872
|
SECTION_CHECKS[args.section](text, warning_issues)
|
|
779
873
|
check_neighbor_asset_files(args.section, section_path, warning_issues)
|
|
@@ -22,9 +22,15 @@
|
|
|
22
22
|
|
|
23
23
|
## External Rebuttal Intake
|
|
24
24
|
|
|
25
|
-
| Source | Raw criticism summary | Affected unit | Reviewer axis | Severity | Route | Acceptance check |
|
|
26
|
-
| --- | --- | --- | --- | --- | --- | --- |
|
|
27
|
-
| | | | | | | |
|
|
25
|
+
| Source | Raw criticism summary | Affected unit | Reviewer axis | Severity | Why it matters | Route | Acceptance check |
|
|
26
|
+
| --- | --- | --- | --- | --- | --- | --- | --- |
|
|
27
|
+
| | | | | | | | |
|
|
28
|
+
|
|
29
|
+
## Revision Traceability
|
|
30
|
+
|
|
31
|
+
| Author claim or revised statement | Claimed artifact location | Independently verified | Remaining gap |
|
|
32
|
+
| --- | --- | --- | --- |
|
|
33
|
+
| | | | |
|
|
28
34
|
|
|
29
35
|
## Reviewer Panel Findings
|
|
30
36
|
|
|
@@ -70,9 +76,15 @@
|
|
|
70
76
|
|
|
71
77
|
## Actionable Issue Register
|
|
72
78
|
|
|
73
|
-
| ID | Axis | Severity | Affected artifact | Finding | Required fix | Route | Acceptance check | Core mutation required |
|
|
74
|
-
| --- | --- | --- | --- | --- | --- | --- | --- | --- |
|
|
75
|
-
| | | | | | | | | |
|
|
79
|
+
| ID | Axis | Severity | Affected artifact | Finding | Why it matters | Required fix | Route | Acceptance check | Core mutation required |
|
|
80
|
+
| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |
|
|
81
|
+
| | | | | | | | | | |
|
|
82
|
+
|
|
83
|
+
## Consensus / Disagreement / Resolution
|
|
84
|
+
|
|
85
|
+
- Consensus:
|
|
86
|
+
- Disagreement:
|
|
87
|
+
- Resolution:
|
|
76
88
|
|
|
77
89
|
## Core Mutation Check
|
|
78
90
|
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
# Argument Stress Test
|
|
2
|
+
|
|
3
|
+
Run this after the logic pass and before language polish. The goal is to break weak argument chains early.
|
|
4
|
+
|
|
5
|
+
## Required Checks
|
|
6
|
+
|
|
7
|
+
1. **Weakest-link test**
|
|
8
|
+
Which paragraph carries the most unsupported inference? Rewrite or narrow it first.
|
|
9
|
+
|
|
10
|
+
2. **Reverse-claim test**
|
|
11
|
+
If the main claim were false, which sentence in this section would fail first? That sentence usually reveals the overclaim.
|
|
12
|
+
|
|
13
|
+
3. **Alternative-explanation test**
|
|
14
|
+
What simpler explanation could produce the same pattern? State it explicitly and check whether the section already weakens it.
|
|
15
|
+
|
|
16
|
+
4. **Boundary test**
|
|
17
|
+
What does the current evidence still not justify? Add that limit before polishing.
|
|
18
|
+
|
|
19
|
+
5. **Evidence-trace test**
|
|
20
|
+
Can every major claim be traced to a figure, table, protocol statement, citation, or definition already present?
|
|
21
|
+
|
|
22
|
+
## Section-Specific Triggers
|
|
23
|
+
|
|
24
|
+
- **Introduction**: would the paper still look necessary if the claimed challenge were softened?
|
|
25
|
+
- **Method**: could a reviewer say the module is an arbitrary stack choice?
|
|
26
|
+
- **Experiments**: does the prose explain what the result teaches, not only what number is larger?
|
|
27
|
+
- **Conclusion**: does any sentence exceed what the experiments actually support?
|
|
28
|
+
|
|
29
|
+
## Repair Moves
|
|
30
|
+
|
|
31
|
+
1. Replace vague claims with a narrower mechanism claim.
|
|
32
|
+
2. Add one explicit bridge sentence from evidence to interpretation.
|
|
33
|
+
3. Move unsupported motivation out of results and back into hypothesis language.
|
|
34
|
+
4. State the remaining alternative explanation if it is not fully ruled out.
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
# Section Question Bank
|
|
2
|
+
|
|
3
|
+
Use this file before drafting or revising a paper section. The goal is to force one pass on section purpose before wording.
|
|
4
|
+
|
|
5
|
+
## Global Questions
|
|
6
|
+
|
|
7
|
+
1. What single job must this section do for the paper?
|
|
8
|
+
2. What must the reader understand after this section that they did not understand before?
|
|
9
|
+
3. Which claim, protocol, metric, or boundary does this section depend on?
|
|
10
|
+
4. Which paragraph is most likely to confuse a skeptical reviewer?
|
|
11
|
+
5. Which sentence would become false if one supporting result disappeared?
|
|
12
|
+
|
|
13
|
+
## Abstract
|
|
14
|
+
|
|
15
|
+
1. Is the task clear in one pass?
|
|
16
|
+
2. Is the gap tied to prior work rather than hype?
|
|
17
|
+
3. Is the mechanism-level insight visible?
|
|
18
|
+
4. Is the main result bounded by scope?
|
|
19
|
+
5. Is the main boundary stated?
|
|
20
|
+
|
|
21
|
+
## Introduction
|
|
22
|
+
|
|
23
|
+
1. What common assumption or prior explanation is being challenged?
|
|
24
|
+
2. What failure, anomaly, or unmet need forces the paper to exist?
|
|
25
|
+
3. What is the core insight anchor?
|
|
26
|
+
4. Why does the proposed direction follow from that insight?
|
|
27
|
+
5. Which contribution is truly central, and which are supporting?
|
|
28
|
+
|
|
29
|
+
## Related Work
|
|
30
|
+
|
|
31
|
+
1. What are the 2-4 actual research clusters readers need?
|
|
32
|
+
2. Which paper is the closest prior work?
|
|
33
|
+
3. What exact capability remains unresolved in each cluster?
|
|
34
|
+
4. Does each cluster end with a gap statement?
|
|
35
|
+
5. Did any paragraph become a citation list instead of a comparison?
|
|
36
|
+
|
|
37
|
+
## Method
|
|
38
|
+
|
|
39
|
+
1. Which design choice would a reviewer call arbitrary?
|
|
40
|
+
2. What problem forces each module, loss, or representation?
|
|
41
|
+
3. Is the information flow described in executable order?
|
|
42
|
+
4. What technical effect is expected from each major design?
|
|
43
|
+
5. Which term or label still lacks a first-mention explanation?
|
|
44
|
+
|
|
45
|
+
## Experiments
|
|
46
|
+
|
|
47
|
+
1. Are dataset scope, split protocol, and baseline setup separately visible?
|
|
48
|
+
2. Is each metric defined once in reader-facing language?
|
|
49
|
+
3. Which table answers the main effectiveness question?
|
|
50
|
+
4. Which ablation rules out the strongest simpler explanation?
|
|
51
|
+
5. What boundary or failure mode remains after the main result?
|
|
52
|
+
|
|
53
|
+
## Conclusion
|
|
54
|
+
|
|
55
|
+
1. What is the strongest supported takeaway?
|
|
56
|
+
2. What broader principle follows from that takeaway?
|
|
57
|
+
3. What boundary prevents overclaiming?
|
|
58
|
+
4. What next step follows from the boundary instead of from habit?
|
|
59
|
+
5. Did the section introduce any new evidence or claim?
|
|
@@ -26,11 +26,14 @@ These are paper-facing defaults. They are not project-specific branding rules.
|
|
|
26
26
|
- Long setup of benchmark details.
|
|
27
27
|
- Contribution lists that read like the introduction.
|
|
28
28
|
- Excessive future-work framing.
|
|
29
|
+
- More than one defensive boundary sentence.
|
|
30
|
+
- Internal scaffold names before the reader-facing concept is clear.
|
|
29
31
|
|
|
30
32
|
**Banned expressions / moves**
|
|
31
33
|
- Roadmap prose such as "In this paper, we first..., then..., finally...".
|
|
32
34
|
- Reviewer-facing instructions such as "the reader can see" or "as shown clearly below".
|
|
33
35
|
- Unbounded superiority claims such as "universally", "always", or "in every setting".
|
|
36
|
+
- Over-defensive boundary dumping that keeps explaining what the paper is not instead of what it shows.
|
|
34
37
|
- Service-style or AI-assistant meta language such as "用户说", "按你的要求", "我来解释", "let me explain", or "as requested by the user".
|
|
35
38
|
- Workflow-only placeholder language such as "图的意图", "资产意图", "占位符", "workflow-language", or "sync this wording".
|
|
36
39
|
- Standalone insight headings such as "Our Insights" when the insight is not woven into the abstract's challenge and contribution arc.
|
|
@@ -51,12 +54,15 @@ These are paper-facing defaults. They are not project-specific branding rules.
|
|
|
51
54
|
- Repeating "important" or "significant" without a concrete consequence.
|
|
52
55
|
- Opening with generic field hype.
|
|
53
56
|
- Listing contributions before the gap is clear.
|
|
57
|
+
- Result-log introductions that dump many benchmark values before the gap is clear.
|
|
58
|
+
- Repeating the same boundary defense that will already appear in Method, Experiments, or Limitations.
|
|
54
59
|
|
|
55
60
|
**Banned expressions / moves**
|
|
56
61
|
- Empty macro-importance claims such as "this problem is increasingly critical" with no concrete consequence.
|
|
57
62
|
- Marketing-style first-claim language such as "revolutionary", "game-changing", or "unprecedented" without evidence.
|
|
58
63
|
- Paragraphs that only praise the paper instead of stating the research gap.
|
|
59
64
|
- Standalone "Our Insights" sections; the insight should be part of the motivation and gap logic.
|
|
65
|
+
- Over-defensive boundary dumping that explains what the system is not before the reader understands the problem, gap, and contribution.
|
|
60
66
|
- Service-style or AI-assistant meta language such as "用户说", "按你的要求", "我来解释", "let me explain", or "as requested by the user".
|
|
61
67
|
- Workflow-only placeholder language such as "图的意图", "资产意图", "占位符", "workflow-language", or "sync this wording".
|
|
62
68
|
|
|
@@ -98,12 +104,15 @@ These are paper-facing defaults. They are not project-specific branding rules.
|
|
|
98
104
|
- Long implementation detail lists that belong in appendix or setup.
|
|
99
105
|
- Repeating model names without explaining their role.
|
|
100
106
|
- Overusing novelty language instead of mechanism explanation.
|
|
107
|
+
- Defensive paragraphs that keep explaining what the method is not instead of what mechanism it implements.
|
|
108
|
+
- Dense result numbers that belong in Experiments rather than Method.
|
|
101
109
|
|
|
102
110
|
**Banned expressions / moves**
|
|
103
111
|
- Marketing-style or self-promotional wording such as "elegant", "powerful", "dramatically stronger", or "significantly outperforms prior methods" when used as prose decoration rather than evidence-backed result reporting.
|
|
104
112
|
- Explaining the method by saying it is "better", "stronger", or "more advanced" without saying how it works.
|
|
105
113
|
- Method subsections that read like API documentation without explaining which mechanism or insight requires the design.
|
|
106
114
|
- Introducing new narrative aliases for canonical model or ablation labels after they have already been locked.
|
|
115
|
+
- Over-defensive boundary dumping that keeps justifying what the method is not or where it is not deployable instead of explaining how the mechanism works.
|
|
107
116
|
- Service-style or AI-assistant meta language such as "用户说", "按你的要求", "我来解释", "let me explain", or "as requested by the user".
|
|
108
117
|
- Workflow-only placeholder language such as "图的意图", "资产意图", "占位符", "workflow-language", or "sync this wording".
|
|
109
118
|
|
|
@@ -124,6 +133,8 @@ These are paper-facing defaults. They are not project-specific branding rules.
|
|
|
124
133
|
- Re-explaining the same metric in every paragraph.
|
|
125
134
|
- Paragraphs that only restate the table without synthesis.
|
|
126
135
|
- Result paragraphs that say only "higher/lower/better" without explaining what the pattern teaches.
|
|
136
|
+
- Repeating the same boundary defense after every table or figure.
|
|
137
|
+
- Captions or table notes that carry more defense than result interpretation.
|
|
127
138
|
|
|
128
139
|
**Banned expressions / moves**
|
|
129
140
|
- Meta-reader guidance such as "这样读者可以……", "the reader can first...", or "this table lets the reader...".
|
|
@@ -131,6 +142,7 @@ These are paper-facing defaults. They are not project-specific branding rules.
|
|
|
131
142
|
- Layout-process commentary in scientific prose, such as "由于表列较多,这里采用页宽自适应排版" or "we use page-width adaptive layout here".
|
|
132
143
|
- Claims that a table "proves" something when the evidence only supports a bounded empirical result.
|
|
133
144
|
- Internal experiment-planning prose, such as "还需要新增 holdout", "小批量门控", "冻结 payload", "不能边跑边调", "API 规模估计", or "if all scores are 1.0000, treat it as overfitting".
|
|
145
|
+
- Over-defensive boundary dumping that spends more sentences disclaiming the setup than reporting what was attacked, measured, recovered, or bounded.
|
|
134
146
|
- Service-style or AI-assistant meta language such as "用户说", "按你的要求", "我来解释", "let me explain", or "as requested by the user".
|
|
135
147
|
- Workflow-only placeholder language such as "图的意图", "资产意图", "占位符", "workflow-language", or "sync this wording".
|
|
136
148
|
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
# Writing Anti-Patterns
|
|
2
|
+
|
|
3
|
+
Run this only after logic and theory blockers are cleared. The goal is to remove common low-quality prose habits without inventing new content.
|
|
4
|
+
|
|
5
|
+
## Common Anti-Patterns
|
|
6
|
+
|
|
7
|
+
1. **Throat-clearing openings**
|
|
8
|
+
Paragraphs that spend one or two sentences warming up before making the point. Move the point to the first sentence.
|
|
9
|
+
|
|
10
|
+
2. **Uniform paragraph length**
|
|
11
|
+
Every paragraph having the same size often signals template writing rather than argument structure. Split or merge by idea, not by rhythm.
|
|
12
|
+
|
|
13
|
+
3. **Synonym cycling**
|
|
14
|
+
Switching among multiple names for the same concept creates false variety and weakens terminology control. Keep one canonical paper-facing name.
|
|
15
|
+
|
|
16
|
+
4. **Generic evaluative vocabulary**
|
|
17
|
+
Words such as `important`, `significant`, `effective`, `clear`, or `promising` without concrete content. Replace with the specific consequence, metric, or boundary.
|
|
18
|
+
|
|
19
|
+
5. **List-shaped filler**
|
|
20
|
+
Sentences that pile up three or more loosely related claims to sound complete. Split them into one claim per sentence when the logic differs.
|
|
21
|
+
|
|
22
|
+
6. **Overconnected transitions**
|
|
23
|
+
Repeated `therefore`, `moreover`, `furthermore`, or similar connectors can hide weak logic. Keep the connective only when the relation is explicit.
|
|
24
|
+
|
|
25
|
+
7. **Meta-writer commentary**
|
|
26
|
+
Phrases about what the paper is doing rather than the research itself, unless the sentence is a legitimate roadmap or scope statement.
|
|
27
|
+
|
|
28
|
+
## Quick Repair Questions
|
|
29
|
+
|
|
30
|
+
1. Can the first sentence of each paragraph state the paragraph's job directly?
|
|
31
|
+
2. Did any concept get renamed just to avoid repetition?
|
|
32
|
+
3. Can a vague adjective be replaced by a metric, mechanism, or constraint?
|
|
33
|
+
4. Did any sentence become longer because two different ideas were forced together?
|
|
@@ -56,11 +56,23 @@ For each external comment, record:
|
|
|
56
56
|
- affected paper unit: claim, section, table, figure, protocol, metric, threat model, experiment, or wording
|
|
57
57
|
- reviewer axis: R1, R2, R3, R4, or R5
|
|
58
58
|
- severity: fatal, major, minor, or clarification
|
|
59
|
+
- why it matters: what acceptance risk or scientific risk it creates
|
|
59
60
|
- route: `write`, `iterate`, `report`, `framing`, `data`, `spec`, or `ask-user`
|
|
60
61
|
- acceptance check: concrete evidence or manuscript condition that resolves the issue
|
|
61
62
|
|
|
62
63
|
Do not answer external criticism with prose-only reassurance. If the issue is valid, it must become a repair task. If it is invalid, state the evidence that rules it out.
|
|
63
64
|
|
|
65
|
+
## Revision Traceability
|
|
66
|
+
|
|
67
|
+
When the paper already contains a claimed repair, rebuttal response, or revised section, record:
|
|
68
|
+
|
|
69
|
+
- author claim: what the current draft or response says was fixed
|
|
70
|
+
- claimed artifact location: where the claimed fix lives
|
|
71
|
+
- independently verified: yes / no / partial
|
|
72
|
+
- remaining gap: what still fails the acceptance check
|
|
73
|
+
|
|
74
|
+
Do not mark an issue resolved only because the manuscript says it was resolved. The pass must independently verify the fix or keep the remaining gap open.
|
|
75
|
+
|
|
64
76
|
## Reviewer Panel
|
|
65
77
|
|
|
66
78
|
Run five independent review lenses. Each lens must produce actionable issues, not vague advice.
|
|
@@ -112,6 +124,16 @@ Every issue must include:
|
|
|
112
124
|
|
|
113
125
|
Prioritize fatal and major issues before language polish. Minor presentation fixes may be batched.
|
|
114
126
|
|
|
127
|
+
## Consensus / Disagreement / Resolution
|
|
128
|
+
|
|
129
|
+
At the end of the panel, summarize:
|
|
130
|
+
|
|
131
|
+
- consensus: what multiple review lenses agree is the main blocking problem
|
|
132
|
+
- disagreement: what remains contested across lenses or between criticism and evidence
|
|
133
|
+
- resolution: what concrete route, evidence, or rewrite will settle the disagreement
|
|
134
|
+
|
|
135
|
+
Use this summary to keep rebuttal mode actionable instead of devolving into parallel opinions.
|
|
136
|
+
|
|
115
137
|
## Core Mutation Policy
|
|
116
138
|
|
|
117
139
|
Core mutation means changing any of:
|
|
@@ -5,9 +5,11 @@
|
|
|
5
5
|
1. Give a concise summary of the artifact or result under review.
|
|
6
6
|
2. State the top review question or risk focus.
|
|
7
7
|
3. Audit in reviewer mode.
|
|
8
|
-
4.
|
|
9
|
-
5.
|
|
10
|
-
6.
|
|
8
|
+
4. Convert findings into structured issues: problem, why it matters, required fix, severity, route, and acceptance check.
|
|
9
|
+
5. When the target already has claimed fixes or rebuttal responses, add revision traceability: author claim, claimed artifact location, independently verified or not, and remaining gap.
|
|
10
|
+
6. Output fatal flaws first when present.
|
|
11
|
+
7. Rank the fix priority.
|
|
12
|
+
8. End with consensus, disagreement, resolution, residual risks, and a clear next action.
|
|
11
13
|
|
|
12
14
|
## Context Read Set
|
|
13
15
|
|
|
@@ -48,7 +50,8 @@
|
|
|
48
50
|
- For quick prompts such as "rebuttal一下看有什么缺点", start with the rebuttal Light Read Set only: active LaTeX, result summaries, managed indices, and supplied criticism. Do not run a whole-repository scan unless the panel records a specific escalation reason.
|
|
49
51
|
- External rebuttal, AC, meta-review, colleague, or user criticism must be converted into internal actionable issues before any rewrite or response draft.
|
|
50
52
|
- The Reviewer Panel must classify issues across R1 Significance / Originality / Insight, R2 Soundness / Technical Quality, R3 Evaluation / Analysis, R4 Results / Tables / Numeric Evidence, and R5 Presentation / Clarity.
|
|
51
|
-
- Each issue must include severity, affected artifact, required fix, route, acceptance check, and whether core mutation is required.
|
|
53
|
+
- Each issue must include problem, why it matters, severity, affected artifact, required fix, route, acceptance check, and whether core mutation is required.
|
|
54
|
+
- For re-review or rebuttal-repair passes, record author claim, claimed location, independently verified status, and remaining gap before marking an issue resolved.
|
|
52
55
|
- In L1/L2, core mutation remains an approval boundary unless explicitly authorized. In L3, route core mutation through the shared ledger policy instead of treating it as a reviewer-stage blocker.
|
|
53
56
|
|
|
54
57
|
## Output Style
|
|
@@ -57,6 +60,7 @@
|
|
|
57
60
|
- findings first
|
|
58
61
|
- fatal flaws called out explicitly
|
|
59
62
|
- fix priority stated clearly
|
|
63
|
+
- consensus / disagreement / resolution stated clearly
|
|
60
64
|
- evidence-linked critique
|
|
61
65
|
- explicit residual risks
|
|
62
66
|
- explicit alternative explanations and boundary risks
|
|
@@ -70,6 +70,9 @@ Run these on every round:
|
|
|
70
70
|
- section flow check -> `skills/lab/references/paper-writing/does-my-writing-flow-source.md`
|
|
71
71
|
- reviewer pass -> `skills/lab/references/paper-writing/paper-review.md`
|
|
72
72
|
- section-specific style policy -> `skills/lab/references/paper-writing/section-style-policies.md` (load the block matching the current section)
|
|
73
|
+
- section question bank -> `skills/lab/references/paper-writing/section-question-bank.md`
|
|
74
|
+
- argument stress test -> `skills/lab/references/paper-writing/argument-stress-test.md`
|
|
75
|
+
- language anti-pattern sweep -> `skills/lab/references/paper-writing/writing-anti-patterns.md`
|
|
73
76
|
|
|
74
77
|
## Rebuttal Mode
|
|
75
78
|
|
|
@@ -151,6 +154,7 @@ Do not enter prose polish until the current section has passed the reference-con
|
|
|
151
154
|
- Build a compact mini-outline before prose.
|
|
152
155
|
- When reference-guided deep-write is triggered, build the reference consumption plan before the mini-outline so the outline is based on mapped section slots rather than generic prose flow.
|
|
153
156
|
- Academic readability standards are the same in `workflow_language` and `paper_language`; changing languages must not lower external-reader clarity.
|
|
157
|
+
- Prefer concept first, implementation label second. If an internal scaffold, expert, oracle, parser, or module nickname is not central to the reader's understanding, state the reader-facing concept first and introduce the implementation label only if later reuse justifies it.
|
|
154
158
|
- If the current round introduces or revises key terms, abbreviations, metric names, mechanism names, or system labels, explain them at first mention by briefly stating what they are and why they matter here.
|
|
155
159
|
- First mention should use the full form. If a short form or acronym will be reused later, define it at first mention as `Full Form (Short Form)` before switching to the short form.
|
|
156
160
|
- Apply the same first-mention rule to table headers, table captions, table notes, and figure captions or labels; if a term or abbreviation first appears in a table, expand it locally in that table.
|
|
@@ -166,10 +170,12 @@ Do not enter prose polish until the current section has passed the reference-con
|
|
|
166
170
|
- In Experiments, interpret results diagnostically: say which part of the insight each result, ablation, robustness check, or failure case supports, weakens, or bounds. Do not only read numbers from a table.
|
|
167
171
|
- In Conclusion, state the broader principle or action implication implied by the evidence, then state the boundary. Do not introduce a new insight there.
|
|
168
172
|
- Avoid paper-facing headings such as `Our Insights` or `核心洞见`; if a heading is needed, use normal section roles such as motivation, analysis, ablation, or discussion and let the insight appear in the prose.
|
|
173
|
+
- Keep boundary statements sparse. One brief boundary sentence in Abstract, one brief scope sentence in Experiments, and one fuller limitation in Conclusion is the default pattern; do not repeat the same defense across sections unless the evidence scope genuinely changes.
|
|
174
|
+
- If a paragraph outside Experiments reads like a result log, cut it back to the one or two numbers needed for motivation and move the dense benchmark values to Experiments or tables.
|
|
169
175
|
- Nontrivial section work must use three separated revision passes instead of one all-purpose rewrite:
|
|
170
|
-
- Logic pass: check the paragraph role, claim chain, premise-to-conclusion transition, evidence dependency, and whether the section naturally follows from adjacent sections. Do not polish wording in this pass.
|
|
171
|
-
- Theory / field pass: after the logic pass is clean, check concept use, field terminology, metric definitions, citation anchors, and whether the chosen framework actually fits the claim. Do not treat fluent language as proof that the theory is right.
|
|
172
|
-
- Language pass: only after logic and theory blockers are resolved, revise academic tone, sentence rhythm, transitions, concision, and local readability.
|
|
176
|
+
- Logic pass: check the paragraph role, claim chain, premise-to-conclusion transition, evidence dependency, and whether the section naturally follows from adjacent sections. Use `section-question-bank.md` to force explicit answers about section purpose. Do not polish wording in this pass.
|
|
177
|
+
- Theory / field pass: after the logic pass is clean, check concept use, field terminology, metric definitions, citation anchors, and whether the chosen framework actually fits the claim. Run `argument-stress-test.md` here, including the weakest-link test, reverse-claim test, and strongest alternative explanation check. Do not treat fluent language as proof that the theory is right.
|
|
178
|
+
- Language pass: only after logic and theory blockers are resolved, revise academic tone, sentence rhythm, transitions, concision, and local readability. Run `writing-anti-patterns.md` here and remove throat-clearing openings, uniform paragraph patterns, synonym cycling, and generic evaluative filler before accepting the pass.
|
|
173
179
|
- Do not continue into language polish when the logic pass or theory / field pass still has an unresolved blocker; repair the blocker first or route back to `review`, `iterate`, or `report` if the blocker is evidentiary.
|
|
174
180
|
- Default automation should not require the user to approve every pass. Record the three pass outcomes in the write-iteration artifact and stop for one user question only when a failed pass would change paper-level framing, claims, protocol, or downstream section structure.
|
|
175
181
|
- If the user explicitly asks for interactive or human-in-the-loop rewriting, show the result of each pass and wait before moving from logic -> theory -> language.
|
|
@@ -195,6 +201,7 @@ Do not enter prose polish until the current section has passed the reference-con
|
|
|
195
201
|
- Keep run provenance such as tuning-run labels, probe names, internal config strings, rerun ids, and package labels out of reader-facing prose. If the evidence is useful, rewrite it as a bounded paper-facing diagnostic or move the raw provenance to workflow notes or appendix metadata.
|
|
196
202
|
- Keep internal experiment planning out of reader-facing prose. Do not write paper sentences that explain future holdout expansion, small-batch gates, payload freezing, API budget, "if all scores are 1.0000 then treat as overfitting", or why a next automation round is needed.
|
|
197
203
|
- When an experiment boundary matters, report only the scientific scope already supported by the evidence. Put the operational plan for collecting new attacks, new papers, new markers, or additional holdout cases into `.lab/changes/`, `.lab/iterations/`, or report artifacts, not into manuscript sections.
|
|
204
|
+
- Do not let Method or Experiments be dominated by negative-definition prose, such as statements of what the system is not, what it is not meant to replace, or where it is not deployable. After one clear scope sentence, return to mechanism or evidence.
|
|
198
205
|
- Do not use dense, unexplained terminology as a substitute for academic tone.
|
|
199
206
|
- Keep service-style or AI-assistant meta language out of manuscript prose. Phrases such as "用户说", "按你的要求", "我来解释", "下面我", "this version", or "as requested by the user" belong in workflow notes, not in paper-facing sections, captions, table notes, or analysis assets.
|
|
200
207
|
- Keep workflow-only placeholder language out of manuscript prose. Phrases such as "图的意图", "资产意图", "占位符", "workflow-language", "translation layer", or "sync this wording" belong in authoring artifacts, not in reader-facing LaTeX.
|
|
@@ -254,6 +261,7 @@ Do not enter prose polish until the current section has passed the reference-con
|
|
|
254
261
|
- Table assets must use paper-facing LaTeX structure with `booktabs`, caption, label, and consistent precision.
|
|
255
262
|
- Table assets must also include a local table note that explains row meaning, column meaning, metric definitions, comparison scope, and any important caveat.
|
|
256
263
|
- The local table note must contain real reader-facing explanations, not the default template phrases such as "explain what each row represents" or "expand local abbreviations".
|
|
264
|
+
- Captions and table notes should explain what the asset shows, not defend the workflow. Do not use captions to carry repeated boundary disclaimers that belong in section prose or Limitations.
|
|
257
265
|
- Table assets must not rely on aggressive width hacks by default; if width control is still needed after table redesign, document it locally and keep it readable.
|
|
258
266
|
- Table assets with seven or more columns should be split, moved partly to appendix, or written with width-aware columns such as `tabularx` or `p{...}` instead of a plain `tabular` layout.
|
|
259
267
|
- Figure placeholders may record what the final figure should show and why the reader needs it in authoring comments, the paper plan, or the write-iteration artifact, but the caption itself must remain paper-facing and must not contain "Figure intent", "图的意图", "asset intent", "占位符", or similar workflow language.
|