paperfit-cli 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65) hide show
  1. package/.claude/commands/adjust-length.md +21 -0
  2. package/.claude/commands/check-visual.md +27 -0
  3. package/.claude/commands/fix-layout.md +31 -0
  4. package/.claude/commands/migrate-template.md +23 -0
  5. package/.claude/commands/repair-table.md +21 -0
  6. package/.claude/commands/show-status.md +32 -0
  7. package/.claude-plugin/README.md +77 -0
  8. package/.claude-plugin/marketplace.json +41 -0
  9. package/.claude-plugin/plugin.json +39 -0
  10. package/CLAUDE.md +266 -0
  11. package/CONTRIBUTING.md +131 -0
  12. package/LICENSE +21 -0
  13. package/README.md +164 -0
  14. package/agents/code-surgeon-agent.md +214 -0
  15. package/agents/layout-detective-agent.md +229 -0
  16. package/agents/orchestrator-agent.md +254 -0
  17. package/agents/quality-gatekeeper-agent.md +270 -0
  18. package/agents/rule-engine-agent.md +224 -0
  19. package/agents/semantic-polish-agent.md +250 -0
  20. package/bin/paperfit.js +176 -0
  21. package/config/agent_roles.yaml +56 -0
  22. package/config/layout_rules.yaml +54 -0
  23. package/config/templates.yaml +241 -0
  24. package/config/vto_taxonomy.yaml +489 -0
  25. package/config/writing_rules.yaml +64 -0
  26. package/install.sh +30 -0
  27. package/package.json +52 -0
  28. package/requirements.txt +5 -0
  29. package/scripts/benchmark_runner.py +629 -0
  30. package/scripts/compile.sh +244 -0
  31. package/scripts/config_validator.py +339 -0
  32. package/scripts/cv_detector.py +600 -0
  33. package/scripts/evidence_collector.py +167 -0
  34. package/scripts/float_fixers.py +861 -0
  35. package/scripts/inject_defects.py +549 -0
  36. package/scripts/install-claude-global.js +148 -0
  37. package/scripts/install.js +66 -0
  38. package/scripts/install.sh +106 -0
  39. package/scripts/overflow_fixers.py +656 -0
  40. package/scripts/package-for-opensource.sh +138 -0
  41. package/scripts/parse_log.py +260 -0
  42. package/scripts/postinstall.js +38 -0
  43. package/scripts/pre_tool_use.py +265 -0
  44. package/scripts/render_pages.py +244 -0
  45. package/scripts/session_logger.py +329 -0
  46. package/scripts/space_util_fixers.py +773 -0
  47. package/scripts/state_manager.py +352 -0
  48. package/scripts/test_commands.py +187 -0
  49. package/scripts/test_cv_detector.py +214 -0
  50. package/scripts/test_integration.py +290 -0
  51. package/skills/consistency-polisher/SKILL.md +337 -0
  52. package/skills/float-optimizer/SKILL.md +284 -0
  53. package/skills/latex_fixers/__init__.py +82 -0
  54. package/skills/latex_fixers/float_fixers.py +392 -0
  55. package/skills/latex_fixers/fullwidth_fixers.py +375 -0
  56. package/skills/latex_fixers/overflow_fixers.py +250 -0
  57. package/skills/latex_fixers/semantic_micro_tuning.py +362 -0
  58. package/skills/latex_fixers/space_util_fixers.py +389 -0
  59. package/skills/latex_fixers/utils.py +55 -0
  60. package/skills/overflow-repair/SKILL.md +304 -0
  61. package/skills/space-util-fixer/SKILL.md +307 -0
  62. package/skills/taxonomy-vto/SKILL.md +486 -0
  63. package/skills/template-migrator/SKILL.md +251 -0
  64. package/skills/visual-inspector/SKILL.md +217 -0
  65. package/skills/writing-polish/SKILL.md +289 -0
@@ -0,0 +1,362 @@
1
+ """
2
+ Semantic Micro-Tuning - 语义级动态微调执行器
3
+
4
+ 当物理排版手段(\looseness、浮动体参数等)用尽后,
5
+ 执行最小语义级改写(增删 3-8 个单词,不改变学术原意)。
6
+
7
+ 核心原则:
8
+ 1. 保持学术语义与事实不变(绝不篡改数据、结论、引用内容)
9
+ 2. 最小修改原则(3-8 词)
10
+ 3. 高质量扩容(禁止无意义形容词注水)
11
+ """
12
+
13
+ import re
14
+ from typing import Any, Dict, List, Optional, Tuple
15
+
16
+
17
def minimalist_shorten(
    tex_content: str,
    target_section: Optional[str] = None,
    max_words_to_remove: int = 15,
) -> Tuple[str, Dict[str, Any]]:
    """Shrink a LaTeX document by a few words via safe syntactic rewrites.

    Strategies, highest priority first:
      1. Remove filler phrases ("in order to" -> "to", "due to the fact
         that" -> "because", ...).
      2. Passive -> active voice for a few simple, fixed patterns.
      3. Collapse relative clauses into participles ("which demonstrates"
         -> "demonstrating").

    Args:
        tex_content: Full .tex file content.
        target_section: If given (e.g. "Discussion"), only that section is
            rewritten; otherwise the whole document is processed.
        max_words_to_remove: Upper bound on the number of words removed
            (checked before each substitution, so the last one may slightly
            overshoot, matching the original behaviour).

    Returns:
        ``(modified_content, change_record)`` where ``change_record`` lists
        each individual substitution and the total words removed.
    """
    change_record: Dict[str, Any] = {
        "defect_id": "A1-semantic-shorten",
        "action": "none",
        "words_removed": 0,
        "changes": [],
    }

    # Narrow the edit window to the requested section, if any.
    if target_section:
        section_pattern = rf'(\\section\*?\{{{re.escape(target_section)}\}})'
        match = re.search(section_pattern, tex_content)
        if not match:
            change_record["note"] = f"section '{target_section}' not found"
            return tex_content, change_record

        section_start = match.end()
        # The section runs until the next \section, or \end{document},
        # or end of file.
        next_section = re.search(r'\\section\*?\{', tex_content[section_start:])
        if next_section:
            section_end = section_start + next_section.start()
        else:
            section_end = tex_content.find('\\end{document}', section_start)
            if section_end == -1:
                section_end = len(tex_content)

        section_content = tex_content[section_start:section_end]
        prefix = tex_content[:section_start]
        suffix = tex_content[section_end:]
    else:
        section_content = tex_content
        prefix = ""
        suffix = ""

    modified = section_content
    words_removed = 0

    def _apply(patterns: List[Tuple[str, str]], change_type: str) -> None:
        # Apply each (regex, replacement) pair left-to-right, re-searching
        # after every splice so offsets are never stale.  (The previous
        # implementation spliced at offsets computed on the pre-edit string,
        # corrupting the text whenever a pattern matched more than once.)
        nonlocal modified, words_removed
        for pattern, replacement in patterns:
            rx = re.compile(pattern)
            pos = 0
            while words_removed < max_words_to_remove:
                m = rx.search(modified, pos)
                if not m:
                    break
                saved = len(m.group(0).split()) - len(replacement.split())
                if saved <= 0:
                    pos = m.end()
                    continue
                end = m.end()
                # Deleting a phrase outright would leave a double space;
                # consume one adjacent space along with it.
                if not replacement and end < len(modified) and modified[end] == ' ':
                    end += 1
                modified = modified[:m.start()] + replacement + modified[end:]
                words_removed += saved
                pos = m.start() + len(replacement)
                change_record["changes"].append({
                    "type": change_type,
                    "original": m.group(0),
                    "replacement": replacement or "(deleted)",
                    "words_saved": saved,
                })

    # Strategy 1: drop filler phrases (safest, highest priority).
    _apply([
        (r'\bin order to\b', 'to'),                     # saves 2 words
        (r'\bdue to the fact that\b', 'because'),       # saves 3 words
        (r'\bit is important to note that\b', ''),      # saves 6 words
        (r'\bit should be noted that\b', ''),           # saves 4 words
        (r'\bfor the purpose of\b', 'for'),             # saves 3 words
        (r'\bin the context of\b', 'in'),               # saves 3 words
        (r'\bas a matter of fact\b', ''),               # saves 4 words
        (r'\bwith regard to\b', 'regarding'),           # saves 2 words
        (r'\bin the case of\b', 'for'),                 # saves 3 words
        (r'\bat the present time\b', 'currently'),      # saves 3 words
    ], "filler_removal")

    # Strategy 2: passive -> active voice.  Only a few simple patterns of
    # the form "was/were/is V-ed by" are handled, by dropping the auxiliary.
    _apply([
        (r'\bwas conducted by\b', ' conducted'),
        (r'\bwere performed by\b', ' performed'),
        (r'\bis proposed by\b', ' proposed'),
    ], "passive_to_active")

    # Strategy 3: collapse "which V-s" relative clauses into participles.
    _apply([
        (r'\bwhich demonstrates\b', ' demonstrating'),
        (r'\bwhich indicates\b', ' indicating'),
        (r'\bwhich suggests\b', ' suggesting'),
        (r'\bwhich reveals\b', ' revealing'),
        (r'\bthat is based on\b', ' based on'),
    ], "clause_reduction")

    change_record["words_removed"] = words_removed
    if words_removed > 0:
        change_record["action"] = f"removed {words_removed} words via syntactic optimization"

    return prefix + modified + suffix, change_record
165
+
166
+
167
def deep_expand(
    tex_content: str,
    target_section: Optional[str] = None,
    min_words_to_add: int = 10,
    max_words_to_add: int = 30,
) -> Tuple[str, Dict[str, Any]]:
    """Grow a LaTeX document with short, academic-style additions.

    Strategies, highest priority first:
      1. Insert transition words at paragraph/sentence starts
         ("Notably,", "Furthermore,", ...).
      2. Strengthen result verbs with adverbs
         ("improves" -> "significantly improves").
      3. Append a short summary sentence after sentence boundaries.

    Args:
        tex_content: Full .tex file content.
        target_section: If given (e.g. "Conclusion"), only that section is
            rewritten; otherwise the whole document is processed.
        min_words_to_add: Advisory lower bound; currently not enforced,
            kept for interface compatibility.
        max_words_to_add: Hard upper bound on the number of words added.

    Returns:
        ``(modified_content, change_record)``.
    """
    change_record: Dict[str, Any] = {
        "defect_id": "A2-semantic-expand",
        "action": "none",
        "words_added": 0,
        "changes": [],
    }

    # Narrow the edit window to the requested section, if any.
    if target_section:
        section_pattern = rf'(\\section\*?\{{{re.escape(target_section)}\}})'
        match = re.search(section_pattern, tex_content)
        if not match:
            change_record["note"] = f"section '{target_section}' not found"
            return tex_content, change_record

        section_start = match.end()
        next_section = re.search(r'\\section\*?\{', tex_content[section_start:])
        if next_section:
            section_end = section_start + next_section.start()
        else:
            section_end = tex_content.find('\\end{document}', section_start)
            if section_end == -1:
                section_end = len(tex_content)

        section_content = tex_content[section_start:section_end]
        prefix = tex_content[:section_start]
        suffix = tex_content[section_end:]
    else:
        section_content = tex_content
        prefix = ""
        suffix = ""

    modified = section_content
    words_added = 0

    # Strategy 1: sprinkle transition words (safest, most natural).
    transition_additions = [
        (r'^(\\?This)', r'Notably, \1'),       # at paragraph start
        (r'^(\\?These)', r'Furthermore, \1'),
        (r'^(\\?Our)', r'Importantly, \1'),
        (r'(\. )(\\?The)', r'\1Moreover, the'),
    ]

    for pattern, replacement in transition_additions:
        if words_added >= max_words_to_add:
            break
        rx = re.compile(pattern, re.MULTILINE)
        pos = 0
        while words_added < max_words_to_add:
            m = rx.search(modified, pos)
            if not m:
                break
            # Match.expand resolves \1 backreferences; the previous code
            # spliced the raw template in, leaving a literal "\1" in the
            # document, and used offsets from an already-edited string.
            new_text = m.expand(replacement)
            delta = len(new_text.split()) - len(m.group(0).split())
            if delta > 0 and words_added + delta <= max_words_to_add:
                modified = modified[:m.start()] + new_text + modified[m.end():]
                words_added += delta
                pos = m.start() + len(new_text)
                change_record["changes"].append({
                    "type": "transition_added",
                    "location": m.start(),
                    "added": new_text,
                    "words_added": delta,
                })
            else:
                pos = m.end()

    # Strategy 2: strengthen key result verbs with adverbs.
    expansion_patterns = [
        (r'\bimproves\b', 'significantly improves'),    # +1 word
        (r'\benhances\b', 'substantially enhances'),    # +1 word
        (r'\breduces\b', 'effectively reduces'),        # +1 word
        (r'\bincreases\b', 'consistently increases'),   # +1 word
    ]

    for pattern, replacement in expansion_patterns:
        if words_added >= max_words_to_add:
            break
        rx = re.compile(pattern)
        pos = 0
        while words_added < max_words_to_add:
            m = rx.search(modified, pos)
            if not m:
                break
            delta = len(replacement.split()) - len(m.group(0).split())
            if delta > 0 and words_added + delta <= max_words_to_add:
                modified = modified[:m.start()] + replacement + modified[m.end():]
                words_added += delta
                # Skip past the insertion so "significantly improves"
                # cannot re-match its own "improves".
                pos = m.start() + len(replacement)
                change_record["changes"].append({
                    "type": "adverb_added",
                    "original": m.group(0),
                    "expanded": replacement,
                    "words_added": delta,
                })
            else:
                pos = m.end()

    # Strategy 3: append a short causal/summary sentence after sentence
    # boundaries (period followed by a capital letter).
    expansion_phrases = [
        " This finding aligns with prior work.",
        " This result demonstrates the effectiveness of our approach.",
        " Notably, this improvement is consistent across all benchmarks.",
    ]
    # Scan a snapshot and track the cumulative shift so each insertion
    # lands directly after its period and inserted phrases are never
    # themselves re-matched.
    snapshot = modified
    shift = 0
    for m in re.finditer(r'\.\\?\s*\\?([A-Z])', snapshot):
        if words_added >= max_words_to_add:
            break
        # Rotate through the phrases based on the running word count.
        phrase = expansion_phrases[words_added % len(expansion_phrases)]
        n_words = len(phrase.split())
        if words_added + n_words <= max_words_to_add:
            insert_at = m.start() + shift + 1  # directly after the period
            modified = modified[:insert_at] + phrase + modified[insert_at:]
            shift += len(phrase)
            words_added += n_words
            change_record["changes"].append({
                "type": "causal_explanation",
                "added": phrase.strip(),
                "words_added": n_words,
            })

    change_record["words_added"] = words_added
    if words_added > 0:
        change_record["action"] = f"added {words_added} words via semantic expansion"

    return prefix + modified + suffix, change_record
311
+
312
+
313
def semantic_intervention(
    tex_content: str,
    intervention_type: str = "auto",
    target_section: Optional[str] = None,
    page_deficit: Optional[int] = None,  # positive = need expansion, negative = need compression
) -> Tuple[str, Dict[str, Any]]:
    """Dispatch a semantic edit: shorten, expand, or decide automatically.

    Args:
        tex_content: Full .tex file content.
        intervention_type: One of "shorten", "expand", or "auto".
        target_section: Section name to restrict the edit to, if any.
        page_deficit: Page delta (positive = pages missing, so expand;
            negative = pages over, so shorten).  Required in "auto" mode.

    Returns:
        ``(modified_content, change_record)``.
    """
    record: Dict[str, Any] = {
        "defect_id": "A-semantic-intervention",
        "action": "none",
        "intervention_type": intervention_type,
    }

    mode = intervention_type
    if mode == "auto":
        # Auto mode needs a page deficit to decide the direction.
        if page_deficit is None:
            record["note"] = "page_deficit required for auto mode"
            return tex_content, record
        if page_deficit == 0:
            record["note"] = "no page deficit, no intervention needed"
            return tex_content, record
        mode = "expand" if page_deficit > 0 else "shorten"

    if mode == "shorten":
        new_content, detail = minimalist_shorten(tex_content, target_section)
    elif mode == "expand":
        new_content, detail = deep_expand(tex_content, target_section)
    else:
        record["note"] = f"unknown intervention_type: {intervention_type}"
        return tex_content, record

    record.update(detail)
    return new_content, record