paperfit-cli 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65) hide show
  1. package/.claude/commands/adjust-length.md +21 -0
  2. package/.claude/commands/check-visual.md +27 -0
  3. package/.claude/commands/fix-layout.md +31 -0
  4. package/.claude/commands/migrate-template.md +23 -0
  5. package/.claude/commands/repair-table.md +21 -0
  6. package/.claude/commands/show-status.md +32 -0
  7. package/.claude-plugin/README.md +77 -0
  8. package/.claude-plugin/marketplace.json +41 -0
  9. package/.claude-plugin/plugin.json +39 -0
  10. package/CLAUDE.md +266 -0
  11. package/CONTRIBUTING.md +131 -0
  12. package/LICENSE +21 -0
  13. package/README.md +164 -0
  14. package/agents/code-surgeon-agent.md +214 -0
  15. package/agents/layout-detective-agent.md +229 -0
  16. package/agents/orchestrator-agent.md +254 -0
  17. package/agents/quality-gatekeeper-agent.md +270 -0
  18. package/agents/rule-engine-agent.md +224 -0
  19. package/agents/semantic-polish-agent.md +250 -0
  20. package/bin/paperfit.js +176 -0
  21. package/config/agent_roles.yaml +56 -0
  22. package/config/layout_rules.yaml +54 -0
  23. package/config/templates.yaml +241 -0
  24. package/config/vto_taxonomy.yaml +489 -0
  25. package/config/writing_rules.yaml +64 -0
  26. package/install.sh +30 -0
  27. package/package.json +52 -0
  28. package/requirements.txt +5 -0
  29. package/scripts/benchmark_runner.py +629 -0
  30. package/scripts/compile.sh +244 -0
  31. package/scripts/config_validator.py +339 -0
  32. package/scripts/cv_detector.py +600 -0
  33. package/scripts/evidence_collector.py +167 -0
  34. package/scripts/float_fixers.py +861 -0
  35. package/scripts/inject_defects.py +549 -0
  36. package/scripts/install-claude-global.js +148 -0
  37. package/scripts/install.js +66 -0
  38. package/scripts/install.sh +106 -0
  39. package/scripts/overflow_fixers.py +656 -0
  40. package/scripts/package-for-opensource.sh +138 -0
  41. package/scripts/parse_log.py +260 -0
  42. package/scripts/postinstall.js +38 -0
  43. package/scripts/pre_tool_use.py +265 -0
  44. package/scripts/render_pages.py +244 -0
  45. package/scripts/session_logger.py +329 -0
  46. package/scripts/space_util_fixers.py +773 -0
  47. package/scripts/state_manager.py +352 -0
  48. package/scripts/test_commands.py +187 -0
  49. package/scripts/test_cv_detector.py +214 -0
  50. package/scripts/test_integration.py +290 -0
  51. package/skills/consistency-polisher/SKILL.md +337 -0
  52. package/skills/float-optimizer/SKILL.md +284 -0
  53. package/skills/latex_fixers/__init__.py +82 -0
  54. package/skills/latex_fixers/float_fixers.py +392 -0
  55. package/skills/latex_fixers/fullwidth_fixers.py +375 -0
  56. package/skills/latex_fixers/overflow_fixers.py +250 -0
  57. package/skills/latex_fixers/semantic_micro_tuning.py +362 -0
  58. package/skills/latex_fixers/space_util_fixers.py +389 -0
  59. package/skills/latex_fixers/utils.py +55 -0
  60. package/skills/overflow-repair/SKILL.md +304 -0
  61. package/skills/space-util-fixer/SKILL.md +307 -0
  62. package/skills/taxonomy-vto/SKILL.md +486 -0
  63. package/skills/template-migrator/SKILL.md +251 -0
  64. package/skills/visual-inspector/SKILL.md +217 -0
  65. package/skills/writing-polish/SKILL.md +289 -0
@@ -0,0 +1,392 @@
1
+ """
2
+ Float Optimizer Fixers - Category B 缺陷修复
3
+
4
+ 处理浮动体位置、尺寸、堆叠、跨页分裂等问题。
5
+ """
6
+
7
+ import re
8
+ from pathlib import Path
9
+ from typing import Any, Dict, List, Tuple
10
+
11
+ from .utils import add_package_to_preamble
12
+
13
+
14
+ def fix_float_placement(
15
+ tex_content: str,
16
+ float_label: str | None = None,
17
+ float_type: str = "figure",
18
+ ) -> Tuple[str, Dict[str, Any]]:
19
+ """
20
+ 修复浮动体远离引用的问题(B1 缺陷)。
21
+
22
+ 策略:
23
+ 1. 调整位置参数为 [htbp]
24
+ 2. 在引用点后插入\FloatBarrier
25
+ 3. 移动浮动体源码位置
26
+ """
27
+ change_record = {
28
+ "defect_id": "B1-float-placement",
29
+ "action": "none",
30
+ "object": float_label or "unknown",
31
+ }
32
+
33
+ # 策略 1: 查找浮动体环境并调整位置参数
34
+ float_pattern = rf'\\begin\{{{float_type}\}}\[([^\]]*)\]'
35
+
36
+ def fix_position_param(match):
37
+ current_param = match.group(1)
38
+ # 如果参数不是理想的 [htbp],则替换
39
+ if 'h' not in current_param.lower() or 't' not in current_param.lower():
40
+ change_record["action"] = f"changed position from [{current_param}] to [htbp]"
41
+ return f"\\begin{{{float_type}}}[htbp]"
42
+ return match.group(0)
43
+
44
+ modified = re.sub(float_pattern, fix_position_param, tex_content, count=1)
45
+
46
+ # 策略 2: 如果仍需要改进,在引用点后插入\FloatBarrier
47
+ if change_record["action"] == "none" and float_label:
48
+ # 查找引用该浮动体的\ref
49
+ ref_pattern = rf'\\ref\{{{float_label}\}}'
50
+ ref_match = re.search(ref_pattern, modified)
51
+ if ref_match:
52
+ # 在引用点后插入\FloatBarrier
53
+ insert_pos = ref_match.end()
54
+ # 检查是否已有\FloatBarrier
55
+ context = modified[insert_pos:insert_pos + 50]
56
+ if '\\FloatBarrier' not in context:
57
+ modified = modified[:insert_pos] + f"\\FloatBarrier%\n" + modified[insert_pos:]
58
+ change_record["action"] = f"added \\FloatBarrier after reference to {float_label}"
59
+ # 添加 placeins 宏包
60
+ modified = add_package_to_preamble(modified, "placeins")
61
+
62
+ return modified, change_record
63
+
64
+
65
def fix_float_width(
    tex_content: str,
    float_type: str = "figure",
    is_two_column: bool = False,
) -> Tuple[str, Dict[str, Any]]:
    r"""
    Make a float fit the column width (defect B2).

    For figures, the first ``\includegraphics[...]{...}`` that has an option
    list gets ``width=\linewidth`` (an existing ``width`` option is replaced,
    otherwise one is appended). Tables are delegated to ``fix_table_width``.
    Any other ``float_type`` is returned unchanged.

    Args:
        tex_content: Content of the .tex file.
        float_type: ``figure`` or ``table``.
        is_two_column: Accepted for interface compatibility; not used here.

    Returns:
        (modified_content, change_record)
    """
    change_record = {
        "defect_id": "B2-float-width",
        "action": "none",
    }

    if float_type == "table":
        return fix_table_width(tex_content)
    if float_type != "figure":
        # Nothing to do for unknown float kinds.
        return tex_content, change_record

    # Only graphics that already carry an option list are matched.
    includegraphics_pattern = r'\\includegraphics\[([^\]]*)\]\{([^}]+)\}'

    def fix_graphicx_width(match):
        filename = match.group(2)
        new_options = []
        width_found = False
        for raw_option in match.group(1).split(','):
            option = raw_option.strip()
            if not option:
                continue  # skip empty entries from "[]" or a trailing comma
            # Compare the option KEY, so values such as
            # "height=0.5\linewidth" are not mistaken for a width setting.
            key = option.partition('=')[0].strip()
            if key == 'width':
                width_found = True
                new_options.append("width=\\linewidth")
            else:
                new_options.append(option)

        if width_found:
            change_record["action"] = f"normalized width to \\linewidth for {filename}"
        else:
            new_options.append("width=\\linewidth")
            change_record["action"] = f"added width=\\linewidth to {filename}"

        return f"\\includegraphics[{','.join(new_options)}]{{{filename}}}"

    modified = re.sub(includegraphics_pattern, fix_graphicx_width, tex_content, count=1)
    return modified, change_record
119
+
120
+
121
+ def fix_float_fullwidth(
122
+ tex_content: str,
123
+ float_type: str = "table",
124
+ float_label: str | None = None,
125
+ template_layout: str = "two-column",
126
+ ) -> Tuple[str, Dict[str, Any]]:
127
+ """
128
+ 修复浮动体为满页/满栏格式(用户首选格式)。
129
+
130
+ 策略:
131
+ 1. 双栏模板:使用 table*/figure* 跨双栏
132
+ 2. 单栏模板:使用 tabularx 占满\textwidth
133
+ 3. 避免使用 \resizebox 暴力缩放
134
+
135
+ Args:
136
+ tex_content: .tex 文件内容
137
+ float_type: figure 或 table
138
+ float_label: 浮动体 label(用于定位)
139
+ template_layout: 模板类型 ("two-column" | "single-column")
140
+
141
+ Returns:
142
+ (modified_content, change_record)
143
+ """
144
+ change_record = {
145
+ "defect_id": "B2-fullwidth",
146
+ "action": "none",
147
+ "object": float_label or "unknown",
148
+ "layout": template_layout,
149
+ }
150
+
151
+ if template_layout == "two-column":
152
+ # 双栏模板:将 table/figure 转换为 table*/figure*
153
+ float_pattern = rf'\\begin\{{{float_type}\}}'
154
+ star_float_pattern = rf'\\begin\{{{float_type}\*\}}'
155
+
156
+ # 检查是否已经是 starred 版本
157
+ if re.search(star_float_pattern, tex_content):
158
+ change_record["action"] = f"already using {float_type}* environment"
159
+ return tex_content, change_record
160
+
161
+ # 转换为 starred 版本(跨双栏)
162
+ modified = tex_content.replace(
163
+ f'\\begin{{{float_type}}}',
164
+ f'\\begin{{{float_type}*}}',
165
+ 1
166
+ )
167
+ modified = modified.replace(
168
+ f'\\end{{{float_type}}}',
169
+ f'\\end{{{float_type}*}}',
170
+ 1
171
+ )
172
+
173
+ if modified != tex_content:
174
+ change_record["action"] = f"converted {float_type} to {float_type}* for full-column width"
175
+ change_record["note"] = "table*/figure* 将跨双栏显示,通常放置在页面顶部或底部"
176
+ return modified, change_record
177
+
178
+ elif template_layout == "single-column":
179
+ # 单栏模板:确保表格使用 tabularx 占满\textwidth
180
+ if float_type == "table":
181
+ return fix_table_fullwidth_single(tex_content)
182
+ else:
183
+ # Figure 在单栏模板中只需设置 width=\textwidth
184
+ includegraphics_pattern = r'\\includegraphics\[([^\]]*)\]\{([^}]+)\}'
185
+
186
+ def fix_width_to_textwidth(match):
187
+ options = match.group(1)
188
+ filename = match.group(2)
189
+ option_pairs = options.split(',')
190
+ new_options = []
191
+ width_found = False
192
+
193
+ for opt in option_pairs:
194
+ opt = opt.strip()
195
+ if 'width' in opt:
196
+ width_found = True
197
+ new_options.append('width=\\textwidth')
198
+ else:
199
+ new_options.append(opt)
200
+
201
+ if not width_found:
202
+ new_options.append('width=\\textwidth')
203
+
204
+ change_record["action"] = f"set {filename} width to \\textwidth"
205
+ return f"\\includegraphics[{','.join(new_options)}]{{{filename}}}"
206
+
207
+ modified = re.sub(includegraphics_pattern, fix_width_to_textwidth, tex_content, count=1)
208
+ return modified, change_record
209
+
210
+ return tex_content, change_record
211
+
212
+
213
def _skip_braced_group(text: str, start: int) -> int:
    """Return the index just past the balanced {...} group opening at *start*, or -1."""
    if start >= len(text) or text[start] != '{':
        return -1
    depth = 0
    for index in range(start, len(text)):
        char = text[index]
        if char == '{':
            depth += 1
        elif char == '}':
            depth -= 1
            if depth == 0:
                return index + 1
    return -1


def _find_tabular_end(text: str, search_from: int) -> int:
    r"""Return the offset of the \end{tabular} closing the tabular open at *search_from*, or -1."""
    depth = 1
    for token in re.compile(r'\\(begin|end)\{tabular\}').finditer(text, search_from):
        depth += 1 if token.group(1) == 'begin' else -1
        if depth == 0:
            return token.start()
    return -1


def _to_x_columns(spec_body: str) -> str:
    """Turn every l/c/r column after the first column into an X column."""
    converted = []
    depth = 0
    columns_seen = 0
    for char in spec_body:
        if char == '{':
            depth += 1
        elif char == '}':
            depth = max(depth - 1, 0)
        elif depth == 0 and char.isalpha():
            # A letter outside braces starts a column (l, c, r, p, m, ...).
            columns_seen += 1
            if columns_seen > 1 and char in 'lcr':
                char = 'X'
        converted.append(char)
    return ''.join(converted)


def fix_table_fullwidth_single(tex_content: str) -> Tuple[str, Dict[str, Any]]:
    r"""
    Make a table fill the page width in a single-column template.

    Strategy:
        1. Unwrap a ``\resizebox{\textwidth}{..}{ ... }`` placed directly
           around the tabular (only that wrapper and its closing brace).
        2. Convert that tabular, or else the first tabular in the file, to
           ``tabularx`` with width ``\textwidth``, renaming BOTH the opening
           and the matching closing tag.
        3. Keep the first column and turn later l/c/r columns into ``X``.

    If the file already uses tabularx, or no complete tabular can be parsed
    (no column spec or no matching ``\end{tabular}``), the text is returned
    unchanged so no half-converted environment is produced.

    Args:
        tex_content: Content of the .tex file.

    Returns:
        (modified_content, change_record)
    """
    change_record = {
        "defect_id": "B2-table-fullwidth-single",
        "action": "none",
    }

    if '\\begin{tabularx}' in tex_content:
        change_record["action"] = "already using tabularx"
        return tex_content, change_record

    begin_token = '\\begin{tabular}'
    end_token = '\\end{tabular}'
    working = tex_content
    begin_at = -1
    unwrapped = False

    # Step 1: drop a \resizebox wrapper, whatever its height argument is.
    # Whitespace and %-comments between "{" and \begin{tabular} are allowed.
    wrapper = re.search(
        r'\\resizebox\{\\textwidth\}\{[^}]*\}\{(?:\s|%[^\n]*\n)*(?=\\begin\{tabular\})',
        working,
    )
    if wrapper:
        inner_end = _find_tabular_end(working, wrapper.end() + len(begin_token))
        closer = None
        if inner_end != -1:
            # The brace that closes \resizebox's third argument.
            closer = re.compile(r'(?:\s|%[^\n]*\n)*\}').match(
                working, inner_end + len(end_token)
            )
        if closer:
            working = (
                working[:wrapper.start()]
                + working[wrapper.end():inner_end + len(end_token)]
                + working[closer.end():]
            )
            begin_at = wrapper.start()
            unwrapped = True

    if not unwrapped:
        begin_at = working.find(begin_token)
    if begin_at == -1:
        return tex_content, change_record

    # Step 2: parse "[pos]" (optional) and the balanced column spec.
    cursor = begin_at + len(begin_token)
    position_arg = ''
    optional = re.compile(r'\s*\[[^\]]*\]').match(working, cursor)
    if optional:
        position_arg = optional.group(0).strip()
        cursor = optional.end()
    while cursor < len(working) and working[cursor].isspace():
        cursor += 1
    spec_end = _skip_braced_group(working, cursor)
    end_at = _find_tabular_end(working, spec_end) if spec_end != -1 else -1
    if end_at == -1:
        # Incomplete tabular: renaming one side only would break the file.
        return tex_content, change_record

    # Step 3: rebuild with tabularx on both ends and X columns.
    new_spec = '{' + _to_x_columns(working[cursor + 1:spec_end - 1]) + '}'
    modified = (
        working[:begin_at]
        + '\\begin{tabularx}{\\textwidth}'
        + position_arg
        + new_spec
        + working[spec_end:end_at]
        + '\\end{tabularx}'
        + working[end_at + len(end_token):]
    )

    if unwrapped:
        change_record["action"] = "replaced \\resizebox with tabularx"
    else:
        change_record["action"] = f"converted tabular to tabularx with spec {new_spec}"

    modified = add_package_to_preamble(modified, "tabularx")
    change_record["packages_added"] = ["tabularx"]
    return modified, change_record
277
+
278
+
279
def fix_table_width(tex_content: str) -> Tuple[str, Dict[str, Any]]:
    r"""
    Convert the first tabular into a ``tabularx`` of width ``\linewidth``.

    Both the opening tag and the matching ``\end{tabular}`` are renamed, so
    the environment stays balanced. The column spec is passed through
    unchanged. When there is no tabular, or no matching closing tag, the
    text is returned untouched.

    NOTE(review): tabularx only stretches ``X`` columns; a spec made solely
    of l/c/r columns will presumably keep its natural width - confirm
    whether callers expect an X column to be introduced here.

    Args:
        tex_content: Content of the .tex file.

    Returns:
        (modified_content, change_record)
    """
    change_record = {
        "defect_id": "B2-table-width",
        "action": "none",
    }

    opening = re.search(r'\\begin\{tabular\}(\{[^}]*\})', tex_content)
    if opening is None:
        return tex_content, change_record

    # Walk begin/end tokens to find the closing tag of THIS tabular,
    # skipping over any nested tabular environments.
    depth = 1
    closing_at = -1
    tokens = re.compile(r'\\(begin|end)\{tabular\}')
    for token in tokens.finditer(tex_content, opening.end()):
        depth += 1 if token.group(1) == 'begin' else -1
        if depth == 0:
            closing_at = token.start()
            break
    if closing_at == -1:
        # Renaming only the opening tag would leave a mismatched environment.
        return tex_content, change_record

    closing_token = '\\end{tabular}'
    modified = (
        tex_content[:opening.start()]
        + '\\begin{tabularx}{\\linewidth}'
        + opening.group(1)
        + tex_content[opening.end():closing_at]
        + '\\end{tabularx}'
        + tex_content[closing_at + len(closing_token):]
    )
    change_record["action"] = "converted tabular to tabularx with \\linewidth"
    change_record["packages_added"] = ["tabularx"]
    modified = add_package_to_preamble(modified, "tabularx")

    return modified, change_record
301
+
302
+
303
+ def fix_float_clustering(
304
+ tex_content: str,
305
+ cluster_start_line: int | None = None,
306
+ cluster_count: int = 3,
307
+ ) -> Tuple[str, Dict[str, Any]]:
308
+ """
309
+ 修复浮动体连续堆叠问题(B3 缺陷)。
310
+
311
+ 策略:
312
+ 1. 分散浮动体位置参数
313
+ 2. 在浮动体之间插入正文
314
+ 3. 使用\FloatBarrier 控制
315
+ """
316
+ change_record = {
317
+ "defect_id": "B3-float-clustering",
318
+ "action": "none",
319
+ "cluster_count": cluster_count,
320
+ }
321
+
322
+ # 查找连续的浮动体环境
323
+ float_positions = []
324
+ positions_param_pattern = r'\\begin\{(figure|table)\}(\[([^\]]*)\])?'
325
+
326
+ for match in re.finditer(positions_param_pattern, tex_content):
327
+ float_positions.append({
328
+ "start": match.start(),
329
+ "end": match.end(),
330
+ "type": match.group(1),
331
+ "param": match.group(3) if match.group(3) else "",
332
+ })
333
+
334
+ if len(float_positions) >= cluster_count:
335
+ # 对连续的浮动体应用不同的位置参数
336
+ position_prefs = ['[t]', '[b]', '[p]']
337
+ for i, fp in enumerate(float_positions[:cluster_count]):
338
+ pref = position_prefs[i % len(position_prefs)]
339
+ # 替换位置参数
340
+ old_pattern = rf'\\begin\{{{fp["type"]}\}}(\[{re.escape(fp["param"])}\])?'
341
+ new_replace = f"\\begin{{{fp['type']}}}{pref}"
342
+
343
+ # 只替换一次
344
+ tex_content = re.sub(old_pattern, new_replace, tex_content, count=1)
345
+
346
+ change_record["action"] = f"dispersed {cluster_count} floats with different position preferences"
347
+
348
+ return tex_content, change_record
349
+
350
+
351
+ def fix_split_float(
352
+ tex_content: str,
353
+ float_type: str = "table",
354
+ float_label: str | None = None,
355
+ ) -> Tuple[str, Dict[str, Any]]:
356
+ """
357
+ 修复浮动体跨页分裂问题(B4 缺陷)。
358
+
359
+ 策略:
360
+ 1. 长表格使用 longtable 环境
361
+ 2. 强制表格不跨页
362
+ 3. 拆分过大的图片组
363
+ """
364
+ change_record = {
365
+ "defect_id": "B4-split-float",
366
+ "action": "none",
367
+ "object": float_label or "unknown",
368
+ }
369
+
370
+ if float_type == "table":
371
+ # 策略 1: 将普通表格转换为 longtable
372
+ table_pattern = r'\\begin\{table\}[^\\]*\\begin\{tabular\}'
373
+
374
+ if re.search(table_pattern, tex_content):
375
+ # 转换为 longtable
376
+ modified = tex_content.replace(
377
+ '\\begin{table}',
378
+ '\\usepackage{longtable}\n\\begin{longtable}'
379
+ )
380
+ modified = modified.replace('\\end{table}', '\\end{longtable}')
381
+ change_record["action"] = "converted table to longtable"
382
+
383
+ # 策略 2: 添加表头重复配置
384
+ if 'longtable' in change_record.get("action", ""):
385
+ change_record["note"] = "请手动添加\\endfirsthead 和\\endhead 配置"
386
+
387
+ elif float_type == "figure":
388
+ # 拆分过大的 figure 环境(需要人工判断拆分点)
389
+ change_record["action"] = "manual_review_required"
390
+ change_record["note"] = "请检查 figure 环境中的子图,考虑拆分为多个独立环境"
391
+
392
+ return tex_content, change_record