paperfit-cli 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65) hide show
  1. package/.claude/commands/adjust-length.md +21 -0
  2. package/.claude/commands/check-visual.md +27 -0
  3. package/.claude/commands/fix-layout.md +31 -0
  4. package/.claude/commands/migrate-template.md +23 -0
  5. package/.claude/commands/repair-table.md +21 -0
  6. package/.claude/commands/show-status.md +32 -0
  7. package/.claude-plugin/README.md +77 -0
  8. package/.claude-plugin/marketplace.json +41 -0
  9. package/.claude-plugin/plugin.json +39 -0
  10. package/CLAUDE.md +266 -0
  11. package/CONTRIBUTING.md +131 -0
  12. package/LICENSE +21 -0
  13. package/README.md +164 -0
  14. package/agents/code-surgeon-agent.md +214 -0
  15. package/agents/layout-detective-agent.md +229 -0
  16. package/agents/orchestrator-agent.md +254 -0
  17. package/agents/quality-gatekeeper-agent.md +270 -0
  18. package/agents/rule-engine-agent.md +224 -0
  19. package/agents/semantic-polish-agent.md +250 -0
  20. package/bin/paperfit.js +176 -0
  21. package/config/agent_roles.yaml +56 -0
  22. package/config/layout_rules.yaml +54 -0
  23. package/config/templates.yaml +241 -0
  24. package/config/vto_taxonomy.yaml +489 -0
  25. package/config/writing_rules.yaml +64 -0
  26. package/install.sh +30 -0
  27. package/package.json +52 -0
  28. package/requirements.txt +5 -0
  29. package/scripts/benchmark_runner.py +629 -0
  30. package/scripts/compile.sh +244 -0
  31. package/scripts/config_validator.py +339 -0
  32. package/scripts/cv_detector.py +600 -0
  33. package/scripts/evidence_collector.py +167 -0
  34. package/scripts/float_fixers.py +861 -0
  35. package/scripts/inject_defects.py +549 -0
  36. package/scripts/install-claude-global.js +148 -0
  37. package/scripts/install.js +66 -0
  38. package/scripts/install.sh +106 -0
  39. package/scripts/overflow_fixers.py +656 -0
  40. package/scripts/package-for-opensource.sh +138 -0
  41. package/scripts/parse_log.py +260 -0
  42. package/scripts/postinstall.js +38 -0
  43. package/scripts/pre_tool_use.py +265 -0
  44. package/scripts/render_pages.py +244 -0
  45. package/scripts/session_logger.py +329 -0
  46. package/scripts/space_util_fixers.py +773 -0
  47. package/scripts/state_manager.py +352 -0
  48. package/scripts/test_commands.py +187 -0
  49. package/scripts/test_cv_detector.py +214 -0
  50. package/scripts/test_integration.py +290 -0
  51. package/skills/consistency-polisher/SKILL.md +337 -0
  52. package/skills/float-optimizer/SKILL.md +284 -0
  53. package/skills/latex_fixers/__init__.py +82 -0
  54. package/skills/latex_fixers/float_fixers.py +392 -0
  55. package/skills/latex_fixers/fullwidth_fixers.py +375 -0
  56. package/skills/latex_fixers/overflow_fixers.py +250 -0
  57. package/skills/latex_fixers/semantic_micro_tuning.py +362 -0
  58. package/skills/latex_fixers/space_util_fixers.py +389 -0
  59. package/skills/latex_fixers/utils.py +55 -0
  60. package/skills/overflow-repair/SKILL.md +304 -0
  61. package/skills/space-util-fixer/SKILL.md +307 -0
  62. package/skills/taxonomy-vto/SKILL.md +486 -0
  63. package/skills/template-migrator/SKILL.md +251 -0
  64. package/skills/visual-inspector/SKILL.md +217 -0
  65. package/skills/writing-polish/SKILL.md +289 -0
@@ -0,0 +1,656 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Overflow Fixers Module
4
+
5
+ 处理 Category D:溢出与对齐缺陷
6
+ - D1: Overfull hbox(段落文本、表格单元格、公式溢出栏宽)
7
+ - D2: 长公式未合理断行
8
+ - D3: URL/长标识符溢出
9
+
10
+ 该模块被 code-surgeon-agent 调用,执行对 .tex 源码的精确修改。
11
+ 所有修复遵循最小修改原则,不改变学术内容。
12
+ """
13
+
14
+ import re
15
+ from pathlib import Path
16
+ from dataclasses import dataclass, field
17
+ from typing import List, Dict, Optional, Tuple, Any
18
+
19
+
20
+ # ============================================================
21
+ # 数据结构定义
22
+ # ============================================================
23
+
24
@dataclass
class FixResult:
    """Record describing one repair applied to the LaTeX source."""

    # Defect category identifier, e.g. "D1", "D2" or "D3".
    defect_id: str
    # Name or label of the object that was repaired.
    object_name: str
    # Human-readable description of the repair.
    action: str
    # Source snippet (or a summary of it) before the repair.
    before: str
    # Source snippet (or a summary of it) after the repair.
    after: str
    # Page number attached to the defect; defaults to 0.
    page: int = 0
    # Source line attached to the defect; defaults to None.
    line_number: Optional[int] = None
    # Whether the repair was applied; defaults to False.
    success: bool = False
35
+
36
+
37
@dataclass
class OverflowFixReport:
    """Aggregate outcome of one overflow-repair run."""

    # Overall outcome: "success", "partial" or "failed".
    status: str
    # Paths of the files that were rewritten.
    modified_files: List[str] = field(default_factory=list)
    # One entry per repair that was applied.
    changes: List[FixResult] = field(default_factory=list)
    # Messages for defects that could not be repaired.
    unresolved: List[str] = field(default_factory=list)

    def to_dict(self) -> Dict[str, Any]:
        """Return the report as a plain dictionary.

        Each change is flattened into a dictionary; note that the
        ``object_name`` attribute is exported under the key ``"object"``.
        """
        flattened = []
        for change in self.changes:
            entry = {
                "defect_id": change.defect_id,
                "object": change.object_name,
                "action": change.action,
                "before": change.before,
                "after": change.after,
                "page": change.page,
                "line_number": change.line_number,
                "success": change.success,
            }
            flattened.append(entry)

        report: Dict[str, Any] = {"skill": "overflow-repair"}
        report["status"] = self.status
        report["modified_files"] = self.modified_files
        report["changes"] = flattened
        report["unresolved"] = self.unresolved
        return report
65
+
66
+
67
+ # ============================================================
68
+ # D1: 段落文本溢出修复
69
+ # ============================================================
70
+
71
def _paragraph_is_groupable(paragraph: str) -> bool:
    """Return True when *paragraph* can be enclosed in a TeX group safely."""
    # Already tuned once; wrapping again would only nest groups.
    if "\\emergencystretch" in paragraph:
        return False
    # A group must not cut an environment in half (this also rejects a
    # paragraph that contains \end{document}).
    if paragraph.count("\\begin{") != paragraph.count("\\end{"):
        return False
    opened = paragraph.count("{") - paragraph.count("\\{")
    closed = paragraph.count("}") - paragraph.count("\\}")
    return opened == closed


def fix_paragraph_overflow(
    tex_content: str,
    overfull_line: str,
    overflow_amount: float,
) -> Tuple[str, Optional[FixResult]]:
    r"""Repair an overfull paragraph line.

    Strategies, in priority order:
      1. Add discretionary hyphens (``\-``) to the longest word (12 or more
         letters) of the overfull line.
      2. Enclose the paragraph in a group that sets ``\emergencystretch``.
      3. Give up (``None`` result); a semantic rewrite is needed.

    The overfull line is located through its first 50 characters, searched
    after ``\begin{document}`` when that marker is present, so the preamble
    is never modified. When the line cannot be located, strategy 1 falls
    back to the first whole-word occurrence in the document body and
    strategy 2 is skipped.

    Args:
        tex_content: Content of the .tex file.
        overfull_line: Text of the overflowing line.
        overflow_amount: Overflow in pt (currently not used to pick a strategy).

    Returns:
        ``(new_content, fix_result)``; ``fix_result`` is ``None`` and the
        content is returned unchanged when nothing could be done.
    """
    doc_begin = re.search(r'\\begin\{document\}', tex_content)
    body_start = doc_begin.end() if doc_begin else 0

    snippet = overfull_line[:50].strip()
    anchor = tex_content.find(snippet, body_start) if snippet else -1

    if anchor >= 0:
        # Paragraph boundaries are the surrounding blank lines.
        separator = tex_content.rfind("\n\n", body_start, anchor)
        region_start = separator + 2 if separator >= 0 else body_start
        separator = tex_content.find("\n\n", anchor)
        region_end = separator if separator >= 0 else len(tex_content)
    else:
        region_start, region_end = body_start, len(tex_content)

    # Strategy 1: hyphenate the longest word. The lookbehind rejects control
    # sequences such as \emergencystretch, which must never be altered.
    long_words = re.findall(
        r'(?<![\\A-Za-z])[A-Za-z]{12,}(?![A-Za-z])', overfull_line
    )
    if long_words:
        longest_word = max(long_words, key=len)
        hyphenated = add_hyphenation_points(longest_word)
        occurrence = re.compile(
            r'(?<![\\A-Za-z])' + re.escape(longest_word) + r'(?![A-Za-z])'
        ).search(tex_content, region_start, region_end)
        if occurrence and hyphenated != longest_word:
            # Splice at the located occurrence instead of replacing the
            # first textual match anywhere in the file.
            modified_content = (
                tex_content[:occurrence.start()]
                + hyphenated
                + tex_content[occurrence.end():]
            )
            return modified_content, FixResult(
                defect_id="D1",
                object_name=f"段落文本",
                action=f"为长单词 '{longest_word}' 添加断词点",
                before=longest_word,
                after=hyphenated,
                success=True,
            )

    # Strategy 2: allow extra stretch for the paragraph that holds the line.
    if anchor >= 0:
        paragraph = tex_content[region_start:region_end]
        text = paragraph.strip()
        if text and _paragraph_is_groupable(text):
            leading = paragraph[:len(paragraph) - len(paragraph.lstrip())]
            trailing = paragraph[len(paragraph.rstrip()):]
            # TeX reads paragraph parameters when the paragraph ends, so the
            # \par has to sit inside the group. It goes on its own line so a
            # trailing comment in the paragraph cannot swallow it.
            wrapped = (
                leading
                + "{\\emergencystretch=1em "
                + text
                + "\n\\par}"
                + trailing
            )
            modified_content = (
                tex_content[:region_start] + wrapped + tex_content[region_end:]
            )
            return modified_content, FixResult(
                defect_id="D1",
                object_name="段落文本",
                action=f"添加\\emergencystretch=1em 以允许额外拉伸",
                before=overfull_line[:80] + "..." if len(overfull_line) > 80 else overfull_line,
                after="{\\emergencystretch=1em ...\\par}",
                success=True,
            )

    # Strategy 3: no automatic repair is possible.
    return tex_content, None
138
+
139
+
140
def add_hyphenation_points(word: str) -> str:
    r"""Insert discretionary hyphens (``\-``) into a long word.

    Words shorter than 12 characters are returned unchanged. Otherwise a
    ``\-`` is placed after every vowel that is directly followed by a
    non-vowel, provided the vowel's index ``i`` satisfies
    ``3 < i < len(word) - 3``. This is a rough heuristic, not real
    syllable-based hyphenation.
    """
    length = len(word)
    if length < 12:
        return word

    vowel_set = "aeiouAEIOU"
    pieces = []
    for pos, letter in enumerate(word):
        pieces.append(letter)
        # pos < length - 3 guarantees that word[pos + 1] exists.
        away_from_edges = 3 < pos < length - 3
        if away_from_edges and letter in vowel_set and word[pos + 1] not in vowel_set:
            pieces.append(r'\-')
    return ''.join(pieces)
163
+
164
+
165
+ # ============================================================
166
+ # D1: 表格单元格溢出修复
167
+ # ============================================================
168
+
169
def _find_group_end(text: str, open_idx: int, limit: int) -> Optional[int]:
    """Return the index just past the ``}`` matching the ``{`` at *open_idx*.

    Scanning stops at *limit*; ``None`` is returned when no match is found.
    """
    depth = 0
    idx = open_idx
    while idx < limit:
        char = text[idx]
        if char == '\\':
            idx += 2  # skip the escaped character (e.g. \{ or \})
            continue
        if char == '{':
            depth += 1
        elif char == '}':
            depth -= 1
            if depth == 0:
                return idx + 1
        idx += 1
    return None


def _locate_tabular(
    text: str, start: int, end: int
) -> Optional[Tuple[int, int, int, int]]:
    """Find the first ``tabular`` environment inside ``text[start:end]``.

    Returns ``(begin_start, spec_start, spec_end, end_start)`` where
    ``text[spec_start:spec_end]`` is the brace-balanced column specification
    and ``end_start`` is the offset of the matching ``\\end{tabular}``.
    """
    opening = re.compile(
        r'\\begin\{tabular\}\s*(?:\[[^\]]*\])?\s*(?=\{)'
    ).search(text, start, end)
    if opening is None:
        return None
    spec_start = opening.end()
    spec_end = _find_group_end(text, spec_start, end)
    if spec_end is None:
        return None
    # Depth counting so that a tabular nested in a cell is not mistaken for
    # the end of the outer one.
    depth = 1
    for token in re.compile(r'\\(begin|end)\{tabular\}').finditer(text, spec_end, end):
        depth += 1 if token.group(1) == 'begin' else -1
        if depth == 0:
            return opening.start(), spec_start, spec_end, token.start()
    return None


def _ensure_tabularx_loaded(tex_content: str) -> str:
    """Insert ``\\usepackage{tabularx}`` before ``\\begin{document}`` if absent.

    Content without a ``\\begin{document}`` marker is returned unchanged.
    """
    doc_begin = re.search(r'\\begin\{document\}', tex_content)
    if doc_begin is None:
        return tex_content
    preamble = tex_content[:doc_begin.start()]
    loaded = re.search(
        r'\\(?:usepackage|RequirePackage)\s*(?:\[[^\]]*\])?\s*\{[^}]*\btabularx\b[^}]*\}',
        preamble,
    )
    if loaded:
        return tex_content
    return preamble + "\\usepackage{tabularx}\n" + tex_content[doc_begin.start():]


def fix_table_overflow(
    tex_content: str,
    table_label: Optional[str] = None,
    line_number: Optional[int] = None,
) -> Tuple[str, Optional[FixResult]]:
    r"""Repair a table whose cells overflow the available width.

    Strategies, in priority order:
      1. Convert the first ``tabular`` of the table to ``tabularx`` spanning
         ``\linewidth`` (loading the ``tabularx`` package when the preamble
         is visible and does not load it yet).
      2. Insert ``\small`` right after ``\begin{table}[...]``.

    Target selection: only ``table``, ``table*`` and ``sidewaystable`` floats
    that contain a ``tabular`` are considered. When *table_label* is given,
    only floats containing ``\label{<table_label>}`` qualify and ``None`` is
    returned if there is none. Among the remaining floats the one whose
    source lines include *line_number* is used, else the first one.

    Args:
        tex_content: Content of the .tex file.
        table_label: Table label such as ``"tab:results"``.
        line_number: 1-based source line of the overflow.

    Returns:
        ``(new_content, fix_result)``; ``fix_result`` is ``None`` and the
        content is returned unchanged when nothing could be done.
    """
    env_pattern = re.compile(
        r'\\begin\{(table\*?|sidewaystable)\}(?:\[[^\]]*\])?.*?\\end\{\1\}',
        re.DOTALL,
    )
    candidates = []
    for env in env_pattern.finditer(tex_content):
        tabular = _locate_tabular(tex_content, env.start(), env.end())
        if tabular is not None:
            candidates.append((env, tabular))

    if table_label:
        label_token = f"\\label{{{table_label}}}"
        candidates = [
            (env, tabular)
            for env, tabular in candidates
            if label_token in env.group(0)
        ]

    if not candidates:
        return tex_content, None

    target_env, target_tabular = candidates[0]
    if line_number:
        for env, tabular in candidates:
            # Convert character offsets to 1-based line numbers before
            # comparing them with the reported source line.
            first_line = tex_content.count("\n", 0, env.start()) + 1
            last_line = tex_content.count("\n", 0, env.end()) + 1
            if first_line <= line_number <= last_line:
                target_env, target_tabular = env, tabular
                break

    full_table = target_env.group(0)
    begin_start, spec_start, spec_end, end_start = target_tabular
    column_spec = tex_content[spec_start:spec_end]

    # Strategy 1: switch to tabularx with one X column.
    new_column_spec = convert_to_tabularx_columns(column_spec)
    spec_is_sane = new_column_spec.count('{') == new_column_spec.count('}')
    if new_column_spec != column_spec and spec_is_sane:
        begin_token = "\\begin{tabular}"
        end_token = "\\end{tabular}"
        # Splice by offsets so exactly this tabular (and its own \end) is
        # rewritten, even if identical text occurs elsewhere.
        modified_content = (
            tex_content[:begin_start]
            + "\\begin{tabularx}{\\linewidth}"
            + tex_content[begin_start + len(begin_token):spec_start]
            + new_column_spec
            + tex_content[spec_end:end_start]
            + "\\end{tabularx}"
            + tex_content[end_start + len(end_token):]
        )
        modified_content = _ensure_tabularx_loaded(modified_content)
        return modified_content, FixResult(
            defect_id="D1",
            object_name=table_label or "表格",
            action=f"将 tabular 改为 tabularx,列规格从 {column_spec} 改为 {new_column_spec}",
            before=f"\\begin{{tabular}}{column_spec}",
            after=f"\\begin{{tabularx}}{{\\linewidth}}{new_column_spec}",
            line_number=line_number,
            success=True,
        )

    # Strategy 2: shrink the font inside the float.
    already_small = re.search(
        r'\\(?:small|footnotesize|scriptsize|tiny)(?![A-Za-z])', full_table
    )
    header = re.match(r'\\begin\{[^}]*\}(?:\[[^\]]*\])?', full_table)
    if header and not already_small:
        # Insert inside the float, directly after \begin{table}[...], so the
        # size change is scoped to the table.
        insert_pos = target_env.start() + header.end()
        following = tex_content[insert_pos:insert_pos + 1]
        # A letter right after \small would fuse into another command name.
        glue = " " if following.isalpha() else ""
        new_content = (
            tex_content[:insert_pos]
            + "\n\\small"
            + glue
            + tex_content[insert_pos:]
        )
        return new_content, FixResult(
            defect_id="D1",
            object_name=table_label or "表格",
            action="添加\\small 字号以压缩表格",
            before=full_table[:100] + "...",
            after=header.group(0) + "\n\\small...",
            line_number=line_number,
            success=True,
        )

    return tex_content, None
264
+
265
+
266
def convert_to_tabularx_columns(column_spec: str) -> str:
    """Turn a ``tabular`` column specification into a ``tabularx`` one.

    The last top-level ``l``, ``r`` or ``c`` column is replaced by an ``X``
    column. Letters inside brace groups (``p{3cm}``, ``@{...}``,
    ``>{...}``, ``*{n}{...}``) are never touched, so units such as ``cm``
    are not mistaken for columns.

    Args:
        column_spec: Column specification, normally including its outer
            braces (e.g. ``"{lcr}"``).

    Returns:
        The converted specification wrapped in braces, or *column_spec*
        unchanged when it has no top-level ``l``/``r``/``c`` column.
    """
    spec = column_spec.strip()
    # Remove exactly one pair of outer braces; str.strip('{}') would also eat
    # the closing brace of a trailing p{...} column.
    if spec.startswith('{') and spec.endswith('}'):
        spec = spec[1:-1]

    depth = 0
    last_text_idx = None
    idx = 0
    while idx < len(spec):
        char = spec[idx]
        if char == '\\':
            idx += 2  # skip the escaped character
            continue
        if char == '{':
            depth += 1
        elif char == '}':
            depth = max(depth - 1, 0)
        elif depth == 0 and char in 'lrc':
            last_text_idx = idx
        idx += 1

    if last_text_idx is None:
        return column_spec

    # The last text column is usually the descriptive (widest) one.
    new_spec = spec[:last_text_idx] + 'X' + spec[last_text_idx + 1:]
    return '{' + new_spec + '}'
289
+
290
+
291
+ # ============================================================
292
+ # D2: 长公式溢出修复
293
+ # ============================================================
294
+
295
def _top_level_operators(math: str) -> List[Tuple[int, str]]:
    """Return ``(index, char)`` for every ``=``, ``+`` or ``-`` in *math*
    that lies outside braces, brackets/parentheses and ``\\left``/``\\right``
    pairs."""
    control = re.compile(r'\\(?:[A-Za-z]+|.)', re.DOTALL)
    found = []
    brace_depth = paren_depth = delim_depth = 0
    idx = 0
    while idx < len(math):
        char = math[idx]
        if char == '\\':
            token = control.match(math, idx)
            if token is None:  # lone backslash at the very end
                break
            if token.group(0) == '\\left':
                delim_depth += 1
            elif token.group(0) == '\\right':
                delim_depth = max(delim_depth - 1, 0)
            idx = token.end()
            continue
        if char == '{':
            brace_depth += 1
        elif char == '}':
            brace_depth = max(brace_depth - 1, 0)
        elif char in '([':
            paren_depth += 1
        elif char in ')]':
            paren_depth = max(paren_depth - 1, 0)
        elif char in '=+-' and not (brace_depth or paren_depth or delim_depth):
            found.append((idx, char))
        idx += 1
    return found


def _ensure_amsmath_loaded(tex_content: str) -> str:
    """Insert ``\\usepackage{amsmath}`` before ``\\begin{document}`` when the
    visible preamble loads neither ``amsmath`` nor ``mathtools``.

    Content without a ``\\begin{document}`` marker is returned unchanged.
    """
    doc_begin = re.search(r'\\begin\{document\}', tex_content)
    if doc_begin is None:
        return tex_content
    preamble = tex_content[:doc_begin.start()]
    loaded = re.search(
        r'\\(?:usepackage|RequirePackage)\s*(?:\[[^\]]*\])?\s*'
        r'\{[^}]*\b(?:amsmath|mathtools)\b[^}]*\}',
        preamble,
    )
    if loaded:
        return tex_content
    return preamble + "\\usepackage{amsmath}\n" + tex_content[doc_begin.start():]


def fix_equation_overflow(
    tex_content: str,
    equation_label: Optional[str] = None,
    line_number: Optional[int] = None,
) -> Tuple[str, Optional[FixResult]]:
    r"""Repair a display equation that is too wide by breaking it in two.

    Strategies, in priority order:
      1. If the formula has a top-level ``=`` followed by a top-level binary
         ``+``/``-``, nest a ``split`` inside the equation environment,
         aligned at the ``=`` and broken at the operator nearest the middle
         of the right-hand side.
      2. Otherwise, if any top-level binary ``+``/``-`` exists, convert to
         ``multline`` (``multline*`` for ``equation*``) broken at the
         operator nearest the middle.
      3. Otherwise give up (``None`` result).

    "Top-level" means outside braces, parentheses/brackets and
    ``\left``/``\right`` pairs, so sub/superscripts and labels are never
    cut. Equations that already contain a nested environment, a ``\\`` line
    break or a ``%`` comment are left alone. ``\label``, ``\tag``,
    ``\nonumber`` and ``\notag`` are moved after the formula.

    Target selection: when *equation_label* is given only the equation
    containing ``\label{<equation_label>}`` qualifies; otherwise the equation
    whose source lines include *line_number* is used, else the first one.

    Args:
        tex_content: Content of the .tex file.
        equation_label: Equation label such as ``"eq:loss"``.
        line_number: 1-based source line of the overflow.

    Returns:
        ``(new_content, fix_result)``; ``fix_result`` is ``None`` and the
        content is returned unchanged when nothing could be done.
    """
    env_pattern = re.compile(
        r'\\begin\{(equation\*?)\}(.*?)\\end\{\1\}', re.DOTALL
    )
    # Matching each environment separately keeps a labelled search from
    # spanning several equations.
    equations = list(env_pattern.finditer(tex_content))
    if equation_label:
        label_token = f"\\label{{{equation_label}}}"
        equations = [eq for eq in equations if label_token in eq.group(2)]
    if not equations:
        return tex_content, None

    target = equations[0]
    if line_number:
        for eq in equations:
            first_line = tex_content.count("\n", 0, eq.start()) + 1
            last_line = tex_content.count("\n", 0, eq.end()) + 1
            if first_line <= line_number <= last_line:
                target = eq
                break

    env_name = target.group(1)
    body = target.group(2)
    # Already structured, already broken, or carrying comments that a
    # re-flow could turn into swallowed code: leave untouched.
    if '\\begin{' in body or '\\\\' in body or re.search(r'(?<!\\)%', body):
        return tex_content, None

    trailer_pattern = re.compile(
        r'\\(?:label|tag\*?)\{[^{}]*\}|\\(?:nonumber|notag)(?![A-Za-z])'
    )
    trailers = trailer_pattern.findall(body)
    math = trailer_pattern.sub('', body).strip()
    suffix = ''.join(f"\n{trailer}" for trailer in trailers)

    def is_binary(pos: int) -> bool:
        # A sign directly after a relation, operator or opening delimiter
        # is unary, and breaking before it would be wrong.
        before = math[:pos].rstrip()
        return bool(before) and before[-1] not in '=+-<>([{^_&,'

    operators = _top_level_operators(math)
    break_points = [pos for pos, char in operators if char in '+-' and is_binary(pos)]
    equal_signs = [pos for pos, char in operators if char == '=' and math[:pos].strip()]

    new_equation = None
    action = ""
    after = ""

    if equal_signs:
        eq_pos = equal_signs[0]
        rhs_breaks = [
            pos for pos in break_points
            if pos > eq_pos and math[eq_pos + 1:pos].strip()
        ]
        if rhs_breaks:
            midpoint = (eq_pos + len(math)) / 2
            break_pos = min(rhs_breaks, key=lambda pos: abs(pos - midpoint))
            # Keep ":=" together as one relation.
            rel_start = eq_pos - 1 if math[eq_pos - 1] == ':' else eq_pos
            lhs = math[:rel_start].rstrip()
            relation = math[rel_start:eq_pos + 1]
            first = math[eq_pos + 1:break_pos].strip()
            rest = math[break_pos:].strip()
            # split is only valid inside a math environment, so it is nested
            # in the original equation environment.
            new_equation = (
                f"\\begin{{{env_name}}}\n"
                f"\\begin{{split}}\n"
                f"{lhs} &{relation} {first} \\\\\n"
                f"&\\quad {rest}\n"
                f"\\end{{split}}{suffix}\n"
                f"\\end{{{env_name}}}"
            )
            action = "将 equation 改为 split 环境,在等号处对齐并在运算符处断行"
            after = "\\begin{equation}\\begin{split}...\\end{split}\\end{equation}"

    if new_equation is None and break_points:
        midpoint = len(math) / 2
        break_pos = min(break_points, key=lambda pos: abs(pos - midpoint))
        star = '*' if env_name.endswith('*') else ''
        first = math[:break_pos].strip()
        rest = math[break_pos:].strip()
        new_equation = (
            f"\\begin{{multline{star}}}\n"
            f"{first} \\\\\n"
            f"{rest}{suffix}\n"
            f"\\end{{multline{star}}}"
        )
        action = "将 equation 改为 multline 环境"
        after = "\\begin{multline}...\\end{multline}"

    if new_equation is None:
        # Without a line break neither environment would change the width.
        return tex_content, None

    modified_content = (
        tex_content[:target.start()] + new_equation + tex_content[target.end():]
    )
    modified_content = _ensure_amsmath_loaded(modified_content)
    return modified_content, FixResult(
        defect_id="D2",
        object_name=equation_label or "公式",
        action=action,
        before="\\begin{equation}...\\end{equation}",
        after=after,
        line_number=line_number,
        success=True,
    )
380
+
381
+
382
+ # ============================================================
383
+ # D3: URL 溢出修复
384
+ # ============================================================
385
+
386
def _has_url_package(tex_content: str) -> bool:
    """Return True when ``url``, ``xurl`` or ``hyperref`` is loaded (with or
    without options) in the preamble, or anywhere in the content when no
    ``\\begin{document}`` marker is present."""
    doc_begin = re.search(r'\\begin\{document\}', tex_content)
    preamble = tex_content[:doc_begin.start()] if doc_begin else tex_content
    return re.search(
        r'\\(?:usepackage|RequirePackage)\s*(?:\[[^\]]*\])?\s*'
        r'\{[^}]*\b(?:url|xurl|hyperref)\b[^}]*\}',
        preamble,
    ) is not None


def _is_wrapped_url(tex_content: str, start: int) -> bool:
    """Return True when the URL starting at *start* is preceded by a
    backslash or is already the argument of a URL-aware command."""
    if start > 0 and tex_content[start - 1] == '\\':
        return True
    context = tex_content[max(0, start - 24):start]
    return re.search(r'\\(?:url|href|nolinkurl|path)\*?\s*\{\s*$', context) is not None


def fix_url_overflow(
    tex_content: str,
    url: Optional[str] = None,
) -> Tuple[str, Optional[FixResult]]:
    r"""Repair an overflowing URL.

    Strategies, in priority order:
      1. Wrap the first bare occurrence of *url* in ``\url{...}``;
         occurrences that already are the argument of ``\url``, ``\href``,
         ``\nolinkurl`` or ``\path`` are skipped. ``\usepackage{url}`` is
         added when the preamble is visible and loads none of ``url``,
         ``xurl`` or ``hyperref``.
      2. If none of those packages is loaded, add ``\usepackage{url}``
         together with a ``\UrlBreaks`` definition before
         ``\begin{document}``.

    Args:
        tex_content: Content of the .tex file.
        url: The overflowing URL, with or without its ``http(s)://`` scheme.

    Returns:
        ``(new_content, fix_result)``; ``fix_result`` is ``None`` and the
        content is returned unchanged when nothing could be done.
    """
    if url:
        # Strategy 1: turn a bare URL into a \url command.
        stem = re.sub(r'^https?://', '', url)
        bare_pattern = re.compile(r'https?://' + re.escape(stem))
        for hit in bare_pattern.finditer(tex_content):
            if _is_wrapped_url(tex_content, hit.start()):
                continue
            bare_url = hit.group(0)
            # Splice at the matched offset; str.replace could hit an earlier,
            # already wrapped copy of the same URL.
            modified_content = (
                tex_content[:hit.start()]
                + f"\\url{{{bare_url}}}"
                + tex_content[hit.end():]
            )
            if not _has_url_package(modified_content):
                doc_begin = re.search(r'\\begin\{document\}', modified_content)
                if doc_begin:
                    # \url is undefined unless one of the packages is loaded.
                    modified_content = (
                        modified_content[:doc_begin.start()]
                        + "\\usepackage{url}\n"
                        + modified_content[doc_begin.start():]
                    )
            return modified_content, FixResult(
                defect_id="D3",
                object_name="URL",
                action=f"将裸 URL 改为\\url 命令",
                before=bare_url[:50] + "..." if len(bare_url) > 50 else bare_url,
                after=f"\\url{{{bare_url[:50]}...}}" if len(bare_url) > 50 else f"\\url{{{bare_url}}}",
                success=True,
            )

    # Strategy 2: make sure URL line breaking is available at all.
    if not _has_url_package(tex_content):
        doc_begin = re.search(r'\\begin\{document\}', tex_content)
        if doc_begin:
            insert_pos = doc_begin.start()
            new_content = (
                tex_content[:insert_pos]
                + "\\usepackage{url}\n\\def\\UrlBreaks{\\do\\/\\do-}\n"
                + tex_content[insert_pos:]
            )
            return new_content, FixResult(
                defect_id="D3",
                object_name="URL 断行设置",
                action="添加 url 宏包和断行设置",
                before="\\begin{document}",
                after="\\usepackage{url}\n\\def\\UrlBreaks{\\do\\/\\do-}\n\\begin{document}",
                success=True,
            )

    return tex_content, None
442
+
443
+
444
def fix_bibliography_url_breaking(
    tex_content: str,
) -> Tuple[str, Optional[FixResult]]:
    r"""Enable URL line breaking in a biblatex bibliography.

    When the document loads ``biblatex`` (with or without package options)
    and does not set ``biburlnumpenalty`` yet, the three biblatex URL break
    counters are set to 100 right before ``\begin{document}``. Documents
    that do not use biblatex are left unchanged; there is no natbib-specific
    handling.

    Args:
        tex_content: Content of the .tex file.

    Returns:
        ``(new_content, fix_result)``; ``fix_result`` is ``None`` and the
        content is returned unchanged when nothing could be done.
    """
    # biblatex is usually loaded with options, e.g.
    # \usepackage[backend=biber]{biblatex}, so the option block must be
    # tolerated when detecting it.
    loads_biblatex = re.search(
        r'\\(?:usepackage|RequirePackage)\s*(?:\[[^\]]*\])?\s*'
        r'\{[^}]*\bbiblatex\b[^}]*\}',
        tex_content,
    ) is not None
    uses_biblatex = loads_biblatex or '\\bibliographystyle{biblatex}' in tex_content

    if not uses_biblatex or '\\setcounter{biburlnumpenalty}' in tex_content:
        return tex_content, None

    doc_begin = re.search(r'\\begin\{document\}', tex_content)
    if doc_begin is None:
        return tex_content, None

    additions = (
        "\\setcounter{biburlnumpenalty}{100}\n"
        "\\setcounter{biburlucpenalty}{100}\n"
        "\\setcounter{biburllcpenalty}{100}\n"
    )
    insert_pos = doc_begin.start()
    new_content = tex_content[:insert_pos] + additions + tex_content[insert_pos:]
    return new_content, FixResult(
        defect_id="D3",
        object_name="参考文献 URL 断行",
        action="添加 biblatex URL 断行计数器",
        before="\\begin{document}",
        after=additions + "\\begin{document}",
        success=True,
    )
477
+
478
+
479
+ # ============================================================
480
+ # 主修复函数
481
+ # ============================================================
482
+
483
def fix_overflow_defects(
    tex_file_path: str,
    defects: List[Dict[str, Any]],
) -> OverflowFixReport:
    """Repair all Category D defects listed for one .tex file.

    Defects are processed in order, each one against the content produced by
    the previous repairs. The file is written back once, and only if at
    least one repair changed the content.

    Args:
        tex_file_path: Path to the .tex file.
        defects: Defect dictionaries with the keys
            - defect_id: "D1", "D2" or "D3"
            - page: page number
            - line_number: source line (optional)
            - object: object name, e.g. a table or equation label
            - description: text of the defect (used as the overfull line
              for paragraph repairs)
            - overflow_amount: overflow in pt (optional, D1)
            - url: overflowing URL (optional, D3)
            Keys that are missing or ``None`` fall back to neutral defaults;
            entries that are not dictionaries are reported as unresolved.

    Returns:
        OverflowFixReport whose status is "success" when nothing is left
        unresolved, "partial" when some repairs were applied, and "failed"
        otherwise (including read or write errors).
    """
    tex_path = Path(tex_file_path)
    if not tex_path.exists():
        return OverflowFixReport(
            status="failed",
            unresolved=[f"文件不存在:{tex_file_path}"]
        )

    try:
        tex_content = tex_path.read_text(encoding='utf-8')
    except (OSError, UnicodeDecodeError) as e:
        return OverflowFixReport(
            status="failed",
            unresolved=[f"无法读取文件 {tex_file_path}: {e}"]
        )

    changes = []
    unresolved = []

    for defect in defects or []:
        if not isinstance(defect, dict):
            unresolved.append(
                f"{defect!r}: 无法自动修复,可能需要语义改写或人工调整"
            )
            continue

        # JSON null arrives as None, so `or` is used instead of .get defaults.
        defect_id = defect.get("defect_id") or ""
        page = defect.get("page") or 0
        line_number = defect.get("line_number")
        object_name = str(defect.get("object") or "")
        overflow_amount = defect.get("overflow_amount") or 0

        new_content = tex_content
        fix_result = None

        if defect_id == "D1":
            # Table overflow versus paragraph overflow.
            lowered = object_name.lower()
            if "table" in lowered or "tab:" in lowered:
                new_content, fix_result = fix_table_overflow(
                    tex_content,
                    table_label=object_name if object_name.startswith("tab:") else None,
                    line_number=line_number,
                )
            else:
                new_content, fix_result = fix_paragraph_overflow(
                    tex_content,
                    overfull_line=defect.get("description") or "",
                    overflow_amount=overflow_amount,
                )

        elif defect_id == "D2":
            new_content, fix_result = fix_equation_overflow(
                tex_content,
                equation_label=object_name if object_name.startswith("eq:") else None,
                line_number=line_number,
            )

        elif defect_id == "D3":
            new_content, fix_result = fix_url_overflow(
                tex_content, url=defect.get("url") or ""
            )
            if not fix_result:
                # Fall back to bibliography-wide URL breaking.
                new_content, fix_result = fix_bibliography_url_breaking(tex_content)

        # A repair only counts when it actually changed the content.
        if fix_result and new_content != tex_content:
            tex_content = new_content
            fix_result.page = page
            fix_result.line_number = line_number
            changes.append(fix_result)
        else:
            unresolved.append(
                f"{defect_id} ({object_name or '未知对象'}): 无法自动修复,可能需要语义改写或人工调整"
            )

    modified_files = [str(tex_path)] if changes else []

    # Write the repaired content back.
    if modified_files:
        try:
            tex_path.write_text(tex_content, encoding='utf-8')
        except OSError as e:
            unresolved.append(f"无法写入文件 {tex_path}: {e}")
            return OverflowFixReport(
                status="failed",
                modified_files=modified_files,
                changes=changes,
                unresolved=unresolved,
            )

    status = "success" if not unresolved else ("partial" if changes else "failed")

    return OverflowFixReport(
        status=status,
        modified_files=modified_files,
        changes=changes,
        unresolved=unresolved,
    )
595
+
596
+
597
+ # ============================================================
598
+ # CLI 入口
599
+ # ============================================================
600
+
601
def main():
    """Command-line entry point.

    Parses the arguments, loads the defect list from ``--defects`` (inline
    JSON or a path to a JSON file), runs ``fix_overflow_defects`` and prints
    the report as JSON (``--json``) or as plain text. Unreadable or malformed
    defect input ends the program through ``parser.error``.
    """
    import argparse
    import json

    parser = argparse.ArgumentParser(
        description="Fix Category D overflow defects in LaTeX documents"
    )
    parser.add_argument(
        "tex_file",
        help="Path to .tex file"
    )
    parser.add_argument(
        "--defects",
        type=str,
        help="JSON string or file path containing defect list"
    )
    parser.add_argument(
        "--json",
        "-j",
        action="store_true",
        help="Output JSON report"
    )

    args = parser.parse_args()

    def load_defects(raw):
        # Text that looks like JSON is parsed directly; probing it as a path
        # first can raise OSError ("File name too long") for real payloads.
        if raw.lstrip().startswith(("[", "{")):
            try:
                return json.loads(raw)
            except ValueError:
                pass  # may still be an unusually named file
        with open(raw, 'r', encoding='utf-8') as f:
            return json.load(f)

    # Parse the defect list.
    defects = []
    if args.defects:
        try:
            defects = load_defects(args.defects)
        except (OSError, ValueError) as exc:
            parser.error(f"cannot load defects from --defects: {exc}")
        if isinstance(defects, dict):
            defects = [defects]  # a single defect object
        if not isinstance(defects, list):
            parser.error("--defects must contain a JSON list of defect objects")

    # Run the repairs.
    report = fix_overflow_defects(args.tex_file, defects)

    if args.json:
        print(json.dumps(report.to_dict(), indent=2, ensure_ascii=False))
    else:
        print("\nOverflow Fix Report")
        print("=" * 50)
        print(f"Status: {report.status}")
        print(f"Modified files: {report.modified_files}")
        print(f"Changes: {len(report.changes)}")
        for change in report.changes:
            print(f" - [{change.defect_id}] {change.object_name}: {change.action}")
        if report.unresolved:
            print(f"\nUnresolved: {len(report.unresolved)}")
            for u in report.unresolved:
                print(f" - {u}")


if __name__ == "__main__":
    main()