paperfit-cli 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65) hide show
  1. package/.claude/commands/adjust-length.md +21 -0
  2. package/.claude/commands/check-visual.md +27 -0
  3. package/.claude/commands/fix-layout.md +31 -0
  4. package/.claude/commands/migrate-template.md +23 -0
  5. package/.claude/commands/repair-table.md +21 -0
  6. package/.claude/commands/show-status.md +32 -0
  7. package/.claude-plugin/README.md +77 -0
  8. package/.claude-plugin/marketplace.json +41 -0
  9. package/.claude-plugin/plugin.json +39 -0
  10. package/CLAUDE.md +266 -0
  11. package/CONTRIBUTING.md +131 -0
  12. package/LICENSE +21 -0
  13. package/README.md +164 -0
  14. package/agents/code-surgeon-agent.md +214 -0
  15. package/agents/layout-detective-agent.md +229 -0
  16. package/agents/orchestrator-agent.md +254 -0
  17. package/agents/quality-gatekeeper-agent.md +270 -0
  18. package/agents/rule-engine-agent.md +224 -0
  19. package/agents/semantic-polish-agent.md +250 -0
  20. package/bin/paperfit.js +176 -0
  21. package/config/agent_roles.yaml +56 -0
  22. package/config/layout_rules.yaml +54 -0
  23. package/config/templates.yaml +241 -0
  24. package/config/vto_taxonomy.yaml +489 -0
  25. package/config/writing_rules.yaml +64 -0
  26. package/install.sh +30 -0
  27. package/package.json +52 -0
  28. package/requirements.txt +5 -0
  29. package/scripts/benchmark_runner.py +629 -0
  30. package/scripts/compile.sh +244 -0
  31. package/scripts/config_validator.py +339 -0
  32. package/scripts/cv_detector.py +600 -0
  33. package/scripts/evidence_collector.py +167 -0
  34. package/scripts/float_fixers.py +861 -0
  35. package/scripts/inject_defects.py +549 -0
  36. package/scripts/install-claude-global.js +148 -0
  37. package/scripts/install.js +66 -0
  38. package/scripts/install.sh +106 -0
  39. package/scripts/overflow_fixers.py +656 -0
  40. package/scripts/package-for-opensource.sh +138 -0
  41. package/scripts/parse_log.py +260 -0
  42. package/scripts/postinstall.js +38 -0
  43. package/scripts/pre_tool_use.py +265 -0
  44. package/scripts/render_pages.py +244 -0
  45. package/scripts/session_logger.py +329 -0
  46. package/scripts/space_util_fixers.py +773 -0
  47. package/scripts/state_manager.py +352 -0
  48. package/scripts/test_commands.py +187 -0
  49. package/scripts/test_cv_detector.py +214 -0
  50. package/scripts/test_integration.py +290 -0
  51. package/skills/consistency-polisher/SKILL.md +337 -0
  52. package/skills/float-optimizer/SKILL.md +284 -0
  53. package/skills/latex_fixers/__init__.py +82 -0
  54. package/skills/latex_fixers/float_fixers.py +392 -0
  55. package/skills/latex_fixers/fullwidth_fixers.py +375 -0
  56. package/skills/latex_fixers/overflow_fixers.py +250 -0
  57. package/skills/latex_fixers/semantic_micro_tuning.py +362 -0
  58. package/skills/latex_fixers/space_util_fixers.py +389 -0
  59. package/skills/latex_fixers/utils.py +55 -0
  60. package/skills/overflow-repair/SKILL.md +304 -0
  61. package/skills/space-util-fixer/SKILL.md +307 -0
  62. package/skills/taxonomy-vto/SKILL.md +486 -0
  63. package/skills/template-migrator/SKILL.md +251 -0
  64. package/skills/visual-inspector/SKILL.md +217 -0
  65. package/skills/writing-polish/SKILL.md +289 -0
@@ -0,0 +1,375 @@
1
+ """
2
+ Full-Width and Reference Separation Fixers
3
+
4
+ 确保:
5
+ 1. 图片和表格使用满页(单栏)或满栏(双栏)宽度
6
+ 2. 参考文献与正文分离(另起一页)
7
+ 3. 正文末页要么满页要么缩到上一页
8
+
9
+ 核心原则:
10
+ - 绝对禁用 \\resizebox - 该命令会暴力压缩表格,导致字体大小不一
11
+ - 强制使用 tabularx 宏包配合 \\textwidth
12
+ - 通过自动弹性列(X 格式)、动态字号、列间距微调实现满宽
13
+ """
14
+
15
+ import re
16
+ from typing import Any, Dict, Tuple
17
+
18
+ from .utils import add_package_to_preamble, add_to_preamble
19
+
20
+
21
def fix_figure_fullwidth(
    tex_content: str,
    template_layout: str = "two-column",
) -> Tuple[str, Dict[str, Any]]:
    """
    Make every ``\\includegraphics[...]{...}`` span the full width.

    Only the graphics options are rewritten; the surrounding float
    environment is left untouched (``figure`` is not promoted to
    ``figure*``). Calls without an option list are not matched.

    Behaviour:
    1. Size options (width/height/totalheight/scale) are replaced by a single
       ``width=\\textwidth``.
    2. An image whose only size option is already ``width=\\textwidth`` or
       ``width=\\linewidth`` is left exactly as written.
    3. All other options (angle, trim, clip, page, ...) are preserved.

    Args:
        tex_content: Content of the .tex file.
        template_layout: Template type ("two-column" | "single-column");
            only echoed into the change record.

    Returns:
        (modified_content, change_record). ``change_record["count"]`` is the
        number of images actually rewritten and is only present when at
        least one was changed.
    """
    change_record: Dict[str, Any] = {
        "defect_id": "B2-figure-fullwidth",
        "action": "none",
        "layout": template_layout,
    }

    includegraphics_pattern = r'\\includegraphics\[([^\]]*)\]\{([^}]+)\}'
    size_keys = ("width", "height", "totalheight", "scale")
    full_widths = ("\\textwidth", "\\linewidth")

    def split_options(option_text: str) -> list:
        """Split an option list on commas that are not inside braces."""
        parts, current, depth = [], [], 0
        for char in option_text:
            if char == "{":
                depth += 1
            elif char == "}":
                depth = max(depth - 1, 0)
            if char == "," and depth == 0:
                parts.append("".join(current))
                current = []
            else:
                current.append(char)
        parts.append("".join(current))
        return [part.strip() for part in parts if part.strip()]

    def fix_width_to_full(match):
        filename = match.group(2)
        kept = []
        full_value = None
        conflicting = False
        for option in split_options(match.group(1)):
            key, _, value = option.partition("=")
            key = key.strip()
            if key not in size_keys:
                kept.append(option)
            elif key == "width" and value.strip() in full_widths:
                full_value = value.strip()
            else:
                # Any other size setting would fight with the full width.
                conflicting = True

        if full_value is not None and not conflicting:
            # Already full width: keep the author's text byte-for-byte.
            return match.group(0)

        new_options = ",".join([f"width={full_value or full_widths[0]}", *kept])
        change_record["action"] = f"set {filename} to full width (\\textwidth)"
        change_record["count"] = change_record.get("count", 0) + 1
        return f"\\includegraphics[{new_options}]{{{filename}}}"

    modified = re.sub(includegraphics_pattern, fix_width_to_full, tex_content)

    return modified, change_record
63
+
64
+
65
def fix_table_fullwidth(
    tex_content: str,
    template_layout: str = "two-column",
) -> Tuple[str, Dict[str, Any]]:
    """
    Make tables full width using tabularx instead of ``\\resizebox``.

    Steps, in order:
    1. Unwrap ``\\resizebox{..}{..}{ ... }`` around tabular environments.
    2. Ensure the tabularx package is loaded.
    3. Convert ``tabular`` / ``tabular*`` to ``tabularx`` at ``\\textwidth``
       (both ``\\begin`` and the matching ``\\end``), turning l/c/r columns
       into X columns.
    4. For two-column templates, promote ``table`` to ``table*``.
    5. Add a ``\\tabcolsep`` setting to the preamble if none is present.

    Args:
        tex_content: Content of the .tex file.
        template_layout: Template type ("two-column" | "single-column").

    Returns:
        (modified_content, change_record); ``strategies_applied`` lists the
        steps that actually changed something.
    """
    change_record: Dict[str, Any] = {
        "defect_id": "B2-table-fullwidth",
        "action": "none",
        "layout": template_layout,
        "strategies_applied": [],
    }
    strategies = change_record["strategies_applied"]

    # Strategy 1: drop the \resizebox wrapper (highest priority).
    modified, unwrapped = _strip_resizebox(tex_content)
    if unwrapped:
        strategies.append("removed_resizebox")
        change_record["action"] = "removed \\resizebox hack"

    # Strategy 2: make sure tabularx is available.
    if '\\usepackage{tabularx}' not in modified:
        modified = add_package_to_preamble(modified, "tabularx")
        strategies.append("added_tabularx_package")

    # Strategy 3: tabular -> tabularx at \textwidth.
    modified, new_specs = _tabular_to_tabularx(modified)
    strategies.extend(f"converted_to_tabularx_{spec}" for spec in new_specs)

    # Strategy 4: two-column templates need table* to span both columns.
    # Plain str.replace is used on purpose: as an re.sub replacement template
    # "\begin" would turn into a backspace and "\end" raises re.error.
    if template_layout == "two-column" and '\\begin{table}' in modified:
        modified = modified.replace('\\begin{table}', '\\begin{table*}')
        modified = modified.replace('\\end{table}', '\\end{table*}')
        strategies.append("converted_to_table_star")
        change_record["action"] = "converted table to table* for two-column layout"

    # Strategy 5: tighten column spacing unless the document already sets it.
    if '\\tabcolsep' not in modified:
        modified = add_to_preamble(modified, "\\setlength{\\tabcolsep}{4pt}")
        strategies.append("reduced_tabcolsep")

    if strategies:
        change_record["action"] = (
            f"applied {len(strategies)} strategies for full-width table"
        )

    return modified, change_record


def _read_brace_group(text: str, pos: int):
    """Return (content, end_index) of the balanced ``{...}`` at pos, else None."""
    if pos >= len(text) or text[pos] != "{":
        return None
    depth = 0
    idx = pos
    while idx < len(text):
        char = text[idx]
        if char == "\\":
            # Skip the escaped character so "\{" and "\}" are not counted.
            idx += 2
            continue
        if char == "{":
            depth += 1
        elif char == "}":
            depth -= 1
            if depth == 0:
                return text[pos + 1:idx], idx + 1
        idx += 1
    return None


def _strip_resizebox(text: str):
    """Unwrap ``\\resizebox{w}{h}{<tabular>}``; return (text, number_removed)."""
    filler = r'(?:\s|%[^\n]*\n)*'  # whitespace or a trailing "%" comment
    opener_re = re.compile(
        r'\\resizebox\*?\{[^}]*\}\{[^}]*\}\{' + filler + r'(?=\\begin\{tabular)'
    )
    closer_re = re.compile(r'(\\end\{tabular[x*]?\})' + filler + r'\}')
    removed = 0
    while True:
        opener = opener_re.search(text)
        if opener is None:
            break
        closer = closer_re.search(text, opener.end())
        if closer is None:
            # Unbalanced wrapper: leave it alone rather than corrupt the file.
            break
        text = (
            text[:opener.start()]
            + text[opener.end():closer.start()]
            + closer.group(1)
            + text[closer.end():]
        )
        removed += 1
    return text, removed


def _tabular_to_tabularx(text: str):
    """Rename tabular/tabular* environments to tabularx; return (text, specs)."""
    token_re = re.compile(r'\\(begin|end)\{tabular(\*)?\}')
    pieces = []
    specs = []
    converted_stack = []  # one flag per open environment: was it converted?
    cursor = 0
    while True:
        token = token_re.search(text, cursor)
        if token is None:
            break
        pieces.append(text[cursor:token.start()])
        cursor = token.end()

        if token.group(1) == "end":
            converted = converted_stack.pop() if converted_stack else False
            pieces.append("\\end{tabularx}" if converted else token.group(0))
            continue

        spec_pos = cursor
        if token.group(2):
            # tabular* carries its own width argument; it is replaced by
            # \textwidth, so skip over it to reach the column spec.
            width_group = _read_brace_group(text, spec_pos)
            spec_pos = width_group[1] if width_group else -1
        spec_group = _read_brace_group(text, spec_pos) if spec_pos >= 0 else None
        if spec_group is None:
            # Unsupported form (e.g. optional [pos] argument): keep as is.
            converted_stack.append(False)
            pieces.append(token.group(0))
            continue

        new_col_spec = convert_cols_to_x(spec_group[0])
        specs.append(new_col_spec)
        pieces.append(f"\\begin{{tabularx}}{{\\textwidth}} {{{new_col_spec}}}")
        cursor = spec_group[1]
        converted_stack.append(True)

    pieces.append(text[cursor:])
    return "".join(pieces), specs
159
+
160
+
161
def convert_cols_to_x(col_spec: str) -> str:
    """
    Turn the l/c/r columns of a column spec into elastic X columns.

    Only letters at brace depth 0 are converted, so arguments such as
    ``p{3cm}``, ``@{...}`` or ``>{\\centering}`` are left intact. Separators
    (``|``) and every other character are copied through unchanged.

    Args:
        col_spec: Original column spec, e.g. "l|c|r".

    Returns:
        The converted column spec, e.g. "X|X|X".
    """
    result = []
    depth = 0
    for char in col_spec:
        if char == '{':
            depth += 1
        elif char == '}':
            depth = max(depth - 1, 0)
        if depth == 0 and char in ('l', 'c', 'r'):
            result.append('X')
        else:
            result.append(char)
    return ''.join(result)
179
+
180
+
181
def fix_table_fullwidth_native(
    tex_content: str,
    template_layout: str = "two-column",
) -> Tuple[str, Dict[str, Any]]:
    """
    Full-width table fix with an extra font-size fallback.

    Runs ``fix_table_fullwidth`` and then, when an overflow is signalled,
    prefixes the first ``tabularx`` environment with ``\\small``.

    Args:
        tex_content: Content of the .tex file.
        template_layout: Template type.

    Returns:
        (modified_content, change_record). The record is the one produced by
        ``fix_table_fullwidth`` (its keys, including ``defect_id``, overwrite
        the defaults set here).
    """
    change_record: Dict[str, Any] = {
        "defect_id": "B2-table-native-fullwidth",
        "action": "none",
        "layout": template_layout,
    }

    # Delegate to the main fixer and adopt its record.
    modified, record = fix_table_fullwidth(tex_content, template_layout)
    change_record.update(record)

    # Extra strategy: shrink the font when an overflow is signalled.
    # NOTE(review): the trigger looks for the word "Overfull" in the .tex
    # source itself (or an "overflow" note on the record) - confirm that
    # callers actually pass such a marker.
    if 'Overfull' in tex_content or 'overflow' in change_record.get("note", ""):
        begin_token = '\\begin{tabularx}'
        if begin_token in modified:
            # str.replace, not re.sub: "\small" is an invalid escape in an
            # re.sub replacement template and raises re.error.
            modified = modified.replace(begin_token, '\\small' + begin_token, 1)
            change_record["action"] += " + added \\small for tighter fit"

    return modified, change_record
223
+
224
+
225
def fix_all_floats_fullwidth(
    tex_content: str,
    template_layout: str = "two-column",
) -> Tuple[str, Dict[str, Any]]:
    """
    Run the figure fixer and then the table fixer over the document.

    Args:
        tex_content: Content of the .tex file.
        template_layout: Template type ("two-column" | "single-column").

    Returns:
        (modified_content, change_record). ``actions`` holds the record of
        each fixer that reported something other than "none"; ``action``
        summarises how many did.
    """
    applied = []
    current = tex_content

    # Figures first, then tables; each fixer works on the previous output.
    for fixer in (fix_figure_fullwidth, fix_table_fullwidth):
        current, outcome = fixer(current, template_layout)
        if outcome["action"] != "none":
            applied.append(outcome)

    summary: Dict[str, Any] = {
        "defect_id": "B2-all-floats-fullwidth",
        "actions": applied,
    }
    if applied:
        summary["action"] = f"fixed {len(applied)} float types"
    else:
        summary["action"] = "none"

    return current, summary
262
+
263
+
264
def ensure_reference_newpage(
    tex_content: str,
) -> Tuple[str, Dict[str, Any]]:
    """
    Make the references start on their own page.

    Looks for ``\\bibliography{...}``, ``\\printbibliography`` and
    ``\\begin{thebibliography}`` (in that order) and inserts ``\\newpage``
    in front of the first occurrence that is not already preceded by
    ``\\newpage`` or ``\\clearpage`` within the previous 100 characters.
    No heading is inserted: the bibliography commands print their own.

    Args:
        tex_content: Content of the .tex file.

    Returns:
        (modified_content, change_record). When nothing is inserted the
        record carries a ``note`` explaining why.
    """
    change_record: Dict[str, Any] = {
        "defect_id": "A3-reference-separation",
        "action": "none",
    }

    # (regex, human-readable name) for each supported bibliography command.
    biblio_patterns = [
        (r'(\\bibliography\{[^}]*\})', '\\bibliography'),
        (r'(\\printbibliography)', '\\printbibliography'),
        (r'(\\begin\{thebibliography\})', '\\begin{thebibliography}'),
    ]

    found_any = False
    for pattern, name in biblio_patterns:
        for match in re.finditer(pattern, tex_content):
            biblio_start = match.start()
            line_start = tex_content.rfind('\n', 0, biblio_start) + 1
            line_prefix = tex_content[line_start:biblio_start]

            # Ignore commands that sit behind an unescaped "%" (comments).
            if re.search(r'(?<!\\)%', line_prefix):
                continue
            found_any = True

            # Already separated if a page break appears shortly before it.
            context_before = tex_content[max(0, biblio_start - 100):biblio_start]
            if '\\newpage' in context_before or '\\clearpage' in context_before:
                continue

            # Insert at the start of the command's own line when only
            # whitespace precedes it; otherwise directly before the command.
            # Searching further back would push body text onto the new page.
            insert_pos = biblio_start if line_prefix.strip() else line_start

            modified = tex_content[:insert_pos] + "\\newpage\n" + tex_content[insert_pos:]
            change_record["action"] = f"added \\newpage before {name}"
            return modified, change_record

    if found_any:
        change_record["note"] = "bibliography already starts on a new page"
    else:
        change_record["note"] = "no bibliography command found"
    return tex_content, change_record
321
+
322
+
323
def fix_body_last_page(
    tex_content: str,
    target_section: str | None = None,
) -> Tuple[str, Dict[str, Any]]:
    """
    Prepare the last body page: separate the references and flag a short
    closing section.

    Steps:
    1. Call ``ensure_reference_newpage`` so the references start a new page.
    2. If ``target_section`` is given, locate ``\\section{<name>}`` or
       ``\\section*{<name>}`` and measure its text up to the next section,
       page break, bibliography command or ``\\end{document}``.
    3. If that text has fewer than 3 lines, record a suggestion to expand it.
       The text itself is never rewritten here.

    Args:
        tex_content: Content of the .tex file.
        target_section: Title of the section to inspect (e.g. "Conclusion").

    Returns:
        (modified_content, change_record)
    """
    change_record: Dict[str, Any] = {
        "defect_id": "A2-body-last-page",
        "action": "none",
    }

    # First make sure the references are on their own page.
    modified, ref_record = ensure_reference_newpage(tex_content)
    if ref_record["action"] != "none":
        change_record["action"] = ref_record["action"]
        change_record["ref_separation"] = ref_record

    if not target_section:
        return modified, change_record

    # Escape the title so characters such as "(" or "+" are matched literally.
    title = re.escape(target_section)
    conclusion_match = re.search(rf'\\section\*?\{{{title}\}}', modified)
    if conclusion_match is None:
        return modified, change_record

    conclusion_start = conclusion_match.end()

    # The section ends at whichever of these markers comes first.
    end_markers = (
        '\\section',
        '\\newpage',
        '\\clearpage',
        '\\bibliography',
        '\\printbibliography',
        '\\begin{thebibliography}',
        '\\end{document}',
    )
    hits = [
        pos
        for pos in (modified.find(marker, conclusion_start) for marker in end_markers)
        if pos != -1
    ]
    conclusion_end = min(hits) if hits else -1

    if conclusion_end > conclusion_start:
        conclusion_content = modified[conclusion_start:conclusion_end]
        lines = conclusion_content.strip().split('\n')

        # Fewer than 3 lines: suggest expanding the section.
        if len(lines) < 3:
            change_record["suggestion"] = "expand conclusion section to fill page"
            change_record["action"] = "identified short conclusion section"

    return modified, change_record
@@ -0,0 +1,250 @@
1
+ """
2
+ Overflow Repair Fixers - Category D 缺陷修复
3
+
4
+ 处理 overfull hbox、公式溢出、URL 溢出等问题。
5
+ """
6
+
7
+ import re
8
+ from pathlib import Path
9
+ from typing import Any, Dict, List, Tuple
10
+
11
+ from .utils import add_package_to_preamble, add_to_preamble, find_paragraph_end, find_paragraph_start
12
+
13
+
14
+ def fix_overfull_hbox(
15
+ tex_content: str,
16
+ line_number: int,
17
+ overflow_type: str = "paragraph",
18
+ overflow_amount: float | None = None,
19
+ ) -> Tuple[str, Dict[str, Any]]:
20
+ """
21
+ 修复指定行的 overfull hbox 问题。
22
+
23
+ Args:
24
+ tex_content: .tex 文件内容
25
+ line_number: 问题所在行号(从 1 开始)
26
+ overflow_type: 溢出类型 (paragraph/table/formula)
27
+ overflow_amount: 溢出量(pt)
28
+
29
+ Returns:
30
+ (modified_content, change_record)
31
+ """
32
+ lines = tex_content.split('\n')
33
+ if line_number < 1 or line_number > len(lines):
34
+ return tex_content, {"status": "failed", "reason": "行号超出范围"}
35
+
36
+ target_line = lines[line_number - 1]
37
+ change_record = {
38
+ "defect_id": "D1",
39
+ "line": line_number,
40
+ "type": overflow_type,
41
+ "overflow_amount": overflow_amount,
42
+ }
43
+
44
+ # 根据溢出类型选择修复策略
45
+ if overflow_type == "table":
46
+ return fix_table_overflow(tex_content, line_number)
47
+ elif overflow_type == "formula":
48
+ return fix_long_formula(tex_content, line_number)
49
+ else:
50
+ return fix_paragraph_overflow(tex_content, line_number)
51
+
52
+
53
+ def fix_paragraph_overflow(tex_content: str, line_number: int | None = None) -> Tuple[str, Dict[str, Any]]:
54
+ """
55
+ 修复段落文本溢出。
56
+
57
+ 策略:
58
+ 1. 在长单词中插入断词点 \-
59
+ 2. 添加 \emergencystretch 允许额外拉伸
60
+ """
61
+ lines = tex_content.split('\n')
62
+ change_record = {
63
+ "defect_id": "D1-paragraph",
64
+ "action": "none",
65
+ }
66
+
67
+ # 策略 1: 查找长单词并添加断词点
68
+ if line_number:
69
+ target_idx = line_number - 1
70
+ if 0 <= target_idx < len(lines):
71
+ line = lines[target_idx]
72
+ # 查找长度超过 15 的单词
73
+ long_words = re.findall(r'\b[a-zA-Z]{15,}\b', line)
74
+ if long_words:
75
+ for word in long_words[:2]: # 最多处理 2 个单词
76
+ # 在元音后添加断词点
77
+ hyphenated = add_hyphenation_points(word)
78
+ line = line.replace(word, hyphenated, 1)
79
+ lines[target_idx] = line
80
+ change_record["action"] = f"insert_hyphenation in {long_words}"
81
+
82
+ # 策略 2: 如果仍未解决,在段落前添加\emergencystretch
83
+ if change_record["action"] == "none":
84
+ # 查找段落开始(简单启发式:找到包含文本的行)
85
+ if line_number:
86
+ # 向前查找段落开始
87
+ para_start = find_paragraph_start(lines, line_number - 1)
88
+ if para_start >= 0:
89
+ # 在段落前插入\emergencystretch
90
+ emergencystretch_line = "{\\emergencystretch=1.5em "
91
+ lines.insert(para_start, emergencystretch_line)
92
+ # 在段落结束添加闭合括号
93
+ para_end = find_paragraph_end(lines, para_start + 1)
94
+ if para_end >= 0:
95
+ lines.insert(para_end + 1, "}")
96
+ change_record["action"] = f"add_emergencystretch at line {para_start}"
97
+
98
+ return '\n'.join(lines), change_record
99
+
100
+
101
def add_hyphenation_points(word: str) -> str:
    """
    Insert LaTeX discretionary hyphens (``\\-``) into a word.

    Simple rule: a break point goes after a vowel that is directly followed
    by a consonant letter, provided at least two more characters follow the
    vowel.
    """
    vowel_set = frozenset("aeiouAEIOU")
    cutoff = len(word) - 2  # vowels at or beyond this index are never split
    pieces = []
    for pos, letter in enumerate(word):
        pieces.append(letter)
        if pos >= cutoff or letter not in vowel_set:
            continue
        follower = word[pos + 1]
        if follower.isalpha() and follower not in vowel_set:
            pieces.append("\\-")
    return ''.join(pieces)
116
+
117
+
118
def find_paragraph_start(lines: List[str], target_idx: int) -> int:
    """Scan backwards for the paragraph start (blank line, \\begin or \\section).

    Returns the index just after the boundary line, or the boundary's own
    index when the target line itself is the boundary; 0 if none is found.
    """
    boundary_prefixes = ('\\begin', '\\section')
    idx = target_idx
    while idx >= 0:
        stripped = lines[idx].strip()
        if stripped == "" or stripped.startswith(boundary_prefixes):
            return idx if idx == target_idx else idx + 1
        idx -= 1
    return 0
125
+
126
+
127
def find_paragraph_end(lines: List[str], start_idx: int) -> int:
    """Scan forwards for the paragraph end (blank line, \\end or \\section).

    Returns the index just before the boundary line, or the boundary's own
    index when the start line itself is the boundary; the last index if no
    boundary is found.
    """
    boundary_prefixes = ('\\end', '\\section')
    total = len(lines)
    idx = start_idx
    while idx < total:
        stripped = lines[idx].strip()
        if stripped == "" or stripped.startswith(boundary_prefixes):
            return idx if idx == start_idx else idx - 1
        idx += 1
    return total - 1
134
+
135
+
136
+ def fix_table_overflow(tex_content: str, line_number: int | None = None) -> Tuple[str, Dict[str, Any]]:
137
+ """
138
+ 修复表格溢出。
139
+
140
+ 策略:
141
+ 1. 将 tabular 替换为 tabularx
142
+ 2. 设置列宽为\linewidth
143
+ 3. 使用 p{width}列类型
144
+ """
145
+ change_record = {
146
+ "defect_id": "D1-table",
147
+ "action": "none",
148
+ }
149
+
150
+ # 策略 1: 查找 tabular 环境并替换为 tabularx
151
+ tabular_pattern = r'\\begin\{tabular\}(\{[^}]*\})'
152
+
153
+ def replace_with_tabularx(match):
154
+ col_spec = match.group(1)
155
+ # 将 l/c/r列替换为 X 列(保留第一列)
156
+ cols = col_spec.strip('{}').split('|')
157
+ new_cols = []
158
+ for col in cols:
159
+ if col.strip() in ['l', 'c', 'r']:
160
+ new_cols.append('X')
161
+ else:
162
+ new_cols.append(col)
163
+ new_spec = '{' + '|'.join(new_cols) + '}'
164
+ change_record["action"] = f"replaced tabular with tabularx, col spec: {new_spec}"
165
+ return f"\\begin{{tabularx}}{{\\linewidth}}{new_spec}"
166
+
167
+ modified = re.sub(tabular_pattern, replace_with_tabularx, tex_content, count=1)
168
+
169
+ if modified != tex_content:
170
+ # 检查是否需要添加 tabularx 宏包
171
+ if '\\usepackage{tabularx}' not in modified:
172
+ # 在导言区添加宏包
173
+ modified = add_package_to_preamble(modified, "tabularx")
174
+ change_record["packages_added"] = ["tabularx"]
175
+
176
+ return modified, change_record
177
+
178
+
179
+ def fix_long_formula(tex_content: str, line_number: int | None = None) -> Tuple[str, Dict[str, Any]]:
180
+ """
181
+ 修复长公式溢出。
182
+
183
+ 策略:
184
+ 1. 将 equation 替换为 multline 或 split
185
+ 2. 在运算符后添加换行\\\\
186
+ """
187
+ change_record = {
188
+ "defect_id": "D2-formula",
189
+ "action": "none",
190
+ }
191
+
192
+ # 策略 1: 查找单行 equation 环境并替换为 split
193
+ equation_pattern = r'\\begin\{equation\}([^\\]*?)\\end\{equation\}'
194
+
195
+ def replace_with_split(match):
196
+ formula = match.group(1)
197
+ # 查找等号或加减号位置
198
+ break_points = []
199
+ for i, char in enumerate(formula):
200
+ if char in '=+-' and i > len(formula) // 3 and i < 2 * len(formula) // 3:
201
+ break_points.append(i)
202
+
203
+ if break_points:
204
+ bp = break_points[0]
205
+ # 在断点处插入\\
206
+ part1 = formula[:bp+1].rstrip()
207
+ part2 = formula[bp+1:].lstrip()
208
+ new_formula = f"{part1}\n\\\\\n\\quad {part2}"
209
+ change_record["action"] = "split equation at operator"
210
+ return f"\\begin{{split}}\n{new_formula}\n\\end{{split}}"
211
+ return match.group(0)
212
+
213
+ modified = re.sub(equation_pattern, replace_with_split, tex_content, count=1, flags=re.DOTALL)
214
+
215
+ return modified, change_record
216
+
217
+
218
+ def fix_url_overflow(tex_content: str, url: str | None = None) -> Tuple[str, Dict[str, Any]]:
219
+ """
220
+ 修复 URL 溢出。
221
+
222
+ 策略:
223
+ 1. 使用\\url{}命令包裹
224
+ 2. 添加\\urlbreaks 配置
225
+ """
226
+ change_record = {
227
+ "defect_id": "D3-url",
228
+ "action": "none",
229
+ }
230
+
231
+ # 策略 1: 查找裸 URL 并替换为\\url{}
232
+ url_pattern = r'(https?://[^\s\}\]\)]+)'
233
+
234
+ def wrap_with_url(match):
235
+ raw_url = match.group(1)
236
+ if not raw_url.startswith('\\url{'):
237
+ change_record["action"] = f"wrapped URL with \\url command"
238
+ return f"\\url{{{raw_url}}}"
239
+ return raw_url
240
+
241
+ modified = re.sub(url_pattern, wrap_with_url, tex_content)
242
+
243
+ # 策略 2: 添加 URL 断行配置
244
+ if change_record["action"] and '\\def\\UrlBreaks' not in modified:
245
+ config_line = "\\def\\UrlBreaks{\\do\\/\\do-}"
246
+ # 在导言区添加
247
+ modified = add_to_preamble(modified, config_line)
248
+ change_record["preamble_added"] = config_line
249
+
250
+ return modified, change_record