paperfit-cli 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/commands/adjust-length.md +21 -0
- package/.claude/commands/check-visual.md +27 -0
- package/.claude/commands/fix-layout.md +31 -0
- package/.claude/commands/migrate-template.md +23 -0
- package/.claude/commands/repair-table.md +21 -0
- package/.claude/commands/show-status.md +32 -0
- package/.claude-plugin/README.md +77 -0
- package/.claude-plugin/marketplace.json +41 -0
- package/.claude-plugin/plugin.json +39 -0
- package/CLAUDE.md +266 -0
- package/CONTRIBUTING.md +131 -0
- package/LICENSE +21 -0
- package/README.md +164 -0
- package/agents/code-surgeon-agent.md +214 -0
- package/agents/layout-detective-agent.md +229 -0
- package/agents/orchestrator-agent.md +254 -0
- package/agents/quality-gatekeeper-agent.md +270 -0
- package/agents/rule-engine-agent.md +224 -0
- package/agents/semantic-polish-agent.md +250 -0
- package/bin/paperfit.js +176 -0
- package/config/agent_roles.yaml +56 -0
- package/config/layout_rules.yaml +54 -0
- package/config/templates.yaml +241 -0
- package/config/vto_taxonomy.yaml +489 -0
- package/config/writing_rules.yaml +64 -0
- package/install.sh +30 -0
- package/package.json +52 -0
- package/requirements.txt +5 -0
- package/scripts/benchmark_runner.py +629 -0
- package/scripts/compile.sh +244 -0
- package/scripts/config_validator.py +339 -0
- package/scripts/cv_detector.py +600 -0
- package/scripts/evidence_collector.py +167 -0
- package/scripts/float_fixers.py +861 -0
- package/scripts/inject_defects.py +549 -0
- package/scripts/install-claude-global.js +148 -0
- package/scripts/install.js +66 -0
- package/scripts/install.sh +106 -0
- package/scripts/overflow_fixers.py +656 -0
- package/scripts/package-for-opensource.sh +138 -0
- package/scripts/parse_log.py +260 -0
- package/scripts/postinstall.js +38 -0
- package/scripts/pre_tool_use.py +265 -0
- package/scripts/render_pages.py +244 -0
- package/scripts/session_logger.py +329 -0
- package/scripts/space_util_fixers.py +773 -0
- package/scripts/state_manager.py +352 -0
- package/scripts/test_commands.py +187 -0
- package/scripts/test_cv_detector.py +214 -0
- package/scripts/test_integration.py +290 -0
- package/skills/consistency-polisher/SKILL.md +337 -0
- package/skills/float-optimizer/SKILL.md +284 -0
- package/skills/latex_fixers/__init__.py +82 -0
- package/skills/latex_fixers/float_fixers.py +392 -0
- package/skills/latex_fixers/fullwidth_fixers.py +375 -0
- package/skills/latex_fixers/overflow_fixers.py +250 -0
- package/skills/latex_fixers/semantic_micro_tuning.py +362 -0
- package/skills/latex_fixers/space_util_fixers.py +389 -0
- package/skills/latex_fixers/utils.py +55 -0
- package/skills/overflow-repair/SKILL.md +304 -0
- package/skills/space-util-fixer/SKILL.md +307 -0
- package/skills/taxonomy-vto/SKILL.md +486 -0
- package/skills/template-migrator/SKILL.md +251 -0
- package/skills/visual-inspector/SKILL.md +217 -0
- package/skills/writing-polish/SKILL.md +289 -0
|
@@ -0,0 +1,375 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Full-Width and Reference Separation Fixers
|
|
3
|
+
|
|
4
|
+
确保:
|
|
5
|
+
1. 图片和表格使用满页(单栏)或满栏(双栏)宽度
|
|
6
|
+
2. 参考文献与正文分离(另起一页)
|
|
7
|
+
3. 正文末页要么满页要么缩到上一页
|
|
8
|
+
|
|
9
|
+
核心原则:
|
|
10
|
+
- 绝对禁用 \\resizebox - 该命令会暴力压缩表格,导致字体大小不一
|
|
11
|
+
- 强制使用 tabularx 宏包配合 \\textwidth
|
|
12
|
+
- 通过自动弹性列(X 格式)、动态字号、列间距微调实现满宽
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
import re
|
|
16
|
+
from typing import Any, Dict, Tuple
|
|
17
|
+
|
|
18
|
+
from .utils import add_package_to_preamble, add_to_preamble
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def _split_graphics_options(options: str) -> list[str]:
    r"""Split an ``\includegraphics`` option list on top-level commas.

    Commas nested inside braces (e.g. ``trim={1 2 3 4}``) do not split.
    Entries are stripped of surrounding whitespace; empty entries are dropped.
    """
    parts: list[str] = []
    current: list[str] = []
    depth = 0
    for ch in options:
        if ch == "," and depth == 0:
            parts.append("".join(current).strip())
            current = []
            continue
        if ch == "{":
            depth += 1
        elif ch == "}":
            depth = max(depth - 1, 0)
        current.append(ch)
    parts.append("".join(current).strip())
    return [part for part in parts if part]


def fix_figure_fullwidth(
    tex_content: str,
    template_layout: str = "two-column",
) -> Tuple[str, Dict[str, Any]]:
    r"""Make every ``\includegraphics[...]{...}`` span the full text width.

    Sizing options (``width``, ``height``, ``totalheight``, ``scale``) are
    replaced by ``width=\textwidth``; every other option (``trim``, ``clip``,
    ``angle``, ``page`` ...) is preserved. Graphics whose only sizing option
    is already ``width=\textwidth`` or ``width=\linewidth`` are left untouched.
    Calls without an option list (``\includegraphics{file}``) are not matched.

    Args:
        tex_content: Contents of the .tex file.
        template_layout: "two-column" or "single-column". It is only echoed in
            the change record; the surrounding ``figure`` environment is not
            rewritten here.

    Returns:
        ``(modified_content, change_record)``. ``change_record["action"]`` is
        ``"none"`` when nothing was rewritten; otherwise it names the last
        rewritten file and ``change_record["count"]`` holds the number of
        rewritten graphics.
    """
    change_record: Dict[str, Any] = {
        "defect_id": "B2-figure-fullwidth",
        "action": "none",
        "layout": template_layout,
    }

    includegraphics_pattern = r'\\includegraphics\[([^\]]*)\]\{([^}]+)\}'
    other_size_keys = {"height", "totalheight", "scale"}
    full_widths = {"\\textwidth", "\\linewidth"}
    changed = 0

    def fix_width_to_full(match):
        nonlocal changed
        filename = match.group(2)

        kept = []
        current_width = None
        has_other_size = False
        for option in _split_graphics_options(match.group(1)):
            key, _, value = option.partition("=")
            key = key.strip()
            if key == "width":
                current_width = value.strip()
            elif key in other_size_keys:
                has_other_size = True
            else:
                kept.append(option)

        # Already full width: returning the match verbatim keeps the record
        # honest (no phantom "action") and avoids pointless churn.
        if current_width in full_widths and not has_other_size:
            return match.group(0)

        changed += 1
        new_options = ",".join(["width=\\textwidth", *kept])
        change_record["action"] = f"set {filename} to full width (\\textwidth)"
        # A callable replacement is inserted literally, so the backslashes
        # are not re-interpreted as regex template escapes.
        return f"\\includegraphics[{new_options}]{{{filename}}}"

    modified = re.sub(includegraphics_pattern, fix_width_to_full, tex_content)

    if changed:
        change_record["count"] = changed

    return modified, change_record
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def _read_braced_group(text: str, pos: int) -> "tuple[str, int] | None":
    """Read the balanced ``{...}`` group that starts at or after ``pos``.

    Leading whitespace is skipped. Returns ``(inner_text, index_after_group)``
    or ``None`` when no balanced group starts there.
    """
    length = len(text)
    while pos < length and text[pos].isspace():
        pos += 1
    if pos >= length or text[pos] != "{":
        return None
    depth = 0
    i = pos
    while i < length:
        ch = text[i]
        if ch == "\\":
            i += 2  # skip escaped characters such as \{ and \}
            continue
        if ch == "{":
            depth += 1
        elif ch == "}":
            depth -= 1
            if depth == 0:
                return text[pos + 1:i], i + 1
        i += 1
    return None


def _convert_top_level_tabulars(text: str) -> "tuple[str, list[str]]":
    r"""Rewrite outermost ``tabular``/``tabular*`` environments as ``tabularx``.

    Both the ``\begin`` and the matching ``\end`` are renamed. Nested tabulars
    and environments whose converted column spec contains no ``X`` column are
    left untouched. Returns the new text and the list of new column specs.
    """
    token_re = re.compile(r'\\(begin|end)\{tabular(\*?)\}')
    optional_re = re.compile(r'\s*(\[[^\]]*\])')
    pieces: list[str] = []
    new_specs: list[str] = []
    open_envs: list[bool] = []  # one flag per open tabular: was it converted?
    cursor = 0

    for token in token_re.finditer(text):
        if token.start() < cursor:
            continue  # token lies inside arguments that were already consumed

        if token.group(1) == "end":
            if open_envs and open_envs.pop():
                pieces.append(text[cursor:token.start()])
                pieces.append("\\end{tabularx}")
                cursor = token.end()
            continue

        if open_envs:
            open_envs.append(False)  # nested table: keep its natural width
            continue

        pos = token.end()
        if token.group(2):
            # tabular* carries its own width argument; it is replaced below.
            width = _read_braced_group(text, pos)
            if width is None:
                open_envs.append(False)
                continue
            pos = width[1]

        placement = ""
        optional = optional_re.match(text, pos)
        if optional:
            placement = optional.group(1)
            pos = optional.end()

        spec = _read_braced_group(text, pos)
        new_spec = convert_cols_to_x(spec[0]) if spec else ""
        if spec is None or "X" not in new_spec:
            # tabularx needs at least one X column to distribute the width.
            open_envs.append(False)
            continue

        pieces.append(text[cursor:token.start()])
        pieces.append(f"\\begin{{tabularx}}{{\\textwidth}}{placement} {{{new_spec}}}")
        cursor = spec[1]
        new_specs.append(new_spec)
        open_envs.append(True)

    pieces.append(text[cursor:])
    return "".join(pieces), new_specs


def fix_table_fullwidth(
    tex_content: str,
    template_layout: str = "two-column",
) -> Tuple[str, Dict[str, Any]]:
    r"""Make tables span the full width without ``\resizebox`` scaling.

    Steps:
        1. Unwrap ``\resizebox{..}{..}{ \begin{tabular}...\end{tabular} }``.
        2. Convert outermost ``tabular``/``tabular*`` environments to
           ``tabularx`` at ``\textwidth`` with flexible ``X`` columns.
        3. Load the ``tabularx`` package when a ``tabularx`` is present.
        4. For the two-column layout, turn ``table`` into ``table*``.
        5. Set ``\tabcolsep`` when a ``tabularx`` is present and the document
           does not already mention ``\tabcolsep``.

    Args:
        tex_content: Contents of the .tex file.
        template_layout: "two-column" or "single-column".

    Returns:
        ``(modified_content, change_record)``; the record lists every applied
        strategy in ``strategies_applied`` and keeps ``action == "none"`` when
        the document was left untouched.
    """
    change_record: Dict[str, Any] = {
        "defect_id": "B2-table-fullwidth",
        "action": "none",
        "layout": template_layout,
        "strategies_applied": [],
    }
    strategies = change_record["strategies_applied"]
    modified = tex_content

    # Strategy 1: drop the \resizebox wrapper but keep the tabular inside.
    # "%"-terminated lines right after "{" / before "}" are tolerated.
    resizebox_re = re.compile(
        r'\\resizebox\{[^}]*\}\{[^}]*\}\{\s*(?:%[^\n]*\n\s*)?'
        r'(\\begin\{tabular\*?\}.*?\\end\{tabular\*?\})'
        r'\s*(?:%[^\n]*\n\s*)?\}',
        re.DOTALL,
    )
    # Callable replacement: inserted literally, never parsed as a template
    # (a template starting with "\b" or "\e" would be mis-read as an escape).
    modified, unwrapped = resizebox_re.subn(lambda m: m.group(1), modified)
    if unwrapped:
        strategies.append("removed_resizebox")

    # Strategy 3 runs first so that strategy 2 knows whether it is needed.
    modified, new_specs = _convert_top_level_tabulars(modified)
    has_tabularx = "\\begin{tabularx}" in modified

    # Strategy 2: load tabularx (also detects \usepackage{a,tabularx,b}).
    package_re = r'\\usepackage(?:\[[^\]]*\])?\{[^}]*\btabularx\b[^}]*\}'
    if has_tabularx and not re.search(package_re, modified):
        modified = add_package_to_preamble(modified, "tabularx")
        strategies.append("added_tabularx_package")

    strategies.extend(f"converted_to_tabularx_{spec}" for spec in new_specs)

    # Strategy 4: a \textwidth-wide table must span both columns.
    if template_layout == "two-column" and "\\begin{table}" in modified:
        modified = modified.replace("\\begin{table}", "\\begin{table*}")
        modified = modified.replace("\\end{table}", "\\end{table*}")
        strategies.append("converted_to_table_star")

    # Strategy 5: tighten the column padding, only when there is a table.
    if has_tabularx and "\\tabcolsep" not in modified:
        modified = add_to_preamble(modified, "\\setlength{\\tabcolsep}{4pt}")
        strategies.append("reduced_tabcolsep")

    if strategies:
        change_record["action"] = f"applied {len(strategies)} strategies for full-width table"

    return modified, change_record
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
def convert_cols_to_x(col_spec: str) -> str:
    r"""Turn top-level ``l``/``c``/``r`` columns into flexible ``X`` columns.

    Only column letters at brace depth 0 are converted. Text inside braces
    (``p{2cm}``, ``@{...}``, ``>{\centering}``, ``*{3}{c}``) and control
    sequences are copied verbatim, so widths and formatting hooks are not
    corrupted. ``|`` separators and every other character are preserved.

    Args:
        col_spec: Original column spec, e.g. ``"l|c|r"``.

    Returns:
        The converted spec, e.g. ``"X|X|X"``.
    """
    control_sequence = re.compile(r'\\(?:[A-Za-z]+|.)?', re.DOTALL)
    result = []
    depth = 0
    i = 0
    while i < len(col_spec):
        ch = col_spec[i]
        if ch == "\\":
            # Copy a whole control word so its letters are never rewritten.
            token = control_sequence.match(col_spec, i)
            result.append(token.group(0))
            i = token.end()
            continue
        if ch == "{":
            depth += 1
        elif ch == "}":
            depth = max(depth - 1, 0)
        elif depth == 0 and ch in "lcr":
            ch = "X"
        result.append(ch)
        i += 1
    return "".join(result)
|
|
179
|
+
|
|
180
|
+
|
|
181
|
+
def fix_table_fullwidth_native(
    tex_content: str,
    template_layout: str = "two-column",
) -> Tuple[str, Dict[str, Any]]:
    r"""Full-width table repair with an extra font-size fallback.

    Delegates to ``fix_table_fullwidth`` and then, when an overflow hint is
    present, puts ``\small`` in front of the first ``tabularx`` environment.

    Args:
        tex_content: Contents of the .tex file.
        template_layout: "two-column" or "single-column".

    Returns:
        ``(modified_content, change_record)``.
    """
    change_record: Dict[str, Any] = {
        "defect_id": "B2-table-native-fullwidth",
        "action": "none",
        "layout": template_layout,
    }

    # Run the main table fixer first.
    modified, record = fix_table_fullwidth(tex_content, template_layout)
    # NOTE(review): update() lets the delegate's "defect_id" replace the one
    # declared above; kept as-is for compatibility - confirm which id callers
    # expect.
    change_record.update(record)

    # NOTE(review): the hint is looked up in the .tex source (and an optional
    # "note" entry), not in a compile log - confirm this is intended.
    overflow_hinted = 'Overfull' in tex_content or 'overflow' in change_record.get("note", "")

    opener = "\\begin{tabularx}"
    first = modified.find(opener)
    already_small = first != -1 and modified[:first].rstrip().endswith("\\small")
    if overflow_hinted and first != -1 and not already_small:
        # Plain slicing instead of re.sub: a "\small..." replacement template
        # is rejected by the regex engine as a bad escape ("\s").
        modified = modified[:first] + "\\small" + modified[first:]
        suffix = "added \\small for tighter fit"
        if change_record["action"] == "none":
            change_record["action"] = suffix
        else:
            change_record["action"] += " + " + suffix

    return modified, change_record
|
|
223
|
+
|
|
224
|
+
|
|
225
|
+
def fix_all_floats_fullwidth(
    tex_content: str,
    template_layout: str = "two-column",
) -> Tuple[str, Dict[str, Any]]:
    """Run the figure fixer and then the table fixer over the document.

    Args:
        tex_content: Contents of the .tex file.
        template_layout: "two-column" or "single-column".

    Returns:
        ``(modified_content, change_record)``. ``actions`` collects the
        records of the fixers that reported a change; ``action`` summarises
        how many of them did, or is ``"none"``.
    """
    applied = []
    current = tex_content

    # Figures first, tables second; each fixer works on the previous output.
    for fixer in (fix_figure_fullwidth, fix_table_fullwidth):
        current, outcome = fixer(current, template_layout)
        if outcome["action"] != "none":
            applied.append(outcome)

    summary = f"fixed {len(applied)} float types" if applied else "none"
    change_record = {
        "defect_id": "B2-all-floats-fullwidth",
        "actions": applied,
        "action": summary,
    }
    return current, change_record
|
|
262
|
+
|
|
263
|
+
|
|
264
|
+
def ensure_reference_newpage(
    tex_content: str,
) -> Tuple[str, Dict[str, Any]]:
    r"""Make the bibliography start on a fresh page.

    Looks for ``\bibliography{...}``, ``\printbibliography`` and
    ``\begin{thebibliography}`` (in that order) and inserts ``\newpage`` on
    its own line directly before the first one that has no ``\newpage`` or
    ``\clearpage`` in the 100 characters preceding it. Commented-out commands
    are ignored. No heading is inserted: the bibliography commands emit
    their own.

    Args:
        tex_content: Contents of the .tex file.

    Returns:
        ``(modified_content, change_record)``. When nothing is inserted the
        content is returned unchanged and ``change_record["note"]`` says why.
    """
    change_record: Dict[str, Any] = {
        "defect_id": "A3-reference-separation",
        "action": "none",
    }

    biblio_patterns = [
        (r'(\\bibliography\{[^}]*\})', '\\bibliography'),
        (r'(\\printbibliography)', '\\printbibliography'),
        (r'(\\begin\{thebibliography\})', '\\begin{thebibliography}'),
    ]
    page_breaks = ("\\newpage", "\\clearpage")
    lookback = 100  # how far back an existing page break is searched for
    found_any = False

    for pattern, name in biblio_patterns:
        for match in re.finditer(pattern, tex_content):
            biblio_start = match.start()
            line_start = tex_content.rfind("\n", 0, biblio_start) + 1
            line_prefix = tex_content[line_start:biblio_start]

            # An unescaped % earlier on the line means the command is
            # commented out.
            if re.search(r'(?<!\\)%', line_prefix):
                continue
            found_any = True

            context_before = tex_content[max(0, biblio_start - lookback):biblio_start]
            if any(marker in context_before for marker in page_breaks):
                continue

            # Insert at the start of the command's own line when only
            # indentation precedes it, so no body text is pushed past the
            # page break.
            insert_pos = biblio_start if line_prefix.strip() else line_start
            modified = tex_content[:insert_pos] + "\\newpage\n" + tex_content[insert_pos:]
            change_record["action"] = f"added \\newpage before {name}"
            return modified, change_record

    if found_any:
        change_record["note"] = "page break already present before bibliography"
    else:
        change_record["note"] = "no bibliography command found"
    return tex_content, change_record
|
|
321
|
+
|
|
322
|
+
|
|
323
|
+
def fix_body_last_page(
    tex_content: str,
    target_section: str | None = None,
) -> Tuple[str, Dict[str, Any]]:
    r"""Separate the references and flag a too-short closing section.

    First delegates to ``ensure_reference_newpage``. Then, when
    ``target_section`` is given, locates ``\section{<target>}`` or
    ``\section*{<target>}`` and records a suggestion when the section body
    has fewer than three source lines. The section text itself is never
    rewritten here.

    Args:
        tex_content: Contents of the .tex file.
        target_section: Title of the section to inspect (e.g. "Conclusion").
            It is matched literally; regex metacharacters are escaped.

    Returns:
        ``(modified_content, change_record)``.
    """
    change_record: Dict[str, Any] = {
        "defect_id": "A2-body-last-page",
        "action": "none",
    }

    # Make sure the references start on their own page first.
    modified, ref_record = ensure_reference_newpage(tex_content)
    if ref_record["action"] != "none":
        change_record["action"] = ref_record["action"]
        change_record["ref_separation"] = ref_record

    if not target_section:
        return modified, change_record

    # re.escape keeps titles such as "Conclusion (and Outlook)" from being
    # interpreted as regex syntax.
    heading = re.search(
        rf'\\section\*?\{{\s*{re.escape(target_section)}\s*\}}',
        modified,
    )
    if not heading:
        return modified, change_record

    conclusion_start = heading.end()
    # The section body ends at the first page break, following section,
    # bibliography command or document end, whichever comes first.
    end_markers = (
        "\\newpage",
        "\\clearpage",
        "\\section",
        "\\bibliography",
        "\\printbibliography",
        "\\begin{thebibliography}",
        "\\end{document}",
    )
    hits = [
        pos
        for pos in (modified.find(marker, conclusion_start) for marker in end_markers)
        if pos != -1
    ]

    if hits and min(hits) > conclusion_start:
        conclusion_content = modified[conclusion_start:min(hits)]
        lines = conclusion_content.strip().split('\n')

        # Fewer than three source lines: suggest expanding the section.
        if len(lines) < 3:
            change_record["suggestion"] = "expand conclusion section to fill page"
            change_record["action"] = "identified short conclusion section"

    return modified, change_record
|
|
@@ -0,0 +1,250 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Overflow Repair Fixers - Category D 缺陷修复
|
|
3
|
+
|
|
4
|
+
处理 overfull hbox、公式溢出、URL 溢出等问题。
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import re
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
from typing import Any, Dict, List, Tuple
|
|
10
|
+
|
|
11
|
+
from .utils import add_package_to_preamble, add_to_preamble, find_paragraph_end, find_paragraph_start
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def fix_overfull_hbox(
|
|
15
|
+
tex_content: str,
|
|
16
|
+
line_number: int,
|
|
17
|
+
overflow_type: str = "paragraph",
|
|
18
|
+
overflow_amount: float | None = None,
|
|
19
|
+
) -> Tuple[str, Dict[str, Any]]:
|
|
20
|
+
"""
|
|
21
|
+
修复指定行的 overfull hbox 问题。
|
|
22
|
+
|
|
23
|
+
Args:
|
|
24
|
+
tex_content: .tex 文件内容
|
|
25
|
+
line_number: 问题所在行号(从 1 开始)
|
|
26
|
+
overflow_type: 溢出类型 (paragraph/table/formula)
|
|
27
|
+
overflow_amount: 溢出量(pt)
|
|
28
|
+
|
|
29
|
+
Returns:
|
|
30
|
+
(modified_content, change_record)
|
|
31
|
+
"""
|
|
32
|
+
lines = tex_content.split('\n')
|
|
33
|
+
if line_number < 1 or line_number > len(lines):
|
|
34
|
+
return tex_content, {"status": "failed", "reason": "行号超出范围"}
|
|
35
|
+
|
|
36
|
+
target_line = lines[line_number - 1]
|
|
37
|
+
change_record = {
|
|
38
|
+
"defect_id": "D1",
|
|
39
|
+
"line": line_number,
|
|
40
|
+
"type": overflow_type,
|
|
41
|
+
"overflow_amount": overflow_amount,
|
|
42
|
+
}
|
|
43
|
+
|
|
44
|
+
# 根据溢出类型选择修复策略
|
|
45
|
+
if overflow_type == "table":
|
|
46
|
+
return fix_table_overflow(tex_content, line_number)
|
|
47
|
+
elif overflow_type == "formula":
|
|
48
|
+
return fix_long_formula(tex_content, line_number)
|
|
49
|
+
else:
|
|
50
|
+
return fix_paragraph_overflow(tex_content, line_number)
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def fix_paragraph_overflow(tex_content: str, line_number: int | None = None) -> Tuple[str, Dict[str, Any]]:
|
|
54
|
+
"""
|
|
55
|
+
修复段落文本溢出。
|
|
56
|
+
|
|
57
|
+
策略:
|
|
58
|
+
1. 在长单词中插入断词点 \-
|
|
59
|
+
2. 添加 \emergencystretch 允许额外拉伸
|
|
60
|
+
"""
|
|
61
|
+
lines = tex_content.split('\n')
|
|
62
|
+
change_record = {
|
|
63
|
+
"defect_id": "D1-paragraph",
|
|
64
|
+
"action": "none",
|
|
65
|
+
}
|
|
66
|
+
|
|
67
|
+
# 策略 1: 查找长单词并添加断词点
|
|
68
|
+
if line_number:
|
|
69
|
+
target_idx = line_number - 1
|
|
70
|
+
if 0 <= target_idx < len(lines):
|
|
71
|
+
line = lines[target_idx]
|
|
72
|
+
# 查找长度超过 15 的单词
|
|
73
|
+
long_words = re.findall(r'\b[a-zA-Z]{15,}\b', line)
|
|
74
|
+
if long_words:
|
|
75
|
+
for word in long_words[:2]: # 最多处理 2 个单词
|
|
76
|
+
# 在元音后添加断词点
|
|
77
|
+
hyphenated = add_hyphenation_points(word)
|
|
78
|
+
line = line.replace(word, hyphenated, 1)
|
|
79
|
+
lines[target_idx] = line
|
|
80
|
+
change_record["action"] = f"insert_hyphenation in {long_words}"
|
|
81
|
+
|
|
82
|
+
# 策略 2: 如果仍未解决,在段落前添加\emergencystretch
|
|
83
|
+
if change_record["action"] == "none":
|
|
84
|
+
# 查找段落开始(简单启发式:找到包含文本的行)
|
|
85
|
+
if line_number:
|
|
86
|
+
# 向前查找段落开始
|
|
87
|
+
para_start = find_paragraph_start(lines, line_number - 1)
|
|
88
|
+
if para_start >= 0:
|
|
89
|
+
# 在段落前插入\emergencystretch
|
|
90
|
+
emergencystretch_line = "{\\emergencystretch=1.5em "
|
|
91
|
+
lines.insert(para_start, emergencystretch_line)
|
|
92
|
+
# 在段落结束添加闭合括号
|
|
93
|
+
para_end = find_paragraph_end(lines, para_start + 1)
|
|
94
|
+
if para_end >= 0:
|
|
95
|
+
lines.insert(para_end + 1, "}")
|
|
96
|
+
change_record["action"] = f"add_emergencystretch at line {para_start}"
|
|
97
|
+
|
|
98
|
+
return '\n'.join(lines), change_record
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
def add_hyphenation_points(word: str) -> str:
    r"""Insert LaTeX discretionary hyphens (``\-``) into ``word``.

    Heuristic: a break point is placed after a vowel when the next character
    is an alphabetic non-vowel. The last two characters of the word are
    never followed by a break point.
    """
    vowel_set = "aeiouAEIOU"
    cutoff = len(word) - 2
    pieces = []
    for pos, letter in enumerate(word):
        pieces.append(letter)
        if pos >= cutoff or letter not in vowel_set:
            continue
        follower = word[pos + 1]
        if follower.isalpha() and follower not in vowel_set:
            pieces.append("\\-")
    return "".join(pieces)
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
def find_paragraph_start(lines: List[str], target_idx: int) -> int:
    r"""Walk backwards from ``target_idx`` to the first line of its paragraph.

    A blank line, or a line starting with ``\begin`` or ``\section``, is a
    boundary. The index just after the boundary is returned, or the boundary
    index itself when the target line is the boundary. Returns 0 when no
    boundary is found.
    """
    boundary_prefixes = ('\\begin', '\\section')
    idx = target_idx
    while idx >= 0:
        stripped = lines[idx].strip()
        if stripped == "" or stripped.startswith(boundary_prefixes):
            return idx if idx == target_idx else idx + 1
        idx -= 1
    return 0
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
def find_paragraph_end(lines: List[str], start_idx: int) -> int:
    r"""Walk forwards from ``start_idx`` to the last line of its paragraph.

    A blank line, or a line starting with ``\end`` or ``\section``, is a
    boundary. The index just before the boundary is returned, or the boundary
    index itself when the start line is the boundary. Returns the last index
    when no boundary is found.
    """
    stop_prefixes = ('\\end', '\\section')
    total = len(lines)
    idx = start_idx
    while idx < total:
        stripped = lines[idx].strip()
        if stripped == "" or stripped.startswith(stop_prefixes):
            return idx if idx == start_idx else idx - 1
        idx += 1
    return total - 1
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
def fix_table_overflow(tex_content: str, line_number: int | None = None) -> Tuple[str, Dict[str, Any]]:
|
|
137
|
+
"""
|
|
138
|
+
修复表格溢出。
|
|
139
|
+
|
|
140
|
+
策略:
|
|
141
|
+
1. 将 tabular 替换为 tabularx
|
|
142
|
+
2. 设置列宽为\linewidth
|
|
143
|
+
3. 使用 p{width}列类型
|
|
144
|
+
"""
|
|
145
|
+
change_record = {
|
|
146
|
+
"defect_id": "D1-table",
|
|
147
|
+
"action": "none",
|
|
148
|
+
}
|
|
149
|
+
|
|
150
|
+
# 策略 1: 查找 tabular 环境并替换为 tabularx
|
|
151
|
+
tabular_pattern = r'\\begin\{tabular\}(\{[^}]*\})'
|
|
152
|
+
|
|
153
|
+
def replace_with_tabularx(match):
|
|
154
|
+
col_spec = match.group(1)
|
|
155
|
+
# 将 l/c/r列替换为 X 列(保留第一列)
|
|
156
|
+
cols = col_spec.strip('{}').split('|')
|
|
157
|
+
new_cols = []
|
|
158
|
+
for col in cols:
|
|
159
|
+
if col.strip() in ['l', 'c', 'r']:
|
|
160
|
+
new_cols.append('X')
|
|
161
|
+
else:
|
|
162
|
+
new_cols.append(col)
|
|
163
|
+
new_spec = '{' + '|'.join(new_cols) + '}'
|
|
164
|
+
change_record["action"] = f"replaced tabular with tabularx, col spec: {new_spec}"
|
|
165
|
+
return f"\\begin{{tabularx}}{{\\linewidth}}{new_spec}"
|
|
166
|
+
|
|
167
|
+
modified = re.sub(tabular_pattern, replace_with_tabularx, tex_content, count=1)
|
|
168
|
+
|
|
169
|
+
if modified != tex_content:
|
|
170
|
+
# 检查是否需要添加 tabularx 宏包
|
|
171
|
+
if '\\usepackage{tabularx}' not in modified:
|
|
172
|
+
# 在导言区添加宏包
|
|
173
|
+
modified = add_package_to_preamble(modified, "tabularx")
|
|
174
|
+
change_record["packages_added"] = ["tabularx"]
|
|
175
|
+
|
|
176
|
+
return modified, change_record
|
|
177
|
+
|
|
178
|
+
|
|
179
|
+
def fix_long_formula(tex_content: str, line_number: int | None = None) -> Tuple[str, Dict[str, Any]]:
|
|
180
|
+
"""
|
|
181
|
+
修复长公式溢出。
|
|
182
|
+
|
|
183
|
+
策略:
|
|
184
|
+
1. 将 equation 替换为 multline 或 split
|
|
185
|
+
2. 在运算符后添加换行\\\\
|
|
186
|
+
"""
|
|
187
|
+
change_record = {
|
|
188
|
+
"defect_id": "D2-formula",
|
|
189
|
+
"action": "none",
|
|
190
|
+
}
|
|
191
|
+
|
|
192
|
+
# 策略 1: 查找单行 equation 环境并替换为 split
|
|
193
|
+
equation_pattern = r'\\begin\{equation\}([^\\]*?)\\end\{equation\}'
|
|
194
|
+
|
|
195
|
+
def replace_with_split(match):
|
|
196
|
+
formula = match.group(1)
|
|
197
|
+
# 查找等号或加减号位置
|
|
198
|
+
break_points = []
|
|
199
|
+
for i, char in enumerate(formula):
|
|
200
|
+
if char in '=+-' and i > len(formula) // 3 and i < 2 * len(formula) // 3:
|
|
201
|
+
break_points.append(i)
|
|
202
|
+
|
|
203
|
+
if break_points:
|
|
204
|
+
bp = break_points[0]
|
|
205
|
+
# 在断点处插入\\
|
|
206
|
+
part1 = formula[:bp+1].rstrip()
|
|
207
|
+
part2 = formula[bp+1:].lstrip()
|
|
208
|
+
new_formula = f"{part1}\n\\\\\n\\quad {part2}"
|
|
209
|
+
change_record["action"] = "split equation at operator"
|
|
210
|
+
return f"\\begin{{split}}\n{new_formula}\n\\end{{split}}"
|
|
211
|
+
return match.group(0)
|
|
212
|
+
|
|
213
|
+
modified = re.sub(equation_pattern, replace_with_split, tex_content, count=1, flags=re.DOTALL)
|
|
214
|
+
|
|
215
|
+
return modified, change_record
|
|
216
|
+
|
|
217
|
+
|
|
218
|
+
def fix_url_overflow(tex_content: str, url: str | None = None) -> Tuple[str, Dict[str, Any]]:
|
|
219
|
+
"""
|
|
220
|
+
修复 URL 溢出。
|
|
221
|
+
|
|
222
|
+
策略:
|
|
223
|
+
1. 使用\\url{}命令包裹
|
|
224
|
+
2. 添加\\urlbreaks 配置
|
|
225
|
+
"""
|
|
226
|
+
change_record = {
|
|
227
|
+
"defect_id": "D3-url",
|
|
228
|
+
"action": "none",
|
|
229
|
+
}
|
|
230
|
+
|
|
231
|
+
# 策略 1: 查找裸 URL 并替换为\\url{}
|
|
232
|
+
url_pattern = r'(https?://[^\s\}\]\)]+)'
|
|
233
|
+
|
|
234
|
+
def wrap_with_url(match):
|
|
235
|
+
raw_url = match.group(1)
|
|
236
|
+
if not raw_url.startswith('\\url{'):
|
|
237
|
+
change_record["action"] = f"wrapped URL with \\url command"
|
|
238
|
+
return f"\\url{{{raw_url}}}"
|
|
239
|
+
return raw_url
|
|
240
|
+
|
|
241
|
+
modified = re.sub(url_pattern, wrap_with_url, tex_content)
|
|
242
|
+
|
|
243
|
+
# 策略 2: 添加 URL 断行配置
|
|
244
|
+
if change_record["action"] and '\\def\\UrlBreaks' not in modified:
|
|
245
|
+
config_line = "\\def\\UrlBreaks{\\do\\/\\do-}"
|
|
246
|
+
# 在导言区添加
|
|
247
|
+
modified = add_to_preamble(modified, config_line)
|
|
248
|
+
change_record["preamble_added"] = config_line
|
|
249
|
+
|
|
250
|
+
return modified, change_record
|