paperfit-cli 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/commands/adjust-length.md +21 -0
- package/.claude/commands/check-visual.md +27 -0
- package/.claude/commands/fix-layout.md +31 -0
- package/.claude/commands/migrate-template.md +23 -0
- package/.claude/commands/repair-table.md +21 -0
- package/.claude/commands/show-status.md +32 -0
- package/.claude-plugin/README.md +77 -0
- package/.claude-plugin/marketplace.json +41 -0
- package/.claude-plugin/plugin.json +39 -0
- package/CLAUDE.md +266 -0
- package/CONTRIBUTING.md +131 -0
- package/LICENSE +21 -0
- package/README.md +164 -0
- package/agents/code-surgeon-agent.md +214 -0
- package/agents/layout-detective-agent.md +229 -0
- package/agents/orchestrator-agent.md +254 -0
- package/agents/quality-gatekeeper-agent.md +270 -0
- package/agents/rule-engine-agent.md +224 -0
- package/agents/semantic-polish-agent.md +250 -0
- package/bin/paperfit.js +176 -0
- package/config/agent_roles.yaml +56 -0
- package/config/layout_rules.yaml +54 -0
- package/config/templates.yaml +241 -0
- package/config/vto_taxonomy.yaml +489 -0
- package/config/writing_rules.yaml +64 -0
- package/install.sh +30 -0
- package/package.json +52 -0
- package/requirements.txt +5 -0
- package/scripts/benchmark_runner.py +629 -0
- package/scripts/compile.sh +244 -0
- package/scripts/config_validator.py +339 -0
- package/scripts/cv_detector.py +600 -0
- package/scripts/evidence_collector.py +167 -0
- package/scripts/float_fixers.py +861 -0
- package/scripts/inject_defects.py +549 -0
- package/scripts/install-claude-global.js +148 -0
- package/scripts/install.js +66 -0
- package/scripts/install.sh +106 -0
- package/scripts/overflow_fixers.py +656 -0
- package/scripts/package-for-opensource.sh +138 -0
- package/scripts/parse_log.py +260 -0
- package/scripts/postinstall.js +38 -0
- package/scripts/pre_tool_use.py +265 -0
- package/scripts/render_pages.py +244 -0
- package/scripts/session_logger.py +329 -0
- package/scripts/space_util_fixers.py +773 -0
- package/scripts/state_manager.py +352 -0
- package/scripts/test_commands.py +187 -0
- package/scripts/test_cv_detector.py +214 -0
- package/scripts/test_integration.py +290 -0
- package/skills/consistency-polisher/SKILL.md +337 -0
- package/skills/float-optimizer/SKILL.md +284 -0
- package/skills/latex_fixers/__init__.py +82 -0
- package/skills/latex_fixers/float_fixers.py +392 -0
- package/skills/latex_fixers/fullwidth_fixers.py +375 -0
- package/skills/latex_fixers/overflow_fixers.py +250 -0
- package/skills/latex_fixers/semantic_micro_tuning.py +362 -0
- package/skills/latex_fixers/space_util_fixers.py +389 -0
- package/skills/latex_fixers/utils.py +55 -0
- package/skills/overflow-repair/SKILL.md +304 -0
- package/skills/space-util-fixer/SKILL.md +307 -0
- package/skills/taxonomy-vto/SKILL.md +486 -0
- package/skills/template-migrator/SKILL.md +251 -0
- package/skills/visual-inspector/SKILL.md +217 -0
- package/skills/writing-polish/SKILL.md +289 -0
|
@@ -0,0 +1,861 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Float Fixers Module
|
|
4
|
+
|
|
5
|
+
处理 Category B:浮动体缺陷
|
|
6
|
+
- B1: 浮动体远离首次引用
|
|
7
|
+
- B2: 浮动体大小不适配栏宽
|
|
8
|
+
- B3: 浮动体连续堆叠
|
|
9
|
+
- B4: 浮动体跨页分裂
|
|
10
|
+
|
|
11
|
+
该模块被 code-surgeon-agent 调用,执行对 .tex 源码的精确修改。
|
|
12
|
+
所有修复遵循最小修改原则,不改变学术内容。
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
import re
|
|
16
|
+
from pathlib import Path
|
|
17
|
+
from dataclasses import dataclass, field
|
|
18
|
+
from typing import List, Dict, Optional, Tuple, Any
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
# ============================================================
|
|
22
|
+
# 数据结构定义
|
|
23
|
+
# ============================================================
|
|
24
|
+
|
|
25
|
+
@dataclass
class FixResult:
    """Record of a single edit applied (or attempted) on the .tex source."""
    # Defect category code; the fixers in this module emit "B1".."B4".
    defect_id: str
    # Label (or other name) of the object the edit targeted.
    object_name: str
    # Human-readable description of what was done.
    action: str
    # Short snippet of the source before the edit.
    before: str
    # Short snippet of the source after the edit.
    after: str
    # Page number associated with the defect; 0 when not provided.
    page: int = 0
    # Source line number associated with the defect, if known.
    line_number: Optional[int] = None
    # True when the edit was actually applied.
    success: bool = False
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
@dataclass
class FloatFixReport:
    """Aggregate outcome of one float-fixing run over a .tex file."""
    status: str  # success | partial | failed
    modified_files: List[str] = field(default_factory=list)
    changes: List[FixResult] = field(default_factory=list)
    unresolved: List[str] = field(default_factory=list)

    def to_dict(self) -> Dict[str, Any]:
        """Return a JSON-serializable view of the report.

        Each change is flattened into a plain dict; note that the
        ``object_name`` attribute is exported under the key ``"object"``.
        """
        serialized_changes = []
        for change in self.changes:
            entry = {}
            entry["defect_id"] = change.defect_id
            entry["object"] = change.object_name
            entry["action"] = change.action
            entry["before"] = change.before
            entry["after"] = change.after
            entry["page"] = change.page
            entry["line_number"] = change.line_number
            entry["success"] = change.success
            serialized_changes.append(entry)

        report = {}
        report["skill"] = "float-optimizer"
        report["status"] = self.status
        report["modified_files"] = self.modified_files
        report["changes"] = serialized_changes
        report["unresolved"] = self.unresolved
        return report
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
# ============================================================
|
|
69
|
+
# B1:浮动体远离首次引用
|
|
70
|
+
# ============================================================
|
|
71
|
+
|
|
72
|
+
def fix_float_reference_distance(
    tex_content: str,
    float_label: str,
    ref_page: int,
    float_page: int,
) -> Tuple[str, Optional[FixResult]]:
    """
    Bring a float closer to its first reference (defect B1).

    Strategies, tried in order (the first one that applies wins):
      1. Relax a restrictive placement specifier to ``[htbp]`` (or add
         ``[htbp]`` when the float has no specifier at all).
      2. Insert ``\\FloatBarrier`` after the paragraph that contains the
         first reference to the float.

    Args:
        tex_content: Full text of the .tex file.
        float_label: Label of the float, e.g. "fig:result" or "tab:results".
            A label containing "fig" is treated as a figure, anything else
            as a table.
        ref_page: Page number of the first reference.
        float_page: Page number on which the float actually appears.

    Returns:
        ``(new_content, FixResult)`` when an edit was made, otherwise
        ``(tex_content, None)``.
    """
    # A float at most one page away from its reference is acceptable.
    if abs(float_page - ref_page) <= 1:
        return tex_content, None

    float_type = "figure" if "fig" in float_label.lower() else "table"

    begin_re = re.compile(r'\\begin\{' + float_type + r'\}(\[[^\]]*\])?')
    end_token = "\\end{" + float_type + "}"
    label_token = "\\label{" + float_label + "}"

    # Find the environment that really contains the label. The search is
    # bounded by the environment's own \end{...} so a label belonging to a
    # later float can never be attributed to an earlier one.
    target_match = None
    for candidate in begin_re.finditer(tex_content):
        env_end = tex_content.find(end_token, candidate.end())
        if env_end == -1:
            env_end = len(tex_content)
        if tex_content.find(label_token, candidate.end(), env_end) != -1:
            target_match = candidate
            break

    new_param = "[htbp]"
    if target_match:
        pos_param = target_match.group(1) or ""

        # Strategy 1a: a single-position specifier is too restrictive.
        if pos_param in ('[t]', '[b]', '[h]', '[!t]', '[!b]', '[!h]'):
            modified_content = (
                tex_content[:target_match.start(1)]
                + new_param
                + tex_content[target_match.end(1):]
            )
            return modified_content, FixResult(
                defect_id="B1",
                object_name=float_label,
                action=f"将浮动体位置参数从 {pos_param} 改为 {new_param}",
                before=f"\\begin{{{float_type}}}{pos_param}",
                after=f"\\begin{{{float_type}}}{new_param}",
                success=True,
            )

        # Strategy 1b: no specifier at all, so add one.
        if not pos_param:
            insert_pos = target_match.end()
            modified_content = tex_content[:insert_pos] + new_param + tex_content[insert_pos:]
            return modified_content, FixResult(
                defect_id="B1",
                object_name=float_label,
                action=f"添加浮动体位置参数 {new_param}",
                before=f"\\begin{{{float_type}}}",
                after=f"\\begin{{{float_type}}}{new_param}",
                success=True,
            )

    # Strategy 2: put a float barrier after the first reference.
    ref_pattern = r'\\(ref|autoref|cref|Cref)\{' + re.escape(float_label) + r'\}'
    ref_match = re.search(ref_pattern, tex_content)

    if ref_match:
        ref_end = ref_match.end()
        # \FloatBarrier ends the current paragraph, so inserting it directly
        # after \ref{...} would split the sentence. Place it at the end of
        # the paragraph instead (or at end of line when no blank line follows).
        paragraph_break = re.compile(r'\n[ \t]*\n').search(tex_content, ref_end)
        if paragraph_break:
            insert_pos = paragraph_break.start()
        else:
            line_end = tex_content.find("\n", ref_end)
            insert_pos = line_end if line_end != -1 else len(tex_content)

        # Skip when a barrier already sits between the reference and shortly
        # after the insertion point.
        if '\\FloatBarrier' not in tex_content[ref_end:insert_pos + 100]:
            modified_content = (
                tex_content[:insert_pos] + "\n\\FloatBarrier" + tex_content[insert_pos:]
            )
            return modified_content, FixResult(
                defect_id="B1",
                object_name=float_label,
                action="在引用后添加 \\FloatBarrier 以阻止浮动体继续漂后",
                before=ref_match.group(0)[:30] + "...",
                after=ref_match.group(0) + "\n\\FloatBarrier",
                success=True,
            )

    return tex_content, None
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
def add_floatbarrier_to_preamble(
    tex_content: str,
) -> Tuple[str, Optional[FixResult]]:
    """
    Ensure the placeins package (which provides ``\\FloatBarrier``) is loaded.

    The package is considered present when any ``\\usepackage`` line names
    it, with or without options and alone or in a comma-separated list
    (e.g. ``\\usepackage[section]{placeins}``). Otherwise
    ``\\usepackage{placeins}`` is inserted immediately before
    ``\\begin{document}``.

    Args:
        tex_content: Full text of the .tex file.

    Returns:
        ``(new_content, FixResult)`` when the package line was added;
        ``(tex_content, None)`` when it is already loaded or the text has
        no ``\\begin{document}``.
    """
    already_loaded = re.search(
        r'\\usepackage(?:\[[^\]]*\])?\{[^}]*\bplaceins\b[^}]*\}',
        tex_content,
    )
    if already_loaded:
        return tex_content, None

    # Insert just before \begin{document} so it is the last preamble line.
    document_start = re.search(r'\\begin\{document\}', tex_content)
    if document_start is None:
        return tex_content, None

    insert_pos = document_start.start()
    modified_content = (
        tex_content[:insert_pos] + "\\usepackage{placeins}\n" + tex_content[insert_pos:]
    )
    return modified_content, FixResult(
        defect_id="B1",
        object_name="导言区",
        action="添加 placeins 宏包以支持 \\FloatBarrier",
        before="\\begin{document}",
        after="\\usepackage{placeins}\n\\begin{document}",
        success=True,
    )
|
|
198
|
+
|
|
199
|
+
|
|
200
|
+
# ============================================================
|
|
201
|
+
# B2:浮动体大小不适配栏宽
|
|
202
|
+
# ============================================================
|
|
203
|
+
|
|
204
|
+
def fix_figure_width_mismatch(
    tex_content: str,
    figure_label: str,
    template_type: str = "single_column",
) -> Tuple[str, Optional[FixResult]]:
    """
    Normalize the width of a figure's graphic to ``\\linewidth`` (defect B2).

    The figure (``figure`` or ``figure*``) is located by its label, which may
    appear anywhere inside the environment. The first ``\\includegraphics``
    of that environment is then edited in place:
      * an existing ``width=...`` option is replaced by ``width=\\linewidth``;
      * otherwise ``width=\\linewidth`` is appended to the option list, or an
        option list is created.

    Args:
        tex_content: Full text of the .tex file.
        figure_label: Label of the figure to fix.
        template_type: "single_column" or "double_column". Accepted for
            interface compatibility; the current strategy does not consult it.

    Returns:
        ``(new_content, FixResult)`` when the graphic was edited, otherwise
        ``(tex_content, None)`` (figure not found, no graphic, or the width
        already uses ``\\linewidth`` / ``\\textwidth``).
    """
    begin_re = re.compile(r'\\begin\{(figure\*?)\}')
    graphic_re = re.compile(r'\\includegraphics(?:\[[^\]]*\])?\{[^}]+\}')
    label_token = "\\label{" + figure_label + "}"

    # Find the labelled environment, then the first graphic inside it. The
    # label normally follows the graphic (after the caption), so the whole
    # environment has to be searched, not just the part before the graphic.
    graphic_match = None
    for begin in begin_re.finditer(tex_content):
        end_token = "\\end{" + begin.group(1) + "}"
        env_end = tex_content.find(end_token, begin.end())
        if env_end == -1:
            env_end = len(tex_content)
        if tex_content.find(label_token, begin.end(), env_end) == -1:
            continue
        graphic_match = graphic_re.search(tex_content, begin.end(), env_end)
        break

    if graphic_match is None:
        return tex_content, None

    include_graphic = graphic_match.group(0)

    # Already sized relative to the line/text width: nothing to do.
    if r'\linewidth' in include_graphic or r'\textwidth' in include_graphic:
        return tex_content, None

    if re.search(r'width=([^\s,\]]+)', include_graphic):
        # Replace the existing width value.
        new_graphic = re.sub(
            r'width=[^\s,\]]+',
            r'width=\\linewidth',
            include_graphic,
        )
    elif include_graphic.startswith('\\includegraphics['):
        # Options exist but none sets the width: append before the ']'.
        bracket_pos = include_graphic.find(']')
        new_graphic = (
            include_graphic[:bracket_pos] + ',width=\\linewidth' + include_graphic[bracket_pos:]
        )
    else:
        # No option list at all.
        new_graphic = include_graphic.replace(
            '\\includegraphics',
            '\\includegraphics[width=\\linewidth]',
        )

    # Splice by position so an identical \includegraphics elsewhere in the
    # file is never touched.
    modified_content = (
        tex_content[:graphic_match.start()] + new_graphic + tex_content[graphic_match.end():]
    )

    return modified_content, FixResult(
        defect_id="B2",
        object_name=figure_label,
        action="将图片宽度设为 \\linewidth",
        before=include_graphic[:50] + "...",
        after=new_graphic[:50] + "...",
        success=True,
    )
|
|
282
|
+
|
|
283
|
+
|
|
284
|
+
def fix_table_width_mismatch(
    tex_content: str,
    table_label: str,
) -> Tuple[str, Optional[FixResult]]:
    """
    Fit a table to the line width by converting tabular to tabularx (B2).

    The ``table`` environment containing ``table_label`` is located, and the
    first ``tabular`` inside it becomes
    ``\\begin{tabularx}{\\linewidth}{...}`` with one column turned into an
    ``X`` column (see ``_convert_to_tabularx_columns``). Only the target
    table's ``\\begin``/``\\end`` pair is rewritten.

    NOTE: this function does not add ``\\usepackage{tabularx}``; the caller
    must make sure the package is loaded.

    Args:
        tex_content: Full text of the .tex file.
        table_label: Label of the table to fix.

    Returns:
        ``(new_content, FixResult)`` when the table was converted, otherwise
        ``(tex_content, None)`` (table or tabular not found, malformed
        column spec, or no column could be turned into an ``X`` column).
    """
    table_begin_re = re.compile(r'\\begin\{table\}')
    table_end_token = "\\end{table}"
    tabular_begin_token = "\\begin{tabular}"
    tabular_end_token = "\\end{tabular}"
    label_token = "\\label{" + table_label + "}"

    # Locate the table environment that owns the label.
    env_start = env_end = -1
    for candidate in table_begin_re.finditer(tex_content):
        close = tex_content.find(table_end_token, candidate.end())
        if close == -1:
            close = len(tex_content)
        if tex_content.find(label_token, candidate.end(), close) != -1:
            env_start, env_end = candidate.start(), close
            break
    if env_start == -1:
        return tex_content, None

    tabular_start = tex_content.find(tabular_begin_token, env_start, env_end)
    if tabular_start == -1:
        return tex_content, None

    # Read the column spec with brace balancing so p{3cm}, @{} etc. survive.
    spec_start = tabular_start + len(tabular_begin_token)
    if not tex_content.startswith("{", spec_start):
        return tex_content, None
    depth = 0
    spec_end = -1
    for idx in range(spec_start, env_end):
        ch = tex_content[idx]
        if ch == "{":
            depth += 1
        elif ch == "}":
            depth -= 1
            if depth == 0:
                spec_end = idx + 1
                break
    if spec_end == -1:
        return tex_content, None

    tabular_close = tex_content.find(tabular_end_token, spec_end, env_end)
    if tabular_close == -1:
        return tex_content, None

    # Kept from the original logic: skip when the text preceding the tabular
    # ends with a {\linewidth} argument (e.g. an enclosing minipage).
    if tex_content[env_start:tabular_start].strip().endswith('{\\linewidth}'):
        return tex_content, None

    column_spec = tex_content[spec_start:spec_end]  # includes outer braces
    inner_spec = column_spec[1:-1]

    # The helper may return its result with or without outer braces;
    # normalize to the bare spec before comparing.
    converted = _convert_to_tabularx_columns(inner_spec)
    if converted != inner_spec and converted.startswith("{") and converted.endswith("}"):
        converted = converted[1:-1]
    if converted == inner_spec:
        # No column became X, so tabularx would not change the width.
        return tex_content, None

    new_begin = "\\begin{tabularx}{\\linewidth}{" + converted + "}"
    modified_content = (
        tex_content[:tabular_start]
        + new_begin
        + tex_content[spec_end:tabular_close]
        + "\\end{tabularx}"
        + tex_content[tabular_close + len(tabular_end_token):]
    )

    return modified_content, FixResult(
        defect_id="B2",
        object_name=table_label,
        action="将 tabular 改为 tabularx,宽度设为 \\linewidth",
        before=f"\\begin{{tabular}}{column_spec}",
        after=new_begin,
        success=True,
    )
|
|
351
|
+
|
|
352
|
+
|
|
353
|
+
def _convert_to_tabularx_columns(column_spec: str) -> str:
|
|
354
|
+
"""
|
|
355
|
+
将 tabular 列规格转换为 tabularx 列规格
|
|
356
|
+
策略:将最宽的文本列改为 X 列
|
|
357
|
+
"""
|
|
358
|
+
spec = column_spec.strip('{}')
|
|
359
|
+
|
|
360
|
+
# 统计列类型
|
|
361
|
+
text_columns = []
|
|
362
|
+
for i, c in enumerate(spec):
|
|
363
|
+
if c in 'lrc':
|
|
364
|
+
text_columns.append((i, c))
|
|
365
|
+
|
|
366
|
+
if not text_columns:
|
|
367
|
+
return column_spec
|
|
368
|
+
|
|
369
|
+
# 将最后一个文本列改为 X 列
|
|
370
|
+
last_text_idx, _ = text_columns[-1]
|
|
371
|
+
new_spec = spec[:last_text_idx] + 'X' + spec[last_text_idx + 1:]
|
|
372
|
+
|
|
373
|
+
return '{' + new_spec + '}'
|
|
374
|
+
|
|
375
|
+
|
|
376
|
+
def fix_wide_float_in_double_column(
    tex_content: str,
    float_label: str,
) -> Tuple[str, Optional[FixResult]]:
    """
    Make a wide float span both columns in a two-column template (B2).

    The ``figure`` / ``table`` environment that contains ``float_label`` is
    converted to its starred form (``figure*`` / ``table*``). Only that
    environment's own ``\\begin`` and ``\\end`` are rewritten; an existing
    placement specifier is kept.

    Args:
        tex_content: Full text of the .tex file.
        float_label: Label of the float. A label containing "fig" is treated
            as a figure, anything else as a table.

    Returns:
        ``(new_content, FixResult)`` when the environment was converted,
        otherwise ``(tex_content, None)``.
    """
    float_type = "figure" if "fig" in float_label.lower() else "table"

    old_env = f"\\begin{{{float_type}}}"
    new_env = f"\\begin{{{float_type}*}}"
    old_end = f"\\end{{{float_type}}}"
    new_end = f"\\end{{{float_type}*}}"
    label_token = "\\label{" + float_label + "}"

    # Walk the unstarred environments and pick the one that owns the label.
    search_from = 0
    while True:
        env_start = tex_content.find(old_env, search_from)
        if env_start == -1:
            return tex_content, None
        body_start = env_start + len(old_env)
        env_end = tex_content.find(old_end, body_start)
        if env_end == -1:
            # Unterminated environment: nothing can be converted safely.
            return tex_content, None
        if tex_content.find(label_token, body_start, env_end) != -1:
            break
        search_from = env_end + len(old_end)

    # Splice by position so other floats of the same type stay untouched.
    modified_content = (
        tex_content[:env_start]
        + new_env
        + tex_content[body_start:env_end]
        + new_end
        + tex_content[env_end + len(old_end):]
    )

    return modified_content, FixResult(
        defect_id="B2",
        object_name=float_label,
        action=f"将 {float_type} 改为 {float_type}* 以跨栏显示",
        before=old_env,
        after=new_env,
        success=True,
    )
|
|
434
|
+
|
|
435
|
+
|
|
436
|
+
# ============================================================
|
|
437
|
+
# B3:浮动体连续堆叠
|
|
438
|
+
# ============================================================
|
|
439
|
+
|
|
440
|
+
def fix_float_clustering(
    tex_content: str,
    float_labels: List[str],
) -> Tuple[str, Optional[FixResult]]:
    """
    Spread out floats that pile up next to each other (defect B3).

    Each of the first four labelled floats receives a different placement
    specifier, in order: ``[t]``, ``[b]``, ``[p]``, ``[htbp]``. Every float
    is located through its own label, so the edit lands on the intended
    environment.

    Args:
        tex_content: Full text of the .tex file.
        float_labels: Labels of the clustered floats; at least two are
            required. A label containing "fig" is treated as a figure,
            anything else as a table.

    Returns:
        ``(new_content, FixResult)`` when at least one specifier changed,
        otherwise ``(tex_content, None)``.
    """
    if len(float_labels) < 2:
        return tex_content, None

    position_prefs = ["[t]", "[b]", "[p]", "[htbp]"]
    changes_made = []

    # zip() stops after the last preference, i.e. at most four floats.
    for new_param, label in zip(position_prefs, float_labels):
        float_type = "figure" if "fig" in label.lower() else "table"
        begin_re = re.compile(r'\\begin\{' + float_type + r'\}(\[[^\]]*\])?')
        end_token = "\\end{" + float_type + "}"
        label_token = "\\label{" + label + "}"

        # Re-scan on every iteration: earlier edits shift later offsets.
        match = None
        for candidate in begin_re.finditer(tex_content):
            env_end = tex_content.find(end_token, candidate.end())
            if env_end == -1:
                env_end = len(tex_content)
            if tex_content.find(label_token, candidate.end(), env_end) != -1:
                match = candidate
                break
        if match is None:
            continue

        current_param = match.group(1) or ""
        if current_param == new_param:
            continue

        if current_param:
            tex_content = tex_content[:match.start(1)] + new_param + tex_content[match.end(1):]
        else:
            insert_pos = match.end()
            tex_content = tex_content[:insert_pos] + new_param + tex_content[insert_pos:]

        changes_made.append({
            "label": label,
            "before": current_param or f"\\begin{{{float_type}}}",
            "after": f"\\begin{{{float_type}}}{new_param}",
        })

    if not changes_made:
        return tex_content, None

    return tex_content, FixResult(
        defect_id="B3",
        object_name=", ".join(c["label"] for c in changes_made),
        action="分散浮动体位置参数以避免堆叠",
        before="; ".join(c["before"] for c in changes_made),
        after="; ".join(c["after"] for c in changes_made),
        success=True,
    )
|
|
500
|
+
|
|
501
|
+
|
|
502
|
+
# ============================================================
|
|
503
|
+
# B4:浮动体跨页分裂
|
|
504
|
+
# ============================================================
|
|
505
|
+
|
|
506
|
+
def _split_table_group_end(text: str, open_idx: int) -> int:
    """Return the index just past the brace group opening at ``open_idx``, or -1."""
    depth = 0
    for idx in range(open_idx, len(text)):
        ch = text[idx]
        if ch == "{":
            depth += 1
        elif ch == "}":
            depth -= 1
            if depth == 0:
                return idx + 1
    return -1


def _split_table_column_count(spec: str) -> int:
    """Count the columns declared by a tabular column spec (without outer braces)."""
    count = 0
    i = 0
    while i < len(spec):
        ch = spec[i]
        if ch == "*" and spec.startswith("{", i + 1):
            # *{n}{cols} declares n repetitions of cols.
            rep_end = _split_table_group_end(spec, i + 1)
            cols_end = -1
            if rep_end != -1 and spec.startswith("{", rep_end):
                cols_end = _split_table_group_end(spec, rep_end)
            if cols_end == -1:
                i += 1
                continue
            rep_text = spec[i + 2:rep_end - 1].strip()
            reps = int(rep_text) if rep_text.isdigit() else 1
            count += reps * _split_table_column_count(spec[rep_end + 1:cols_end - 1])
            i = cols_end
        elif ch in "lrcXS":
            count += 1
            i += 1
        elif ch in "pmb@!><":
            # p/m/b are columns with a width argument; @ ! > < are decorations.
            if ch in "pmb":
                count += 1
            i += 1
            if spec.startswith("{", i):
                group_end = _split_table_group_end(spec, i)
                i = group_end if group_end != -1 else len(spec)
        elif ch == "[":
            close = spec.find("]", i)
            i = close + 1 if close != -1 else len(spec)
        else:
            i += 1
    return count


def fix_split_table(
    tex_content: str,
    table_label: str,
) -> Tuple[str, Optional[FixResult]]:
    """
    Convert a table that breaks across pages into a longtable (defect B4).

    The ``table`` environment containing ``table_label`` and its first
    ``tabular`` are replaced by a single ``longtable``. The first row
    becomes the repeated header and the remaining rows keep their ``\\\\``
    terminators. The caption is taken from anywhere inside the table
    environment (before or after the tabular); the placeholder
    "Long Table" is used only when the table has no caption.

    NOTE: other content of the table environment (e.g. ``\\centering``) is
    dropped, and ``\\usepackage{longtable}`` is not added here; the caller
    must make sure the package is loaded.

    Args:
        tex_content: Full text of the .tex file.
        table_label: Label of the table to convert.

    Returns:
        ``(new_content, FixResult)`` when the table was converted, otherwise
        ``(tex_content, None)``.
    """
    begin_re = re.compile(r'\\begin\{table\}(?:\[[^\]]*\])?')
    table_end_token = "\\end{table}"
    tabular_begin_token = "\\begin{tabular}"
    tabular_end_token = "\\end{tabular}"
    label_token = "\\label{" + table_label + "}"

    # Locate the table environment that owns the label.
    env_start = body_start = env_end = -1
    for candidate in begin_re.finditer(tex_content):
        close = tex_content.find(table_end_token, candidate.end())
        if close == -1:
            break
        if tex_content.find(label_token, candidate.end(), close) != -1:
            env_start, body_start, env_end = candidate.start(), candidate.end(), close
            break
    if env_start == -1:
        return tex_content, None

    env_body = tex_content[body_start:env_end]

    # Locate the tabular and read its column spec with brace balancing.
    tabular_start = env_body.find(tabular_begin_token)
    if tabular_start == -1:
        return tex_content, None
    spec_start = tabular_start + len(tabular_begin_token)
    if not env_body.startswith("{", spec_start):
        return tex_content, None
    spec_end = _split_table_group_end(env_body, spec_start)
    if spec_end == -1:
        return tex_content, None
    tabular_close = env_body.find(tabular_end_token, spec_end)
    if tabular_close == -1:
        return tex_content, None

    column_spec = env_body[spec_start:spec_end]  # includes outer braces
    table_body = env_body[spec_end:tabular_close]
    surrounding = (
        env_body[:tabular_start] + env_body[tabular_close + len(tabular_end_token):]
    )

    # Caption: accept an optional short form and nested braces.
    caption_text = "Long Table"
    caption_open = re.search(r'\\caption(?:\[[^\]]*\])?\{', surrounding)
    if caption_open:
        brace_idx = caption_open.end() - 1
        caption_close = _split_table_group_end(surrounding, brace_idx)
        if caption_close != -1:
            caption_text = surrounding[brace_idx + 1:caption_close - 1]

    # Do not emit the label twice when it already lives inside the caption.
    label_part = "" if label_token in caption_text else f" \\label{{{table_label}}}"

    # Split into rows; the first row is the header.
    rows = table_body.split("\\\\")
    if len(rows) < 2:
        return tex_content, None
    header_row = re.sub(r'^(?:\s*\\(?:hline|toprule)\b)*\s*', '', rows[0]).strip()
    if not header_row:
        return tex_content, None

    # Re-join with the row terminator, then drop the rule right after the
    # header and the closing rule: the longtable head/foot already draw them.
    remaining = "\\\\".join(rows[1:])
    remaining = re.sub(r'^\s*\\(?:hline|midrule)\b', '', remaining, count=1)
    remaining = re.sub(r'\\(?:hline|bottomrule)\s*$', '', remaining, count=1)

    column_total = max(_split_table_column_count(column_spec[1:-1]), 1)

    longtable_content = "\n".join([
        f"\\begin{{longtable}}{column_spec}",
        f"\\caption{{{caption_text}}}{label_part} \\\\",
        "\\hline",
        f"{header_row} \\\\",
        "\\hline",
        "\\endfirsthead",
        "\\hline",
        f"{header_row} \\\\",
        "\\hline",
        "\\endhead",
        f"\\hline \\multicolumn{{{column_total}}}{{r}}{{Continued on next page}} \\\\",
        "\\endfoot",
        "\\hline",
        "\\endlastfoot",
        remaining.strip("\n"),
        "\\end{longtable}",
    ])

    # Splice by position so only the target table is replaced.
    modified_content = (
        tex_content[:env_start]
        + longtable_content
        + tex_content[env_end + len(table_end_token):]
    )

    return modified_content, FixResult(
        defect_id="B4",
        object_name=table_label,
        action="将 table+tabular 改为 longtable 以支持跨页",
        before="\\begin{table}...\\end{tabular}...\\end{table}",
        after=f"\\begin{{longtable}}{column_spec}...\\end{{longtable}}",
        success=True,
    )
|
|
595
|
+
|
|
596
|
+
|
|
597
|
+
def fix_split_figure(
    tex_content: str,
    figure_label: str,
) -> Tuple[str, Optional[FixResult]]:
    """
    Keep a figure group from splitting across pages (defect B4).

    The ``figure`` environment containing ``figure_label`` gets the
    placement specifier ``[!h]``, replacing any existing specifier.

    Args:
        tex_content: Full text of the .tex file.
        figure_label: Label of the figure to fix.

    Returns:
        ``(new_content, FixResult)`` when the specifier was changed,
        otherwise ``(tex_content, None)`` (figure not found, or it already
        uses ``[!h]``).
    """
    begin_re = re.compile(r'\\begin\{figure\}(\[[^\]]*\])?')
    end_token = "\\end{figure}"
    label_token = "\\label{" + figure_label + "}"

    # The label search is bounded by the environment's own \end{figure} so a
    # neighbouring figure's label is never attributed to this one.
    target_match = None
    for candidate in begin_re.finditer(tex_content):
        env_end = tex_content.find(end_token, candidate.end())
        if env_end == -1:
            env_end = len(tex_content)
        if tex_content.find(label_token, candidate.end(), env_end) != -1:
            target_match = candidate
            break

    if target_match is None:
        return tex_content, None

    current_param = target_match.group(1) or ""
    new_param = "[!h]"
    if current_param == new_param:
        return tex_content, None

    if current_param:
        modified_content = (
            tex_content[:target_match.start(1)] + new_param + tex_content[target_match.end(1):]
        )
    else:
        insert_pos = target_match.end()
        modified_content = tex_content[:insert_pos] + new_param + tex_content[insert_pos:]

    return modified_content, FixResult(
        defect_id="B4",
        object_name=figure_label,
        action="添加 [!h] 强制图片放置在此处以避免分裂",
        before=f"\\begin{{figure}}{current_param}",
        after=f"\\begin{{figure}}{new_param}",
        success=True,
    )
|
|
652
|
+
|
|
653
|
+
|
|
654
|
+
# ============================================================
|
|
655
|
+
# 主修复函数
|
|
656
|
+
# ============================================================
|
|
657
|
+
|
|
658
|
+
def fix_float_defects(
    tex_file_path: str,
    defects: List[Dict[str, Any]],
    template_type: str = "single_column",
) -> FloatFixReport:
    """
    Apply the Category B float fixes to one .tex file.

    Args:
        tex_file_path: Path to the .tex file; it is rewritten in place when
            at least one fix is applied.
        defects: Defect records. Keys read here:
            - defect_id: "B1", "B2", "B3" or "B4"
            - page: page of the float (default 0)
            - object: label of the figure/table
            - ref_page: page of the first reference (used by B1)
            - line_number: copied into the change record when present
        template_type: "single_column" or "double_column"; forwarded to the
            figure-width fixer.

    Returns:
        FloatFixReport with status "success" (nothing unresolved),
        "partial" (some changes, some unresolved) or "failed".

    Notes:
        * All B3 defects are treated as one cluster: their labels are
          passed together to ``fix_float_clustering``, which needs at
          least two labels.
        * The placeins package is added only when a B1 fix actually
          inserted ``\\FloatBarrier``.
    """
    tex_path = Path(tex_file_path)
    if not tex_path.exists():
        return FloatFixReport(
            status="failed",
            unresolved=[f"文件不存在:{tex_file_path}"]
        )

    try:
        tex_content = tex_path.read_text(encoding='utf-8')
    except (OSError, UnicodeDecodeError) as e:
        return FloatFixReport(
            status="failed",
            unresolved=[f"无法读取文件 {tex_file_path}: {e}"]
        )

    modified_files = set()
    changes = []
    unresolved = []

    # Collect every B3 label up front (order kept, duplicates removed) so
    # the clustering fixer sees the whole cluster at once.
    b3_labels = list(dict.fromkeys(
        d.get("object", "")
        for d in defects
        if d.get("defect_id") == "B3" and d.get("object")
    ))
    b3_attempted = False
    b3_group_fixed = False

    for defect in defects:
        defect_id = defect.get("defect_id", "")
        page = defect.get("page", 0)
        object_name = defect.get("object", "")
        ref_page = defect.get("ref_page", 0)

        new_content = tex_content
        fix_result = None

        if defect_id == "B1":
            new_content, fix_result = fix_float_reference_distance(
                tex_content,
                float_label=object_name,
                ref_page=ref_page,
                float_page=page,
            )

        elif defect_id == "B2":
            if "fig" in object_name.lower():
                new_content, fix_result = fix_figure_width_mismatch(
                    tex_content,
                    figure_label=object_name,
                    template_type=template_type,
                )
            elif "tab" in object_name.lower():
                new_content, fix_result = fix_table_width_mismatch(
                    tex_content,
                    table_label=object_name,
                )

        elif defect_id == "B3":
            if b3_attempted:
                if b3_group_fixed:
                    # Already covered by the group fix recorded earlier.
                    continue
            else:
                b3_attempted = True
                new_content, fix_result = fix_float_clustering(
                    tex_content,
                    float_labels=b3_labels,
                )
                b3_group_fixed = fix_result is not None and new_content != tex_content

        elif defect_id == "B4":
            if "fig" in object_name.lower():
                new_content, fix_result = fix_split_figure(
                    tex_content,
                    figure_label=object_name,
                )
            elif "tab" in object_name.lower():
                new_content, fix_result = fix_split_table(
                    tex_content,
                    table_label=object_name,
                )

        if fix_result and new_content != tex_content:
            tex_content = new_content
            fix_result.page = page
            fix_result.line_number = defect.get("line_number")
            changes.append(fix_result)
            modified_files.add(str(tex_path))
        else:
            unresolved.append(
                f"{defect_id} ({object_name or '未知对象'}): 无法自动修复,可能需要人工调整"
            )

    # Load placeins only if a B1 fix really inserted a \FloatBarrier.
    barrier_inserted = any(
        c.defect_id == "B1" and "\\FloatBarrier" in c.after for c in changes
    )
    if barrier_inserted:
        new_content, preamble_fix = add_floatbarrier_to_preamble(tex_content)
        if preamble_fix and new_content != tex_content:
            tex_content = new_content
            changes.append(preamble_fix)
            modified_files.add(str(tex_path))

    if modified_files:
        try:
            tex_path.write_text(tex_content, encoding='utf-8')
        except OSError as e:
            unresolved.append(f"无法写入文件 {tex_path}: {e}")
            return FloatFixReport(
                status="failed",
                modified_files=list(modified_files),
                changes=changes,
                unresolved=unresolved,
            )

    status = "success" if not unresolved else ("partial" if changes else "failed")

    return FloatFixReport(
        status=status,
        modified_files=list(modified_files),
        changes=changes,
        unresolved=unresolved,
    )
|
|
793
|
+
|
|
794
|
+
|
|
795
|
+
# ============================================================
|
|
796
|
+
# CLI 入口
|
|
797
|
+
# ============================================================
|
|
798
|
+
|
|
799
|
+
def main():
    """Command-line entry point: parse arguments, run the fixes, print a report.

    ``--defects`` accepts either inline JSON or the path of a JSON file.
    Input that starts with ``[`` or ``{`` is parsed as JSON directly, so a
    long inline string is never probed as a file name (which can raise
    OSError). Invalid input ends the program through ``parser.error``.
    """
    import argparse
    import json

    parser = argparse.ArgumentParser(
        description="Fix Category B float defects in LaTeX documents"
    )
    parser.add_argument(
        "tex_file",
        help="Path to .tex file"
    )
    parser.add_argument(
        "--defects",
        type=str,
        help="JSON string or file path containing defect list"
    )
    parser.add_argument(
        "--template",
        type=str,
        default="single_column",
        choices=["single_column", "double_column"],
        help="Template type"
    )
    parser.add_argument(
        "--json",
        "-j",
        action="store_true",
        help="Output JSON report"
    )

    args = parser.parse_args()

    defects = []
    if args.defects:
        payload = args.defects
        if not payload.lstrip().startswith(("[", "{")):
            # Not inline JSON by shape: treat it as a file path if one exists.
            try:
                is_file = Path(payload).is_file()
            except OSError:
                is_file = False
            if is_file:
                try:
                    payload = Path(payload).read_text(encoding='utf-8')
                except (OSError, UnicodeDecodeError) as e:
                    parser.error(f"cannot read defects file {args.defects}: {e}")
        try:
            defects = json.loads(payload)
        except json.JSONDecodeError as e:
            parser.error(f"--defects is neither a readable file nor valid JSON: {e}")
        if isinstance(defects, dict):
            # Accept a single defect object as a one-element list.
            defects = [defects]
        if not isinstance(defects, list):
            parser.error("--defects must contain a JSON list of defect objects")

    report = fix_float_defects(args.tex_file, defects, template_type=args.template)

    if args.json:
        print(json.dumps(report.to_dict(), indent=2, ensure_ascii=False))
    else:
        print("\nFloat Fix Report")
        print("=" * 50)
        print(f"Status: {report.status}")
        print(f"Modified files: {report.modified_files}")
        print(f"Changes: {len(report.changes)}")
        for change in report.changes:
            print(f"  - [{change.defect_id}] {change.object_name}: {change.action}")
        if report.unresolved:
            print(f"\nUnresolved: {len(report.unresolved)}")
            for u in report.unresolved:
                print(f"  - {u}")
|
|
858
|
+
|
|
859
|
+
|
|
860
|
+
if __name__ == "__main__":
|
|
861
|
+
main()
|