paperfit-cli 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/commands/adjust-length.md +21 -0
- package/.claude/commands/check-visual.md +27 -0
- package/.claude/commands/fix-layout.md +31 -0
- package/.claude/commands/migrate-template.md +23 -0
- package/.claude/commands/repair-table.md +21 -0
- package/.claude/commands/show-status.md +32 -0
- package/.claude-plugin/README.md +77 -0
- package/.claude-plugin/marketplace.json +41 -0
- package/.claude-plugin/plugin.json +39 -0
- package/CLAUDE.md +266 -0
- package/CONTRIBUTING.md +131 -0
- package/LICENSE +21 -0
- package/README.md +164 -0
- package/agents/code-surgeon-agent.md +214 -0
- package/agents/layout-detective-agent.md +229 -0
- package/agents/orchestrator-agent.md +254 -0
- package/agents/quality-gatekeeper-agent.md +270 -0
- package/agents/rule-engine-agent.md +224 -0
- package/agents/semantic-polish-agent.md +250 -0
- package/bin/paperfit.js +176 -0
- package/config/agent_roles.yaml +56 -0
- package/config/layout_rules.yaml +54 -0
- package/config/templates.yaml +241 -0
- package/config/vto_taxonomy.yaml +489 -0
- package/config/writing_rules.yaml +64 -0
- package/install.sh +30 -0
- package/package.json +52 -0
- package/requirements.txt +5 -0
- package/scripts/benchmark_runner.py +629 -0
- package/scripts/compile.sh +244 -0
- package/scripts/config_validator.py +339 -0
- package/scripts/cv_detector.py +600 -0
- package/scripts/evidence_collector.py +167 -0
- package/scripts/float_fixers.py +861 -0
- package/scripts/inject_defects.py +549 -0
- package/scripts/install-claude-global.js +148 -0
- package/scripts/install.js +66 -0
- package/scripts/install.sh +106 -0
- package/scripts/overflow_fixers.py +656 -0
- package/scripts/package-for-opensource.sh +138 -0
- package/scripts/parse_log.py +260 -0
- package/scripts/postinstall.js +38 -0
- package/scripts/pre_tool_use.py +265 -0
- package/scripts/render_pages.py +244 -0
- package/scripts/session_logger.py +329 -0
- package/scripts/space_util_fixers.py +773 -0
- package/scripts/state_manager.py +352 -0
- package/scripts/test_commands.py +187 -0
- package/scripts/test_cv_detector.py +214 -0
- package/scripts/test_integration.py +290 -0
- package/skills/consistency-polisher/SKILL.md +337 -0
- package/skills/float-optimizer/SKILL.md +284 -0
- package/skills/latex_fixers/__init__.py +82 -0
- package/skills/latex_fixers/float_fixers.py +392 -0
- package/skills/latex_fixers/fullwidth_fixers.py +375 -0
- package/skills/latex_fixers/overflow_fixers.py +250 -0
- package/skills/latex_fixers/semantic_micro_tuning.py +362 -0
- package/skills/latex_fixers/space_util_fixers.py +389 -0
- package/skills/latex_fixers/utils.py +55 -0
- package/skills/overflow-repair/SKILL.md +304 -0
- package/skills/space-util-fixer/SKILL.md +307 -0
- package/skills/taxonomy-vto/SKILL.md +486 -0
- package/skills/template-migrator/SKILL.md +251 -0
- package/skills/visual-inspector/SKILL.md +217 -0
- package/skills/writing-polish/SKILL.md +289 -0
|
@@ -0,0 +1,773 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Space Utilization Fixers Module
|
|
4
|
+
|
|
5
|
+
处理 Category A:空间利用缺陷
|
|
6
|
+
- A1: 孤行/寡行 (Widow/Orphan Lines)
|
|
7
|
+
- A2: 末页大面积留白 (Excessive Trailing Whitespace)
|
|
8
|
+
- A3: 页数预算违反 (Page Budget Violation)
|
|
9
|
+
- A4: 双栏末页左右栏高度不齐 (Unbalanced Column Heights)
|
|
10
|
+
|
|
11
|
+
该模块被 code-surgeon-agent 或 semantic-polish-agent 调用,执行对 .tex 源码的精确修改。
|
|
12
|
+
所有修复遵循最小修改原则,优先排版控制,最后才考虑语义改写。
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
import re
|
|
16
|
+
from pathlib import Path
|
|
17
|
+
from dataclasses import dataclass, field
|
|
18
|
+
from typing import List, Dict, Optional, Tuple, Any
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
# ============================================================
|
|
22
|
+
# 数据结构定义
|
|
23
|
+
# ============================================================
|
|
24
|
+
|
|
25
|
+
@dataclass
class FixResult:
    """Outcome of a single fix applied to, or suggested for, the .tex source."""
    # Defect category identifier; the fixers in this module emit "A1".."A4".
    defect_id: str
    # Human-readable label of the object the fix refers to.
    object_name: str
    # Description of the action that was taken or suggested.
    action: str
    # Snippet describing the source before the change.
    before: str
    # Snippet describing the source after the change.
    after: str
    # Page number associated with the fix; defaults to 0.
    page: int = 0
    # Source line associated with the fix, when one is known.
    line_number: Optional[int] = None
    # Fixers in this module set True for applied edits and False for
    # suggestions that still need manual confirmation.
    success: bool = False
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
@dataclass
class SpaceUtilFixReport:
    """Aggregated report of one space-utilization fixing run."""

    status: str  # success | partial | failed
    modified_files: List[str] = field(default_factory=list)
    changes: List[FixResult] = field(default_factory=list)
    unresolved: List[str] = field(default_factory=list)

    def to_dict(self) -> Dict[str, Any]:
        """Return the report as a plain dictionary.

        Each entry of ``changes`` is flattened into a dict; note that the
        ``object_name`` attribute is exported under the key ``"object"``.
        """
        flattened = []
        for change in self.changes:
            entry = {
                "defect_id": change.defect_id,
                "object": change.object_name,
                "action": change.action,
                "before": change.before,
                "after": change.after,
                "page": change.page,
                "line_number": change.line_number,
                "success": change.success,
            }
            flattened.append(entry)

        report: Dict[str, Any] = {"skill": "space-util-fixer"}
        report["status"] = self.status
        report["modified_files"] = self.modified_files
        report["changes"] = flattened
        report["unresolved"] = self.unresolved
        return report
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
# ============================================================
|
|
69
|
+
# A1:孤行/寡行修复
|
|
70
|
+
# ============================================================
|
|
71
|
+
|
|
72
|
+
def fix_widow_orphan(
    tex_content: str,
    paragraph_start_line: Optional[int] = None,
    paragraph_text: Optional[str] = None,
) -> Tuple[str, Optional[FixResult]]:
    r"""
    Fix a widow/orphan line defect (A1).

    When ``paragraph_text`` is given, its first 50 characters are used as an
    anchor. The first occurrence that starts a line (optionally indented) is
    taken as the paragraph start; the paragraph runs up to the next blank
    line (``"\n\n"``) or the end of the content and is wrapped as
    ``{\looseness=-1 ...}``. Whitespace around the paragraph is left in
    place so neighbouring paragraphs are not merged.

    If the paragraph cannot be located, or the anchor is already directly
    preceded by a ``\looseness=<n>`` assignment, the function falls back to
    ``add_widow_orphan_penalty``.

    Args:
        tex_content: Content of the .tex file.
        paragraph_start_line: Line number of the paragraph, used for reporting.
        paragraph_text: Paragraph text used to locate the paragraph.

    Returns:
        ``(modified_content, fix_result)``; ``fix_result`` is ``None`` when
        the fallback made no change either.
    """
    if paragraph_text:
        # Only a short prefix is matched so that later edits inside the
        # paragraph do not prevent locating it.
        anchor = paragraph_text[:50]
        # The previous pattern used a variable-width look-behind, which the
        # ``re`` module rejects at compile time; the "already adjusted"
        # check is therefore done separately below.
        located = re.search(
            r'^[ \t]*(' + re.escape(anchor) + r')',
            tex_content,
            re.MULTILINE,
        )
        if located:
            start = located.start(1)
            already_adjusted = re.search(
                r'\\looseness=-?[0-9]+\s*\Z', tex_content[:start]
            )
            if not already_adjusted:
                end = tex_content.find('\n\n', start)
                if end == -1:
                    end = len(tex_content)

                paragraph = tex_content[start:end]
                body = paragraph.rstrip()
                trailing = paragraph[len(body):]

                # NOTE(review): the assignment is scoped to the brace group,
                # which closes before the blank line that ends the paragraph;
                # confirm \looseness is still in effect when TeX breaks the
                # paragraph (a \par inside the group may be required).
                wrapped = f"{{\\looseness=-1 {body}}}"
                modified_content = (
                    tex_content[:start] + wrapped + trailing + tex_content[end:]
                )

                line_number = paragraph_start_line
                if line_number is None:
                    # Derive the 1-based line from the match position so the
                    # report does not read "None".
                    line_number = tex_content.count('\n', 0, start) + 1

                preview = (
                    paragraph_text[:40] + "..."
                    if len(paragraph_text) > 40
                    else paragraph_text
                )
                return modified_content, FixResult(
                    defect_id="A1",
                    object_name=f"第 {line_number} 行段落",
                    action="添加 \\looseness=-1 以收缩段落消除孤行",
                    before=preview,
                    after=f"{{\\looseness=-1 {paragraph_text[:40]}...}}",
                    line_number=line_number,
                    success=True,
                )

    # Fallback: global widow/orphan penalties in the preamble.
    return add_widow_orphan_penalty(tex_content)
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
def add_widow_orphan_penalty(
|
|
123
|
+
tex_content: str,
|
|
124
|
+
) -> Tuple[str, Optional[FixResult]]:
|
|
125
|
+
"""
|
|
126
|
+
在导言区添加全局 widow/orphan 惩罚设置
|
|
127
|
+
"""
|
|
128
|
+
# 检查是否已有设置
|
|
129
|
+
if '\\widowpenalty' in tex_content and '\\clubpenalty' in tex_content:
|
|
130
|
+
return tex_content, None
|
|
131
|
+
|
|
132
|
+
# 在 \begin{document} 前添加
|
|
133
|
+
match = re.search(r'\\begin\{document\}', tex_content)
|
|
134
|
+
if match:
|
|
135
|
+
insert_pos = match.start()
|
|
136
|
+
penalties = (
|
|
137
|
+
"\\widowpenalty=10000\n"
|
|
138
|
+
"\\clubpenalty=10000\n"
|
|
139
|
+
"\\displaywidowpenalty=10000\n"
|
|
140
|
+
)
|
|
141
|
+
modified_content = tex_content[:insert_pos] + penalties + tex_content[insert_pos:]
|
|
142
|
+
|
|
143
|
+
return modified_content, FixResult(
|
|
144
|
+
defect_id="A1",
|
|
145
|
+
object_name="导言区",
|
|
146
|
+
action="添加全局 widow/orphan 惩罚设置",
|
|
147
|
+
before="\\begin{document}",
|
|
148
|
+
after="\\widowpenalty=10000\n\\clubpenalty=10000\n...\\begin{document}",
|
|
149
|
+
success=True,
|
|
150
|
+
)
|
|
151
|
+
|
|
152
|
+
return tex_content, None
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
def fix_paragraph_looseness(
|
|
156
|
+
tex_content: str,
|
|
157
|
+
paragraph_start: int,
|
|
158
|
+
looseness_value: int = -1,
|
|
159
|
+
) -> Tuple[str, Optional[FixResult]]:
|
|
160
|
+
"""
|
|
161
|
+
为特定段落设置 \looseness 值
|
|
162
|
+
|
|
163
|
+
Args:
|
|
164
|
+
tex_content: .tex 文件内容
|
|
165
|
+
paragraph_start: 段落起始位置 (字符索引)
|
|
166
|
+
looseness_value: \looseness 值 (-1 收缩,1 扩张)
|
|
167
|
+
|
|
168
|
+
Returns:
|
|
169
|
+
(修改后的内容,修复结果)
|
|
170
|
+
"""
|
|
171
|
+
# 找到段落结束 (下一个空行或文件结束)
|
|
172
|
+
paragraph_end = tex_content.find('\n\n', paragraph_start)
|
|
173
|
+
if paragraph_end == -1:
|
|
174
|
+
paragraph_end = len(tex_content)
|
|
175
|
+
|
|
176
|
+
paragraph_text = tex_content[paragraph_start:paragraph_end]
|
|
177
|
+
|
|
178
|
+
# 添加 \looseness
|
|
179
|
+
wrapped = f"{{\\looseness={looseness_value} {paragraph_text.strip()}}}"
|
|
180
|
+
modified_content = tex_content[:paragraph_start] + wrapped + tex_content[paragraph_end:]
|
|
181
|
+
|
|
182
|
+
return modified_content, FixResult(
|
|
183
|
+
defect_id="A1",
|
|
184
|
+
object_name=f"段落 (行 {paragraph_start})",
|
|
185
|
+
action=f"添加 \\looseness={looseness_value}",
|
|
186
|
+
before=paragraph_text[:40] + "..." if len(paragraph_text) > 40 else paragraph_text,
|
|
187
|
+
after=f"{{\\looseness={looseness_value} ...}}",
|
|
188
|
+
success=True,
|
|
189
|
+
)
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
# ============================================================
|
|
193
|
+
# A2:末页大面积留白修复
|
|
194
|
+
# ============================================================
|
|
195
|
+
|
|
196
|
+
def fix_trailing_whitespace(
|
|
197
|
+
tex_content: str,
|
|
198
|
+
last_page_number: int,
|
|
199
|
+
whitespace_ratio: float,
|
|
200
|
+
) -> Tuple[str, Optional[FixResult]]:
|
|
201
|
+
"""
|
|
202
|
+
修复末页大面积留白问题
|
|
203
|
+
|
|
204
|
+
策略优先级:
|
|
205
|
+
1. 前移浮动体
|
|
206
|
+
2. 调整局部垂直间距
|
|
207
|
+
3. 建议语义扩写 (返回 unresolved)
|
|
208
|
+
|
|
209
|
+
Args:
|
|
210
|
+
tex_content: .tex 文件内容
|
|
211
|
+
last_page_number: 最后一页页码
|
|
212
|
+
whitespace_ratio: 空白区域比例
|
|
213
|
+
|
|
214
|
+
Returns:
|
|
215
|
+
(修改后的内容,修复结果)
|
|
216
|
+
"""
|
|
217
|
+
if whitespace_ratio < 0.2:
|
|
218
|
+
# 空白比例在可接受范围内
|
|
219
|
+
return tex_content, None
|
|
220
|
+
|
|
221
|
+
# 策略 1: 尝试前移浮动体
|
|
222
|
+
# 查找最后几个 figure/table 环境,尝试调整其位置参数
|
|
223
|
+
float_pattern = r'\\begin\{(figure|table)\}(\[[^\]]*\])?'
|
|
224
|
+
matches = list(re.finditer(float_pattern, tex_content))
|
|
225
|
+
|
|
226
|
+
if matches:
|
|
227
|
+
# 找到最后一个浮动体
|
|
228
|
+
last_float = matches[-1]
|
|
229
|
+
float_type = last_float.group(1)
|
|
230
|
+
pos_param = last_float.group(2) if last_float.group(2) else ""
|
|
231
|
+
|
|
232
|
+
# 尝试改为 [ht] 使其前移
|
|
233
|
+
if pos_param != "[ht]":
|
|
234
|
+
new_param = "[ht]"
|
|
235
|
+
if pos_param:
|
|
236
|
+
modified_content = tex_content[:last_float.start(2)] + new_param + tex_content[last_float.end(2):]
|
|
237
|
+
else:
|
|
238
|
+
insert_pos = last_float.end()
|
|
239
|
+
modified_content = tex_content[:insert_pos] + new_param + tex_content[insert_pos:]
|
|
240
|
+
|
|
241
|
+
return modified_content, FixResult(
|
|
242
|
+
defect_id="A2",
|
|
243
|
+
object_name=f"末页{float_type}",
|
|
244
|
+
action=f"将浮动体位置改为 {new_param} 以填充空白",
|
|
245
|
+
before=f"\\begin{{{float_type}}}{pos_param}",
|
|
246
|
+
after=f"\\begin{{{float_type}}}{new_param}",
|
|
247
|
+
page=last_page_number,
|
|
248
|
+
success=True,
|
|
249
|
+
)
|
|
250
|
+
|
|
251
|
+
# 策略 2: 调整最后一节前的间距
|
|
252
|
+
last_section = tex_content.rfind('\\section')
|
|
253
|
+
if last_section != -1:
|
|
254
|
+
# 在 \section 前添加 \vspace
|
|
255
|
+
modified_content = tex_content[:last_section] + "\\vspace{-0.3em}\n" + tex_content[last_section:]
|
|
256
|
+
|
|
257
|
+
return modified_content, FixResult(
|
|
258
|
+
defect_id="A2",
|
|
259
|
+
object_name="最后一节",
|
|
260
|
+
action="在最后一节前添加 \\vspace{-0.3em} 压缩间距",
|
|
261
|
+
before="\\section{...}",
|
|
262
|
+
after="\\vspace{-0.3em}\n\\section{...}",
|
|
263
|
+
page=last_page_number,
|
|
264
|
+
success=True,
|
|
265
|
+
)
|
|
266
|
+
|
|
267
|
+
# 策略 3: 无法自动修复,需要语义扩写
|
|
268
|
+
return tex_content, None
|
|
269
|
+
|
|
270
|
+
|
|
271
|
+
# ============================================================
|
|
272
|
+
# A3:页数预算修复
|
|
273
|
+
# ============================================================
|
|
274
|
+
|
|
275
|
+
def fix_page_budget_excess(
|
|
276
|
+
tex_content: str,
|
|
277
|
+
current_pages: int,
|
|
278
|
+
target_pages: int,
|
|
279
|
+
) -> Tuple[str, List[FixResult]]:
|
|
280
|
+
"""
|
|
281
|
+
修复超页问题 (实际页数 > 目标页数)
|
|
282
|
+
|
|
283
|
+
策略优先级:
|
|
284
|
+
1. 压缩浮动体
|
|
285
|
+
2. 缩减垂直间距
|
|
286
|
+
3. 建议精炼文字 (语义级)
|
|
287
|
+
4. 压缩参考文献
|
|
288
|
+
5. 微调页边距 (谨慎)
|
|
289
|
+
|
|
290
|
+
Args:
|
|
291
|
+
tex_content: .tex 文件内容
|
|
292
|
+
current_pages: 当前页数
|
|
293
|
+
target_pages: 目标页数
|
|
294
|
+
|
|
295
|
+
Returns:
|
|
296
|
+
(修改后的内容,修复结果列表)
|
|
297
|
+
"""
|
|
298
|
+
pages_to_reduce = current_pages - target_pages
|
|
299
|
+
if pages_to_reduce <= 0:
|
|
300
|
+
return tex_content, []
|
|
301
|
+
|
|
302
|
+
changes = []
|
|
303
|
+
|
|
304
|
+
# 策略 1: 压缩图片宽度 (从 \linewidth 改为 0.95\linewidth)
|
|
305
|
+
include_graphics_pattern = r'\\includegraphics(\[[^\]]*\])?\{[^}]+\}'
|
|
306
|
+
matches = list(re.finditer(include_graphics_pattern, tex_content))
|
|
307
|
+
|
|
308
|
+
for match in matches[:pages_to_reduce + 1]: # 压缩几个图
|
|
309
|
+
graphic_cmd = match.group(0)
|
|
310
|
+
if r'\linewidth' in graphic_cmd or r'\textwidth' in graphic_cmd:
|
|
311
|
+
# 替换为 0.95\linewidth
|
|
312
|
+
new_graphic = graphic_cmd.replace(
|
|
313
|
+
r'\linewidth', r'0.95\linewidth'
|
|
314
|
+
).replace(
|
|
315
|
+
r'\textwidth', r'0.95\textwidth'
|
|
316
|
+
)
|
|
317
|
+
tex_content = tex_content.replace(graphic_cmd, new_graphic, 1)
|
|
318
|
+
changes.append(FixResult(
|
|
319
|
+
defect_id="A3",
|
|
320
|
+
object_name="图片",
|
|
321
|
+
action="压缩图片宽度至 0.95\\linewidth",
|
|
322
|
+
before=graphic_cmd[:30] + "...",
|
|
323
|
+
after=new_graphic[:30] + "...",
|
|
324
|
+
success=True,
|
|
325
|
+
))
|
|
326
|
+
|
|
327
|
+
# 策略 2: 检查是否有冗余的 \vspace 或空行
|
|
328
|
+
# 移除过大的 \vspace
|
|
329
|
+
vspace_pattern = r'\\vspace\{[0-9.]+(em|pt|cm)\}'
|
|
330
|
+
large_vspace = re.search(vspace_pattern, tex_content)
|
|
331
|
+
if large_vspace:
|
|
332
|
+
vspace_val = large_vspace.group(0)
|
|
333
|
+
# 缩小 \vspace
|
|
334
|
+
num_match = re.search(r'[0-9.]+', vspace_val)
|
|
335
|
+
if num_match:
|
|
336
|
+
old_val = float(num_match.group(0))
|
|
337
|
+
new_val = old_val * 0.8
|
|
338
|
+
new_vspace = vspace_val.replace(str(old_val), str(new_val))
|
|
339
|
+
tex_content = tex_content.replace(vspace_val, new_vspace, 1)
|
|
340
|
+
changes.append(FixResult(
|
|
341
|
+
defect_id="A3",
|
|
342
|
+
object_name="垂直间距",
|
|
343
|
+
action=f"压缩 \\vspace 从 {old_val} 到 {new_val}",
|
|
344
|
+
before=vspace_val,
|
|
345
|
+
after=new_vspace,
|
|
346
|
+
success=True,
|
|
347
|
+
))
|
|
348
|
+
|
|
349
|
+
# 策略 3: 建议压缩参考文献样式
|
|
350
|
+
if '\\bibliographystyle{' in tex_content:
|
|
351
|
+
style_match = re.search(r'\\bibliographystyle\{([^}]+)\}', tex_content)
|
|
352
|
+
if style_match and style_match.group(1) not in ['abbrv', 'unsrt', 'plain']:
|
|
353
|
+
changes.append(FixResult(
|
|
354
|
+
defect_id="A3",
|
|
355
|
+
object_name="参考文献样式",
|
|
356
|
+
action="建议改用 abbrv 样式压缩参考文献",
|
|
357
|
+
before=f"\\bibliographystyle{{{style_match.group(1)}}}",
|
|
358
|
+
after="\\bibliographystyle{abbrv}",
|
|
359
|
+
success=False, # 需要人工确认
|
|
360
|
+
))
|
|
361
|
+
|
|
362
|
+
return tex_content, changes
|
|
363
|
+
|
|
364
|
+
|
|
365
|
+
def fix_page_budget_deficit(
|
|
366
|
+
tex_content: str,
|
|
367
|
+
current_pages: int,
|
|
368
|
+
target_pages: int,
|
|
369
|
+
) -> Tuple[str, List[FixResult]]:
|
|
370
|
+
"""
|
|
371
|
+
修复缺页问题 (实际页数 < 目标页数)
|
|
372
|
+
|
|
373
|
+
策略优先级:
|
|
374
|
+
1. 检查浮动体堆积
|
|
375
|
+
2. 建议扩写结论/讨论 (语义级)
|
|
376
|
+
3. 增加附录
|
|
377
|
+
4. 微调图片尺寸
|
|
378
|
+
5. 增加分页点
|
|
379
|
+
|
|
380
|
+
Args:
|
|
381
|
+
tex_content: .tex 文件内容
|
|
382
|
+
current_pages: 当前页数
|
|
383
|
+
target_pages: 目标页数
|
|
384
|
+
|
|
385
|
+
Returns:
|
|
386
|
+
(修改后的内容,修复结果列表)
|
|
387
|
+
"""
|
|
388
|
+
pages_to_add = target_pages - current_pages
|
|
389
|
+
if pages_to_add <= 0:
|
|
390
|
+
return tex_content, []
|
|
391
|
+
|
|
392
|
+
changes = []
|
|
393
|
+
|
|
394
|
+
# 策略 1: 解除浮动体限制 (移除 [H] 或过度限制的参数)
|
|
395
|
+
float_pattern = r'\\begin\{(figure|table)\}\[H\]'
|
|
396
|
+
restricted_floats = re.finditer(float_pattern, tex_content)
|
|
397
|
+
|
|
398
|
+
for match in restricted_floats:
|
|
399
|
+
float_type = match.group(1)
|
|
400
|
+
old_cmd = f"\\begin{{{float_type}}}[H]"
|
|
401
|
+
new_cmd = f"\\begin{{{float_type}}}[htbp]"
|
|
402
|
+
tex_content = tex_content.replace(old_cmd, new_cmd, 1)
|
|
403
|
+
changes.append(FixResult(
|
|
404
|
+
defect_id="A3",
|
|
405
|
+
object_name=float_type,
|
|
406
|
+
action="移除 [H] 限制,允许浮动体自然放置",
|
|
407
|
+
before=old_cmd,
|
|
408
|
+
after=new_cmd,
|
|
409
|
+
success=True,
|
|
410
|
+
))
|
|
411
|
+
|
|
412
|
+
# 策略 2: 放大图片尺寸
|
|
413
|
+
include_graphics_pattern = r'\\includegraphics\[width=([0-9.]+)\\(linewidth|textwidth)\]'
|
|
414
|
+
matches = list(re.finditer(include_graphics_pattern, tex_content))
|
|
415
|
+
|
|
416
|
+
for match in matches[:pages_to_add]:
|
|
417
|
+
current_ratio = float(match.group(1))
|
|
418
|
+
if current_ratio < 1.0:
|
|
419
|
+
new_ratio = min(1.0, current_ratio + 0.1)
|
|
420
|
+
old_cmd = match.group(0)
|
|
421
|
+
new_cmd = old_cmd.replace(
|
|
422
|
+
f'{current_ratio}\\', f'{new_ratio}\\'
|
|
423
|
+
)
|
|
424
|
+
tex_content = tex_content.replace(old_cmd, new_cmd, 1)
|
|
425
|
+
changes.append(FixResult(
|
|
426
|
+
defect_id="A3",
|
|
427
|
+
object_name="图片",
|
|
428
|
+
action=f"放大图片宽度从 {current_ratio} 到 {new_ratio}",
|
|
429
|
+
before=old_cmd,
|
|
430
|
+
after=new_cmd,
|
|
431
|
+
success=True,
|
|
432
|
+
))
|
|
433
|
+
|
|
434
|
+
return tex_content, changes
|
|
435
|
+
|
|
436
|
+
|
|
437
|
+
# ============================================================
|
|
438
|
+
# A4:双栏末页左右栏高度不齐
|
|
439
|
+
# ============================================================
|
|
440
|
+
|
|
441
|
+
def fix_unbalanced_columns(
|
|
442
|
+
tex_content: str,
|
|
443
|
+
height_difference: float, # 栏高差 (比例)
|
|
444
|
+
) -> Tuple[str, Optional[FixResult]]:
|
|
445
|
+
"""
|
|
446
|
+
修复双栏末页左右栏高度不齐问题
|
|
447
|
+
|
|
448
|
+
策略优先级:
|
|
449
|
+
1. 使用 \balance 或 flushend 宏包
|
|
450
|
+
2. 手动平衡
|
|
451
|
+
3. 微调最后一段断行
|
|
452
|
+
4. 调整浮动体位置
|
|
453
|
+
|
|
454
|
+
Args:
|
|
455
|
+
tex_content: .tex 文件内容
|
|
456
|
+
height_difference: 栏高差比例
|
|
457
|
+
|
|
458
|
+
Returns:
|
|
459
|
+
(修改后的内容,修复结果)
|
|
460
|
+
"""
|
|
461
|
+
if height_difference < 0.1:
|
|
462
|
+
# 高度差在可接受范围内 (约 2 行以内)
|
|
463
|
+
return tex_content, None
|
|
464
|
+
|
|
465
|
+
# 策略 1: 添加 flushend 宏包
|
|
466
|
+
if '\\usepackage{flushend}' not in tex_content:
|
|
467
|
+
match = re.search(r'\\begin\{document\}', tex_content)
|
|
468
|
+
if match:
|
|
469
|
+
insert_pos = match.start()
|
|
470
|
+
modified_content = tex_content[:insert_pos] + "\\usepackage{flushend}\n" + tex_content[insert_pos:]
|
|
471
|
+
|
|
472
|
+
return modified_content, FixResult(
|
|
473
|
+
defect_id="A4",
|
|
474
|
+
object_name="导言区",
|
|
475
|
+
action="添加 flushend 宏包自动平衡末页两栏",
|
|
476
|
+
before="\\begin{document}",
|
|
477
|
+
after="\\usepackage{flushend}\n\\begin{document}",
|
|
478
|
+
success=True,
|
|
479
|
+
)
|
|
480
|
+
|
|
481
|
+
# 策略 2: 在文末添加 \balance
|
|
482
|
+
if '\\balance' not in tex_content:
|
|
483
|
+
# 在 \end{document} 前添加
|
|
484
|
+
match = re.search(r'\\end\{document\}', tex_content)
|
|
485
|
+
if match:
|
|
486
|
+
insert_pos = match.start()
|
|
487
|
+
modified_content = tex_content[:insert_pos] + "\\balance\n" + tex_content[insert_pos:]
|
|
488
|
+
|
|
489
|
+
return modified_content, FixResult(
|
|
490
|
+
defect_id="A4",
|
|
491
|
+
object_name="文末",
|
|
492
|
+
action="添加 \\balance 命令平衡两栏",
|
|
493
|
+
before="\\end{document}",
|
|
494
|
+
after="\\balance\n\\end{document}",
|
|
495
|
+
success=True,
|
|
496
|
+
)
|
|
497
|
+
|
|
498
|
+
return tex_content, None
|
|
499
|
+
|
|
500
|
+
|
|
501
|
+
def add_balance_package(
|
|
502
|
+
tex_content: str,
|
|
503
|
+
) -> Tuple[str, Optional[FixResult]]:
|
|
504
|
+
"""
|
|
505
|
+
添加 balance 宏包支持
|
|
506
|
+
"""
|
|
507
|
+
if '\\usepackage{balance}' in tex_content:
|
|
508
|
+
return tex_content, None
|
|
509
|
+
|
|
510
|
+
match = re.search(r'\\begin\{document\}', tex_content)
|
|
511
|
+
if match:
|
|
512
|
+
insert_pos = match.start()
|
|
513
|
+
modified_content = tex_content[:insert_pos] + "\\usepackage{balance}\n" + tex_content[insert_pos:]
|
|
514
|
+
|
|
515
|
+
return modified_content, FixResult(
|
|
516
|
+
defect_id="A4",
|
|
517
|
+
object_name="导言区",
|
|
518
|
+
action="添加 balance 宏包",
|
|
519
|
+
before="\\begin{document}",
|
|
520
|
+
after="\\usepackage{balance}\n\\begin{document}",
|
|
521
|
+
success=True,
|
|
522
|
+
)
|
|
523
|
+
|
|
524
|
+
return tex_content, None
|
|
525
|
+
|
|
526
|
+
|
|
527
|
+
# ============================================================
|
|
528
|
+
# 主修复函数
|
|
529
|
+
# ============================================================
|
|
530
|
+
|
|
531
|
+
def fix_space_util_defects(
    tex_file_path: str,
    defects: List[Dict[str, Any]],
    target_pages: Optional[int] = None,
    template_type: str = "single_column",
) -> SpaceUtilFixReport:
    """
    Apply fixes for all Category A defects to one .tex file.

    The file is read once, each defect is dispatched to its fixer in list
    order (every fixer sees the content produced by the previous ones), and
    the file is written back only if at least one fixer changed the content.
    Each defect that could not be fixed contributes exactly one entry to
    ``unresolved``.

    Args:
        tex_file_path: Path to the .tex file.
        defects: Defect dicts. Keys read here:
            - defect_id: "A1", "A2", "A3" or "A4"
            - page: page number
            - object: object name (used in the generic unresolved message)
            - description: passed to the A1 fixer as the paragraph text
            - line_number: passed to the A1 fixer and copied onto the result
            - whitespace_ratio: blank ratio (A2)
            - current_pages: current page count (A3)
            - height_difference: column height difference (A4)
        target_pages: Target page count; required for A3.
        template_type: "single_column" or "double_column"; A4 is handled only
            for "double_column".

    Returns:
        SpaceUtilFixReport with status "success" when nothing is unresolved,
        "partial" when there are unresolved items but some changes were
        recorded, and "failed" otherwise (including read/write errors).
    """
    tex_path = Path(tex_file_path)
    if not tex_path.exists():
        return SpaceUtilFixReport(
            status="failed",
            unresolved=[f"文件不存在:{tex_file_path}"]
        )

    try:
        tex_content = tex_path.read_text(encoding='utf-8')
    except (OSError, UnicodeDecodeError) as e:
        return SpaceUtilFixReport(
            status="failed",
            unresolved=[f"无法读取文件 {tex_file_path}: {e}"]
        )
    modified_files = set()
    changes = []
    unresolved = []

    # Tolerate a missing defect list instead of failing on iteration.
    for defect in defects or []:
        defect_id = defect.get("defect_id", "")
        page = defect.get("page", 0)
        object_name = defect.get("object", "")
        description = defect.get("description", "")

        new_content = tex_content
        fix_result = None
        additional_changes = []
        # Set once a defect-specific unresolved message has been recorded, so
        # the generic message at the end of the loop is not added as well.
        already_reported = False

        if defect_id == "A1":
            # Widow/orphan lines.
            new_content, fix_result = fix_widow_orphan(
                tex_content,
                paragraph_start_line=defect.get("line_number"),
                paragraph_text=description,
            )
            # If the paragraph-level fix did nothing, try the global setting.
            if not fix_result:
                new_content, fix_result = add_widow_orphan_penalty(tex_content)

        elif defect_id == "A2":
            # Trailing whitespace on the last page.
            whitespace_ratio = defect.get("whitespace_ratio", 0)
            new_content, fix_result = fix_trailing_whitespace(
                tex_content,
                last_page_number=page,
                whitespace_ratio=whitespace_ratio,
            )
            if not fix_result:
                unresolved.append(
                    "A2 (末页): 需要语义扩写结论或讨论部分以填充空白"
                )
                already_reported = True

        elif defect_id == "A3":
            # Page budget.
            current_pages = defect.get("current_pages", 0)
            if target_pages is None:
                unresolved.append("A3: 需要用户提供目标页数")
                continue

            if current_pages > target_pages:
                # Over budget.
                new_content, additional_changes = fix_page_budget_excess(
                    tex_content,
                    current_pages=current_pages,
                    target_pages=target_pages,
                )
                changes.extend(additional_changes)
                if additional_changes:
                    fix_result = additional_changes[0]
                else:
                    unresolved.append(
                        f"A3: 需要精炼文字或压缩参考文献 (当前{current_pages}页,目标{target_pages}页)"
                    )
                    already_reported = True
            else:
                # Under budget.
                new_content, additional_changes = fix_page_budget_deficit(
                    tex_content,
                    current_pages=current_pages,
                    target_pages=target_pages,
                )
                changes.extend(additional_changes)
                if additional_changes:
                    fix_result = additional_changes[0]
                else:
                    unresolved.append(
                        f"A3: 需要扩写结论或增加附录 (当前{current_pages}页,目标{target_pages}页)"
                    )
                    already_reported = True

        elif defect_id == "A4":
            # Unbalanced columns on the last page.
            if template_type != "double_column":
                unresolved.append("A4: 仅适用于双栏模板")
                continue

            height_difference = defect.get("height_difference", 0)
            new_content, fix_result = fix_unbalanced_columns(
                tex_content,
                height_difference=height_difference,
            )

            # Add the balance package as well when a fix was made.
            if fix_result and '\\usepackage{balance}' not in tex_content:
                new_content, _ = add_balance_package(new_content)

        # Commit the result of this defect.
        if new_content != tex_content:
            tex_content = new_content
            if fix_result:
                fix_result.page = page
                fix_result.line_number = defect.get("line_number")
                # A3 results were already appended via changes.extend above.
                if fix_result not in changes:
                    changes.append(fix_result)
            modified_files.add(str(tex_path))
        elif not fix_result and not additional_changes and not already_reported:
            unresolved.append(
                f"{defect_id} ({object_name or '未知对象'}): 无法自动修复,可能需要人工调整"
            )

    # Write the modified content back.
    if modified_files:
        try:
            tex_path.write_text(tex_content, encoding='utf-8')
        except OSError as e:
            unresolved.append(f"无法写入文件 {tex_path}: {e}")
            return SpaceUtilFixReport(
                status="failed",
                modified_files=list(modified_files),
                changes=changes,
                unresolved=unresolved,
            )

    status = "success" if not unresolved else ("partial" if changes else "failed")

    return SpaceUtilFixReport(
        status=status,
        modified_files=list(modified_files),
        changes=changes,
        unresolved=unresolved,
    )
|
|
695
|
+
|
|
696
|
+
|
|
697
|
+
# ============================================================
|
|
698
|
+
# CLI 入口
|
|
699
|
+
# ============================================================
|
|
700
|
+
|
|
701
|
+
def main():
    """Command-line interface.

    Parses the arguments, loads the defect list from ``--defects`` (a path to
    a JSON file, or an inline JSON string), runs ``fix_space_util_defects``
    and prints the report as JSON (``--json``) or as plain text.

    A defect list that cannot be loaded, or that is not a JSON list, is
    reported through ``parser.error`` (usage message, exit status 2).
    """
    import argparse
    import json

    parser = argparse.ArgumentParser(
        description="Fix Category A space utilization defects in LaTeX documents"
    )
    parser.add_argument(
        "tex_file",
        help="Path to .tex file"
    )
    parser.add_argument(
        "--defects",
        type=str,
        help="JSON string or file path containing defect list"
    )
    parser.add_argument(
        "--target-pages",
        type=int,
        help="Target page count (for A3)"
    )
    parser.add_argument(
        "--template",
        type=str,
        default="single_column",
        choices=["single_column", "double_column"],
        help="Template type"
    )
    parser.add_argument(
        "--json",
        "-j",
        action="store_true",
        help="Output JSON report"
    )

    args = parser.parse_args()

    # Load the defect list.
    defects = []
    if args.defects:
        try:
            # An inline JSON string can exceed the OS file-name limit, in
            # which case the file-system probe itself raises; treat that as
            # "not a file" and parse the argument as JSON.
            from_file = Path(args.defects).is_file()
        except (OSError, ValueError):
            from_file = False

        try:
            if from_file:
                with open(args.defects, 'r', encoding='utf-8') as f:
                    defects = json.load(f)
            else:
                defects = json.loads(args.defects)
        except (OSError, ValueError) as exc:
            # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
            parser.error(f"could not load --defects: {exc}")

        if not isinstance(defects, list):
            parser.error("--defects must be a JSON list of defect objects")

    # Run the fixers.
    report = fix_space_util_defects(
        args.tex_file,
        defects,
        target_pages=args.target_pages,
        template_type=args.template,
    )

    if args.json:
        print(json.dumps(report.to_dict(), indent=2, ensure_ascii=False))
    else:
        print(f"\nSpace Utilization Fix Report")
        print("=" * 50)
        print(f"Status: {report.status}")
        print(f"Modified files: {report.modified_files}")
        print(f"Changes: {len(report.changes)}")
        for change in report.changes:
            print(f"  - [{change.defect_id}] {change.object_name}: {change.action}")
        if report.unresolved:
            print(f"\nUnresolved: {len(report.unresolved)}")
            for u in report.unresolved:
                print(f"  - {u}")
|
|
770
|
+
|
|
771
|
+
|
|
772
|
+
# Run the command-line interface only when this file is executed as a script.
if __name__ == "__main__":
    main()
|