paperfit-cli 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65) hide show
  1. package/.claude/commands/adjust-length.md +21 -0
  2. package/.claude/commands/check-visual.md +27 -0
  3. package/.claude/commands/fix-layout.md +31 -0
  4. package/.claude/commands/migrate-template.md +23 -0
  5. package/.claude/commands/repair-table.md +21 -0
  6. package/.claude/commands/show-status.md +32 -0
  7. package/.claude-plugin/README.md +77 -0
  8. package/.claude-plugin/marketplace.json +41 -0
  9. package/.claude-plugin/plugin.json +39 -0
  10. package/CLAUDE.md +266 -0
  11. package/CONTRIBUTING.md +131 -0
  12. package/LICENSE +21 -0
  13. package/README.md +164 -0
  14. package/agents/code-surgeon-agent.md +214 -0
  15. package/agents/layout-detective-agent.md +229 -0
  16. package/agents/orchestrator-agent.md +254 -0
  17. package/agents/quality-gatekeeper-agent.md +270 -0
  18. package/agents/rule-engine-agent.md +224 -0
  19. package/agents/semantic-polish-agent.md +250 -0
  20. package/bin/paperfit.js +176 -0
  21. package/config/agent_roles.yaml +56 -0
  22. package/config/layout_rules.yaml +54 -0
  23. package/config/templates.yaml +241 -0
  24. package/config/vto_taxonomy.yaml +489 -0
  25. package/config/writing_rules.yaml +64 -0
  26. package/install.sh +30 -0
  27. package/package.json +52 -0
  28. package/requirements.txt +5 -0
  29. package/scripts/benchmark_runner.py +629 -0
  30. package/scripts/compile.sh +244 -0
  31. package/scripts/config_validator.py +339 -0
  32. package/scripts/cv_detector.py +600 -0
  33. package/scripts/evidence_collector.py +167 -0
  34. package/scripts/float_fixers.py +861 -0
  35. package/scripts/inject_defects.py +549 -0
  36. package/scripts/install-claude-global.js +148 -0
  37. package/scripts/install.js +66 -0
  38. package/scripts/install.sh +106 -0
  39. package/scripts/overflow_fixers.py +656 -0
  40. package/scripts/package-for-opensource.sh +138 -0
  41. package/scripts/parse_log.py +260 -0
  42. package/scripts/postinstall.js +38 -0
  43. package/scripts/pre_tool_use.py +265 -0
  44. package/scripts/render_pages.py +244 -0
  45. package/scripts/session_logger.py +329 -0
  46. package/scripts/space_util_fixers.py +773 -0
  47. package/scripts/state_manager.py +352 -0
  48. package/scripts/test_commands.py +187 -0
  49. package/scripts/test_cv_detector.py +214 -0
  50. package/scripts/test_integration.py +290 -0
  51. package/skills/consistency-polisher/SKILL.md +337 -0
  52. package/skills/float-optimizer/SKILL.md +284 -0
  53. package/skills/latex_fixers/__init__.py +82 -0
  54. package/skills/latex_fixers/float_fixers.py +392 -0
  55. package/skills/latex_fixers/fullwidth_fixers.py +375 -0
  56. package/skills/latex_fixers/overflow_fixers.py +250 -0
  57. package/skills/latex_fixers/semantic_micro_tuning.py +362 -0
  58. package/skills/latex_fixers/space_util_fixers.py +389 -0
  59. package/skills/latex_fixers/utils.py +55 -0
  60. package/skills/overflow-repair/SKILL.md +304 -0
  61. package/skills/space-util-fixer/SKILL.md +307 -0
  62. package/skills/taxonomy-vto/SKILL.md +486 -0
  63. package/skills/template-migrator/SKILL.md +251 -0
  64. package/skills/visual-inspector/SKILL.md +217 -0
  65. package/skills/writing-polish/SKILL.md +289 -0
@@ -0,0 +1,352 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ PaperFit 状态管理器
4
+
5
+ 管理 VTO 闭环中的状态持久化,包括读取、更新、备份和归档。
6
+ 状态文件 state.json 记录了当前任务的所有关键信息,支持中断恢复和多轮迭代。
7
+
8
+ 用法:
9
+ python state_manager.py init <main_tex> [--task <type>] [--target-pages <n>]
10
+ python state_manager.py get <key>
11
+ python state_manager.py set <key> <value>
12
+ python state_manager.py update "<json_patch>"
13
+ python state_manager.py next-round
14
+ python state_manager.py archive
15
+ python state_manager.py show
16
+ """
17
+
18
+ import os
19
+ import json
20
+ import shutil
21
+ import argparse
22
+ import sys
23
+ from pathlib import Path
24
+ from datetime import datetime
25
+ from typing import Any, Dict, Optional
26
+
27
+
28
class StateManager:
    """Persist and manage the PaperFit VTO-loop state stored in a JSON file.

    The state file records the key information of the current task so that a
    run can be resumed after an interruption and iterated over several rounds.
    Every public mutator loads the state lazily (if nothing is in memory yet)
    and writes it back through :meth:`save`, which backs up the previous
    on-disk version first.

    Directory layout, derived from the parent directory of ``state_path``:

    * ``backups/``          -- timestamped copies of the state file and of
      the benchmark case directory
    * ``archives/``         -- final snapshots written by :meth:`archive`
    * ``benchmarks/case/``  -- case directory included in each backup
    """

    DEFAULT_STATE_PATH = "data/state.json"
    BACKUP_DIR = "data/backups"
    ARCHIVE_DIR = "data/archives"
    CASE_DIR = "data/benchmarks/case"  # Benchmark case directory

    def __init__(self, state_path: str = DEFAULT_STATE_PATH):
        """Bind the manager to ``state_path``; nothing is read or written yet."""
        self.state_path = Path(state_path)
        self.backup_dir = self.state_path.parent / "backups"
        self.archive_dir = self.state_path.parent / "archives"
        self.case_dir = self.state_path.parent / "benchmarks" / "case"
        self.state: Dict[str, Any] = {}

    def init_state(
        self,
        main_tex: str,
        task_type: str = "full_vto",
        target_pages: Optional[int] = None,
        template: Optional[str] = None,
        strict_mode: bool = False,
        max_rounds: int = 10
    ) -> Dict[str, Any]:
        """Create the state for a new task and write it to disk.

        Any existing state file is overwritten without being backed up.

        Args:
            main_tex: Path of the main ``.tex`` file.
            task_type: Task type stored under ``task.type``.
            target_pages: Target page count, or ``None`` if unspecified.
            template: Template name, or ``None`` if unspecified.
            strict_mode: Value stored under ``task.strict_mode``.
            max_rounds: Maximum number of iterations.

        Returns:
            The freshly initialized state dictionary.
        """
        self.backup_dir.mkdir(parents=True, exist_ok=True)
        self.archive_dir.mkdir(parents=True, exist_ok=True)

        self.state = {
            "project": "PaperFit",
            "version": "1.0",
            "created_at": datetime.now().isoformat(),
            "updated_at": datetime.now().isoformat(),
            "main_tex": main_tex,
            "task": {
                "type": task_type,
                "target_pages": target_pages,
                "template": template,
                "strict_mode": strict_mode
            },
            "current_round": 0,
            "max_rounds": max_rounds,
            "status": "INITIALIZED",
            "compile_success": None,
            "page_images_rendered": False,
            "defect_summary": {
                "initial_total": 0,
                "resolved": 0,
                "remaining": 0
            },
            "agents_this_round": [],
            "last_gatekeeper_decision": None,
            "next_actions": [],
            "history": []
        }

        self._save()
        return self.state

    def load(self) -> Dict[str, Any]:
        """Read the state file into memory and return it.

        Raises:
            FileNotFoundError: If the state file does not exist.
        """
        if not self.state_path.exists():
            raise FileNotFoundError(f"State file not found: {self.state_path}")

        with open(self.state_path, 'r', encoding='utf-8') as f:
            self.state = json.load(f)

        return self.state

    def save(self) -> None:
        """Write the in-memory state to disk, backing up the old file first."""
        if self.state_path.exists():
            self._backup()
        self._save()

    def _ensure_loaded(self) -> None:
        """Load the state from disk when nothing is held in memory yet."""
        if not self.state:
            self.load()

    def _save(self) -> None:
        """Serialize the state to ``state_path`` and refresh ``updated_at``.

        The JSON is written to a sibling temporary file and then moved over
        the real file, so a failure while serializing (for example a value
        that is not JSON-serializable) cannot leave a truncated state file.
        """
        self.state["updated_at"] = datetime.now().isoformat()
        self.state_path.parent.mkdir(parents=True, exist_ok=True)

        tmp_path = self.state_path.with_name(self.state_path.name + ".tmp")
        try:
            with open(tmp_path, 'w', encoding='utf-8') as f:
                json.dump(self.state, f, indent=2, ensure_ascii=False)
            os.replace(tmp_path, self.state_path)
        finally:
            # After a successful replace the temp file is gone; this only
            # cleans up after a failed write.
            if tmp_path.exists():
                tmp_path.unlink()

    def _backup(self) -> None:
        """Back up the current state file and, if present, the case directory.

        Backup names carry a second-resolution timestamp, so two backups made
        within the same second share one name. Only the 20 most recent state
        backups and the 20 most recent case backups are kept.
        """
        if not self.state_path.exists():
            return

        # init_state() creates this directory, but save() may also run
        # against a state file that was produced some other way.
        self.backup_dir.mkdir(parents=True, exist_ok=True)

        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

        # Back up the state file.
        backup_name = f"state_{timestamp}.json"
        backup_path = self.backup_dir / backup_name
        shutil.copy2(self.state_path, backup_path)

        # Back up the case directory (if it exists).
        if self.case_dir.exists():
            case_backup_name = f"case_{timestamp}"
            case_backup_path = self.backup_dir / case_backup_name
            self._backup_directory(self.case_dir, case_backup_path)

        # Keep the 20 most recent backups of each kind.
        self._cleanup_old_files(self.backup_dir, "state_*.json", keep=20)
        self._cleanup_old_files(self.backup_dir, "case_*", keep=20)

    def _backup_directory(self, src: Path, dst: Path) -> None:
        """Recursively copy ``src`` into ``dst``, skipping bulky/temporary files.

        Files with one of the skipped extensions (compared case-insensitively)
        and files whose name starts with a dot are not copied.
        """
        dst.mkdir(parents=True, exist_ok=True)
        skipped_extensions = {'.pdf', '.png', '.jpg', '.jpeg', '.log', '.aux', '.bbl', '.blg', '.out'}

        for item in src.rglob('*'):
            if not item.is_file():
                continue
            # Skip rendered outputs and LaTeX build artifacts.
            if item.suffix.lower() in skipped_extensions:
                continue
            if item.name.startswith('.'):
                continue

            dst_path = dst / item.relative_to(src)
            dst_path.parent.mkdir(parents=True, exist_ok=True)
            shutil.copy2(item, dst_path)

    def get(self, key: str) -> Any:
        """Return the value at ``key``; dots address nested fields.

        Returns ``None`` when any segment of the path is missing or when an
        intermediate value is not a dictionary.
        """
        self._ensure_loaded()

        value = self.state
        for k in key.split('.'):
            if not isinstance(value, dict):
                return None
            value = value.get(k)
        return value

    def set(self, key: str, value: Any) -> None:
        """Assign ``value`` at ``key`` (dots address nested fields) and save.

        Missing intermediate dictionaries are created.

        Raises:
            TypeError: If an existing intermediate value is not a dictionary.
        """
        self._ensure_loaded()

        *parents, leaf = key.split('.')
        target = self.state
        for k in parents:
            self._require_mapping(target, key)
            target = target.setdefault(k, {})
        self._require_mapping(target, key)

        target[leaf] = value
        self.save()

    @staticmethod
    def _require_mapping(node: Any, key: str) -> None:
        """Raise ``TypeError`` if ``node`` cannot hold nested keys."""
        if not isinstance(node, dict):
            raise TypeError(
                f"Cannot set '{key}': an intermediate value is not a JSON object"
            )

    def update(self, patch: Dict[str, Any]) -> None:
        """Deep-merge ``patch`` into the state and save."""
        self._ensure_loaded()

        self._deep_update(self.state, patch)
        self.save()

    def _deep_update(self, target: Dict, source: Dict) -> None:
        """Recursively merge ``source`` into ``target`` in place.

        Nested dictionaries are merged key by key; any other value in
        ``source`` replaces the value in ``target``.
        """
        for key, value in source.items():
            if key in target and isinstance(target[key], dict) and isinstance(value, dict):
                self._deep_update(target[key], value)
            else:
                target[key] = value

    def next_round(self) -> Dict[str, Any]:
        """Advance to the next iteration, reset per-round fields, and save.

        Returns:
            The updated state dictionary.
        """
        self._ensure_loaded()

        self.state["current_round"] += 1
        self.state["status"] = "EVALUATING"
        self.state["agents_this_round"] = []
        self.state["compile_success"] = None
        self.state["page_images_rendered"] = False
        self.save()

        return self.state

    def archive(self) -> str:
        """Write a final snapshot of the state into the archive directory.

        The snapshot (and the in-memory state) is marked ``ARCHIVED`` and
        stamped with ``archived_at``. The live state file itself is not
        rewritten or removed by this method.

        Returns:
            The path of the archive file, as a string.
        """
        self._ensure_loaded()

        self.archive_dir.mkdir(parents=True, exist_ok=True)
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        archive_name = f"state_final_{timestamp}.json"
        archive_path = self.archive_dir / archive_name

        # Mark the state as archived.
        self.state["status"] = "ARCHIVED"
        self.state["archived_at"] = datetime.now().isoformat()

        with open(archive_path, 'w', encoding='utf-8') as f:
            json.dump(self.state, f, indent=2, ensure_ascii=False)

        return str(archive_path)

    def _cleanup_old_files(self, directory: Path, pattern: str, keep: int) -> None:
        """Delete all but the ``keep`` most recently modified matches.

        Matches may be files or directories: the ``case_*`` backups are
        directory trees, which ``Path.unlink`` cannot remove, so directories
        are deleted with ``shutil.rmtree``.
        """
        entries = sorted(directory.glob(pattern), key=os.path.getmtime, reverse=True)
        for stale in entries[keep:]:
            if stale.is_dir() and not stale.is_symlink():
                shutil.rmtree(stale)
            else:
                stale.unlink()

    def add_history_entry(self, entry: Dict[str, Any]) -> None:
        """Append ``entry`` to the history list and save.

        A ``timestamp`` key is written into ``entry`` itself (the caller's
        dictionary is modified).
        """
        self._ensure_loaded()

        entry["timestamp"] = datetime.now().isoformat()
        self.state.setdefault("history", []).append(entry)
        self.save()

    def update_defect_summary(self, resolved: int, remaining: int, initial: Optional[int] = None) -> None:
        """Update the defect counters and save.

        Args:
            resolved: Number of resolved defects.
            remaining: Number of remaining defects.
            initial: New initial total; left unchanged when ``None``.
        """
        self._ensure_loaded()

        # Tolerate state files that lack the summary section.
        summary = self.state.setdefault("defect_summary", {})
        if initial is not None:
            summary["initial_total"] = initial
        summary["resolved"] = resolved
        summary["remaining"] = remaining
        self.save()
260
+
261
+
262
def main(argv: Optional[list] = None) -> None:
    """Command-line entry point for the PaperFit state manager.

    Parses one of the sub-commands ``init``, ``get``, ``set``, ``update``,
    ``next-round``, ``archive`` or ``show`` and runs it against a
    ``StateManager`` bound to the default state path.

    Args:
        argv: Argument list to parse; ``None`` means ``sys.argv[1:]``.

    Exits with status 1 when no sub-command is given or when the command
    raises an exception (the message is printed to stderr).
    """
    parser = argparse.ArgumentParser(description="PaperFit State Manager")
    subparsers = parser.add_subparsers(dest="command", help="Commands")

    # init command
    init_parser = subparsers.add_parser("init", help="Initialize new state")
    init_parser.add_argument("main_tex", help="Main .tex file path")
    init_parser.add_argument("--task", default="full_vto", help="Task type")
    init_parser.add_argument("--target-pages", type=int, help="Target page count")
    init_parser.add_argument("--template", help="Template name")
    init_parser.add_argument("--strict", action="store_true", help="Strict mode")
    init_parser.add_argument("--max-rounds", type=int, default=10, help="Max iterations")

    # get command
    get_parser = subparsers.add_parser("get", help="Get value by key")
    get_parser.add_argument("key", help="Key path (e.g., 'task.target_pages')")

    # set command
    set_parser = subparsers.add_parser("set", help="Set value by key")
    set_parser.add_argument("key", help="Key path")
    set_parser.add_argument("value", help="Value (JSON string)")

    # update command
    update_parser = subparsers.add_parser("update", help="Batch update with JSON patch")
    update_parser.add_argument("patch", help="JSON patch string")

    # next-round command
    subparsers.add_parser("next-round", help="Increment round counter")

    # archive command
    subparsers.add_parser("archive", help="Archive current state")

    # show command
    subparsers.add_parser("show", help="Display current state")

    args = parser.parse_args(argv)
    manager = StateManager()

    try:
        if args.command == "init":
            state = manager.init_state(
                args.main_tex,
                task_type=args.task,
                target_pages=args.target_pages,
                template=args.template,
                strict_mode=args.strict,
                max_rounds=args.max_rounds
            )
            print(f"State initialized: {manager.state_path}")
            # ensure_ascii=False keeps non-ASCII paths readable, matching
            # the output of the other commands.
            print(json.dumps(state, indent=2, ensure_ascii=False))

        elif args.command == "get":
            value = manager.get(args.key)
            print(json.dumps(value, indent=2, ensure_ascii=False))

        elif args.command == "set":
            # Values that are not valid JSON are stored as plain strings.
            try:
                parsed_value = json.loads(args.value)
            except json.JSONDecodeError:
                parsed_value = args.value
            manager.set(args.key, parsed_value)
            print(f"Set {args.key} = {json.dumps(parsed_value, ensure_ascii=False)}")

        elif args.command == "update":
            patch = json.loads(args.patch)
            # Only a JSON object can be merged into the state.
            if not isinstance(patch, dict):
                raise ValueError("patch must be a JSON object")
            manager.update(patch)
            print("State updated")

        elif args.command == "next-round":
            state = manager.next_round()
            print(f"Advanced to round {state['current_round']}")

        elif args.command == "archive":
            archive_path = manager.archive()
            print(f"State archived to {archive_path}")

        elif args.command == "show":
            state = manager.load()
            print(json.dumps(state, indent=2, ensure_ascii=False))

        else:
            # No sub-command was given.
            parser.print_help()
            sys.exit(1)

    except Exception as e:
        # Top-level boundary: report the failure and signal it via exit code.
        print(f"Error: {e}", file=sys.stderr)
        sys.exit(1)


if __name__ == "__main__":
    main()
@@ -0,0 +1,187 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ PaperFit Commands 测试脚本
4
+
5
+ 验证所有 commands/*.md 文件是否存在且格式正确,
6
+ 并检查它们引用的 agent 文件是否存在。
7
+ """
8
+
9
+ import json
10
+ from pathlib import Path
11
+ from typing import List, Tuple
12
+
13
+
14
class CommandTester:
    """Check the ``.claude/commands/*.md`` files and the resources they use.

    Each check appends a ``(path, passed, message)`` tuple to ``results``.
    All files are read as UTF-8, independent of the platform's default
    encoding, because the expected section markers are Chinese text.
    """

    def __init__(self, root_dir: str = "."):
        """Resolve the commands, agents and skills directories under ``root_dir``."""
        self.root = Path(root_dir)
        self.commands_dir = self.root / ".claude" / "commands"
        self.agents_dir = self.root / "agents"
        self.skills_dir = self.root / "skills"
        self.results: List[Tuple[str, bool, str]] = []

    def check_command_file(self, cmd_path: Path) -> bool:
        """Check that one command file exists and contains the required markers.

        Returns:
            ``True`` when the file exists and every marker is present.
        """
        if not cmd_path.exists():
            self.results.append((str(cmd_path), False, "文件不存在"))
            return False

        content = cmd_path.read_text(encoding="utf-8")

        # (marker that must occur in the file, message reported when absent)
        required_markers = (
            ("# /", "缺少命令标题 (# /command-name)"),
            ("作用", "缺少'作用'描述"),
            ("用法", "缺少'用法'说明"),
            ("执行流程", "缺少'执行流程'"),
        )
        problems = [msg for marker, msg in required_markers if marker not in content]

        if problems:
            self.results.append((str(cmd_path), False, "; ".join(problems)))
            return False

        self.results.append((str(cmd_path), True, "格式正确"))
        return True

    def check_agent_references(self, cmd_path: Path) -> bool:
        """Check that every ``agents/....md`` path named in the file exists.

        All references on a line are examined. A reference is a run of path
        characters (word characters, ``.``, ``/``, ``-``) starting at
        ``agents/`` and ending at the first ``.md``; paths are resolved
        relative to the project root.

        Returns:
            ``True`` when no referenced agent file is missing.
        """
        import re

        content = cmd_path.read_text(encoding="utf-8")
        references = re.findall(r"agents/[\w./-]*?\.md", content)

        # dict.fromkeys de-duplicates while keeping first-seen order.
        missing_agents = [
            ref for ref in dict.fromkeys(references)
            if not (self.root / ref).exists()
        ]

        if missing_agents:
            self.results.append((
                str(cmd_path),
                False,
                f"引用不存在的 agent: {', '.join(missing_agents)}"
            ))
            return False

        self.results.append((str(cmd_path), True, "agent 引用正确"))
        return True

    def check_skill_directories(self) -> bool:
        """Check that each required skill directory exists and has a SKILL.md.

        Returns:
            ``True`` when every ``skills/...`` entry recorded in ``results``
            passed.
        """
        required_skills = [
            "taxonomy-vto",
            "space-util-fixer",
            "float-optimizer",
            "consistency-polisher",
            "overflow-repair",
            "template-migrator",
            "visual-inspector",
            "writing-polish"
        ]

        for skill in required_skills:
            skill_path = self.skills_dir / skill
            if not skill_path.exists():
                self.results.append((
                    f"skills/{skill}",
                    False,
                    "Skill 目录不存在"
                ))
            elif not (skill_path / "SKILL.md").exists():
                self.results.append((
                    f"skills/{skill}",
                    False,
                    "缺少 SKILL.md 文件"
                ))
            else:
                self.results.append((f"skills/{skill}", True, "OK"))

        return all(r[1] for r in self.results if r[0].startswith("skills/"))

    def run_all_tests(self) -> bool:
        """Run every check, print a report, and return whether all passed.

        Returns ``False`` immediately when the commands directory is missing.
        """
        print("=" * 60)
        print("PaperFit Commands 测试报告")
        print("=" * 60)

        # The commands directory must exist.
        if not self.commands_dir.exists():
            print(f"\n错误:commands 目录不存在:{self.commands_dir}")
            return False

        # Sorted so the report order does not depend on the filesystem.
        cmd_files = sorted(self.commands_dir.glob("*.md"))
        print(f"\n发现 {len(cmd_files)} 个命令文件\n")

        for cmd_file in cmd_files:
            print(f"检查:{cmd_file.name}")
            self.check_command_file(cmd_file)
            self.check_agent_references(cmd_file)

        # Check the skills.
        print("\n检查 Skills 目录:")
        self.check_skill_directories()

        # Result summary.
        print("\n" + "=" * 60)
        print("测试结果摘要")
        print("=" * 60)

        passed = sum(1 for r in self.results if r[1])
        failed = len(self.results) - passed

        print(f"\n通过:{passed}")
        print(f"失败:{failed}")

        if failed > 0:
            print("\n失败详情:")
            for path, success, msg in self.results:
                if not success:
                    print(f" ✗ {path}: {msg}")

        # List of available commands.
        print("\n" + "=" * 60)
        print("可用命令列表")
        print("=" * 60)

        for cmd_file in cmd_files:
            self._print_command_summary(cmd_file.read_text(encoding="utf-8"))

        print("\n" + "=" * 60)

        return failed == 0

    @staticmethod
    def _print_command_summary(content: str) -> None:
        """Print the name and description of the first command heading, if any."""
        lines = content.split("\n")
        heading = next((ln for ln in lines if ln.startswith("# /")), None)
        if heading is None:
            return

        cmd_name = heading.replace("# /", "").strip().split(" ")[0]
        # The description is the first line that starts with the bold marker.
        desc_source = next((ln for ln in lines if ln.startswith("**作用**")), None)
        desc_line = ""
        if desc_source is not None:
            desc_line = desc_source.replace("**作用**:", "").strip()
        print(f" /{cmd_name} - {desc_line}")
179
+
180
+
181
if __name__ == "__main__":
    import sys

    # An optional first command-line argument selects the project root;
    # without it the current directory is used.
    cli_args = sys.argv[1:]
    project_root = cli_args[0] if cli_args else "."

    all_passed = CommandTester(project_root).run_all_tests()

    # Exit status 0 when every check passed, 1 otherwise.
    sys.exit(1 if not all_passed else 0)