tech-book-extractor-skills 1.0.8 → 1.0.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -0
- package/bin/install.js +2 -8
- package/package.json +3 -6
- package/scripts/extract_book.py +5 -2
- package/scripts/extract_chapter.py +5 -1
- package/scripts/pdf_extract_utils.py +7 -0
- package/skills/chapter-drill/SKILL.md +15 -0
- /package/{stage1 → scripts}/complexity_scanner.py +0 -0
package/README.md
CHANGED
package/bin/install.js
CHANGED
|
@@ -6,7 +6,6 @@ const os = require("os");
|
|
|
6
6
|
|
|
7
7
|
const skillsSource = path.join(__dirname, "..", "skills");
|
|
8
8
|
const skillsDest = path.join(os.homedir(), ".claude", "skills");
|
|
9
|
-
const scriptsSrc = path.join(__dirname, "..", "stage1", "complexity_scanner.py");
|
|
10
9
|
const scriptsDest = path.join(os.homedir(), ".claude", "scripts");
|
|
11
10
|
|
|
12
11
|
fs.mkdirSync(skillsDest, { recursive: true });
|
|
@@ -34,14 +33,9 @@ for (const skill of skills) {
|
|
|
34
33
|
console.log(`✓ skill: ${skill} → ${dest}`);
|
|
35
34
|
}
|
|
36
35
|
|
|
37
|
-
//
|
|
38
|
-
const scannerDest = path.join(scriptsDest, "complexity_scanner.py");
|
|
39
|
-
fs.copyFileSync(scriptsSrc, scannerDest);
|
|
40
|
-
console.log(`✓ script: complexity_scanner.py → ${scannerDest}`);
|
|
41
|
-
|
|
42
|
-
// 复制 PDF 文本提取脚本
|
|
36
|
+
// 复制 Python 脚本
|
|
43
37
|
const scriptsDir = path.join(__dirname, "..", "scripts");
|
|
44
|
-
for (const pyScript of ["pdf_extract_utils.py", "extract_book.py", "extract_chapter.py"]) {
|
|
38
|
+
for (const pyScript of ["complexity_scanner.py", "pdf_extract_utils.py", "extract_book.py", "extract_chapter.py"]) {
|
|
45
39
|
const dest = path.join(scriptsDest, pyScript);
|
|
46
40
|
fs.copyFileSync(path.join(scriptsDir, pyScript), dest);
|
|
47
41
|
console.log(`✓ script: ${pyScript} → ${dest}`);
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "tech-book-extractor-skills",
|
|
3
|
-
"version": "1.0.8",
|
|
3
|
+
"version": "1.0.10",
|
|
4
4
|
"description": "Claude Code skills for deep technical book reading — structure parsing (Stage 1) and chapter extraction (Stage 2).",
|
|
5
5
|
"bin": {
|
|
6
6
|
"tech-book-extractor-skills": "bin/install.js"
|
|
@@ -8,16 +8,13 @@
|
|
|
8
8
|
"files": [
|
|
9
9
|
"skills/",
|
|
10
10
|
"bin/",
|
|
11
|
-
"
|
|
11
|
+
"scripts/complexity_scanner.py",
|
|
12
12
|
"scripts/pdf_extract_utils.py",
|
|
13
13
|
"scripts/extract_book.py",
|
|
14
14
|
"scripts/extract_chapter.py"
|
|
15
15
|
],
|
|
16
16
|
"scripts": {
|
|
17
|
-
"install-skills": "node bin/install.js",
|
|
18
|
-
"sync": "node scripts/sync.js",
|
|
19
|
-
"release:patch": "npm run sync && npm version patch && npm publish",
|
|
20
|
-
"release:minor": "npm run sync && npm version minor && npm publish"
|
|
17
|
+
"install-skills": "node bin/install.js"
|
|
21
18
|
},
|
|
22
19
|
"repository": {
|
|
23
20
|
"type": "git",
|
package/scripts/extract_book.py
CHANGED
|
@@ -16,7 +16,7 @@ extract_book.py — 整本书文本提取脚本
|
|
|
16
16
|
|
|
17
17
|
输出:
|
|
18
18
|
{output_dir}/{书名}/{书名}-fulltext.md
|
|
19
|
-
|
|
19
|
+
中间结果(图片等):/tmp/tech-book-extractor/{书名}/
|
|
20
20
|
"""
|
|
21
21
|
|
|
22
22
|
from __future__ import annotations
|
|
@@ -32,6 +32,7 @@ from pdf_extract_utils import (
|
|
|
32
32
|
extract_pages,
|
|
33
33
|
save_results,
|
|
34
34
|
extract_book_name,
|
|
35
|
+
get_tmp_dir,
|
|
35
36
|
)
|
|
36
37
|
|
|
37
38
|
|
|
@@ -67,7 +68,9 @@ def main():
|
|
|
67
68
|
book_name = extract_book_name(pdf_path.name)
|
|
68
69
|
output_dir = Path(args.output) / book_name
|
|
69
70
|
output_path = output_dir / f"{book_name}-fulltext.md"
|
|
70
|
-
|
|
71
|
+
# 中间结果(图片等)固定放 /tmp,不污染用户输出目录
|
|
72
|
+
tmp_dir = get_tmp_dir(book_name)
|
|
73
|
+
image_dir = str(tmp_dir / "images") if args.export_images else ""
|
|
71
74
|
|
|
72
75
|
# 页码范围
|
|
73
76
|
total_pages = pdf_page_count(pdf_path)
|
|
@@ -17,6 +17,7 @@ extract_chapter.py — 单章文本提取脚本
|
|
|
17
17
|
|
|
18
18
|
输出:
|
|
19
19
|
{output_dir}/{书名}/chapters/{chapter_id}-raw.md
|
|
20
|
+
中间结果(图片等):/tmp/tech-book-extractor/{书名}/
|
|
20
21
|
"""
|
|
21
22
|
|
|
22
23
|
from __future__ import annotations
|
|
@@ -32,6 +33,7 @@ from pdf_extract_utils import (
|
|
|
32
33
|
extract_pages,
|
|
33
34
|
save_results,
|
|
34
35
|
extract_book_name,
|
|
36
|
+
get_tmp_dir,
|
|
35
37
|
parse_page_range,
|
|
36
38
|
)
|
|
37
39
|
|
|
@@ -80,7 +82,9 @@ def main():
|
|
|
80
82
|
book_name = extract_book_name(pdf_path.name)
|
|
81
83
|
output_dir = Path(args.output) / book_name / "chapters"
|
|
82
84
|
output_path = output_dir / f"{chapter_id}-raw.md"
|
|
83
|
-
|
|
85
|
+
# 中间结果(图片等)固定放 /tmp,不污染用户输出目录
|
|
86
|
+
tmp_dir = get_tmp_dir(book_name)
|
|
87
|
+
image_dir = str(tmp_dir / "images") if args.export_images else ""
|
|
84
88
|
|
|
85
89
|
page_count = page_end - page_start + 1
|
|
86
90
|
print(f"📖 {book_name}")
|
|
@@ -518,6 +518,13 @@ def save_results(
|
|
|
518
518
|
# 辅助函数
|
|
519
519
|
# ═══════════════════════════════════════════════════════════════════
|
|
520
520
|
|
|
521
|
+
def get_tmp_dir(book_name: str) -> Path:
|
|
522
|
+
"""中间结果目录:/tmp/tech-book-extractor/<书名>/"""
|
|
523
|
+
path = Path("/tmp/tech-book-extractor") / book_name
|
|
524
|
+
path.mkdir(parents=True, exist_ok=True)
|
|
525
|
+
return path
|
|
526
|
+
|
|
527
|
+
|
|
521
528
|
def extract_book_name(pdf_path: str | Path) -> str:
|
|
522
529
|
"""从 PDF 文件名提取书名(去后缀、去特殊字符)"""
|
|
523
530
|
name = Path(pdf_path).stem
|
|
@@ -8,6 +8,21 @@ description: "技术书章节深度萃取——把一章钻透。自动生成骨
|
|
|
8
8
|
|
|
9
9
|
---
|
|
10
10
|
|
|
11
|
+
## ⛔ 硬约束:骨架必须先于萃取
|
|
12
|
+
|
|
13
|
+
**本章 skill 的执行前提是 Stage 1 骨架(`stage1-skeleton.json`)已存在。**
|
|
14
|
+
|
|
15
|
+
执行前必须检查:`{output_dir}/{书名}/stage1-skeleton.json` 是否存在。
|
|
16
|
+
|
|
17
|
+
| 骨架状态 | 行为 |
|
|
18
|
+
|---------|------|
|
|
19
|
+
| 存在 | ✅ 读取骨架,进入萃取流程 |
|
|
20
|
+
| 不存在 | 🚫 **中止本章萃取**,提示用户先运行 `/book-map` 生成骨架 |
|
|
21
|
+
|
|
22
|
+
> **绝对禁止**:在骨架缺失时跳过骨架直接萃取。骨架是萃取的导航系统,没有骨架的萃取等于盲飞。
|
|
23
|
+
|
|
24
|
+
---
|
|
25
|
+
|
|
11
26
|
## 前置步骤:从 PDF 提取单章文本
|
|
12
27
|
|
|
13
28
|
如果章节原文在 PDF 里,先用提取脚本拿到文字:
|
|
File without changes
|