tech-book-extractor-skills 1.0.8 → 1.0.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md
CHANGED
package/bin/install.js
CHANGED
|
@@ -6,7 +6,6 @@ const os = require("os");
|
|
|
6
6
|
|
|
7
7
|
const skillsSource = path.join(__dirname, "..", "skills");
|
|
8
8
|
const skillsDest = path.join(os.homedir(), ".claude", "skills");
|
|
9
|
-
const scriptsSrc = path.join(__dirname, "..", "stage1", "complexity_scanner.py");
|
|
10
9
|
const scriptsDest = path.join(os.homedir(), ".claude", "scripts");
|
|
11
10
|
|
|
12
11
|
fs.mkdirSync(skillsDest, { recursive: true });
|
|
@@ -34,14 +33,9 @@ for (const skill of skills) {
|
|
|
34
33
|
console.log(`✓ skill: ${skill} → ${dest}`);
|
|
35
34
|
}
|
|
36
35
|
|
|
37
|
-
//
|
|
38
|
-
const scannerDest = path.join(scriptsDest, "complexity_scanner.py");
|
|
39
|
-
fs.copyFileSync(scriptsSrc, scannerDest);
|
|
40
|
-
console.log(`✓ script: complexity_scanner.py → ${scannerDest}`);
|
|
41
|
-
|
|
42
|
-
// 复制 PDF 文本提取脚本
|
|
36
|
+
// 复制 Python 脚本
|
|
43
37
|
const scriptsDir = path.join(__dirname, "..", "scripts");
|
|
44
|
-
for (const pyScript of ["pdf_extract_utils.py", "extract_book.py", "extract_chapter.py"]) {
|
|
38
|
+
for (const pyScript of ["complexity_scanner.py", "pdf_extract_utils.py", "extract_book.py", "extract_chapter.py"]) {
|
|
45
39
|
const dest = path.join(scriptsDest, pyScript);
|
|
46
40
|
fs.copyFileSync(path.join(scriptsDir, pyScript), dest);
|
|
47
41
|
console.log(`✓ script: ${pyScript} → ${dest}`);
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "tech-book-extractor-skills",
|
|
3
|
-
"version": "1.0.8",
|
|
3
|
+
"version": "1.0.9",
|
|
4
4
|
"description": "Claude Code skills for deep technical book reading — structure parsing (Stage 1) and chapter extraction (Stage 2).",
|
|
5
5
|
"bin": {
|
|
6
6
|
"tech-book-extractor-skills": "bin/install.js"
|
|
@@ -8,16 +8,13 @@
|
|
|
8
8
|
"files": [
|
|
9
9
|
"skills/",
|
|
10
10
|
"bin/",
|
|
11
|
-
"
|
|
11
|
+
"scripts/complexity_scanner.py",
|
|
12
12
|
"scripts/pdf_extract_utils.py",
|
|
13
13
|
"scripts/extract_book.py",
|
|
14
14
|
"scripts/extract_chapter.py"
|
|
15
15
|
],
|
|
16
16
|
"scripts": {
|
|
17
|
-
"install-skills": "node bin/install.js",
|
|
18
|
-
"sync": "node scripts/sync.js",
|
|
19
|
-
"release:patch": "npm run sync && npm version patch && npm publish",
|
|
20
|
-
"release:minor": "npm run sync && npm version minor && npm publish"
|
|
17
|
+
"install-skills": "node bin/install.js"
|
|
21
18
|
},
|
|
22
19
|
"repository": {
|
|
23
20
|
"type": "git",
|
package/scripts/extract_book.py
CHANGED
|
@@ -16,7 +16,7 @@ extract_book.py — 整本书文本提取脚本
|
|
|
16
16
|
|
|
17
17
|
输出:
|
|
18
18
|
{output_dir}/{书名}/{书名}-fulltext.md
|
|
19
|
-
|
|
19
|
+
中间结果(图片等):/tmp/tech-book-extractor/{书名}/
|
|
20
20
|
"""
|
|
21
21
|
|
|
22
22
|
from __future__ import annotations
|
|
@@ -32,6 +32,7 @@ from pdf_extract_utils import (
|
|
|
32
32
|
extract_pages,
|
|
33
33
|
save_results,
|
|
34
34
|
extract_book_name,
|
|
35
|
+
get_tmp_dir,
|
|
35
36
|
)
|
|
36
37
|
|
|
37
38
|
|
|
@@ -67,7 +68,9 @@ def main():
|
|
|
67
68
|
book_name = extract_book_name(pdf_path.name)
|
|
68
69
|
output_dir = Path(args.output) / book_name
|
|
69
70
|
output_path = output_dir / f"{book_name}-fulltext.md"
|
|
70
|
-
|
|
71
|
+
# 中间结果(图片等)固定放 /tmp,不污染用户输出目录
|
|
72
|
+
tmp_dir = get_tmp_dir(book_name)
|
|
73
|
+
image_dir = str(tmp_dir / "images") if args.export_images else ""
|
|
71
74
|
|
|
72
75
|
# 页码范围
|
|
73
76
|
total_pages = pdf_page_count(pdf_path)
|
|
package/scripts/extract_chapter.py
CHANGED
|
@@ -17,6 +17,7 @@ extract_chapter.py — 单章文本提取脚本
|
|
|
17
17
|
|
|
18
18
|
输出:
|
|
19
19
|
{output_dir}/{书名}/chapters/{chapter_id}-raw.md
|
|
20
|
+
中间结果(图片等):/tmp/tech-book-extractor/{书名}/
|
|
20
21
|
"""
|
|
21
22
|
|
|
22
23
|
from __future__ import annotations
|
|
@@ -32,6 +33,7 @@ from pdf_extract_utils import (
|
|
|
32
33
|
extract_pages,
|
|
33
34
|
save_results,
|
|
34
35
|
extract_book_name,
|
|
36
|
+
get_tmp_dir,
|
|
35
37
|
parse_page_range,
|
|
36
38
|
)
|
|
37
39
|
|
|
@@ -80,7 +82,9 @@ def main():
|
|
|
80
82
|
book_name = extract_book_name(pdf_path.name)
|
|
81
83
|
output_dir = Path(args.output) / book_name / "chapters"
|
|
82
84
|
output_path = output_dir / f"{chapter_id}-raw.md"
|
|
83
|
-
|
|
85
|
+
# 中间结果(图片等)固定放 /tmp,不污染用户输出目录
|
|
86
|
+
tmp_dir = get_tmp_dir(book_name)
|
|
87
|
+
image_dir = str(tmp_dir / "images") if args.export_images else ""
|
|
84
88
|
|
|
85
89
|
page_count = page_end - page_start + 1
|
|
86
90
|
print(f"📖 {book_name}")
|
|
package/scripts/pdf_extract_utils.py
CHANGED
|
@@ -518,6 +518,13 @@ def save_results(
|
|
|
518
518
|
# 辅助函数
|
|
519
519
|
# ═══════════════════════════════════════════════════════════════════
|
|
520
520
|
|
|
521
|
+
def get_tmp_dir(book_name: str) -> Path:
|
|
522
|
+
"""中间结果目录:/tmp/tech-book-extractor/<书名>/"""
|
|
523
|
+
path = Path("/tmp/tech-book-extractor") / book_name
|
|
524
|
+
path.mkdir(parents=True, exist_ok=True)
|
|
525
|
+
return path
|
|
526
|
+
|
|
527
|
+
|
|
521
528
|
def extract_book_name(pdf_path: str | Path) -> str:
|
|
522
529
|
"""从 PDF 文件名提取书名(去后缀、去特殊字符)"""
|
|
523
530
|
name = Path(pdf_path).stem
|
|
File without changes
|