tech-book-extractor-skills 1.0.8 → 1.0.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -2,6 +2,8 @@
2
2
 
3
3
  Claude Code 技能:技术书深度萃取——两阶段流水线。
4
4
 
5
+ [![npm version](https://img.shields.io/npm/v/tech-book-extractor-skills)](https://www.npmjs.com/package/tech-book-extractor-skills)
6
+
5
7
  ## 安装
6
8
 
7
9
  ```bash
package/bin/install.js CHANGED
@@ -6,7 +6,6 @@ const os = require("os");
6
6
 
7
7
  const skillsSource = path.join(__dirname, "..", "skills");
8
8
  const skillsDest = path.join(os.homedir(), ".claude", "skills");
9
- const scriptsSrc = path.join(__dirname, "..", "stage1", "complexity_scanner.py");
10
9
  const scriptsDest = path.join(os.homedir(), ".claude", "scripts");
11
10
 
12
11
  fs.mkdirSync(skillsDest, { recursive: true });
@@ -34,14 +33,9 @@ for (const skill of skills) {
34
33
  console.log(`✓ skill: ${skill} → ${dest}`);
35
34
  }
36
35
 
37
- // 复制预处理脚本
38
- const scannerDest = path.join(scriptsDest, "complexity_scanner.py");
39
- fs.copyFileSync(scriptsSrc, scannerDest);
40
- console.log(`✓ script: complexity_scanner.py → ${scannerDest}`);
41
-
42
- // 复制 PDF 文本提取脚本
36
+ // 复制 Python 脚本
43
37
  const scriptsDir = path.join(__dirname, "..", "scripts");
44
- for (const pyScript of ["pdf_extract_utils.py", "extract_book.py", "extract_chapter.py"]) {
38
+ for (const pyScript of ["complexity_scanner.py", "pdf_extract_utils.py", "extract_book.py", "extract_chapter.py"]) {
45
39
  const dest = path.join(scriptsDest, pyScript);
46
40
  fs.copyFileSync(path.join(scriptsDir, pyScript), dest);
47
41
  console.log(`✓ script: ${pyScript} → ${dest}`);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "tech-book-extractor-skills",
3
- "version": "1.0.8",
3
+ "version": "1.0.10",
4
4
  "description": "Claude Code skills for deep technical book reading — structure parsing (Stage 1) and chapter extraction (Stage 2).",
5
5
  "bin": {
6
6
  "tech-book-extractor-skills": "bin/install.js"
@@ -8,16 +8,13 @@
8
8
  "files": [
9
9
  "skills/",
10
10
  "bin/",
11
- "stage1/complexity_scanner.py",
11
+ "scripts/complexity_scanner.py",
12
12
  "scripts/pdf_extract_utils.py",
13
13
  "scripts/extract_book.py",
14
14
  "scripts/extract_chapter.py"
15
15
  ],
16
16
  "scripts": {
17
- "install-skills": "node bin/install.js",
18
- "sync": "node scripts/sync.js",
19
- "release:patch": "npm run sync && npm version patch && npm publish",
20
- "release:minor": "npm run sync && npm version minor && npm publish"
17
+ "install-skills": "node bin/install.js"
21
18
  },
22
19
  "repository": {
23
20
  "type": "git",
@@ -16,7 +16,7 @@ extract_book.py — 整本书文本提取脚本
16
16
 
17
17
  输出:
18
18
  {output_dir}/{书名}/{书名}-fulltext.md
19
- {output_dir}/{书名}/images/ # 仅 --export-images 时
19
+ 中间结果(图片等):/tmp/tech-book-extractor/{书名}/
20
20
  """
21
21
 
22
22
  from __future__ import annotations
@@ -32,6 +32,7 @@ from pdf_extract_utils import (
32
32
  extract_pages,
33
33
  save_results,
34
34
  extract_book_name,
35
+ get_tmp_dir,
35
36
  )
36
37
 
37
38
 
@@ -67,7 +68,9 @@ def main():
67
68
  book_name = extract_book_name(pdf_path.name)
68
69
  output_dir = Path(args.output) / book_name
69
70
  output_path = output_dir / f"{book_name}-fulltext.md"
70
- image_dir = str(output_dir / "images") if args.export_images else ""
71
+ # 中间结果(图片等)固定放 /tmp,不污染用户输出目录
72
+ tmp_dir = get_tmp_dir(book_name)
73
+ image_dir = str(tmp_dir / "images") if args.export_images else ""
71
74
 
72
75
  # 页码范围
73
76
  total_pages = pdf_page_count(pdf_path)
@@ -17,6 +17,7 @@ extract_chapter.py — 单章文本提取脚本
17
17
 
18
18
  输出:
19
19
  {output_dir}/{书名}/chapters/{chapter_id}-raw.md
20
+ 中间结果(图片等):/tmp/tech-book-extractor/{书名}/
20
21
  """
21
22
 
22
23
  from __future__ import annotations
@@ -32,6 +33,7 @@ from pdf_extract_utils import (
32
33
  extract_pages,
33
34
  save_results,
34
35
  extract_book_name,
36
+ get_tmp_dir,
35
37
  parse_page_range,
36
38
  )
37
39
 
@@ -80,7 +82,9 @@ def main():
80
82
  book_name = extract_book_name(pdf_path.name)
81
83
  output_dir = Path(args.output) / book_name / "chapters"
82
84
  output_path = output_dir / f"{chapter_id}-raw.md"
83
- image_dir = str(Path(args.output) / book_name / "images") if args.export_images else ""
85
+ # 中间结果(图片等)固定放 /tmp,不污染用户输出目录
86
+ tmp_dir = get_tmp_dir(book_name)
87
+ image_dir = str(tmp_dir / "images") if args.export_images else ""
84
88
 
85
89
  page_count = page_end - page_start + 1
86
90
  print(f"📖 {book_name}")
@@ -518,6 +518,13 @@ def save_results(
518
518
  # 辅助函数
519
519
  # ═══════════════════════════════════════════════════════════════════
520
520
 
521
+ def get_tmp_dir(book_name: str) -> Path:
522
+ """中间结果目录:/tmp/tech-book-extractor/<书名>/"""
523
+ path = Path("/tmp/tech-book-extractor") / book_name
524
+ path.mkdir(parents=True, exist_ok=True)
525
+ return path
526
+
527
+
521
528
  def extract_book_name(pdf_path: str | Path) -> str:
522
529
  """从 PDF 文件名提取书名(去后缀、去特殊字符)"""
523
530
  name = Path(pdf_path).stem
@@ -8,6 +8,21 @@ description: "技术书章节深度萃取——把一章钻透。自动生成骨
8
8
 
9
9
  ---
10
10
 
11
+ ## ⛔ 硬约束:骨架必须先于萃取
12
+
13
+ **本 skill 的执行前提是 Stage 1 骨架(`stage1-skeleton.json`)已存在。**
14
+
15
+ 执行前必须检查:`{output_dir}/{书名}/stage1-skeleton.json` 是否存在。
16
+
17
+ | 骨架状态 | 行为 |
18
+ |---------|------|
19
+ | 存在 | ✅ 读取骨架,进入萃取流程 |
20
+ | 不存在 | 🚫 **中止本章萃取**,提示用户先运行 `/book-map` 生成骨架 |
21
+
22
+ > **绝对禁止**:在骨架缺失时跳过骨架直接萃取。骨架是萃取的导航系统,没有骨架的萃取等于盲飞。
23
+
24
+ ---
25
+
11
26
  ## 前置步骤:从 PDF 提取单章文本
12
27
 
13
28
  如果章节原文在 PDF 里,先用提取脚本拿到文字:
File without changes