paper-note-claude 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/install.js +75 -0
- package/commands/paper_note.md +130 -0
- package/commands/paper_note_step0.py +78 -0
- package/package.json +26 -0
package/bin/install.js
ADDED
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
|
|
3
|
+
const fs = require('fs');
|
|
4
|
+
const path = require('path');
|
|
5
|
+
const os = require('os');
|
|
6
|
+
const { execSync } = require('child_process');
|
|
7
|
+
|
|
8
|
+
/**
 * Find a Python 3 interpreter reachable from the shell.
 *
 * Tries `python3` first, then `python`, and accepts the first one whose
 * `--version` output contains "Python 3.<digits>".
 *
 * @returns {{cmd: string, version: string} | null} The command name and its
 *   trimmed `--version` output, or null when no candidate qualifies.
 */
function detectPython() {
  const candidates = ['python3', 'python'];
  for (let i = 0; i < candidates.length; i += 1) {
    const cmd = candidates[i];
    let version;
    try {
      // stderr is merged into stdout so the version text is captured
      // whichever stream the interpreter writes it to.
      version = execSync(`${cmd} --version 2>&1`, { encoding: 'utf8' }).trim();
    } catch {
      // Command missing or exited non-zero: move on to the next candidate.
      continue;
    }
    if (/Python 3\.\d+/.test(version)) {
      return { cmd, version };
    }
  }
  return null;
}
|
|
19
|
+
|
|
20
|
+
/**
 * Find a pip command that installs into a Python 3 environment.
 *
 * `<pythonCmd> -m pip` is preferred because it is tied to the interpreter
 * that was already detected. The bare `pip3` / `pip` executables are only
 * fallbacks: they may belong to a different interpreter on the PATH, so they
 * are accepted only when their `--version` banner reports "(python 3.x)".
 *
 * @param {string} pythonCmd - Command name of the detected Python interpreter.
 * @returns {string | null} The pip command to run, or null if none is usable.
 */
function detectPip(pythonCmd) {
  const candidates = [
    { cmd: `${pythonCmd} -m pip`, checkBanner: false },
    { cmd: 'pip3', checkBanner: true },
    { cmd: 'pip', checkBanner: true },
  ];
  for (const { cmd, checkBanner } of candidates) {
    try {
      const banner = execSync(`${cmd} --version`, {
        encoding: 'utf8',
        // Capture stdout for inspection; keep probe noise off the terminal.
        stdio: ['ignore', 'pipe', 'ignore'],
      });
      if (!checkBanner || /\(python 3\./i.test(banner)) {
        return cmd;
      }
    } catch {
      // Candidate is missing or exited non-zero: try the next one.
    }
  }
  return null;
}
|
|
29
|
+
|
|
30
|
+
console.log('Installing paper-note skill for Claude Code...\n');

// 1. Copy the bundled command files into the user's Claude Code commands directory.
const destDir = path.join(os.homedir(), '.claude', 'commands');
const srcDir = path.join(__dirname, '..', 'commands');
fs.mkdirSync(destDir, { recursive: true });

['paper_note.md', 'paper_note_step0.py'].forEach((file) => {
  const target = path.join(destDir, file);
  fs.copyFileSync(path.join(srcDir, file), target);
  console.log(` copied: ${target}`);
});
console.log();

// 2. Without Python 3 the skill files are still installed; print manual
//    follow-up steps and finish with a zero exit code.
const python = detectPython();
if (!python) {
  [
    'Skill files installed, but Python 3 was not found.\n',
    'Please install Python 3 first:',
    ' https://www.python.org/downloads/\n',
    'Then run:',
    ' pip install pymupdf pymupdf4llm',
    '\nOnce done, open any folder in Claude Code and run:',
    ' /paper_note <path-to-pdf>',
  ].forEach((line) => console.log(line));
  process.exit(0);
}

console.log(`Found ${python.version}`);

// 3. Without pip the Python dependencies have to be installed by hand.
const pip = detectPip(python.cmd);
if (!pip) {
  console.log('pip not found. Please install pip and then run:');
  console.log(' pip install pymupdf pymupdf4llm');
  process.exit(0);
}

// 4. Install the Python dependencies, streaming pip's own output to the terminal.
console.log('Installing Python dependencies...\n');
let installed = true;
try {
  execSync(`${pip} install pymupdf pymupdf4llm`, { stdio: 'inherit' });
} catch {
  installed = false;
}

if (installed) {
  console.log('\nDone! Open any folder in Claude Code and run:');
  console.log(' /paper_note <path-to-pdf>');
} else {
  console.log('\npip install failed. Please run manually:');
  console.log(' pip install pymupdf pymupdf4llm');
}
|
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
# 论文阅读笔记
|
|
2
|
+
|
|
3
|
+
为 AI 领域论文自动生成结构化深度阅读笔记。
|
|
4
|
+
|
|
5
|
+
## 用法
|
|
6
|
+
|
|
7
|
+
```
|
|
8
|
+
/paper_note <pdf路径> [输出路径]
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
## 依赖
|
|
12
|
+
|
|
13
|
+
运行前需安装:
|
|
14
|
+
|
|
15
|
+
```
|
|
16
|
+
pip install pymupdf pymupdf4llm
|
|
17
|
+
```
|
|
18
|
+
|
|
19
|
+
---
|
|
20
|
+
|
|
21
|
+
## 执行流程
|
|
22
|
+
|
|
23
|
+
### Step 0:PDF 转文本
|
|
24
|
+
|
|
25
|
+
运行:
|
|
26
|
+
|
|
27
|
+
```
|
|
28
|
+
python ~/.claude/commands/paper_note_step0.py <pdf_path> [output_dir]
|
|
29
|
+
```
|
|
30
|
+
|
|
31
|
+
- 若用户提供了输出路径,作为第二个参数传入
|
|
32
|
+
- 否则脚本自动使用 `<pdf所在目录>/note/<title>/`
|
|
33
|
+
|
|
34
|
+
解析返回的 JSON,得到 `title`、`safe_title`、`fulltext_path`、`output_dir`。
|
|
35
|
+
|
|
36
|
+
若返回 `error` 字段,报告错误并停止。
|
|
37
|
+
|
|
38
|
+
读取 `fulltext_path` 中的全文 markdown,后续所有板块均基于此全文生成。
|
|
39
|
+
|
|
40
|
+
---
|
|
41
|
+
|
|
42
|
+
### Step 1:生成阅读笔记
|
|
43
|
+
|
|
44
|
+
基于全文内容,按顺序生成以下 8 个板块,最终拼合写入 `<output_dir>/<safe_title>.md`。
|
|
45
|
+
|
|
46
|
+
**通用约束:**
|
|
47
|
+
|
|
48
|
+
- 板块一至板块六、板块八内容必须严格来自论文,不允许自由发散
|
|
49
|
+
- 板块之间用 `\n---\n\n` 分隔
|
|
50
|
+
- 每个板块末尾附来源标注:`<!-- src: fulltext.md -->`
|
|
51
|
+
- 每个板块标题直接写内容标题,不加"板块X:"前缀,例如:`# 论文核心信息`,而非 `# 板块一:论文核心信息`
|
|
52
|
+
|
|
53
|
+
#### 板块一:论文核心信息
|
|
54
|
+
|
|
55
|
+
从全文开头部分提取以下字段,以列表呈现:
|
|
56
|
+
|
|
57
|
+
- 标题
|
|
58
|
+
- 标题翻译(中文)
|
|
59
|
+
- 作者
|
|
60
|
+
- 机构
|
|
61
|
+
- 发表时间
|
|
62
|
+
- 发表期刊/会议
|
|
63
|
+
- 代码仓库地址(仅从论文正文寻找,没有则留空)
|
|
64
|
+
- 使用的数据集
|
|
65
|
+
- 关键词
|
|
66
|
+
|
|
67
|
+
#### 板块二:摘要翻译
|
|
68
|
+
|
|
69
|
+
找到全文中的 Abstract 部分,完整翻译为中文,不添加额外内容。
|
|
70
|
+
|
|
71
|
+
#### 板块三:创新点
|
|
72
|
+
|
|
73
|
+
找到论文自述的 contribution 列表,逐条翻译为中文。
|
|
74
|
+
|
|
75
|
+
#### 板块四:研究问题
|
|
76
|
+
|
|
77
|
+
包含三个二级标题:
|
|
78
|
+
|
|
79
|
+
- **论文要解决的问题**:从 Introduction 提炼实际问题或痛点
|
|
80
|
+
- **现有方法为何不足**:从 Introduction 提炼现有方法局限
|
|
81
|
+
- **问题的数学定义**:从 Preliminaries 或 Method 部分提取形式化定义,保留公式
|
|
82
|
+
|
|
83
|
+
#### 板块五:方法
|
|
84
|
+
|
|
85
|
+
找到 Method 相关章节。包含两个二级标题:
|
|
86
|
+
|
|
87
|
+
- **建模思路简述**:最精简语言描述核心建模路线,目标 2 分钟能看懂
|
|
88
|
+
- **方法部分全部公式**:列出所有公式,每条附一句必要说明
|
|
89
|
+
|
|
90
|
+
#### 板块六:实验
|
|
91
|
+
|
|
92
|
+
找到 Experiments 相关章节。每个实验生成一个二级标题,每个实验下包含四个三级标题:
|
|
93
|
+
|
|
94
|
+
- **实验目的**
|
|
95
|
+
- **实验设置**(数据集、基线、指标、关键超参数)
|
|
96
|
+
- **实验结果**(文字描述结果,不插入图片)
|
|
97
|
+
- **结论**
|
|
98
|
+
|
|
99
|
+
#### 板块七:局限性与未来方向
|
|
100
|
+
|
|
101
|
+
读取 Experiments 和 Conclusion 部分。**本板块允许自由发散。**
|
|
102
|
+
|
|
103
|
+
每个观点标注来源:
|
|
104
|
+
|
|
105
|
+
- 来自论文:句末注明 `(来自论文 conclusion/experiments)`
|
|
106
|
+
- 自己发现:句末注明 `(来自 LLM 思考)`
|
|
107
|
+
|
|
108
|
+
两类观点清晰区分,不得混写。
|
|
109
|
+
|
|
110
|
+
#### 板块八:参考文献
|
|
111
|
+
|
|
112
|
+
从全文末尾找到 References 部分,以列表形式列出所有引用文献:
|
|
113
|
+
|
|
114
|
+
```
|
|
115
|
+
- 标题 — 作者 et al. — 年份
|
|
116
|
+
```
|
|
117
|
+
|
|
118
|
+
严格照录,不添加摘要或评论。超过 3 位作者写"第一作者 et al."。
|
|
119
|
+
|
|
120
|
+
---
|
|
121
|
+
|
|
122
|
+
### Step 2:幻觉核查
|
|
123
|
+
|
|
124
|
+
读取生成的笔记,对照 `fulltext.md` 核查:
|
|
125
|
+
|
|
126
|
+
- **板块一至板块六、板块八**:发现与原文不符的内容直接修改或删除
|
|
127
|
+
- **板块七**:仅核查标注 `(来自论文...)` 的观点,`(来自 LLM 思考)` 的观点不核查
|
|
128
|
+
- **板块八专项**:逐条核查文献标题、作者、年份
|
|
129
|
+
|
|
130
|
+
核查完成后覆盖写入最终笔记文件。
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Step 0: PDF → Markdown 全文提取 + 标题识别
|
|
3
|
+
用法: python paper_note_step0.py <pdf_path> [output_dir]
|
|
4
|
+
输出 JSON: {"title": "...", "safe_title": "...", "fulltext_path": "...", "output_dir": "..."}
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import sys
|
|
8
|
+
import json
|
|
9
|
+
import re
|
|
10
|
+
from pathlib import Path
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def sanitize_filename(name: str) -> str:
    """Turn an arbitrary title into a string usable as a file or directory name.

    Removes path separators, the characters < > : " | ? * and newline,
    carriage-return and tab characters, trims leading and trailing dots and
    spaces, and caps the result at 200 characters.

    Args:
        name: Raw title text.

    Returns:
        The cleaned name, or "Untitled" when nothing usable is left.
    """
    name = re.sub(r'[<>:"/\\|?*\n\r\t]', '', name)
    # Trim again after truncating: the 200-character cut can land right after
    # a space or dot, which would leave the result ending in one.
    return name.strip('. ')[:200].rstrip('. ') or "Untitled"
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def get_pdf_title(pdf_path: Path) -> str:
    """Work out a title for the PDF at ``pdf_path``.

    Resolution order:
      1. The ``title`` entry of the document metadata, when it is longer than
         5 characters after trimming.
      2. The text of the first-page span with the largest font size, counting
         only spans whose trimmed text is longer than 5 characters.
      3. The file name without its extension.

    Args:
        pdf_path: Path of the PDF file to inspect.

    Returns:
        The chosen title string.
    """
    import fitz

    doc = fitz.open(str(pdf_path))
    try:
        # Both the metadata mapping and its "title" value may be None, so
        # normalise to a string before trimming.
        title = ((doc.metadata or {}).get("title") or "").strip()
        if len(title) > 5:
            return title

        best_text = pdf_path.stem
        # A document without pages has no first page to scan.
        if doc.page_count == 0:
            return best_text

        max_size = 0
        for block in doc[0].get_text("dict")["blocks"]:
            # Only blocks of type 0 are scanned for text spans.
            if block.get("type") != 0:
                continue
            for line in block.get("lines", []):
                for span in line.get("spans", []):
                    text = span["text"].strip()
                    if span["size"] > max_size and len(text) > 5:
                        max_size = span["size"]
                        best_text = text
        return best_text
    finally:
        # Release the document handle on every exit path.
        doc.close()
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def main(pdf_path: str, output_dir: str = None):
    """Convert a PDF to Markdown and print a JSON summary on stdout.

    Progress messages go to stderr; stdout carries exactly one JSON object.
    On success the object has the keys ``title``, ``safe_title``,
    ``fulltext_path`` and ``output_dir``. On any failure it has a single
    ``error`` key and the process exits with status 1.

    Args:
        pdf_path: Path of the PDF to convert.
        output_dir: Directory for ``fulltext.md``. When omitted,
            ``<pdf directory>/note/<safe_title>/`` is used.
    """
    pdf = Path(pdf_path)
    # Validate the input before importing the heavy dependency, and require a
    # regular file: a directory path exists but cannot be converted.
    if not pdf.is_file():
        print(json.dumps({"error": f"PDF 文件不存在: {pdf_path}"}))
        sys.exit(1)

    try:
        import pymupdf4llm

        print("[Step 0] 提取标题...", file=sys.stderr)
        title = get_pdf_title(pdf)
        safe_title = sanitize_filename(title)

        out_dir = Path(output_dir) if output_dir else pdf.parent / "note" / safe_title
        out_dir.mkdir(parents=True, exist_ok=True)

        print("[Step 0] PDF 转 Markdown...", file=sys.stderr)
        md_text = pymupdf4llm.to_markdown(str(pdf))
        fulltext_path = out_dir / "fulltext.md"
        fulltext_path.write_text(md_text, encoding="utf-8")
    except Exception as exc:
        # The consumer of this script parses stdout as JSON and looks for an
        # "error" field, so report failures (missing dependency, unreadable
        # PDF, unwritable output directory) in that form, not as a traceback.
        print(json.dumps({"error": f"PDF 处理失败: {type(exc).__name__}: {exc}"}))
        sys.exit(1)

    print("[Step 0] 完成", file=sys.stderr)
    print(json.dumps({
        "title": title,
        "safe_title": safe_title,
        "fulltext_path": str(fulltext_path),
        "output_dir": str(out_dir),
    }, ensure_ascii=False))
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
# Command-line entry point: paper_note_step0.py <pdf_path> [output_dir]
if __name__ == "__main__":
    if len(sys.argv) < 2:
        # stdout carries only the JSON result, so usage help goes to stderr
        # like the progress messages do.
        print("用法: python paper_note_step0.py <pdf_path> [output_dir]", file=sys.stderr)
        sys.exit(1)
    main(sys.argv[1], sys.argv[2] if len(sys.argv) > 2 else None)
|
package/package.json
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "paper-note-claude",
|
|
3
|
+
"version": "1.0.0",
|
|
4
|
+
"description": "Claude Code skill: generate structured reading notes for AI papers",
|
|
5
|
+
"bin": {
|
|
6
|
+
"paper-note-claude": "bin/install.js"
|
|
7
|
+
},
|
|
8
|
+
"files": [
|
|
9
|
+
"bin/",
|
|
10
|
+
"commands/"
|
|
11
|
+
],
|
|
12
|
+
"keywords": [
|
|
13
|
+
"claude",
|
|
14
|
+
"claude-code",
|
|
15
|
+
"skill",
|
|
16
|
+
"slash-command",
|
|
17
|
+
"paper",
|
|
18
|
+
"research",
|
|
19
|
+
"notes",
|
|
20
|
+
"ai"
|
|
21
|
+
],
|
|
22
|
+
"license": "MIT",
|
|
23
|
+
"engines": {
|
|
24
|
+
"node": ">=14"
|
|
25
|
+
}
|
|
26
|
+
}
|