paper-wiki 0.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,101 @@
1
+ Metadata-Version: 2.4
2
+ Name: paper-wiki
3
+ Version: 0.2.0
4
+ Summary: Deep paper reading wiki: search, summarize, and analyze arXiv papers into a three-layer markdown wiki
5
+ Author: mblank
6
+ License-Expression: MIT
7
+ Project-URL: Homepage, https://github.com/mblank5/paper-wiki
8
+ Project-URL: Repository, https://github.com/mblank5/paper-wiki
9
+ Keywords: arxiv,paper,wiki,summarization,cli
10
+ Classifier: Development Status :: 3 - Alpha
11
+ Classifier: Environment :: Console
12
+ Classifier: Intended Audience :: Science/Research
13
+ Classifier: Programming Language :: Python :: 3
14
+ Classifier: Programming Language :: Python :: 3.10
15
+ Classifier: Programming Language :: Python :: 3.11
16
+ Classifier: Programming Language :: Python :: 3.12
17
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
18
+ Requires-Python: >=3.10
19
+ Description-Content-Type: text/markdown
20
+ Requires-Dist: click>=8.0
21
+ Requires-Dist: openai>=1.0
22
+ Requires-Dist: python-dotenv>=1.0
23
+ Requires-Dist: rich>=13.0
24
+ Requires-Dist: requests>=2.28
25
+
26
+ # paper-wiki
27
+
28
+ Deep paper reading wiki: search, summarize, and analyze arXiv papers into a three-layer markdown wiki.
29
+
30
+ ## Features
31
+
32
+ - Search arXiv papers via [deepxiv](https://data.rag.ac.cn) CLI
33
+ - Download paper PDFs
34
+ - Generate per-section Chinese summaries using an LLM
35
+ - Produce a full paper analysis with 5 dimensions:
36
+ - ELI5 (小学生也能懂)
37
+ - Core innovations (核心创新点)
38
+ - Test datasets & metrics (测试集与指标)
39
+ - Core weaknesses (核心缺点)
40
+ - Future directions (后期演进方向)
41
+
42
+ ## Three-Layer Wiki
43
+
44
+ ```
45
+ wiki/{arxiv_id}/
46
+ ├── README.md # Layer 1: full summary + 5 analysis aspects
47
+ ├── sections/ # Layer 2: per-section Chinese summaries
48
+ │ ├── 01_introduction.md
49
+ │ ├── 02_method.md
50
+ │ └── ...
51
+ └── paper.pdf # Layer 3: original PDF
52
+ ```
53
+
54
+ ## Install
55
+
56
+ ```bash
57
+ pip install paper-wiki
58
+ ```
59
+
60
+ ### Prerequisites
61
+
62
+ - [deepxiv](https://pypi.org/project/deepxiv/) CLI installed and configured (`pip install deepxiv`)
63
+ - OpenAI-compatible API endpoint
64
+
65
+ ## Quick Start
66
+
67
+ ```bash
68
+ # Configure API
69
+ export OPENAI_API_KEY=sk-xxx
70
+ export OPENAI_BASE_URL=https://api.openai.com/v1
71
+ export OPENAI_MODEL=gpt-4o
72
+
73
+ # Process a paper by arXiv ID
74
+ paper-wiki process 2409.05591
75
+
76
+ # Or by URL
77
+ paper-wiki process https://arxiv.org/pdf/2604.27393
78
+
79
+ # Search and interactively select
80
+ paper-wiki search "RAG long context"
81
+
82
+ # Search and auto-select first result
83
+ paper-wiki search "transformer memory" --first
84
+
85
+ # List processed papers
86
+ paper-wiki list
87
+ ```
88
+
89
+ ## Configuration
90
+
91
+ Create a `.env` file in your working directory:
92
+
93
+ ```env
94
+ OPENAI_API_KEY=sk-xxx
95
+ OPENAI_BASE_URL=https://api.openai.com/v1
96
+ OPENAI_MODEL=gpt-4o
97
+ ```
98
+
99
+ ## License
100
+
101
+ MIT
@@ -0,0 +1,76 @@
1
+ # paper-wiki
2
+
3
+ Deep paper reading wiki: search, summarize, and analyze arXiv papers into a three-layer markdown wiki.
4
+
5
+ ## Features
6
+
7
+ - Search arXiv papers via [deepxiv](https://data.rag.ac.cn) CLI
8
+ - Download paper PDFs
9
+ - Generate per-section Chinese summaries using an LLM
10
+ - Produce a full paper analysis with 5 dimensions:
11
+ - ELI5 (小学生也能懂)
12
+ - Core innovations (核心创新点)
13
+ - Test datasets & metrics (测试集与指标)
14
+ - Core weaknesses (核心缺点)
15
+ - Future directions (后期演进方向)
16
+
17
+ ## Three-Layer Wiki
18
+
19
+ ```
20
+ wiki/{arxiv_id}/
21
+ ├── README.md # Layer 1: full summary + 5 analysis aspects
22
+ ├── sections/ # Layer 2: per-section Chinese summaries
23
+ │ ├── 01_introduction.md
24
+ │ ├── 02_method.md
25
+ │ └── ...
26
+ └── paper.pdf # Layer 3: original PDF
27
+ ```
28
+
29
+ ## Install
30
+
31
+ ```bash
32
+ pip install paper-wiki
33
+ ```
34
+
35
+ ### Prerequisites
36
+
37
+ - [deepxiv](https://pypi.org/project/deepxiv/) CLI installed and configured (`pip install deepxiv`)
38
+ - OpenAI-compatible API endpoint
39
+
40
+ ## Quick Start
41
+
42
+ ```bash
43
+ # Configure API
44
+ export OPENAI_API_KEY=sk-xxx
45
+ export OPENAI_BASE_URL=https://api.openai.com/v1
46
+ export OPENAI_MODEL=gpt-4o
47
+
48
+ # Process a paper by arXiv ID
49
+ paper-wiki process 2409.05591
50
+
51
+ # Or by URL
52
+ paper-wiki process https://arxiv.org/pdf/2604.27393
53
+
54
+ # Search and interactively select
55
+ paper-wiki search "RAG long context"
56
+
57
+ # Search and auto-select first result
58
+ paper-wiki search "transformer memory" --first
59
+
60
+ # List processed papers
61
+ paper-wiki list
62
+ ```
63
+
64
+ ## Configuration
65
+
66
+ Create a `.env` file in your working directory:
67
+
68
+ ```env
69
+ OPENAI_API_KEY=sk-xxx
70
+ OPENAI_BASE_URL=https://api.openai.com/v1
71
+ OPENAI_MODEL=gpt-4o
72
+ ```
73
+
74
+ ## License
75
+
76
+ MIT
@@ -0,0 +1 @@
1
+ """paper-wiki: Deep paper reading wiki generator."""
@@ -0,0 +1,143 @@
1
+ """CLI entry point for paper-wiki."""
2
+
3
+ import re
4
+
5
+ import click
6
+ from rich.console import Console
7
+ from rich.table import Table
8
+
9
+ from . import deepxiv_client as dx
10
+ from .wiki import process_paper, list_papers
11
+
12
+ console = Console()
13
+
14
+ _ARXIV_ID_RE = re.compile(r"(?:^|/)(\d{4}\.\d{4,5})(?:$|\.pdf|v\d+)")
15
+ _ARXIV_URL_RE = re.compile(r"arxiv\.org/(?:abs|pdf)/(\d{4}\.\d{4,5})")
16
+
17
+
18
+ def _extract_arxiv_id(text: str) -> str | None:
19
+ """Extract arXiv ID from raw ID, URL, or return None."""
20
+ text = text.strip()
21
+ # Pure ID like 2604.27393
22
+ if re.fullmatch(r"\d{4}\.\d{4,5}", text):
23
+ return text
24
+ # URL like https://arxiv.org/abs/2604.27393 or .../pdf/2604.27393.pdf
25
+ m = _ARXIV_URL_RE.search(text)
26
+ if m:
27
+ return m.group(1)
28
+ return None
29
+
30
+
31
@click.group()
@click.option("--output-dir", default="wiki", envvar="PAPER_WIKI_DIR", help="Wiki output directory")
@click.pass_context
def cli(ctx, output_dir):
    """paper-wiki: Deep paper reading wiki generator."""
    # Root command group: make sure ctx.obj is a dict, then stash the output
    # directory there so every subcommand can read it back.
    ctx.ensure_object(dict)
    ctx.obj.update(output_dir=output_dir)
38
+
39
+
40
@cli.command()
@click.argument("arxiv_id")
@click.option("--model", default=None, help="Override LLM model name")
@click.pass_context
def process(ctx, arxiv_id, model):
    """Process an arXiv paper by ID or URL into a wiki page."""
    wiki_root = ctx.obj["output_dir"]
    # Reduce a URL to its bare ID; input that is not recognised is passed
    # through to the pipeline exactly as typed.
    parsed_id = _extract_arxiv_id(arxiv_id)
    paper_id = parsed_id if parsed_id else arxiv_id
    console.print(f"[bold blue]Processing paper:[/] {paper_id}")
    try:
        result_dir = process_paper(paper_id, output_dir=wiki_root, model=model)
        console.print(f"[bold green]Done![/] Wiki page: {result_dir}/README.md")
    except Exception as err:
        # Top-level CLI boundary: show the message and exit with status 1.
        console.print(f"[bold red]Error:[/] {err}")
        raise SystemExit(1)
55
+
56
+
57
def _process_and_report(arxiv_id, output_dir, model):
    """Run the wiki pipeline for one paper; print the result or exit(1) with the error."""
    try:
        paper_dir = process_paper(arxiv_id, output_dir=output_dir, model=model)
    except Exception as e:
        # Same reporting as the `process` command instead of a raw traceback.
        console.print(f"[bold red]Error:[/] {e}")
        raise SystemExit(1)
    console.print(f"[bold green]Done![/] Wiki page: {paper_dir}/README.md")


@cli.command()
@click.argument("query")
@click.option("--limit", "-l", default=5, help="Number of search results")
@click.option("--first", is_flag=True, help="Auto-select the first result")
@click.option("--model", default=None, help="Override LLM model name")
@click.pass_context
def search(ctx, query, limit, first, model):
    """Search for papers and process into wiki.

    If QUERY looks like an arXiv ID (e.g. 2604.27393) or arXiv URL,
    skip search and process directly.
    """
    output_dir = ctx.obj["output_dir"]

    # Detect direct arXiv ID / URL
    direct_id = _extract_arxiv_id(query)
    if direct_id:
        console.print(f"[bold blue]Detected arXiv ID:[/] {direct_id}, processing directly...")
        _process_and_report(direct_id, output_dir, model)
        return

    console.print(f"[bold blue]Searching:[/] {query}")

    try:
        results = dx.search(query, limit=limit)
    except Exception as e:
        console.print(f"[bold red]Search error:[/] {e}")
        raise SystemExit(1)

    if not results:
        console.print("[yellow]No results found.[/]")
        return

    # Display results
    table = Table(title="Search Results")
    table.add_column("#", style="bold")
    table.add_column("arXiv ID")
    table.add_column("Title")
    table.add_column("Date")

    for i, r in enumerate(results, 1):
        arxiv_id = r.get("arxiv_id", r.get("id", ""))
        # `or ""` keeps the slicing safe when a field is present but null.
        title = (r.get("title") or "")[:60]
        date = (r.get("publish_at") or "")[:10]
        table.add_row(str(i), arxiv_id, title, date)

    console.print(table)

    if first:
        selected = results[0]
    else:
        choice = click.prompt("Select a paper (#) or press Enter to cancel", default="")
        if not choice:
            return
        try:
            index = int(choice)
        except ValueError:
            index = 0
        # Explicit range check: "0" or a negative number must not wrap around
        # to the end of the list through negative indexing.
        if not 1 <= index <= len(results):
            console.print("[red]Invalid selection.[/]")
            return
        selected = results[index - 1]

    arxiv_id = selected.get("arxiv_id", selected.get("id", ""))
    if not arxiv_id:
        # Without an ID the pipeline would write straight into the wiki root.
        console.print("[bold red]Error:[/] selected result has no arXiv ID")
        raise SystemExit(1)
    console.print(f"\n[bold blue]Processing:[/] {selected.get('title', arxiv_id)}")
    _process_and_report(arxiv_id, output_dir, model)
122
+
123
+
124
@cli.command(name="list")
@click.pass_context
def list_cmd(ctx):
    """List all processed papers in the wiki."""
    wiki_root = ctx.obj["output_dir"]
    entries = list_papers(wiki_root)

    if not entries:
        console.print(f"[yellow]No papers found in {wiki_root}/[/]")
        return

    # One row per processed paper; titles are cut to 60 characters.
    listing = Table(title="Processed Papers")
    listing.add_column("arXiv ID", style="bold")
    for heading in ("Title", "Path"):
        listing.add_column(heading)

    for entry in entries:
        listing.add_row(entry["id"], entry["title"][:60], entry["path"])

    console.print(listing)
@@ -0,0 +1,61 @@
1
+ """Wrapper around the deepxiv CLI for paper search and retrieval."""
2
+
3
+ import json
4
+ import subprocess
5
+
6
+ import requests
7
+
8
+
9
class DeepxivError(Exception):
    """Error from deepxiv CLI."""


def _run(cmd: list[str]) -> str:
    """Run *cmd* as a subprocess and return its captured stdout as text.

    Raises:
        DeepxivError: if the executable cannot be found, or if the process
            exits with a non-zero status. In the latter case the message
            carries the command line followed by stderr (or the exit code
            when stderr is empty).
    """
    try:
        result = subprocess.run(cmd, capture_output=True, text=True)
    except FileNotFoundError as err:
        # A missing binary is the most common setup mistake; report it through
        # the module's own exception type rather than a bare OS error.
        raise DeepxivError(
            f"Command not found: {cmd[0]} (is the deepxiv CLI installed and on PATH?)"
        ) from err
    if result.returncode != 0:
        msg = result.stderr.strip() or f"deepxiv exited with code {result.returncode}"
        raise DeepxivError(f"{' '.join(cmd)}\n{msg}")
    return result.stdout
19
+
20
+
21
def search(query: str, limit: int = 10) -> list[dict]:
    """Query the deepxiv CLI for papers matching *query*.

    The CLI is asked for JSON output capped at *limit* entries. A top-level
    JSON array is returned unchanged; for a JSON object the value under
    ``"result"`` is returned, falling back to ``"results"`` and then ``[]``.
    """
    argv = ["deepxiv", "search", query, "-f", "json", "-l", str(limit)]
    payload = json.loads(_run(argv))
    if not isinstance(payload, list):
        fallback = payload.get("results", [])
        return payload.get("result", fallback)
    return payload
28
+
29
+
30
def get_metadata(arxiv_id: str) -> dict:
    """Fetch the paper's ``--head`` record from deepxiv (title, authors, sections list, etc.)."""
    argv = ["deepxiv", "paper", arxiv_id, "--head", "-f", "json"]
    return json.loads(_run(argv))
34
+
35
+
36
def get_brief(arxiv_id: str) -> dict:
    """Fetch the paper's ``--brief`` record from deepxiv (title, TLDR, keywords, citations, github_url)."""
    argv = ["deepxiv", "paper", arxiv_id, "--brief", "-f", "json"]
    return json.loads(_run(argv))
40
+
41
+
42
def get_section(arxiv_id: str, section_name: str) -> str:
    """Return the deepxiv CLI's raw output for one named section of the paper."""
    argv = ["deepxiv", "paper", arxiv_id, "--section", section_name]
    section_text = _run(argv)
    return section_text
45
+
46
+
47
def get_sections_list(arxiv_id: str) -> list[dict]:
    """Return the ``sections`` entry of the paper metadata, or ``[]`` when absent."""
    return get_metadata(arxiv_id).get("sections", [])
51
+
52
+
53
def download_pdf(arxiv_id: str, dest_path: str) -> str:
    """Download the paper's PDF to *dest_path* and return *dest_path*.

    The URL is the ``src_url`` field of the deepxiv metadata when present,
    otherwise ``https://arxiv.org/pdf/{arxiv_id}``. The response body is
    streamed in chunks to a sibling ``<dest_path>.part`` file which is renamed
    into place only after the transfer has completed, so the PDF is never held
    fully in memory and a failed download does not leave a truncated file at
    *dest_path*.

    Raises:
        DeepxivError: if the metadata lookup fails.
        requests.HTTPError: if the server answers with an error status.
    """
    import os

    meta = get_metadata(arxiv_id)
    url = meta.get("src_url") or f"https://arxiv.org/pdf/{arxiv_id}"
    tmp_path = f"{dest_path}.part"
    try:
        with requests.get(url, timeout=120, stream=True) as resp:
            resp.raise_for_status()
            with open(tmp_path, "wb") as f:
                for chunk in resp.iter_content(chunk_size=64 * 1024):
                    f.write(chunk)
        os.replace(tmp_path, dest_path)
    finally:
        # After a successful rename the temp file is gone; this only fires
        # when the transfer or the rename failed part-way.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)
    return dest_path
@@ -0,0 +1,47 @@
1
+ """OpenAI-compatible LLM client for paper analysis."""
2
+
3
+ import os
4
+
5
+ from dotenv import load_dotenv
6
+ from openai import OpenAI
7
+
8
+ load_dotenv()
9
+
10
+
11
def _default_client() -> OpenAI:
    """Build an OpenAI client from ``OPENAI_API_KEY`` and ``OPENAI_BASE_URL``.

    When unset, the key defaults to an empty string and the base URL to
    ``https://api.openai.com/v1``.
    """
    env = os.environ
    key = env.get("OPENAI_API_KEY", "")
    endpoint = env.get("OPENAI_BASE_URL", "https://api.openai.com/v1")
    return OpenAI(api_key=key, base_url=endpoint)
16
+
17
+
18
+ _client: OpenAI | None = None
19
+
20
+
21
def get_client() -> OpenAI:
    """Return the module-level client, creating it on the first call."""
    global _client
    if _client is not None:
        return _client
    _client = _default_client()
    return _client
26
+
27
+
28
def chat(
    system_prompt: str,
    user_content: str,
    model: str | None = None,
    temperature: float = 0.3,
    max_tokens: int = 4096,
) -> str:
    """Send one system + user message pair and return the assistant's reply text.

    *model* falls back to the ``OPENAI_MODEL`` environment variable and then
    to ``gpt-4o``. An empty or missing reply is returned as ``""``.
    """
    client = get_client()
    chosen_model = model if model else os.getenv("OPENAI_MODEL", "gpt-4o")
    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_content},
    ]
    completion = client.chat.completions.create(
        model=chosen_model,
        temperature=temperature,
        max_tokens=max_tokens,
        messages=conversation,
    )
    reply = completion.choices[0].message.content
    return reply if reply else ""
@@ -0,0 +1,101 @@
1
+ """LLM prompt templates for paper analysis."""
2
+
3
# Shared persona preamble (a senior AI/CS paper-analysis expert who writes in
# Chinese); every prompt template in this module is built by prefixing it.
SYSTEM_ROLE = "你是一位资深AI/CS领域的研究论文分析专家，擅长用中文进行学术分析和通俗讲解。"
4
+
5
+ SECTION_SUMMARY_PROMPT = SYSTEM_ROLE + """
6
+
7
+ 请对以下论文章节内容进行深度总结。要求:
8
+ 1. 用中文撰写,300-500字
9
+ 2. 准确概括该章节的核心内容、方法论和关键发现
10
+ 3. 保留重要的技术细节和数学概念(如有)
11
+ 4. 语言专业但清晰易懂
12
+
13
+ 章节名称:{section_name}
14
+
15
+ 章节内容:
16
+ {content}
17
+ """
18
+
19
+ FULL_SUMMARY_PROMPT = SYSTEM_ROLE + """
20
+
21
+ 以下是一篇论文各章节的中文总结。请基于这些章节总结,撰写一篇完整的论文概述。要求:
22
+ 1. 用中文撰写,800-1200字
23
+ 2. 涵盖论文的研究背景、核心方法、实验结果和结论
24
+ 3. 逻辑连贯,层次分明
25
+ 4. 面向具有一定学术背景的读者
26
+
27
+ 论文标题:{title}
28
+
29
+ 各章节总结:
30
+ {sections}
31
+ """
32
+
33
+ ELI5_PROMPT = SYSTEM_ROLE + """
34
+
35
+ 请用小学生(10岁左右)能听懂的方式,解释以下论文的核心内容。要求:
36
+ 1. 用中文撰写,200-400字
37
+ 2. 用生活中的比喻来解释复杂概念
38
+ 3. 不要使用专业术语,如果必须使用则要给出通俗解释
39
+ 4. 让孩子能理解这篇论文"解决了一个什么问题"以及"怎么解决的"
40
+
41
+ 论文标题:{title}
42
+
43
+ 论文概述:
44
+ {summary}
45
+ """
46
+
47
+ INNOVATIONS_PROMPT = SYSTEM_ROLE + """
48
+
49
+ 请分析以下论文的核心创新点。要求:
50
+ 1. 用中文列出3-5个核心创新点
51
+ 2. 每个创新点用简洁的一句话概括,然后附上简短说明
52
+ 3. 重点区分"真正的新贡献"和"已有方法的改进"
53
+ 4. 按重要性排序
54
+
55
+ 论文标题:{title}
56
+
57
+ 论文概述:
58
+ {summary}
59
+ """
60
+
61
+ TEST_METRICS_PROMPT = SYSTEM_ROLE + """
62
+
63
+ 请提取以下论文中使用的测试集和评估指标。要求:
64
+ 1. 列出论文使用的主要测试/评估数据集
65
+ 2. 列出主要评估指标及论文报告的具体数值
66
+ 3. 如有对比实验,列出基线方法和本文方法的结果对比
67
+ 4. 用中文撰写,但数据集名称和指标名称保留英文原文
68
+
69
+ 论文标题:{title}
70
+
71
+ 论文概述:
72
+ {summary}
73
+ """
74
+
75
+ WEAKNESSES_PROMPT = SYSTEM_ROLE + """
76
+
77
+ 请分析以下论文的核心缺点和局限性。要求:
78
+ 1. 用中文列出3-5个主要缺点
79
+ 2. 从以下维度分析:方法论的局限性、实验设计的不足、适用范围的限制、潜在的偏差
80
+ 3. 客观公正,既不过分苛刻也不回避问题
81
+ 4. 每个缺点给出具体论据支持
82
+
83
+ 论文标题:{title}
84
+
85
+ 论文概述:
86
+ {summary}
87
+ """
88
+
89
+ FUTURE_DIRECTIONS_PROMPT = SYSTEM_ROLE + """
90
+
91
+ 请基于以下论文的内容,分析其后期可能的研究演进方向。要求:
92
+ 1. 用中文列出3-5个未来研究方向
93
+ 2. 包括论文作者自己提到的方向和你基于领域知识的合理推测
94
+ 3. 每个方向说明:要解决什么问题、可能的思路、预期影响
95
+ 4. 结合当前AI领域的最新发展趋势
96
+
97
+ 论文标题:{title}
98
+
99
+ 论文概述:
100
+ {summary}
101
+ """
@@ -0,0 +1,167 @@
1
+ """Wiki generation orchestration: fetch paper, summarize, assemble."""
2
+
3
+ import re
4
+ import os
5
+ from pathlib import Path
6
+
7
+ from rich.progress import Progress, SpinnerColumn, TextColumn
8
+
9
+ from . import deepxiv_client as dx
10
+ from . import llm
11
+ from . import prompts
12
+
13
+
14
def _slug(name: str) -> str:
    """Convert a section name to a file-system friendly slug.

    A leading section number (``"1. "``, ``"2 "``, ``"3.1 "``) or a leading
    ``"Appendix X"`` label is dropped, the rest is lower-cased, and every run
    of characters outside ``a-z0-9`` becomes a single underscore. Names that
    reduce to nothing (empty, numbers only, non-ASCII only) yield
    ``"section"``.

    The number is only stripped when it is followed by a dot, whitespace or
    the end of the name, so titles such as "2D Convolution" keep their digit;
    the appendix letter must be a whole word, so "Appendix Details" is kept
    intact.
    """
    s = re.sub(
        r"^(\d+(?:\.\d+)*(?:\.\s*|\s+|$)|Appendix\s+[A-Z]\b[.:]?\s*)",
        "",
        name.strip(),
    )
    s = s.strip().lower()
    s = re.sub(r"[^a-z0-9]+", "_", s)
    return s.strip("_") or "section"
21
+
22
+
23
def _format_authors(raw_authors) -> str:
    """Flatten the metadata ``authors`` field (str, list of str, or list of dicts) into one line."""
    if not raw_authors:
        return ""
    if isinstance(raw_authors, str):
        return raw_authors
    names = (
        str(a.get("name", "")) if isinstance(a, dict) else str(a)
        for a in raw_authors
    )
    return ", ".join(n for n in names if n)


def _render_readme(title, authors, pub_date, arxiv_id, full_summary, aspects, section_links) -> str:
    """Assemble the top-level README.md text for one paper."""
    return f"""# {title}

> Authors: {authors} | Published: {pub_date} | arXiv: [{arxiv_id}](https://arxiv.org/abs/{arxiv_id})

## 论文概述

{full_summary}

## 小学生也能懂

{aspects["eli5"]}

## 核心创新点

{aspects["innovations"]}

## 测试集与指标

{aspects["test_metrics"]}

## 核心缺点

{aspects["weaknesses"]}

## 后期演进方向

{aspects["future"]}

---

## 章节详解

{section_links}

## 原文

- [PDF](paper.pdf)
"""


def process_paper(arxiv_id: str, output_dir: str = "wiki", model: str | None = None) -> Path:
    """Full pipeline: fetch paper, summarize sections, generate wiki.

    Creates ``<output_dir>/<arxiv_id>/`` containing ``paper.pdf``, one
    ``sections/NN_<slug>.md`` file per section listed in the metadata, and a
    ``README.md`` with the overall summary, five analysis aspects and links
    to the section files.

    Args:
        arxiv_id: Paper identifier; also used as the directory name.
        output_dir: Root directory of the wiki.
        model: Optional model name forwarded to every LLM call.

    Returns:
        The paper's directory inside *output_dir*.

    Errors from the deepxiv client, the PDF download and the LLM client are
    not caught here.
    """
    paper_dir = Path(output_dir) / arxiv_id
    sections_dir = paper_dir / "sections"
    sections_dir.mkdir(parents=True, exist_ok=True)

    with Progress(
        SpinnerColumn(),
        TextColumn("[progress.description]{task.description}"),
        transient=True,
    ) as progress:
        # 1. Fetch metadata (the brief record is not needed by this pipeline)
        t = progress.add_task("Fetching paper metadata...", total=None)
        metadata = dx.get_metadata(arxiv_id)
        # `or` fallbacks also cover fields that are present but null/empty.
        title = metadata.get("title") or arxiv_id
        authors = _format_authors(metadata.get("authors"))
        pub_date = (metadata.get("publish_at") or "")[:10]
        sections_list = metadata.get("sections") or []
        progress.update(t, completed=True)

        # 2. Download PDF
        progress.update(t, description="Downloading PDF...")
        dx.download_pdf(arxiv_id, str(paper_dir / "paper.pdf"))
        progress.update(t, completed=True)

        # 3. Summarize each section
        section_summaries: list[tuple[str, str, str]] = []  # (name, filename, summary)
        for i, sec in enumerate(sections_list, 1):
            sec_name = sec["name"]
            progress.update(t, description=f"Summarizing section: {sec_name}...")
            filename = f"{i:02d}_{_slug(sec_name)}.md"

            content = dx.get_section(arxiv_id, sec_name)
            # The filled-in template is sent as the system prompt; the user
            # message is left empty.
            summary = llm.chat(
                prompts.SECTION_SUMMARY_PROMPT.format(section_name=sec_name, content=content),
                "",
                model=model,
            )

            sec_path = sections_dir / filename
            sec_path.write_text(f"# {sec_name}\n\n{summary}\n", encoding="utf-8")
            section_summaries.append((sec_name, filename, summary))

        progress.update(t, completed=True)

        # 4. Full paper summary from section summaries
        progress.update(t, description="Generating full paper summary...")
        all_sections_text = "\n\n---\n\n".join(
            f"### {name}\n{summary}" for name, _, summary in section_summaries
        )
        full_summary = llm.chat(
            prompts.FULL_SUMMARY_PROMPT.format(title=title, sections=all_sections_text),
            "",
            model=model,
        )
        progress.update(t, completed=True)

        # 5. Five analysis aspects, each derived from the full summary
        aspects = {}
        aspect_defs = [
            ("eli5", "小学生也能懂", prompts.ELI5_PROMPT),
            ("innovations", "核心创新点", prompts.INNOVATIONS_PROMPT),
            ("test_metrics", "测试集与指标", prompts.TEST_METRICS_PROMPT),
            ("weaknesses", "核心缺点", prompts.WEAKNESSES_PROMPT),
            ("future", "后期演进方向", prompts.FUTURE_DIRECTIONS_PROMPT),
        ]
        for key, label, prompt_tmpl in aspect_defs:
            progress.update(t, description=f"Analyzing: {label}...")
            aspects[key] = llm.chat(
                prompt_tmpl.format(title=title, summary=full_summary),
                "",
                model=model,
            )
        progress.update(t, completed=True)

        # 6. Assemble README.md
        section_links = "\n".join(
            f"- [{name}](sections/{fname})" for name, fname, _ in section_summaries
        )
        readme = _render_readme(
            title, authors, pub_date, arxiv_id, full_summary, aspects, section_links
        )

        readme_path = paper_dir / "README.md"
        readme_path.write_text(readme, encoding="utf-8")
    return paper_dir
152
+
153
+
154
def list_papers(output_dir: str = "wiki") -> list[dict]:
    """List all processed papers in the wiki directory.

    A paper is any immediate sub-directory of *output_dir* that contains a
    ``README.md`` file. Entries are returned in sorted path order as dicts
    with ``id`` (directory name), ``title`` (first README line with leading
    ``#``/spaces removed) and ``path`` (directory path as a string).

    Returns ``[]`` when *output_dir* does not exist or is not a directory.
    """
    wiki = Path(output_dir)
    if not wiki.is_dir():
        return []
    papers = []
    for d in sorted(wiki.iterdir()):
        readme = d / "README.md"
        if not (d.is_dir() and readme.is_file()):
            continue
        # Only the heading is needed, so read a single line rather than the
        # whole (potentially long) README.
        with readme.open(encoding="utf-8") as fh:
            first_line = fh.readline()
        title = first_line.lstrip("# ").strip()
        papers.append({"id": d.name, "title": title, "path": str(d)})
    return papers
@@ -0,0 +1,101 @@
1
+ Metadata-Version: 2.4
2
+ Name: paper-wiki
3
+ Version: 0.2.0
4
+ Summary: Deep paper reading wiki: search, summarize, and analyze arXiv papers into a three-layer markdown wiki
5
+ Author: mblank
6
+ License-Expression: MIT
7
+ Project-URL: Homepage, https://github.com/mblank5/paper-wiki
8
+ Project-URL: Repository, https://github.com/mblank5/paper-wiki
9
+ Keywords: arxiv,paper,wiki,summarization,cli
10
+ Classifier: Development Status :: 3 - Alpha
11
+ Classifier: Environment :: Console
12
+ Classifier: Intended Audience :: Science/Research
13
+ Classifier: Programming Language :: Python :: 3
14
+ Classifier: Programming Language :: Python :: 3.10
15
+ Classifier: Programming Language :: Python :: 3.11
16
+ Classifier: Programming Language :: Python :: 3.12
17
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
18
+ Requires-Python: >=3.10
19
+ Description-Content-Type: text/markdown
20
+ Requires-Dist: click>=8.0
21
+ Requires-Dist: openai>=1.0
22
+ Requires-Dist: python-dotenv>=1.0
23
+ Requires-Dist: rich>=13.0
24
+ Requires-Dist: requests>=2.28
25
+
26
+ # paper-wiki
27
+
28
+ Deep paper reading wiki: search, summarize, and analyze arXiv papers into a three-layer markdown wiki.
29
+
30
+ ## Features
31
+
32
+ - Search arXiv papers via [deepxiv](https://data.rag.ac.cn) CLI
33
+ - Download paper PDFs
34
+ - Generate per-section Chinese summaries using an LLM
35
+ - Produce a full paper analysis with 5 dimensions:
36
+ - ELI5 (小学生也能懂)
37
+ - Core innovations (核心创新点)
38
+ - Test datasets & metrics (测试集与指标)
39
+ - Core weaknesses (核心缺点)
40
+ - Future directions (后期演进方向)
41
+
42
+ ## Three-Layer Wiki
43
+
44
+ ```
45
+ wiki/{arxiv_id}/
46
+ ├── README.md # Layer 1: full summary + 5 analysis aspects
47
+ ├── sections/ # Layer 2: per-section Chinese summaries
48
+ │ ├── 01_introduction.md
49
+ │ ├── 02_method.md
50
+ │ └── ...
51
+ └── paper.pdf # Layer 3: original PDF
52
+ ```
53
+
54
+ ## Install
55
+
56
+ ```bash
57
+ pip install paper-wiki
58
+ ```
59
+
60
+ ### Prerequisites
61
+
62
+ - [deepxiv](https://pypi.org/project/deepxiv/) CLI installed and configured (`pip install deepxiv`)
63
+ - OpenAI-compatible API endpoint
64
+
65
+ ## Quick Start
66
+
67
+ ```bash
68
+ # Configure API
69
+ export OPENAI_API_KEY=sk-xxx
70
+ export OPENAI_BASE_URL=https://api.openai.com/v1
71
+ export OPENAI_MODEL=gpt-4o
72
+
73
+ # Process a paper by arXiv ID
74
+ paper-wiki process 2409.05591
75
+
76
+ # Or by URL
77
+ paper-wiki process https://arxiv.org/pdf/2604.27393
78
+
79
+ # Search and interactively select
80
+ paper-wiki search "RAG long context"
81
+
82
+ # Search and auto-select first result
83
+ paper-wiki search "transformer memory" --first
84
+
85
+ # List processed papers
86
+ paper-wiki list
87
+ ```
88
+
89
+ ## Configuration
90
+
91
+ Create a `.env` file in your working directory:
92
+
93
+ ```env
94
+ OPENAI_API_KEY=sk-xxx
95
+ OPENAI_BASE_URL=https://api.openai.com/v1
96
+ OPENAI_MODEL=gpt-4o
97
+ ```
98
+
99
+ ## License
100
+
101
+ MIT
@@ -0,0 +1,14 @@
1
+ README.md
2
+ pyproject.toml
3
+ paper_wiki/__init__.py
4
+ paper_wiki/cli.py
5
+ paper_wiki/deepxiv_client.py
6
+ paper_wiki/llm.py
7
+ paper_wiki/prompts.py
8
+ paper_wiki/wiki.py
9
+ paper_wiki.egg-info/PKG-INFO
10
+ paper_wiki.egg-info/SOURCES.txt
11
+ paper_wiki.egg-info/dependency_links.txt
12
+ paper_wiki.egg-info/entry_points.txt
13
+ paper_wiki.egg-info/requires.txt
14
+ paper_wiki.egg-info/top_level.txt
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ paper-wiki = paper_wiki.cli:cli
@@ -0,0 +1,5 @@
1
+ click>=8.0
2
+ openai>=1.0
3
+ python-dotenv>=1.0
4
+ rich>=13.0
5
+ requests>=2.28
@@ -0,0 +1 @@
1
+ paper_wiki
@@ -0,0 +1,42 @@
1
+ [project]
2
+ name = "paper-wiki"
3
+ version = "0.2.0"
4
+ description = "Deep paper reading wiki: search, summarize, and analyze arXiv papers into a three-layer markdown wiki"
5
+ readme = "README.md"
6
+ license = "MIT"
7
+ requires-python = ">=3.10"
8
+ authors = [
9
+ { name = "mblank" },
10
+ ]
11
+ keywords = ["arxiv", "paper", "wiki", "summarization", "cli"]
12
+ classifiers = [
13
+ "Development Status :: 3 - Alpha",
14
+ "Environment :: Console",
15
+ "Intended Audience :: Science/Research",
16
+ "Programming Language :: Python :: 3",
17
+ "Programming Language :: Python :: 3.10",
18
+ "Programming Language :: Python :: 3.11",
19
+ "Programming Language :: Python :: 3.12",
20
+ "Topic :: Scientific/Engineering :: Artificial Intelligence",
21
+ ]
22
+ dependencies = [
23
+ "click>=8.0",
24
+ "openai>=1.0",
25
+ "python-dotenv>=1.0",
26
+ "rich>=13.0",
27
+ "requests>=2.28",
28
+ ]
29
+
30
+ [project.urls]
31
+ Homepage = "https://github.com/mblank5/paper-wiki"
32
+ Repository = "https://github.com/mblank5/paper-wiki"
33
+
34
+ [project.scripts]
35
+ paper-wiki = "paper_wiki.cli:cli"
36
+
37
+ [build-system]
38
+ requires = ["setuptools>=68.0"]
39
+ build-backend = "setuptools.build_meta"
40
+
41
+ [tool.setuptools.packages.find]
42
+ include = ["paper_wiki*"]
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+