py-scholar-agent 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (123) hide show
  1. py_scholar_agent-0.1.0/LICENSE +21 -0
  2. py_scholar_agent-0.1.0/MANIFEST.in +5 -0
  3. py_scholar_agent-0.1.0/PKG-INFO +225 -0
  4. py_scholar_agent-0.1.0/README.md +183 -0
  5. py_scholar_agent-0.1.0/pyproject.toml +122 -0
  6. py_scholar_agent-0.1.0/setup.cfg +4 -0
  7. py_scholar_agent-0.1.0/skills/paper-analysis/SKILL.md +80 -0
  8. py_scholar_agent-0.1.0/skills/paper-workflow/SKILL.md +56 -0
  9. py_scholar_agent-0.1.0/src/py_scholar_agent.egg-info/PKG-INFO +225 -0
  10. py_scholar_agent-0.1.0/src/py_scholar_agent.egg-info/SOURCES.txt +121 -0
  11. py_scholar_agent-0.1.0/src/py_scholar_agent.egg-info/dependency_links.txt +1 -0
  12. py_scholar_agent-0.1.0/src/py_scholar_agent.egg-info/entry_points.txt +2 -0
  13. py_scholar_agent-0.1.0/src/py_scholar_agent.egg-info/requires.txt +20 -0
  14. py_scholar_agent-0.1.0/src/py_scholar_agent.egg-info/top_level.txt +1 -0
  15. py_scholar_agent-0.1.0/src/scholar_agent/__init__.py +12 -0
  16. py_scholar_agent-0.1.0/src/scholar_agent/adapters/__init__.py +1 -0
  17. py_scholar_agent-0.1.0/src/scholar_agent/adapters/mcp_server.py +7 -0
  18. py_scholar_agent-0.1.0/src/scholar_agent/cli.py +904 -0
  19. py_scholar_agent-0.1.0/src/scholar_agent/config/__init__.py +12 -0
  20. py_scholar_agent-0.1.0/src/scholar_agent/config/loader.py +158 -0
  21. py_scholar_agent-0.1.0/src/scholar_agent/config/manager.py +104 -0
  22. py_scholar_agent-0.1.0/src/scholar_agent/config/paths.py +92 -0
  23. py_scholar_agent-0.1.0/src/scholar_agent/config/profiles.py +21 -0
  24. py_scholar_agent-0.1.0/src/scholar_agent/config_data/config.yaml +1 -0
  25. py_scholar_agent-0.1.0/src/scholar_agent/engine/__init__.py +1 -0
  26. py_scholar_agent-0.1.0/src/scholar_agent/engine/academic/__init__.py +0 -0
  27. py_scholar_agent-0.1.0/src/scholar_agent/engine/academic/arxiv_search.py +582 -0
  28. py_scholar_agent-0.1.0/src/scholar_agent/engine/academic/conf_search.py +510 -0
  29. py_scholar_agent-0.1.0/src/scholar_agent/engine/academic/daily_workflow.py +506 -0
  30. py_scholar_agent-0.1.0/src/scholar_agent/engine/academic/image_extractor.py +412 -0
  31. py_scholar_agent-0.1.0/src/scholar_agent/engine/academic/innovation_scorer.py +235 -0
  32. py_scholar_agent-0.1.0/src/scholar_agent/engine/academic/note_linker.py +391 -0
  33. py_scholar_agent-0.1.0/src/scholar_agent/engine/academic/paper_analyzer.py +1029 -0
  34. py_scholar_agent-0.1.0/src/scholar_agent/engine/academic/scoring.py +379 -0
  35. py_scholar_agent-0.1.0/src/scholar_agent/engine/agent.py +442 -0
  36. py_scholar_agent-0.1.0/src/scholar_agent/engine/bm25.py +140 -0
  37. py_scholar_agent-0.1.0/src/scholar_agent/engine/build_answer_context.py +130 -0
  38. py_scholar_agent-0.1.0/src/scholar_agent/engine/build_evidence_pack.py +106 -0
  39. py_scholar_agent-0.1.0/src/scholar_agent/engine/build_graph.py +152 -0
  40. py_scholar_agent-0.1.0/src/scholar_agent/engine/cache_helper.py +137 -0
  41. py_scholar_agent-0.1.0/src/scholar_agent/engine/close_knowledge_loop.py +846 -0
  42. py_scholar_agent-0.1.0/src/scholar_agent/engine/common.py +309 -0
  43. py_scholar_agent-0.1.0/src/scholar_agent/engine/distill_knowledge.py +98 -0
  44. py_scholar_agent-0.1.0/src/scholar_agent/engine/domain_router.py +728 -0
  45. py_scholar_agent-0.1.0/src/scholar_agent/engine/embedding_retrieve.py +260 -0
  46. py_scholar_agent-0.1.0/src/scholar_agent/engine/exceptions.py +31 -0
  47. py_scholar_agent-0.1.0/src/scholar_agent/engine/inputs/__init__.py +0 -0
  48. py_scholar_agent-0.1.0/src/scholar_agent/engine/inputs/external_candidates.py +51 -0
  49. py_scholar_agent-0.1.0/src/scholar_agent/engine/knowledge_governance.py +321 -0
  50. py_scholar_agent-0.1.0/src/scholar_agent/engine/knowledge_lifecycle.py +232 -0
  51. py_scholar_agent-0.1.0/src/scholar_agent/engine/local_index.py +318 -0
  52. py_scholar_agent-0.1.0/src/scholar_agent/engine/local_retrieve.py +210 -0
  53. py_scholar_agent-0.1.0/src/scholar_agent/engine/migrate_hierarchy.py +124 -0
  54. py_scholar_agent-0.1.0/src/scholar_agent/engine/normalizers/__init__.py +0 -0
  55. py_scholar_agent-0.1.0/src/scholar_agent/engine/normalizers/evidence_normalizer.py +232 -0
  56. py_scholar_agent-0.1.0/src/scholar_agent/engine/orchestrate_research.py +218 -0
  57. py_scholar_agent-0.1.0/src/scholar_agent/engine/promote_draft.py +132 -0
  58. py_scholar_agent-0.1.0/src/scholar_agent/engine/render_answer_bundle.py +143 -0
  59. py_scholar_agent-0.1.0/src/scholar_agent/engine/research_harness.py +643 -0
  60. py_scholar_agent-0.1.0/src/scholar_agent/engine/retry.py +63 -0
  61. py_scholar_agent-0.1.0/src/scholar_agent/engine/run_eval.py +254 -0
  62. py_scholar_agent-0.1.0/src/scholar_agent/engine/run_pipeline.py +198 -0
  63. py_scholar_agent-0.1.0/src/scholar_agent/engine/scholar_config.py +165 -0
  64. py_scholar_agent-0.1.0/src/scholar_agent/engine/search_pipeline.py +224 -0
  65. py_scholar_agent-0.1.0/src/scholar_agent/engine/search_providers/__init__.py +0 -0
  66. py_scholar_agent-0.1.0/src/scholar_agent/engine/search_providers/arxiv_provider.py +303 -0
  67. py_scholar_agent-0.1.0/src/scholar_agent/engine/search_providers/base.py +29 -0
  68. py_scholar_agent-0.1.0/src/scholar_agent/engine/search_providers/self_hosted_provider.py +129 -0
  69. py_scholar_agent-0.1.0/src/scholar_agent/engine/synthesize_answer.py +302 -0
  70. py_scholar_agent-0.1.0/src/scholar_agent/installers/__init__.py +1 -0
  71. py_scholar_agent-0.1.0/src/scholar_agent/installers/claude.py +259 -0
  72. py_scholar_agent-0.1.0/src/scholar_agent/installers/common.py +107 -0
  73. py_scholar_agent-0.1.0/src/scholar_agent/installers/opencode.py +93 -0
  74. py_scholar_agent-0.1.0/src/scholar_agent/installers/vscode.py +94 -0
  75. py_scholar_agent-0.1.0/src/scholar_agent/runtime.py +526 -0
  76. py_scholar_agent-0.1.0/src/scholar_agent/schemas/answer.schema.json +110 -0
  77. py_scholar_agent-0.1.0/src/scholar_agent/schemas/domain_routing_guide.md +105 -0
  78. py_scholar_agent-0.1.0/src/scholar_agent/schemas/domain_routing_policy.json +126 -0
  79. py_scholar_agent-0.1.0/src/scholar_agent/schemas/domain_routing_policy.template.json +26 -0
  80. py_scholar_agent-0.1.0/src/scholar_agent/schemas/evidence.schema.json +146 -0
  81. py_scholar_agent-0.1.0/src/scholar_agent/schemas/routing_skill.md +68 -0
  82. py_scholar_agent-0.1.0/src/scholar_agent/server.py +1443 -0
  83. py_scholar_agent-0.1.0/src/scholar_agent/skills/scholar-agent/SKILL.md +94 -0
  84. py_scholar_agent-0.1.0/src/scholar_agent/skills/scholar-agent/references/path-policy.md +24 -0
  85. py_scholar_agent-0.1.0/src/scholar_agent/skills/scholar-agent/references/quality-gates.md +69 -0
  86. py_scholar_agent-0.1.0/src/scholar_agent/skills/scholar-agent/references/workflow.md +30 -0
  87. py_scholar_agent-0.1.0/src/scholar_agent/skills/scholar-agent/scripts/normalize_note_location.py +8 -0
  88. py_scholar_agent-0.1.0/src/scholar_agent/skills/scholar-agent/scripts/validate_note.py +8 -0
  89. py_scholar_agent-0.1.0/src/scholar_agent/templates/__init__.py +29 -0
  90. py_scholar_agent-0.1.0/src/scholar_agent/templates/paper-en.md +196 -0
  91. py_scholar_agent-0.1.0/src/scholar_agent/templates/paper-zh.md +204 -0
  92. py_scholar_agent-0.1.0/src/scholar_agent/validation/__init__.py +6 -0
  93. py_scholar_agent-0.1.0/src/scholar_agent/validation/normalize_note.py +123 -0
  94. py_scholar_agent-0.1.0/src/scholar_agent/validation/validate_note.py +445 -0
  95. py_scholar_agent-0.1.0/tests/test_academic.py +642 -0
  96. py_scholar_agent-0.1.0/tests/test_agent.py +147 -0
  97. py_scholar_agent-0.1.0/tests/test_answer_context.py +174 -0
  98. py_scholar_agent-0.1.0/tests/test_bm25_retrieval.py +225 -0
  99. py_scholar_agent-0.1.0/tests/test_build_graph.py +77 -0
  100. py_scholar_agent-0.1.0/tests/test_common.py +253 -0
  101. py_scholar_agent-0.1.0/tests/test_distill_knowledge.py +92 -0
  102. py_scholar_agent-0.1.0/tests/test_domain_router.py +129 -0
  103. py_scholar_agent-0.1.0/tests/test_domain_seed_cards.py +61 -0
  104. py_scholar_agent-0.1.0/tests/test_external_candidates.py +82 -0
  105. py_scholar_agent-0.1.0/tests/test_hybrid_evidence_pack.py +104 -0
  106. py_scholar_agent-0.1.0/tests/test_incremental_index.py +208 -0
  107. py_scholar_agent-0.1.0/tests/test_input_validation.py +130 -0
  108. py_scholar_agent-0.1.0/tests/test_knowledge_lifecycle.py +231 -0
  109. py_scholar_agent-0.1.0/tests/test_knowledge_scaffold.py +20 -0
  110. py_scholar_agent-0.1.0/tests/test_local_index.py +106 -0
  111. py_scholar_agent-0.1.0/tests/test_local_rag_smoke.py +64 -0
  112. py_scholar_agent-0.1.0/tests/test_local_retrieve.py +66 -0
  113. py_scholar_agent-0.1.0/tests/test_local_synthesis_and_loop.py +338 -0
  114. py_scholar_agent-0.1.0/tests/test_mcp_server.py +294 -0
  115. py_scholar_agent-0.1.0/tests/test_multi_perspective.py +109 -0
  116. py_scholar_agent-0.1.0/tests/test_paper_analyzer_fill.py +527 -0
  117. py_scholar_agent-0.1.0/tests/test_pipeline_and_eval.py +170 -0
  118. py_scholar_agent-0.1.0/tests/test_promote_draft.py +190 -0
  119. py_scholar_agent-0.1.0/tests/test_render_answer_bundle.py +86 -0
  120. py_scholar_agent-0.1.0/tests/test_research_orchestrator.py +188 -0
  121. py_scholar_agent-0.1.0/tests/test_roundtrip_accumulation.py +164 -0
  122. py_scholar_agent-0.1.0/tests/test_search_pipeline.py +416 -0
  123. py_scholar_agent-0.1.0/tests/test_synthesize_answer.py +125 -0
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 zfy465914233
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,5 @@
1
+ include .scholar.json
2
+ include src/scholar_agent/schemas/answer.schema.json
3
+ recursive-include src/scholar_agent/templates *
4
+ recursive-include skills *
5
+ recursive-include src/scholar_agent *
@@ -0,0 +1,225 @@
1
+ Metadata-Version: 2.4
2
+ Name: py-scholar-agent
3
+ Version: 0.1.0
4
+ Summary: Knowledge flywheel MCP server + academic paper research pipeline for Claude Code and VS Code Copilot
5
+ Author-email: Fangyi Zhou <zfy465914233@gmail.com>
6
+ License-Expression: MIT
7
+ Project-URL: Homepage, https://github.com/zfy465914233/scholar-agent
8
+ Project-URL: Repository, https://github.com/zfy465914233/scholar-agent
9
+ Project-URL: Issues, https://github.com/zfy465914233/scholar-agent/issues
10
+ Project-URL: Changelog, https://github.com/zfy465914233/scholar-agent/blob/main/CHANGELOG.md
11
+ Keywords: mcp,knowledge-management,arxiv,academic-research,obsidian
12
+ Classifier: Development Status :: 3 - Alpha
13
+ Classifier: Intended Audience :: Science/Research
14
+ Classifier: Programming Language :: Python :: 3
15
+ Classifier: Programming Language :: Python :: 3.10
16
+ Classifier: Programming Language :: Python :: 3.11
17
+ Classifier: Programming Language :: Python :: 3.12
18
+ Classifier: Programming Language :: Python :: 3.13
19
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
20
+ Classifier: Topic :: Education
21
+ Classifier: Typing :: Typed
22
+ Requires-Python: >=3.10
23
+ Description-Content-Type: text/markdown
24
+ License-File: LICENSE
25
+ Requires-Dist: jsonschema<5,>=4.23
26
+ Requires-Dist: fastmcp>=2.0
27
+ Requires-Dist: PyMuPDF>=1.24.0
28
+ Requires-Dist: requests>=2.28
29
+ Provides-Extra: academic
30
+ Requires-Dist: PyYAML>=6.0; extra == "academic"
31
+ Provides-Extra: embedding
32
+ Requires-Dist: sentence-transformers>=3.0; extra == "embedding"
33
+ Provides-Extra: dev
34
+ Requires-Dist: pytest>=7.0; extra == "dev"
35
+ Requires-Dist: pytest-cov>=5.0; extra == "dev"
36
+ Requires-Dist: ruff>=0.8; extra == "dev"
37
+ Requires-Dist: mypy>=1.10; extra == "dev"
38
+ Requires-Dist: pre-commit>=3.7; extra == "dev"
39
+ Provides-Extra: all
40
+ Requires-Dist: py-scholar-agent[academic,embedding]; extra == "all"
41
+ Dynamic: license-file
42
+
43
+ <p align="center">
44
+ <img src="assets/banner.svg" alt="Scholar Agent" width="720" />
45
+ </p>
46
+
47
+ <p align="center">
48
+ <img src="https://img.shields.io/badge/python-3.10+-blue.svg" alt="Python 3.10+" />
49
+ <img src="https://img.shields.io/badge/License-MIT-yellow.svg" alt="License: MIT" />
50
+ <img src="https://img.shields.io/badge/MCP-Ready-brightgreen.svg" alt="MCP Ready" />
51
+ <img src="https://img.shields.io/badge/tests-276%20passing-brightgreen.svg" alt="Tests" />
52
+ <img src="https://img.shields.io/pypi/v/py-scholar-agent?color=blue" alt="PyPI" />
53
+ </p>
54
+
55
+ <p align="center">
56
+ <a href="README.zh-CN.md">中文</a>
57
+ </p>
58
+
59
+ <p align="center">
60
+ <strong>AI that gets smarter in YOUR domain — every question compounds.</strong><br/>
61
+ Online research + local knowledge accumulation + MCP integration for Claude Code & VS Code Copilot.
62
+ </p>
63
+
64
+ ---
65
+
66
+ ## Demo
67
+
68
+ <p align="center">
69
+ <img src="assets/demo.gif" alt="Scholar Agent Demo" width="720" />
70
+ </p>
71
+
72
+ <p align="center"><sub>Ask → Research → Accumulate → Next question hits local cache instantly</sub></p>
73
+
74
+ <p align="center">
75
+ <img src="assets/flywheel.svg" alt="Knowledge Flywheel" width="720" />
76
+ </p>
77
+
78
+ Each round compounds. Knowledge cards have full lifecycle management: **draft → reviewed → trusted → stale → deprecated**.
79
+
80
+ ---
81
+
82
+ ## Why Scholar Agent?
83
+
84
+ | | ChatGPT / Claude | Obsidian + Plugins | Zotero | **Scholar Agent** |
85
+ |---|---|---|---|---|
86
+ | Domain knowledge accumulates | No — every chat starts fresh | Manual curation | Bibliography only | **Automatic — every query compounds** |
87
+ | Structured research with citations | Sometimes | Manual | Partial | **Yes — structured synthesis + confidence scores** |
88
+ | Academic paper pipeline | Limited | Via plugins | Yes | **Search → Score → Analyze → Extract → Recommend** |
89
+ | Works offline | No | Yes | Partial | **Yes — local BM25 index, falls back gracefully** |
90
+ | Human-readable knowledge base | Chat history | Markdown files | PDF library | **Obsidian-compatible Markdown + wiki-links** |
91
+ | Integrates with your IDE | No | No | Partial | **MCP server for Claude Code, VS Code Copilot, OpenCode** |
92
+ | Knowledge quality gates | None | None | None | **Lifecycle management + quality scoring + validation** |
93
+
94
+ ---
95
+
96
+ ## Quick Start
97
+
98
+ ### Install
99
+
100
+ ```bash
101
+ pip install py-scholar-agent
102
+ ```
103
+
104
+ Or from source:
105
+
106
+ ```bash
107
+ git clone https://github.com/zfy465914233/scholar-agent.git
108
+ cd scholar-agent
109
+ pip install -e .
110
+ ```
111
+
112
+ ### Setup
113
+
114
+ ```bash
115
+ scholar-agent init
116
+ ```
117
+
118
+ One command creates data directories, writes config, and registers MCP with Claude Code. You're done.
119
+
120
+ ### Modes
121
+
122
+ | Mode | Command | Data Location | Scope |
123
+ |------|---------|---------------|-------|
124
+ | **Global** (recommended) | `scholar-agent init` | `~/scholar/` | Every project |
125
+ | **Project-Local** | `SCHOLAR_HOME=./scholar scholar-agent init` | `my-project/scholar/` | Current project only |
126
+ | **Docker** | `docker run -v ~/scholar:/data scholar-agent serve-mcp` | Container volume | Isolated |
127
+
128
+ ---
129
+
130
+ ## What It Does
131
+
132
+ ### Knowledge Flywheel
133
+
134
+ Ask a question → Scholar Agent researches (web + academic APIs) → synthesizes a structured answer with citations → saves as a knowledge card → indexes it with BM25. Next time a similar question comes up, it hits the local index first — **fast, accurate, and free**.
135
+
136
+ ### Academic Research Pipeline
137
+
138
+ - **Paper Search** — arXiv, DBLP, Semantic Scholar. Filter by 10+ top conferences
139
+ - **Smart Scoring** — 4-dimensional engine (relevance, recency, popularity, quality)
140
+ - **Deep Analysis** — 20+ section Obsidian-style notes with AI-assisted completion
141
+ - **Figure Extraction** — From arXiv source archives and PDFs
142
+ - **Daily Recommendations** — Automated search, scoring, deduplication
143
+ - **Paper → Knowledge Card** — Feed analyses back into the flywheel
144
+ - **Keyword Auto-Linking** — `[[wiki-links]]` across all notes
145
+
146
+ ### MCP Tools
147
+
148
+ **Core** (always available): `query_knowledge` · `save_research` · `list_knowledge` · `capture_answer` · `ingest_source` · `build_graph`
149
+
150
+ **Academic** (set `SCHOLAR_ACADEMIC=1`): `search_papers` · `search_conf_papers` · `download_paper` · `analyze_paper` · `extract_paper_images` · `paper_to_card` · `daily_recommend` · `link_paper_keywords`
151
+
152
+ ---
153
+
154
+ ## CLI Reference
155
+
156
+ | Command | Description |
157
+ |---------|-------------|
158
+ | `scholar-agent init` | One-command setup: data dirs + config + MCP registration |
159
+ | `scholar-agent serve-mcp` | Start the MCP server |
160
+ | `scholar-agent doctor` | Show environment and config diagnostics |
161
+ | `scholar-agent config show` | Show resolved configuration |
162
+ | `scholar-agent install claude --write` | Register MCP with Claude Code |
163
+ | `scholar-agent install vscode --write` | Register MCP with VS Code Copilot |
164
+ | `scholar-agent install opencode --write` | Register MCP with OpenCode |
165
+
166
+ ---
167
+
168
+ ## Configuration
169
+
170
+ ### Environment Variables
171
+
172
+ | Variable | Required | Description |
173
+ |----------|----------|-------------|
174
+ | `SCHOLAR_ACADEMIC` | No | Set to `1` to enable academic tools |
175
+ | `SCHOLAR_HOME` | No | Override data directory (default: `~/scholar/`) |
176
+ | `S2_API_KEY` | No | Semantic Scholar API key ([get one free](https://api.semanticscholar.org/)) |
177
+ | `LLM_API_KEY` | No | LLM API key for advanced synthesis pipeline |
178
+
179
+ ### Config File
180
+
181
+ See [`.scholar.example.json`](.scholar.example.json) for a full example with comments. Key sections:
182
+
183
+ - `knowledge_dir` — Knowledge cards directory
184
+ - `index_path` — BM25 search index
185
+ - `academic.research_interests` — Your domains, keywords, arXiv categories
186
+ - `academic.scoring` — Paper scoring weights
187
+
188
+ ---
189
+
190
+ ## Recommended Workflow
191
+
192
+ For best paper analysis quality:
193
+
194
+ 1. **Download**: `download_paper("2510.24701", title="Paper Title", domain="LLM")`
195
+ 2. **Extract images**: `extract_paper_images("2510.24701")`
196
+ 3. **Deep analysis**: `analyze_paper(paper_json)`
197
+
198
+ > Downloading the PDF first enables full-text extraction, producing notes with specific data, formulas, and experimental results.
199
+
200
+ ---
201
+
202
+ ## Development
203
+
204
+ ```bash
205
+ make dev # Install with dev dependencies + pre-commit hooks
206
+ make lint # Run ruff + mypy
207
+ make test # Run test suite (276 tests, ~7s, fully offline)
208
+ make coverage # Run tests with coverage report
209
+ make build # Build distribution package
210
+ make docker # Build Docker image
211
+ ```
212
+
213
+ See [CONTRIBUTING.md](CONTRIBUTING.md) for detailed guidelines.
214
+
215
+ ## Features at a Glance
216
+
217
+ - **Multi-perspective research** — 5 perspectives (academic, technical, applied, contrarian, historical)
218
+ - **Obsidian compatible** — Markdown + YAML frontmatter + `[[wiki-links]]`
219
+ - **Knowledge governance** — Validate frontmatter, detect orphans, find duplicates, manage lifecycle
220
+ - **Provider fault tolerance** — Each source fails independently; graceful offline fallback
221
+ - **Cross-platform** — macOS, Linux, Windows
222
+
223
+ ## License
224
+
225
+ MIT — see [LICENSE](LICENSE).
@@ -0,0 +1,183 @@
1
+ <p align="center">
2
+ <img src="assets/banner.svg" alt="Scholar Agent" width="720" />
3
+ </p>
4
+
5
+ <p align="center">
6
+ <img src="https://img.shields.io/badge/python-3.10+-blue.svg" alt="Python 3.10+" />
7
+ <img src="https://img.shields.io/badge/License-MIT-yellow.svg" alt="License: MIT" />
8
+ <img src="https://img.shields.io/badge/MCP-Ready-brightgreen.svg" alt="MCP Ready" />
9
+ <img src="https://img.shields.io/badge/tests-276%20passing-brightgreen.svg" alt="Tests" />
10
+ <img src="https://img.shields.io/pypi/v/py-scholar-agent?color=blue" alt="PyPI" />
11
+ </p>
12
+
13
+ <p align="center">
14
+ <a href="README.zh-CN.md">中文</a>
15
+ </p>
16
+
17
+ <p align="center">
18
+ <strong>AI that gets smarter in YOUR domain — every question compounds.</strong><br/>
19
+ Online research + local knowledge accumulation + MCP integration for Claude Code & VS Code Copilot.
20
+ </p>
21
+
22
+ ---
23
+
24
+ ## Demo
25
+
26
+ <p align="center">
27
+ <img src="assets/demo.gif" alt="Scholar Agent Demo" width="720" />
28
+ </p>
29
+
30
+ <p align="center"><sub>Ask → Research → Accumulate → Next question hits local cache instantly</sub></p>
31
+
32
+ <p align="center">
33
+ <img src="assets/flywheel.svg" alt="Knowledge Flywheel" width="720" />
34
+ </p>
35
+
36
+ Each round compounds. Knowledge cards have full lifecycle management: **draft → reviewed → trusted → stale → deprecated**.
37
+
38
+ ---
39
+
40
+ ## Why Scholar Agent?
41
+
42
+ | | ChatGPT / Claude | Obsidian + Plugins | Zotero | **Scholar Agent** |
43
+ |---|---|---|---|---|
44
+ | Domain knowledge accumulates | No — every chat starts fresh | Manual curation | Bibliography only | **Automatic — every query compounds** |
45
+ | Structured research with citations | Sometimes | Manual | Partial | **Yes — structured synthesis + confidence scores** |
46
+ | Academic paper pipeline | Limited | Via plugins | Yes | **Search → Score → Analyze → Extract → Recommend** |
47
+ | Works offline | No | Yes | Partial | **Yes — local BM25 index, falls back gracefully** |
48
+ | Human-readable knowledge base | Chat history | Markdown files | PDF library | **Obsidian-compatible Markdown + wiki-links** |
49
+ | Integrates with your IDE | No | No | Partial | **MCP server for Claude Code, VS Code Copilot, OpenCode** |
50
+ | Knowledge quality gates | None | None | None | **Lifecycle management + quality scoring + validation** |
51
+
52
+ ---
53
+
54
+ ## Quick Start
55
+
56
+ ### Install
57
+
58
+ ```bash
59
+ pip install py-scholar-agent
60
+ ```
61
+
62
+ Or from source:
63
+
64
+ ```bash
65
+ git clone https://github.com/zfy465914233/scholar-agent.git
66
+ cd scholar-agent
67
+ pip install -e .
68
+ ```
69
+
70
+ ### Setup
71
+
72
+ ```bash
73
+ scholar-agent init
74
+ ```
75
+
76
+ One command creates data directories, writes config, and registers MCP with Claude Code. You're done.
77
+
78
+ ### Modes
79
+
80
+ | Mode | Command | Data Location | Scope |
81
+ |------|---------|---------------|-------|
82
+ | **Global** (recommended) | `scholar-agent init` | `~/scholar/` | Every project |
83
+ | **Project-Local** | `SCHOLAR_HOME=./scholar scholar-agent init` | `my-project/scholar/` | Current project only |
84
+ | **Docker** | `docker run -v ~/scholar:/data scholar-agent serve-mcp` | Container volume | Isolated |
85
+
86
+ ---
87
+
88
+ ## What It Does
89
+
90
+ ### Knowledge Flywheel
91
+
92
+ Ask a question → Scholar Agent researches (web + academic APIs) → synthesizes a structured answer with citations → saves as a knowledge card → indexes it with BM25. Next time a similar question comes up, it hits the local index first — **fast, accurate, and free**.
93
+
94
+ ### Academic Research Pipeline
95
+
96
+ - **Paper Search** — arXiv, DBLP, Semantic Scholar. Filter by 10+ top conferences
97
+ - **Smart Scoring** — 4-dimensional engine (relevance, recency, popularity, quality)
98
+ - **Deep Analysis** — 20+ section Obsidian-style notes with AI-assisted completion
99
+ - **Figure Extraction** — From arXiv source archives and PDFs
100
+ - **Daily Recommendations** — Automated search, scoring, deduplication
101
+ - **Paper → Knowledge Card** — Feed analyses back into the flywheel
102
+ - **Keyword Auto-Linking** — `[[wiki-links]]` across all notes
103
+
104
+ ### MCP Tools
105
+
106
+ **Core** (always available): `query_knowledge` · `save_research` · `list_knowledge` · `capture_answer` · `ingest_source` · `build_graph`
107
+
108
+ **Academic** (set `SCHOLAR_ACADEMIC=1`): `search_papers` · `search_conf_papers` · `download_paper` · `analyze_paper` · `extract_paper_images` · `paper_to_card` · `daily_recommend` · `link_paper_keywords`
109
+
110
+ ---
111
+
112
+ ## CLI Reference
113
+
114
+ | Command | Description |
115
+ |---------|-------------|
116
+ | `scholar-agent init` | One-command setup: data dirs + config + MCP registration |
117
+ | `scholar-agent serve-mcp` | Start the MCP server |
118
+ | `scholar-agent doctor` | Show environment and config diagnostics |
119
+ | `scholar-agent config show` | Show resolved configuration |
120
+ | `scholar-agent install claude --write` | Register MCP with Claude Code |
121
+ | `scholar-agent install vscode --write` | Register MCP with VS Code Copilot |
122
+ | `scholar-agent install opencode --write` | Register MCP with OpenCode |
123
+
124
+ ---
125
+
126
+ ## Configuration
127
+
128
+ ### Environment Variables
129
+
130
+ | Variable | Required | Description |
131
+ |----------|----------|-------------|
132
+ | `SCHOLAR_ACADEMIC` | No | Set to `1` to enable academic tools |
133
+ | `SCHOLAR_HOME` | No | Override data directory (default: `~/scholar/`) |
134
+ | `S2_API_KEY` | No | Semantic Scholar API key ([get one free](https://api.semanticscholar.org/)) |
135
+ | `LLM_API_KEY` | No | LLM API key for advanced synthesis pipeline |
136
+
137
+ ### Config File
138
+
139
+ See [`.scholar.example.json`](.scholar.example.json) for a full example with comments. Key sections:
140
+
141
+ - `knowledge_dir` — Knowledge cards directory
142
+ - `index_path` — BM25 search index
143
+ - `academic.research_interests` — Your domains, keywords, arXiv categories
144
+ - `academic.scoring` — Paper scoring weights
145
+
146
+ ---
147
+
148
+ ## Recommended Workflow
149
+
150
+ For best paper analysis quality:
151
+
152
+ 1. **Download**: `download_paper("2510.24701", title="Paper Title", domain="LLM")`
153
+ 2. **Extract images**: `extract_paper_images("2510.24701")`
154
+ 3. **Deep analysis**: `analyze_paper(paper_json)`
155
+
156
+ > Downloading the PDF first enables full-text extraction, producing notes with specific data, formulas, and experimental results.
157
+
158
+ ---
159
+
160
+ ## Development
161
+
162
+ ```bash
163
+ make dev # Install with dev dependencies + pre-commit hooks
164
+ make lint # Run ruff + mypy
165
+ make test # Run test suite (276 tests, ~7s, fully offline)
166
+ make coverage # Run tests with coverage report
167
+ make build # Build distribution package
168
+ make docker # Build Docker image
169
+ ```
170
+
171
+ See [CONTRIBUTING.md](CONTRIBUTING.md) for detailed guidelines.
172
+
173
+ ## Features at a Glance
174
+
175
+ - **Multi-perspective research** — 5 perspectives (academic, technical, applied, contrarian, historical)
176
+ - **Obsidian compatible** — Markdown + YAML frontmatter + `[[wiki-links]]`
177
+ - **Knowledge governance** — Validate frontmatter, detect orphans, find duplicates, manage lifecycle
178
+ - **Provider fault tolerance** — Each source fails independently; graceful offline fallback
179
+ - **Cross-platform** — macOS, Linux, Windows
180
+
181
+ ## License
182
+
183
+ MIT — see [LICENSE](LICENSE).
@@ -0,0 +1,122 @@
1
+ [project]
2
+ name = "py-scholar-agent"
3
+ version = "0.1.0"
4
+ description = "Knowledge flywheel MCP server + academic paper research pipeline for Claude Code and VS Code Copilot"
5
+ readme = "README.md"
6
+ license = "MIT"
7
+ requires-python = ">=3.10"
8
+ authors = [
9
+ {name = "Fangyi Zhou", email = "zfy465914233@gmail.com"},
10
+ ]
11
+ keywords = ["mcp", "knowledge-management", "arxiv", "academic-research", "obsidian"]
12
+ classifiers = [
13
+ "Development Status :: 3 - Alpha",
14
+ "Intended Audience :: Science/Research",
15
+ "Programming Language :: Python :: 3",
16
+ "Programming Language :: Python :: 3.10",
17
+ "Programming Language :: Python :: 3.11",
18
+ "Programming Language :: Python :: 3.12",
19
+ "Programming Language :: Python :: 3.13",
20
+ "Topic :: Scientific/Engineering :: Artificial Intelligence",
21
+ "Topic :: Education",
22
+ "Typing :: Typed",
23
+ ]
24
+
25
+ dependencies = [
26
+ "jsonschema>=4.23,<5",
27
+ "fastmcp>=2.0",
28
+ "PyMuPDF>=1.24.0",
29
+ "requests>=2.28",
30
+ ]
31
+
32
+ [project.optional-dependencies]
33
+ academic = [
34
+ "PyYAML>=6.0",
35
+ ]
36
+ embedding = [
37
+ "sentence-transformers>=3.0",
38
+ ]
39
+ dev = [
40
+ "pytest>=7.0",
41
+ "pytest-cov>=5.0",
42
+ "ruff>=0.8",
43
+ "mypy>=1.10",
44
+ "pre-commit>=3.7",
45
+ ]
46
+ all = [
47
+ "py-scholar-agent[academic,embedding]",
48
+ ]
49
+
50
+ [project.scripts]
51
+ scholar-agent = "scholar_agent.cli:main"
52
+
53
+ [project.urls]
54
+ Homepage = "https://github.com/zfy465914233/scholar-agent"
55
+ Repository = "https://github.com/zfy465914233/scholar-agent"
56
+ Issues = "https://github.com/zfy465914233/scholar-agent/issues"
57
+ Changelog = "https://github.com/zfy465914233/scholar-agent/blob/main/CHANGELOG.md"
58
+
59
+ [build-system]
60
+ requires = ["setuptools>=68.0"]
61
+ build-backend = "setuptools.build_meta"
62
+
63
+ [tool.setuptools.packages.find]
64
+ where = ["src"]
65
+ include = ["scholar_agent*"]
66
+
67
+ [tool.setuptools.package-data]
68
+ scholar_agent = ["schemas/**", "templates/**", "config_data/**", "skills/**", "validation/**"]
69
+
70
+ [tool.pytest.ini_options]
71
+ testpaths = ["tests"]
72
+ addopts = "--tb=short -q"
73
+
74
+ [tool.ruff]
75
+ target-version = "py310"
76
+ line-length = 120
77
+ src = ["src", "tests"]
78
+
79
+ [tool.ruff.lint]
80
+ select = [
81
+ "E", # pycodestyle errors
82
+ "W", # pycodestyle warnings
83
+ "F", # pyflakes
84
+ "I", # isort
85
+ "UP", # pyupgrade
86
+ "B", # flake8-bugbear
87
+ "SIM", # flake8-simplify
88
+ "TCH", # flake8-type-checking
89
+ "RUF", # ruff-specific
90
+ ]
91
+ ignore = [
92
+ "E501", # line too long (handled by formatter)
93
+ "E402", # module-level import not at top (common in scripts)
94
+ "RUF001", # ambiguous unicode character in string (Chinese content)
95
+ "RUF002", # ambiguous unicode character in docstring (Chinese content)
96
+ ]
97
+
98
+ [tool.ruff.lint.isort]
99
+ known-first-party = ["scholar_agent"]
100
+
101
+ [tool.mypy]
102
+ python_version = "3.10"
103
+ warn_return_any = true
104
+ warn_unused_configs = true
105
+ disallow_untyped_defs = false
106
+ check_untyped_defs = true
107
+ no_implicit_optional = true
108
+ warn_redundant_casts = true
109
+ exclude = ["tests/", "scripts/"]
110
+
111
+ [[tool.mypy.overrides]]
112
+ module = ["fitz.*", "pymupdf.*", "fastmcp.*", "sentence_transformers.*"]
113
+ ignore_missing_imports = true
114
+
115
+ [tool.coverage.run]
116
+ source = ["scholar_agent"]
117
+ omit = ["tests/*"]
118
+
119
+ [tool.coverage.report]
120
+ show_missing = true
121
+ skip_covered = true
122
+ fail_under = 60
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,80 @@
1
+ ---
2
+ name: paper-analysis
3
+ description: "Deep analysis of a single paper: download PDF, extract figures, generate structured 20+ section note, fill LLM placeholders."
4
+ allowed-tools: Bash, Read, Write
5
+ ---
6
+
7
+ # Paper Analysis Skill
8
+
9
+ Generate a comprehensive analysis note for a single paper.
10
+
11
+ ## Step 1: Download the paper
12
+
13
+ ```
14
+ download_paper(paper_id="2510.24701", title="Paper Title", domain="LLM")
15
+ ```
16
+
17
+ This caches the PDF locally under `paper-notes/<title>/`.
18
+
19
+ ## Step 2: Extract figures
20
+
21
+ ```
22
+ extract_paper_images(paper_id="2510.24701")
23
+ ```
24
+
25
+ Auto-detects the local PDF. Images go to `paper-notes/<title>/images/`.
26
+
27
+ ## Step 3: Generate the analysis note
28
+
29
+ ```
30
+ analyze_paper(paper_json='{"title": "...", "authors": [...], "arxiv_id": "..."}', language="zh")
31
+ ```
32
+
33
+ Returns:
34
+ - `note_path`: path to the generated markdown note
35
+ - `quality_check`: whether the note has unfilled placeholders
36
+ - `pdf_text`: full text extracted from the PDF (if available)
37
+ - `instructions`: if placeholders exist, instructions to fill them
38
+
39
+ ## Step 4: Fill placeholders
40
+
41
+ The generated note contains `<!-- LLM: describe method -->` style placeholders. When `instructions` is non-null, you MUST fill all placeholders using the `pdf_text` field:
42
+
43
+ ### Filling rules
44
+
45
+ 1. Read the note at `note_path`.
46
+ 2. For each `<!-- LLM: ... -->` placeholder, replace it with substantive content drawn from `pdf_text`.
47
+ 3. Be specific: cite numbers, dataset names, architecture details, formula descriptions.
48
+ 4. Do NOT leave any placeholder unfilled.
49
+ 5. Write the complete filled note back using the Write tool.
50
+
51
+ ### Section guide
52
+
53
+ | Section | What to write |
54
+ |---------|---------------|
55
+ | Core information | One-sentence contribution, key result, paper type |
56
+ | Abstract translation | Translate or paraphrase the abstract with domain context |
57
+ | Research background | What problem existed, why it matters |
58
+ | Research questions | List the explicit or implicit questions the paper addresses |
59
+ | Method overview | Architecture, algorithm, pipeline — with specifics |
60
+ | Experimental results | Datasets, metrics, comparisons, ablation findings |
61
+ | Deep analysis | Strengths, weaknesses, assumptions, limitations |
62
+ | Comparison with related work | Position vs. prior art |
63
+ | Technical roadmap | Where this fits in the broader research trajectory |
64
+ | Future work | Open questions the paper raises |
65
+ | Comprehensive evaluation | Your own 1-5 rating with justification |
66
+ | My notes | Leave empty for user to fill |
67
+
68
+ ## Step 5: Convert to knowledge card (optional)
69
+
70
+ ```
71
+ paper_to_card(paper_json='...', note_path='/path/to/note.md')
72
+ ```
73
+
74
+ ## Step 6: Link keywords (optional)
75
+
76
+ ```
77
+ link_paper_keywords()
78
+ ```
79
+
80
+ Scans all notes and auto-creates `[[wiki-links]]` for technical terms.