crossref-local 0.3.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- crossref_local/__init__.py +128 -0
- crossref_local/__main__.py +6 -0
- crossref_local/aio.py +236 -0
- crossref_local/api.py +221 -0
- crossref_local/citations.py +413 -0
- crossref_local/cli.py +450 -0
- crossref_local/config.py +171 -0
- crossref_local/db.py +138 -0
- crossref_local/fts.py +172 -0
- crossref_local/impact_factor/__init__.py +20 -0
- crossref_local/impact_factor/calculator.py +479 -0
- crossref_local/impact_factor/journal_lookup.py +274 -0
- crossref_local/mcp_server.py +202 -0
- crossref_local/models.py +186 -0
- crossref_local/remote.py +264 -0
- crossref_local/server.py +352 -0
- crossref_local-0.3.1.dist-info/METADATA +306 -0
- crossref_local-0.3.1.dist-info/RECORD +20 -0
- crossref_local-0.3.1.dist-info/WHEEL +4 -0
- crossref_local-0.3.1.dist-info/entry_points.txt +3 -0
|
@@ -0,0 +1,306 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: crossref-local
|
|
3
|
+
Version: 0.3.1
|
|
4
|
+
Summary: Local CrossRef database with 167M+ works and full-text search
|
|
5
|
+
Project-URL: Homepage, https://github.com/ywatanabe1989/crossref_local
|
|
6
|
+
Project-URL: Repository, https://github.com/ywatanabe1989/crossref_local
|
|
7
|
+
Author: Yusuke Watanabe
|
|
8
|
+
License-Expression: AGPL-3.0
|
|
9
|
+
Keywords: academic,citations,crossref,doi,fts5,full-text-search,impact-factor,scholarly
|
|
10
|
+
Classifier: Development Status :: 4 - Beta
|
|
11
|
+
Classifier: Intended Audience :: Science/Research
|
|
12
|
+
Classifier: License :: OSI Approved :: GNU Affero General Public License v3
|
|
13
|
+
Classifier: Programming Language :: Python :: 3
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
17
|
+
Classifier: Topic :: Database
|
|
18
|
+
Classifier: Topic :: Scientific/Engineering
|
|
19
|
+
Requires-Python: >=3.10
|
|
20
|
+
Requires-Dist: click>=8.0
|
|
21
|
+
Provides-Extra: all
|
|
22
|
+
Requires-Dist: fastapi>=0.100; extra == 'all'
|
|
23
|
+
Requires-Dist: fastmcp>=0.4; extra == 'all'
|
|
24
|
+
Requires-Dist: matplotlib>=3.7; extra == 'all'
|
|
25
|
+
Requires-Dist: networkx>=3.0; extra == 'all'
|
|
26
|
+
Requires-Dist: pytest-asyncio>=0.21; extra == 'all'
|
|
27
|
+
Requires-Dist: pytest-cov>=4.0; extra == 'all'
|
|
28
|
+
Requires-Dist: pytest>=7.0; extra == 'all'
|
|
29
|
+
Requires-Dist: pyvis>=0.3; extra == 'all'
|
|
30
|
+
Requires-Dist: uvicorn>=0.20; extra == 'all'
|
|
31
|
+
Provides-Extra: api
|
|
32
|
+
Requires-Dist: fastapi>=0.100; extra == 'api'
|
|
33
|
+
Requires-Dist: uvicorn>=0.20; extra == 'api'
|
|
34
|
+
Provides-Extra: dev
|
|
35
|
+
Requires-Dist: pytest-asyncio>=0.21; extra == 'dev'
|
|
36
|
+
Requires-Dist: pytest-cov>=4.0; extra == 'dev'
|
|
37
|
+
Requires-Dist: pytest>=7.0; extra == 'dev'
|
|
38
|
+
Provides-Extra: mcp
|
|
39
|
+
Requires-Dist: fastmcp>=0.4; extra == 'mcp'
|
|
40
|
+
Provides-Extra: viz
|
|
41
|
+
Requires-Dist: matplotlib>=3.7; extra == 'viz'
|
|
42
|
+
Requires-Dist: networkx>=3.0; extra == 'viz'
|
|
43
|
+
Requires-Dist: pyvis>=0.3; extra == 'viz'
|
|
44
|
+
Description-Content-Type: text/markdown
|
|
45
|
+
|
|
46
|
+
# CrossRef Local
|
|
47
|
+
|
|
48
|
+
Local CrossRef database with 167M+ scholarly works, full-text search, and impact factor calculation.
|
|
49
|
+
|
|
50
|
+
[Tests](https://github.com/ywatanabe1989/crossref-local/actions/workflows/test.yml)
|
|
51
|
+
[Python 3.10+](https://www.python.org/downloads/)
|
|
52
|
+
[License: AGPL-3.0](LICENSE)
|
|
53
|
+
|
|
54
|
+
<p align="center">
|
|
55
|
+
<img src="examples/readme_figure.png" alt="CrossRef Local Demo" width="800"/>
|
|
56
|
+
</p>
|
|
57
|
+
|
|
58
|
+
<details>
|
|
59
|
+
<summary><strong>Why CrossRef Local?</strong></summary>
|
|
60
|
+
|
|
61
|
+
**Built for the LLM era** - features that matter for AI research assistants:
|
|
62
|
+
|
|
63
|
+
| Feature | Benefit |
|
|
64
|
+
|---------|---------|
|
|
65
|
+
| 📝 **Abstracts** | Full text for semantic understanding |
|
|
66
|
+
| 📊 **Impact Factor** | Filter by journal quality |
|
|
67
|
+
| 🔗 **Citations** | Prioritize influential papers |
|
|
68
|
+
| ⚡ **Speed** | 167M records in ms, no rate limits |
|
|
69
|
+
|
|
70
|
+
Perfect for: RAG systems, research assistants, literature review automation.
|
|
71
|
+
|
|
72
|
+
</details>
|
|
73
|
+
|
|
74
|
+
<details>
|
|
75
|
+
<summary><strong>Installation</strong></summary>
|
|
76
|
+
|
|
77
|
+
```bash
|
|
78
|
+
pip install crossref-local
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
From source:
|
|
82
|
+
```bash
|
|
83
|
+
git clone https://github.com/ywatanabe1989/crossref-local
|
|
84
|
+
cd crossref-local && make install
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
Database setup (1.5 TB, ~2 weeks to build):
|
|
88
|
+
```bash
|
|
89
|
+
# 1. Download CrossRef data (~100GB compressed)
|
|
90
|
+
aria2c "https://academictorrents.com/details/..."
|
|
91
|
+
|
|
92
|
+
# 2. Build SQLite database (~days)
|
|
93
|
+
pip install dois2sqlite
|
|
94
|
+
dois2sqlite build /path/to/crossref-data ./data/crossref.db
|
|
95
|
+
|
|
96
|
+
# 3. Build FTS5 index (~60 hours) & citations table (~days)
|
|
97
|
+
make fts-build-screen
|
|
98
|
+
make citations-build-screen
|
|
99
|
+
```
|
|
100
|
+
|
|
101
|
+
</details>
|
|
102
|
+
|
|
103
|
+
<details>
|
|
104
|
+
<summary><strong>Python API</strong></summary>
|
|
105
|
+
|
|
106
|
+
```python
|
|
107
|
+
from crossref_local import search, get, count
|
|
108
|
+
|
|
109
|
+
# Full-text search (22ms for 541 matches across 167M records)
|
|
110
|
+
results = search("hippocampal sharp wave ripples")
|
|
111
|
+
for work in results:
|
|
112
|
+
    print(f"{work.title} ({work.year})")
|
|
113
|
+
|
|
114
|
+
# Get by DOI
|
|
115
|
+
work = get("10.1126/science.aax0758")
|
|
116
|
+
print(work.citation())
|
|
117
|
+
|
|
118
|
+
# Count matches
|
|
119
|
+
n = count("machine learning") # 477,922 matches
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
Async API:
|
|
123
|
+
```python
|
|
124
|
+
from crossref_local import aio
|
|
125
|
+
|
|
126
|
+
async def main():
|
|
127
|
+
    counts = await aio.count_many(["CRISPR", "neural network", "climate"])
|
|
128
|
+
    results = await aio.search("machine learning")
|
|
129
|
+
```
|
|
130
|
+
|
|
131
|
+
</details>
|
|
132
|
+
|
|
133
|
+
<details>
|
|
134
|
+
<summary><strong>CLI</strong></summary>
|
|
135
|
+
|
|
136
|
+
```bash
|
|
137
|
+
crossref-local search "CRISPR genome editing" -n 5
|
|
138
|
+
crossref-local get 10.1038/nature12373
|
|
139
|
+
crossref-local impact-factor Nature -y 2023 # IF: 54.067
|
|
140
|
+
crossref-local info # Database stats
|
|
141
|
+
```
|
|
142
|
+
|
|
143
|
+
With abstracts (`-a` flag):
|
|
144
|
+
```
|
|
145
|
+
$ crossref-local search "RS-1 enhances CRISPR" -n 1 -a
|
|
146
|
+
|
|
147
|
+
Found 4 matches in 128.4ms
|
|
148
|
+
|
|
149
|
+
1. RS-1 enhances CRISPR/Cas9- and TALEN-mediated knock-in efficiency (2016)
|
|
150
|
+
DOI: 10.1038/ncomms10548
|
|
151
|
+
Journal: Nature Communications
|
|
152
|
+
Abstract: Zinc-finger nuclease, transcription activator-like effector nuclease
|
|
153
|
+
and CRISPR/Cas9 are becoming major tools for genome editing...
|
|
154
|
+
```
|
|
155
|
+
|
|
156
|
+
</details>
|
|
157
|
+
|
|
158
|
+
<details>
|
|
159
|
+
<summary><strong>HTTP API</strong></summary>
|
|
160
|
+
|
|
161
|
+
Start the FastAPI server:
|
|
162
|
+
```bash
|
|
163
|
+
crossref-local api --host 0.0.0.0 --port 3333
|
|
164
|
+
```
|
|
165
|
+
|
|
166
|
+
Endpoints:
|
|
167
|
+
```bash
|
|
168
|
+
# Search works (FTS5)
|
|
169
|
+
curl "http://localhost:3333/works?q=CRISPR&limit=10"
|
|
170
|
+
|
|
171
|
+
# Get by DOI
|
|
172
|
+
curl "http://localhost:3333/works/10.1038/nature12373"
|
|
173
|
+
|
|
174
|
+
# Batch DOI lookup
|
|
175
|
+
curl -X POST "http://localhost:3333/works/batch" \
|
|
176
|
+
-H "Content-Type: application/json" \
|
|
177
|
+
-d '{"dois": ["10.1038/nature12373", "10.1126/science.aax0758"]}'
|
|
178
|
+
|
|
179
|
+
# Database info
|
|
180
|
+
curl "http://localhost:3333/info"
|
|
181
|
+
```
|
|
182
|
+
|
|
183
|
+
Remote access via SSH tunnel:
|
|
184
|
+
```bash
|
|
185
|
+
# On local machine
|
|
186
|
+
ssh -L 3333:127.0.0.1:3333 nas
|
|
187
|
+
|
|
188
|
+
# Python client (run the following two lines in Python, not in the shell)
|
|
189
|
+
from crossref_local import configure_remote
|
|
190
|
+
configure_remote("http://localhost:3333")
|
|
191
|
+
```
|
|
192
|
+
|
|
193
|
+
</details>
|
|
194
|
+
|
|
195
|
+
<details>
|
|
196
|
+
<summary><strong>MCP Server (Claude Desktop)</strong></summary>
|
|
197
|
+
|
|
198
|
+
Run as an MCP server for Claude Desktop integration:
|
|
199
|
+
```bash
|
|
200
|
+
crossref-local serve
|
|
201
|
+
```
|
|
202
|
+
|
|
203
|
+
Add to Claude Desktop config (`~/.config/claude/claude_desktop_config.json`):
|
|
204
|
+
```json
|
|
205
|
+
{
|
|
206
|
+
"mcpServers": {
|
|
207
|
+
"crossref-local": {
|
|
208
|
+
"command": "crossref-local",
|
|
209
|
+
"args": ["serve"],
|
|
210
|
+
"env": {
|
|
211
|
+
"CROSSREF_LOCAL_DB": "/path/to/crossref.db"
|
|
212
|
+
}
|
|
213
|
+
}
|
|
214
|
+
}
|
|
215
|
+
}
|
|
216
|
+
```
|
|
217
|
+
|
|
218
|
+
Available tools:
|
|
219
|
+
- `search_works` - Full-text search across 167M+ papers
|
|
220
|
+
- `get_work` - Get paper by DOI
|
|
221
|
+
- `count_works` - Count matching papers
|
|
222
|
+
- `database_info` - Database statistics
|
|
223
|
+
- `calculate_impact_factor` - Journal impact factor
|
|
224
|
+
|
|
225
|
+
</details>
|
|
226
|
+
|
|
227
|
+
<details>
|
|
228
|
+
<summary><strong>Impact Factor</strong></summary>
|
|
229
|
+
|
|
230
|
+
```python
|
|
231
|
+
from crossref_local.impact_factor import ImpactFactorCalculator
|
|
232
|
+
|
|
233
|
+
with ImpactFactorCalculator() as calc:
|
|
234
|
+
    result = calc.calculate_impact_factor("Nature", target_year=2023)
|
|
235
|
+
    print(f"IF: {result['impact_factor']:.3f}") # 54.067
|
|
236
|
+
```
|
|
237
|
+
|
|
238
|
+
| Journal | IF 2023 |
|
|
239
|
+
|---------|---------|
|
|
240
|
+
| Nature | 54.07 |
|
|
241
|
+
| Science | 46.17 |
|
|
242
|
+
| Cell | 54.01 |
|
|
243
|
+
| PLOS ONE | 3.37 |
|
|
244
|
+
|
|
245
|
+
</details>
|
|
246
|
+
|
|
247
|
+
<details>
|
|
248
|
+
<summary><strong>Citation Network</strong></summary>
|
|
249
|
+
|
|
250
|
+
```python
|
|
251
|
+
from crossref_local import get_citing, get_cited, CitationNetwork
|
|
252
|
+
|
|
253
|
+
citing = get_citing("10.1038/nature12373") # 1539 papers
|
|
254
|
+
cited = get_cited("10.1038/nature12373")
|
|
255
|
+
|
|
256
|
+
# Build visualization (like Connected Papers)
|
|
257
|
+
network = CitationNetwork("10.1038/nature12373", depth=2)
|
|
258
|
+
network.save_html("citation_network.html") # requires: pip install crossref-local[viz]
|
|
259
|
+
```
|
|
260
|
+
|
|
261
|
+
</details>
|
|
262
|
+
|
|
263
|
+
<details>
|
|
264
|
+
<summary><strong>Performance</strong></summary>
|
|
265
|
+
|
|
266
|
+
| Query | Matches | Time |
|
|
267
|
+
|-------|---------|------|
|
|
268
|
+
| `hippocampal sharp wave ripples` | 541 | 22ms |
|
|
269
|
+
| `machine learning` | 477,922 | 113ms |
|
|
270
|
+
| `CRISPR genome editing` | 12,170 | 257ms |
|
|
271
|
+
|
|
272
|
+
Searching 167M records in milliseconds via FTS5.
|
|
273
|
+
|
|
274
|
+
</details>
|
|
275
|
+
|
|
276
|
+
<details>
|
|
277
|
+
<summary><strong>Related Projects</strong></summary>
|
|
278
|
+
|
|
279
|
+
**[openalex-local](https://github.com/ywatanabe1989/openalex-local)** - Sister project with OpenAlex data:
|
|
280
|
+
|
|
281
|
+
| Feature | crossref-local | openalex-local |
|
|
282
|
+
|---------|----------------|----------------|
|
|
283
|
+
| Works | 167M | 284M |
|
|
284
|
+
| Abstracts | ~21% | ~45-60% |
|
|
285
|
+
| Update frequency | Real-time | Monthly |
|
|
286
|
+
| DOI authority | ✓ (source) | Uses CrossRef |
|
|
287
|
+
| Citations | Raw references | Linked works |
|
|
288
|
+
| Concepts/Topics | ✗ | ✓ |
|
|
289
|
+
| Author IDs | ✗ | ✓ |
|
|
290
|
+
| Best for | DOI lookup, raw refs | Semantic search |
|
|
291
|
+
|
|
292
|
+
**When to use CrossRef**: Real-time DOI updates, raw reference parsing, authoritative metadata.
|
|
293
|
+
**When to use OpenAlex**: Semantic search, citation analysis, topic discovery.
|
|
294
|
+
|
|
295
|
+
</details>
|
|
296
|
+
|
|
297
|
+
|
|
298
|
+
---
|
|
299
|
+
|
|
300
|
+
<p align="center">
|
|
301
|
+
<a href="https://scitex.ai"><img src="docs/scitex-icon-navy-inverted.png" alt="SciTeX" width="40"/></a>
|
|
302
|
+
<br>
|
|
303
|
+
AGPL-3.0 · ywatanabe@scitex.ai
|
|
304
|
+
</p>
|
|
305
|
+
|
|
306
|
+
<!-- EOF -->
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
crossref_local/__init__.py,sha256=AJIkriNQBf61SPoi-cfxNr25pdoQMS3KvLhsNoCfJmQ,3316
|
|
2
|
+
crossref_local/__main__.py,sha256=N1c1ESGgJkAwsSWXANUgmzxC1OJEIqw-cl9m4pmNP7s,110
|
|
3
|
+
crossref_local/aio.py,sha256=En2btSn3euRbEYav1919gsmdC8iQaMbgGUso-IThCwo,5490
|
|
4
|
+
crossref_local/api.py,sha256=h2lpoA7qsCzxBTv0Na3Etgk0hZCWZFKmalreIYsmOhw,5343
|
|
5
|
+
crossref_local/citations.py,sha256=QFahv84upNnXP_89A8bHxEbAdz7wHbh5LEniGcAiHas,12402
|
|
6
|
+
crossref_local/cli.py,sha256=WMk7GxJtTf2ZCH1ldkcUmX-643n5DE11kGu_K7AkOrA,15132
|
|
7
|
+
crossref_local/config.py,sha256=4LGZJ3CmsA9YRv48FkEqVR2xljuSjl0MYiMrT8ljk14,5050
|
|
8
|
+
crossref_local/db.py,sha256=x7dXQXjsFN4LavtkNAKTNw1cUBMG-2h53-Z-Xlq6aoQ,3696
|
|
9
|
+
crossref_local/fts.py,sha256=yZMh_vmtFentXKAFGTS4z7ZNNj7p_ItgfFP5i0yQltw,4448
|
|
10
|
+
crossref_local/mcp_server.py,sha256=KDcBvVMXrhemO7cS4kBMfEvp0Qb-LDsVbnPhnLaaC-4,5796
|
|
11
|
+
crossref_local/models.py,sha256=b_yYb91O6RwEPpEqe2Wmdz12WIfE5itjEus4-fCLxLI,5476
|
|
12
|
+
crossref_local/remote.py,sha256=p8P0zotkNYchqqKGOsqcFiHR10qD4pYmJ26-ltyqO4s,8389
|
|
13
|
+
crossref_local/server.py,sha256=lEc0EA3jVx31q1EEYOaT4cr9l2_fGpoQZmpYdnoGxFQ,9034
|
|
14
|
+
crossref_local/impact_factor/__init__.py,sha256=pcgVCPogBisANYE5Vp2PHVGPgxoMsSXr-6utqVE97-4,559
|
|
15
|
+
crossref_local/impact_factor/calculator.py,sha256=eZ13URAZzPdRyAQpS8zXe_T33e2lm_gQhtoJCXbfIGM,15977
|
|
16
|
+
crossref_local/impact_factor/journal_lookup.py,sha256=Ztx6ZeWxfmPvA3KfcW5h_yz01XPstIdk91j3nu2Q-qw,8846
|
|
17
|
+
crossref_local-0.3.1.dist-info/METADATA,sha256=Wiqa6MGJXMgMFI3qXss4IcLr0wo20Jsbl9UgAfyXrrU,8480
|
|
18
|
+
crossref_local-0.3.1.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
19
|
+
crossref_local-0.3.1.dist-info/entry_points.txt,sha256=BZbDvHLHzlKzFc-dqLAFwPrWGmGq5yFuD3vslzbmRnk,111
|
|
20
|
+
crossref_local-0.3.1.dist-info/RECORD,,
|