crossref-local 0.3.1__py3-none-any.whl → 0.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- crossref_local/__init__.py +38 -16
- crossref_local/__main__.py +0 -0
- crossref_local/_aio/__init__.py +30 -0
- crossref_local/_aio/_impl.py +238 -0
- crossref_local/_cache/__init__.py +15 -0
- crossref_local/_cache/export.py +100 -0
- crossref_local/_cache/utils.py +93 -0
- crossref_local/_cache/viz.py +296 -0
- crossref_local/_cli/__init__.py +9 -0
- crossref_local/_cli/cache.py +179 -0
- crossref_local/_cli/cli.py +512 -0
- crossref_local/_cli/completion.py +245 -0
- crossref_local/_cli/main.py +20 -0
- crossref_local/_cli/mcp.py +351 -0
- crossref_local/_cli/mcp_server.py +413 -0
- crossref_local/_core/__init__.py +58 -0
- crossref_local/{api.py → _core/api.py} +130 -36
- crossref_local/{citations.py → _core/citations.py} +55 -26
- crossref_local/{config.py → _core/config.py} +57 -42
- crossref_local/{db.py → _core/db.py} +32 -26
- crossref_local/{fts.py → _core/fts.py} +18 -14
- crossref_local/{models.py → _core/models.py} +11 -6
- crossref_local/{impact_factor → _impact_factor}/__init__.py +0 -0
- crossref_local/{impact_factor → _impact_factor}/calculator.py +0 -0
- crossref_local/{impact_factor → _impact_factor}/journal_lookup.py +0 -0
- crossref_local/_remote/__init__.py +56 -0
- crossref_local/_remote/base.py +356 -0
- crossref_local/_remote/collections.py +175 -0
- crossref_local/_server/__init__.py +140 -0
- crossref_local/_server/middleware.py +25 -0
- crossref_local/_server/models.py +129 -0
- crossref_local/_server/routes_citations.py +98 -0
- crossref_local/_server/routes_collections.py +282 -0
- crossref_local/_server/routes_compat.py +102 -0
- crossref_local/_server/routes_works.py +128 -0
- crossref_local/_server/server.py +19 -0
- crossref_local/aio.py +30 -206
- crossref_local/cache.py +466 -0
- crossref_local/cli.py +5 -447
- crossref_local/jobs.py +169 -0
- crossref_local/mcp_server.py +5 -199
- crossref_local/remote.py +5 -261
- crossref_local/server.py +5 -349
- {crossref_local-0.3.1.dist-info → crossref_local-0.5.0.dist-info}/METADATA +88 -24
- crossref_local-0.5.0.dist-info/RECORD +47 -0
- crossref_local-0.3.1.dist-info/RECORD +0 -20
- {crossref_local-0.3.1.dist-info → crossref_local-0.5.0.dist-info}/WHEEL +0 -0
- {crossref_local-0.3.1.dist-info → crossref_local-0.5.0.dist-info}/entry_points.txt +0 -0
|
@@ -0,0 +1,413 @@
|
|
|
1
|
+
"""MCP server for CrossRef Local - Claude integration.
|
|
2
|
+
|
|
3
|
+
This server exposes crossref-local functionality as MCP tools,
|
|
4
|
+
enabling Claude Desktop and other MCP clients to search academic papers.
|
|
5
|
+
|
|
6
|
+
Usage:
|
|
7
|
+
crossref-local serve # stdio (Claude Desktop)
|
|
8
|
+
crossref-local serve -t http --port 8082 # HTTP transport
|
|
9
|
+
crossref-local-mcp # Direct entry point
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
import json
|
|
13
|
+
|
|
14
|
+
from fastmcp import FastMCP
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
from .. import (
|
|
18
|
+
get as _get,
|
|
19
|
+
info as _info,
|
|
20
|
+
search as _search,
|
|
21
|
+
)
|
|
22
|
+
|
|
23
|
+
# Initialize MCP server
|
|
24
|
+
mcp = FastMCP(
|
|
25
|
+
name="crossref-local",
|
|
26
|
+
instructions="Local CrossRef database with 167M+ works and full-text search. "
|
|
27
|
+
"Use search to find papers, search_by_doi for DOI lookup, enrich_dois to add "
|
|
28
|
+
"citation counts and references, and status for stats.",
|
|
29
|
+
)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
@mcp.tool()
def search(
    query: str,
    limit: int = 10,
    offset: int = 0,
    with_abstracts: bool = False,
) -> str:
    """Full-text search for academic works by title, abstract, or authors.

    Backed by an FTS5 index spanning 167M+ papers, so FTS5 query syntax
    (AND, OR, NOT, "exact phrases") is accepted.

    Args:
        query: Search query (e.g., "machine learning", "CRISPR", "neural network AND hippocampus")
        limit: Maximum number of results to return (default: 10, max: 100)
        offset: Skip first N results for pagination (default: 0)
        with_abstracts: Include abstracts in results (default: False)

    Returns:
        JSON string with search results including total count and matching works.

    Examples:
        search("machine learning")
        search("CRISPR", limit=20)
        search("neural network AND memory", with_abstracts=True)
    """
    # Cap the page size server-side so one call cannot return unbounded output.
    hits = _search(query, limit=min(limit, 100), offset=offset)

    serialized = []
    for hit in hits.works:
        entry = {
            "doi": hit.doi,
            "title": hit.title,
            "authors": hit.authors,
            "year": hit.year,
            "journal": hit.journal,
        }
        # Abstracts are opt-in to keep the default response small.
        if with_abstracts and hit.abstract:
            entry["abstract"] = hit.abstract
        serialized.append(entry)

    payload = {
        "query": hits.query,
        "total": hits.total,
        "returned": len(serialized),
        "elapsed_ms": round(hits.elapsed_ms, 2),
        "works": serialized,
    }
    return json.dumps(payload, indent=2)
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
@mcp.tool()
def search_by_doi(doi: str, as_citation: bool = False) -> str:
    """Look up a single work by its DOI.

    Args:
        doi: Digital Object Identifier (e.g., "10.1038/nature12373")
        as_citation: Return formatted citation instead of full metadata

    Returns:
        JSON string with work metadata, or formatted citation string.

    Examples:
        search_by_doi("10.1038/nature12373")
        search_by_doi("10.1126/science.aax0758", as_citation=True)
    """
    record = _get(doi)

    # Missing DOIs are reported as a JSON error object, not an exception,
    # so MCP clients always receive a well-formed string result.
    if record is None:
        return json.dumps({"error": f"DOI not found: {doi}"})

    return record.citation() if as_citation else json.dumps(record.to_dict(), indent=2)
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
@mcp.tool()
def status() -> str:
    """Report database statistics and status.

    Returns:
        JSON string with database path, work count, FTS index count, and citation count.
    """
    return json.dumps(_info(), indent=2)
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
@mcp.tool()
def enrich_dois(dois: list[str]) -> str:
    """Fetch full metadata for a list of DOIs.

    Use this after search() to get detailed metadata for papers.
    The search() tool returns basic info (title, authors, year, journal);
    this tool adds citation_count, references, volume, issue, publisher, etc.

    Typical workflow:
        1. search("epilepsy seizure prediction") -> get DOIs
        2. enrich_dois([doi1, doi2, ...]) -> get full metadata

    Args:
        dois: List of DOIs to enrich (e.g., ["10.1038/nature12373", "10.1126/science.aax0758"])

    Returns:
        JSON string with enriched works including citation_count and references.

    Examples:
        enrich_dois(["10.1038/nature12373"])
        enrich_dois(["10.1038/s41467-017-02577-y", "10.1093/brain/aww019"])
    """
    # Imported lazily, matching the deferred-import convention of the cache tools.
    from .. import get_many as _get_many

    enriched = [record.to_dict() for record in _get_many(dois)]

    payload = {
        "requested": len(dois),
        "found": len(enriched),
        "works": enriched,
    }
    return json.dumps(payload, indent=2)
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
@mcp.tool()
def cache_create(
    name: str,
    query: str,
    limit: int = 1000,
) -> str:
    """Build a paper cache from a search query.

    Fetches full metadata for papers matching the query and saves them to a
    disk cache, giving you a reusable paper collection for a research topic.

    Args:
        name: Cache name (e.g., "epilepsy", "alzheimers")
        query: FTS search query
        limit: Max papers to cache (default: 1000)

    Returns:
        JSON with cache info (path, paper count, size)

    Example:
        cache_create("epilepsy", "epilepsy seizure prediction", limit=500)
    """
    from .. import cache

    created = cache.create(name, query=query, limit=limit)
    return json.dumps(created.to_dict(), indent=2)
|
|
188
|
+
|
|
189
|
+
|
|
190
|
+
@mcp.tool()
def cache_query(
    name: str,
    fields: list[str] | None = None,
    include_abstract: bool = False,
    include_references: bool = False,
    include_citations: bool = False,
    year_min: int | None = None,
    year_max: int | None = None,
    journal: str | None = None,
    limit: int | None = None,
) -> str:
    """Query cached papers with field filtering.

    Returns minimal data to keep context usage low — request only the fields
    you actually need.

    Args:
        name: Cache name
        fields: Explicit field list (e.g., ["doi", "title", "year"])
        include_abstract: Include abstract (default: False)
        include_references: Include references list (default: False)
        include_citations: Include citation_count (default: False)
        year_min: Filter by minimum year
        year_max: Filter by maximum year
        journal: Filter by journal name (substring match)
        limit: Max results to return

    Returns:
        JSON array of filtered papers

    Examples:
        cache_query("epilepsy", fields=["doi", "title", "year"])
        cache_query("epilepsy", year_min=2020, include_citations=True, limit=50)
    """
    from .. import cache

    # Collect the pass-through options once, then forward them in a single call.
    options = dict(
        fields=fields,
        include_abstract=include_abstract,
        include_references=include_references,
        include_citations=include_citations,
        year_min=year_min,
        year_max=year_max,
        journal=journal,
        limit=limit,
    )
    rows = cache.query(name, **options)
    return json.dumps({"count": len(rows), "papers": rows}, indent=2)
|
|
238
|
+
|
|
239
|
+
|
|
240
|
+
@mcp.tool()
def cache_stats(name: str) -> str:
    """Summarize a cache without loading its full data.

    Covers year distribution, top journals, and citation statistics.

    Args:
        name: Cache name

    Returns:
        JSON with statistics (paper_count, year_range, top_journals, etc.)
    """
    from .. import cache

    return json.dumps(cache.stats(name), indent=2)
|
|
256
|
+
|
|
257
|
+
|
|
258
|
+
@mcp.tool()
def cache_list() -> str:
    """Enumerate all available caches.

    Returns:
        JSON array of cache info (name, path, paper_count, size)
    """
    from .. import cache

    entries = [entry.to_dict() for entry in cache.list_caches()]
    return json.dumps(entries, indent=2)
|
|
269
|
+
|
|
270
|
+
|
|
271
|
+
@mcp.tool()
def cache_top_cited(
    name: str,
    n: int = 20,
    year_min: int | None = None,
    year_max: int | None = None,
) -> str:
    """Return the most-cited papers in a cache.

    Args:
        name: Cache name
        n: Number of papers to return
        year_min: Filter by minimum year
        year_max: Filter by maximum year

    Returns:
        JSON array of top cited papers
    """
    from .._cache.viz import get_top_cited

    top = get_top_cited(name, n=n, year_min=year_min, year_max=year_max)
    return json.dumps(top, indent=2)
|
|
293
|
+
|
|
294
|
+
|
|
295
|
+
@mcp.tool()
def cache_citation_summary(name: str) -> str:
    """Summarize citation statistics for cached papers.

    Reports mean, median, and max citations plus counts of highly cited papers.

    Args:
        name: Cache name

    Returns:
        JSON with citation statistics
    """
    from .._cache.viz import get_citation_summary

    return json.dumps(get_citation_summary(name), indent=2)
|
|
311
|
+
|
|
312
|
+
|
|
313
|
+
@mcp.tool()
def cache_plot_scatter(
    name: str,
    output: str,
    top_n: int = 10,
) -> str:
    """Render a year-vs-citations scatter plot for a cache.

    Writes the plot to *output* and reports the top cited papers.

    Args:
        name: Cache name
        output: Output file path (png/pdf/svg)
        top_n: Number of top papers to label on plot

    Returns:
        JSON with output path and top papers list
    """
    from .._cache.viz import plot_year_citations

    plot_info = plot_year_citations(name, output=output, top_n=top_n)
    return json.dumps(plot_info, indent=2)
|
|
335
|
+
|
|
336
|
+
|
|
337
|
+
@mcp.tool()
def cache_plot_network(
    name: str,
    output: str,
    max_nodes: int = 100,
) -> str:
    """Render a citation-network visualization for a cache.

    Produces an interactive HTML graph of citation relationships.

    Args:
        name: Cache name
        output: Output HTML file path
        max_nodes: Maximum papers to include

    Returns:
        JSON with network stats
    """
    from .._cache.viz import plot_citation_network

    network_info = plot_citation_network(name, output=output, max_nodes=max_nodes)
    return json.dumps(network_info, indent=2)
|
|
359
|
+
|
|
360
|
+
|
|
361
|
+
@mcp.tool()
def cache_export(
    name: str,
    output_path: str,
    format: str = "json",
    fields: list[str] | None = None,
) -> str:
    """Write a cache out to a file.

    Args:
        name: Cache name
        output_path: Output file path
        format: Export format (json, csv, bibtex, dois)
        fields: Fields to include (for json/csv)

    Returns:
        JSON with output path
    """
    from .. import cache

    destination = cache.export(name, output_path, format=format, fields=fields)
    return json.dumps({"exported": destination, "format": format})
|
|
383
|
+
|
|
384
|
+
|
|
385
|
+
def run_server(
    transport: str = "stdio",
    host: str = "localhost",
    port: int = 8082,
) -> None:
    """Start the MCP server on the chosen transport.

    Args:
        transport: Transport protocol ("stdio", "sse", or "http")
        host: Host for HTTP/SSE transport
        port: Port for HTTP/SSE transport
    """
    # Map the user-facing transport names onto FastMCP's identifiers;
    # note FastMCP calls its HTTP transport "streamable-http".
    networked = {"sse": "sse", "http": "streamable-http"}
    if transport == "stdio":
        mcp.run(transport="stdio")
    elif transport in networked:
        mcp.run(transport=networked[transport], host=host, port=port)
    else:
        raise ValueError(f"Unknown transport: {transport}")
|
|
405
|
+
|
|
406
|
+
|
|
407
|
+
def main():
    """Entry point for crossref-local-mcp command."""
    # stdio is also run_server's default transport; stated implicitly here.
    run_server()


if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""Internal core modules for crossref_local."""
|
|
3
|
+
|
|
4
|
+
from .api import (
|
|
5
|
+
configure,
|
|
6
|
+
configure_http,
|
|
7
|
+
configure_remote,
|
|
8
|
+
count,
|
|
9
|
+
enrich,
|
|
10
|
+
enrich_dois,
|
|
11
|
+
exists,
|
|
12
|
+
get,
|
|
13
|
+
get_many,
|
|
14
|
+
get_mode,
|
|
15
|
+
info,
|
|
16
|
+
search,
|
|
17
|
+
)
|
|
18
|
+
from .citations import (
|
|
19
|
+
CitationNetwork,
|
|
20
|
+
get_citation_count,
|
|
21
|
+
get_cited,
|
|
22
|
+
get_citing,
|
|
23
|
+
)
|
|
24
|
+
from .config import Config
|
|
25
|
+
from .db import Database, close_db, get_db
|
|
26
|
+
from .models import SearchResult, Work
|
|
27
|
+
|
|
28
|
+
__all__ = [
|
|
29
|
+
# API functions
|
|
30
|
+
"search",
|
|
31
|
+
"count",
|
|
32
|
+
"get",
|
|
33
|
+
"get_many",
|
|
34
|
+
"exists",
|
|
35
|
+
"enrich",
|
|
36
|
+
"enrich_dois",
|
|
37
|
+
"configure",
|
|
38
|
+
"configure_http",
|
|
39
|
+
"configure_remote",
|
|
40
|
+
"get_mode",
|
|
41
|
+
"info",
|
|
42
|
+
# Models
|
|
43
|
+
"Work",
|
|
44
|
+
"SearchResult",
|
|
45
|
+
# Citations
|
|
46
|
+
"get_citing",
|
|
47
|
+
"get_cited",
|
|
48
|
+
"get_citation_count",
|
|
49
|
+
"CitationNetwork",
|
|
50
|
+
# Database
|
|
51
|
+
"Database",
|
|
52
|
+
"get_db",
|
|
53
|
+
"close_db",
|
|
54
|
+
# Config
|
|
55
|
+
"Config",
|
|
56
|
+
]
|
|
57
|
+
|
|
58
|
+
# EOF
|