crossref-local 0.3.1__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48)
  1. crossref_local/__init__.py +38 -16
  2. crossref_local/__main__.py +0 -0
  3. crossref_local/_aio/__init__.py +30 -0
  4. crossref_local/_aio/_impl.py +238 -0
  5. crossref_local/_cache/__init__.py +15 -0
  6. crossref_local/_cache/export.py +100 -0
  7. crossref_local/_cache/utils.py +93 -0
  8. crossref_local/_cache/viz.py +296 -0
  9. crossref_local/_cli/__init__.py +9 -0
  10. crossref_local/_cli/cache.py +179 -0
  11. crossref_local/_cli/cli.py +512 -0
  12. crossref_local/_cli/completion.py +245 -0
  13. crossref_local/_cli/main.py +20 -0
  14. crossref_local/_cli/mcp.py +351 -0
  15. crossref_local/_cli/mcp_server.py +413 -0
  16. crossref_local/_core/__init__.py +58 -0
  17. crossref_local/{api.py → _core/api.py} +130 -36
  18. crossref_local/{citations.py → _core/citations.py} +55 -26
  19. crossref_local/{config.py → _core/config.py} +57 -42
  20. crossref_local/{db.py → _core/db.py} +32 -26
  21. crossref_local/{fts.py → _core/fts.py} +18 -14
  22. crossref_local/{models.py → _core/models.py} +11 -6
  23. crossref_local/{impact_factor → _impact_factor}/__init__.py +0 -0
  24. crossref_local/{impact_factor → _impact_factor}/calculator.py +0 -0
  25. crossref_local/{impact_factor → _impact_factor}/journal_lookup.py +0 -0
  26. crossref_local/_remote/__init__.py +56 -0
  27. crossref_local/_remote/base.py +356 -0
  28. crossref_local/_remote/collections.py +175 -0
  29. crossref_local/_server/__init__.py +140 -0
  30. crossref_local/_server/middleware.py +25 -0
  31. crossref_local/_server/models.py +129 -0
  32. crossref_local/_server/routes_citations.py +98 -0
  33. crossref_local/_server/routes_collections.py +282 -0
  34. crossref_local/_server/routes_compat.py +102 -0
  35. crossref_local/_server/routes_works.py +128 -0
  36. crossref_local/_server/server.py +19 -0
  37. crossref_local/aio.py +30 -206
  38. crossref_local/cache.py +466 -0
  39. crossref_local/cli.py +5 -447
  40. crossref_local/jobs.py +169 -0
  41. crossref_local/mcp_server.py +5 -199
  42. crossref_local/remote.py +5 -261
  43. crossref_local/server.py +5 -349
  44. {crossref_local-0.3.1.dist-info → crossref_local-0.5.0.dist-info}/METADATA +88 -24
  45. crossref_local-0.5.0.dist-info/RECORD +47 -0
  46. crossref_local-0.3.1.dist-info/RECORD +0 -20
  47. {crossref_local-0.3.1.dist-info → crossref_local-0.5.0.dist-info}/WHEEL +0 -0
  48. {crossref_local-0.3.1.dist-info → crossref_local-0.5.0.dist-info}/entry_points.txt +0 -0
crossref_local/mcp_server.py CHANGED
@@ -1,202 +1,8 @@
1
- """MCP server for CrossRef Local - Claude integration.
1
+ #!/usr/bin/env python3
2
+ """Backward compatibility: re-export from _cli.mcp_server."""
2
3
 
3
- This server exposes crossref-local functionality as MCP tools,
4
- enabling Claude Desktop and other MCP clients to search academic papers.
4
+ from ._cli.mcp_server import mcp, run_server, main
5
5
 
6
- Usage:
7
- crossref-local serve # stdio (Claude Desktop)
8
- crossref-local serve -t http --port 8082 # HTTP transport
9
- crossref-local-mcp # Direct entry point
10
- """
6
+ __all__ = ["mcp", "run_server", "main"]
11
7
 
12
- import json
13
- from typing import Optional
14
-
15
- from fastmcp import FastMCP
16
-
17
- from . import search, get, count, info, __version__
18
- from .impact_factor import ImpactFactorCalculator
19
-
20
- # Initialize MCP server
21
- mcp = FastMCP(
22
- name="crossref-local",
23
- instructions="Local CrossRef database with 167M+ works and full-text search. "
24
- "Use search_works to find papers, get_work for DOI lookup, count_works for counts, "
25
- "database_info for stats, and calculate_impact_factor for journal metrics.",
26
- )
27
-
28
-
29
- @mcp.tool()
30
- def search_works(
31
- query: str,
32
- limit: int = 10,
33
- offset: int = 0,
34
- with_abstracts: bool = False,
35
- ) -> str:
36
- """Search for academic works by title, abstract, or authors.
37
-
38
- Uses FTS5 full-text search index for fast searching across 167M+ papers.
39
- Supports FTS5 query syntax: AND, OR, NOT, "exact phrases".
40
-
41
- Args:
42
- query: Search query (e.g., "machine learning", "CRISPR", "neural network AND hippocampus")
43
- limit: Maximum number of results to return (default: 10, max: 100)
44
- offset: Skip first N results for pagination (default: 0)
45
- with_abstracts: Include abstracts in results (default: False)
46
-
47
- Returns:
48
- JSON string with search results including total count and matching works.
49
-
50
- Examples:
51
- search_works("machine learning")
52
- search_works("CRISPR", limit=20)
53
- search_works("neural network AND memory", with_abstracts=True)
54
- """
55
- results = search(query, limit=min(limit, 100), offset=offset)
56
-
57
- works_data = []
58
- for work in results.works:
59
- work_dict = {
60
- "doi": work.doi,
61
- "title": work.title,
62
- "authors": work.authors,
63
- "year": work.year,
64
- "journal": work.journal,
65
- }
66
- if with_abstracts and work.abstract:
67
- work_dict["abstract"] = work.abstract
68
- works_data.append(work_dict)
69
-
70
- return json.dumps(
71
- {
72
- "query": results.query,
73
- "total": results.total,
74
- "returned": len(works_data),
75
- "elapsed_ms": round(results.elapsed_ms, 2),
76
- "works": works_data,
77
- },
78
- indent=2,
79
- )
80
-
81
-
82
- @mcp.tool()
83
- def get_work(doi: str, as_citation: bool = False) -> str:
84
- """Get detailed information about a work by DOI.
85
-
86
- Args:
87
- doi: Digital Object Identifier (e.g., "10.1038/nature12373")
88
- as_citation: Return formatted citation instead of full metadata
89
-
90
- Returns:
91
- JSON string with work metadata, or formatted citation string.
92
-
93
- Examples:
94
- get_work("10.1038/nature12373")
95
- get_work("10.1126/science.aax0758", as_citation=True)
96
- """
97
- work = get(doi)
98
-
99
- if work is None:
100
- return json.dumps({"error": f"DOI not found: {doi}"})
101
-
102
- if as_citation:
103
- return work.citation()
104
-
105
- return json.dumps(work.to_dict(), indent=2)
106
-
107
-
108
- @mcp.tool()
109
- def count_works(query: str) -> str:
110
- """Count matching works without fetching results.
111
-
112
- Faster than search when you only need the count.
113
-
114
- Args:
115
- query: FTS5 search query
116
-
117
- Returns:
118
- JSON string with count.
119
-
120
- Examples:
121
- count_works("CRISPR")
122
- count_works("machine learning AND deep")
123
- """
124
- n = count(query)
125
- return json.dumps({"query": query, "count": n})
126
-
127
-
128
- @mcp.tool()
129
- def database_info() -> str:
130
- """Get database statistics and status.
131
-
132
- Returns:
133
- JSON string with database path, work count, FTS index count, and citation count.
134
- """
135
- db_info = info()
136
- return json.dumps(db_info, indent=2)
137
-
138
-
139
- @mcp.tool()
140
- def calculate_impact_factor(
141
- journal: str,
142
- year: int = 2023,
143
- window: int = 2,
144
- ) -> str:
145
- """Calculate impact factor for a journal.
146
-
147
- Impact factor = citations in target year / articles in window years.
148
-
149
- Args:
150
- journal: Journal name or ISSN (e.g., "Nature", "Science", "0028-0836")
151
- year: Target year for citation count (default: 2023)
152
- window: Number of years for article window (default: 2 for standard IF)
153
-
154
- Returns:
155
- JSON string with journal name, article count, citation count, and impact factor.
156
-
157
- Examples:
158
- calculate_impact_factor("Nature")
159
- calculate_impact_factor("Science", year=2022)
160
- calculate_impact_factor("Cell", window=5) # 5-year impact factor
161
- """
162
- try:
163
- with ImpactFactorCalculator() as calc:
164
- result = calc.calculate_impact_factor(
165
- journal_identifier=journal,
166
- target_year=year,
167
- window_years=window,
168
- )
169
- return json.dumps(result, indent=2)
170
- except Exception as e:
171
- return json.dumps({"error": str(e)})
172
-
173
-
174
- def run_server(
175
- transport: str = "stdio",
176
- host: str = "localhost",
177
- port: int = 8082,
178
- ) -> None:
179
- """Run the MCP server.
180
-
181
- Args:
182
- transport: Transport protocol ("stdio", "sse", or "http")
183
- host: Host for HTTP/SSE transport
184
- port: Port for HTTP/SSE transport
185
- """
186
- if transport == "stdio":
187
- mcp.run(transport="stdio")
188
- elif transport == "sse":
189
- mcp.run(transport="sse", host=host, port=port)
190
- elif transport == "http":
191
- mcp.run(transport="streamable-http", host=host, port=port)
192
- else:
193
- raise ValueError(f"Unknown transport: {transport}")
194
-
195
-
196
- def main():
197
- """Entry point for crossref-local-mcp command."""
198
- run_server(transport="stdio")
199
-
200
-
201
- if __name__ == "__main__":
202
- main()
8
+ # EOF
crossref_local/remote.py CHANGED
@@ -1,264 +1,8 @@
1
- """Remote API client for crossref_local.
1
+ #!/usr/bin/env python3
2
+ """Backward compatibility: re-export from _remote."""
2
3
 
3
- Connects to a CrossRef Local API server instead of direct database access.
4
- Use this when the database is on a remote server accessible via HTTP.
5
- """
4
+ from ._remote import RemoteClient, DEFAULT_API_URL, get_client, reset_client
6
5
 
7
- import json
8
- import urllib.request
9
- import urllib.parse
10
- import urllib.error
11
- from typing import List, Optional, Dict, Any
6
+ __all__ = ["RemoteClient", "DEFAULT_API_URL", "get_client", "reset_client"]
12
7
 
13
- from .models import Work, SearchResult
14
-
15
-
16
- class RemoteClient:
17
- """
18
- HTTP client for CrossRef Local API server.
19
-
20
- Provides the same interface as the local API but connects
21
- to a remote server via HTTP.
22
-
23
- Example:
24
- >>> client = RemoteClient("http://localhost:3333")
25
- >>> results = client.search(title="machine learning", limit=10)
26
- >>> work = client.get("10.1038/nature12373")
27
- """
28
-
29
- def __init__(self, base_url: str = "http://localhost:3333", timeout: int = 30):
30
- """
31
- Initialize remote client.
32
-
33
- Args:
34
- base_url: API server URL (default: http://localhost:3333)
35
- timeout: Request timeout in seconds
36
- """
37
- self.base_url = base_url.rstrip("/")
38
- self.timeout = timeout
39
-
40
- def _request(self, endpoint: str, params: Optional[Dict[str, Any]] = None) -> Dict:
41
- """Make HTTP GET request to API."""
42
- url = f"{self.base_url}{endpoint}"
43
- if params:
44
- # Filter out None values
45
- params = {k: v for k, v in params.items() if v is not None}
46
- if params:
47
- url = f"{url}?{urllib.parse.urlencode(params)}"
48
-
49
- try:
50
- req = urllib.request.Request(url)
51
- req.add_header("Accept", "application/json")
52
- with urllib.request.urlopen(req, timeout=self.timeout) as response:
53
- return json.loads(response.read().decode("utf-8"))
54
- except urllib.error.HTTPError as e:
55
- if e.code == 404:
56
- return None
57
- raise ConnectionError(f"API request failed: {e.code} {e.reason}") from e
58
- except urllib.error.URLError as e:
59
- raise ConnectionError(
60
- f"Cannot connect to API at {self.base_url}: {e.reason}"
61
- ) from e
62
-
63
- def health(self) -> Dict:
64
- """Check API server health."""
65
- return self._request("/health")
66
-
67
- def info(self) -> Dict:
68
- """Get database/API information."""
69
- root = self._request("/")
70
- info_data = self._request("/info")
71
- return {
72
- "api_url": self.base_url,
73
- "api_version": root.get("version", "unknown"),
74
- "status": root.get("status", "unknown"),
75
- "mode": "remote",
76
- "works": info_data.get("total_papers", 0) if info_data else 0,
77
- "fts_indexed": info_data.get("fts_indexed", 0) if info_data else 0,
78
- "citations": info_data.get("citations", 0) if info_data else 0,
79
- }
80
-
81
- def search(
82
- self,
83
- query: Optional[str] = None,
84
- doi: Optional[str] = None,
85
- title: Optional[str] = None,
86
- authors: Optional[str] = None,
87
- year: Optional[int] = None,
88
- limit: int = 10,
89
- offset: int = 0,
90
- ) -> SearchResult:
91
- """
92
- Search for papers.
93
-
94
- Args:
95
- query: Full-text search query (searches title by default)
96
- doi: Search by DOI
97
- title: Search by title (explicit)
98
- authors: Search by author name
99
- year: Filter by publication year
100
- limit: Maximum results (default: 10, max: 100)
101
- offset: Skip first N results for pagination
102
-
103
- Returns:
104
- SearchResult with matching works
105
- """
106
- # Use new /works endpoint with FTS5 search
107
- search_query = query or title
108
-
109
- params = {
110
- "q": search_query,
111
- "limit": min(limit, 100),
112
- "offset": offset,
113
- }
114
-
115
- data = self._request("/works", params)
116
-
117
- if not data:
118
- return SearchResult(works=[], total=0, query=query or "", elapsed_ms=0.0)
119
-
120
- works = []
121
- for item in data.get("results", []):
122
- work = Work(
123
- doi=item.get("doi", ""),
124
- title=item.get("title", ""),
125
- authors=item.get("authors", []),
126
- year=item.get("year"),
127
- journal=item.get("journal"),
128
- volume=item.get("volume"),
129
- issue=item.get("issue"),
130
- page=item.get("page") or item.get("pages"),
131
- abstract=item.get("abstract"),
132
- citation_count=item.get("citation_count"),
133
- )
134
- works.append(work)
135
-
136
- return SearchResult(
137
- works=works,
138
- total=data.get("total", len(works)),
139
- query=query or title or doi or "",
140
- elapsed_ms=data.get("elapsed_ms", 0.0),
141
- )
142
-
143
- def get(self, doi: str) -> Optional[Work]:
144
- """
145
- Get a work by DOI.
146
-
147
- Args:
148
- doi: Digital Object Identifier
149
-
150
- Returns:
151
- Work object or None if not found
152
- """
153
- # Use /works/{doi} endpoint directly
154
- data = self._request(f"/works/{doi}")
155
- if not data or "error" in data:
156
- return None
157
-
158
- return Work(
159
- doi=data.get("doi", doi),
160
- title=data.get("title", ""),
161
- authors=data.get("authors", []),
162
- year=data.get("year"),
163
- journal=data.get("journal"),
164
- volume=data.get("volume"),
165
- issue=data.get("issue"),
166
- page=data.get("page"),
167
- abstract=data.get("abstract"),
168
- citation_count=data.get("citation_count"),
169
- )
170
-
171
- def get_many(self, dois: List[str]) -> List[Work]:
172
- """
173
- Get multiple works by DOI using batch endpoint.
174
-
175
- Args:
176
- dois: List of DOIs
177
-
178
- Returns:
179
- List of Work objects
180
- """
181
- # Use batch endpoint if available
182
- try:
183
- data = {"dois": dois}
184
- req_data = json.dumps(data).encode("utf-8")
185
- req = urllib.request.Request(
186
- f"{self.base_url}/works/batch", data=req_data, method="POST"
187
- )
188
- req.add_header("Content-Type", "application/json")
189
- req.add_header("Accept", "application/json")
190
-
191
- with urllib.request.urlopen(req, timeout=self.timeout) as response:
192
- result = json.loads(response.read().decode("utf-8"))
193
-
194
- works = []
195
- for item in result.get("results", []):
196
- work = Work(
197
- doi=item.get("doi", ""),
198
- title=item.get("title", ""),
199
- authors=item.get("authors", []),
200
- year=item.get("year"),
201
- journal=item.get("journal"),
202
- )
203
- works.append(work)
204
- return works
205
- except Exception:
206
- # Fallback to individual lookups
207
- works = []
208
- for doi in dois:
209
- work = self.get(doi)
210
- if work:
211
- works.append(work)
212
- return works
213
-
214
- def exists(self, doi: str) -> bool:
215
- """Check if a DOI exists."""
216
- return self.get(doi) is not None
217
-
218
- def get_citations(self, doi: str, direction: str = "both") -> Dict:
219
- """
220
- Get citations for a paper.
221
-
222
- Args:
223
- doi: Paper DOI
224
- direction: 'citing', 'cited_by', or 'both'
225
-
226
- Returns:
227
- Dict with citation information
228
- """
229
- params = {"doi": doi, "direction": direction}
230
- return self._request("/api/citations/", params) or {}
231
-
232
- def get_journal(
233
- self, issn: Optional[str] = None, name: Optional[str] = None
234
- ) -> Dict:
235
- """
236
- Get journal information.
237
-
238
- Args:
239
- issn: Journal ISSN
240
- name: Journal name
241
-
242
- Returns:
243
- Dict with journal information
244
- """
245
- params = {"issn": issn, "name": name}
246
- return self._request("/api/journal/", params) or {}
247
-
248
-
249
- # Module-level client for convenience
250
- _client: Optional[RemoteClient] = None
251
-
252
-
253
- def get_client(base_url: str = "http://localhost:3333") -> RemoteClient:
254
- """Get or create singleton remote client."""
255
- global _client
256
- if _client is None or _client.base_url != base_url:
257
- _client = RemoteClient(base_url)
258
- return _client
259
-
260
-
261
- def reset_client() -> None:
262
- """Reset singleton client."""
263
- global _client
264
- _client = None
8
+ # EOF