crossref-local 0.3.1-py3-none-any.whl → 0.5.0-py3-none-any.whl

Files changed (48)
  1. crossref_local/__init__.py +38 -16
  2. crossref_local/__main__.py +0 -0
  3. crossref_local/_aio/__init__.py +30 -0
  4. crossref_local/_aio/_impl.py +238 -0
  5. crossref_local/_cache/__init__.py +15 -0
  6. crossref_local/_cache/export.py +100 -0
  7. crossref_local/_cache/utils.py +93 -0
  8. crossref_local/_cache/viz.py +296 -0
  9. crossref_local/_cli/__init__.py +9 -0
  10. crossref_local/_cli/cache.py +179 -0
  11. crossref_local/_cli/cli.py +512 -0
  12. crossref_local/_cli/completion.py +245 -0
  13. crossref_local/_cli/main.py +20 -0
  14. crossref_local/_cli/mcp.py +351 -0
  15. crossref_local/_cli/mcp_server.py +413 -0
  16. crossref_local/_core/__init__.py +58 -0
  17. crossref_local/{api.py → _core/api.py} +130 -36
  18. crossref_local/{citations.py → _core/citations.py} +55 -26
  19. crossref_local/{config.py → _core/config.py} +57 -42
  20. crossref_local/{db.py → _core/db.py} +32 -26
  21. crossref_local/{fts.py → _core/fts.py} +18 -14
  22. crossref_local/{models.py → _core/models.py} +11 -6
  23. crossref_local/{impact_factor → _impact_factor}/__init__.py +0 -0
  24. crossref_local/{impact_factor → _impact_factor}/calculator.py +0 -0
  25. crossref_local/{impact_factor → _impact_factor}/journal_lookup.py +0 -0
  26. crossref_local/_remote/__init__.py +56 -0
  27. crossref_local/_remote/base.py +356 -0
  28. crossref_local/_remote/collections.py +175 -0
  29. crossref_local/_server/__init__.py +140 -0
  30. crossref_local/_server/middleware.py +25 -0
  31. crossref_local/_server/models.py +129 -0
  32. crossref_local/_server/routes_citations.py +98 -0
  33. crossref_local/_server/routes_collections.py +282 -0
  34. crossref_local/_server/routes_compat.py +102 -0
  35. crossref_local/_server/routes_works.py +128 -0
  36. crossref_local/_server/server.py +19 -0
  37. crossref_local/aio.py +30 -206
  38. crossref_local/cache.py +466 -0
  39. crossref_local/cli.py +5 -447
  40. crossref_local/jobs.py +169 -0
  41. crossref_local/mcp_server.py +5 -199
  42. crossref_local/remote.py +5 -261
  43. crossref_local/server.py +5 -349
  44. {crossref_local-0.3.1.dist-info → crossref_local-0.5.0.dist-info}/METADATA +88 -24
  45. crossref_local-0.5.0.dist-info/RECORD +47 -0
  46. crossref_local-0.3.1.dist-info/RECORD +0 -20
  47. {crossref_local-0.3.1.dist-info → crossref_local-0.5.0.dist-info}/WHEEL +0 -0
  48. {crossref_local-0.3.1.dist-info → crossref_local-0.5.0.dist-info}/entry_points.txt +0 -0
@@ -20,19 +20,19 @@ Async usage:
 Configuration
 -------------
 
-Local mode (direct database access):
+DB mode (direct database access):
 >>> from crossref_local import configure
 >>> configure("/path/to/crossref.db")
 Or set CROSSREF_LOCAL_DB environment variable.
 
-Remote mode (API access via HTTP):
->>> from crossref_local import configure_remote
->>> configure_remote("http://localhost:3333")
-Or set CROSSREF_LOCAL_API environment variable.
+HTTP mode (API access via HTTP):
+>>> from crossref_local import configure_http
+>>> configure_http("http://localhost:8333")
+Or set CROSSREF_LOCAL_API_URL environment variable.
 
 Typical setup with SSH tunnel:
-$ ssh -L 3333:127.0.0.1:3333 nas  # In terminal
->>> configure_remote()  # Uses default localhost:3333
+$ ssh -L 8333:127.0.0.1:8333 your-server  # In terminal
+>>> configure_http()  # Uses default localhost:8333
 
 Public API
 ----------
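For orientation, the two documented modes look like this side by side — a minimal sketch with placeholder paths, using the default port from the docstring above:

    from crossref_local import configure, configure_http

    # DB mode: read a local SQLite database directly
    configure("/path/to/crossref.db")         # or set CROSSREF_LOCAL_DB

    # HTTP mode: talk to a crossref-local server (e.g. through an SSH tunnel)
    configure_http("http://localhost:8333")   # or set CROSSREF_LOCAL_API_URL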
@@ -43,6 +43,8 @@ Functions:
 get(doi) -> Work | None
 get_many(dois) -> list[Work]
 exists(doi) -> bool
+enrich(results) -> SearchResult
+enrich_dois(dois) -> list[Work]
 configure(db_path) -> None
 configure_remote(api_url) -> None
 get_mode() -> str
@@ -62,29 +64,41 @@ Modules:
 aio - Async versions of all API functions
 """
 
-__version__ = "0.3.1"
+__version__ = "0.5.0"
 
-# Core API (public functions)
-from .api import (
+# Core API (from _core package)
+from ._core import (
+    # Functions
     search,
     count,
     get,
     get_many,
     exists,
+    enrich,
+    enrich_dois,
     configure,
+    configure_http,
     configure_remote,
     get_mode,
     info,
+    # Models
+    Work,
+    SearchResult,
+    # Citations
+    get_citing,
+    get_cited,
+    get_citation_count,
+    CitationNetwork,
 )
 
-# Models (public classes)
-from .models import Work, SearchResult
-
 # Async API (public module)
 from . import aio
 
-# Citation network (public functions and classes)
-from .citations import get_citing, get_cited, get_citation_count, CitationNetwork
+# Cache module (public)
+from . import cache
+
+# Jobs module (public)
+from . import jobs
 
 
 # Public API - what users should import
@@ -97,9 +111,13 @@ __all__ = [
     "get",
     "get_many",
     "exists",
+    # Enrichment (add citations/references to search results)
+    "enrich",
+    "enrich_dois",
     # Configuration
     "configure",
-    "configure_remote",
+    "configure_http",
+    "configure_remote",  # Backward compatibility alias
     "get_mode",
     "info",
     # Data models
@@ -107,6 +125,10 @@ __all__ = [
     "SearchResult",
     # Async API
     "aio",
+    # Cache module
+    "cache",
+    # Jobs module
+    "jobs",
     # Citation network
     "get_citing",
     "get_cited",
File without changes
@@ -0,0 +1,30 @@
+#!/usr/bin/env python3
+"""Async API module."""
+
+from ._impl import (
+    SearchResult,
+    Work,
+    count,
+    count_many,
+    exists,
+    get,
+    get_many,
+    info,
+    search,
+    search_many,
+)
+
+__all__ = [
+    "search",
+    "count",
+    "get",
+    "get_many",
+    "exists",
+    "info",
+    "search_many",
+    "count_many",
+    "SearchResult",
+    "Work",
+]
+
+# EOF
@@ -0,0 +1,238 @@
+"""
+Async API for crossref_local.
+
+Provides async versions of all API functions. Uses thread pool execution
+with per-thread database connections for thread safety.
+
+Usage:
+    from crossref_local import aio
+
+    async def main():
+        results = await aio.search("machine learning")
+        work = await aio.get("10.1038/nature12373")
+        n = await aio.count("CRISPR")
+
+    # Or import individual functions
+    from crossref_local.aio import search, get, count
+
+    # Concurrent operations
+    counts = await aio.count_many(["CRISPR", "machine learning"])
+"""
+
+import asyncio as _asyncio
+import threading as _threading
+from typing import List, Optional
+
+from .._core.config import Config as _Config
+from .._core.db import Database as _Database
+from .._core.models import SearchResult, Work
+
+__all__ = [
+    "search",
+    "count",
+    "get",
+    "get_many",
+    "exists",
+    "info",
+    "search_many",
+    "count_many",
+    # Public types for type hints
+    "SearchResult",
+    "Work",
+]
+
+# Thread-local storage for database connections
+_thread_local = _threading.local()
+
+
+def _get_thread_db() -> _Database:
+    """Get thread-local database connection."""
+    if not hasattr(_thread_local, "db"):
+        _thread_local.db = _Database(_Config.get_db_path())
+    return _thread_local.db
+
+
+def _search_sync(query: str, limit: int, offset: int) -> SearchResult:
+    """Thread-safe sync search."""
+    from .._core import fts
+
+    db = _get_thread_db()
+    return fts._search_with_db(db, query, limit, offset)
+
+
+def _count_sync(query: str) -> int:
+    """Thread-safe sync count."""
+    from .._core import fts
+
+    db = _get_thread_db()
+    return fts._count_with_db(db, query)
+
+
+def _get_sync(doi: str) -> Optional[Work]:
+    """Thread-safe sync get."""
+    db = _get_thread_db()
+    metadata = db.get_metadata(doi)
+    if metadata:
+        return Work.from_metadata(doi, metadata)
+    return None
+
+
+def _get_many_sync(dois: List[str]) -> List[Work]:
+    """Thread-safe sync get_many."""
+    db = _get_thread_db()
+    works = []
+    for doi in dois:
+        metadata = db.get_metadata(doi)
+        if metadata:
+            works.append(Work.from_metadata(doi, metadata))
+    return works
+
+
+def _exists_sync(doi: str) -> bool:
+    """Thread-safe sync exists."""
+    db = _get_thread_db()
+    row = db.fetchone("SELECT 1 FROM works WHERE doi = ?", (doi,))
+    return row is not None
+
+
+def _info_sync() -> dict:
+    """Thread-safe sync info."""
+    db = _get_thread_db()
+
+    row = db.fetchone("SELECT COUNT(*) as count FROM works")
+    work_count = row["count"] if row else 0
+
+    try:
+        row = db.fetchone("SELECT COUNT(*) as count FROM works_fts")
+        fts_count = row["count"] if row else 0
+    except Exception:
+        fts_count = 0
+
+    try:
+        row = db.fetchone("SELECT COUNT(*) as count FROM citations")
+        citation_count = row["count"] if row else 0
+    except Exception:
+        citation_count = 0
+
+    return {
+        "db_path": str(_Config.get_db_path()),
+        "works": work_count,
+        "fts_indexed": fts_count,
+        "citations": citation_count,
+    }
+
+
+async def search(
+    query: str,
+    limit: int = 10,
+    offset: int = 0,
+) -> SearchResult:
+    """
+    Async full-text search across works.
+
+    Args:
+        query: Search query (supports FTS5 syntax)
+        limit: Maximum results to return
+        offset: Skip first N results (for pagination)
+
+    Returns:
+        SearchResult with matching works
+    """
+    return await _asyncio.to_thread(_search_sync, query, limit, offset)
+
+
+async def count(query: str) -> int:
+    """
+    Async count matching works without fetching results.
+
+    Args:
+        query: FTS5 search query
+
+    Returns:
+        Number of matching works
+    """
+    return await _asyncio.to_thread(_count_sync, query)
+
+
+async def get(doi: str) -> Optional[Work]:
+    """
+    Async get a work by DOI.
+
+    Args:
+        doi: Digital Object Identifier
+
+    Returns:
+        Work object or None if not found
+    """
+    return await _asyncio.to_thread(_get_sync, doi)
+
+
+async def get_many(dois: List[str]) -> List[Work]:
+    """
+    Async get multiple works by DOI.
+
+    Args:
+        dois: List of DOIs
+
+    Returns:
+        List of Work objects (missing DOIs are skipped)
+    """
+    return await _asyncio.to_thread(_get_many_sync, dois)
+
+
+async def exists(doi: str) -> bool:
+    """
+    Async check if a DOI exists in the database.
+
+    Args:
+        doi: Digital Object Identifier
+
+    Returns:
+        True if DOI exists
+    """
+    return await _asyncio.to_thread(_exists_sync, doi)
+
+
+async def info() -> dict:
+    """
+    Async get database information.
+
+    Returns:
+        Dictionary with database stats
+    """
+    return await _asyncio.to_thread(_info_sync)
+
+
+async def search_many(queries: List[str], limit: int = 10) -> List[SearchResult]:
+    """
+    Run multiple searches concurrently.
+
+    Args:
+        queries: List of search queries
+        limit: Maximum results per query
+
+    Returns:
+        List of SearchResult objects
+    """
+    tasks = [search(q, limit=limit) for q in queries]
+    return await _asyncio.gather(*tasks)
+
+
+async def count_many(queries: List[str]) -> dict:
+    """
+    Count matches for multiple queries concurrently.
+
+    Args:
+        queries: List of search queries
+
+    Returns:
+        Dict mapping query -> count
+
+    Example:
+        >>> counts = await count_many(["CRISPR", "machine learning"])
+        >>> print(counts)
+        {'CRISPR': 45000, 'machine learning': 477922}
+    """
+    tasks = [count(q) for q in queries]
+    results = await _asyncio.gather(*tasks)
+    return dict(zip(queries, results))
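The docstring at the top of _impl.py already sketches the calling pattern; here it is as a complete, runnable snippet (the database path and queries are placeholders — configure() must point at a real crossref-local database first):

    import asyncio

    from crossref_local import aio, configure

    async def main():
        # Each awaited call runs in a worker thread with its own
        # connection (see _get_thread_db), so concurrent gathers are safe.
        counts = await aio.count_many(["CRISPR", "machine learning"])
        work = await aio.get("10.1038/nature12373")
        print(counts, work is not None)

    configure("/path/to/crossref.db")  # placeholder path
    asyncio.run(main())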
@@ -0,0 +1,15 @@
+#!/usr/bin/env python3
+"""Internal cache helper modules."""
+
+from .export import export
+from .utils import cache_path, get_cache_dir, meta_path, sanitize_name
+
+__all__ = [
+    "export",
+    "cache_path",
+    "get_cache_dir",
+    "meta_path",
+    "sanitize_name",
+]
+
+# EOF
@@ -0,0 +1,100 @@
+"""Export functionality for cache module."""
+
+import csv as _csv
+import json as _json
+from pathlib import Path as _Path
+from typing import List, Optional
+
+from .utils import sanitize_name as _sanitize_name
+
+__all__ = [
+    "export",
+]
+
+
+def _load_cache(name: str, user_id: Optional[str] = None):
+    """Load cache data (lazy import to avoid circular dependency)."""
+    from ..cache import load
+
+    return load(name, user_id=user_id)
+
+
+def export(
+    name: str,
+    output_path: str,
+    format: str = "json",
+    fields: Optional[List[str]] = None,
+    user_id: Optional[str] = None,
+) -> str:
+    """Export cache to file.
+
+    Args:
+        name: Cache name
+        output_path: Output file path
+        format: Export format (json, csv, bibtex, dois)
+        fields: Fields to include (for json/csv)
+        user_id: Optional user ID for multi-tenant scoping
+
+    Returns:
+        Output file path
+
+    Raises:
+        ValueError: If cache name contains invalid characters
+    """
+    # Validate cache name
+    _sanitize_name(name)
+    papers = _load_cache(name, user_id=user_id)
+    output = _Path(output_path)
+
+    if format == "json":
+        if fields:
+            papers = [{k: p.get(k) for k in fields} for p in papers]
+        with open(output, "w") as f:
+            _json.dump(papers, f, indent=2)
+
+    elif format == "csv":
+        if fields is None:
+            fields = ["doi", "title", "authors", "year", "journal"]
+        with open(output, "w", newline="") as f:
+            writer = _csv.DictWriter(f, fieldnames=fields, extrasaction="ignore")
+            writer.writeheader()
+            for p in papers:
+                row = dict(p)
+                if "authors" in row and isinstance(row["authors"], list):
+                    row["authors"] = "; ".join(row["authors"])
+                writer.writerow(row)
+
+    elif format == "bibtex":
+        lines = []
+        for p in papers:
+            doi = p.get("doi", "").replace("/", "_").replace(".", "_")
+            entry = f"@article{{{doi},\n"
+            if p.get("title"):
+                entry += f"  title = {{{p['title']}}},\n"
+            if p.get("authors"):
+                authors = (
+                    " and ".join(p["authors"])
+                    if isinstance(p["authors"], list)
+                    else p["authors"]
+                )
+                entry += f"  author = {{{authors}}},\n"
+            if p.get("year"):
+                entry += f"  year = {{{p['year']}}},\n"
+            if p.get("journal"):
+                entry += f"  journal = {{{p['journal']}}},\n"
+            if p.get("doi"):
+                entry += f"  doi = {{{p['doi']}}},\n"
+            entry += "}\n"
+            lines.append(entry)
+        with open(output, "w") as f:
+            f.write("\n".join(lines))
+
+    elif format == "dois":
+        dois = [p["doi"] for p in papers if p.get("doi")]
+        with open(output, "w") as f:
+            f.write("\n".join(dois))
+
+    else:
+        raise ValueError(f"Unknown format: {format}")
+
+    return str(output)
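Assuming a cache named "my-review" has already been saved by the public cache module, the four supported formats map to calls like these (cache name and output paths are placeholders):

    from crossref_local._cache import export

    export("my-review", "papers.json", format="json", fields=["doi", "title"])
    export("my-review", "papers.csv", format="csv")     # default field set
    export("my-review", "papers.bib", format="bibtex")  # keys derived from DOIs
    export("my-review", "dois.txt", format="dois")      # one DOI per line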
@@ -0,0 +1,93 @@
+"""Cache utility functions for crossref-local.
+
+Provides path handling and validation utilities for the cache module.
+"""
+
+import os as _os
+import re as _re
+from pathlib import Path as _Path
+from typing import Optional
+
+__all__ = [
+    "sanitize_name",
+    "get_cache_dir",
+    "cache_path",
+    "meta_path",
+]
+
+
+# Valid cache name pattern: alphanumeric, underscores, hyphens only
+_CACHE_NAME_PATTERN = _re.compile(r"^[a-zA-Z0-9_-]+$")
+
+
+def sanitize_name(name: str) -> str:
+    """Sanitize cache name to prevent path traversal.
+
+    Args:
+        name: Cache name to sanitize
+
+    Returns:
+        Sanitized name
+
+    Raises:
+        ValueError: If name contains invalid characters
+    """
+    if not name:
+        raise ValueError("Cache name cannot be empty")
+    if not _CACHE_NAME_PATTERN.match(name):
+        raise ValueError(
+            f"Invalid cache name '{name}': only alphanumeric, underscores, and hyphens allowed"
+        )
+    if len(name) > 64:
+        raise ValueError(f"Cache name too long: {len(name)} chars (max 64)")
+    return name
+
+
+def get_cache_dir(user_id: Optional[str] = None) -> _Path:
+    """Get cache directory, creating if needed.
+
+    Args:
+        user_id: Optional user ID for multi-tenant scoping.
+            If provided, creates a user-specific subdirectory.
+    """
+    cache_dir = _Path(
+        _os.environ.get(
+            "CROSSREF_LOCAL_CACHE_DIR", _Path.home() / ".cache" / "crossref-local"
+        )
+    )
+    # Add user subdirectory for multi-tenant support
+    if user_id:
+        # Sanitize user_id as well
+        safe_user_id = _re.sub(r"[^a-zA-Z0-9_-]", "", user_id[:16])
+        if safe_user_id:
+            cache_dir = cache_dir / safe_user_id
+    cache_dir.mkdir(parents=True, exist_ok=True)
+    return cache_dir
+
+
+def cache_path(name: str, user_id: Optional[str] = None) -> _Path:
+    """Get path for a named cache.
+
+    Args:
+        name: Cache name (will be sanitized)
+        user_id: Optional user ID for multi-tenant scoping
+
+    Returns:
+        Path to cache file
+    """
+    safe_name = sanitize_name(name)
+    return get_cache_dir(user_id) / f"{safe_name}.json"
+
+
+def meta_path(name: str, user_id: Optional[str] = None) -> _Path:
+    """Get path for cache metadata.
+
+    Args:
+        name: Cache name (will be sanitized)
+        user_id: Optional user ID for multi-tenant scoping
+
+    Returns:
+        Path to metadata file
+    """
+    safe_name = sanitize_name(name)
+    return get_cache_dir(user_id) / f"{safe_name}.meta.json"
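A short sketch of how these helpers compose, assuming CROSSREF_LOCAL_CACHE_DIR is unset so the ~/.cache/crossref-local default applies:

    from crossref_local._cache import cache_path, meta_path, sanitize_name

    sanitize_name("my-review")               # returns "my-review" unchanged
    cache_path("my-review")                  # ~/.cache/crossref-local/my-review.json
    meta_path("my-review", user_id="alice")  # ~/.cache/crossref-local/alice/my-review.meta.json

    sanitize_name("../etc/passwd")           # raises ValueError (path traversal blocked)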