crossref-local 0.4.0-py3-none-any.whl → 0.5.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48)
  1. crossref_local/__init__.py +18 -10
  2. crossref_local/_aio/__init__.py +30 -0
  3. crossref_local/_aio/_impl.py +238 -0
  4. crossref_local/_cache/__init__.py +15 -0
  5. crossref_local/{cache_export.py → _cache/export.py} +27 -10
  6. crossref_local/_cache/utils.py +93 -0
  7. crossref_local/_cli/__init__.py +9 -0
  8. crossref_local/_cli/cli.py +512 -0
  9. crossref_local/_cli/mcp.py +351 -0
  10. crossref_local/_cli/mcp_server.py +413 -0
  11. crossref_local/_core/__init__.py +58 -0
  12. crossref_local/{api.py → _core/api.py} +24 -5
  13. crossref_local/{citations.py → _core/citations.py} +55 -26
  14. crossref_local/{config.py → _core/config.py} +40 -22
  15. crossref_local/{db.py → _core/db.py} +32 -26
  16. crossref_local/{fts.py → _core/fts.py} +18 -14
  17. crossref_local/{models.py → _core/models.py} +11 -6
  18. crossref_local/_remote/__init__.py +56 -0
  19. crossref_local/_remote/base.py +356 -0
  20. crossref_local/_remote/collections.py +175 -0
  21. crossref_local/_server/__init__.py +140 -0
  22. crossref_local/_server/middleware.py +25 -0
  23. crossref_local/_server/models.py +129 -0
  24. crossref_local/_server/routes_citations.py +98 -0
  25. crossref_local/_server/routes_collections.py +282 -0
  26. crossref_local/_server/routes_compat.py +102 -0
  27. crossref_local/_server/routes_works.py +128 -0
  28. crossref_local/_server/server.py +19 -0
  29. crossref_local/aio.py +30 -206
  30. crossref_local/cache.py +100 -100
  31. crossref_local/cli.py +5 -515
  32. crossref_local/jobs.py +169 -0
  33. crossref_local/mcp_server.py +5 -410
  34. crossref_local/remote.py +5 -266
  35. crossref_local/server.py +5 -349
  36. {crossref_local-0.4.0.dist-info → crossref_local-0.5.0.dist-info}/METADATA +36 -11
  37. crossref_local-0.5.0.dist-info/RECORD +47 -0
  38. {crossref_local-0.4.0.dist-info → crossref_local-0.5.0.dist-info}/entry_points.txt +1 -1
  39. crossref_local/cli_mcp.py +0 -275
  40. crossref_local-0.4.0.dist-info/RECORD +0 -27
  41. /crossref_local/{cache_viz.py → _cache/viz.py} +0 -0
  42. /crossref_local/{cli_cache.py → _cli/cache.py} +0 -0
  43. /crossref_local/{cli_completion.py → _cli/completion.py} +0 -0
  44. /crossref_local/{cli_main.py → _cli/main.py} +0 -0
  45. /crossref_local/{impact_factor → _impact_factor}/__init__.py +0 -0
  46. /crossref_local/{impact_factor → _impact_factor}/calculator.py +0 -0
  47. /crossref_local/{impact_factor → _impact_factor}/journal_lookup.py +0 -0
  48. {crossref_local-0.4.0.dist-info → crossref_local-0.5.0.dist-info}/WHEEL +0 -0
@@ -64,10 +64,11 @@ Modules:
64
64
  aio - Async versions of all API functions
65
65
  """
66
66
 
67
- __version__ = "0.3.1"
67
+ __version__ = "0.5.0"
68
68
 
69
- # Core API (public functions)
70
- from .api import (
69
+ # Core API (from _core package)
70
+ from ._core import (
71
+ # Functions
71
72
  search,
72
73
  count,
73
74
  get,
@@ -77,23 +78,28 @@ from .api import (
77
78
  enrich_dois,
78
79
  configure,
79
80
  configure_http,
80
- configure_remote, # Backward compatibility alias
81
+ configure_remote,
81
82
  get_mode,
82
83
  info,
84
+ # Models
85
+ Work,
86
+ SearchResult,
87
+ # Citations
88
+ get_citing,
89
+ get_cited,
90
+ get_citation_count,
91
+ CitationNetwork,
83
92
  )
84
93
 
85
- # Models (public classes)
86
- from .models import Work, SearchResult
87
-
88
94
  # Async API (public module)
89
95
  from . import aio
90
96
 
91
- # Citation network (public functions and classes)
92
- from .citations import get_citing, get_cited, get_citation_count, CitationNetwork
93
-
94
97
  # Cache module (public)
95
98
  from . import cache
96
99
 
100
+ # Jobs module (public)
101
+ from . import jobs
102
+
97
103
 
98
104
  # Public API - what users should import
99
105
  __all__ = [
@@ -121,6 +127,8 @@ __all__ = [
121
127
  "aio",
122
128
  # Cache module
123
129
  "cache",
130
+ # Jobs module
131
+ "jobs",
124
132
  # Citation network
125
133
  "get_citing",
126
134
  "get_cited",
@@ -0,0 +1,30 @@
1
+ #!/usr/bin/env python3
2
+ """Async API module."""
3
+
4
+ from ._impl import (
5
+ SearchResult,
6
+ Work,
7
+ count,
8
+ count_many,
9
+ exists,
10
+ get,
11
+ get_many,
12
+ info,
13
+ search,
14
+ search_many,
15
+ )
16
+
17
+ __all__ = [
18
+ "search",
19
+ "count",
20
+ "get",
21
+ "get_many",
22
+ "exists",
23
+ "info",
24
+ "search_many",
25
+ "count_many",
26
+ "SearchResult",
27
+ "Work",
28
+ ]
29
+
30
+ # EOF
@@ -0,0 +1,238 @@
1
+ """
2
+ Async API for crossref_local.
3
+
4
+ Provides async versions of all API functions. Uses thread pool execution
5
+ with per-thread database connections for thread safety.
6
+
7
+ Usage:
8
+ from crossref_local import aio
9
+
10
+ async def main():
11
+ results = await aio.search("machine learning")
12
+ work = await aio.get("10.1038/nature12373")
13
+ n = await aio.count("CRISPR")
14
+
15
+ # Or import individual functions
16
+ from crossref_local.aio import search, get, count
17
+
18
+ # Concurrent operations
19
+ counts = await aio.count_many(["CRISPR", "machine learning"])
20
+ """
21
+
22
+ import asyncio as _asyncio
23
+ import threading as _threading
24
+ from typing import List, Optional
25
+
26
+ from .._core.config import Config as _Config
27
+ from .._core.db import Database as _Database
28
+ from .._core.models import SearchResult, Work
29
+
30
+ __all__ = [
31
+ "search",
32
+ "count",
33
+ "get",
34
+ "get_many",
35
+ "exists",
36
+ "info",
37
+ "search_many",
38
+ "count_many",
39
+ # Public types for type hints
40
+ "SearchResult",
41
+ "Work",
42
+ ]
43
+
44
+ # Thread-local storage for database connections
45
+ _thread_local = _threading.local()
46
+
47
+
48
+ def _get_thread_db() -> _Database:
49
+ """Get thread-local database connection."""
50
+ if not hasattr(_thread_local, "db"):
51
+ _thread_local.db = _Database(_Config.get_db_path())
52
+ return _thread_local.db
53
+
54
+
55
+ def _search_sync(query: str, limit: int, offset: int) -> SearchResult:
56
+ """Thread-safe sync search."""
57
+ from .._core import fts
58
+
59
+ db = _get_thread_db()
60
+ return fts._search_with_db(db, query, limit, offset)
61
+
62
+
63
+ def _count_sync(query: str) -> int:
64
+ """Thread-safe sync count."""
65
+ from .._core import fts
66
+
67
+ db = _get_thread_db()
68
+ return fts._count_with_db(db, query)
69
+
70
+
71
+ def _get_sync(doi: str) -> Optional[Work]:
72
+ """Thread-safe sync get."""
73
+ db = _get_thread_db()
74
+ metadata = db.get_metadata(doi)
75
+ if metadata:
76
+ return Work.from_metadata(doi, metadata)
77
+ return None
78
+
79
+
80
+ def _get_many_sync(dois: List[str]) -> List[Work]:
81
+ """Thread-safe sync get_many."""
82
+ db = _get_thread_db()
83
+ works = []
84
+ for doi in dois:
85
+ metadata = db.get_metadata(doi)
86
+ if metadata:
87
+ works.append(Work.from_metadata(doi, metadata))
88
+ return works
89
+
90
+
91
+ def _exists_sync(doi: str) -> bool:
92
+ """Thread-safe sync exists."""
93
+ db = _get_thread_db()
94
+ row = db.fetchone("SELECT 1 FROM works WHERE doi = ?", (doi,))
95
+ return row is not None
96
+
97
+
98
+ def _info_sync() -> dict:
99
+ """Thread-safe sync info."""
100
+ db = _get_thread_db()
101
+
102
+ row = db.fetchone("SELECT COUNT(*) as count FROM works")
103
+ work_count = row["count"] if row else 0
104
+
105
+ try:
106
+ row = db.fetchone("SELECT COUNT(*) as count FROM works_fts")
107
+ fts_count = row["count"] if row else 0
108
+ except Exception:
109
+ fts_count = 0
110
+
111
+ try:
112
+ row = db.fetchone("SELECT COUNT(*) as count FROM citations")
113
+ citation_count = row["count"] if row else 0
114
+ except Exception:
115
+ citation_count = 0
116
+
117
+ return {
118
+ "db_path": str(_Config.get_db_path()),
119
+ "works": work_count,
120
+ "fts_indexed": fts_count,
121
+ "citations": citation_count,
122
+ }
123
+
124
+
125
+ async def search(
126
+ query: str,
127
+ limit: int = 10,
128
+ offset: int = 0,
129
+ ) -> SearchResult:
130
+ """
131
+ Async full-text search across works.
132
+
133
+ Args:
134
+ query: Search query (supports FTS5 syntax)
135
+ limit: Maximum results to return
136
+ offset: Skip first N results (for pagination)
137
+
138
+ Returns:
139
+ SearchResult with matching works
140
+ """
141
+ return await _asyncio.to_thread(_search_sync, query, limit, offset)
142
+
143
+
144
+ async def count(query: str) -> int:
145
+ """
146
+ Async count matching works without fetching results.
147
+
148
+ Args:
149
+ query: FTS5 search query
150
+
151
+ Returns:
152
+ Number of matching works
153
+ """
154
+ return await _asyncio.to_thread(_count_sync, query)
155
+
156
+
157
+ async def get(doi: str) -> Optional[Work]:
158
+ """
159
+ Async get a work by DOI.
160
+
161
+ Args:
162
+ doi: Digital Object Identifier
163
+
164
+ Returns:
165
+ Work object or None if not found
166
+ """
167
+ return await _asyncio.to_thread(_get_sync, doi)
168
+
169
+
170
+ async def get_many(dois: List[str]) -> List[Work]:
171
+ """
172
+ Async get multiple works by DOI.
173
+
174
+ Args:
175
+ dois: List of DOIs
176
+
177
+ Returns:
178
+ List of Work objects (missing DOIs are skipped)
179
+ """
180
+ return await _asyncio.to_thread(_get_many_sync, dois)
181
+
182
+
183
+ async def exists(doi: str) -> bool:
184
+ """
185
+ Async check if a DOI exists in the database.
186
+
187
+ Args:
188
+ doi: Digital Object Identifier
189
+
190
+ Returns:
191
+ True if DOI exists
192
+ """
193
+ return await _asyncio.to_thread(_exists_sync, doi)
194
+
195
+
196
+ async def info() -> dict:
197
+ """
198
+ Async get database information.
199
+
200
+ Returns:
201
+ Dictionary with database stats
202
+ """
203
+ return await _asyncio.to_thread(_info_sync)
204
+
205
+
206
+ async def search_many(queries: List[str], limit: int = 10) -> List[SearchResult]:
207
+ """
208
+ Run multiple searches concurrently.
209
+
210
+ Args:
211
+ queries: List of search queries
212
+ limit: Maximum results per query
213
+
214
+ Returns:
215
+ List of SearchResult objects
216
+ """
217
+ tasks = [search(q, limit=limit) for q in queries]
218
+ return await _asyncio.gather(*tasks)
219
+
220
+
221
+ async def count_many(queries: List[str]) -> dict:
222
+ """
223
+ Count matches for multiple queries concurrently.
224
+
225
+ Args:
226
+ queries: List of search queries
227
+
228
+ Returns:
229
+ Dict mapping query -> count
230
+
231
+ Example:
232
+ >>> counts = await count_many(["CRISPR", "machine learning"])
233
+ >>> print(counts)
234
+ {'CRISPR': 45000, 'machine learning': 477922}
235
+ """
236
+ tasks = [count(q) for q in queries]
237
+ results = await _asyncio.gather(*tasks)
238
+ return dict(zip(queries, results))
@@ -0,0 +1,15 @@
1
+ #!/usr/bin/env python3
2
+ """Internal cache helper modules."""
3
+
4
+ from .export import export
5
+ from .utils import cache_path, get_cache_dir, meta_path, sanitize_name
6
+
7
+ __all__ = [
8
+ "export",
9
+ "cache_path",
10
+ "get_cache_dir",
11
+ "meta_path",
12
+ "sanitize_name",
13
+ ]
14
+
15
+ # EOF
@@ -1,10 +1,22 @@
1
1
  """Export functionality for cache module."""
2
2
 
3
- import json
4
- from pathlib import Path
5
- from typing import Any, Dict, List, Optional
3
+ import csv as _csv
4
+ import json as _json
5
+ from pathlib import Path as _Path
6
+ from typing import List, Optional
6
7
 
7
- from .cache import load
8
+ from .utils import sanitize_name as _sanitize_name
9
+
10
+ __all__ = [
11
+ "export",
12
+ ]
13
+
14
+
15
+ def _load_cache(name: str, user_id: Optional[str] = None):
16
+ """Load cache data (lazy import to avoid circular dependency)."""
17
+ from ..cache import load
18
+
19
+ return load(name, user_id=user_id)
8
20
 
9
21
 
10
22
  def export(
@@ -12,6 +24,7 @@ def export(
12
24
  output_path: str,
13
25
  format: str = "json",
14
26
  fields: Optional[List[str]] = None,
27
+ user_id: Optional[str] = None,
15
28
  ) -> str:
16
29
  """Export cache to file.
17
30
 
@@ -20,26 +33,30 @@ def export(
20
33
  output_path: Output file path
21
34
  format: Export format (json, csv, bibtex, dois)
22
35
  fields: Fields to include (for json/csv)
36
+ user_id: Optional user ID for multi-tenant scoping
23
37
 
24
38
  Returns:
25
39
  Output file path
40
+
41
+ Raises:
42
+ ValueError: If cache name contains invalid characters
26
43
  """
27
- papers = load(name)
28
- output = Path(output_path)
44
+ # Validate cache name
45
+ _sanitize_name(name)
46
+ papers = _load_cache(name, user_id=user_id)
47
+ output = _Path(output_path)
29
48
 
30
49
  if format == "json":
31
50
  if fields:
32
51
  papers = [{k: p.get(k) for k in fields} for p in papers]
33
52
  with open(output, "w") as f:
34
- json.dump(papers, f, indent=2)
53
+ _json.dump(papers, f, indent=2)
35
54
 
36
55
  elif format == "csv":
37
- import csv
38
-
39
56
  if fields is None:
40
57
  fields = ["doi", "title", "authors", "year", "journal"]
41
58
  with open(output, "w", newline="") as f:
42
- writer = csv.DictWriter(f, fieldnames=fields, extrasaction="ignore")
59
+ writer = _csv.DictWriter(f, fieldnames=fields, extrasaction="ignore")
43
60
  writer.writeheader()
44
61
  for p in papers:
45
62
  row = dict(p)
@@ -0,0 +1,93 @@
1
+ """Cache utility functions for crossref-local.
2
+
3
+ Provides path handling and validation utilities for the cache module.
4
+ """
5
+
6
+ import os as _os
7
+ import re as _re
8
+ from pathlib import Path as _Path
9
+ from typing import Optional
10
+
11
+ __all__ = [
12
+ "sanitize_name",
13
+ "get_cache_dir",
14
+ "cache_path",
15
+ "meta_path",
16
+ ]
17
+
18
+
19
+ # Valid cache name pattern: alphanumeric, underscores, hyphens only
20
+ _CACHE_NAME_PATTERN = _re.compile(r"^[a-zA-Z0-9_-]+$")
21
+
22
+
23
+ def sanitize_name(name: str) -> str:
24
+ """Sanitize cache name to prevent path traversal.
25
+
26
+ Args:
27
+ name: Cache name to sanitize
28
+
29
+ Returns:
30
+ Sanitized name
31
+
32
+ Raises:
33
+ ValueError: If name contains invalid characters
34
+ """
35
+ if not name:
36
+ raise ValueError("Cache name cannot be empty")
37
+ if not _CACHE_NAME_PATTERN.match(name):
38
+ raise ValueError(
39
+ f"Invalid cache name '{name}': only alphanumeric, underscores, and hyphens allowed"
40
+ )
41
+ if len(name) > 64:
42
+ raise ValueError(f"Cache name too long: {len(name)} chars (max 64)")
43
+ return name
44
+
45
+
46
+ def get_cache_dir(user_id: Optional[str] = None) -> _Path:
47
+ """Get cache directory, creating if needed.
48
+
49
+ Args:
50
+ user_id: Optional user ID for multi-tenant scoping.
51
+ If provided, creates a user-specific subdirectory.
52
+ """
53
+ cache_dir = _Path(
54
+ _os.environ.get(
55
+ "CROSSREF_LOCAL_CACHE_DIR", _Path.home() / ".cache" / "crossref-local"
56
+ )
57
+ )
58
+ # Add user subdirectory for multi-tenant support
59
+ if user_id:
60
+ # Sanitize user_id as well
61
+ safe_user_id = _re.sub(r"[^a-zA-Z0-9_-]", "", user_id[:16])
62
+ if safe_user_id:
63
+ cache_dir = cache_dir / safe_user_id
64
+ cache_dir.mkdir(parents=True, exist_ok=True)
65
+ return cache_dir
66
+
67
+
68
+ def cache_path(name: str, user_id: Optional[str] = None) -> _Path:
69
+ """Get path for a named cache.
70
+
71
+ Args:
72
+ name: Cache name (will be sanitized)
73
+ user_id: Optional user ID for multi-tenant scoping
74
+
75
+ Returns:
76
+ Path to cache file
77
+ """
78
+ safe_name = sanitize_name(name)
79
+ return get_cache_dir(user_id) / f"{safe_name}.json"
80
+
81
+
82
+ def meta_path(name: str, user_id: Optional[str] = None) -> _Path:
83
+ """Get path for cache metadata.
84
+
85
+ Args:
86
+ name: Cache name (will be sanitized)
87
+ user_id: Optional user ID for multi-tenant scoping
88
+
89
+ Returns:
90
+ Path to metadata file
91
+ """
92
+ safe_name = sanitize_name(name)
93
+ return get_cache_dir(user_id) / f"{safe_name}.meta.json"
@@ -0,0 +1,9 @@
1
+ #!/usr/bin/env python3
2
+ """Internal CLI modules."""
3
+
4
+ from .cli import cli, main
5
+ from .mcp import mcp, run_mcp_server
6
+
7
+ __all__ = ["cli", "main", "mcp", "run_mcp_server"]
8
+
9
+ # EOF