crossref-local 0.3.0__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -4,6 +4,9 @@ crossref_local - Local CrossRef database with full-text search.
4
4
  A Python package for querying a local mirror of the CrossRef database
5
5
  with 167M+ scholarly works, full-text search, and impact factor calculation.
6
6
 
7
+ Quick Start
8
+ -----------
9
+
7
10
  Sync usage:
8
11
  >>> from crossref_local import search, get
9
12
  >>> results = search("hippocampal sharp wave ripples")
@@ -14,65 +17,126 @@ Async usage:
14
17
  >>> results = await aio.search("machine learning")
15
18
  >>> counts = await aio.count_many(["CRISPR", "neural network"])
16
19
 
17
- Configuration:
20
+ Configuration
21
+ -------------
22
+
23
+ DB mode (direct database access):
18
24
  >>> from crossref_local import configure
19
25
  >>> configure("/path/to/crossref.db")
20
-
21
26
  Or set CROSSREF_LOCAL_DB environment variable.
27
+
28
+ HTTP mode (API access via HTTP):
29
+ >>> from crossref_local import configure_http
30
+ >>> configure_http("http://localhost:8333")
31
+ Or set CROSSREF_LOCAL_API_URL environment variable.
32
+
33
+ Typical setup with SSH tunnel:
34
+ $ ssh -L 8333:127.0.0.1:8333 your-server # In terminal
35
+ >>> configure_http() # Uses default localhost:8333
36
+
37
+ Public API
38
+ ----------
39
+
40
+ Functions:
41
+ search(query, limit, offset) -> SearchResult
42
+ count(query) -> int
43
+ get(doi) -> Work | None
44
+ get_many(dois) -> list[Work]
45
+ exists(doi) -> bool
46
+ enrich(results) -> SearchResult
47
+ enrich_dois(dois) -> list[Work]
48
+ configure(db_path) -> None
49
+ configure_remote(api_url) -> None
50
+ get_mode() -> str
51
+ info() -> dict
52
+
53
+ Citation functions:
54
+ get_citing(doi) -> list[str]
55
+ get_cited(doi) -> list[str]
56
+ get_citation_count(doi) -> int
57
+
58
+ Classes:
59
+ Work - Scholarly work with title, authors, DOI, etc.
60
+ SearchResult - Container for search results
61
+ CitationNetwork - Citation graph builder and visualizer
62
+
63
+ Modules:
64
+ aio - Async versions of all API functions
22
65
  """
23
66
 
24
- __version__ = "0.3.0"
67
+ __version__ = "0.3.1"
25
68
 
26
- # Core API
69
+ # Core API (public functions)
27
70
  from .api import (
28
71
  search,
29
72
  count,
30
73
  get,
31
74
  get_many,
32
75
  exists,
76
+ enrich,
77
+ enrich_dois,
33
78
  configure,
79
+ configure_http,
80
+ configure_remote, # Backward compatibility alias
81
+ get_mode,
34
82
  info,
35
83
  )
36
84
 
37
- # Models
85
+ # Models (public classes)
38
86
  from .models import Work, SearchResult
39
87
 
40
- # Database utilities
41
- from .db import Database, connection
42
-
43
- # Configuration
44
- from .config import Config
45
-
46
- # Async API
88
+ # Async API (public module)
47
89
  from . import aio
48
90
 
49
- # Citation network
91
+ # Citation network (public functions and classes)
50
92
  from .citations import get_citing, get_cited, get_citation_count, CitationNetwork
51
93
 
94
+ # Cache module (public)
95
+ from . import cache
96
+
97
+
98
+ # Public API - what users should import
52
99
  __all__ = [
53
100
  # Version
54
101
  "__version__",
55
- # Core API
102
+ # Core search/retrieval
56
103
  "search",
57
104
  "count",
58
105
  "get",
59
106
  "get_many",
60
107
  "exists",
108
+ # Enrichment (add citations/references to search results)
109
+ "enrich",
110
+ "enrich_dois",
111
+ # Configuration
61
112
  "configure",
113
+ "configure_http",
114
+ "configure_remote", # Backward compatibility alias
115
+ "get_mode",
62
116
  "info",
63
- # Models
117
+ # Data models
64
118
  "Work",
65
119
  "SearchResult",
66
- # Database
67
- "Database",
68
- "connection",
69
- # Config
70
- "Config",
71
- # Async
120
+ # Async API
72
121
  "aio",
73
- # Citations
122
+ # Cache module
123
+ "cache",
124
+ # Citation network
74
125
  "get_citing",
75
126
  "get_cited",
76
127
  "get_citation_count",
77
128
  "CitationNetwork",
78
129
  ]
130
+
131
+
132
+ # ============================================================================
133
+ # Advanced / Internal APIs (not in __all__, but importable if needed)
134
+ # ============================================================================
135
+ # These are exposed for advanced users but not part of the stable public API.
136
+ # Use at your own risk - they may change without notice.
137
+ #
138
+ # from crossref_local.db import Database, connection
139
+ # from crossref_local.config import Config
140
+ # from crossref_local.remote import RemoteClient
141
+ # from crossref_local.fts import search_dois
142
+ # ============================================================================
@@ -0,0 +1,6 @@
1
+ """Entry point for python -m crossref_local."""
2
+
3
+ from .cli import main
4
+
5
+ if __name__ == "__main__":
6
+ main()
crossref_local/aio.py CHANGED
File without changes
crossref_local/api.py CHANGED
@@ -1,13 +1,30 @@
1
- """Main API for crossref_local."""
1
+ """Main API for crossref_local.
2
+
3
+ Supports two modes:
4
+ - db: Direct database access (requires database file)
5
+ - http: HTTP API access (requires API server)
6
+
7
+ Mode is auto-detected or can be set explicitly via:
8
+ - CROSSREF_LOCAL_MODE environment variable ("db" or "http")
9
+ - CROSSREF_LOCAL_API_URL environment variable (API URL)
10
+ - configure() or configure_http() functions
11
+ """
2
12
 
3
13
  from typing import List, Optional
4
14
 
5
15
  from .config import Config
6
- from .db import Database, get_db, close_db, connection
16
+ from .db import get_db, close_db
7
17
  from .models import Work, SearchResult
8
18
  from . import fts
9
19
 
10
20
 
21
+ def _get_http_client():
22
+ """Get HTTP client (lazy import to avoid circular dependency)."""
23
+ from .remote import RemoteClient
24
+
25
+ return RemoteClient(Config.get_api_url())
26
+
27
+
11
28
  def search(
12
29
  query: str,
13
30
  limit: int = 10,
@@ -31,6 +48,9 @@ def search(
31
48
  >>> results = search("machine learning")
32
49
  >>> print(f"Found {results.total} matches")
33
50
  """
51
+ if Config.get_mode() == "http":
52
+ client = _get_http_client()
53
+ return client.search(query=query, limit=limit)
34
54
  return fts.search(query, limit, offset)
35
55
 
36
56
 
@@ -44,6 +64,10 @@ def count(query: str) -> int:
44
64
  Returns:
45
65
  Number of matching works
46
66
  """
67
+ if Config.get_mode() == "http":
68
+ client = _get_http_client()
69
+ result = client.search(query=query, limit=1)
70
+ return result.total
47
71
  return fts.count(query)
48
72
 
49
73
 
@@ -62,6 +86,9 @@ def get(doi: str) -> Optional[Work]:
62
86
  >>> work = get("10.1038/nature12373")
63
87
  >>> print(work.title)
64
88
  """
89
+ if Config.get_mode() == "http":
90
+ client = _get_http_client()
91
+ return client.get(doi)
65
92
  db = get_db()
66
93
  metadata = db.get_metadata(doi)
67
94
  if metadata:
@@ -79,6 +106,9 @@ def get_many(dois: List[str]) -> List[Work]:
79
106
  Returns:
80
107
  List of Work objects (missing DOIs are skipped)
81
108
  """
109
+ if Config.get_mode() == "http":
110
+ client = _get_http_client()
111
+ return client.get_many(dois)
82
112
  db = get_db()
83
113
  works = []
84
114
  for doi in dois:
@@ -98,6 +128,9 @@ def exists(doi: str) -> bool:
98
128
  Returns:
99
129
  True if DOI exists
100
130
  """
131
+ if Config.get_mode() == "http":
132
+ client = _get_http_client()
133
+ return client.exists(doi)
101
134
  db = get_db()
102
135
  row = db.fetchone("SELECT 1 FROM works WHERE doi = ?", (doi,))
103
136
  return row is not None
@@ -105,7 +138,7 @@ def exists(doi: str) -> bool:
105
138
 
106
139
  def configure(db_path: str) -> None:
107
140
  """
108
- Configure database path.
141
+ Configure for local database access.
109
142
 
110
143
  Args:
111
144
  db_path: Path to CrossRef SQLite database
@@ -118,13 +151,122 @@ def configure(db_path: str) -> None:
118
151
  close_db() # Reset singleton to use new path
119
152
 
120
153
 
154
+ def configure_http(api_url: str = "http://localhost:8333") -> None:
155
+ """
156
+ Configure for HTTP API access.
157
+
158
+ Args:
159
+ api_url: URL of CrossRef Local API server
160
+
161
+ Example:
162
+ >>> from crossref_local import configure_http
163
+ >>> configure_http("http://localhost:8333")
164
+ >>> # Or via SSH tunnel:
165
+ >>> # ssh -L 8333:127.0.0.1:8333 your-server
166
+ >>> configure_http() # Uses default localhost:8333
167
+ """
168
+ Config.set_api_url(api_url)
169
+
170
+
171
+ # Backward compatibility alias
172
+ configure_remote = configure_http
173
+
174
+
175
+ def enrich(
176
+ results: SearchResult,
177
+ include_citations: bool = True,
178
+ include_references: bool = True,
179
+ ) -> SearchResult:
180
+ """
181
+ Enrich search results with full metadata (citations, references).
182
+
183
+ The search() function returns basic metadata for speed. This function
184
+ fetches full metadata for each work, adding citation counts and references.
185
+
186
+ Args:
187
+ results: SearchResult from search()
188
+ include_citations: Include citation counts
189
+ include_references: Include reference DOIs
190
+
191
+ Returns:
192
+ SearchResult with enriched works
193
+
194
+ Example:
195
+ >>> from crossref_local import search, enrich
196
+ >>> results = search("machine learning", limit=10)
197
+ >>> enriched = enrich(results)
198
+ >>> for work in enriched:
199
+ ... print(f"{work.title}: {work.citation_count} citations")
200
+ """
201
+ enriched_works = []
202
+ for work in results.works:
203
+ full_work = get(work.doi)
204
+ if full_work:
205
+ enriched_works.append(full_work)
206
+ else:
207
+ # Keep original if full metadata not available
208
+ enriched_works.append(work)
209
+
210
+ return SearchResult(
211
+ works=enriched_works,
212
+ total=results.total,
213
+ query=results.query,
214
+ elapsed_ms=results.elapsed_ms,
215
+ )
216
+
217
+
218
+ def enrich_dois(
219
+ dois: List[str],
220
+ include_citations: bool = True,
221
+ include_references: bool = True,
222
+ ) -> List[Work]:
223
+ """
224
+ Enrich a list of DOIs with full metadata.
225
+
226
+ Fetches complete metadata for each DOI including citation counts
227
+ and reference lists.
228
+
229
+ Args:
230
+ dois: List of DOIs to enrich
231
+ include_citations: Include citation counts
232
+ include_references: Include reference DOIs
233
+
234
+ Returns:
235
+ List of Work objects with full metadata
236
+
237
+ Example:
238
+ >>> from crossref_local import enrich_dois
239
+ >>> works = enrich_dois(["10.1038/nature12373", "10.1126/science.aax0758"])
240
+ >>> for w in works:
241
+ ... print(f"{w.doi}: {w.citation_count} citations, {len(w.references)} refs")
242
+ """
243
+ return get_many(dois)
244
+
245
+
246
+ def get_mode() -> str:
247
+ """
248
+ Get current mode.
249
+
250
+ Returns:
251
+ "db" or "http"
252
+ """
253
+ return Config.get_mode()
254
+
255
+
121
256
  def info() -> dict:
122
257
  """
123
- Get database information.
258
+ Get database/API information.
124
259
 
125
260
  Returns:
126
- Dictionary with database stats
261
+ Dictionary with database stats and mode info
127
262
  """
263
+ mode = Config.get_mode()
264
+
265
+ if mode == "http":
266
+ client = _get_http_client()
267
+ http_info = client.info()
268
+ return {"mode": "http", **http_info}
269
+
128
270
  db = get_db()
129
271
 
130
272
  # Get work count
@@ -146,6 +288,7 @@ def info() -> dict:
146
288
  citation_count = 0
147
289
 
148
290
  return {
291
+ "mode": "db",
149
292
  "db_path": str(Config.get_db_path()),
150
293
  "works": work_count,
151
294
  "fts_indexed": fts_count,