crossref-local 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,78 @@
1
+ """
2
+ crossref_local - Local CrossRef database with full-text search.
3
+
4
+ A Python package for querying a local mirror of the CrossRef database
5
+ with 167M+ scholarly works, full-text search, and impact factor calculation.
6
+
7
+ Sync usage:
8
+ >>> from crossref_local import search, get
9
+ >>> results = search("hippocampal sharp wave ripples")
10
+ >>> work = get("10.1126/science.aax0758")
11
+
12
+ Async usage:
13
+ >>> from crossref_local import aio
14
+ >>> results = await aio.search("machine learning")
15
+ >>> counts = await aio.count_many(["CRISPR", "neural network"])
16
+
17
+ Configuration:
18
+ >>> from crossref_local import configure
19
+ >>> configure("/path/to/crossref.db")
20
+
21
+ Or set CROSSREF_LOCAL_DB environment variable.
22
+ """
23
+
24
# Package version string; it is also listed in __all__ further down.
__version__ = "0.3.0"
25
+
26
+ # Core API
27
+ from .api import (
28
+ search,
29
+ count,
30
+ get,
31
+ get_many,
32
+ exists,
33
+ configure,
34
+ info,
35
+ )
36
+
37
+ # Models
38
+ from .models import Work, SearchResult
39
+
40
+ # Database utilities
41
+ from .db import Database, connection
42
+
43
+ # Configuration
44
+ from .config import Config
45
+
46
+ # Async API
47
+ from . import aio
48
+
49
+ # Citation network
50
+ from .citations import get_citing, get_cited, get_citation_count, CitationNetwork
51
+
52
# Names exported by ``from crossref_local import *``, grouped by origin.
__all__ = [
    "__version__",  # version
    "search", "count", "get", "get_many", "exists", "configure", "info",  # core API
    "Work", "SearchResult",  # models
    "Database", "connection",  # database
    "Config",  # config
    "aio",  # async
    "get_citing", "get_cited", "get_citation_count", "CitationNetwork",  # citations
]
crossref_local/aio.py ADDED
@@ -0,0 +1,236 @@
1
+ """
2
+ Async API for crossref_local.
3
+
4
+ Provides async versions of all API functions. Uses thread pool execution
5
+ with per-thread database connections for thread safety.
6
+
7
+ Usage:
8
+ from crossref_local import aio
9
+
10
+ async def main():
11
+ results = await aio.search("machine learning")
12
+ work = await aio.get("10.1038/nature12373")
13
+ n = await aio.count("CRISPR")
14
+
15
+ # Or import individual functions
16
+ from crossref_local.aio import search, get, count
17
+
18
+ # Concurrent operations
19
+ counts = await aio.count_many(["CRISPR", "machine learning"])
20
+ """
21
+
22
+ import asyncio
23
+ import threading
24
+ from typing import List, Optional
25
+
26
+ from .models import Work, SearchResult
27
+ from .config import Config
28
+ from .db import Database
29
+
30
+
31
# Per-thread cache: every thread stores its own Database handle here, together
# with the path that handle was opened with.
_thread_local = threading.local()


def _get_thread_db() -> Database:
    """Return the calling thread's Database, opening it on first use.

    The handle is cached on ``_thread_local`` along with the path it was
    created from. If ``Config.get_db_path()`` no longer equals that path
    (for example after ``configure()`` selected another file), the cached
    handle is dropped and a new one is opened, so a reused worker thread
    does not keep querying the previously configured database.

    Returns:
        The Database cached for this thread, bound to the current path.
    """
    current_path = Config.get_db_path()
    cached = getattr(_thread_local, "db", None)
    if cached is not None and getattr(_thread_local, "db_path", None) == current_path:
        return cached

    if cached is not None:
        # Forget the stale handle first so a failure below cannot leave it
        # cached under the old path.
        del _thread_local.db
        # Database's interface is not visible from this module, so close()
        # is only called when the object actually provides it.
        closer = getattr(cached, "close", None)
        if callable(closer):
            closer()

    _thread_local.db = Database(current_path)
    _thread_local.db_path = current_path
    return _thread_local.db
40
+
41
+
42
def _search_sync(query: str, limit: int, offset: int) -> SearchResult:
    """Blocking search that runs on the calling thread's own Database.

    Args:
        query: Search query, passed through to ``fts._search_with_db``.
        limit: Maximum number of results.
        offset: Number of leading results to skip.

    Returns:
        The SearchResult produced by ``fts._search_with_db``.
    """
    # fts is imported inside the function, not at module level.
    from . import fts as fts_module

    thread_db = _get_thread_db()
    return fts_module._search_with_db(thread_db, query, limit, offset)
48
+
49
+
50
def _count_sync(query: str) -> int:
    """Blocking count that runs on the calling thread's own Database.

    Args:
        query: Search query, passed through to ``fts._count_with_db``.

    Returns:
        The value returned by ``fts._count_with_db``.
    """
    # fts is imported inside the function, not at module level.
    from . import fts as fts_module

    thread_db = _get_thread_db()
    return fts_module._count_with_db(thread_db, query)
55
+
56
+
57
def _get_sync(doi: str) -> Optional[Work]:
    """Blocking DOI lookup using the calling thread's own Database.

    Args:
        doi: Digital Object Identifier to look up.

    Returns:
        A Work built from the stored metadata, or None when
        ``get_metadata`` returns a falsy value.
    """
    record = _get_thread_db().get_metadata(doi)
    if not record:
        return None
    return Work.from_metadata(doi, record)
64
+
65
+
66
def _get_many_sync(dois: List[str]) -> List[Work]:
    """Blocking multi-DOI lookup using the calling thread's own Database.

    Args:
        dois: DOIs to look up, one ``get_metadata`` call each, in order.

    Returns:
        Works for the DOIs whose metadata was truthy; the others are skipped.
    """
    thread_db = _get_thread_db()
    # Lazy pairs keep the original interleaving: fetch one DOI's metadata,
    # build its Work, then move on to the next DOI.
    fetched = ((doi, thread_db.get_metadata(doi)) for doi in dois)
    return [Work.from_metadata(doi, record) for doi, record in fetched if record]
75
+
76
+
77
def _exists_sync(doi: str) -> bool:
    """Blocking existence check using the calling thread's own Database.

    Args:
        doi: Digital Object Identifier to test.

    Returns:
        True when the ``works`` lookup for this DOI yields a row.
    """
    hit = _get_thread_db().fetchone("SELECT 1 FROM works WHERE doi = ?", (doi,))
    return hit is not None
82
+
83
+
84
def _info_sync() -> dict:
    """Blocking database summary using the calling thread's own Database.

    Returns:
        Dict with keys ``db_path``, ``works``, ``fts_indexed`` and
        ``citations``. The last two fall back to 0 when their query raises.
    """
    thread_db = _get_thread_db()

    def _row_count(sql: str) -> int:
        # A missing row is reported as 0.
        row = thread_db.fetchone(sql)
        return row["count"] if row else 0

    # The works query is not guarded: a failure here propagates.
    works_total = _row_count("SELECT COUNT(*) as count FROM works")

    try:
        fts_total = _row_count("SELECT COUNT(*) as count FROM works_fts")
    except Exception:
        fts_total = 0

    try:
        citations_total = _row_count("SELECT COUNT(*) as count FROM citations")
    except Exception:
        citations_total = 0

    return {
        "db_path": str(Config.get_db_path()),
        "works": works_total,
        "fts_indexed": fts_total,
        "citations": citations_total,
    }
109
+
110
+
111
async def search(
    query: str,
    limit: int = 10,
    offset: int = 0,
) -> SearchResult:
    """
    Search works without blocking the event loop.

    Runs ``_search_sync`` through ``asyncio.to_thread``.

    Args:
        query: Search query (supports FTS5 syntax)
        limit: Maximum results to return
        offset: Skip first N results (for pagination)

    Returns:
        SearchResult with matching works
    """
    pending = asyncio.to_thread(_search_sync, query, limit, offset)
    return await pending
128
+
129
+
130
async def count(query: str) -> int:
    """
    Count matching works without blocking the event loop.

    Runs ``_count_sync`` through ``asyncio.to_thread``.

    Args:
        query: FTS5 search query

    Returns:
        Number of matching works
    """
    pending = asyncio.to_thread(_count_sync, query)
    return await pending
141
+
142
+
143
async def get(doi: str) -> Optional[Work]:
    """
    Fetch one work by DOI without blocking the event loop.

    Runs ``_get_sync`` through ``asyncio.to_thread``.

    Args:
        doi: Digital Object Identifier

    Returns:
        Work object or None if not found
    """
    pending = asyncio.to_thread(_get_sync, doi)
    return await pending
154
+
155
+
156
async def get_many(dois: List[str]) -> List[Work]:
    """
    Fetch several works by DOI without blocking the event loop.

    Runs ``_get_many_sync`` through ``asyncio.to_thread``, so all DOIs are
    looked up by a single worker call.

    Args:
        dois: List of DOIs

    Returns:
        List of Work objects (missing DOIs are skipped)
    """
    pending = asyncio.to_thread(_get_many_sync, dois)
    return await pending
167
+
168
+
169
async def exists(doi: str) -> bool:
    """
    Check for a DOI without blocking the event loop.

    Runs ``_exists_sync`` through ``asyncio.to_thread``.

    Args:
        doi: Digital Object Identifier

    Returns:
        True if DOI exists
    """
    pending = asyncio.to_thread(_exists_sync, doi)
    return await pending
180
+
181
+
182
async def info() -> dict:
    """
    Collect database statistics without blocking the event loop.

    Runs ``_info_sync`` through ``asyncio.to_thread``.

    Returns:
        Dictionary with database stats
    """
    pending = asyncio.to_thread(_info_sync)
    return await pending
190
+
191
+
192
async def search_many(queries: List[str], limit: int = 10) -> List[SearchResult]:
    """
    Launch one ``search`` per query and await them together.

    Args:
        queries: List of search queries
        limit: Maximum results per query

    Returns:
        List of SearchResult objects, in the same order as ``queries``
    """
    # Unpacking the generator creates every coroutine before gather runs.
    return await asyncio.gather(*(search(q, limit=limit) for q in queries))
205
+
206
+
207
async def count_many(queries: List[str]) -> dict:
    """
    Count matches for multiple queries concurrently.

    Each distinct query is counted once. Repeated queries share a single
    dict key, so counting them again would not change the result.

    Args:
        queries: Search queries; any iterable of strings is accepted

    Returns:
        Dict mapping query -> count, keyed in first-seen order

    Example:
        >>> counts = await count_many(["CRISPR", "machine learning"])
        >>> print(counts)
        {'CRISPR': 45000, 'machine learning': 477922}
    """
    # Materialise once and drop repeats, keeping order. A one-shot iterator
    # would otherwise be exhausted before zip() and give an empty dict.
    unique_queries = list(dict.fromkeys(queries))
    totals = await asyncio.gather(*(count(q) for q in unique_queries))
    return dict(zip(unique_queries, totals))
225
+
226
+
227
# Public coroutine names of this module: single-item calls, then batch helpers.
__all__ = [
    "search", "count", "get", "get_many", "exists", "info",
    "search_many", "count_many",
]
crossref_local/api.py ADDED
@@ -0,0 +1,153 @@
1
+ """Main API for crossref_local."""
2
+
3
+ from typing import List, Optional
4
+
5
+ from .config import Config
6
+ from .db import Database, get_db, close_db, connection
7
+ from .models import Work, SearchResult
8
+ from . import fts
9
+
10
+
11
def search(
    query: str,
    limit: int = 10,
    offset: int = 0,
) -> SearchResult:
    """
    Run a full-text search by delegating to ``fts.search``.

    Args:
        query: Search query (supports FTS5 syntax)
        limit: Maximum results to return
        offset: Skip first N results (for pagination)

    Returns:
        SearchResult with matching works

    Example:
        >>> from crossref_local import search
        >>> results = search("machine learning")
        >>> print(f"Found {results.total} matches")
    """
    matches = fts.search(query, limit, offset)
    return matches
35
+
36
+
37
def count(query: str) -> int:
    """
    Count matching works by delegating to ``fts.count``.

    Args:
        query: FTS5 search query

    Returns:
        Number of matching works
    """
    total = fts.count(query)
    return total
48
+
49
+
50
def get(doi: str) -> Optional[Work]:
    """
    Look up a single work by DOI on the database returned by ``get_db()``.

    Args:
        doi: Digital Object Identifier

    Returns:
        Work object, or None when ``get_metadata`` returns a falsy value

    Example:
        >>> from crossref_local import get
        >>> work = get("10.1038/nature12373")
        >>> print(work.title)
    """
    record = get_db().get_metadata(doi)
    if not record:
        return None
    return Work.from_metadata(doi, record)
70
+
71
+
72
def get_many(dois: List[str]) -> List[Work]:
    """
    Look up several works by DOI, one ``get_metadata`` call per DOI.

    Args:
        dois: List of DOIs

    Returns:
        List of Work objects (missing DOIs are skipped)
    """
    shared_db = get_db()
    # Lazy pairs keep the original interleaving: fetch one DOI's metadata,
    # build its Work, then move on to the next DOI.
    fetched = ((doi, shared_db.get_metadata(doi)) for doi in dois)
    return [Work.from_metadata(doi, record) for doi, record in fetched if record]
89
+
90
+
91
def exists(doi: str) -> bool:
    """
    Report whether the ``works`` table has a row for the given DOI.

    Args:
        doi: Digital Object Identifier

    Returns:
        True if DOI exists
    """
    hit = get_db().fetchone("SELECT 1 FROM works WHERE doi = ?", (doi,))
    return hit is not None
104
+
105
+
106
def configure(db_path: str) -> None:
    """
    Configure database path.

    Stores the path through ``Config.set_db_path`` and then calls
    ``close_db()``. The order matters: the path is updated first, so
    whatever is opened after the close sees the new value.

    NOTE(review): this function only calls ``close_db()``; it does not
    itself touch any per-thread handles kept by ``crossref_local.aio``.

    Args:
        db_path: Path to CrossRef SQLite database

    Example:
        >>> from crossref_local import configure
        >>> configure("/path/to/crossref.db")
    """
    Config.set_db_path(db_path)
    close_db()  # Reset singleton to use new path
119
+
120
+
121
def info() -> dict:
    """
    Summarise the database returned by ``get_db()``.

    Returns:
        Dictionary with keys ``db_path``, ``works``, ``fts_indexed`` and
        ``citations``. The last two fall back to 0 when their query raises.
    """
    shared_db = get_db()

    # Work count: not guarded, so a failure here propagates to the caller.
    works_row = shared_db.fetchone("SELECT COUNT(*) as count FROM works")
    stats = {"works": works_row["count"] if works_row else 0}

    # FTS and citation counts are best-effort, in that order.
    optional_counts = (
        ("fts_indexed", "SELECT COUNT(*) as count FROM works_fts"),
        ("citations", "SELECT COUNT(*) as count FROM citations"),
    )
    for key, sql in optional_counts:
        try:
            row = shared_db.fetchone(sql)
            stats[key] = row["count"] if row else 0
        except Exception:
            stats[key] = 0

    return {"db_path": str(Config.get_db_path()), **stats}