crossref-local 0.4.0-py3-none-any.whl → 0.5.1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- crossref_local/__init__.py +24 -10
- crossref_local/_aio/__init__.py +30 -0
- crossref_local/_aio/_impl.py +238 -0
- crossref_local/_cache/__init__.py +15 -0
- crossref_local/{cache_export.py → _cache/export.py} +27 -10
- crossref_local/_cache/utils.py +93 -0
- crossref_local/_cli/__init__.py +9 -0
- crossref_local/_cli/cli.py +389 -0
- crossref_local/_cli/mcp.py +351 -0
- crossref_local/_cli/mcp_server.py +457 -0
- crossref_local/_cli/search.py +199 -0
- crossref_local/_core/__init__.py +62 -0
- crossref_local/{api.py → _core/api.py} +26 -5
- crossref_local/{citations.py → _core/citations.py} +55 -26
- crossref_local/{config.py → _core/config.py} +40 -22
- crossref_local/{db.py → _core/db.py} +32 -26
- crossref_local/_core/export.py +344 -0
- crossref_local/{fts.py → _core/fts.py} +37 -14
- crossref_local/{models.py → _core/models.py} +120 -6
- crossref_local/_remote/__init__.py +56 -0
- crossref_local/_remote/base.py +378 -0
- crossref_local/_remote/collections.py +175 -0
- crossref_local/_server/__init__.py +140 -0
- crossref_local/_server/middleware.py +25 -0
- crossref_local/_server/models.py +143 -0
- crossref_local/_server/routes_citations.py +98 -0
- crossref_local/_server/routes_collections.py +282 -0
- crossref_local/_server/routes_compat.py +102 -0
- crossref_local/_server/routes_works.py +178 -0
- crossref_local/_server/server.py +19 -0
- crossref_local/aio.py +30 -206
- crossref_local/cache.py +100 -100
- crossref_local/cli.py +5 -515
- crossref_local/jobs.py +169 -0
- crossref_local/mcp_server.py +5 -410
- crossref_local/remote.py +5 -266
- crossref_local/server.py +5 -349
- {crossref_local-0.4.0.dist-info → crossref_local-0.5.1.dist-info}/METADATA +36 -11
- crossref_local-0.5.1.dist-info/RECORD +49 -0
- {crossref_local-0.4.0.dist-info → crossref_local-0.5.1.dist-info}/entry_points.txt +1 -1
- crossref_local/cli_mcp.py +0 -275
- crossref_local-0.4.0.dist-info/RECORD +0 -27
- crossref_local/{cache_viz.py → _cache/viz.py} +0 -0
- crossref_local/{cli_cache.py → _cli/cache.py} +0 -0
- crossref_local/{cli_completion.py → _cli/completion.py} +0 -0
- crossref_local/{cli_main.py → _cli/main.py} +0 -0
- crossref_local/{impact_factor → _impact_factor}/__init__.py +0 -0
- crossref_local/{impact_factor → _impact_factor}/calculator.py +0 -0
- crossref_local/{impact_factor → _impact_factor}/journal_lookup.py +0 -0
- {crossref_local-0.4.0.dist-info → crossref_local-0.5.1.dist-info}/WHEEL +0 -0
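Reading the counts: the old top-level modules are reduced to thin re-export shims (`cli.py` +5 -515, `mcp_server.py` +5 -410, `remote.py` +5 -266, `server.py` +5 -349), with the implementations moved into new private subpackages (`_core`, `_cli`, `_cache`, `_server`, `_remote`, `_impact_factor`). The hunks below show the substantive new code: `_core/export.py`, `_core/fts.py`, `_core/models.py`, and `_remote/__init__.py`.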
crossref_local/_core/export.py (new file)
@@ -0,0 +1,344 @@
+"""Export functionality for Work and SearchResult objects.
+
+Supports multiple output formats:
+- text: Human-readable formatted text
+- json: JSON format with all fields
+- bibtex: BibTeX bibliography format
+"""
+
+import json as _json
+from pathlib import Path as _Path
+from typing import TYPE_CHECKING, List, Optional, Union
+
+if TYPE_CHECKING:
+    from .models import SearchResult, Work
+
+__all__ = [
+    "save",
+    "export_text",
+    "export_json",
+    "export_bibtex",
+    "SUPPORTED_FORMATS",
+]
+
+SUPPORTED_FORMATS = ["text", "json", "bibtex"]
+
+
+def _sanitize_bibtex_key(doi: str) -> str:
+    """Convert DOI to valid BibTeX key."""
+    return doi.replace("/", "_").replace(".", "_").replace("-", "_")
+
+
+def _escape_bibtex(text: str) -> str:
+    """Escape special characters for BibTeX."""
+    if not text:
+        return ""
+    # Escape special LaTeX characters
+    replacements = [
+        ("&", r"\&"),
+        ("%", r"\%"),
+        ("$", r"\$"),
+        ("#", r"\#"),
+        ("_", r"\_"),
+        ("{", r"\{"),
+        ("}", r"\}"),
+    ]
+    for old, new in replacements:
+        text = text.replace(old, new)
+    return text
+
+
+def work_to_text(work: "Work", include_abstract: bool = False) -> str:
+    """Convert a Work to human-readable text format.
+
+    Args:
+        work: Work object to convert
+        include_abstract: Whether to include abstract
+
+    Returns:
+        Formatted text string
+    """
+    lines = []
+
+    # Title
+    title = work.title or "Untitled"
+    year = f"({work.year})" if work.year else ""
+    lines.append(f"{title} {year}".strip())
+
+    # Authors
+    if work.authors:
+        authors_str = ", ".join(work.authors[:5])
+        if len(work.authors) > 5:
+            authors_str += f" et al. ({len(work.authors)} authors)"
+        lines.append(f"Authors: {authors_str}")
+
+    # Journal and DOI
+    if work.journal:
+        journal_line = f"Journal: {work.journal}"
+        if work.volume:
+            journal_line += f", {work.volume}"
+        if work.issue:
+            journal_line += f"({work.issue})"
+        if work.page:
+            journal_line += f", {work.page}"
+        lines.append(journal_line)
+
+    lines.append(f"DOI: {work.doi}")
+
+    # Impact factor
+    if work.impact_factor:
+        lines.append(
+            f"Impact Factor: {work.impact_factor:.2f} ({work.impact_factor_source or 'unknown'})"
+        )
+
+    # Citation count
+    if work.citation_count is not None:
+        lines.append(f"Citations: {work.citation_count}")
+
+    # Abstract
+    if include_abstract and work.abstract:
+        # Strip XML tags
+        import re
+
+        abstract = re.sub(r"<[^>]+>", " ", work.abstract)
+        abstract = re.sub(r"\s+", " ", abstract).strip()
+        lines.append(f"Abstract: {abstract}")
+
+    return "\n".join(lines)
+
+
+def work_to_bibtex(work: "Work") -> str:
+    """Convert a Work to BibTeX format.
+
+    Args:
+        work: Work object to convert
+
+    Returns:
+        BibTeX entry string
+    """
+    key = _sanitize_bibtex_key(work.doi) if work.doi else "unknown"
+    work_type = work.type or "article"
+
+    # Map CrossRef types to BibTeX types
+    bibtex_type_map = {
+        "journal-article": "article",
+        "book-chapter": "incollection",
+        "book": "book",
+        "proceedings-article": "inproceedings",
+        "dissertation": "phdthesis",
+        "report": "techreport",
+    }
+    bibtex_type = bibtex_type_map.get(work_type, "misc")
+
+    lines = [f"@{bibtex_type}{{{key},"]
+
+    if work.title:
+        lines.append(f" title = {{{_escape_bibtex(work.title)}}},")
+
+    if work.authors:
+        authors = " and ".join(work.authors)
+        lines.append(f" author = {{{_escape_bibtex(authors)}}},")
+
+    if work.year:
+        lines.append(f" year = {{{work.year}}},")
+
+    if work.journal:
+        lines.append(f" journal = {{{_escape_bibtex(work.journal)}}},")
+
+    if work.volume:
+        lines.append(f" volume = {{{work.volume}}},")
+
+    if work.issue:
+        lines.append(f" number = {{{work.issue}}},")
+
+    if work.page:
+        lines.append(f" pages = {{{work.page}}},")
+
+    if work.publisher:
+        lines.append(f" publisher = {{{_escape_bibtex(work.publisher)}}},")
+
+    if work.doi:
+        lines.append(f" doi = {{{work.doi}}},")
+
+    if work.url:
+        lines.append(f" url = {{{work.url}}},")
+
+    if work.issn:
+        lines.append(f" issn = {{{work.issn}}},")
+
+    lines.append("}")
+
+    return "\n".join(lines)
+
+
+def export_text(
+    works: List["Work"],
+    include_abstract: bool = False,
+    query: Optional[str] = None,
+    total: Optional[int] = None,
+    elapsed_ms: Optional[float] = None,
+) -> str:
+    """Export works to text format.
+
+    Args:
+        works: List of Work objects
+        include_abstract: Whether to include abstracts
+        query: Original search query (for header)
+        total: Total number of matches
+        elapsed_ms: Search time in milliseconds
+
+    Returns:
+        Formatted text string
+    """
+    lines = []
+
+    # Header
+    if query is not None:
+        lines.append(f"Search: {query}")
+    if total is not None:
+        lines.append(f"Found: {total:,} matches")
+    if elapsed_ms is not None:
+        lines.append(f"Time: {elapsed_ms:.1f}ms")
+    lines.append("")
+    lines.append("=" * 60)
+    lines.append("")
+
+    # Works
+    for i, work in enumerate(works, 1):
+        lines.append(f"[{i}]")
+        lines.append(work_to_text(work, include_abstract=include_abstract))
+        lines.append("")
+        lines.append("-" * 40)
+        lines.append("")
+
+    return "\n".join(lines)
+
+
+def export_json(
+    works: List["Work"],
+    query: Optional[str] = None,
+    total: Optional[int] = None,
+    elapsed_ms: Optional[float] = None,
+    indent: int = 2,
+) -> str:
+    """Export works to JSON format.
+
+    Args:
+        works: List of Work objects
+        query: Original search query
+        total: Total number of matches
+        elapsed_ms: Search time in milliseconds
+        indent: JSON indentation
+
+    Returns:
+        JSON string
+    """
+    data = {
+        "works": [w.to_dict() for w in works],
+    }
+
+    if query is not None:
+        data["query"] = query
+    if total is not None:
+        data["total"] = total
+    if elapsed_ms is not None:
+        data["elapsed_ms"] = elapsed_ms
+
+    return _json.dumps(data, indent=indent, ensure_ascii=False)
+
+
+def export_bibtex(works: List["Work"]) -> str:
+    """Export works to BibTeX format.
+
+    Args:
+        works: List of Work objects
+
+    Returns:
+        BibTeX string with all entries
+    """
+    entries = [work_to_bibtex(w) for w in works]
+    return "\n\n".join(entries)
+
+
+def save(
+    data: Union["Work", "SearchResult", List["Work"]],
+    path: Union[str, _Path],
+    format: str = "json",
+    include_abstract: bool = True,
+) -> str:
+    """Save Work(s) or SearchResult to a file.
+
+    Args:
+        data: Work, SearchResult, or list of Works to save
+        path: Output file path
+        format: Output format ("text", "json", "bibtex")
+        include_abstract: Include abstracts in text format
+
+    Returns:
+        Path to saved file
+
+    Raises:
+        ValueError: If format is not supported
+
+    Examples:
+        >>> from crossref_local import search, save
+        >>> results = search("machine learning", limit=10)
+        >>> save(results, "results.json")
+        >>> save(results, "results.bib", format="bibtex")
+        >>> save(results, "results.txt", format="text")
+    """
+    from .models import SearchResult, Work
+
+    if format not in SUPPORTED_FORMATS:
+        raise ValueError(
+            f"Unsupported format: {format}. "
+            f"Supported formats: {', '.join(SUPPORTED_FORMATS)}"
+        )
+
+    path = _Path(path)
+
+    # Extract works and metadata
+    if isinstance(data, Work):
+        works = [data]
+        query = None
+        total = None
+        elapsed_ms = None
+    elif isinstance(data, SearchResult):
+        works = data.works
+        query = data.query
+        total = data.total
+        elapsed_ms = data.elapsed_ms
+    elif isinstance(data, list):
+        works = data
+        query = None
+        total = len(data)
+        elapsed_ms = None
+    else:
+        raise TypeError(f"Unsupported data type: {type(data)}")

+    # Generate content
+    if format == "text":
+        content = export_text(
+            works,
+            include_abstract=include_abstract,
+            query=query,
+            total=total,
+            elapsed_ms=elapsed_ms,
+        )
+    elif format == "json":
+        content = export_json(
+            works,
+            query=query,
+            total=total,
+            elapsed_ms=elapsed_ms,
+        )
+    elif format == "bibtex":
+        content = export_bibtex(works)
+    else:
+        raise ValueError(f"Unsupported format: {format}")
+
+    # Write to file
+    path.parent.mkdir(parents=True, exist_ok=True)
+    path.write_text(content, encoding="utf-8")
+
+    return str(path)
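The module's one public entry point is `save()`: it validates `format` against `SUPPORTED_FORMATS`, normalizes the input (a single `Work`, a `SearchResult`, or a plain list of `Work`s), then dispatches to the matching exporter. A minimal usage sketch, following the docstring examples and assuming a configured local database:

```python
from crossref_local import search, save

results = search("machine learning", limit=10)  # SearchResult

save(results, "results.json")                   # default format; keeps query/total/elapsed_ms
save(results, "results.bib", format="bibtex")   # one entry per work, via work_to_bibtex()
save(results, "results.txt", format="text", include_abstract=False)
```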
crossref_local/{fts.py → _core/fts.py}
@@ -1,11 +1,17 @@
 """Full-text search using FTS5."""
 
-import re
-import time
+import re as _re
+import time as _time
 from typing import List, Optional
 
 from .db import Database, get_db
-from .models import SearchResult, Work
+from .models import LimitInfo, SearchResult, Work
+
+__all__ = [
+    "search",
+    "count",
+    "search_dois",
+]
 
 
 def _sanitize_query(query: str) -> str:
@@ -24,13 +30,13 @@ def _sanitize_query(query: str) -> str:
 
     # Check for problematic patterns (hyphenated words, special chars)
     # But allow explicit FTS5 operators: AND, OR, NOT, NEAR
-    has_hyphenated_word = re.search(r"\w+-\w+", query)
-    has_special = re.search(r"[/\\@#$%^&]", query)
+    has_hyphenated_word = _re.search(r"\w+-\w+", query)
+    has_special = _re.search(r"[/\\@#$%^&]", query)
 
     if has_hyphenated_word or has_special:
         # Quote each word to treat as literal
         words = query.split()
-        quoted =
+        quoted = " ".join(f'"{w}"' for w in words)
         return quoted
 
     return query
@@ -65,15 +71,14 @@ def search(
     if db is None:
         db = get_db()
 
-    start = time.perf_counter()
+    start = _time.perf_counter()
 
     # Sanitize query for FTS5
     safe_query = _sanitize_query(query)
 
     # Get total count
     count_row = db.fetchone(
-        "SELECT COUNT(*) as total FROM works_fts WHERE works_fts MATCH ?",
-        (safe_query,)
+        "SELECT COUNT(*) as total FROM works_fts WHERE works_fts MATCH ?", (safe_query,)
     )
     total = count_row["total"] if count_row else 0
 
@@ -86,10 +91,10 @@ def search(
         WHERE works_fts MATCH ?
         LIMIT ? OFFSET ?
         """,
-        (safe_query, limit, offset)
+        (safe_query, limit, offset),
     )
 
-    elapsed_ms = (time.perf_counter() - start) * 1000
+    elapsed_ms = (_time.perf_counter() - start) * 1000
 
     # Convert to Work objects
     works = []
@@ -97,11 +102,30 @@ def search(
         metadata = db._decompress_metadata(row["metadata"])
         works.append(Work.from_metadata(row["doi"], metadata))
 
+    # Build limit info
+    returned = len(works)
+    capped = returned < total and returned == limit
+    capped_reason = None
+    if capped:
+        capped_reason = (
+            f"crossref-local: Limited to {limit} results (total available: {total})"
+        )
+
+    limit_info = LimitInfo(
+        requested=limit,
+        returned=returned,
+        total_available=total,
+        capped=capped,
+        capped_reason=capped_reason,
+        stage="crossref-local",
+    )
+
     return SearchResult(
         works=works,
         total=total,
         query=query,
         elapsed_ms=elapsed_ms,
+        limit_info=limit_info,
     )
 
 
@@ -121,8 +145,7 @@ def count(query: str, db: Optional[Database] = None) -> int:
 
     safe_query = _sanitize_query(query)
     row = db.fetchone(
-        "SELECT COUNT(*) as total FROM works_fts WHERE works_fts MATCH ?",
-        (safe_query,)
+        "SELECT COUNT(*) as total FROM works_fts WHERE works_fts MATCH ?", (safe_query,)
    )
    return row["total"] if row else 0
 
@@ -155,7 +178,7 @@ def search_dois(
         WHERE works_fts MATCH ?
         LIMIT ?
         """,
-        (safe_query, limit)
+        (safe_query, limit),
     )
 
     return [row["doi"] for row in rows]
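Net effect of these hunks: `search()` now attaches a `LimitInfo` describing whether the caller's `limit` truncated the result set, alongside the formatting-only changes (trailing commas, underscore-aliased imports). A sketch of the new field in use (assumes a populated local database; `search` is imported at the top level as in the export docstring):

```python
from crossref_local import search

result = search("epilepsy seizure", limit=5)

info = result.limit_info  # LimitInfo (Optional on the dataclass for compatibility)
if info is not None and info.capped:
    # e.g. "crossref-local: Limited to 5 results (total available: 12345)"
    print(info.capped_reason)
    print(info.to_dict())  # {"requested": 5, "returned": 5, "total_available": ..., ...}
```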
crossref_local/{models.py → _core/models.py}
@@ -1,11 +1,17 @@
 """Data models for crossref_local."""
 
-from dataclasses import dataclass, field
+from dataclasses import dataclass as _dataclass
+from dataclasses import field as _field
 from typing import List, Optional
-import json
 
+__all__ = [
+    "Work",
+    "SearchResult",
+    "LimitInfo",
+]
 
-@dataclass
+
+@_dataclass
 class Work:
     """
     Represents a scholarly work from CrossRef.
@@ -30,7 +36,7 @@ class Work:
 
     doi: str
     title: Optional[str] = None
-    authors: List[str] = field(default_factory=list)
+    authors: List[str] = _field(default_factory=list)
     year: Optional[int] = None
     journal: Optional[str] = None
     issn: Optional[str] = None
@@ -42,7 +48,9 @@ class Work:
     abstract: Optional[str] = None
     url: Optional[str] = None
     citation_count: Optional[int] = None
-    references: List[str] = field(default_factory=list)
+    references: List[str] = _field(default_factory=list)
+    impact_factor: Optional[float] = None
+    impact_factor_source: Optional[str] = None
 
     @classmethod
     def from_metadata(cls, doi: str, metadata: dict) -> "Work":
@@ -125,6 +133,8 @@ class Work:
             "url": self.url,
             "citation_count": self.citation_count,
             "references": self.references,
+            "impact_factor": self.impact_factor,
+            "impact_factor_source": self.impact_factor_source,
         }
 
     def citation(self, style: str = "apa") -> str:
@@ -158,8 +168,86 @@ class Work:
 
         return ". ".join(filter(None, parts))
 
+    def to_text(self, include_abstract: bool = False) -> str:
+        """
+        Format as human-readable text.
+
+        Args:
+            include_abstract: Include abstract in output
+
+        Returns:
+            Formatted text string
+        """
+        from .export import work_to_text
+
+        return work_to_text(self, include_abstract=include_abstract)
+
+    def to_bibtex(self) -> str:
+        """
+        Format as BibTeX entry.
+
+        Returns:
+            BibTeX string
+        """
+        from .export import work_to_bibtex
+
+        return work_to_bibtex(self)
+
+    def save(self, path: str, format: str = "json") -> str:
+        """
+        Save work to file.
+
+        Args:
+            path: Output file path
+            format: Output format ("text", "json", "bibtex")
+
+        Returns:
+            Path to saved file
+
+        Examples:
+            >>> work = get("10.1038/nature12373")
+            >>> work.save("paper.json")
+            >>> work.save("paper.bib", format="bibtex")
+        """
+        from .export import save
+
+        return save(self, path, format=format)
 
-@dataclass
+
+@_dataclass
+class LimitInfo:
+    """
+    Information about result limiting at each stage.
+
+    Attributes:
+        requested: Number of results requested
+        returned: Number of results actually returned
+        total_available: Total matches in database
+        capped: Whether results were capped
+        capped_reason: Why results were capped (if applicable)
+        stage: Which stage applied this limit (e.g., "crossref-local", "scitex", "django")
+    """
+
+    requested: int
+    returned: int
+    total_available: int
+    capped: bool = False
+    capped_reason: Optional[str] = None
+    stage: str = "crossref-local"
+
+    def to_dict(self) -> dict:
+        """Convert to dictionary."""
+        return {
+            "requested": self.requested,
+            "returned": self.returned,
+            "total_available": self.total_available,
+            "capped": self.capped,
+            "capped_reason": self.capped_reason,
+            "stage": self.stage,
+        }
+
+
+@_dataclass
 class SearchResult:
     """
     Container for search results with metadata.
@@ -169,12 +257,14 @@ class SearchResult:
         total: Total number of matches
        query: Original search query
         elapsed_ms: Search time in milliseconds
+        limit_info: Information about result limiting
     """
 
     works: List[Work]
     total: int
     query: str
     elapsed_ms: float
+    limit_info: Optional[LimitInfo] = None
 
     def __len__(self) -> int:
         return len(self.works)
@@ -184,3 +274,27 @@ class SearchResult:
 
     def __getitem__(self, idx):
         return self.works[idx]
+
+    def save(
+        self, path: str, format: str = "json", include_abstract: bool = True
+    ) -> str:
+        """
+        Save search results to file.
+
+        Args:
+            path: Output file path
+            format: Output format ("text", "json", "bibtex")
+            include_abstract: Include abstracts in text format
+
+        Returns:
+            Path to saved file
+
+        Examples:
+            >>> results = search("machine learning", limit=10)
+            >>> results.save("results.json")
+            >>> results.save("results.bib", format="bibtex")
+            >>> results.save("results.txt", format="text")
+        """
+        from .export import save
+
+        return save(self, path, format=format, include_abstract=include_abstract)
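The new `Work.to_text`/`to_bibtex`/`save` methods (and `SearchResult.save`) are thin delegates into `_core/export.py`, imported lazily inside each method to avoid a circular import. A sketch based on the docstring examples; `get` is assumed to be the package's DOI lookup, as in the `Work.save` docstring:

```python
from crossref_local import get

work = get("10.1038/nature12373")           # DOI from the docstring example

text = work.to_text(include_abstract=True)  # delegates to export.work_to_text
bib = work.to_bibtex()                      # delegates to export.work_to_bibtex
work.save("paper.bib", format="bibtex")     # delegates to export.save
```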
crossref_local/_remote/__init__.py (new file)
@@ -0,0 +1,56 @@
+"""Remote API client package with collection support.
+
+Provides RemoteClient for connecting to CrossRef Local API server.
+"""
+
+from typing import Optional
+
+from .base import (
+    RemoteClient as _BaseClient,
+    DEFAULT_API_URL,
+)
+from .collections import CollectionsMixin
+
+
+class RemoteClient(CollectionsMixin, _BaseClient):
+    """Remote client with collection support.
+
+    Extends base RemoteClient with collection management methods.
+
+    Example:
+        >>> client = RemoteClient("http://localhost:31291")
+        >>> # Create a collection
+        >>> client.create_collection("epilepsy", query="epilepsy seizure")
+        >>> # Query collection
+        >>> papers = client.get_collection("epilepsy", fields=["doi", "title"])
+        >>> # Download as file
+        >>> client.download_collection("epilepsy", "papers.bib", format="bibtex")
+    """
+
+    pass
+
+
+# Module-level client singleton
+_client: Optional[RemoteClient] = None
+
+
+def get_client(base_url: str = DEFAULT_API_URL) -> RemoteClient:
+    """Get or create singleton remote client with collection support."""
+    global _client
+    if _client is None or _client.base_url != base_url:
+        _client = RemoteClient(base_url)
+    return _client
+
+
+def reset_client() -> None:
+    """Reset singleton client."""
+    global _client
+    _client = None
+
+
+__all__ = [
+    "RemoteClient",
+    "DEFAULT_API_URL",
+    "get_client",
+    "reset_client",
+]
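Because `CollectionsMixin` precedes `_BaseClient` in the bases, collection methods resolve first in the MRO while transport and `base_url` handling come from the base client; `get_client` caches one instance per `base_url`. Usage, following the class docstring (the URL and collection name are the docstring's own placeholders):

```python
from crossref_local._remote import get_client, reset_client

client = get_client("http://localhost:31291")  # cached singleton per base_url
client.create_collection("epilepsy", query="epilepsy seizure")
papers = client.get_collection("epilepsy", fields=["doi", "title"])
client.download_collection("epilepsy", "papers.bib", format="bibtex")

reset_client()  # drop the cached client (e.g. between tests)
```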