crossref-local 0.5.0-py3-none-any.whl → 0.5.1-py3-none-any.whl
This diff compares the contents of two package versions publicly released to a supported registry. It is provided for informational purposes only and reflects the packages as they appear in that registry.
- crossref_local/__init__.py +7 -1
- crossref_local/_cli/cli.py +15 -138
- crossref_local/_cli/mcp_server.py +59 -15
- crossref_local/_cli/search.py +199 -0
- crossref_local/_core/__init__.py +4 -0
- crossref_local/_core/api.py +3 -1
- crossref_local/_core/export.py +344 -0
- crossref_local/_core/fts.py +20 -1
- crossref_local/_core/models.py +109 -0
- crossref_local/_remote/base.py +25 -3
- crossref_local/_server/models.py +14 -0
- crossref_local/_server/routes_works.py +63 -13
- {crossref_local-0.5.0.dist-info → crossref_local-0.5.1.dist-info}/METADATA +1 -1
- {crossref_local-0.5.0.dist-info → crossref_local-0.5.1.dist-info}/RECORD +16 -14
- {crossref_local-0.5.0.dist-info → crossref_local-0.5.1.dist-info}/WHEEL +0 -0
- {crossref_local-0.5.0.dist-info → crossref_local-0.5.1.dist-info}/entry_points.txt +0 -0
crossref_local/_core/export.py
ADDED

@@ -0,0 +1,344 @@
+"""Export functionality for Work and SearchResult objects.
+
+Supports multiple output formats:
+- text: Human-readable formatted text
+- json: JSON format with all fields
+- bibtex: BibTeX bibliography format
+"""
+
+import json as _json
+from pathlib import Path as _Path
+from typing import TYPE_CHECKING, List, Optional, Union
+
+if TYPE_CHECKING:
+    from .models import SearchResult, Work
+
+__all__ = [
+    "save",
+    "export_text",
+    "export_json",
+    "export_bibtex",
+    "SUPPORTED_FORMATS",
+]
+
+SUPPORTED_FORMATS = ["text", "json", "bibtex"]
+
+
+def _sanitize_bibtex_key(doi: str) -> str:
+    """Convert DOI to valid BibTeX key."""
+    return doi.replace("/", "_").replace(".", "_").replace("-", "_")
+
+
+def _escape_bibtex(text: str) -> str:
+    """Escape special characters for BibTeX."""
+    if not text:
+        return ""
+    # Escape special LaTeX characters
+    replacements = [
+        ("&", r"\&"),
+        ("%", r"\%"),
+        ("$", r"\$"),
+        ("#", r"\#"),
+        ("_", r"\_"),
+        ("{", r"\{"),
+        ("}", r"\}"),
+    ]
+    for old, new in replacements:
+        text = text.replace(old, new)
+    return text
+
+
+def work_to_text(work: "Work", include_abstract: bool = False) -> str:
+    """Convert a Work to human-readable text format.
+
+    Args:
+        work: Work object to convert
+        include_abstract: Whether to include abstract
+
+    Returns:
+        Formatted text string
+    """
+    lines = []
+
+    # Title
+    title = work.title or "Untitled"
+    year = f"({work.year})" if work.year else ""
+    lines.append(f"{title} {year}".strip())
+
+    # Authors
+    if work.authors:
+        authors_str = ", ".join(work.authors[:5])
+        if len(work.authors) > 5:
+            authors_str += f" et al. ({len(work.authors)} authors)"
+        lines.append(f"Authors: {authors_str}")
+
+    # Journal and DOI
+    if work.journal:
+        journal_line = f"Journal: {work.journal}"
+        if work.volume:
+            journal_line += f", {work.volume}"
+        if work.issue:
+            journal_line += f"({work.issue})"
+        if work.page:
+            journal_line += f", {work.page}"
+        lines.append(journal_line)
+
+    lines.append(f"DOI: {work.doi}")
+
+    # Impact factor
+    if work.impact_factor:
+        lines.append(
+            f"Impact Factor: {work.impact_factor:.2f} ({work.impact_factor_source or 'unknown'})"
+        )
+
+    # Citation count
+    if work.citation_count is not None:
+        lines.append(f"Citations: {work.citation_count}")
+
+    # Abstract
+    if include_abstract and work.abstract:
+        # Strip XML tags
+        import re
+
+        abstract = re.sub(r"<[^>]+>", " ", work.abstract)
+        abstract = re.sub(r"\s+", " ", abstract).strip()
+        lines.append(f"Abstract: {abstract}")
+
+    return "\n".join(lines)
+
+
+def work_to_bibtex(work: "Work") -> str:
+    """Convert a Work to BibTeX format.
+
+    Args:
+        work: Work object to convert
+
+    Returns:
+        BibTeX entry string
+    """
+    key = _sanitize_bibtex_key(work.doi) if work.doi else "unknown"
+    work_type = work.type or "article"
+
+    # Map CrossRef types to BibTeX types
+    bibtex_type_map = {
+        "journal-article": "article",
+        "book-chapter": "incollection",
+        "book": "book",
+        "proceedings-article": "inproceedings",
+        "dissertation": "phdthesis",
+        "report": "techreport",
+    }
+    bibtex_type = bibtex_type_map.get(work_type, "misc")
+
+    lines = [f"@{bibtex_type}{{{key},"]
+
+    if work.title:
+        lines.append(f" title = {{{_escape_bibtex(work.title)}}},")
+
+    if work.authors:
+        authors = " and ".join(work.authors)
+        lines.append(f" author = {{{_escape_bibtex(authors)}}},")
+
+    if work.year:
+        lines.append(f" year = {{{work.year}}},")
+
+    if work.journal:
+        lines.append(f" journal = {{{_escape_bibtex(work.journal)}}},")
+
+    if work.volume:
+        lines.append(f" volume = {{{work.volume}}},")
+
+    if work.issue:
+        lines.append(f" number = {{{work.issue}}},")
+
+    if work.page:
+        lines.append(f" pages = {{{work.page}}},")
+
+    if work.publisher:
+        lines.append(f" publisher = {{{_escape_bibtex(work.publisher)}}},")
+
+    if work.doi:
+        lines.append(f" doi = {{{work.doi}}},")
+
+    if work.url:
+        lines.append(f" url = {{{work.url}}},")
+
+    if work.issn:
+        lines.append(f" issn = {{{work.issn}}},")
+
+    lines.append("}")
+
+    return "\n".join(lines)
+
+
+def export_text(
+    works: List["Work"],
+    include_abstract: bool = False,
+    query: Optional[str] = None,
+    total: Optional[int] = None,
+    elapsed_ms: Optional[float] = None,
+) -> str:
+    """Export works to text format.
+
+    Args:
+        works: List of Work objects
+        include_abstract: Whether to include abstracts
+        query: Original search query (for header)
+        total: Total number of matches
+        elapsed_ms: Search time in milliseconds
+
+    Returns:
+        Formatted text string
+    """
+    lines = []
+
+    # Header
+    if query is not None:
+        lines.append(f"Search: {query}")
+    if total is not None:
+        lines.append(f"Found: {total:,} matches")
+    if elapsed_ms is not None:
+        lines.append(f"Time: {elapsed_ms:.1f}ms")
+    lines.append("")
+    lines.append("=" * 60)
+    lines.append("")
+
+    # Works
+    for i, work in enumerate(works, 1):
+        lines.append(f"[{i}]")
+        lines.append(work_to_text(work, include_abstract=include_abstract))
+        lines.append("")
+        lines.append("-" * 40)
+        lines.append("")
+
+    return "\n".join(lines)
+
+
+def export_json(
+    works: List["Work"],
+    query: Optional[str] = None,
+    total: Optional[int] = None,
+    elapsed_ms: Optional[float] = None,
+    indent: int = 2,
+) -> str:
+    """Export works to JSON format.
+
+    Args:
+        works: List of Work objects
+        query: Original search query
+        total: Total number of matches
+        elapsed_ms: Search time in milliseconds
+        indent: JSON indentation
+
+    Returns:
+        JSON string
+    """
+    data = {
+        "works": [w.to_dict() for w in works],
+    }
+
+    if query is not None:
+        data["query"] = query
+    if total is not None:
+        data["total"] = total
+    if elapsed_ms is not None:
+        data["elapsed_ms"] = elapsed_ms
+
+    return _json.dumps(data, indent=indent, ensure_ascii=False)
+
+
+def export_bibtex(works: List["Work"]) -> str:
+    """Export works to BibTeX format.
+
+    Args:
+        works: List of Work objects
+
+    Returns:
+        BibTeX string with all entries
+    """
+    entries = [work_to_bibtex(w) for w in works]
+    return "\n\n".join(entries)
+
+
+def save(
+    data: Union["Work", "SearchResult", List["Work"]],
+    path: Union[str, _Path],
+    format: str = "json",
+    include_abstract: bool = True,
+) -> str:
+    """Save Work(s) or SearchResult to a file.
+
+    Args:
+        data: Work, SearchResult, or list of Works to save
+        path: Output file path
+        format: Output format ("text", "json", "bibtex")
+        include_abstract: Include abstracts in text format
+
+    Returns:
+        Path to saved file
+
+    Raises:
+        ValueError: If format is not supported
+
+    Examples:
+        >>> from crossref_local import search, save
+        >>> results = search("machine learning", limit=10)
+        >>> save(results, "results.json")
+        >>> save(results, "results.bib", format="bibtex")
+        >>> save(results, "results.txt", format="text")
+    """
+    from .models import SearchResult, Work
+
+    if format not in SUPPORTED_FORMATS:
+        raise ValueError(
+            f"Unsupported format: {format}. "
+            f"Supported formats: {', '.join(SUPPORTED_FORMATS)}"
+        )
+
+    path = _Path(path)
+
+    # Extract works and metadata
+    if isinstance(data, Work):
+        works = [data]
+        query = None
+        total = None
+        elapsed_ms = None
+    elif isinstance(data, SearchResult):
+        works = data.works
+        query = data.query
+        total = data.total
+        elapsed_ms = data.elapsed_ms
+    elif isinstance(data, list):
+        works = data
+        query = None
+        total = len(data)
+        elapsed_ms = None
+    else:
+        raise TypeError(f"Unsupported data type: {type(data)}")
+
+    # Generate content
+    if format == "text":
+        content = export_text(
+            works,
+            include_abstract=include_abstract,
+            query=query,
+            total=total,
+            elapsed_ms=elapsed_ms,
+        )
+    elif format == "json":
+        content = export_json(
+            works,
+            query=query,
+            total=total,
+            elapsed_ms=elapsed_ms,
+        )
+    elif format == "bibtex":
+        content = export_bibtex(works)
+    else:
+        raise ValueError(f"Unsupported format: {format}")
+
+    # Write to file
+    path.parent.mkdir(parents=True, exist_ok=True)
+    path.write_text(content, encoding="utf-8")
+
+    return str(path)
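
A minimal usage sketch of the export API introduced above (not part of the diff). It assumes the package is installed and that search and save are re-exported from the top-level crossref_local package, as the docstring examples in save() suggest; the query string and file names are illustrative.

# Illustrative only: exercises the export helpers added in 0.5.1.
from crossref_local import search, save  # top-level exports per the save() docstring

results = search("machine learning", limit=10)

save(results, "results.json")                        # default format="json"
save(results, "results.bib", format="bibtex")        # one BibTeX entry per Work
save(results, "results.txt", format="text", include_abstract=True)

# Individual Work objects also gain to_text(), to_bibtex(), and save():
first = results[0]
print(first.to_bibtex())
first.save("paper.bib", format="bibtex")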
crossref_local/_core/fts.py
CHANGED

@@ -5,7 +5,7 @@ import time as _time
 from typing import List, Optional

 from .db import Database, get_db
-from .models import SearchResult, Work
+from .models import LimitInfo, SearchResult, Work

 __all__ = [
     "search",

@@ -102,11 +102,30 @@ def search(
         metadata = db._decompress_metadata(row["metadata"])
         works.append(Work.from_metadata(row["doi"], metadata))

+    # Build limit info
+    returned = len(works)
+    capped = returned < total and returned == limit
+    capped_reason = None
+    if capped:
+        capped_reason = (
+            f"crossref-local: Limited to {limit} results (total available: {total})"
+        )
+
+    limit_info = LimitInfo(
+        requested=limit,
+        returned=returned,
+        total_available=total,
+        capped=capped,
+        capped_reason=capped_reason,
+        stage="crossref-local",
+    )
+
     return SearchResult(
         works=works,
         total=total,
         query=query,
         elapsed_ms=elapsed_ms,
+        limit_info=limit_info,
     )

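
The capping rule added to fts.search() marks a result set as capped only when it is both smaller than the total number of matches and exactly equal to the requested limit. A self-contained sketch of that rule, with made-up numbers:

# Standalone illustration of the capping rule used in fts.search() above.
limit, total = 10, 1234        # hypothetical: 10 results requested, 1234 matches in the index
returned = min(limit, total)   # what the limited query would hand back

capped = returned < total and returned == limit
capped_reason = (
    f"crossref-local: Limited to {limit} results (total available: {total})"
    if capped
    else None
)
print(capped)         # True
print(capped_reason)  # crossref-local: Limited to 10 results (total available: 1234)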
crossref_local/_core/models.py
CHANGED

@@ -7,6 +7,7 @@ from typing import List, Optional
 __all__ = [
     "Work",
     "SearchResult",
+    "LimitInfo",
 ]


@@ -48,6 +49,8 @@ class Work:
     url: Optional[str] = None
     citation_count: Optional[int] = None
     references: List[str] = _field(default_factory=list)
+    impact_factor: Optional[float] = None
+    impact_factor_source: Optional[str] = None

     @classmethod
     def from_metadata(cls, doi: str, metadata: dict) -> "Work":

@@ -130,6 +133,8 @@ class Work:
             "url": self.url,
             "citation_count": self.citation_count,
             "references": self.references,
+            "impact_factor": self.impact_factor,
+            "impact_factor_source": self.impact_factor_source,
         }

     def citation(self, style: str = "apa") -> str:

@@ -163,6 +168,84 @@ class Work:

         return ". ".join(filter(None, parts))

+    def to_text(self, include_abstract: bool = False) -> str:
+        """
+        Format as human-readable text.
+
+        Args:
+            include_abstract: Include abstract in output
+
+        Returns:
+            Formatted text string
+        """
+        from .export import work_to_text
+
+        return work_to_text(self, include_abstract=include_abstract)
+
+    def to_bibtex(self) -> str:
+        """
+        Format as BibTeX entry.
+
+        Returns:
+            BibTeX string
+        """
+        from .export import work_to_bibtex
+
+        return work_to_bibtex(self)
+
+    def save(self, path: str, format: str = "json") -> str:
+        """
+        Save work to file.
+
+        Args:
+            path: Output file path
+            format: Output format ("text", "json", "bibtex")
+
+        Returns:
+            Path to saved file
+
+        Examples:
+            >>> work = get("10.1038/nature12373")
+            >>> work.save("paper.json")
+            >>> work.save("paper.bib", format="bibtex")
+        """
+        from .export import save
+
+        return save(self, path, format=format)
+
+
+@_dataclass
+class LimitInfo:
+    """
+    Information about result limiting at each stage.
+
+    Attributes:
+        requested: Number of results requested
+        returned: Number of results actually returned
+        total_available: Total matches in database
+        capped: Whether results were capped
+        capped_reason: Why results were capped (if applicable)
+        stage: Which stage applied this limit (e.g., "crossref-local", "scitex", "django")
+    """
+
+    requested: int
+    returned: int
+    total_available: int
+    capped: bool = False
+    capped_reason: Optional[str] = None
+    stage: str = "crossref-local"
+
+    def to_dict(self) -> dict:
+        """Convert to dictionary."""
+        return {
+            "requested": self.requested,
+            "returned": self.returned,
+            "total_available": self.total_available,
+            "capped": self.capped,
+            "capped_reason": self.capped_reason,
+            "stage": self.stage,
+        }
+

 @_dataclass
 class SearchResult:

@@ -174,12 +257,14 @@ class SearchResult:
         total: Total number of matches
         query: Original search query
         elapsed_ms: Search time in milliseconds
+        limit_info: Information about result limiting
     """

     works: List[Work]
     total: int
     query: str
     elapsed_ms: float
+    limit_info: Optional[LimitInfo] = None

     def __len__(self) -> int:
         return len(self.works)

@@ -189,3 +274,27 @@ class SearchResult:

     def __getitem__(self, idx):
         return self.works[idx]
+
+    def save(
+        self, path: str, format: str = "json", include_abstract: bool = True
+    ) -> str:
+        """
+        Save search results to file.
+
+        Args:
+            path: Output file path
+            format: Output format ("text", "json", "bibtex")
+            include_abstract: Include abstracts in text format
+
+        Returns:
+            Path to saved file
+
+        Examples:
+            >>> results = search("machine learning", limit=10)
+            >>> results.save("results.json")
+            >>> results.save("results.bib", format="bibtex")
+            >>> results.save("results.txt", format="text")
+        """
+        from .export import save
+
+        return save(self, path, format=format, include_abstract=include_abstract)
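
To make the shape of the new metadata concrete, here is a short sketch (not part of the diff) that builds a LimitInfo directly and serializes it; the values are invented, and the import goes through the private _core module where the dataclass is defined.

# Invented values; LimitInfo is the dataclass added to crossref_local/_core/models.py above.
from crossref_local._core.models import LimitInfo

info = LimitInfo(
    requested=10,
    returned=10,
    total_available=1234,
    capped=True,
    capped_reason="crossref-local: Limited to 10 results (total available: 1234)",
)
print(info.to_dict())
# {'requested': 10, 'returned': 10, 'total_available': 1234, 'capped': True,
#  'capped_reason': 'crossref-local: Limited to 10 results (total available: 1234)',
#  'stage': 'crossref-local'}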
crossref_local/_remote/base.py
CHANGED

@@ -10,7 +10,7 @@ import urllib.parse
 import urllib.error
 from typing import List, Optional, Dict, Any

-from .._core.models import
+from .._core.models import SearchResult, Work
 from .._core.config import DEFAULT_PORT

 # Default URL uses SCITEX port convention

@@ -104,6 +104,7 @@ class RemoteClient:
         year: Optional[int] = None,
         limit: int = 10,
         offset: int = 0,
+        with_if: bool = False,
     ) -> SearchResult:
         """
         Search for papers.

@@ -114,8 +115,9 @@
             title: Search by title (explicit)
             authors: Search by author name
             year: Filter by publication year
-            limit: Maximum results (default: 10
+            limit: Maximum results (default: 10)
             offset: Skip first N results for pagination
+            with_if: Include impact factor data (OpenAlex)

         Returns:
             SearchResult with matching works

@@ -125,8 +127,9 @@

         params = {
             "q": search_query,
-            "limit":
+            "limit": limit,
             "offset": offset,
+            "with_if": with_if,
         }

         data = self._request("/works", params)

@@ -142,19 +145,38 @@
                 authors=item.get("authors", []),
                 year=item.get("year"),
                 journal=item.get("journal"),
+                issn=item.get("issn"),
                 volume=item.get("volume"),
                 issue=item.get("issue"),
                 page=item.get("page") or item.get("pages"),
                 abstract=item.get("abstract"),
                 citation_count=item.get("citation_count"),
+                impact_factor=item.get("impact_factor"),
+                impact_factor_source=item.get("impact_factor_source"),
             )
             works.append(work)

+        # Parse limit_info from response
+        limit_info = None
+        if data.get("limit_info"):
+            from .._core.models import LimitInfo
+
+            li = data["limit_info"]
+            limit_info = LimitInfo(
+                requested=li.get("requested", limit),
+                returned=li.get("returned", len(works)),
+                total_available=li.get("total_available", data.get("total", 0)),
+                capped=li.get("capped", False),
+                capped_reason=li.get("capped_reason"),
+                stage=li.get("stage", "crossref-local-remote"),
+            )
+
         return SearchResult(
             works=works,
             total=data.get("total", len(works)),
             query=query or title or doi or "",
             elapsed_ms=data.get("elapsed_ms", 0.0),
+            limit_info=limit_info,
         )

     def get(self, doi: str) -> Optional[Work]:
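
A hedged sketch of how a caller might use the new with_if flag and limit_info on the remote client (not part of the diff). It assumes RemoteClient can be constructed with its defaults, that the query is passed as a query= keyword (only the tail of the search signature is visible in this hunk), and that a crossref-local server is reachable.

# Sketch only: constructor defaults, the query= parameter name, and a running server are assumptions.
from crossref_local._remote.base import RemoteClient

client = RemoteClient()
result = client.search(query="sleep spindles", limit=5, with_if=True)  # with_if requests impact-factor data

for work in result.works:
    print(work.doi, work.impact_factor, work.impact_factor_source)

if result.limit_info and result.limit_info.capped:
    print(result.limit_info.capped_reason)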
crossref_local/_server/models.py
CHANGED

@@ -20,6 +20,19 @@ class WorkResponse(BaseModel):
     page: Optional[str] = None
     abstract: Optional[str] = None
     citation_count: Optional[int] = None
+    impact_factor: Optional[float] = None
+    impact_factor_source: Optional[str] = None
+
+
+class LimitInfoResponse(BaseModel):
+    """Information about result limiting."""
+
+    requested: int
+    returned: int
+    total_available: int
+    capped: bool = False
+    capped_reason: Optional[str] = None
+    stage: str = "crossref-local"


 class SearchResponse(BaseModel):

@@ -30,6 +43,7 @@ class SearchResponse(BaseModel):
     returned: int
     elapsed_ms: float
     results: List[WorkResponse]
+    limit_info: Optional[LimitInfoResponse] = None


 class InfoResponse(BaseModel):
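
Finally, a small sketch (not part of the diff) of how a client might validate the limit_info block of a /works response against the new Pydantic model; the JSON fragment is invented and shows only the fields visible in this diff.

# Invented response fragment; field names follow LimitInfoResponse as defined above.
from crossref_local._server.models import LimitInfoResponse

fragment = {
    "requested": 10,
    "returned": 10,
    "total_available": 1234,
    "capped": True,
    "capped_reason": "crossref-local: Limited to 10 results (total available: 1234)",
    "stage": "crossref-local",
}

info = LimitInfoResponse(**fragment)
print(info.capped, info.total_available)  # True 1234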