scitex 2.17.3__py3-none-any.whl → 2.17.4__py3-none-any.whl
This diff compares the contents of publicly released package versions from a supported registry. It is provided for informational purposes only and reflects the packages exactly as they appear in their public registries.
- scitex/_dev/_dashboard/_routes.py +13 -0
- scitex/_dev/_dashboard/_scripts.py +144 -23
- scitex/_dev/_dashboard/_styles.py +90 -0
- scitex/_dev/_dashboard/_templates.py +14 -1
- scitex/_dev/_rtd.py +122 -0
- scitex/_dev/_ssh.py +38 -8
- scitex/dev/plt/data/mpl/PLOTTING_FUNCTIONS.yaml +90 -0
- scitex/dev/plt/data/mpl/PLOTTING_SIGNATURES.yaml +1571 -0
- scitex/dev/plt/data/mpl/PLOTTING_SIGNATURES_DETAILED.yaml +6262 -0
- scitex/dev/plt/data/mpl/SIGNATURES_FLATTENED.yaml +1274 -0
- scitex/dev/plt/data/mpl/dir_ax.txt +459 -0
- scitex/scholar/_mcp/crossref_handlers.py +45 -7
- scitex/scholar/_mcp/openalex_handlers.py +45 -7
- scitex/scholar/config/default.yaml +2 -0
- scitex/scholar/data/.gitkeep +0 -0
- scitex/scholar/data/README.md +44 -0
- scitex/scholar/data/bib_files/bibliography.bib +1952 -0
- scitex/scholar/data/bib_files/neurovista.bib +277 -0
- scitex/scholar/data/bib_files/neurovista_enriched.bib +441 -0
- scitex/scholar/data/bib_files/neurovista_enriched_enriched.bib +441 -0
- scitex/scholar/data/bib_files/neurovista_processed.bib +338 -0
- scitex/scholar/data/bib_files/openaccess.bib +89 -0
- scitex/scholar/data/bib_files/pac-seizure_prediction_enriched.bib +2178 -0
- scitex/scholar/data/bib_files/pac.bib +698 -0
- scitex/scholar/data/bib_files/pac_enriched.bib +1061 -0
- scitex/scholar/data/bib_files/pac_processed.bib +0 -0
- scitex/scholar/data/bib_files/pac_titles.txt +75 -0
- scitex/scholar/data/bib_files/paywalled.bib +98 -0
- scitex/scholar/data/bib_files/related-papers-by-coauthors.bib +58 -0
- scitex/scholar/data/bib_files/related-papers-by-coauthors_enriched.bib +87 -0
- scitex/scholar/data/bib_files/seizure_prediction.bib +694 -0
- scitex/scholar/data/bib_files/seizure_prediction_processed.bib +0 -0
- scitex/scholar/data/bib_files/test_complete_enriched.bib +437 -0
- scitex/scholar/data/bib_files/test_final_enriched.bib +437 -0
- scitex/scholar/data/bib_files/test_seizure.bib +46 -0
- scitex/scholar/data/impact_factor/JCR_IF_2022.xlsx +0 -0
- scitex/scholar/data/impact_factor/JCR_IF_2024.db +0 -0
- scitex/scholar/data/impact_factor/JCR_IF_2024.xlsx +0 -0
- scitex/scholar/data/impact_factor/JCR_IF_2024_v01.db +0 -0
- scitex/scholar/data/impact_factor.db +0 -0
- scitex/scholar/local_dbs/__init__.py +5 -1
- scitex/scholar/local_dbs/export.py +93 -0
- scitex/scholar/local_dbs/unified.py +505 -0
- scitex/scholar/metadata_engines/ScholarEngine.py +11 -0
- scitex/scholar/metadata_engines/individual/OpenAlexLocalEngine.py +346 -0
- scitex/scholar/metadata_engines/individual/__init__.py +1 -0
- {scitex-2.17.3.dist-info → scitex-2.17.4.dist-info}/METADATA +1 -1
- {scitex-2.17.3.dist-info → scitex-2.17.4.dist-info}/RECORD +51 -22
- scitex/scholar/url_finder/.tmp/open_url/KNOWN_RESOLVERS.py +0 -462
- scitex/scholar/url_finder/.tmp/open_url/README.md +0 -223
- scitex/scholar/url_finder/.tmp/open_url/_DOIToURLResolver.py +0 -694
- scitex/scholar/url_finder/.tmp/open_url/_OpenURLResolver.py +0 -1160
- scitex/scholar/url_finder/.tmp/open_url/_ResolverLinkFinder.py +0 -344
- scitex/scholar/url_finder/.tmp/open_url/__init__.py +0 -24
- {scitex-2.17.3.dist-info → scitex-2.17.4.dist-info}/WHEEL +0 -0
- {scitex-2.17.3.dist-info → scitex-2.17.4.dist-info}/entry_points.txt +0 -0
- {scitex-2.17.3.dist-info → scitex-2.17.4.dist-info}/licenses/LICENSE +0 -0
--- /dev/null
+++ scitex/scholar/local_dbs/export.py
@@ -0,0 +1,93 @@
+#!/usr/bin/env python3
+# Timestamp: 2026-02-04
+# File: src/scitex/scholar/local_dbs/export.py
+"""Export functionality for unified local database results.
+
+Supports multiple output formats:
+- text: Human-readable formatted text
+- json: JSON format with all fields
+- bibtex: BibTeX bibliography format
+"""
+
+from __future__ import annotations
+
+from pathlib import Path
+from typing import TYPE_CHECKING, List, Union
+
+if TYPE_CHECKING:
+    from .unified import UnifiedSearchResult, UnifiedWork
+
+__all__ = [
+    "save",
+    "SUPPORTED_FORMATS",
+]
+
+SUPPORTED_FORMATS = ["text", "json", "bibtex"]
+
+
+def save(
+    data: Union[UnifiedWork, UnifiedSearchResult, List[UnifiedWork]],
+    path: str,
+    format: str = "json",
+) -> str:
+    """Save UnifiedWork(s) or UnifiedSearchResult to a file.
+
+    Args:
+        data: UnifiedWork, UnifiedSearchResult, or list of UnifiedWorks
+        path: Output file path
+        format: Output format ("text", "json", "bibtex")
+
+    Returns
+    -------
+    Path to saved file
+
+    Raises
+    ------
+    ValueError: If format is not supported
+
+    Examples
+    --------
+    >>> from scitex.scholar.local_dbs import search, save
+    >>> results = search("machine learning", limit=10)
+    >>> save(results, "results.json")
+    >>> save(results, "results.bib", format="bibtex")
+    >>> save(results, "results.txt", format="text")
+    """
+    from .unified import UnifiedSearchResult, UnifiedWork, to_bibtex, to_json, to_text
+
+    if format not in SUPPORTED_FORMATS:
+        raise ValueError(
+            f"Unsupported format: {format}. "
+            f"Supported formats: {', '.join(SUPPORTED_FORMATS)}"
+        )
+
+    path = Path(path)
+
+    # Extract works
+    if isinstance(data, UnifiedWork):
+        works = [data]
+    elif isinstance(data, UnifiedSearchResult):
+        works = data.works
+    elif isinstance(data, list):
+        works = data
+    else:
+        raise TypeError(f"Unsupported data type: {type(data)}")
+
+    # Generate content
+    if format == "text":
+        content = to_text(works)
+    elif format == "json":
+        content = to_json(works)
+    elif format == "bibtex":
+        content = to_bibtex(works)
+    else:
+        raise ValueError(f"Unsupported format: {format}")
+
+    # Write to file
+    path.parent.mkdir(parents=True, exist_ok=True)
+    path.write_text(content, encoding="utf-8")
+
+    return str(path)
+
+
+# EOF
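A minimal usage sketch for the new `export.py` module, mirroring its own doctest; it assumes at least one local backend (`crossref-local` or `openalex-local`) is installed and indexed, and the output paths are illustrative:

```python
# Sketch only: exercises save() from the diff above.
from scitex.scholar.local_dbs import search, save

results = search("machine learning", limit=10)

# save() dispatches on `format`, creates parent directories as
# needed, and returns the path it wrote as a string.
save(results, "out/results.json")                  # default format: json
save(results, "out/results.bib", format="bibtex")
save(results, "out/results.txt", format="text")

# Unsupported formats fail fast, before anything is written.
try:
    save(results, "out/results.yaml", format="yaml")
except ValueError as err:
    print(err)  # Unsupported format: yaml. Supported formats: text, json, bibtex
```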
--- /dev/null
+++ scitex/scholar/local_dbs/unified.py
@@ -0,0 +1,505 @@
+#!/usr/bin/env python3
+# Timestamp: 2026-02-04
+# File: src/scitex/scholar/local_dbs/unified.py
+"""Unified search across CrossRef and OpenAlex local databases.
+
+This module provides a single interface for searching both databases
+with automatic deduplication and result merging.
+
+Usage:
+    >>> from scitex.scholar.local_dbs.unified import search, get, info
+    >>> results = search("hippocampal sharp wave ripples", limit=50)
+    >>> print(f"Found {len(results)} papers")
+    >>> # Export to different formats
+    >>> from scitex.scholar.local_dbs.unified import to_json, to_bibtex, to_text
+    >>> print(to_bibtex(results[:5]))
+"""
+
+from __future__ import annotations
+
+import asyncio
+import concurrent.futures
+import json
+from dataclasses import asdict, dataclass, field
+from typing import Any, Dict, List, Literal, Optional, Tuple, Union
+
+__all__ = [
+    "search",
+    "search_async",
+    "get",
+    "info",
+    "UnifiedWork",
+    "UnifiedSearchResult",
+    "to_json",
+    "to_bibtex",
+    "to_text",
+    "save",
+    "SUPPORTED_FORMATS",
+]
+
+# Import save from export module
+from .export import SUPPORTED_FORMATS, save
+
+# Try to import both databases
+_crossref_available = False
+_openalex_available = False
+
+try:
+    from crossref_local import Work as CRWork
+    from crossref_local import get as cr_get
+    from crossref_local import info as cr_info
+    from crossref_local import search as cr_search
+
+    _crossref_available = True
+except ImportError:
+    cr_search = cr_get = cr_info = CRWork = None
+
+try:
+    from openalex_local import Work as OAWork
+    from openalex_local import get as oa_get
+    from openalex_local import info as oa_info
+    from openalex_local import search as oa_search
+
+    _openalex_available = True
+except ImportError:
+    oa_search = oa_get = oa_info = OAWork = None
+
+
+@dataclass
+class UnifiedWork:
+    """Unified work representation merging CrossRef and OpenAlex data."""
+
+    doi: Optional[str] = None
+    title: Optional[str] = None
+    authors: List[str] = field(default_factory=list)
+    year: Optional[int] = None
+    journal: Optional[str] = None
+    abstract: Optional[str] = None
+    citation_count: Optional[int] = None
+    is_open_access: bool = False
+    oa_url: Optional[str] = None
+    source: str = "unknown"  # "crossref", "openalex", or "merged"
+
+    # Extra fields from OpenAlex
+    openalex_id: Optional[str] = None
+    concepts: List[Dict] = field(default_factory=list)
+    topics: List[Dict] = field(default_factory=list)
+
+    # Extra fields from CrossRef
+    issn: Optional[str] = None
+    references: List[str] = field(default_factory=list)
+    impact_factor: Optional[float] = None
+
+    def to_dict(self) -> Dict[str, Any]:
+        """Convert to dictionary."""
+        return asdict(self)
+
+
+@dataclass
+class UnifiedSearchResult:
+    """Container for unified search results."""
+
+    works: List[UnifiedWork]
+    total_crossref: int = 0
+    total_openalex: int = 0
+    duplicates_removed: int = 0
+    query: str = ""
+
+    def __len__(self) -> int:
+        return len(self.works)
+
+    def __iter__(self):
+        return iter(self.works)
+
+    def __getitem__(self, idx):
+        return self.works[idx]
+
+
+def _crossref_work_to_unified(work) -> UnifiedWork:
+    """Convert CrossRef work to unified format."""
+    return UnifiedWork(
+        doi=work.doi,
+        title=work.title,
+        authors=work.authors if work.authors else [],
+        year=work.year,
+        journal=work.journal,
+        abstract=work.abstract,
+        citation_count=work.citation_count,
+        source="crossref",
+        issn=work.issn if hasattr(work, "issn") else None,
+        references=work.references if hasattr(work, "references") else [],
+        impact_factor=work.impact_factor if hasattr(work, "impact_factor") else None,
+    )
+
+
+def _openalex_work_to_unified(work) -> UnifiedWork:
+    """Convert OpenAlex work to unified format."""
+    return UnifiedWork(
+        doi=work.doi,
+        title=work.title,
+        authors=work.authors if work.authors else [],
+        year=work.year,
+        journal=work.source if hasattr(work, "source") else None,
+        abstract=work.abstract,
+        citation_count=work.cited_by_count if hasattr(work, "cited_by_count") else None,
+        is_open_access=work.is_oa if hasattr(work, "is_oa") else False,
+        oa_url=work.oa_url if hasattr(work, "oa_url") else None,
+        source="openalex",
+        openalex_id=work.openalex_id if hasattr(work, "openalex_id") else None,
+        concepts=work.concepts if hasattr(work, "concepts") else [],
+        topics=work.topics if hasattr(work, "topics") else [],
+    )
+
+
+def _merge_works(cr_work: UnifiedWork, oa_work: UnifiedWork) -> UnifiedWork:
+    """Merge CrossRef and OpenAlex works, preferring more complete data."""
+    return UnifiedWork(
+        doi=cr_work.doi or oa_work.doi,
+        title=cr_work.title or oa_work.title,
+        authors=cr_work.authors or oa_work.authors,
+        year=cr_work.year or oa_work.year,
+        journal=cr_work.journal or oa_work.journal,
+        # Prefer OpenAlex abstract (more complete)
+        abstract=oa_work.abstract or cr_work.abstract,
+        # Prefer higher citation count
+        citation_count=max(
+            cr_work.citation_count or 0,
+            oa_work.citation_count or 0,
+        )
+        or None,
+        is_open_access=oa_work.is_open_access,
+        oa_url=oa_work.oa_url,
+        source="merged",
+        # OpenAlex fields
+        openalex_id=oa_work.openalex_id,
+        concepts=oa_work.concepts,
+        topics=oa_work.topics,
+        # CrossRef fields
+        issn=cr_work.issn,
+        references=cr_work.references,
+        impact_factor=cr_work.impact_factor,
+    )
+
+
+def _deduplicate_and_merge(
+    cr_works: List[UnifiedWork], oa_works: List[UnifiedWork]
+) -> Tuple[List[UnifiedWork], int]:
+    """Deduplicate and merge works from both sources."""
+    # Index by DOI for fast lookup
+    doi_to_cr: Dict[str, UnifiedWork] = {}
+    for w in cr_works:
+        if w.doi:
+            doi_to_cr[w.doi.lower()] = w
+
+    doi_to_oa: Dict[str, UnifiedWork] = {}
+    for w in oa_works:
+        if w.doi:
+            doi_to_oa[w.doi.lower()] = w
+
+    results: List[UnifiedWork] = []
+    seen_dois: set = set()
+    duplicates = 0
+
+    # Process CrossRef works (merge with OpenAlex if exists)
+    for w in cr_works:
+        if w.doi:
+            doi_lower = w.doi.lower()
+            if doi_lower in seen_dois:
+                duplicates += 1
+                continue
+            seen_dois.add(doi_lower)
+
+            if doi_lower in doi_to_oa:
+                # Merge with OpenAlex data
+                merged = _merge_works(w, doi_to_oa[doi_lower])
+                results.append(merged)
+                duplicates += 1  # Count the OpenAlex duplicate
+            else:
+                results.append(w)
+        else:
+            results.append(w)
+
+    # Add OpenAlex works not in CrossRef
+    for w in oa_works:
+        if w.doi:
+            if w.doi.lower() not in seen_dois:
+                results.append(w)
+                seen_dois.add(w.doi.lower())
+        else:
+            results.append(w)
+
+    return results, duplicates
+
+
+def search(
+    query: str,
+    limit: int = 50,
+    sources: Optional[List[Literal["crossref", "openalex"]]] = None,
+    merge_duplicates: bool = True,
+    **kwargs,
+) -> UnifiedSearchResult:
+    """
+    Search both CrossRef and OpenAlex databases.
+
+    Args:
+        query: Search query string
+        limit: Maximum results per source (total may be up to 2x if no dedup)
+        sources: Which sources to search. Default: both available sources
+        merge_duplicates: Whether to merge works found in both databases
+        **kwargs: Additional arguments passed to search functions
+
+    Returns
+    -------
+    UnifiedSearchResult with merged works
+    """
+    if sources is None:
+        sources = []
+        if _crossref_available:
+            sources.append("crossref")
+        if _openalex_available:
+            sources.append("openalex")
+
+    if not sources:
+        raise RuntimeError(
+            "No search sources available. Install crossref-local or openalex-local"
+        )
+
+    cr_works: List[UnifiedWork] = []
+    oa_works: List[UnifiedWork] = []
+
+    # Search in parallel using thread pool
+    def search_crossref():
+        if "crossref" in sources and _crossref_available:
+            results = cr_search(query, limit=limit, **kwargs)
+            return [_crossref_work_to_unified(w) for w in results]
+        return []
+
+    def search_openalex():
+        if "openalex" in sources and _openalex_available:
+            results = oa_search(query, limit=limit, **kwargs)
+            return [_openalex_work_to_unified(w) for w in results]
+        return []
+
+    with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor:
+        cr_future = executor.submit(search_crossref)
+        oa_future = executor.submit(search_openalex)
+
+        try:
+            cr_works = cr_future.result(timeout=30)
+        except Exception:
+            cr_works = []
+
+        try:
+            oa_works = oa_future.result(timeout=30)
+        except Exception:
+            oa_works = []
+
+    # Deduplicate and merge
+    duplicates = 0
+    if merge_duplicates:
+        works, duplicates = _deduplicate_and_merge(cr_works, oa_works)
+    else:
+        works = cr_works + oa_works
+
+    return UnifiedSearchResult(
+        works=works,
+        total_crossref=len(cr_works),
+        total_openalex=len(oa_works),
+        duplicates_removed=duplicates,
+        query=query,
+    )
+
+
+async def search_async(
+    query: str,
+    limit: int = 50,
+    sources: Optional[List[Literal["crossref", "openalex"]]] = None,
+    merge_duplicates: bool = True,
+    **kwargs,
+) -> UnifiedSearchResult:
+    """Async version of search."""
+    loop = asyncio.get_event_loop()
+    return await loop.run_in_executor(
+        None,
+        lambda: search(
+            query,
+            limit=limit,
+            sources=sources,
+            merge_duplicates=merge_duplicates,
+            **kwargs,
+        ),
+    )
+
+
+def get(doi: str, sources: Optional[List[str]] = None) -> Optional[UnifiedWork]:
+    """
+    Get a specific work by DOI from available sources.
+
+    Args:
+        doi: DOI to look up
+        sources: Which sources to check. Default: all available
+
+    Returns
+    -------
+    UnifiedWork if found, None otherwise
+    """
+    if sources is None:
+        sources = []
+        if _crossref_available:
+            sources.append("crossref")
+        if _openalex_available:
+            sources.append("openalex")
+
+    cr_work = None
+    oa_work = None
+
+    if "crossref" in sources and _crossref_available:
+        try:
+            result = cr_get(doi)
+            if result:
+                cr_work = _crossref_work_to_unified(result)
+        except Exception:
+            pass
+
+    if "openalex" in sources and _openalex_available:
+        try:
+            result = oa_get(doi)
+            if result:
+                oa_work = _openalex_work_to_unified(result)
+        except Exception:
+            pass
+
+    if cr_work and oa_work:
+        return _merge_works(cr_work, oa_work)
+    return cr_work or oa_work
+
+
+def info() -> Dict[str, Any]:
+    """Get status info from all available sources."""
+    result = {
+        "sources": [],
+        "total_works": 0,
+    }
+
+    if _crossref_available:
+        try:
+            cr_info_data = cr_info()
+            result["sources"].append(
+                {
+                    "name": "crossref",
+                    "available": True,
+                    "info": cr_info_data if isinstance(cr_info_data, dict) else {},
+                }
+            )
+            if isinstance(cr_info_data, dict) and "total_works" in cr_info_data:
+                result["total_works"] += cr_info_data["total_works"]
+        except Exception as e:
+            result["sources"].append(
+                {"name": "crossref", "available": False, "error": str(e)}
+            )
+    else:
+        result["sources"].append(
+            {"name": "crossref", "available": False, "error": "Not installed"}
+        )
+
+    if _openalex_available:
+        try:
+            oa_info_data = oa_info()
+            result["sources"].append(
+                {
+                    "name": "openalex",
+                    "available": True,
+                    "info": oa_info_data if isinstance(oa_info_data, dict) else {},
+                }
+            )
+            if isinstance(oa_info_data, dict) and "total_works" in oa_info_data:
+                result["total_works"] += oa_info_data["total_works"]
+        except Exception as e:
+            result["sources"].append(
+                {"name": "openalex", "available": False, "error": str(e)}
+            )
+    else:
+        result["sources"].append(
+            {"name": "openalex", "available": False, "error": "Not installed"}
+        )
+
+    return result
+
+
+# ============================================================================
+# Export Formats
+# ============================================================================
+
+
+def to_json(
+    works: Union[List[UnifiedWork], UnifiedSearchResult], indent: int = 2
+) -> str:
+    """Export works to JSON format."""
+    if isinstance(works, UnifiedSearchResult):
+        works = works.works
+    return json.dumps([w.to_dict() for w in works], indent=indent, ensure_ascii=False)
+
+
+def to_bibtex(works: Union[List[UnifiedWork], UnifiedSearchResult]) -> str:
+    """Export works to BibTeX format."""
+    if isinstance(works, UnifiedSearchResult):
+        works = works.works
+
+    entries = []
+    for i, w in enumerate(works):
+        # Generate citation key
+        first_author = ""
+        if w.authors:
+            first_author = (
+                w.authors[0].split(",")[0].split()[-1].lower().replace(" ", "")
+            )
+        year = w.year or "nodate"
+        key = f"{first_author}{year}_{i + 1}" if first_author else f"paper_{i + 1}"
+
+        lines = [f"@article{{{key},"]
+
+        if w.title:
+            lines.append(f"  title = {{{w.title}}},")
+        if w.authors:
+            lines.append(f"  author = {{{' and '.join(w.authors)}}},")
+        if w.year:
+            lines.append(f"  year = {{{w.year}}},")
+        if w.journal:
+            lines.append(f"  journal = {{{w.journal}}},")
+        if w.doi:
+            lines.append(f"  doi = {{{w.doi}}},")
+        if w.abstract:
+            # Escape special LaTeX chars
+            abstract = (
+                w.abstract.replace("&", r"\&").replace("%", r"\%").replace("$", r"\$")
+            )
+            lines.append(f"  abstract = {{{abstract}}},")
+
+        lines.append("}")
+        entries.append("\n".join(lines))
+
+    return "\n\n".join(entries)
+
+
+def to_text(works: Union[List[UnifiedWork], UnifiedSearchResult]) -> str:
+    """Export works to plain text format (one per line)."""
+    if isinstance(works, UnifiedSearchResult):
+        works = works.works
+
+    lines = []
+    for i, w in enumerate(works, 1):
+        authors = ", ".join(w.authors[:3]) if w.authors else "Unknown"
+        if len(w.authors) > 3:
+            authors += " et al."
+        title = w.title or "No title"
+        year = f"({w.year})" if w.year else ""
+        journal = w.journal or ""
+        doi = f"DOI: {w.doi}" if w.doi else ""
+
+        line = f"{i}. {authors} {year}. {title}. {journal} {doi}".strip()
+        lines.append(line)
+
+    return "\n".join(lines)
+
+
+# EOF
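A sketch of the unified-search flow defined above. Behavior depends on which of `crossref-local` / `openalex-local` is importable (with neither installed, `search()` raises `RuntimeError`), and the DOI below is hypothetical:

```python
from scitex.scholar.local_dbs.unified import search, get, info, to_bibtex

# info() reports per-source availability and a combined work count.
status = info()
print(status["total_works"], [s["name"] for s in status["sources"]])

# Both backends are queried in parallel threads (30 s timeout each);
# hits sharing a DOI (compared case-insensitively) are merged into a
# single UnifiedWork with source == "merged".
results = search("hippocampal sharp wave ripples", limit=50)
print(f"{len(results)} works, {results.duplicates_removed} duplicates merged")

# get() looks up one DOI and merges per-source records the same way.
work = get("10.1234/example.doi")  # hypothetical DOI
if work is not None:
    print(work.title, work.citation_count, work.source)

# Export helpers accept either a list of works or the result object.
print(to_bibtex(results[:5]))
```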
--- scitex/scholar/metadata_engines/ScholarEngine.py (scitex 2.17.3)
+++ scitex/scholar/metadata_engines/ScholarEngine.py (scitex 2.17.4)
@@ -31,6 +31,7 @@ from .individual import ArXivEngine
 from .individual import CrossRefEngine
 from .individual import CrossRefLocalEngine
 from .individual import OpenAlexEngine
+from .individual import OpenAlexLocalEngine
 from .individual import PubMedEngine
 from .individual import SemanticScholarEngine
 from .individual import URLDOIEngine
@@ -232,6 +233,7 @@ class ScholarEngine:
             "CrossRef": CrossRefEngine,
             "CrossRefLocal": CrossRefLocalEngine,
             "OpenAlex": OpenAlexEngine,
+            "OpenAlexLocal": OpenAlexLocalEngine,
             "PubMed": PubMedEngine,
             "Semantic_Scholar": SemanticScholarEngine,
             "arXiv": ArXivEngine,
@@ -247,6 +249,14 @@ class ScholarEngine:
             self._engine_instances[name] = engine_classes[name](
                 "research@example.com", api_url=api_url
             )
+        elif name == "OpenAlexLocal":
+            # Get API URL from config (supports SCITEX_SCHOLAR_OPENALEX_API_URL env var)
+            api_url = self.config.resolve(
+                "openalex_api_url", "http://127.0.0.1:31292"
+            )
+            self._engine_instances[name] = engine_classes[name](
+                "research@example.com", api_url=api_url
+            )
         else:
             self._engine_instances[name] = engine_classes[name](
                 "research@example.com"
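Per the in-diff comment, the endpoint can be overridden via configuration; a hedged sketch (host name illustrative, exact precedence is whatever `config.resolve` implements):

```python
import os

# Point the OpenAlexLocal engine at a non-default API endpoint.
# The fallback "http://127.0.0.1:31292" above applies only when the
# "openalex_api_url" config key and this variable are both unset.
os.environ["SCITEX_SCHOLAR_OPENALEX_API_URL"] = "http://my-openalex-host:31292"  # hypothetical host

# The hunk suggests engine instances are created on demand, so set
# this before the first lookup that instantiates "OpenAlexLocal".
```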
@@ -476,6 +486,7 @@ class ScholarEngine:
         engine_priority = {
             "URL": 6,
             "CrossRefLocal": 5,
+            "OpenAlexLocal": 5,
             "CrossRef": 4,
             "OpenAlex": 3,
             "Semantic_Scholar": 2,
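For context, a priority map like the one above (the hunk shows only its first entries; the real dict may contain more) yields a deterministic ordering when sorted by descending value, so both local mirrors now outrank their remote counterparts. A standalone illustration, not scitex code:

```python
# Entries as shown in the hunk above.
engine_priority = {
    "URL": 6,
    "CrossRefLocal": 5,
    "OpenAlexLocal": 5,
    "CrossRef": 4,
    "OpenAlex": 3,
    "Semantic_Scholar": 2,
}

# Higher value first; Python's stable sort keeps insertion order on
# ties, so CrossRefLocal precedes OpenAlexLocal (both priority 5).
ordered = sorted(engine_priority, key=engine_priority.get, reverse=True)
print(ordered)
# ['URL', 'CrossRefLocal', 'OpenAlexLocal', 'CrossRef', 'OpenAlex', 'Semantic_Scholar']
```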