scitex 2.17.3__py3-none-any.whl → 2.17.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- scitex/_dev/_dashboard/_routes.py +13 -0
- scitex/_dev/_dashboard/_scripts.py +144 -23
- scitex/_dev/_dashboard/_styles.py +90 -0
- scitex/_dev/_dashboard/_templates.py +14 -1
- scitex/_dev/_rtd.py +122 -0
- scitex/_dev/_ssh.py +38 -8
- scitex/dev/plt/data/mpl/PLOTTING_FUNCTIONS.yaml +90 -0
- scitex/dev/plt/data/mpl/PLOTTING_SIGNATURES.yaml +1571 -0
- scitex/dev/plt/data/mpl/PLOTTING_SIGNATURES_DETAILED.yaml +6262 -0
- scitex/dev/plt/data/mpl/SIGNATURES_FLATTENED.yaml +1274 -0
- scitex/dev/plt/data/mpl/dir_ax.txt +459 -0
- scitex/scholar/_mcp/crossref_handlers.py +45 -7
- scitex/scholar/_mcp/openalex_handlers.py +45 -7
- scitex/scholar/config/default.yaml +2 -0
- scitex/scholar/data/.gitkeep +0 -0
- scitex/scholar/data/README.md +44 -0
- scitex/scholar/data/bib_files/bibliography.bib +1952 -0
- scitex/scholar/data/bib_files/neurovista.bib +277 -0
- scitex/scholar/data/bib_files/neurovista_enriched.bib +441 -0
- scitex/scholar/data/bib_files/neurovista_enriched_enriched.bib +441 -0
- scitex/scholar/data/bib_files/neurovista_processed.bib +338 -0
- scitex/scholar/data/bib_files/openaccess.bib +89 -0
- scitex/scholar/data/bib_files/pac-seizure_prediction_enriched.bib +2178 -0
- scitex/scholar/data/bib_files/pac.bib +698 -0
- scitex/scholar/data/bib_files/pac_enriched.bib +1061 -0
- scitex/scholar/data/bib_files/pac_processed.bib +0 -0
- scitex/scholar/data/bib_files/pac_titles.txt +75 -0
- scitex/scholar/data/bib_files/paywalled.bib +98 -0
- scitex/scholar/data/bib_files/related-papers-by-coauthors.bib +58 -0
- scitex/scholar/data/bib_files/related-papers-by-coauthors_enriched.bib +87 -0
- scitex/scholar/data/bib_files/seizure_prediction.bib +694 -0
- scitex/scholar/data/bib_files/seizure_prediction_processed.bib +0 -0
- scitex/scholar/data/bib_files/test_complete_enriched.bib +437 -0
- scitex/scholar/data/bib_files/test_final_enriched.bib +437 -0
- scitex/scholar/data/bib_files/test_seizure.bib +46 -0
- scitex/scholar/data/impact_factor/JCR_IF_2022.xlsx +0 -0
- scitex/scholar/data/impact_factor/JCR_IF_2024.db +0 -0
- scitex/scholar/data/impact_factor/JCR_IF_2024.xlsx +0 -0
- scitex/scholar/data/impact_factor/JCR_IF_2024_v01.db +0 -0
- scitex/scholar/data/impact_factor.db +0 -0
- scitex/scholar/local_dbs/__init__.py +5 -1
- scitex/scholar/local_dbs/export.py +93 -0
- scitex/scholar/local_dbs/unified.py +505 -0
- scitex/scholar/metadata_engines/ScholarEngine.py +11 -0
- scitex/scholar/metadata_engines/individual/OpenAlexLocalEngine.py +346 -0
- scitex/scholar/metadata_engines/individual/__init__.py +1 -0
- {scitex-2.17.3.dist-info → scitex-2.17.4.dist-info}/METADATA +1 -1
- {scitex-2.17.3.dist-info → scitex-2.17.4.dist-info}/RECORD +51 -22
- scitex/scholar/url_finder/.tmp/open_url/KNOWN_RESOLVERS.py +0 -462
- scitex/scholar/url_finder/.tmp/open_url/README.md +0 -223
- scitex/scholar/url_finder/.tmp/open_url/_DOIToURLResolver.py +0 -694
- scitex/scholar/url_finder/.tmp/open_url/_OpenURLResolver.py +0 -1160
- scitex/scholar/url_finder/.tmp/open_url/_ResolverLinkFinder.py +0 -344
- scitex/scholar/url_finder/.tmp/open_url/__init__.py +0 -24
- {scitex-2.17.3.dist-info → scitex-2.17.4.dist-info}/WHEEL +0 -0
- {scitex-2.17.3.dist-info → scitex-2.17.4.dist-info}/entry_points.txt +0 -0
- {scitex-2.17.3.dist-info → scitex-2.17.4.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,462 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env python3
|
|
2
|
-
# -*- coding: utf-8 -*-
|
|
3
|
-
# Time-stamp: "2025-08-01 13:15:00"
|
|
4
|
-
# Author: Claude
|
|
5
|
-
# File: KNOWN_RESOLVERS.py
|
|
6
|
-
|
|
7
|
-
"""
|
|
8
|
-
Known OpenURL resolvers from various institutions worldwide.
|
|
9
|
-
|
|
10
|
-
This module contains a curated list of OpenURL resolvers used by
|
|
11
|
-
academic institutions for accessing scholarly content.
|
|
12
|
-
|
|
13
|
-
Sources:
|
|
14
|
-
- Zotero OpenURL Resolver Directory: https://www.zotero.org/openurl_resolvers
|
|
15
|
-
- Individual institution library websites
|
|
16
|
-
- Common resolver patterns
|
|
17
|
-
"""
|
|
18
|
-
|
|
19
|
-
from typing import Dict, List, Optional
|
|
20
|
-
|
|
21
|
-
# Major OpenURL resolver vendors
|
|
22
|
-
# Registry of resolver vendors, keyed by vendor name. Each entry holds:
#   "patterns":    strings associated with that vendor's resolver URLs
#                  (presumably meant for substring matching — TODO confirm,
#                  no code in this module consumes them)
#   "description": short human-readable label for the product
# NOTE(review): KNOWN_RESOLVERS also uses the vendor value "Custom", which has
# no entry here.
RESOLVER_VENDORS = {
    "ExLibris": {
        "patterns": ["sfx", "exlibrisgroup.com"],
        "description": "Ex Libris SFX resolver (very common)"
    },
    "SerialsSolutions": {
        "patterns": ["serialssolutions.com", "360link"],
        "description": "ProQuest SerialsSolutions 360 Link"
    },
    "EBSCO": {
        "patterns": ["ebscohost.com/openurlresolver", "linkssource.ebsco.com"],
        "description": "EBSCO Full Text Finder"
    },
    "OCLC": {
        "patterns": ["worldcat.org", "oclc.org"],
        "description": "OCLC WorldCat resolver"
    },
    "Ovid": {
        "patterns": ["ovid.com", "linksolver"],
        "description": "Ovid LinkSolver"
    }
}
|
|
44
|
-
|
|
45
|
-
# Known institutional OpenURL resolvers
|
|
46
|
-
# Institution name -> resolver record. Every record carries three string fields:
#   "url":     base URL of the institution's resolver
#   "country": two-letter country code, compared verbatim by
#              get_resolvers_by_country()
#   "vendor":  vendor label, compared case-insensitively by
#              get_resolvers_by_vendor()
# Entry order matters: get_resolver_by_institution() returns the first entry
# (in insertion order) that satisfies its case-insensitive or partial match.
# NOTE(review): several "url" values end in "/login?url=", which looks like a
# proxy-login prefix rather than an OpenURL base URL — confirm these are intended.
KNOWN_RESOLVERS: Dict[str, Dict[str, str]] = {
    # United States
    "Harvard University": {
        "url": "https://sfx.hul.harvard.edu/sfx_local",
        "country": "US",
        "vendor": "ExLibris"
    },
    "MIT": {
        "url": "https://owens.mit.edu/sfx_local",
        "country": "US",
        "vendor": "ExLibris"
    },
    "Stanford University": {
        "url": "https://stanford.idm.oclc.org/login?url=",
        "country": "US",
        "vendor": "OCLC"
    },
    "Yale University": {
        "url": "https://yale.idm.oclc.org/login?url=",
        "country": "US",
        "vendor": "OCLC"
    },
    "University of California, Berkeley": {
        "url": "https://ucelinks.cdlib.org:8443/sfx_ucb",
        "country": "US",
        "vendor": "ExLibris"
    },
    "UCLA": {
        "url": "https://ucelinks.cdlib.org:8443/sfx_ucla",
        "country": "US",
        "vendor": "ExLibris"
    },
    "Columbia University": {
        "url": "https://resolver.library.columbia.edu/openurl",
        "country": "US",
        "vendor": "SerialsSolutions"
    },
    "Princeton University": {
        "url": "https://princeton.idm.oclc.org/login?url=",
        "country": "US",
        "vendor": "OCLC"
    },
    "University of Chicago": {
        "url": "https://proxy.uchicago.edu/login?url=",
        "country": "US",
        "vendor": "Custom"
    },
    "Johns Hopkins": {
        "url": "https://openurl.library.jhu.edu",
        "country": "US",
        "vendor": "Custom"
    },

    # United Kingdom
    # NOTE(review): the code "UK" is used here (ISO 3166-1 alpha-2 is "GB");
    # get_resolvers_by_country() only matches this literal value.
    "University of Oxford": {
        "url": "https://fs.oxfordjournals.org/openurl",
        "country": "UK",
        "vendor": "Custom"
    },
    "University of Cambridge": {
        "url": "https://cambridge.idm.oclc.org/login?url=",
        "country": "UK",
        "vendor": "OCLC"
    },
    "Imperial College London": {
        "url": "https://imperial.idm.oclc.org/login?url=",
        "country": "UK",
        "vendor": "OCLC"
    },
    "UCL": {
        "url": "https://ucl.idm.oclc.org/login?url=",
        "country": "UK",
        "vendor": "OCLC"
    },
    "University of Edinburgh": {
        "url": "https://discovered.ed.ac.uk/openurl",
        "country": "UK",
        "vendor": "Custom"
    },

    # Canada
    "University of Toronto": {
        "url": "https://myaccess.library.utoronto.ca/login?url=",
        "country": "CA",
        "vendor": "Custom"
    },
    "McGill University": {
        "url": "https://mcgill.on.worldcat.org/atoztitles/link",
        "country": "CA",
        "vendor": "OCLC"
    },
    "University of British Columbia": {
        "url": "https://ubc.summon.serialssolutions.com/link",
        "country": "CA",
        "vendor": "SerialsSolutions"
    },

    # Australia
    "University of Melbourne": {
        "url": "https://unimelb.hosted.exlibrisgroup.com/sfxlcl41",
        "country": "AU",
        "vendor": "ExLibris"
    },
    "University of Sydney": {
        "url": "https://ap01.alma.exlibrisgroup.com/view/uresolver/61USYD_INST/openurl",
        "country": "AU",
        "vendor": "ExLibris"
    },
    "Australian National University": {
        "url": "https://anu.hosted.exlibrisgroup.com/primo-explore/openurl",
        "country": "AU",
        "vendor": "ExLibris"
    },
    "University of Queensland": {
        "url": "https://uq.summon.serialssolutions.com/link",
        "country": "AU",
        "vendor": "SerialsSolutions"
    },
    "Monash University": {
        "url": "https://monash.hosted.exlibrisgroup.com/sfx_local",
        "country": "AU",
        "vendor": "ExLibris"
    },

    # Germany
    "Max Planck Society": {
        "url": "http://sfx.mpg.de/sfx_local",
        "country": "DE",
        "vendor": "ExLibris"
    },
    "University of Munich (LMU)": {
        "url": "https://sfx.bib.uni-muenchen.de/sfx_lmu",
        "country": "DE",
        "vendor": "ExLibris"
    },
    "Heidelberg University": {
        "url": "https://sfx.bib.uni-heidelberg.de/sfx_heidelberg",
        "country": "DE",
        "vendor": "ExLibris"
    },

    # Netherlands
    # NOTE(review): the host "vu-nl" below looks like it belongs to Vrije
    # Universiteit Amsterdam rather than the University of Amsterdam — confirm.
    "University of Amsterdam": {
        "url": "https://vu-nl.idm.oclc.org/login?url=",
        "country": "NL",
        "vendor": "OCLC"
    },
    "Delft University of Technology": {
        "url": "https://tudelft.idm.oclc.org/login?url=",
        "country": "NL",
        "vendor": "OCLC"
    },

    # France
    "Sorbonne University": {
        "url": "https://accesdistant.sorbonne-universite.fr/login?url=",
        "country": "FR",
        "vendor": "Custom"
    },
    "École Polytechnique": {
        "url": "https://portail.polytechnique.edu/openurl",
        "country": "FR",
        "vendor": "Custom"
    },

    # Switzerland
    "ETH Zurich": {
        "url": "https://www.library.ethz.ch/openurl",
        "country": "CH",
        "vendor": "Custom"
    },
    "EPFL": {
        "url": "https://sfx.epfl.ch/sfx_local",
        "country": "CH",
        "vendor": "ExLibris"
    },

    # Japan
    "University of Tokyo": {
        "url": "https://vs2ga4mq9g.search.serialssolutions.com",
        "country": "JP",
        "vendor": "SerialsSolutions"
    },
    "Kyoto University": {
        "url": "https://kuline.kulib.kyoto-u.ac.jp/portal/openurl",
        "country": "JP",
        "vendor": "Custom"
    },

    # Singapore
    "National University of Singapore": {
        "url": "https://libproxy.nus.edu.sg/login?url=",
        "country": "SG",
        "vendor": "Custom"
    },
    "Nanyang Technological University": {
        "url": "https://ap01.alma.exlibrisgroup.com/view/uresolver/65NTU_INST/openurl",
        "country": "SG",
        "vendor": "ExLibris"
    },

    # China
    "Tsinghua University": {
        "url": "http://sfx.lib.tsinghua.edu.cn/sfx_local",
        "country": "CN",
        "vendor": "ExLibris"
    },
    "Peking University": {
        "url": "http://sfx.lib.pku.edu.cn/sfx_pku",
        "country": "CN",
        "vendor": "ExLibris"
    },

    # South Korea
    "Seoul National University": {
        "url": "https://sfx.snu.ac.kr/sfx_local",
        "country": "KR",
        "vendor": "ExLibris"
    },
    "KAIST": {
        "url": "https://library.kaist.ac.kr/openurl",
        "country": "KR",
        "vendor": "Custom"
    },

    # Brazil
    "University of São Paulo": {
        "url": "http://www.buscaintegrada.usp.br/openurl",
        "country": "BR",
        "vendor": "Custom"
    },

    # Mexico
    "UNAM": {
        "url": "https://pbidi.unam.mx/login?url=",
        "country": "MX",
        "vendor": "Custom"
    },

    # India
    "IIT Delhi": {
        "url": "https://libproxy.iitd.ac.in/login?url=",
        "country": "IN",
        "vendor": "Custom"
    },
    "Indian Institute of Science": {
        "url": "https://library.iisc.ac.in/openurl",
        "country": "IN",
        "vendor": "Custom"
    }
}
|
|
297
|
-
|
|
298
|
-
# Generic OpenURL resolver patterns
|
|
299
|
-
# Regular expressions describing URL shapes that are treated as resolver-like.
# validate_resolver_url() applies each one with re.match(), so a pattern only
# has to match a prefix of the URL (anchored at the start, open at the end).
# Matching is case-sensitive because no regex flags are passed.
GENERIC_PATTERNS = [
    # ExLibris SFX patterns
    r"https?://[^/]+/sfx[^/]*",
    r"https?://sfx\.[^/]+",
    r"https?://[^/]+\.exlibrisgroup\.com",

    # SerialsSolutions patterns
    r"https?://[^/]+\.serialssolutions\.com",
    r"https?://[^/]+/360link",

    # OCLC patterns
    r"https?://[^/]+\.idm\.oclc\.org",
    r"https?://[^/]+\.worldcat\.org",

    # Common proxy patterns
    r"https?://[^/]+/login\?url=",
    r"https?://libproxy\.[^/]+",
    r"https?://proxy\.[^/]+",

    # OpenURL patterns
    r"https?://[^/]+/openurl",
    # NOTE(review): any URL matching the pattern below already matches the
    # "/openurl" prefix pattern above, so this entry never changes the result.
    r"https?://[^/]+/openurlresolver",
]
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
def get_resolver_by_institution(institution_name: str) -> Optional[Dict[str, str]]:
    """
    Get OpenURL resolver information by institution name.

    Matching is attempted in three stages of decreasing strictness:
    exact key, case-insensitive key, then substring containment in either
    direction. Within a stage the first entry of KNOWN_RESOLVERS (in
    insertion order) that matches is returned.

    Args:
        institution_name: Name of the institution. Leading and trailing
            whitespace is ignored for the case-insensitive and partial stages.

    Returns:
        A copy of the matching record (with 'url', 'country', and 'vendor'),
        or None if nothing matches or the name is empty/blank.
    """
    # Guard against blank input: "" is a substring of every name, so without
    # this check the partial-match stage would return the first table entry.
    if not institution_name or not institution_name.strip():
        return None

    # Try exact match first
    if institution_name in KNOWN_RESOLVERS:
        return KNOWN_RESOLVERS[institution_name].copy()

    institution_lower = institution_name.strip().lower()

    # Try case-insensitive match across the whole table before falling back
    # to partial matching, so an exact (ignoring case) hit always wins.
    for name, info in KNOWN_RESOLVERS.items():
        if name.lower() == institution_lower:
            return info.copy()

    # Try partial match (query inside name, or name inside query)
    for name, info in KNOWN_RESOLVERS.items():
        name_lower = name.lower()
        if institution_lower in name_lower or name_lower in institution_lower:
            return info.copy()

    return None
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
def get_resolvers_by_country(country_code: str) -> Dict[str, Dict[str, str]]:
    """
    Get all OpenURL resolvers for a specific country.

    Args:
        country_code: Two-letter country code (e.g., 'US', 'UK', 'AU').
            Case and surrounding whitespace are ignored.

    Returns:
        Dict of institution names to resolver info. Each record is a copy,
        so callers may modify the result without altering KNOWN_RESOLVERS
        (consistent with get_resolver_by_institution).
    """
    country_code = country_code.strip().upper()
    return {
        name: info.copy()
        for name, info in KNOWN_RESOLVERS.items()
        if info.get('country') == country_code
    }
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
def get_resolvers_by_vendor(vendor_name: str) -> Dict[str, Dict[str, str]]:
    """
    Get all OpenURL resolvers using a specific vendor.

    Args:
        vendor_name: Vendor name (e.g., 'ExLibris', 'OCLC'). Case and
            surrounding whitespace are ignored.

    Returns:
        Dict of institution names to resolver info. Each record is a copy,
        so callers may modify the result without altering KNOWN_RESOLVERS
        (consistent with get_resolver_by_institution).
    """
    # Normalize once instead of on every iteration.
    vendor_lower = vendor_name.strip().lower()
    return {
        name: info.copy()
        for name, info in KNOWN_RESOLVERS.items()
        if info.get('vendor', '').lower() == vendor_lower
    }
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
def validate_resolver_url(url: str) -> bool:
    """
    Report whether a URL resembles an OpenURL resolver address.

    A URL is accepted when it begins with the 'url' of any KNOWN_RESOLVERS
    record, or when any GENERIC_PATTERNS regex matches at its start.

    Args:
        url: URL to validate

    Returns:
        True if the URL matches a known resolver or a generic pattern,
        False otherwise
    """
    import re

    # Stage 1: prefix comparison against the curated resolver URLs.
    known_prefixes = (entry['url'] for entry in KNOWN_RESOLVERS.values())
    if any(url.startswith(prefix) for prefix in known_prefixes):
        return True

    # Stage 2: fall back to the generic URL-shape regexes.
    return any(re.match(regex, url) for regex in GENERIC_PATTERNS)
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
def get_all_resolvers() -> List[Dict[str, str]]:
    """
    Flatten KNOWN_RESOLVERS into a list of records.

    Returns:
        One dict per institution with the keys 'name', 'url', 'country'
        and 'vendor'; a missing country or vendor is reported as 'Unknown'.
    """
    resolvers = []
    for institution, entry in KNOWN_RESOLVERS.items():
        record = {
            'name': institution,
            'url': entry['url'],
            'country': entry.get('country', 'Unknown'),
            'vendor': entry.get('vendor', 'Unknown'),
        }
        resolvers.append(record)
    return resolvers
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
# Common test DOIs for different publishers
|
|
431
|
-
# Sample DOIs keyed by publisher/journal label, intended for exercising resolvers.
# No code in this module reads this mapping.
# NOTE(review): some values (e.g. "10.1126/science.1234567") look like
# placeholders rather than real articles — confirm before relying on them.
TEST_DOIS = {
    "Nature": "10.1038/nature12373",
    "Science": "10.1126/science.1234567",
    "Cell": "10.1016/j.cell.2020.01.001",
    "Elsevier": "10.1016/j.neuroimage.2020.116584",
    "Wiley": "10.1111/jnc.15327",
    "Springer": "10.1007/s00401-021-02283-6",
    "Oxford": "10.1093/brain/awaa123",
    "IEEE": "10.1109/TPAMI.2020.2984611",
    "ACS": "10.1021/acs.jmedchem.0c00606",
    "PNAS": "10.1073/pnas.1921909117"
}
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
# Demo entry point: prints summary statistics and a few example lookups when
# the module is executed directly; nothing here runs on import.
if __name__ == "__main__":
    # Example usage
    print(f"Total known resolvers: {len(KNOWN_RESOLVERS)}")
    # info['country'] is indexed directly, so every record must define it.
    print(f"\nCountries represented: {len(set(info['country'] for info in KNOWN_RESOLVERS.values()))}")
    print(f"Vendors: {set(info.get('vendor', 'Unknown') for info in KNOWN_RESOLVERS.values())}")

    # Example: Find resolver for an institution
    # "Harvard" is not an exact key ("Harvard University" is), so this lookup
    # relies on the partial-match stage of get_resolver_by_institution().
    resolver = get_resolver_by_institution("Harvard")
    if resolver:
        print(f"\nHarvard resolver: {resolver['url']}")

    # Example: Get all US resolvers
    us_resolvers = get_resolvers_by_country("US")
    print(f"\nUS institutions with resolvers: {len(us_resolvers)}")

    # Example: Get all ExLibris resolvers
    exlibris = get_resolvers_by_vendor("ExLibris")
    print(f"Institutions using ExLibris SFX: {len(exlibris)}")
|
|
@@ -1,223 +0,0 @@
|
|
|
1
|
-
<!-- ---
|
|
2
|
-
!-- Timestamp: 2025-08-03 00:51:52
|
|
3
|
-
!-- Author: ywatanabe
|
|
4
|
-
!-- File: /home/ywatanabe/proj/scitex_repo/src/scitex/scholar/open_url/README.md
|
|
5
|
-
!-- --- -->
|
|
6
|
-
|
|
7
|
-
# OpenURL Resolvers
|
|
8
|
-
|
|
9
|
-
This module provides OpenURL resolver implementations with automatic ZenRows integration when an API key is present.
|
|
10
|
-
|
|
11
|
-
**Key Feature**: ZenRows stealth browser is automatically enabled when `SCITEX_SCHOLAR_ZENROWS_API_KEY` is set, providing:
|
|
12
|
-
- 🛡️ Anti-bot protection with residential IPs
|
|
13
|
-
- 🌐 Full browser control for authentication
|
|
14
|
-
- 🚀 Automatic bypass of rate limits and CAPTCHAs
|
|
15
|
-
|
|
16
|
-
## 1. OpenURLResolver (Standard)
|
|
17
|
-
|
|
18
|
-
The standard browser-based resolver using Playwright.
|
|
19
|
-
|
|
20
|
-
**Best for:**
|
|
21
|
-
- Authenticated access to paywalled content
|
|
22
|
-
- Complex JavaScript-based authentication flows
|
|
23
|
-
- Sites that require real browser interactions
|
|
24
|
-
|
|
25
|
-
**Limitations:**
|
|
26
|
-
- Can be blocked by anti-bot measures
|
|
27
|
-
- May encounter CAPTCHAs or rate limits
|
|
28
|
-
|
|
29
|
-
```python
|
|
30
|
-
from scitex.scholar.open_url import OpenURLResolver
|
|
31
|
-
from scitex.scholar.auth import AuthenticationManager
|
|
32
|
-
|
|
33
|
-
auth_manager = AuthenticationManager(email_openathens="your@email.com")
|
|
34
|
-
resolver = OpenURLResolver(auth_manager, "https://your.resolver.url/")
|
|
35
|
-
|
|
36
|
-
result = await resolver.resolve_async(doi="10.1038/nature12373")
|
|
37
|
-
```
|
|
38
|
-
|
|
39
|
-
## 2. OpenURLResolverWithZenRows (API-based)
|
|
40
|
-
|
|
41
|
-
Uses ZenRows API to bypass anti-bot detection while making HTTP requests.
|
|
42
|
-
|
|
43
|
-
**Best for:**
|
|
44
|
-
- High-volume resolution tasks
|
|
45
|
-
- Bypassing rate limits and IP blocks
|
|
46
|
-
- Open access content detection
|
|
47
|
-
|
|
48
|
-
**Limitations:**
|
|
49
|
-
- Cannot execute JavaScript (no popup handling)
|
|
50
|
-
- Limited authentication cookie transfer to publishers
|
|
51
|
-
- May show "Purchase" for paywalled content even with auth
|
|
52
|
-
|
|
53
|
-
```python
|
|
54
|
-
from scitex.scholar.open_url import OpenURLResolverWithZenRows
|
|
55
|
-
|
|
56
|
-
resolver = OpenURLResolverWithZenRows(
|
|
57
|
-
auth_manager,
|
|
58
|
-
resolver_url,
|
|
59
|
-
zenrows_api_key="your_api_key" # or set SCITEX_SCHOLAR_ZENROWS_API_KEY
|
|
60
|
-
)
|
|
61
|
-
|
|
62
|
-
result = await resolver.resolve_async(doi="10.1038/nature12373")
|
|
63
|
-
```
|
|
64
|
-
|
|
65
|
-
## 3. ZenRowsOpenURLResolver (Browser-based)
|
|
66
|
-
|
|
67
|
-
Uses ZenRows Scraping Browser service - cloud-based Chrome instances with anti-bot bypass.
|
|
68
|
-
|
|
69
|
-
**Best for:**
|
|
70
|
-
- Sites with aggressive anti-bot protection (e.g., PNAS)
|
|
71
|
-
- Maintaining full authentication context
|
|
72
|
-
- JavaScript-heavy authentication flows with anti-bot measures
|
|
73
|
-
|
|
74
|
-
**Limitations:**
|
|
75
|
-
- Requires ZenRows API key
|
|
76
|
-
- Slightly slower due to remote browser
|
|
77
|
-
- May have concurrency limits based on plan
|
|
78
|
-
|
|
79
|
-
```python
|
|
80
|
-
from scitex.scholar.open_url import ZenRowsOpenURLResolver
|
|
81
|
-
|
|
82
|
-
resolver = ZenRowsOpenURLResolver(
|
|
83
|
-
auth_manager,
|
|
84
|
-
resolver_url,
|
|
85
|
-
zenrows_api_key="your_api_key" # or set SCITEX_SCHOLAR_ZENROWS_API_KEY
|
|
86
|
-
)
|
|
87
|
-
|
|
88
|
-
result = await resolver.resolve_async(doi="10.1073/pnas.0608765104")
|
|
89
|
-
```
|
|
90
|
-
|
|
91
|
-
## Usage Example (Synchronous)
|
|
92
|
-
|
|
93
|
-
```python
|
|
94
|
-
from scitex.scholar.open_url import OpenURLResolver, ZenRowsOpenURLResolver
|
|
95
|
-
from scitex.scholar.auth import AuthenticationManager
|
|
96
|
-
import os
|
|
97
|
-
from scitex import logging
|
|
98
|
-
|
|
99
|
-
# Enable debug logging
|
|
100
|
-
logger = logging.getLogger()
|
|
101
|
-
logger.setLevel(logging.DEBUG)
|
|
102
|
-
|
|
103
|
-
# Initialize authentication
|
|
104
|
-
auth_manager = AuthenticationManager(
|
|
105
|
-
email_openathens=os.getenv("SCITEX_SCHOLAR_OPENATHENS_EMAIL")
|
|
106
|
-
)
|
|
107
|
-
is_authenticate_async = await auth_manager.is_authenticate_async()
|
|
108
|
-
|
|
109
|
-
# Choose your resolver
|
|
110
|
-
# Standard browser-based resolver
|
|
111
|
-
resolver = OpenURLResolver(
|
|
112
|
-
auth_manager,
|
|
113
|
-
os.getenv("SCITEX_SCHOLAR_OPENURL_RESOLVER_URL")
|
|
114
|
-
)
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
# # OR: ZenRows cloud browser resolver (for anti-bot bypass)
|
|
118
|
-
# resolver = ZenRowsOpenURLResolver(
|
|
119
|
-
# auth_manager,
|
|
120
|
-
# os.getenv("SCITEX_SCHOLAR_OPENURL_RESOLVER_URL"),
|
|
121
|
-
# os.getenv("SCITEX_SCHOLAR_ZENROWS_API_KEY"))
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
# DOIs to resolve
|
|
125
|
-
dois = [
|
|
126
|
-
"10.1038/nature12373",
|
|
127
|
-
"10.1016/j.neuron.2018.01.048",
|
|
128
|
-
"10.1126/science.1172133",
|
|
129
|
-
"10.1073/pnas.0608765104",
|
|
130
|
-
]
|
|
131
|
-
|
|
132
|
-
# "10.1002/hipo.22488",
|
|
133
|
-
# # Resolve single DOI
|
|
134
|
-
# result = resolver._resolve_single(doi=dois[0])
|
|
135
|
-
|
|
136
|
-
# Resolve multiple DOIs in parallel
|
|
137
|
-
results = resolver.resolve(dois)
|
|
138
|
-
```
|
|
139
|
-
|
|
140
|
-
## Choosing the Right Resolver
|
|
141
|
-
|
|
142
|
-
| Scenario | Recommended Resolver |
|
|
143
|
-
|----------|---------------------|
|
|
144
|
-
| General academic paper access | OpenURLResolver |
|
|
145
|
-
| High-volume batch processing | OpenURLResolverWithZenRows |
|
|
146
|
-
| Sites blocking normal browsers | ZenRowsOpenURLResolver |
|
|
147
|
-
| PNAS, sites with "unusual traffic" errors | ZenRowsOpenURLResolver |
|
|
148
|
-
| Need full JavaScript execution + anti-bot | ZenRowsOpenURLResolver |
|
|
149
|
-
|
|
150
|
-
## Automatic Fallback Strategy
|
|
151
|
-
|
|
152
|
-
You can implement automatic fallback between resolvers:
|
|
153
|
-
|
|
154
|
-
```python
|
|
155
|
-
async def resolve_with_fallback_async(doi, metadata):
|
|
156
|
-
# Try standard resolver first
|
|
157
|
-
result = await standard_resolver.resolve_async(doi=doi, **metadata)
|
|
158
|
-
|
|
159
|
-
if result and result.get('success'):
|
|
160
|
-
return result
|
|
161
|
-
|
|
162
|
-
# Check for anti-bot indicators
|
|
163
|
-
if result and result.get('access_type') in ['captcha_required', 'rate_limited']:
|
|
164
|
-
# Try ZenRows browser resolver
|
|
165
|
-
return await zenrows_browser_resolver.resolve_async(doi=doi, **metadata)
|
|
166
|
-
|
|
167
|
-
return result
|
|
168
|
-
```
|
|
169
|
-
|
|
170
|
-
## NEW: Simplified ZenRows Stealth Browser (Recommended)
|
|
171
|
-
|
|
172
|
-
As of the latest update, ZenRows stealth capabilities are automatically integrated when the API key is present:
|
|
173
|
-
|
|
174
|
-
```python
|
|
175
|
-
# Just set the API key - ZenRows stealth is automatically enabled!
|
|
176
|
-
os.environ["SCITEX_SCHOLAR_ZENROWS_API_KEY"] = "your_api_key"
|
|
177
|
-
|
|
178
|
-
from scitex.scholar import Scholar
|
|
179
|
-
|
|
180
|
-
# Scholar automatically uses ZenRows stealth browser
|
|
181
|
-
scholar = Scholar()
|
|
182
|
-
|
|
183
|
-
# Download with automatic anti-bot protection
|
|
184
|
-
papers = await scholar.download_pdf_asyncs_async(
|
|
185
|
-
["10.1038/nature12373", "10.1073/pnas.0608765104"],
|
|
186
|
-
show_async_progress=True
|
|
187
|
-
)
|
|
188
|
-
```
|
|
189
|
-
|
|
190
|
-
This provides:
|
|
191
|
-
- **Local browser window** you can see and interact with
|
|
192
|
-
- **ZenRows proxy** for clean residential IPs
|
|
193
|
-
- **Manual login** capability for complex SSO/2FA
|
|
194
|
-
- **Automatic anti-bot bypass** for all operations
|
|
195
|
-
|
|
196
|
-
## Environment Variables
|
|
197
|
-
|
|
198
|
-
- `SCITEX_SCHOLAR_ZENROWS_API_KEY`: Your ZenRows API key (auto-enables stealth)
|
|
199
|
-
- `SCITEX_SCHOLAR_OPENATHENS_EMAIL`: Email for OpenAthens authentication
|
|
200
|
-
- `SCITEX_SCHOLAR_OPENURL_RESOLVER_URL`: Your institutional OpenURL resolver
|
|
201
|
-
|
|
202
|
-
## Architecture
|
|
203
|
-
|
|
204
|
-
```
|
|
205
|
-
OpenURL Resolvers
|
|
206
|
-
├── _OpenURLResolver.py # Base implementation with Playwright
|
|
207
|
-
├── _OpenURLResolverWithZenRows.py # API-based ZenRows integration
|
|
208
|
-
├── _ZenRowsOpenURLResolver.py # Browser-based ZenRows integration
|
|
209
|
-
└── _ResolverLinkFinder.py # Shared link detection logic
|
|
210
|
-
|
|
211
|
-
Browser Managers
|
|
212
|
-
├── _BrowserManager.py # Standard local browser
|
|
213
|
-
├── _ProxyBrowserManager.py # Local browser + proxy routing
|
|
214
|
-
└── _ZenRowsBrowserManager.py # Cloud browser instances
|
|
215
|
-
```
|
|
216
|
-
|
|
217
|
-
The separation ensures:
|
|
218
|
-
- Clean architecture with single responsibility
|
|
219
|
-
- Easy switching between implementations
|
|
220
|
-
- No interference with other browser-based operations
|
|
221
|
-
- Flexibility to use different strategies for different papers
|
|
222
|
-
|
|
223
|
-
<!-- EOF -->
|