scitex 2.16.1__py3-none-any.whl → 2.16.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. scitex/scholar/url_finder/.tmp/open_url/KNOWN_RESOLVERS.py +462 -0
  2. scitex/scholar/url_finder/.tmp/open_url/README.md +223 -0
  3. scitex/scholar/url_finder/.tmp/open_url/_DOIToURLResolver.py +694 -0
  4. scitex/scholar/url_finder/.tmp/open_url/_OpenURLResolver.py +1160 -0
  5. scitex/scholar/url_finder/.tmp/open_url/_ResolverLinkFinder.py +344 -0
  6. scitex/scholar/url_finder/.tmp/open_url/__init__.py +24 -0
  7. {scitex-2.16.1.dist-info → scitex-2.16.2.dist-info}/METADATA +1 -1
  8. {scitex-2.16.1.dist-info → scitex-2.16.2.dist-info}/RECORD +11 -36
  9. scitex/dev/plt/data/mpl/PLOTTING_FUNCTIONS.yaml +0 -90
  10. scitex/dev/plt/data/mpl/PLOTTING_SIGNATURES.yaml +0 -1571
  11. scitex/dev/plt/data/mpl/PLOTTING_SIGNATURES_DETAILED.yaml +0 -6262
  12. scitex/dev/plt/data/mpl/SIGNATURES_FLATTENED.yaml +0 -1274
  13. scitex/dev/plt/data/mpl/dir_ax.txt +0 -459
  14. scitex/scholar/data/.gitkeep +0 -0
  15. scitex/scholar/data/README.md +0 -44
  16. scitex/scholar/data/bib_files/bibliography.bib +0 -1952
  17. scitex/scholar/data/bib_files/neurovista.bib +0 -277
  18. scitex/scholar/data/bib_files/neurovista_enriched.bib +0 -441
  19. scitex/scholar/data/bib_files/neurovista_enriched_enriched.bib +0 -441
  20. scitex/scholar/data/bib_files/neurovista_processed.bib +0 -338
  21. scitex/scholar/data/bib_files/openaccess.bib +0 -89
  22. scitex/scholar/data/bib_files/pac-seizure_prediction_enriched.bib +0 -2178
  23. scitex/scholar/data/bib_files/pac.bib +0 -698
  24. scitex/scholar/data/bib_files/pac_enriched.bib +0 -1061
  25. scitex/scholar/data/bib_files/pac_processed.bib +0 -0
  26. scitex/scholar/data/bib_files/pac_titles.txt +0 -75
  27. scitex/scholar/data/bib_files/paywalled.bib +0 -98
  28. scitex/scholar/data/bib_files/related-papers-by-coauthors.bib +0 -58
  29. scitex/scholar/data/bib_files/related-papers-by-coauthors_enriched.bib +0 -87
  30. scitex/scholar/data/bib_files/seizure_prediction.bib +0 -694
  31. scitex/scholar/data/bib_files/seizure_prediction_processed.bib +0 -0
  32. scitex/scholar/data/bib_files/test_complete_enriched.bib +0 -437
  33. scitex/scholar/data/bib_files/test_final_enriched.bib +0 -437
  34. scitex/scholar/data/bib_files/test_seizure.bib +0 -46
  35. scitex/scholar/data/impact_factor/JCR_IF_2022.xlsx +0 -0
  36. scitex/scholar/data/impact_factor/JCR_IF_2024.db +0 -0
  37. scitex/scholar/data/impact_factor/JCR_IF_2024.xlsx +0 -0
  38. scitex/scholar/data/impact_factor/JCR_IF_2024_v01.db +0 -0
  39. scitex/scholar/data/impact_factor.db +0 -0
  40. {scitex-2.16.1.dist-info → scitex-2.16.2.dist-info}/WHEEL +0 -0
  41. {scitex-2.16.1.dist-info → scitex-2.16.2.dist-info}/entry_points.txt +0 -0
  42. {scitex-2.16.1.dist-info → scitex-2.16.2.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,462 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ # Time-stamp: "2025-08-01 13:15:00"
4
+ # Author: Claude
5
+ # File: KNOWN_RESOLVERS.py
6
+
7
+ """
8
+ Known OpenURL resolvers from various institutions worldwide.
9
+
10
+ This module contains a curated list of OpenURL resolvers used by
11
+ academic institutions for accessing scholarly content.
12
+
13
+ Sources:
14
+ - Zotero OpenURL Resolver Directory: https://www.zotero.org/openurl_resolvers
15
+ - Individual institution library websites
16
+ - Common resolver patterns
17
+ """
18
+
19
+ from typing import Dict, List, Optional
20
+
21
# Major OpenURL resolver vendors.
# Each vendor maps to the URL substrings ("patterns") associated with its
# resolver product and a short human-readable description.
RESOLVER_VENDORS = {
    "ExLibris": {
        "patterns": ["sfx", "exlibrisgroup.com"],
        "description": "Ex Libris SFX resolver (very common)",
    },
    "SerialsSolutions": {
        "patterns": ["serialssolutions.com", "360link"],
        "description": "ProQuest SerialsSolutions 360 Link",
    },
    "EBSCO": {
        "patterns": ["ebscohost.com/openurlresolver", "linkssource.ebsco.com"],
        "description": "EBSCO Full Text Finder",
    },
    "OCLC": {
        "patterns": ["worldcat.org", "oclc.org"],
        "description": "OCLC WorldCat resolver",
    },
    "Ovid": {
        "patterns": ["ovid.com", "linksolver"],
        "description": "Ovid LinkSolver",
    },
}
44
+
45
# Known institutional OpenURL resolvers.
# Rows are (institution name, resolver URL, country code, vendor); the
# public mapping below is derived from them in the same order.
_RESOLVER_TABLE = [
    # United States
    ("Harvard University", "https://sfx.hul.harvard.edu/sfx_local", "US", "ExLibris"),
    ("MIT", "https://owens.mit.edu/sfx_local", "US", "ExLibris"),
    ("Stanford University", "https://stanford.idm.oclc.org/login?url=", "US", "OCLC"),
    ("Yale University", "https://yale.idm.oclc.org/login?url=", "US", "OCLC"),
    (
        "University of California, Berkeley",
        "https://ucelinks.cdlib.org:8443/sfx_ucb",
        "US",
        "ExLibris",
    ),
    ("UCLA", "https://ucelinks.cdlib.org:8443/sfx_ucla", "US", "ExLibris"),
    (
        "Columbia University",
        "https://resolver.library.columbia.edu/openurl",
        "US",
        "SerialsSolutions",
    ),
    ("Princeton University", "https://princeton.idm.oclc.org/login?url=", "US", "OCLC"),
    ("University of Chicago", "https://proxy.uchicago.edu/login?url=", "US", "Custom"),
    ("Johns Hopkins", "https://openurl.library.jhu.edu", "US", "Custom"),

    # United Kingdom (stored as "UK", not the ISO code "GB")
    ("University of Oxford", "https://fs.oxfordjournals.org/openurl", "UK", "Custom"),
    ("University of Cambridge", "https://cambridge.idm.oclc.org/login?url=", "UK", "OCLC"),
    ("Imperial College London", "https://imperial.idm.oclc.org/login?url=", "UK", "OCLC"),
    ("UCL", "https://ucl.idm.oclc.org/login?url=", "UK", "OCLC"),
    ("University of Edinburgh", "https://discovered.ed.ac.uk/openurl", "UK", "Custom"),

    # Canada
    (
        "University of Toronto",
        "https://myaccess.library.utoronto.ca/login?url=",
        "CA",
        "Custom",
    ),
    ("McGill University", "https://mcgill.on.worldcat.org/atoztitles/link", "CA", "OCLC"),
    (
        "University of British Columbia",
        "https://ubc.summon.serialssolutions.com/link",
        "CA",
        "SerialsSolutions",
    ),

    # Australia
    (
        "University of Melbourne",
        "https://unimelb.hosted.exlibrisgroup.com/sfxlcl41",
        "AU",
        "ExLibris",
    ),
    (
        "University of Sydney",
        "https://ap01.alma.exlibrisgroup.com/view/uresolver/61USYD_INST/openurl",
        "AU",
        "ExLibris",
    ),
    (
        "Australian National University",
        "https://anu.hosted.exlibrisgroup.com/primo-explore/openurl",
        "AU",
        "ExLibris",
    ),
    (
        "University of Queensland",
        "https://uq.summon.serialssolutions.com/link",
        "AU",
        "SerialsSolutions",
    ),
    (
        "Monash University",
        "https://monash.hosted.exlibrisgroup.com/sfx_local",
        "AU",
        "ExLibris",
    ),

    # Germany
    ("Max Planck Society", "http://sfx.mpg.de/sfx_local", "DE", "ExLibris"),
    ("University of Munich (LMU)", "https://sfx.bib.uni-muenchen.de/sfx_lmu", "DE", "ExLibris"),
    (
        "Heidelberg University",
        "https://sfx.bib.uni-heidelberg.de/sfx_heidelberg",
        "DE",
        "ExLibris",
    ),

    # Netherlands
    # NOTE(review): the host below is "vu-nl", which looks like a different
    # Amsterdam institution than the key suggests -- confirm before relying on it.
    ("University of Amsterdam", "https://vu-nl.idm.oclc.org/login?url=", "NL", "OCLC"),
    ("Delft University of Technology", "https://tudelft.idm.oclc.org/login?url=", "NL", "OCLC"),

    # France
    (
        "Sorbonne University",
        "https://accesdistant.sorbonne-universite.fr/login?url=",
        "FR",
        "Custom",
    ),
    ("École Polytechnique", "https://portail.polytechnique.edu/openurl", "FR", "Custom"),

    # Switzerland
    ("ETH Zurich", "https://www.library.ethz.ch/openurl", "CH", "Custom"),
    ("EPFL", "https://sfx.epfl.ch/sfx_local", "CH", "ExLibris"),

    # Japan
    (
        "University of Tokyo",
        "https://vs2ga4mq9g.search.serialssolutions.com",
        "JP",
        "SerialsSolutions",
    ),
    ("Kyoto University", "https://kuline.kulib.kyoto-u.ac.jp/portal/openurl", "JP", "Custom"),

    # Singapore
    ("National University of Singapore", "https://libproxy.nus.edu.sg/login?url=", "SG", "Custom"),
    (
        "Nanyang Technological University",
        "https://ap01.alma.exlibrisgroup.com/view/uresolver/65NTU_INST/openurl",
        "SG",
        "ExLibris",
    ),

    # China
    ("Tsinghua University", "http://sfx.lib.tsinghua.edu.cn/sfx_local", "CN", "ExLibris"),
    ("Peking University", "http://sfx.lib.pku.edu.cn/sfx_pku", "CN", "ExLibris"),

    # South Korea
    ("Seoul National University", "https://sfx.snu.ac.kr/sfx_local", "KR", "ExLibris"),
    ("KAIST", "https://library.kaist.ac.kr/openurl", "KR", "Custom"),

    # Brazil
    ("University of São Paulo", "http://www.buscaintegrada.usp.br/openurl", "BR", "Custom"),

    # Mexico
    ("UNAM", "https://pbidi.unam.mx/login?url=", "MX", "Custom"),

    # India
    ("IIT Delhi", "https://libproxy.iitd.ac.in/login?url=", "IN", "Custom"),
    ("Indian Institute of Science", "https://library.iisc.ac.in/openurl", "IN", "Custom"),
]

# Institution name -> {"url", "country", "vendor"}, in table order.
KNOWN_RESOLVERS: Dict[str, Dict[str, str]] = {
    institution: {"url": resolver_url, "country": country, "vendor": vendor}
    for institution, resolver_url, country, vendor in _RESOLVER_TABLE
}
297
+
298
# Generic OpenURL resolver patterns.
# Regular expressions, grouped by vendor/family, that validate_resolver_url()
# applies with re.match (i.e. anchored at the start of the URL only).
GENERIC_PATTERNS = [
    # --- ExLibris SFX ---
    r"https?://[^/]+/sfx[^/]*",
    r"https?://sfx\.[^/]+",
    r"https?://[^/]+\.exlibrisgroup\.com",

    # --- SerialsSolutions ---
    r"https?://[^/]+\.serialssolutions\.com",
    r"https?://[^/]+/360link",

    # --- OCLC ---
    r"https?://[^/]+\.idm\.oclc\.org",
    r"https?://[^/]+\.worldcat\.org",

    # --- Common proxy login prefixes ---
    r"https?://[^/]+/login\?url=",
    r"https?://libproxy\.[^/]+",
    r"https?://proxy\.[^/]+",

    # --- Plain OpenURL endpoints ---
    r"https?://[^/]+/openurl",
    r"https?://[^/]+/openurlresolver",
]
322
+
323
+
324
def get_resolver_by_institution(institution_name: str) -> Optional[Dict[str, str]]:
    """
    Get OpenURL resolver information by institution name.

    Matching goes from strictest to loosest: exact key, case-insensitive
    key, then substring containment in either direction. Within each pass
    the first hit in KNOWN_RESOLVERS order wins.

    Args:
        institution_name: Name of the institution

    Returns:
        A copy of the matching resolver entry (keys 'url', 'country',
        'vendor') if found, None otherwise. Empty or whitespace-only
        names never match.
    """
    # Empty input can never identify an institution.
    if not institution_name:
        return None

    # Try exact match first (raw input, no normalization)
    if institution_name in KNOWN_RESOLVERS:
        return KNOWN_RESOLVERS[institution_name].copy()

    query = institution_name.strip().lower()
    # Guard: "" is a substring of every name, so without this check the
    # partial-match pass below would return the first table entry.
    if not query:
        return None

    # Lower-case every known name once; both remaining passes reuse it.
    candidates = [(name.lower(), info) for name, info in KNOWN_RESOLVERS.items()]

    # Try case-insensitive match
    for known, info in candidates:
        if known == query:
            return info.copy()

    # Try partial match (either string contained in the other)
    for known, info in candidates:
        if query in known or known in query:
            return info.copy()

    return None
350
+
351
+
352
def get_resolvers_by_country(country_code: str) -> Dict[str, Dict[str, str]]:
    """
    Get all OpenURL resolvers for a specific country.

    Args:
        country_code: Two-letter country code as stored in KNOWN_RESOLVERS
            (e.g., 'US', 'UK', 'AU'). Case and surrounding whitespace are
            ignored.

    Returns:
        Dict of institution names to resolver info. Each info dict is a
        copy, so callers may modify the result without affecting
        KNOWN_RESOLVERS. Empty if no institution matches.
    """
    wanted = country_code.strip().upper()
    return {
        name: info.copy()  # copy: never hand out the module-level entry itself
        for name, info in KNOWN_RESOLVERS.items()
        if info.get('country') == wanted
    }
368
+
369
+
370
def get_resolvers_by_vendor(vendor_name: str) -> Dict[str, Dict[str, str]]:
    """
    Get all OpenURL resolvers using a specific vendor.

    Args:
        vendor_name: Vendor name (e.g., 'ExLibris', 'OCLC'). Compared
            case-insensitively; surrounding whitespace is ignored.

    Returns:
        Dict of institution names to resolver info. Each info dict is a
        copy, so callers may modify the result without affecting
        KNOWN_RESOLVERS. Empty if no institution matches.
    """
    # Normalize once instead of on every iteration.
    wanted = vendor_name.strip().lower()
    return {
        name: info.copy()  # copy: never hand out the module-level entry itself
        for name, info in KNOWN_RESOLVERS.items()
        if info.get('vendor', '').lower() == wanted
    }
385
+
386
+
387
def validate_resolver_url(url: str) -> bool:
    """
    Check if a URL looks like a valid OpenURL resolver.

    A URL is accepted when it begins with the URL of any KNOWN_RESOLVERS
    entry, or when any GENERIC_PATTERNS regex matches at its start.

    Args:
        url: URL to validate

    Returns:
        True if URL matches known resolver patterns, False otherwise
    """
    import re

    # Known institutional resolver URLs are checked first.
    if any(url.startswith(entry['url']) for entry in KNOWN_RESOLVERS.values()):
        return True

    # Otherwise fall back to the vendor-agnostic regexes (start-anchored).
    return any(re.match(regex, url) is not None for regex in GENERIC_PATTERNS)
410
+
411
+
412
def get_all_resolvers() -> List[Dict[str, str]]:
    """
    Get all known resolvers as a list.

    Returns:
        List of dicts with 'name', 'url', 'country', 'vendor', one per
        KNOWN_RESOLVERS entry and in the same order. A missing country or
        vendor is reported as 'Unknown'.
    """
    flattened = []
    for institution, details in KNOWN_RESOLVERS.items():
        record = {
            'name': institution,
            'url': details['url'],
            'country': details.get('country', 'Unknown'),
            'vendor': details.get('vendor', 'Unknown'),
        }
        flattened.append(record)
    return flattened
428
+
429
+
430
# Common test DOIs for different publishers.
# Maps a publisher/journal label to one sample DOI string.
TEST_DOIS = {
    "Nature": "10.1038/nature12373",
    "Science": "10.1126/science.1234567",
    "Cell": "10.1016/j.cell.2020.01.001",
    "Elsevier": "10.1016/j.neuroimage.2020.116584",
    "Wiley": "10.1111/jnc.15327",
    "Springer": "10.1007/s00401-021-02283-6",
    "Oxford": "10.1093/brain/awaa123",
    "IEEE": "10.1109/TPAMI.2020.2984611",
    "ACS": "10.1021/acs.jmedchem.0c00606",
    "PNAS": "10.1073/pnas.1921909117",
}
443
+
444
+
445
if __name__ == "__main__":
    # Demo: print summary statistics and a few sample lookups.
    print(f"Total known resolvers: {len(KNOWN_RESOLVERS)}")

    distinct_countries = {entry['country'] for entry in KNOWN_RESOLVERS.values()}
    print(f"\nCountries represented: {len(distinct_countries)}")

    distinct_vendors = {entry.get('vendor', 'Unknown') for entry in KNOWN_RESOLVERS.values()}
    print(f"Vendors: {distinct_vendors}")

    # Look up one institution by (partial) name
    resolver = get_resolver_by_institution("Harvard")
    if resolver:
        print(f"\nHarvard resolver: {resolver['url']}")

    # Count institutions for one country
    us_resolvers = get_resolvers_by_country("US")
    print(f"\nUS institutions with resolvers: {len(us_resolvers)}")

    # Count institutions for one vendor
    exlibris = get_resolvers_by_vendor("ExLibris")
    print(f"Institutions using ExLibris SFX: {len(exlibris)}")
@@ -0,0 +1,223 @@
1
+ <!-- ---
2
+ !-- Timestamp: 2025-08-03 00:51:52
3
+ !-- Author: ywatanabe
4
+ !-- File: /home/ywatanabe/proj/scitex_repo/src/scitex/scholar/open_url/README.md
5
+ !-- --- -->
6
+
7
+ # OpenURL Resolvers
8
+
9
+ This module provides OpenURL resolver implementations with automatic ZenRows integration when an API key is present.
10
+
11
+ **Key Feature**: ZenRows stealth browser is automatically enabled when `SCITEX_SCHOLAR_ZENROWS_API_KEY` is set, providing:
12
+ - 🛡️ Anti-bot protection with residential IPs
13
+ - 🌐 Full browser control for authentication
14
+ - 🚀 Automatic bypass of rate limits and CAPTCHAs
15
+
16
+ ## 1. OpenURLResolver (Standard)
17
+
18
+ The standard browser-based resolver using Playwright.
19
+
20
+ **Best for:**
21
+ - Authenticated access to paywalled content
22
+ - Complex JavaScript-based authentication flows
23
+ - Sites that require real browser interactions
24
+
25
+ **Limitations:**
26
+ - Can be blocked by anti-bot measures
27
+ - May encounter CAPTCHAs or rate limits
28
+
29
+ ```python
30
+ from scitex.scholar.open_url import OpenURLResolver
31
+ from scitex.scholar.auth import AuthenticationManager
32
+
33
+ auth_manager = AuthenticationManager(email_openathens="your@email.com")
34
+ resolver = OpenURLResolver(auth_manager, "https://your.resolver.url/")
35
+
36
+ result = await resolver.resolve_async(doi="10.1038/nature12373")
37
+ ```
38
+
39
+ ## 2. OpenURLResolverWithZenRows (API-based)
40
+
41
+ Uses ZenRows API to bypass anti-bot detection while making HTTP requests.
42
+
43
+ **Best for:**
44
+ - High-volume resolution tasks
45
+ - Bypassing rate limits and IP blocks
46
+ - Open access content detection
47
+
48
+ **Limitations:**
49
+ - Cannot execute JavaScript (no popup handling)
50
+ - Limited authentication cookie transfer to publishers
51
+ - May show "Purchase" for paywalled content even with auth
52
+
53
+ ```python
54
+ from scitex.scholar.open_url import OpenURLResolverWithZenRows
55
+
56
+ resolver = OpenURLResolverWithZenRows(
57
+ auth_manager,
58
+ resolver_url,
59
+ zenrows_api_key="your_api_key" # or set SCITEX_SCHOLAR_ZENROWS_API_KEY
60
+ )
61
+
62
+ result = await resolver.resolve_async(doi="10.1038/nature12373")
63
+ ```
64
+
65
+ ## 3. ZenRowsOpenURLResolver (Browser-based)
66
+
67
+ Uses ZenRows Scraping Browser service - cloud-based Chrome instances with anti-bot bypass.
68
+
69
+ **Best for:**
70
+ - Sites with aggressive anti-bot protection (e.g., PNAS)
71
+ - Maintaining full authentication context
72
+ - JavaScript-heavy authentication flows with anti-bot measures
73
+
74
+ **Limitations:**
75
+ - Requires ZenRows API key
76
+ - Slightly slower due to remote browser
77
+ - May have concurrency limits based on plan
78
+
79
+ ```python
80
+ from scitex.scholar.open_url import ZenRowsOpenURLResolver
81
+
82
+ resolver = ZenRowsOpenURLResolver(
83
+ auth_manager,
84
+ resolver_url,
85
+ zenrows_api_key="your_api_key" # or set SCITEX_SCHOLAR_ZENROWS_API_KEY
86
+ )
87
+
88
+ result = await resolver.resolve_async(doi="10.1073/pnas.0608765104")
89
+ ```
90
+
91
+ ## Usage Example (Synchronous)
92
+
93
+ ```python
94
+ from scitex.scholar.open_url import OpenURLResolver, ZenRowsOpenURLResolver
95
+ from scitex.scholar.auth import AuthenticationManager
96
+ import os
97
+ from scitex import logging
98
+
99
+ # Enable debug logging
100
+ logger = logging.getLogger()
101
+ logger.setLevel(logging.DEBUG)
102
+
103
+ # Initialize authentication
104
+ auth_manager = AuthenticationManager(
105
+ email_openathens=os.getenv("SCITEX_SCHOLAR_OPENATHENS_EMAIL")
106
+ )
107
+ is_authenticate_async = await auth_manager.is_authenticate_async()
108
+
109
+ # Choose your resolver
110
+ # Standard browser-based resolver
111
+ resolver = OpenURLResolver(
112
+ auth_manager,
113
+ os.getenv("SCITEX_SCHOLAR_OPENURL_RESOLVER_URL")
114
+ )
115
+
116
+
117
+ # # OR: ZenRows cloud browser resolver (for anti-bot bypass)
118
+ # resolver = ZenRowsOpenURLResolver(
119
+ # auth_manager,
120
+ # os.getenv("SCITEX_SCHOLAR_OPENURL_RESOLVER_URL"),
121
+ # os.getenv("SCITEX_SCHOLAR_ZENROWS_API_KEY"))
122
+
123
+
124
+ # DOIs to resolve
125
+ dois = [
126
+ "10.1038/nature12373",
127
+ "10.1016/j.neuron.2018.01.048",
128
+ "10.1126/science.1172133",
129
+ "10.1073/pnas.0608765104",
130
+ ]
131
+
132
+ # "10.1002/hipo.22488",
133
+ # # Resolve single DOI
134
+ # result = resolver._resolve_single(doi=dois[0])
135
+
136
+ # Resolve multiple DOIs in parallel
137
+ results = resolver.resolve(dois)
138
+ ```
139
+
140
+ ## Choosing the Right Resolver
141
+
142
+ | Scenario | Recommended Resolver |
143
+ |----------|---------------------|
144
+ | General academic paper access | OpenURLResolver |
145
+ | High-volume batch processing | OpenURLResolverWithZenRows |
146
+ | Sites blocking normal browsers | ZenRowsOpenURLResolver |
147
+ | PNAS, sites with "unusual traffic" errors | ZenRowsOpenURLResolver |
148
+ | Need full JavaScript execution + anti-bot | ZenRowsOpenURLResolver |
149
+
150
+ ## Automatic Fallback Strategy
151
+
152
+ You can implement automatic fallback between resolvers:
153
+
154
+ ```python
155
+ async def resolve_with_fallback_async(doi, metadata):
156
+ # Try standard resolver first
157
+ result = await standard_resolver.resolve_async(doi=doi, **metadata)
158
+
159
+ if result and result.get('success'):
160
+ return result
161
+
162
+ # Check for anti-bot indicators
163
+ if result and result.get('access_type') in ['captcha_required', 'rate_limited']:
164
+ # Try ZenRows browser resolver
165
+ return await zenrows_browser_resolver.resolve_async(doi=doi, **metadata)
166
+
167
+ return result
168
+ ```
169
+
170
+ ## NEW: Simplified ZenRows Stealth Browser (Recommended)
171
+
172
+ As of the latest update, ZenRows stealth capabilities are automatically integrated when the API key is present:
173
+
174
+ ```python
175
+ # Just set the API key - ZenRows stealth is automatically enabled!
176
+ os.environ["SCITEX_SCHOLAR_ZENROWS_API_KEY"] = "your_api_key"
177
+
178
+ from scitex.scholar import Scholar
179
+
180
+ # Scholar automatically uses ZenRows stealth browser
181
+ scholar = Scholar()
182
+
183
+ # Download with automatic anti-bot protection
184
+ papers = await scholar.download_pdf_asyncs_async(
185
+ ["10.1038/nature12373", "10.1073/pnas.0608765104"],
186
+ show_async_progress=True
187
+ )
188
+ ```
189
+
190
+ This provides:
191
+ - **Local browser window** you can see and interact with
192
+ - **ZenRows proxy** for clean residential IPs
193
+ - **Manual login** capability for complex SSO/2FA
194
+ - **Automatic anti-bot bypass** for all operations
195
+
196
+ ## Environment Variables
197
+
198
+ - `SCITEX_SCHOLAR_ZENROWS_API_KEY`: Your ZenRows API key (auto-enables stealth)
199
+ - `SCITEX_SCHOLAR_OPENATHENS_EMAIL`: Email for OpenAthens authentication
200
+ - `SCITEX_SCHOLAR_OPENURL_RESOLVER_URL`: Your institutional OpenURL resolver
201
+
202
+ ## Architecture
203
+
204
+ ```
205
+ OpenURL Resolvers
206
+ ├── _OpenURLResolver.py # Base implementation with Playwright
207
+ ├── _OpenURLResolverWithZenRows.py # API-based ZenRows integration
208
+ ├── _ZenRowsOpenURLResolver.py # Browser-based ZenRows integration
209
+ └── _ResolverLinkFinder.py # Shared link detection logic
210
+
211
+ Browser Managers
212
+ ├── _BrowserManager.py # Standard local browser
213
+ ├── _ProxyBrowserManager.py # Local browser + proxy routing
214
+ └── _ZenRowsBrowserManager.py # Cloud browser instances
215
+ ```
216
+
217
+ The separation ensures:
218
+ - Clean architecture with single responsibility
219
+ - Easy switching between implementations
220
+ - No interference with other browser-based operations
221
+ - Flexibility to use different strategies for different papers
222
+
223
+ <!-- EOF -->