scitex 2.17.0__py3-none-any.whl → 2.17.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73) hide show
  1. scitex/_dev/__init__.py +122 -0
  2. scitex/_dev/_config.py +391 -0
  3. scitex/_dev/_dashboard/__init__.py +11 -0
  4. scitex/_dev/_dashboard/_app.py +89 -0
  5. scitex/_dev/_dashboard/_routes.py +169 -0
  6. scitex/_dev/_dashboard/_scripts.py +301 -0
  7. scitex/_dev/_dashboard/_styles.py +205 -0
  8. scitex/_dev/_dashboard/_templates.py +117 -0
  9. scitex/_dev/_dashboard/static/version-dashboard-favicon.svg +12 -0
  10. scitex/_dev/_ecosystem.py +109 -0
  11. scitex/_dev/_github.py +360 -0
  12. scitex/_dev/_mcp/__init__.py +11 -0
  13. scitex/_dev/_mcp/handlers.py +182 -0
  14. scitex/_dev/_ssh.py +332 -0
  15. scitex/_dev/_versions.py +272 -0
  16. scitex/_mcp_tools/__init__.py +2 -0
  17. scitex/_mcp_tools/dev.py +186 -0
  18. scitex/audio/_audio_check.py +84 -41
  19. scitex/cli/capture.py +45 -22
  20. scitex/cli/dev.py +494 -0
  21. scitex/cli/main.py +2 -0
  22. scitex/cli/stats.py +48 -20
  23. scitex/cli/verify.py +33 -36
  24. scitex/plt/__init__.py +16 -6
  25. scitex/scholar/url_finder/.tmp/open_url/KNOWN_RESOLVERS.py +462 -0
  26. scitex/scholar/url_finder/.tmp/open_url/README.md +223 -0
  27. scitex/scholar/url_finder/.tmp/open_url/_DOIToURLResolver.py +694 -0
  28. scitex/scholar/url_finder/.tmp/open_url/_OpenURLResolver.py +1160 -0
  29. scitex/scholar/url_finder/.tmp/open_url/_ResolverLinkFinder.py +344 -0
  30. scitex/scholar/url_finder/.tmp/open_url/__init__.py +24 -0
  31. scitex/template/__init__.py +18 -1
  32. scitex/template/clone_research_minimal.py +111 -0
  33. scitex/verify/README.md +0 -12
  34. scitex/verify/__init__.py +0 -4
  35. scitex/verify/_visualize.py +0 -4
  36. scitex/verify/_viz/__init__.py +0 -18
  37. {scitex-2.17.0.dist-info → scitex-2.17.3.dist-info}/METADATA +2 -1
  38. {scitex-2.17.0.dist-info → scitex-2.17.3.dist-info}/RECORD +41 -49
  39. scitex/dev/plt/data/mpl/PLOTTING_FUNCTIONS.yaml +0 -90
  40. scitex/dev/plt/data/mpl/PLOTTING_SIGNATURES.yaml +0 -1571
  41. scitex/dev/plt/data/mpl/PLOTTING_SIGNATURES_DETAILED.yaml +0 -6262
  42. scitex/dev/plt/data/mpl/SIGNATURES_FLATTENED.yaml +0 -1274
  43. scitex/dev/plt/data/mpl/dir_ax.txt +0 -459
  44. scitex/scholar/data/.gitkeep +0 -0
  45. scitex/scholar/data/README.md +0 -44
  46. scitex/scholar/data/bib_files/bibliography.bib +0 -1952
  47. scitex/scholar/data/bib_files/neurovista.bib +0 -277
  48. scitex/scholar/data/bib_files/neurovista_enriched.bib +0 -441
  49. scitex/scholar/data/bib_files/neurovista_enriched_enriched.bib +0 -441
  50. scitex/scholar/data/bib_files/neurovista_processed.bib +0 -338
  51. scitex/scholar/data/bib_files/openaccess.bib +0 -89
  52. scitex/scholar/data/bib_files/pac-seizure_prediction_enriched.bib +0 -2178
  53. scitex/scholar/data/bib_files/pac.bib +0 -698
  54. scitex/scholar/data/bib_files/pac_enriched.bib +0 -1061
  55. scitex/scholar/data/bib_files/pac_processed.bib +0 -0
  56. scitex/scholar/data/bib_files/pac_titles.txt +0 -75
  57. scitex/scholar/data/bib_files/paywalled.bib +0 -98
  58. scitex/scholar/data/bib_files/related-papers-by-coauthors.bib +0 -58
  59. scitex/scholar/data/bib_files/related-papers-by-coauthors_enriched.bib +0 -87
  60. scitex/scholar/data/bib_files/seizure_prediction.bib +0 -694
  61. scitex/scholar/data/bib_files/seizure_prediction_processed.bib +0 -0
  62. scitex/scholar/data/bib_files/test_complete_enriched.bib +0 -437
  63. scitex/scholar/data/bib_files/test_final_enriched.bib +0 -437
  64. scitex/scholar/data/bib_files/test_seizure.bib +0 -46
  65. scitex/scholar/data/impact_factor/JCR_IF_2022.xlsx +0 -0
  66. scitex/scholar/data/impact_factor/JCR_IF_2024.db +0 -0
  67. scitex/scholar/data/impact_factor/JCR_IF_2024.xlsx +0 -0
  68. scitex/scholar/data/impact_factor/JCR_IF_2024_v01.db +0 -0
  69. scitex/scholar/data/impact_factor.db +0 -0
  70. scitex/verify/_viz/_plotly.py +0 -193
  71. {scitex-2.17.0.dist-info → scitex-2.17.3.dist-info}/WHEEL +0 -0
  72. {scitex-2.17.0.dist-info → scitex-2.17.3.dist-info}/entry_points.txt +0 -0
  73. {scitex-2.17.0.dist-info → scitex-2.17.3.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,223 @@
1
+ <!-- ---
2
+ !-- Timestamp: 2025-08-03 00:51:52
3
+ !-- Author: ywatanabe
4
+ !-- File: /home/ywatanabe/proj/scitex_repo/src/scitex/scholar/open_url/README.md
5
+ !-- --- -->
6
+
7
+ # OpenURL Resolvers
8
+
9
+ This module provides OpenURL resolver implementations with automatic ZenRows integration when an API key is present.
10
+
11
+ **Key Feature**: ZenRows stealth browser is automatically enabled when `SCITEX_SCHOLAR_ZENROWS_API_KEY` is set, providing:
12
+ - 🛡️ Anti-bot protection with residential IPs
13
+ - 🌐 Full browser control for authentication
14
+ - 🚀 Automatic bypass of rate limits and CAPTCHAs
15
+
16
+ ## 1. OpenURLResolver (Standard)
17
+
18
+ The standard browser-based resolver using Playwright.
19
+
20
+ **Best for:**
21
+ - Authenticated access to paywalled content
22
+ - Complex JavaScript-based authentication flows
23
+ - Sites that require real browser interactions
24
+
25
+ **Limitations:**
26
+ - Can be blocked by anti-bot measures
27
+ - May encounter CAPTCHAs or rate limits
28
+
29
+ ```python
30
+ from scitex.scholar.open_url import OpenURLResolver
31
+ from scitex.scholar.auth import AuthenticationManager
32
+
33
+ auth_manager = AuthenticationManager(email_openathens="your@email.com")
34
+ resolver = OpenURLResolver(auth_manager, "https://your.resolver.url/")
35
+
36
+ result = await resolver.resolve_async(doi="10.1038/nature12373")
37
+ ```
38
+
39
+ ## 2. OpenURLResolverWithZenRows (API-based)
40
+
41
+ Uses ZenRows API to bypass anti-bot detection while making HTTP requests.
42
+
43
+ **Best for:**
44
+ - High-volume resolution tasks
45
+ - Bypassing rate limits and IP blocks
46
+ - Open access content detection
47
+
48
+ **Limitations:**
49
+ - Cannot execute JavaScript (no popup handling)
50
+ - Limited authentication cookie transfer to publishers
51
+ - May show "Purchase" for paywalled content even with auth
52
+
53
+ ```python
54
+ from scitex.scholar.open_url import OpenURLResolverWithZenRows
55
+
56
+ resolver = OpenURLResolverWithZenRows(
57
+ auth_manager,
58
+ resolver_url,
59
+ zenrows_api_key="your_api_key" # or set SCITEX_SCHOLAR_ZENROWS_API_KEY
60
+ )
61
+
62
+ result = await resolver.resolve_async(doi="10.1038/nature12373")
63
+ ```
64
+
65
+ ## 3. ZenRowsOpenURLResolver (Browser-based)
66
+
67
+ Uses ZenRows Scraping Browser service - cloud-based Chrome instances with anti-bot bypass.
68
+
69
+ **Best for:**
70
+ - Sites with aggressive anti-bot protection (e.g., PNAS)
71
+ - Maintaining full authentication context
72
+ - JavaScript-heavy authentication flows with anti-bot measures
73
+
74
+ **Limitations:**
75
+ - Requires ZenRows API key
76
+ - Slightly slower due to remote browser
77
+ - May have concurrency limits based on plan
78
+
79
+ ```python
80
+ from scitex.scholar.open_url import ZenRowsOpenURLResolver
81
+
82
+ resolver = ZenRowsOpenURLResolver(
83
+ auth_manager,
84
+ resolver_url,
85
+ zenrows_api_key="your_api_key" # or set SCITEX_SCHOLAR_ZENROWS_API_KEY
86
+ )
87
+
88
+ result = await resolver.resolve_async(doi="10.1073/pnas.0608765104")
89
+ ```
90
+
91
+ ## Usage Example (Synchronous)
92
+
93
+ ```python
94
+ from scitex.scholar.open_url import OpenURLResolver, ZenRowsOpenURLResolver
95
+ from scitex.scholar.auth import AuthenticationManager
96
+ import os
97
+ from scitex import logging
98
+
99
+ # Enable debug logging
100
+ logger = logging.getLogger()
101
+ logger.setLevel(logging.DEBUG)
102
+
103
+ # Initialize authentication
104
+ auth_manager = AuthenticationManager(
105
+ email_openathens=os.getenv("SCITEX_SCHOLAR_OPENATHENS_EMAIL")
106
+ )
107
+ is_authenticate_async = await auth_manager.is_authenticate_async()
108
+
109
+ # Choose your resolver
110
+ # Standard browser-based resolver
111
+ resolver = OpenURLResolver(
112
+ auth_manager,
113
+ os.getenv("SCITEX_SCHOLAR_OPENURL_RESOLVER_URL")
114
+ )
115
+
116
+
117
+ # # OR: ZenRows cloud browser resolver (for anti-bot bypass)
118
+ # resolver = ZenRowsOpenURLResolver(
119
+ # auth_manager,
120
+ # os.getenv("SCITEX_SCHOLAR_OPENURL_RESOLVER_URL"),
121
+ # os.getenv("SCITEX_SCHOLAR_ZENROWS_API_KEY"))
122
+
123
+
124
+ # DOIs to resolve
125
+ dois = [
126
+ "10.1038/nature12373",
127
+ "10.1016/j.neuron.2018.01.048",
128
+ "10.1126/science.1172133",
129
+ "10.1073/pnas.0608765104",
130
+ ]
131
+
132
+ # "10.1002/hipo.22488",
133
+ # # Resolve single DOI
134
+ # result = resolver._resolve_single(doi=dois[0])
135
+
136
+ # Resolve multiple DOIs in parallel
137
+ results = resolver.resolve(dois)
138
+ ```
139
+
140
+ ## Choosing the Right Resolver
141
+
142
+ | Scenario | Recommended Resolver |
143
+ |----------|---------------------|
144
+ | General academic paper access | OpenURLResolver |
145
+ | High-volume batch processing | OpenURLResolverWithZenRows |
146
+ | Sites blocking normal browsers | ZenRowsOpenURLResolver |
147
+ | PNAS, sites with "unusual traffic" errors | ZenRowsOpenURLResolver |
148
+ | Need full JavaScript execution + anti-bot | ZenRowsOpenURLResolver |
149
+
150
+ ## Automatic Fallback Strategy
151
+
152
+ You can implement automatic fallback between resolvers:
153
+
154
+ ```python
155
+ async def resolve_with_fallback_async(doi, metadata):
156
+ # Try standard resolver first
157
+ result = await standard_resolver.resolve_async(doi=doi, **metadata)
158
+
159
+ if result and result.get('success'):
160
+ return result
161
+
162
+ # Check for anti-bot indicators
163
+ if result and result.get('access_type') in ['captcha_required', 'rate_limited']:
164
+ # Try ZenRows browser resolver
165
+ return await zenrows_browser_resolver.resolve_async(doi=doi, **metadata)
166
+
167
+ return result
168
+ ```
169
+
170
+ ## NEW: Simplified ZenRows Stealth Browser (Recommended)
171
+
172
+ As of the latest update, ZenRows stealth capabilities are automatically integrated when the API key is present:
173
+
174
+ ```python
175
+ # Just set the API key - ZenRows stealth is automatically enabled!
176
+ os.environ["SCITEX_SCHOLAR_ZENROWS_API_KEY"] = "your_api_key"
177
+
178
+ from scitex.scholar import Scholar
179
+
180
+ # Scholar automatically uses ZenRows stealth browser
181
+ scholar = Scholar()
182
+
183
+ # Download with automatic anti-bot protection
184
+ papers = await scholar.download_pdf_asyncs_async(
185
+ ["10.1038/nature12373", "10.1073/pnas.0608765104"],
186
+ show_async_progress=True
187
+ )
188
+ ```
189
+
190
+ This provides:
191
+ - **Local browser window** you can see and interact with
192
+ - **ZenRows proxy** for clean residential IPs
193
+ - **Manual login** capability for complex SSO/2FA
194
+ - **Automatic anti-bot bypass** for all operations
195
+
196
+ ## Environment Variables
197
+
198
+ - `SCITEX_SCHOLAR_ZENROWS_API_KEY`: Your ZenRows API key (auto-enables stealth)
199
+ - `SCITEX_SCHOLAR_OPENATHENS_EMAIL`: Email for OpenAthens authentication
200
+ - `SCITEX_SCHOLAR_OPENURL_RESOLVER_URL`: Your institutional OpenURL resolver
201
+
202
+ ## Architecture
203
+
204
+ ```
205
+ OpenURL Resolvers
206
+ ├── _OpenURLResolver.py # Base implementation with Playwright
207
+ ├── _OpenURLResolverWithZenRows.py # API-based ZenRows integration
208
+ ├── _ZenRowsOpenURLResolver.py # Browser-based ZenRows integration
209
+ └── _ResolverLinkFinder.py # Shared link detection logic
210
+
211
+ Browser Managers
212
+ ├── _BrowserManager.py # Standard local browser
213
+ ├── _ProxyBrowserManager.py # Local browser + proxy routing
214
+ └── _ZenRowsBrowserManager.py # Cloud browser instances
215
+ ```
216
+
217
+ The separation ensures:
218
+ - Clean architecture with single responsibility
219
+ - Easy switching between implementations
220
+ - No interference with other browser-based operations
221
+ - Flexibility to use different strategies for different papers
222
+
223
+ <!-- EOF -->