scitex 2.4.1__py3-none-any.whl → 2.4.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36) hide show
  1. scitex/__version__.py +1 -1
  2. scitex/browser/__init__.py +53 -0
  3. scitex/browser/auth/__init__.py +35 -0
  4. scitex/browser/auth/google.py +381 -0
  5. scitex/browser/collaboration/__init__.py +5 -0
  6. scitex/browser/debugging/__init__.py +56 -0
  7. scitex/browser/debugging/_failure_capture.py +372 -0
  8. scitex/browser/debugging/_sync_session.py +259 -0
  9. scitex/browser/debugging/_test_monitor.py +284 -0
  10. scitex/browser/debugging/_visual_cursor.py +432 -0
  11. scitex/scholar/citation_graph/README.md +117 -0
  12. scitex/scholar/citation_graph/__init__.py +29 -0
  13. scitex/scholar/citation_graph/builder.py +214 -0
  14. scitex/scholar/citation_graph/database.py +246 -0
  15. scitex/scholar/citation_graph/example.py +96 -0
  16. scitex/scholar/citation_graph/models.py +80 -0
  17. scitex/scholar/config/ScholarConfig.py +23 -3
  18. scitex/scholar/config/default.yaml +56 -0
  19. scitex/scholar/core/Paper.py +102 -0
  20. scitex/scholar/core/__init__.py +44 -0
  21. scitex/scholar/core/journal_normalizer.py +524 -0
  22. scitex/scholar/core/oa_cache.py +285 -0
  23. scitex/scholar/core/open_access.py +457 -0
  24. scitex/scholar/metadata_engines/ScholarEngine.py +9 -1
  25. scitex/scholar/metadata_engines/individual/CrossRefLocalEngine.py +82 -21
  26. scitex/scholar/pdf_download/ScholarPDFDownloader.py +137 -0
  27. scitex/scholar/pdf_download/strategies/__init__.py +6 -0
  28. scitex/scholar/pdf_download/strategies/open_access_download.py +186 -0
  29. scitex/scholar/pipelines/ScholarPipelineSearchParallel.py +27 -9
  30. scitex/scholar/pipelines/ScholarPipelineSearchSingle.py +24 -8
  31. scitex/scholar/search_engines/ScholarSearchEngine.py +6 -1
  32. {scitex-2.4.1.dist-info → scitex-2.4.3.dist-info}/METADATA +1 -1
  33. {scitex-2.4.1.dist-info → scitex-2.4.3.dist-info}/RECORD +36 -20
  34. {scitex-2.4.1.dist-info → scitex-2.4.3.dist-info}/WHEEL +0 -0
  35. {scitex-2.4.1.dist-info → scitex-2.4.3.dist-info}/entry_points.txt +0 -0
  36. {scitex-2.4.1.dist-info → scitex-2.4.3.dist-info}/licenses/LICENSE +0 -0
scitex/__version__.py CHANGED
@@ -9,6 +9,6 @@ __FILE__ = "./src/scitex/__version__.py"
9
9
  __DIR__ = os.path.dirname(__FILE__)
10
10
  # ----------------------------------------
11
11
 
12
- __version__ = "2.3.0"
12
+ __version__ = "2.4.3"
13
13
 
14
14
  # EOF
@@ -8,6 +8,32 @@ from .debugging import (
8
8
  browser_logger,
9
9
  show_grid_async,
10
10
  highlight_element_async,
11
+ # Visual cursor/feedback utilities (sync and async)
12
+ inject_visual_effects,
13
+ inject_visual_effects_async,
14
+ show_cursor_at,
15
+ show_cursor_at_async,
16
+ show_click_effect,
17
+ show_click_effect_async,
18
+ show_step,
19
+ show_step_async,
20
+ show_test_result,
21
+ show_test_result_async,
22
+ # Failure capture utilities (mirrors console-interceptor.ts)
23
+ setup_console_interceptor,
24
+ collect_console_logs,
25
+ collect_console_logs_detailed,
26
+ format_logs_devtools_style,
27
+ save_failure_artifacts,
28
+ create_failure_capture_fixture,
29
+ # Test monitoring (periodic screenshots via scitex.capture)
30
+ TestMonitor,
31
+ create_test_monitor_fixture,
32
+ monitor_test,
33
+ # Sync browser session for zombie prevention
34
+ SyncBrowserSession,
35
+ sync_browser_session,
36
+ create_browser_session_fixture,
11
37
  )
12
38
 
13
39
  # PDF utilities
@@ -31,6 +57,33 @@ __all__ = [
31
57
  "browser_logger",
32
58
  "show_grid_async",
33
59
  "highlight_element_async",
60
+ # Visual cursor/feedback (sync)
61
+ "inject_visual_effects",
62
+ "show_cursor_at",
63
+ "show_click_effect",
64
+ "show_step",
65
+ "show_test_result",
66
+ # Visual cursor/feedback (async)
67
+ "inject_visual_effects_async",
68
+ "show_cursor_at_async",
69
+ "show_click_effect_async",
70
+ "show_step_async",
71
+ "show_test_result_async",
72
+ # Failure capture utilities (mirrors console-interceptor.ts)
73
+ "setup_console_interceptor",
74
+ "collect_console_logs",
75
+ "collect_console_logs_detailed",
76
+ "format_logs_devtools_style",
77
+ "save_failure_artifacts",
78
+ "create_failure_capture_fixture",
79
+ # Test monitoring (periodic screenshots via scitex.capture)
80
+ "TestMonitor",
81
+ "create_test_monitor_fixture",
82
+ "monitor_test",
83
+ # Sync browser session for zombie prevention
84
+ "SyncBrowserSession",
85
+ "sync_browser_session",
86
+ "create_browser_session_fixture",
34
87
 
35
88
  # PDF
36
89
  "detect_chrome_pdf_viewer_async",
@@ -0,0 +1,35 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ # Timestamp: "2025-12-04 (ywatanabe)"
4
+ # File: ./src/scitex/browser/auth/__init__.py
5
+ # ----------------------------------------
6
+ """
7
+ scitex.browser.auth - Authentication helpers for browser automation.
8
+
9
+ Provides reusable authentication handlers for various OAuth providers
10
+ and login flows used in browser automation tasks.
11
+
12
+ Features:
13
+ - Google OAuth (popup-based flow)
14
+ - Django session auth
15
+ - Generic credential management
16
+
17
+ Example:
18
+ from scitex.browser.auth import GoogleAuthHelper, google_login
19
+
20
+ # Quick login
21
+ success = await google_login(page, "user@gmail.com", "password")
22
+
23
+ # Or with helper class
24
+ auth = GoogleAuthHelper(email="user@gmail.com", password="password")
25
+ success = await auth.login_via_google_button(page)
26
+ """
27
+
28
+ from .google import GoogleAuthHelper, google_login
29
+
30
+ __all__ = [
31
+ "GoogleAuthHelper",
32
+ "google_login",
33
+ ]
34
+
35
+ # EOF
@@ -0,0 +1,381 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ # Timestamp: "2025-12-04 (ywatanabe)"
4
+ # File: ./src/scitex/browser/auth/google.py
5
+ # ----------------------------------------
6
+ """
7
+ Google OAuth authentication helper for Playwright.
8
+
9
+ Handles Google OAuth popup flow for services that use "Continue with Google".
10
+
11
+ Example:
12
+ from scitex.browser.auth import GoogleAuthHelper
13
+
14
+ auth = GoogleAuthHelper(
15
+ email="user@gmail.com",
16
+ password="your_password",
17
+ )
18
+
19
+ # Login to a service that uses Google OAuth
20
+ success = await auth.login_via_google_button(page, 'button:has-text("Continue with Google")')
21
+ """
22
+
23
+ import os
24
+ import sys
25
+ from typing import Optional
26
+ from playwright.async_api import Page
27
+
28
+
29
class GoogleAuthHelper:
    """
    Google OAuth authentication helper.

    Handles the popup-based Google OAuth flow used by many services:
    click the provider button, fill email and password in the popup,
    wait out an optional 2FA prompt, accept consent screens, and treat
    the popup closing (or leaving accounts.google.com) as success.

    Environment Variables:
        GOOGLE_EMAIL: Default email if not provided
        GOOGLE_PASSWORD: Default password if not provided
        GOOGLE_AUTH_DEBUG: Any non-empty value enables debug logging
    """

    def __init__(
        self,
        email: Optional[str] = None,
        password: Optional[str] = None,
        debug: bool = False,
    ):
        """
        Initialize GoogleAuthHelper.

        Args:
            email: Google account email (falls back to GOOGLE_EMAIL, then "")
            password: Google account password (falls back to GOOGLE_PASSWORD, then "")
            debug: Print debug messages to stderr (also enabled when
                GOOGLE_AUTH_DEBUG is set to a non-empty value)
        """
        self.email = email or os.getenv("GOOGLE_EMAIL", "")
        self.password = password or os.getenv("GOOGLE_PASSWORD", "")
        self.debug = debug or bool(os.getenv("GOOGLE_AUTH_DEBUG"))

    def _log(self, msg: str):
        """Print debug message to stderr if debug mode is enabled."""
        if self.debug:
            print(f"[GoogleAuth] {msg}", file=sys.stderr)

    async def _pause(self, popup: Page, ms: int) -> bool:
        """
        Sleep on the popup for ``ms`` milliseconds, tolerating its closure.

        Returns:
            True if the popup is still open after the wait, False if it was
            already closed or closed while waiting. Errors raised while the
            popup is still open are re-raised unchanged.
        """
        if popup.is_closed():
            return False
        try:
            await popup.wait_for_timeout(ms)
        except Exception:
            # A wait interrupted by the popup closing is expected, not an error.
            if popup.is_closed():
                return False
            raise
        return not popup.is_closed()

    async def login_via_google_button(
        self,
        page: Page,
        google_button_selector: str = 'button:has-text("Continue with Google")',
        timeout: int = 60000,
    ) -> bool:
        """
        Perform Google OAuth login via a "Continue with Google" button.

        This handles the popup-based OAuth flow:
        1. Click the Google button on the main page
        2. Handle the Google popup for email/password entry
        3. Wait for redirect back to the original service

        Args:
            page: Playwright Page object (the main page with the Google button)
            google_button_selector: CSS selector for the Google login button
            timeout: Maximum time to wait for the popup to open (ms)

        Returns:
            True if login successful, False otherwise. Errors are logged
            (in debug mode) and reported as False rather than raised.
        """
        try:
            # Find the Google button
            google_btn = await page.query_selector(google_button_selector)
            if not google_btn:
                # Try alternative selectors
                alternatives = [
                    'button:has-text("Google")',
                    '[data-testid="google-login"]',
                    'button >> text=Continue with Google',
                    'button >> text=Sign in with Google',
                ]
                for selector in alternatives:
                    try:
                        google_btn = await page.query_selector(selector)
                        if google_btn:
                            break
                    except Exception:
                        # A selector that fails to evaluate is skipped;
                        # cancellation is deliberately NOT swallowed here.
                        continue

            if not google_btn:
                self._log("Google button not found")
                return False

            self._log("Found Google button, clicking...")

            # Google OAuth opens in a popup - listen for it
            async with page.context.expect_page(timeout=timeout) as popup_info:
                await google_btn.click()

            popup = await popup_info.value
            self._log(f"Popup opened: {popup.url[:100]}...")

            # Handle Google OAuth in popup
            success = await self._handle_google_popup(popup, timeout)

            if success:
                # Wait for main page to update after OAuth completes
                await page.wait_for_timeout(3000)
                self._log(f"Login complete, main page URL: {page.url}")

            return success

        except Exception as e:
            self._log(f"Login error: {e}")
            return False

    async def _handle_google_popup(self, popup: Page, timeout: int = 60000) -> bool:
        """
        Handle the Google OAuth popup flow.

        Args:
            popup: The Google OAuth popup page
            timeout: Accepted for interface compatibility; the individual
                waits in this method use their own fixed durations.

        Returns:
            True if authentication successful, False otherwise
        """
        try:
            # Wait for Google login page to load
            await popup.wait_for_load_state("domcontentloaded")
            await popup.wait_for_timeout(2000)

            # Step 1: Enter email
            email_filled = await self._fill_email(popup)
            if not email_filled:
                self._log("Failed to fill email")
                return False

            # Step 2: Wait for password page and enter password
            password_filled = await self._fill_password(popup)
            if not password_filled:
                self._log("Failed to fill password")
                return False

            # Step 3: Wait for popup to close (indicates success)
            if popup.is_closed():
                # Already gone: no close event will ever fire, so don't wait.
                self._log("Popup closed - login successful")
                return True
            try:
                await popup.wait_for_event("close", timeout=20000)
                self._log("Popup closed - login successful")
                return True
            except Exception:
                if popup.is_closed():
                    self._log("Popup closed - login successful")
                    return True
                # Check if we're still on Google or redirected
                current_url = popup.url
                if "accounts.google.com" not in current_url:
                    self._log("Redirected away from Google - login successful")
                    return True
                self._log("Popup didn't close - possible error")
                return False

        except Exception as e:
            self._log(f"Popup handling error: {e}")
            return False

    async def _fill_email(self, popup: Page) -> bool:
        """
        Fill email on Google login page and click Next.

        Returns:
            True if the email was entered and a Next button was clicked,
            False if the button was missing or any step raised.
        """
        try:
            # Wait for email input
            await popup.wait_for_selector('input[type="email"]', state="visible", timeout=10000)

            self._log(f"Filling email: {self.email}")
            await popup.fill('input[type="email"]', self.email)
            await popup.wait_for_timeout(500)

            # Click Next button
            next_btn = await popup.query_selector('#identifierNext')
            if not next_btn:
                next_btn = await popup.query_selector('button:has-text("Next")')

            if next_btn:
                self._log("Clicking Next after email")
                await next_btn.click()
                await popup.wait_for_timeout(3000)
                return True
            else:
                self._log("Next button not found after email")
                return False

        except Exception as e:
            self._log(f"Email fill error: {e}")
            return False

    async def _fill_password(self, popup: Page) -> bool:
        """
        Fill password on Google login page, then handle 2FA and consent.

        Returns:
            True if the password was submitted and the follow-up steps
            completed (including the popup closing right after submit),
            False if the Next button was missing, 2FA failed, or a step raised.
        """
        try:
            # Wait for password page to load (Google transitions between pages)
            self._log("Waiting for password page...")

            # Wait for password input to become visible
            await popup.wait_for_selector(
                'input[type="password"]',
                state="visible",
                timeout=15000
            )

            self._log("Filling password")
            await popup.fill('input[type="password"]', self.password)
            await popup.wait_for_timeout(500)

            # Click Next button
            next_btn = await popup.query_selector('#passwordNext')
            if not next_btn:
                next_btn = await popup.query_selector('button:has-text("Next")')

            if next_btn:
                self._log("Clicking Next after password")
                await next_btn.click()
                # The popup may close as soon as the password is accepted;
                # that is the success signal, not a failure of this step.
                if not await self._pause(popup, 5000):
                    self._log("Popup closed after password submit")
                    return True

                # Handle 2FA if present
                twofa_ok = await self._wait_for_2fa(popup, timeout=60000)
                if not twofa_ok:
                    return False

                # Handle potential consent/continue screens
                await self._handle_consent_screens(popup)

                return True
            else:
                self._log("Next button not found after password")
                return False

        except Exception as e:
            self._log(f"Password fill error: {e}")
            return False

    async def _handle_consent_screens(self, popup: Page) -> None:
        """
        Handle OAuth consent or 'Continue' screens that may appear.

        Best-effort: clicks the first visible matching button, if any.
        Failures are logged and never raised.
        """
        try:
            if popup.is_closed():
                # Nothing left to click once the popup is gone.
                return

            # Check for Continue button (consent screen)
            continue_selectors = [
                'button:has-text("Continue")',
                'button:has-text("Allow")',
                '#submit_approve_access',
                'button[data-idom-class*="continue"]',
            ]

            for selector in continue_selectors:
                try:
                    btn = await popup.query_selector(selector)
                    if btn and await btn.is_visible():
                        self._log(f"Found consent button: {selector}")
                        await btn.click()
                        await self._pause(popup, 3000)
                        break
                except Exception:
                    continue

        except Exception as e:
            self._log(f"Consent handling: {e}")

    async def _wait_for_2fa(self, popup: Page, timeout: int = 60000) -> bool:
        """
        Wait for 2FA verification to complete.

        Detects 2FA screens by page text and waits for the user to approve
        on their device. The popup closing, or navigating away from
        accounts.google.com, counts as completion.

        Args:
            popup: The Google OAuth popup page
            timeout: Maximum time to wait for 2FA (ms)

        Returns:
            True if no 2FA page was shown or 2FA completed, False if it
            timed out or an error occurred while the popup was still open
        """
        try:
            if popup.is_closed():
                self._log("Popup already closed - no 2FA needed")
                return True

            # Check if we're on a 2FA page
            page_text = await popup.inner_text("body")
            twofa_indicators = [
                "2-Step Verification",
                "Verify it's you",
                "confirm it's you",
                "Open the Gmail app",
                "Check your phone",
            ]

            lowered_text = page_text.lower()
            is_2fa = any(indicator.lower() in lowered_text for indicator in twofa_indicators)

            if is_2fa:
                self._log("2FA detected - waiting for user approval...")
                # Wait for popup to close or URL to change (indicating 2FA success)
                start_url = popup.url
                check_interval = 2000  # Check every 2 seconds
                elapsed = 0

                while elapsed < timeout:
                    if not await self._pause(popup, check_interval):
                        self._log("2FA completed - popup closed")
                        return True
                    elapsed += check_interval

                    current_url = popup.url
                    if current_url != start_url and "accounts.google.com" not in current_url:
                        self._log("2FA completed - redirected")
                        return True

                self._log("2FA timeout")
                return False

            return True  # Not a 2FA page

        except Exception as e:
            # A call interrupted by the popup closing means the flow finished.
            if popup.is_closed():
                self._log("2FA completed - popup closed")
                return True
            self._log(f"2FA check error: {e}")
            return False

    async def is_logged_in(self, page: Page, login_indicators: Optional[list] = None) -> bool:
        """
        Check if user appears to be logged in, judged by the page URL only.

        Args:
            page: Page to check
            login_indicators: List of URL substrings that indicate NOT logged in
                (default: ["login", "signin", "oauth", "accounts.google.com"]).
                Matching is case-insensitive.

        Returns:
            True if no indicator occurs in the current URL, False otherwise
        """
        if login_indicators is None:
            login_indicators = ["login", "signin", "oauth", "accounts.google.com"]

        current_url = page.url.lower()
        return not any(indicator.lower() in current_url for indicator in login_indicators)
351
+
352
+
353
+ # Convenience function for quick usage
354
async def google_login(
    page: Page,
    email: str,
    password: str,
    button_selector: str = 'button:has-text("Continue with Google")',
    debug: bool = False,
) -> bool:
    """
    One-shot Google OAuth login without managing a helper instance.

    Builds a GoogleAuthHelper from the given credentials and runs its
    button-driven popup flow against ``page``.

    Args:
        page: Playwright Page with Google login button
        email: Google account email
        password: Google account password
        button_selector: CSS selector for Google button
        debug: Print debug messages

    Returns:
        True if login successful, False otherwise

    Example:
        success = await google_login(page, "user@gmail.com", "password")
    """
    helper = GoogleAuthHelper(email=email, password=password, debug=debug)
    logged_in = await helper.login_via_google_button(page, button_selector)
    return logged_in
379
+
380
+
381
+ # EOF
@@ -26,12 +26,17 @@ from .shared_session import SharedBrowserSession, SessionConfig
26
26
  from .visual_feedback import VisualFeedback
27
27
  from .credential_manager import CredentialManager
28
28
 
29
+ # Re-export auth helpers for convenience
30
+ from scitex.browser.auth import GoogleAuthHelper, google_login
31
+
29
32
  # Exports
30
33
  __all__ = [
31
34
  "SharedBrowserSession",
32
35
  "SessionConfig",
33
36
  "VisualFeedback",
34
37
  "CredentialManager",
38
+ "GoogleAuthHelper",
39
+ "google_login",
35
40
  ]
36
41
 
37
42
  # Compatibility check - ensure we don't break existing code
@@ -7,12 +7,68 @@
7
7
  from ._browser_logger import browser_logger
8
8
  from ._show_grid import show_grid_async
9
9
  from ._highlight_element import highlight_element_async
10
+ from ._visual_cursor import (
11
+ inject_visual_effects,
12
+ inject_visual_effects_async,
13
+ show_cursor_at,
14
+ show_cursor_at_async,
15
+ show_click_effect,
16
+ show_click_effect_async,
17
+ show_step,
18
+ show_step_async,
19
+ show_test_result,
20
+ show_test_result_async,
21
+ )
22
+ from ._failure_capture import (
23
+ setup_console_interceptor,
24
+ collect_console_logs,
25
+ collect_console_logs_detailed,
26
+ format_logs_devtools_style,
27
+ save_failure_artifacts,
28
+ create_failure_capture_fixture,
29
+ )
30
+ from ._test_monitor import (
31
+ TestMonitor,
32
+ create_test_monitor_fixture,
33
+ monitor_test,
34
+ )
35
+ from ._sync_session import (
36
+ SyncBrowserSession,
37
+ sync_browser_session,
38
+ create_browser_session_fixture,
39
+ )
10
40
 
11
41
  __all__ = [
12
42
  "log_page_async",
13
43
  "browser_logger",
14
44
  "show_grid_async",
15
45
  "highlight_element_async",
46
+ # Visual cursor/feedback utilities
47
+ "inject_visual_effects",
48
+ "inject_visual_effects_async",
49
+ "show_cursor_at",
50
+ "show_cursor_at_async",
51
+ "show_click_effect",
52
+ "show_click_effect_async",
53
+ "show_step",
54
+ "show_step_async",
55
+ "show_test_result",
56
+ "show_test_result_async",
57
+ # Failure capture utilities
58
+ "setup_console_interceptor",
59
+ "collect_console_logs",
60
+ "collect_console_logs_detailed",
61
+ "format_logs_devtools_style",
62
+ "save_failure_artifacts",
63
+ "create_failure_capture_fixture",
64
+ # Test monitoring (periodic screenshots via scitex.capture)
65
+ "TestMonitor",
66
+ "create_test_monitor_fixture",
67
+ "monitor_test",
68
+ # Sync browser session for zombie prevention
69
+ "SyncBrowserSession",
70
+ "sync_browser_session",
71
+ "create_browser_session_fixture",
16
72
  ]
17
73
 
18
74
  # EOF