PyPI - scitex - Versions diffs - 2.4.1__py3-none-any.whl → 2.4.3__py3-none-any.whl - Mend

scitex 2.4.1__py3-none-any.whl → 2.4.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (36) hide show

scitex/__version__.py +1 -1
scitex/browser/__init__.py +53 -0
scitex/browser/auth/__init__.py +35 -0
scitex/browser/auth/google.py +381 -0
scitex/browser/collaboration/__init__.py +5 -0
scitex/browser/debugging/__init__.py +56 -0
scitex/browser/debugging/_failure_capture.py +372 -0
scitex/browser/debugging/_sync_session.py +259 -0
scitex/browser/debugging/_test_monitor.py +284 -0
scitex/browser/debugging/_visual_cursor.py +432 -0
scitex/scholar/citation_graph/README.md +117 -0
scitex/scholar/citation_graph/__init__.py +29 -0
scitex/scholar/citation_graph/builder.py +214 -0
scitex/scholar/citation_graph/database.py +246 -0
scitex/scholar/citation_graph/example.py +96 -0
scitex/scholar/citation_graph/models.py +80 -0
scitex/scholar/config/ScholarConfig.py +23 -3
scitex/scholar/config/default.yaml +56 -0
scitex/scholar/core/Paper.py +102 -0
scitex/scholar/core/__init__.py +44 -0
scitex/scholar/core/journal_normalizer.py +524 -0
scitex/scholar/core/oa_cache.py +285 -0
scitex/scholar/core/open_access.py +457 -0
scitex/scholar/metadata_engines/ScholarEngine.py +9 -1
scitex/scholar/metadata_engines/individual/CrossRefLocalEngine.py +82 -21
scitex/scholar/pdf_download/ScholarPDFDownloader.py +137 -0
scitex/scholar/pdf_download/strategies/__init__.py +6 -0
scitex/scholar/pdf_download/strategies/open_access_download.py +186 -0
scitex/scholar/pipelines/ScholarPipelineSearchParallel.py +27 -9
scitex/scholar/pipelines/ScholarPipelineSearchSingle.py +24 -8
scitex/scholar/search_engines/ScholarSearchEngine.py +6 -1
{scitex-2.4.1.dist-info → scitex-2.4.3.dist-info}/METADATA +1 -1
{scitex-2.4.1.dist-info → scitex-2.4.3.dist-info}/RECORD +36 -20
{scitex-2.4.1.dist-info → scitex-2.4.3.dist-info}/WHEEL +0 -0
{scitex-2.4.1.dist-info → scitex-2.4.3.dist-info}/entry_points.txt +0 -0
{scitex-2.4.1.dist-info → scitex-2.4.3.dist-info}/licenses/LICENSE +0 -0

scitex/browser/debugging/_failure_capture.py ADDED Viewed

@@ -0,0 +1,372 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+# Timestamp: 2025-12-08
+# File: /home/ywatanabe/proj/scitex-code/src/scitex/browser/debugging/_failure_capture.py
+"""
+Automatic failure capture utilities for Playwright E2E tests.
+Features:
+- Console log collection with source file/line tracking
+- Error interception (JS errors, unhandled promise rejections, resource failures)
+- Screenshot capture on test failure
+- Page HTML capture for debugging
+- DevTools-like formatted output
+- Pytest integration via fixtures
+Based on scitex-cloud's console-interceptor.ts functionality.
+Usage in conftest.py:
+    from scitex.browser.debugging import (
+        setup_console_interceptor,
+        collect_console_logs,
+        save_failure_artifacts,
+        create_failure_capture_fixture,
+    )
+"""
+from datetime import datetime
+from pathlib import Path
+from typing import TYPE_CHECKING
+if TYPE_CHECKING:
+    from playwright.sync_api import Page
+# JavaScript code for advanced console interception
+# Mirrors functionality from scitex-cloud/static/shared/ts/utils/console-interceptor.ts
+CONSOLE_INTERCEPTOR_JS = """
+() => {
+    if (window._scitex_console_interceptor_setup) return;
+    // Store for captured logs with full details
+    window._scitex_console_logs = [];
+    window._scitex_console_history = [];
+    const maxHistory = 2000;
+    // Store original console methods
+    const originalConsole = {
+        log: console.log,
+        info: console.info,
+        warn: console.warn,
+        error: console.error,
+        debug: console.debug
+    };
+    // Get source file and line number from stack trace
+    function getSource() {
+        try {
+            const stack = new Error().stack;
+            if (!stack) return '';
+            const lines = stack.split('\\n');
+            // Skip Error, getSource, capture, and intercepted console method
+            for (let i = 4; i < lines.length; i++) {
+                const line = lines[i];
+                const match = line.match(/(?:https?:\\/\\/[^\\/]+)?([^\\s]+):(\\d+):(\\d+)/);
+                if (match) {
+                    const [, file, lineNum, col] = match;
+                    const cleanFile = file.split('/').slice(-2).join('/');
+                    return `${cleanFile}:${lineNum}:${col}`;
+                }
+            }
+        } catch (e) {}
+        return '';
+    }
+    // Format message from arguments
+    function formatMessage(args) {
+        return args.map(arg => {
+            if (typeof arg === 'object') {
+                try { return JSON.stringify(arg, null, 2); }
+                catch { return String(arg); }
+            }
+            return String(arg);
+        }).join(' ');
+    }
+    // Capture log entry
+    function capture(level, args) {
+        const message = formatMessage(args);
+        const source = getSource();
+        const entry = {
+            level,
+            message,
+            source,
+            timestamp: Date.now(),
+            url: window.location.href
+        };
+        window._scitex_console_history.push(entry);
+        if (window._scitex_console_history.length > maxHistory) {
+            window._scitex_console_history.shift();
+        }
+        // Also store simple format for backwards compatibility
+        window._scitex_console_logs.push(`[${level.toUpperCase()}] ${source ? source + ' ' : ''}${message}`);
+        if (window._scitex_console_logs.length > 500) {
+            window._scitex_console_logs.shift();
+        }
+    }
+    // Intercept console methods
+    ['log', 'info', 'warn', 'error', 'debug'].forEach(level => {
+        console[level] = function(...args) {
+            originalConsole[level].apply(console, args);
+            capture(level, args);
+        };
+    });
+    // Capture unhandled JavaScript errors
+    window.addEventListener('error', (event) => {
+        let entry;
+        if (event.target && event.target.tagName) {
+            // Resource loading error
+            const target = event.target;
+            const src = target.src || target.href || '';
+            if (src) {
+                entry = {
+                    level: 'error',
+                    message: `Failed to load resource: ${src}`,
+                    source: src.split('/').pop() || '',
+                    timestamp: Date.now(),
+                    url: window.location.href
+                };
+            }
+        } else {
+            // JavaScript error
+            entry = {
+                level: 'error',
+                message: event.message,
+                source: `${event.filename}:${event.lineno}:${event.colno}`,
+                timestamp: Date.now(),
+                url: window.location.href
+            };
+        }
+        if (entry) {
+            window._scitex_console_history.push(entry);
+            window._scitex_console_logs.push(`[ERROR] ${entry.source} ${entry.message}`);
+        }
+    }, true);
+    // Capture unhandled promise rejections
+    window.addEventListener('unhandledrejection', (event) => {
+        const entry = {
+            level: 'error',
+            message: `Uncaught (in promise): ${event.reason}`,
+            source: '',
+            timestamp: Date.now(),
+            url: window.location.href
+        };
+        window._scitex_console_history.push(entry);
+        window._scitex_console_logs.push(`[ERROR] Uncaught (in promise): ${event.reason}`);
+    });
+    window._scitex_console_interceptor_setup = true;
+}
+"""
+def setup_console_interceptor(page: "Page") -> None:
+    """Set up console log interceptor with source tracking and error capture.
+    Features (mirroring console-interceptor.ts):
+    - Intercepts console.log, info, warn, error, debug
+    - Captures source file and line number
+    - Captures unhandled JS errors
+    - Captures unhandled promise rejections
+    - Captures resource loading failures
+    Call this at the start of each test to begin capturing logs.
+    """
+    try:
+        page.evaluate(CONSOLE_INTERCEPTOR_JS)
+    except Exception:
+        pass
+def collect_console_logs(page: "Page") -> list:
+    """Collect all captured console logs from the browser.
+    Returns:
+        List of log strings in format "[LEVEL] source message"
+    """
+    try:
+        logs = page.evaluate("""
+        () => {
+            if (window._scitex_console_logs) {
+                return window._scitex_console_logs;
+            }
+            return [];
+        }
+        """)
+        return logs or []
+    except Exception:
+        return []
+def collect_console_logs_detailed(page: "Page") -> list:
+    """Collect all captured console logs with full details.
+    Returns:
+        List of dicts with keys: level, message, source, timestamp, url
+    """
+    try:
+        history = page.evaluate("""
+        () => {
+            if (window._scitex_console_history) {
+                return window._scitex_console_history;
+            }
+            return [];
+        }
+        """)
+        return history or []
+    except Exception:
+        return []
+def format_logs_devtools_style(logs: list) -> str:
+    """Format logs in DevTools-like style.
+    Args:
+        logs: List of detailed log entries from collect_console_logs_detailed()
+    Returns:
+        Formatted string like browser DevTools output
+    """
+    if not logs:
+        return "No console logs captured."
+    level_icons = {
+        "error": "[ERROR]",
+        "warn": "[WARN]",
+        "info": "[INFO]",
+        "debug": "[DEBUG]",
+        "log": "[LOG]",
+    }
+    output = []
+    for entry in logs:
+        if isinstance(entry, dict):
+            level = entry.get("level", "log")
+            source = entry.get("source", "")
+            message = entry.get("message", "")
+            icon = level_icons.get(level, "[LOG]")
+            source_str = f" {source}" if source else ""
+            output.append(f"{icon}{source_str} {message}")
+        else:
+            output.append(str(entry))
+    return "\n".join(output)
+def save_failure_artifacts(
+    page: "Page",
+    test_name: str,
+    artifacts_dir: Path | str,
+    console_logs: list | None = None,
+) -> dict:
+    """Save screenshot, console logs, and page HTML on test failure.
+    Args:
+        page: Playwright page object
+        test_name: Name of the failed test (e.g., request.node.nodeid)
+        artifacts_dir: Directory to save artifacts
+        console_logs: Pre-collected console logs (optional, will collect if None)
+    Returns:
+        Dict with paths to saved artifacts
+    """
+    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+    safe_test_name = test_name.replace("::", "_").replace("[", "_").replace("]", "").replace("/", "_")
+    # Create artifacts directory with timestamp
+    artifacts_path = Path(artifacts_dir) / timestamp
+    artifacts_path.mkdir(parents=True, exist_ok=True)
+    saved_files = {}
+    # Collect console logs if not provided
+    if console_logs is None:
+        console_logs = collect_console_logs(page)
+    # Save screenshot
+    try:
+        screenshot_path = artifacts_path / f"{safe_test_name}_screenshot.png"
+        page.screenshot(path=str(screenshot_path), full_page=True)
+        saved_files["screenshot"] = screenshot_path
+        print(f"\n[FAILURE] Screenshot saved: {screenshot_path}")
+    except Exception as e:
+        print(f"\n[FAILURE] Failed to save screenshot: {e}")
+    # Save console logs
+    try:
+        logs_path = artifacts_path / f"{safe_test_name}_console.log"
+        with open(logs_path, "w") as f:
+            f.write(f"Test: {test_name}\n")
+            f.write(f"Timestamp: {timestamp}\n")
+            f.write(f"URL: {page.url}\n")
+            f.write("=" * 80 + "\n\n")
+            f.write("Console Logs:\n")
+            f.write("-" * 40 + "\n")
+            for log in console_logs:
+                f.write(f"{log}\n")
+        saved_files["console_logs"] = logs_path
+        print(f"[FAILURE] Console logs saved: {logs_path}")
+    except Exception as e:
+        print(f"[FAILURE] Failed to save console logs: {e}")
+    # Save page HTML
+    try:
+        html_path = artifacts_path / f"{safe_test_name}_page.html"
+        html_content = page.content()
+        with open(html_path, "w") as f:
+            f.write(html_content)
+        saved_files["page_html"] = html_path
+        print(f"[FAILURE] Page HTML saved: {html_path}")
+    except Exception as e:
+        print(f"[FAILURE] Failed to save page HTML: {e}")
+    return saved_files
+def create_failure_capture_fixture(artifacts_dir: Path | str):
+    """Create a pytest fixture for automatic failure capture.
+    Usage in conftest.py:
+        from scitex.browser.debugging import create_failure_capture_fixture
+        capture_on_failure = create_failure_capture_fixture(
+            Path(__file__).parent / "artifacts"
+        )
+    Args:
+        artifacts_dir: Directory to save failure artifacts
+    Returns:
+        A pytest fixture function
+    """
+    import pytest
+    @pytest.fixture(autouse=True)
+    def capture_on_failure(request, page):
+        """Automatically capture console logs and screenshot on test failure."""
+        setup_console_interceptor(page)
+        yield
+        if hasattr(request.node, "rep_call") and request.node.rep_call.failed:
+            console_logs = collect_console_logs(page)
+            save_failure_artifacts(page, request.node.nodeid, artifacts_dir, console_logs)
+    return capture_on_failure
+# Pytest hook for capturing test results - add to conftest.py
+# This is source text to copy, not code executed by this module. The hook it
+# defines stores each phase's report on the test item as ``rep_<when>``; the
+# fixture built by create_failure_capture_fixture() reads ``rep_call`` to
+# decide whether the test failed. The snippet references ``pytest`` without
+# importing it, so the receiving conftest.py must import pytest itself.
+PYTEST_HOOK_CODE = '''
+@pytest.hookimpl(tryfirst=True, hookwrapper=True)
+def pytest_runtest_makereport(item, call):
+    """Hook to capture test outcome for use in fixture."""
+    outcome = yield
+    rep = outcome.get_result()
+    setattr(item, f"rep_{rep.when}", rep)
+'''
+# EOF

scitex/browser/debugging/_sync_session.py ADDED Viewed

@@ -0,0 +1,259 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+# Timestamp: 2025-12-08
+# File: /home/ywatanabe/proj/scitex-code/src/scitex/browser/debugging/_sync_session.py
+"""
+Sync browser session context manager for pytest-playwright E2E tests.
+Ensures proper cleanup of browser processes to prevent zombies.
+Usage in conftest.py:
+    from scitex.browser import SyncBrowserSession
+    @pytest.fixture
+    def browser_session(page: Page):
+        with SyncBrowserSession(page) as session:
+            yield session
+        # Cleanup happens automatically even on exceptions
+Or use the fixture factory:
+    from scitex.browser import create_browser_session_fixture
+    browser_session = create_browser_session_fixture()
+"""
+import atexit
+import os
+import signal
+import subprocess
+from contextlib import contextmanager
+from typing import TYPE_CHECKING, Callable, Optional
+if TYPE_CHECKING:
+    from playwright.sync_api import Page
+class SyncBrowserSession:
+    """
+    Sync context manager for playwright browser sessions.
+    Ensures zombie process cleanup on test failures, timeouts, or crashes.
+    Tracks browser PIDs and kills orphaned processes on exit.
+    """
+    # Class-level tracking of active sessions for emergency cleanup
+    _active_sessions: list["SyncBrowserSession"] = []
+    _cleanup_registered = False
+    def __init__(
+        self,
+        page: "Page",
+        timeout: int = 60,
+        on_enter: Optional[Callable[["Page"], None]] = None,
+        on_exit: Optional[Callable[["Page", bool], None]] = None,
+    ):
+        """
+        Initialize sync browser session.
+        Args:
+            page: Playwright page instance from pytest-playwright
+            timeout: Default timeout for operations in seconds
+            on_enter: Callback when entering context
+            on_exit: Callback when exiting context (receives page and success flag)
+        """
+        self.page = page
+        self.timeout = timeout
+        self.on_enter = on_enter
+        self.on_exit = on_exit
+        self._browser_pid = None
+        self._context_pid = None
+        self._success = True
+        # Register class-level emergency cleanup
+        if not SyncBrowserSession._cleanup_registered:
+            atexit.register(SyncBrowserSession._emergency_cleanup)
+            SyncBrowserSession._cleanup_registered = True
+    def __enter__(self) -> "SyncBrowserSession":
+        """Enter context - track browser PIDs and run setup callback."""
+        # Track this session
+        SyncBrowserSession._active_sessions.append(self)
+        # Try to get browser PID for tracking
+        try:
+            if self.page.context.browser:
+                # Get the browser process
+                browser = self.page.context.browser
+                # Browser PID is available via internal _impl
+                if hasattr(browser, '_impl'):
+                    impl = browser._impl
+                    if hasattr(impl, '_process'):
+                        self._browser_pid = impl._process.pid
+        except Exception:
+            pass  # PID tracking is best-effort
+        # Run setup callback
+        if self.on_enter:
+            self.on_enter(self.page)
+        return self
+    def __exit__(self, exc_type, exc_val, exc_tb) -> bool:
+        """Exit context - ensure cleanup happens."""
+        self._success = exc_type is None
+        # Remove from active sessions
+        try:
+            SyncBrowserSession._active_sessions.remove(self)
+        except ValueError:
+            pass
+        # Run exit callback
+        if self.on_exit:
+            try:
+                self.on_exit(self.page, self._success)
+            except Exception:
+                pass  # Don't fail on callback errors
+        # If there was an exception, try to close gracefully
+        if exc_type is not None:
+            try:
+                self.page.close()
+            except Exception:
+                pass
+            try:
+                self.page.context.close()
+            except Exception:
+                pass
+        # Kill orphaned browser process if we have the PID
+        if self._browser_pid and not self._success:
+            self._kill_process_tree(self._browser_pid)
+        # Don't suppress the exception
+        return False
+    @staticmethod
+    def _kill_process_tree(pid: int):
+        """Kill a process and all its children (zombies)."""
+        try:
+            # Try SIGTERM first
+            os.kill(pid, signal.SIGTERM)
+        except ProcessLookupError:
+            return  # Already dead
+        except PermissionError:
+            return  # Can't kill
+        # Give it a moment
+        import time
+        time.sleep(0.5)
+        # Force kill if still running
+        try:
+            os.kill(pid, signal.SIGKILL)
+        except (ProcessLookupError, PermissionError):
+            pass
+    @classmethod
+    def _emergency_cleanup(cls):
+        """Emergency cleanup of all active sessions on process exit."""
+        for session in cls._active_sessions[:]:  # Copy list to avoid mutation
+            if session._browser_pid:
+                cls._kill_process_tree(session._browser_pid)
+        cls._active_sessions.clear()
+    @staticmethod
+    def kill_zombie_browsers():
+        """Kill all zombie chromium/chrome processes from failed tests.
+        Call this at the start of test sessions to clean up from previous runs.
+        """
+        try:
+            # Find orphaned chromium processes
+            result = subprocess.run(
+                ["pgrep", "-f", "chromium|chrome"],
+                capture_output=True,
+                text=True,
+            )
+            if result.returncode == 0:
+                pids = result.stdout.strip().split('\n')
+                for pid in pids:
+                    if pid:
+                        try:
+                            os.kill(int(pid), signal.SIGKILL)
+                        except (ProcessLookupError, PermissionError, ValueError):
+                            pass
+        except FileNotFoundError:
+            pass  # pgrep not available
+@contextmanager
+def sync_browser_session(
+    page: "Page",
+    timeout: int = 60,
+    on_enter: Optional[Callable[["Page"], None]] = None,
+    on_exit: Optional[Callable[["Page", bool], None]] = None,
+):
+    """
+    Context manager for sync playwright sessions.
+    Usage:
+        with sync_browser_session(page) as session:
+            session.page.goto(url)
+            # ... test code
+        # Cleanup happens automatically
+    """
+    session = SyncBrowserSession(page, timeout, on_enter, on_exit)
+    with session:
+        yield session
+def create_browser_session_fixture(
+    timeout: int = 60,
+    setup: Optional[Callable[["Page"], None]] = None,
+    teardown: Optional[Callable[["Page", bool], None]] = None,
+    kill_zombies_on_start: bool = True,
+):
+    """
+    Create a pytest fixture for browser session with cleanup.
+    Usage in conftest.py:
+        from scitex.browser import create_browser_session_fixture
+        browser_session = create_browser_session_fixture(
+            timeout=60,
+            setup=lambda page: print(f"Starting test"),
+            teardown=lambda page, success: print(f"Test {'passed' if success else 'failed'}"),
+            kill_zombies_on_start=True,
+        )
+    Args:
+        timeout: Default timeout for operations
+        setup: Callback when entering session
+        teardown: Callback when exiting (receives page and success flag)
+        kill_zombies_on_start: Kill orphaned browsers before first test
+    Returns:
+        A pytest fixture function
+    """
+    import pytest
+    _zombies_cleaned = False
+    @pytest.fixture
+    def browser_session(page: "Page"):
+        nonlocal _zombies_cleaned
+        # Clean up zombies from previous runs (once per session)
+        if kill_zombies_on_start and not _zombies_cleaned:
+            SyncBrowserSession.kill_zombie_browsers()
+            _zombies_cleaned = True
+        with SyncBrowserSession(page, timeout, setup, teardown) as session:
+            yield session
+    return browser_session
+# EOF

scitex 2.4.1__py3-none-any.whl → 2.4.3__py3-none-any.whl

scitex 2.4.1__py3-none-any.whl → 2.4.3__py3-none-any.whl