scitex 2.16.0__py3-none-any.whl → 2.16.2__py3-none-any.whl

This diff shows the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between these versions as they appear in their respective public registries.
Files changed (101)
  1. scitex/_mcp_tools/audio.py +11 -65
  2. scitex/audio/README.md +40 -12
  3. scitex/audio/__init__.py +27 -235
  4. scitex/audio/_audio_check.py +93 -0
  5. scitex/audio/_mcp/speak_handlers.py +56 -8
  6. scitex/audio/_speak.py +295 -0
  7. scitex/audio/mcp_server.py +98 -73
  8. scitex/scholar/url_finder/.tmp/open_url/KNOWN_RESOLVERS.py +462 -0
  9. scitex/scholar/url_finder/.tmp/open_url/README.md +223 -0
  10. scitex/scholar/url_finder/.tmp/open_url/_DOIToURLResolver.py +694 -0
  11. scitex/scholar/url_finder/.tmp/open_url/_OpenURLResolver.py +1160 -0
  12. scitex/scholar/url_finder/.tmp/open_url/_ResolverLinkFinder.py +344 -0
  13. scitex/scholar/url_finder/.tmp/open_url/__init__.py +24 -0
  14. scitex/social/__init__.py +1 -24
  15. scitex/writer/README.md +25 -409
  16. scitex/writer/__init__.py +98 -13
  17. {scitex-2.16.0.dist-info → scitex-2.16.2.dist-info}/METADATA +6 -1
  18. {scitex-2.16.0.dist-info → scitex-2.16.2.dist-info}/RECORD +21 -93
  19. scitex/dev/plt/data/mpl/PLOTTING_FUNCTIONS.yaml +0 -90
  20. scitex/dev/plt/data/mpl/PLOTTING_SIGNATURES.yaml +0 -1571
  21. scitex/dev/plt/data/mpl/PLOTTING_SIGNATURES_DETAILED.yaml +0 -6262
  22. scitex/dev/plt/data/mpl/SIGNATURES_FLATTENED.yaml +0 -1274
  23. scitex/dev/plt/data/mpl/dir_ax.txt +0 -459
  24. scitex/scholar/data/.gitkeep +0 -0
  25. scitex/scholar/data/README.md +0 -44
  26. scitex/scholar/data/bib_files/bibliography.bib +0 -1952
  27. scitex/scholar/data/bib_files/neurovista.bib +0 -277
  28. scitex/scholar/data/bib_files/neurovista_enriched.bib +0 -441
  29. scitex/scholar/data/bib_files/neurovista_enriched_enriched.bib +0 -441
  30. scitex/scholar/data/bib_files/neurovista_processed.bib +0 -338
  31. scitex/scholar/data/bib_files/openaccess.bib +0 -89
  32. scitex/scholar/data/bib_files/pac-seizure_prediction_enriched.bib +0 -2178
  33. scitex/scholar/data/bib_files/pac.bib +0 -698
  34. scitex/scholar/data/bib_files/pac_enriched.bib +0 -1061
  35. scitex/scholar/data/bib_files/pac_processed.bib +0 -0
  36. scitex/scholar/data/bib_files/pac_titles.txt +0 -75
  37. scitex/scholar/data/bib_files/paywalled.bib +0 -98
  38. scitex/scholar/data/bib_files/related-papers-by-coauthors.bib +0 -58
  39. scitex/scholar/data/bib_files/related-papers-by-coauthors_enriched.bib +0 -87
  40. scitex/scholar/data/bib_files/seizure_prediction.bib +0 -694
  41. scitex/scholar/data/bib_files/seizure_prediction_processed.bib +0 -0
  42. scitex/scholar/data/bib_files/test_complete_enriched.bib +0 -437
  43. scitex/scholar/data/bib_files/test_final_enriched.bib +0 -437
  44. scitex/scholar/data/bib_files/test_seizure.bib +0 -46
  45. scitex/scholar/data/impact_factor/JCR_IF_2022.xlsx +0 -0
  46. scitex/scholar/data/impact_factor/JCR_IF_2024.db +0 -0
  47. scitex/scholar/data/impact_factor/JCR_IF_2024.xlsx +0 -0
  48. scitex/scholar/data/impact_factor/JCR_IF_2024_v01.db +0 -0
  49. scitex/scholar/data/impact_factor.db +0 -0
  50. scitex/writer/Writer.py +0 -487
  51. scitex/writer/_clone_writer_project.py +0 -160
  52. scitex/writer/_compile/__init__.py +0 -41
  53. scitex/writer/_compile/_compile_async.py +0 -130
  54. scitex/writer/_compile/_compile_unified.py +0 -148
  55. scitex/writer/_compile/_parser.py +0 -63
  56. scitex/writer/_compile/_runner.py +0 -457
  57. scitex/writer/_compile/_validator.py +0 -46
  58. scitex/writer/_compile/manuscript.py +0 -110
  59. scitex/writer/_compile/revision.py +0 -82
  60. scitex/writer/_compile/supplementary.py +0 -100
  61. scitex/writer/_dataclasses/__init__.py +0 -44
  62. scitex/writer/_dataclasses/config/_CONSTANTS.py +0 -46
  63. scitex/writer/_dataclasses/config/_WriterConfig.py +0 -175
  64. scitex/writer/_dataclasses/config/__init__.py +0 -9
  65. scitex/writer/_dataclasses/contents/_ManuscriptContents.py +0 -236
  66. scitex/writer/_dataclasses/contents/_RevisionContents.py +0 -136
  67. scitex/writer/_dataclasses/contents/_SupplementaryContents.py +0 -114
  68. scitex/writer/_dataclasses/contents/__init__.py +0 -9
  69. scitex/writer/_dataclasses/core/_Document.py +0 -146
  70. scitex/writer/_dataclasses/core/_DocumentSection.py +0 -546
  71. scitex/writer/_dataclasses/core/__init__.py +0 -7
  72. scitex/writer/_dataclasses/results/_CompilationResult.py +0 -165
  73. scitex/writer/_dataclasses/results/_LaTeXIssue.py +0 -102
  74. scitex/writer/_dataclasses/results/_SaveSectionsResponse.py +0 -118
  75. scitex/writer/_dataclasses/results/_SectionReadResponse.py +0 -131
  76. scitex/writer/_dataclasses/results/__init__.py +0 -11
  77. scitex/writer/_dataclasses/tree/MINIMUM_FILES.md +0 -121
  78. scitex/writer/_dataclasses/tree/_ConfigTree.py +0 -86
  79. scitex/writer/_dataclasses/tree/_ManuscriptTree.py +0 -84
  80. scitex/writer/_dataclasses/tree/_RevisionTree.py +0 -97
  81. scitex/writer/_dataclasses/tree/_ScriptsTree.py +0 -118
  82. scitex/writer/_dataclasses/tree/_SharedTree.py +0 -100
  83. scitex/writer/_dataclasses/tree/_SupplementaryTree.py +0 -101
  84. scitex/writer/_dataclasses/tree/__init__.py +0 -23
  85. scitex/writer/_mcp/__init__.py +0 -4
  86. scitex/writer/_mcp/handlers.py +0 -32
  87. scitex/writer/_mcp/tool_schemas.py +0 -33
  88. scitex/writer/_project/__init__.py +0 -29
  89. scitex/writer/_project/_create.py +0 -89
  90. scitex/writer/_project/_trees.py +0 -63
  91. scitex/writer/_project/_validate.py +0 -61
  92. scitex/writer/utils/.legacy_git_retry.py +0 -164
  93. scitex/writer/utils/__init__.py +0 -24
  94. scitex/writer/utils/_converters.py +0 -635
  95. scitex/writer/utils/_parse_latex_logs.py +0 -138
  96. scitex/writer/utils/_parse_script_args.py +0 -156
  97. scitex/writer/utils/_verify_tree_structure.py +0 -205
  98. scitex/writer/utils/_watch.py +0 -96
  99. {scitex-2.16.0.dist-info → scitex-2.16.2.dist-info}/WHEEL +0 -0
  100. {scitex-2.16.0.dist-info → scitex-2.16.2.dist-info}/entry_points.txt +0 -0
  101. {scitex-2.16.0.dist-info → scitex-2.16.2.dist-info}/licenses/LICENSE +0 -0
scitex/scholar/url_finder/.tmp/open_url/_ResolverLinkFinder.py ADDED
@@ -0,0 +1,344 @@
+ #!/usr/bin/env python3
+ # -*- coding: utf-8 -*-
+ # Timestamp: "2025-07-29 03:10:08 (ywatanabe)"
+ # File: /home/ywatanabe/proj/scitex_repo/src/scitex/scholar/open_url/_ResolverLinkFinder.py
+ # ----------------------------------------
+ from __future__ import annotations
+
+ import os
+
+ __FILE__ = (
+     "./src/scitex/scholar/open_url/_ResolverLinkFinder.py"
+ )
+ __DIR__ = os.path.dirname(__FILE__)
+ # ----------------------------------------
+
+ """Robust resolver link finder using a prioritized, multi-layered approach.
+
+ Priority order:
+ 1. Link Target (domain matching) - Most reliable
+ 2. Page Structure (CSS selectors) - Very reliable
+ 3. Text Patterns - Good fallback
+ """
+
+ import re
+ from typing import List, Optional
+ from urllib.parse import urlparse
+
+ from playwright.async_api import ElementHandle, Page
+
+ from scitex import logging
+
+ logger = logging.getLogger(__name__)
+
+
+ class ResolverLinkFinder:
+     """Finds full-text links on resolver pages using multiple strategies."""
+
+     # DOI prefix to publisher domain mapping
+     DOI_TO_DOMAIN = {
+         "10.1038": [
+             "nature.com",
+             "springernature.com",
+         ],  # Nature Publishing Group
+         "10.1016": ["sciencedirect.com", "elsevier.com"],  # Elsevier
+         "10.1002": ["wiley.com", "onlinelibrary.wiley.com"],  # Wiley
+         "10.1007": ["springer.com", "link.springer.com"],  # Springer
+         "10.1126": ["science.org", "sciencemag.org"],  # Science/AAAS
+         "10.1021": ["acs.org", "pubs.acs.org"],  # ACS Publications
+         "10.1111": [
+             "wiley.com",
+             "onlinelibrary.wiley.com",
+         ],  # Wiley (alternative)
+         "10.1080": ["tandfonline.com"],  # Taylor & Francis
+         "10.1177": ["sagepub.com", "journals.sagepub.com"],  # SAGE
+         "10.1093": ["oup.com", "academic.oup.com"],  # Oxford
+         "10.1109": ["ieee.org", "ieeexplore.ieee.org"],  # IEEE
+         "10.1371": ["plos.org", "journals.plos.org"],  # PLOS
+         "10.1073": ["pnas.org"],  # PNAS
+         "10.1136": ["bmj.com"],  # BMJ
+         "10.3389": ["frontiersin.org"],  # Frontiers
+         "10.3390": ["mdpi.com"],  # MDPI
+     }
+
+     # Common resolver page structures
+     STRUCTURE_SELECTORS = [
+         # SFX (ExLibris) - used by many universities
+         "div#fulltext a",
+         "div.sfx-fulltext a",
+         "div.results-title > a",
+         "td.object-cell a",
+         ".getFullTxt a",
+         'div[id*="fulltext"] a',
+         'div[class*="fulltext"] a',
+         # SFX specific selectors for University of Melbourne
+         "a[title*='Wiley Online Library']",
+         "a[href*='wiley.com']",
+         "a[href*='onlinelibrary.wiley.com']",
+         ".sfx-target a",
+         ".target a",
+         "td a[href*='wiley']",
+         # Primo (ExLibris)
+         "prm-full-view-service-container a",
+         "span.availability-status-available a",
+         # Summon (ProQuest)
+         ".summon-fulltext-link",
+         "a.summon-link",
+         # EDS (EBSCO)
+         "a.fulltext-link",
+         ".ft-link a",
+         # Generic patterns
+         "a.full-text-link",
+         "a.fulltext",
+         "a#full-text-link",
+         ".access-link a",
+         ".available-link a",
+     ]
+
+     # Text patterns in priority order
+     TEXT_PATTERNS = [
+         # Most specific
+         "View full text at",
+         "Available from Nature",
+         "Available from ScienceDirect",
+         "Available from Wiley",
+         "Available from Wiley Online Library",
+         "Full text available from",
+         # Common patterns
+         "View full text",
+         "Full Text from Publisher",
+         "Get full text",
+         "Access full text",
+         "Go to article",
+         "Access article",
+         # Generic but reliable
+         "Full Text",
+         "Full text",
+         "Article",
+         "View",
+         "PDF",
+         "Download",
+     ]
+
+     def __init__(self):
+         self._doi_pattern = re.compile(r"10\.\d{4,}/[-._;()/:\w]+")
+
+     def get_expected_domains(self, doi: str) -> List[str]:
+         """Get expected publisher domains for a DOI."""
+         # Extract DOI prefix
+         match = re.match(r"(10\.\d{4,})", doi)
+         if not match:
+             return []
+
+         prefix = match.group(1)
+         return self.DOI_TO_DOMAIN.get(prefix, [])
+
+     async def find_link_async(self, page, doi: str) -> dict:
+         """Find the best full-text link using prioritized strategies."""
+         logger.info(f"Finding resolver link for DOI: {doi}")
+
+         # Strategy 1: Link Target (Most Reliable)
+         link_url = await self._find_by_domain_async(page, doi)
+         if link_url:
+             logger.info("✓ Found link using domain matching (Strategy 1)")
+             return {"success": True, "url": link_url, "method": "domain"}
+
+         # Strategy 2: Page Structure with scoring
+         link_url = await self._find_by_structure_async(page, doi)
+         if link_url:
+             logger.info("✓ Found link using page structure (Strategy 2)")
+             return {"success": True, "url": link_url, "method": "structure"}
+
+         logger.warning("✗ No suitable links found")
+         return {"success": False, "url": None, "method": None}
+
+     async def _find_by_domain_async(self, page: Page, doi: str) -> Optional[str]:
+         """Strategy 1: Find link by expected publisher domain."""
+         expected_domains = self.get_expected_domains(doi)
+         if not expected_domains:
+             logger.debug(f"No known publisher domains for DOI prefix: {doi}")
+             return None
+
+         logger.debug(f"Looking for links to domains: {expected_domains}")
+         all_links = await page.query_selector_all("a[href]")
+
+         for link in all_links:
+             href = await link.get_attribute("href")
+             if not href:
+                 continue
+
+             try:
+                 parsed = urlparse(href)
+                 domain = parsed.netloc.lower()
+
+                 for expected in expected_domains:
+                     if expected in domain:
+                         text = await link.inner_text() or ""
+                         logger.info(
+                             f"Found domain match: {domain} (text: '{text[:50]}')"
+                         )
+
+                         if not any(
+                             bad in text.lower()
+                             for bad in ["abstract", "preview", "summary"]
+                         ):
+                             return href
+                         else:
+                             logger.debug(
+                                 f"Skipping abstract/preview link: {text}"
+                             )
+             except Exception as e:
+                 logger.debug(f"Error parsing URL {href}: {e}")
+
+         return None
+
+     async def _find_by_structure_async(self, page, doi: str):
+         """Find link by page structure with publisher prioritization."""
+         potential_links = []
+         expected_domains = self.get_expected_domains(doi)
+         publisher_keywords = [
+             domain.split(".")[0] for domain in expected_domains
+         ]
+         aggregator_keywords = ["gale", "proquest", "ebsco", "jstor", "onefile"]
+
+         # Gather all possible links
+         for selector in self.STRUCTURE_SELECTORS:
+             try:
+                 elements = await page.query_selector_all(selector)
+                 logger.debug(
+                     f"Found {len(elements)} elements with selector: {selector}"
+                 )
+
+                 for element in elements:
+                     if await element.is_visible():
+                         href = await element.get_attribute("href")
+                         text = (await element.inner_text() or "").lower()
+
+                         if href and href.strip():
+                             potential_links.append(
+                                 {"href": href, "text": text, "score": 0}
+                             )
+             except Exception as element_error:
+                 logger.debug(
+                     f"Error with selector '{selector}': {element_error}"
+                 )
+
+         if not potential_links:
+             return None
+
+         # Score the links
+         for link in potential_links:
+             # Highest score for direct publisher match
+             if any(keyword in link["text"] for keyword in publisher_keywords):
+                 link["score"] = 3
+             # High score for generic publisher
+             elif "publisher" in link["text"]:
+                 link["score"] = 2
+             # Negative score for aggregators
+             elif any(
+                 keyword in link["text"] for keyword in aggregator_keywords
+             ):
+                 link["score"] = -1
+             # Default neutral score
+             else:
+                 link["score"] = 0
+
+         # Sort by score, highest first
+         sorted_links = sorted(
+             potential_links, key=lambda x: x["score"], reverse=True
+         )
+         best_link = sorted_links[0]
+
+         logger.debug(
+             f"Found structural match: '{best_link['text'][:50]}' -> {best_link['href']}"
+         )
+         return best_link["href"]
+
+     async def _find_by_text_async(self, page: Page) -> Optional[str]:
+         """Strategy 3: Find link by text patterns."""
+         for pattern in self.TEXT_PATTERNS:
+             try:
+                 selector = f'a:has-text("{pattern}")'
+                 link = await page.query_selector(selector)
+                 if link and await link.is_visible():
+                     href = await link.get_attribute("href")
+                     if href and href.strip():
+                         logger.debug(
+                             f"Found text match: '{pattern}' -> {href[:100]}"
+                         )
+                         return href
+             except Exception as e:
+                 logger.debug(f"Error with text pattern '{pattern}': {e}")
+
+         return None
+
+     async def click_and_wait_async(self, page: Page, link: ElementHandle) -> bool:
+         """Click link and wait for navigation.
+
+         Returns True if navigation succeeded.
+         """
+         initial_url = page.url
+
+         try:
+             # Get link info for logging
+             href = await link.get_attribute("href") or ""
+             text = await link.inner_text() or ""
+             logger.info(f"Clicking link: '{text[:50]}' -> {href[:100]}")
+
+             # Click and wait for navigation
+             await link.click()
+
+             # Wait for either navigation or network idle
+             try:
+                 await page.wait_for_load_state("networkidle", timeout=30000)
+             except:
+                 # Fallback to domcontentloaded if network doesn't settle
+                 await page.wait_for_load_state(
+                     "domcontentloaded", timeout=30000
+                 )
+
+             # Additional wait for JavaScript redirects
+             await page.wait_for_timeout(3000)
+
+             # Check if we navigated
+             final_url = page.url
+             if final_url != initial_url:
+                 logger.info(
+                     f"Successfully navigated: {initial_url} -> {final_url}"
+                 )
+                 return True
+             else:
+                 logger.warning("No navigation occurred after click")
+                 return False
+
+         except Exception as e:
+             logger.error(f"Error during click and navigation: {e}")
+             return False
+
+
+ # Convenience function for integration
+ async def find_and_click_resolver_link_async(page: Page, doi: str) -> Optional[str]:
+     """Find and click the best resolver link.
+
+     Args:
+         page: Playwright page object
+         doi: Target DOI
+
+     Returns:
+         Final URL after navigation, or None if failed
+     """
+     finder = ResolverLinkFinder()
+
+     # Find link
+     link = await finder.find_link_async(page, doi)
+     if not link:
+         return None
+
+     # Click and navigate
+     success = await finder.click_and_wait_async(page, link)
+     if success:
+         return page.url
+     else:
+         return None
+
+ # EOF
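
For orientation, a minimal sketch of driving the new finder from an async Playwright session. The resolver URL and DOI are placeholders, and the import is hypothetical: the wheel stages this module under a non-importable .tmp/ directory, so the path must be adjusted to wherever the module finally lands.

import asyncio

from playwright.async_api import async_playwright

from _ResolverLinkFinder import ResolverLinkFinder  # hypothetical import path


async def main() -> None:
    async with async_playwright() as p:
        browser = await p.chromium.launch()
        page = await browser.new_page()

        # Placeholder resolver URL and DOI, for illustration only.
        doi = "10.1038/s41586-021-03819-2"
        await page.goto(f"https://resolver.example.edu/openurl?doi={doi}")

        finder = ResolverLinkFinder()
        # Prefix 10.1038 maps to nature.com / springernature.com in DOI_TO_DOMAIN,
        # so Strategy 1 (domain matching) is tried first.
        result = await finder.find_link_async(page, doi)
        if result["success"]:
            await page.goto(result["url"])
            print(f"Resolved via {result['method']}: {page.url}")

        await browser.close()


asyncio.run(main())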
scitex/scholar/url_finder/.tmp/open_url/__init__.py ADDED
@@ -0,0 +1,24 @@
+ #!/usr/bin/env python3
+ # -*- coding: utf-8 -*-
+ # Timestamp: "2025-07-31 00:53:24 (ywatanabe)"
+ # File: /home/ywatanabe/proj/scitex_repo/src/scitex/scholar/open_url/__init__.py
+ # ----------------------------------------
+ from __future__ import annotations
+
+ import os
+
+ __FILE__ = (
+     "./src/scitex/scholar/open_url/__init__.py"
+ )
+ __DIR__ = os.path.dirname(__FILE__)
+ # ----------------------------------------
+
+ from ._DOIToURLResolver import DOIToURLResolver
+ from ._OpenURLResolver import OpenURLResolver
+
+ __all__ = [
+     "OpenURLResolver",
+     "DOIToURLResolver",
+ ]
+
+ # EOF
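
The new package surface is intentionally small: just the two resolver classes. A hedged import sketch follows; the dotted path is a guess, since .tmp is not a valid Python package name and the files will presumably be promoted out of it.

# Hypothetical import, assuming open_url is moved out of .tmp/:
from scitex.scholar.url_finder.open_url import (
    DOIToURLResolver,
    OpenURLResolver,
)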
scitex/social/__init__.py CHANGED
@@ -61,25 +61,14 @@ _os.environ.setdefault("SOCIALIA_ENV_PREFIX", "SCITEX_SOCIAL")

  # Check socialia availability
  try:
-     import socialia as _socialia
-
      # Re-export platform clients
-     from socialia import (
-         # Content strategies for MCP
+     from socialia import (  # Content strategies for MCP; Platform clients (preferred names)
          PLATFORM_STRATEGIES,
-         # Base class
-         BasePoster,
          GoogleAnalytics,
          LinkedIn,
-         LinkedInPoster,
          Reddit,
-         RedditPoster,
-         # Platform clients (preferred names)
          Twitter,
-         # Backward compatibility aliases
-         TwitterPoster,
          YouTube,
-         YouTubePoster,
      )
      from socialia import __version__ as _socialia_version

@@ -106,16 +95,11 @@ except ImportError:
          "Install with: pip install socialia"
      )

-     BasePoster = _SocialiaNotAvailable
      Twitter = _SocialiaNotAvailable
      LinkedIn = _SocialiaNotAvailable
      Reddit = _SocialiaNotAvailable
      YouTube = _SocialiaNotAvailable
      GoogleAnalytics = _SocialiaNotAvailable
-     TwitterPoster = _SocialiaNotAvailable
-     LinkedInPoster = _SocialiaNotAvailable
-     RedditPoster = _SocialiaNotAvailable
-     YouTubePoster = _SocialiaNotAvailable
      PLATFORM_STRATEGIES = ""


@@ -135,19 +119,12 @@ __all__ = [
      "SOCIALIA_AVAILABLE",
      "has_socialia",
      "__socialia_version__",
-     # Base class
-     "BasePoster",
      # Platform clients (preferred names)
      "Twitter",
      "LinkedIn",
      "Reddit",
      "YouTube",
      "GoogleAnalytics",
-     # Backward compatibility aliases
-     "TwitterPoster",
-     "LinkedInPoster",
-     "RedditPoster",
-     "YouTubePoster",
      # Content strategies
      "PLATFORM_STRATEGIES",
  ]
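
The practical upshot for downstream code: BasePoster and the *Poster backward-compatibility aliases are gone from scitex.social, so imports must move to the preferred client names. A minimal migration sketch using only names that remain exported:

# 2.16.0 and earlier (alias removed in 2.16.2):
#     from scitex.social import TwitterPoster
# 2.16.2 onwards, preferred client name only:
from scitex.social import SOCIALIA_AVAILABLE, Twitter

if not SOCIALIA_AVAILABLE:
    raise SystemExit("socialia is not installed: pip install socialia")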