scitex 2.17.3__py3-none-any.whl → 2.17.4__py3-none-any.whl
This diff compares the contents of publicly released package versions from a supported registry. It is provided for informational purposes only and reflects the packages exactly as they appear in their public registries.
- scitex/_dev/_dashboard/_routes.py +13 -0
- scitex/_dev/_dashboard/_scripts.py +144 -23
- scitex/_dev/_dashboard/_styles.py +90 -0
- scitex/_dev/_dashboard/_templates.py +14 -1
- scitex/_dev/_rtd.py +122 -0
- scitex/_dev/_ssh.py +38 -8
- scitex/dev/plt/data/mpl/PLOTTING_FUNCTIONS.yaml +90 -0
- scitex/dev/plt/data/mpl/PLOTTING_SIGNATURES.yaml +1571 -0
- scitex/dev/plt/data/mpl/PLOTTING_SIGNATURES_DETAILED.yaml +6262 -0
- scitex/dev/plt/data/mpl/SIGNATURES_FLATTENED.yaml +1274 -0
- scitex/dev/plt/data/mpl/dir_ax.txt +459 -0
- scitex/scholar/_mcp/crossref_handlers.py +45 -7
- scitex/scholar/_mcp/openalex_handlers.py +45 -7
- scitex/scholar/config/default.yaml +2 -0
- scitex/scholar/data/.gitkeep +0 -0
- scitex/scholar/data/README.md +44 -0
- scitex/scholar/data/bib_files/bibliography.bib +1952 -0
- scitex/scholar/data/bib_files/neurovista.bib +277 -0
- scitex/scholar/data/bib_files/neurovista_enriched.bib +441 -0
- scitex/scholar/data/bib_files/neurovista_enriched_enriched.bib +441 -0
- scitex/scholar/data/bib_files/neurovista_processed.bib +338 -0
- scitex/scholar/data/bib_files/openaccess.bib +89 -0
- scitex/scholar/data/bib_files/pac-seizure_prediction_enriched.bib +2178 -0
- scitex/scholar/data/bib_files/pac.bib +698 -0
- scitex/scholar/data/bib_files/pac_enriched.bib +1061 -0
- scitex/scholar/data/bib_files/pac_processed.bib +0 -0
- scitex/scholar/data/bib_files/pac_titles.txt +75 -0
- scitex/scholar/data/bib_files/paywalled.bib +98 -0
- scitex/scholar/data/bib_files/related-papers-by-coauthors.bib +58 -0
- scitex/scholar/data/bib_files/related-papers-by-coauthors_enriched.bib +87 -0
- scitex/scholar/data/bib_files/seizure_prediction.bib +694 -0
- scitex/scholar/data/bib_files/seizure_prediction_processed.bib +0 -0
- scitex/scholar/data/bib_files/test_complete_enriched.bib +437 -0
- scitex/scholar/data/bib_files/test_final_enriched.bib +437 -0
- scitex/scholar/data/bib_files/test_seizure.bib +46 -0
- scitex/scholar/data/impact_factor/JCR_IF_2022.xlsx +0 -0
- scitex/scholar/data/impact_factor/JCR_IF_2024.db +0 -0
- scitex/scholar/data/impact_factor/JCR_IF_2024.xlsx +0 -0
- scitex/scholar/data/impact_factor/JCR_IF_2024_v01.db +0 -0
- scitex/scholar/data/impact_factor.db +0 -0
- scitex/scholar/local_dbs/__init__.py +5 -1
- scitex/scholar/local_dbs/export.py +93 -0
- scitex/scholar/local_dbs/unified.py +505 -0
- scitex/scholar/metadata_engines/ScholarEngine.py +11 -0
- scitex/scholar/metadata_engines/individual/OpenAlexLocalEngine.py +346 -0
- scitex/scholar/metadata_engines/individual/__init__.py +1 -0
- {scitex-2.17.3.dist-info → scitex-2.17.4.dist-info}/METADATA +1 -1
- {scitex-2.17.3.dist-info → scitex-2.17.4.dist-info}/RECORD +51 -22
- scitex/scholar/url_finder/.tmp/open_url/KNOWN_RESOLVERS.py +0 -462
- scitex/scholar/url_finder/.tmp/open_url/README.md +0 -223
- scitex/scholar/url_finder/.tmp/open_url/_DOIToURLResolver.py +0 -694
- scitex/scholar/url_finder/.tmp/open_url/_OpenURLResolver.py +0 -1160
- scitex/scholar/url_finder/.tmp/open_url/_ResolverLinkFinder.py +0 -344
- scitex/scholar/url_finder/.tmp/open_url/__init__.py +0 -24
- {scitex-2.17.3.dist-info → scitex-2.17.4.dist-info}/WHEEL +0 -0
- {scitex-2.17.3.dist-info → scitex-2.17.4.dist-info}/entry_points.txt +0 -0
- {scitex-2.17.3.dist-info → scitex-2.17.4.dist-info}/licenses/LICENSE +0 -0
--- /dev/null
+++ scitex/scholar/local_dbs/export.py
@@ -0,0 +1,93 @@
+#!/usr/bin/env python3
+# Timestamp: 2026-02-04
+# File: src/scitex/scholar/local_dbs/export.py
+"""Export functionality for unified local database results.
+
+Supports multiple output formats:
+- text: Human-readable formatted text
+- json: JSON format with all fields
+- bibtex: BibTeX bibliography format
+"""
+
+from __future__ import annotations
+
+from pathlib import Path
+from typing import TYPE_CHECKING, List, Union
+
+if TYPE_CHECKING:
+    from .unified import UnifiedSearchResult, UnifiedWork
+
+__all__ = [
+    "save",
+    "SUPPORTED_FORMATS",
+]
+
+SUPPORTED_FORMATS = ["text", "json", "bibtex"]
+
+
+def save(
+    data: Union[UnifiedWork, UnifiedSearchResult, List[UnifiedWork]],
+    path: str,
+    format: str = "json",
+) -> str:
+    """Save UnifiedWork(s) or UnifiedSearchResult to a file.
+
+    Args:
+        data: UnifiedWork, UnifiedSearchResult, or list of UnifiedWorks
+        path: Output file path
+        format: Output format ("text", "json", "bibtex")
+
+    Returns
+    -------
+    Path to saved file
+
+    Raises
+    ------
+    ValueError: If format is not supported
+
+    Examples
+    --------
+    >>> from scitex.scholar.local_dbs import search, save
+    >>> results = search("machine learning", limit=10)
+    >>> save(results, "results.json")
+    >>> save(results, "results.bib", format="bibtex")
+    >>> save(results, "results.txt", format="text")
+    """
+    from .unified import UnifiedSearchResult, UnifiedWork, to_bibtex, to_json, to_text
+
+    if format not in SUPPORTED_FORMATS:
+        raise ValueError(
+            f"Unsupported format: {format}. "
+            f"Supported formats: {', '.join(SUPPORTED_FORMATS)}"
+        )
+
+    path = Path(path)
+
+    # Extract works
+    if isinstance(data, UnifiedWork):
+        works = [data]
+    elif isinstance(data, UnifiedSearchResult):
+        works = data.works
+    elif isinstance(data, list):
+        works = data
+    else:
+        raise TypeError(f"Unsupported data type: {type(data)}")
+
+    # Generate content
+    if format == "text":
+        content = to_text(works)
+    elif format == "json":
+        content = to_json(works)
+    elif format == "bibtex":
+        content = to_bibtex(works)
+    else:
+        raise ValueError(f"Unsupported format: {format}")
+
+    # Write to file
+    path.parent.mkdir(parents=True, exist_ok=True)
+    path.write_text(content, encoding="utf-8")
+
+    return str(path)
+
+
+# EOF
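A minimal usage sketch for the new `export.py` module, mirroring its own doctest; it assumes at least one local backend (`crossref-local` or `openalex-local`) is installed and indexed, and the output paths are illustrative:

```python
# Sketch only: exercises save() from the diff above.
from scitex.scholar.local_dbs import search, save

results = search("machine learning", limit=10)

# save() dispatches on `format`, creates parent directories as
# needed, and returns the path it wrote as a string.
save(results, "out/results.json")                  # default format: json
save(results, "out/results.bib", format="bibtex")
save(results, "out/results.txt", format="text")

# Unsupported formats fail fast, before anything is written.
try:
    save(results, "out/results.yaml", format="yaml")
except ValueError as err:
    print(err)  # Unsupported format: yaml. Supported formats: text, json, bibtex
```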
--- /dev/null
+++ scitex/scholar/local_dbs/unified.py
@@ -0,0 +1,505 @@
+#!/usr/bin/env python3
+# Timestamp: 2026-02-04
+# File: src/scitex/scholar/local_dbs/unified.py
+"""Unified search across CrossRef and OpenAlex local databases.
+
+This module provides a single interface for searching both databases
+with automatic deduplication and result merging.
+
+Usage:
+    >>> from scitex.scholar.local_dbs.unified import search, get, info
+    >>> results = search("hippocampal sharp wave ripples", limit=50)
+    >>> print(f"Found {len(results)} papers")
+    >>> # Export to different formats
+    >>> from scitex.scholar.local_dbs.unified import to_json, to_bibtex, to_text
+    >>> print(to_bibtex(results[:5]))
+"""
+
+from __future__ import annotations
+
+import asyncio
+import concurrent.futures
+import json
+from dataclasses import asdict, dataclass, field
+from typing import Any, Dict, List, Literal, Optional, Tuple, Union
+
+__all__ = [
+    "search",
+    "search_async",
+    "get",
+    "info",
+    "UnifiedWork",
+    "UnifiedSearchResult",
+    "to_json",
+    "to_bibtex",
+    "to_text",
+    "save",
+    "SUPPORTED_FORMATS",
+]
+
+# Import save from export module
+from .export import SUPPORTED_FORMATS, save
+
+# Try to import both databases
+_crossref_available = False
+_openalex_available = False
+
+try:
+    from crossref_local import Work as CRWork
+    from crossref_local import get as cr_get
+    from crossref_local import info as cr_info
+    from crossref_local import search as cr_search
+
+    _crossref_available = True
+except ImportError:
+    cr_search = cr_get = cr_info = CRWork = None
+
+try:
+    from openalex_local import Work as OAWork
+    from openalex_local import get as oa_get
+    from openalex_local import info as oa_info
+    from openalex_local import search as oa_search
+
+    _openalex_available = True
+except ImportError:
+    oa_search = oa_get = oa_info = OAWork = None
+
+
+@dataclass
+class UnifiedWork:
+    """Unified work representation merging CrossRef and OpenAlex data."""
+
+    doi: Optional[str] = None
+    title: Optional[str] = None
+    authors: List[str] = field(default_factory=list)
+    year: Optional[int] = None
+    journal: Optional[str] = None
+    abstract: Optional[str] = None
+    citation_count: Optional[int] = None
+    is_open_access: bool = False
+    oa_url: Optional[str] = None
+    source: str = "unknown"  # "crossref", "openalex", or "merged"
+
+    # Extra fields from OpenAlex
+    openalex_id: Optional[str] = None
+    concepts: List[Dict] = field(default_factory=list)
+    topics: List[Dict] = field(default_factory=list)
+
+    # Extra fields from CrossRef
+    issn: Optional[str] = None
+    references: List[str] = field(default_factory=list)
+    impact_factor: Optional[float] = None
+
+    def to_dict(self) -> Dict[str, Any]:
+        """Convert to dictionary."""
+        return asdict(self)
+
+
+@dataclass
+class UnifiedSearchResult:
+    """Container for unified search results."""
+
+    works: List[UnifiedWork]
+    total_crossref: int = 0
+    total_openalex: int = 0
+    duplicates_removed: int = 0
+    query: str = ""
+
+    def __len__(self) -> int:
+        return len(self.works)
+
+    def __iter__(self):
+        return iter(self.works)
+
+    def __getitem__(self, idx):
+        return self.works[idx]
+
+
+def _crossref_work_to_unified(work) -> UnifiedWork:
+    """Convert CrossRef work to unified format."""
+    return UnifiedWork(
+        doi=work.doi,
+        title=work.title,
+        authors=work.authors if work.authors else [],
+        year=work.year,
+        journal=work.journal,
+        abstract=work.abstract,
+        citation_count=work.citation_count,
+        source="crossref",
+        issn=work.issn if hasattr(work, "issn") else None,
+        references=work.references if hasattr(work, "references") else [],
+        impact_factor=work.impact_factor if hasattr(work, "impact_factor") else None,
+    )
+
+
+def _openalex_work_to_unified(work) -> UnifiedWork:
+    """Convert OpenAlex work to unified format."""
+    return UnifiedWork(
+        doi=work.doi,
+        title=work.title,
+        authors=work.authors if work.authors else [],
+        year=work.year,
+        journal=work.source if hasattr(work, "source") else None,
+        abstract=work.abstract,
+        citation_count=work.cited_by_count if hasattr(work, "cited_by_count") else None,
+        is_open_access=work.is_oa if hasattr(work, "is_oa") else False,
+        oa_url=work.oa_url if hasattr(work, "oa_url") else None,
+        source="openalex",
+        openalex_id=work.openalex_id if hasattr(work, "openalex_id") else None,
+        concepts=work.concepts if hasattr(work, "concepts") else [],
+        topics=work.topics if hasattr(work, "topics") else [],
+    )
+
+
+def _merge_works(cr_work: UnifiedWork, oa_work: UnifiedWork) -> UnifiedWork:
+    """Merge CrossRef and OpenAlex works, preferring more complete data."""
+    return UnifiedWork(
+        doi=cr_work.doi or oa_work.doi,
+        title=cr_work.title or oa_work.title,
+        authors=cr_work.authors or oa_work.authors,
+        year=cr_work.year or oa_work.year,
+        journal=cr_work.journal or oa_work.journal,
+        # Prefer OpenAlex abstract (more complete)
+        abstract=oa_work.abstract or cr_work.abstract,
+        # Prefer higher citation count
+        citation_count=max(
+            cr_work.citation_count or 0,
+            oa_work.citation_count or 0,
+        )
+        or None,
+        is_open_access=oa_work.is_open_access,
+        oa_url=oa_work.oa_url,
+        source="merged",
+        # OpenAlex fields
+        openalex_id=oa_work.openalex_id,
+        concepts=oa_work.concepts,
+        topics=oa_work.topics,
+        # CrossRef fields
+        issn=cr_work.issn,
+        references=cr_work.references,
+        impact_factor=cr_work.impact_factor,
+    )
+
+
+def _deduplicate_and_merge(
+    cr_works: List[UnifiedWork], oa_works: List[UnifiedWork]
+) -> Tuple[List[UnifiedWork], int]:
+    """Deduplicate and merge works from both sources."""
+    # Index by DOI for fast lookup
+    doi_to_cr: Dict[str, UnifiedWork] = {}
+    for w in cr_works:
+        if w.doi:
+            doi_to_cr[w.doi.lower()] = w
+
+    doi_to_oa: Dict[str, UnifiedWork] = {}
+    for w in oa_works:
+        if w.doi:
+            doi_to_oa[w.doi.lower()] = w
+
+    results: List[UnifiedWork] = []
+    seen_dois: set = set()
+    duplicates = 0
+
+    # Process CrossRef works (merge with OpenAlex if exists)
+    for w in cr_works:
+        if w.doi:
+            doi_lower = w.doi.lower()
+            if doi_lower in seen_dois:
+                duplicates += 1
+                continue
+            seen_dois.add(doi_lower)
+
+            if doi_lower in doi_to_oa:
+                # Merge with OpenAlex data
+                merged = _merge_works(w, doi_to_oa[doi_lower])
+                results.append(merged)
+                duplicates += 1  # Count the OpenAlex duplicate
+            else:
+                results.append(w)
+        else:
+            results.append(w)
+
+    # Add OpenAlex works not in CrossRef
+    for w in oa_works:
+        if w.doi:
+            if w.doi.lower() not in seen_dois:
+                results.append(w)
+                seen_dois.add(w.doi.lower())
+        else:
+            results.append(w)
+
+    return results, duplicates
+
+
+def search(
+    query: str,
+    limit: int = 50,
+    sources: Optional[List[Literal["crossref", "openalex"]]] = None,
+    merge_duplicates: bool = True,
+    **kwargs,
+) -> UnifiedSearchResult:
+    """
+    Search both CrossRef and OpenAlex databases.
+
+    Args:
+        query: Search query string
+        limit: Maximum results per source (total may be up to 2x if no dedup)
+        sources: Which sources to search. Default: both available sources
+        merge_duplicates: Whether to merge works found in both databases
+        **kwargs: Additional arguments passed to search functions
+
+    Returns
+    -------
+    UnifiedSearchResult with merged works
+    """
+    if sources is None:
+        sources = []
+        if _crossref_available:
+            sources.append("crossref")
+        if _openalex_available:
+            sources.append("openalex")
+
+    if not sources:
+        raise RuntimeError(
+            "No search sources available. Install crossref-local or openalex-local"
+        )
+
+    cr_works: List[UnifiedWork] = []
+    oa_works: List[UnifiedWork] = []
+
+    # Search in parallel using thread pool
+    def search_crossref():
+        if "crossref" in sources and _crossref_available:
+            results = cr_search(query, limit=limit, **kwargs)
+            return [_crossref_work_to_unified(w) for w in results]
+        return []
+
+    def search_openalex():
+        if "openalex" in sources and _openalex_available:
+            results = oa_search(query, limit=limit, **kwargs)
+            return [_openalex_work_to_unified(w) for w in results]
+        return []
+
+    with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor:
+        cr_future = executor.submit(search_crossref)
+        oa_future = executor.submit(search_openalex)
+
+        try:
+            cr_works = cr_future.result(timeout=30)
+        except Exception:
+            cr_works = []
+
+        try:
+            oa_works = oa_future.result(timeout=30)
+        except Exception:
+            oa_works = []
+
+    # Deduplicate and merge
+    duplicates = 0
+    if merge_duplicates:
+        works, duplicates = _deduplicate_and_merge(cr_works, oa_works)
+    else:
+        works = cr_works + oa_works
+
+    return UnifiedSearchResult(
+        works=works,
+        total_crossref=len(cr_works),
+        total_openalex=len(oa_works),
+        duplicates_removed=duplicates,
+        query=query,
+    )
+
+
+async def search_async(
+    query: str,
+    limit: int = 50,
+    sources: Optional[List[Literal["crossref", "openalex"]]] = None,
+    merge_duplicates: bool = True,
+    **kwargs,
+) -> UnifiedSearchResult:
+    """Async version of search."""
+    loop = asyncio.get_event_loop()
+    return await loop.run_in_executor(
+        None,
+        lambda: search(
+            query,
+            limit=limit,
+            sources=sources,
+            merge_duplicates=merge_duplicates,
+            **kwargs,
+        ),
+    )
+
+
+def get(doi: str, sources: Optional[List[str]] = None) -> Optional[UnifiedWork]:
+    """
+    Get a specific work by DOI from available sources.
+
+    Args:
+        doi: DOI to look up
+        sources: Which sources to check. Default: all available
+
+    Returns
+    -------
+    UnifiedWork if found, None otherwise
+    """
+    if sources is None:
+        sources = []
+        if _crossref_available:
+            sources.append("crossref")
+        if _openalex_available:
+            sources.append("openalex")
+
+    cr_work = None
+    oa_work = None
+
+    if "crossref" in sources and _crossref_available:
+        try:
+            result = cr_get(doi)
+            if result:
+                cr_work = _crossref_work_to_unified(result)
+        except Exception:
+            pass
+
+    if "openalex" in sources and _openalex_available:
+        try:
+            result = oa_get(doi)
+            if result:
+                oa_work = _openalex_work_to_unified(result)
+        except Exception:
+            pass
+
+    if cr_work and oa_work:
+        return _merge_works(cr_work, oa_work)
+    return cr_work or oa_work
+
+
+def info() -> Dict[str, Any]:
+    """Get status info from all available sources."""
+    result = {
+        "sources": [],
+        "total_works": 0,
+    }
+
+    if _crossref_available:
+        try:
+            cr_info_data = cr_info()
+            result["sources"].append(
+                {
+                    "name": "crossref",
+                    "available": True,
+                    "info": cr_info_data if isinstance(cr_info_data, dict) else {},
+                }
+            )
+            if isinstance(cr_info_data, dict) and "total_works" in cr_info_data:
+                result["total_works"] += cr_info_data["total_works"]
+        except Exception as e:
+            result["sources"].append(
+                {"name": "crossref", "available": False, "error": str(e)}
+            )
+    else:
+        result["sources"].append(
+            {"name": "crossref", "available": False, "error": "Not installed"}
+        )
+
+    if _openalex_available:
+        try:
+            oa_info_data = oa_info()
+            result["sources"].append(
+                {
+                    "name": "openalex",
+                    "available": True,
+                    "info": oa_info_data if isinstance(oa_info_data, dict) else {},
+                }
+            )
+            if isinstance(oa_info_data, dict) and "total_works" in oa_info_data:
+                result["total_works"] += oa_info_data["total_works"]
+        except Exception as e:
+            result["sources"].append(
+                {"name": "openalex", "available": False, "error": str(e)}
+            )
+    else:
+        result["sources"].append(
+            {"name": "openalex", "available": False, "error": "Not installed"}
+        )
+
+    return result
+
+
+# ============================================================================
+# Export Formats
+# ============================================================================
+
+
+def to_json(
+    works: Union[List[UnifiedWork], UnifiedSearchResult], indent: int = 2
+) -> str:
+    """Export works to JSON format."""
+    if isinstance(works, UnifiedSearchResult):
+        works = works.works
+    return json.dumps([w.to_dict() for w in works], indent=indent, ensure_ascii=False)
+
+
+def to_bibtex(works: Union[List[UnifiedWork], UnifiedSearchResult]) -> str:
+    """Export works to BibTeX format."""
+    if isinstance(works, UnifiedSearchResult):
+        works = works.works
+
+    entries = []
+    for i, w in enumerate(works):
+        # Generate citation key
+        first_author = ""
+        if w.authors:
+            first_author = (
+                w.authors[0].split(",")[0].split()[-1].lower().replace(" ", "")
+            )
+        year = w.year or "nodate"
+        key = f"{first_author}{year}_{i + 1}" if first_author else f"paper_{i + 1}"
+
+        lines = [f"@article{{{key},"]
+
+        if w.title:
+            lines.append(f"  title = {{{w.title}}},")
+        if w.authors:
+            lines.append(f"  author = {{{' and '.join(w.authors)}}},")
+        if w.year:
+            lines.append(f"  year = {{{w.year}}},")
+        if w.journal:
+            lines.append(f"  journal = {{{w.journal}}},")
+        if w.doi:
+            lines.append(f"  doi = {{{w.doi}}},")
+        if w.abstract:
+            # Escape special LaTeX chars
+            abstract = (
+                w.abstract.replace("&", r"\&").replace("%", r"\%").replace("$", r"\$")
+            )
+            lines.append(f"  abstract = {{{abstract}}},")
+
+        lines.append("}")
+        entries.append("\n".join(lines))
+
+    return "\n\n".join(entries)
+
+
+def to_text(works: Union[List[UnifiedWork], UnifiedSearchResult]) -> str:
+    """Export works to plain text format (one per line)."""
+    if isinstance(works, UnifiedSearchResult):
+        works = works.works
+
+    lines = []
+    for i, w in enumerate(works, 1):
+        authors = ", ".join(w.authors[:3]) if w.authors else "Unknown"
+        if len(w.authors) > 3:
+            authors += " et al."
+        title = w.title or "No title"
+        year = f"({w.year})" if w.year else ""
+        journal = w.journal or ""
+        doi = f"DOI: {w.doi}" if w.doi else ""
+
+        line = f"{i}. {authors} {year}. {title}. {journal} {doi}".strip()
+        lines.append(line)
+
+    return "\n".join(lines)
+
+
+# EOF
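A sketch of the unified-search flow defined above. Behavior depends on which of `crossref-local` / `openalex-local` is importable (with neither installed, `search()` raises `RuntimeError`), and the DOI below is hypothetical:

```python
from scitex.scholar.local_dbs.unified import search, get, info, to_bibtex

# info() reports per-source availability and a combined work count.
status = info()
print(status["total_works"], [s["name"] for s in status["sources"]])

# Both backends are queried in parallel threads (30 s timeout each);
# hits sharing a DOI (compared case-insensitively) are merged into a
# single UnifiedWork with source == "merged".
results = search("hippocampal sharp wave ripples", limit=50)
print(f"{len(results)} works, {results.duplicates_removed} duplicates merged")

# get() looks up one DOI and merges per-source records the same way.
work = get("10.1234/example.doi")  # hypothetical DOI
if work is not None:
    print(work.title, work.citation_count, work.source)

# Export helpers accept either a list of works or the result object.
print(to_bibtex(results[:5]))
```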
--- scitex/scholar/metadata_engines/ScholarEngine.py (scitex 2.17.3)
+++ scitex/scholar/metadata_engines/ScholarEngine.py (scitex 2.17.4)
@@ -31,6 +31,7 @@ from .individual import ArXivEngine
 from .individual import CrossRefEngine
 from .individual import CrossRefLocalEngine
 from .individual import OpenAlexEngine
+from .individual import OpenAlexLocalEngine
 from .individual import PubMedEngine
 from .individual import SemanticScholarEngine
 from .individual import URLDOIEngine
@@ -232,6 +233,7 @@ class ScholarEngine:
             "CrossRef": CrossRefEngine,
             "CrossRefLocal": CrossRefLocalEngine,
             "OpenAlex": OpenAlexEngine,
+            "OpenAlexLocal": OpenAlexLocalEngine,
             "PubMed": PubMedEngine,
             "Semantic_Scholar": SemanticScholarEngine,
             "arXiv": ArXivEngine,
@@ -247,6 +249,14 @@ class ScholarEngine:
             self._engine_instances[name] = engine_classes[name](
                 "research@example.com", api_url=api_url
             )
+        elif name == "OpenAlexLocal":
+            # Get API URL from config (supports SCITEX_SCHOLAR_OPENALEX_API_URL env var)
+            api_url = self.config.resolve(
+                "openalex_api_url", "http://127.0.0.1:31292"
+            )
+            self._engine_instances[name] = engine_classes[name](
+                "research@example.com", api_url=api_url
+            )
         else:
             self._engine_instances[name] = engine_classes[name](
                 "research@example.com"
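Per the in-diff comment, the endpoint can be overridden via configuration; a hedged sketch (host name illustrative, exact precedence is whatever `config.resolve` implements):

```python
import os

# Point the OpenAlexLocal engine at a non-default API endpoint.
# The fallback "http://127.0.0.1:31292" above applies only when the
# "openalex_api_url" config key and this variable are both unset.
os.environ["SCITEX_SCHOLAR_OPENALEX_API_URL"] = "http://my-openalex-host:31292"  # hypothetical host

# The hunk suggests engine instances are created on demand, so set
# this before the first lookup that instantiates "OpenAlexLocal".
```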
@@ -476,6 +486,7 @@ class ScholarEngine:
         engine_priority = {
             "URL": 6,
             "CrossRefLocal": 5,
+            "OpenAlexLocal": 5,
             "CrossRef": 4,
             "OpenAlex": 3,
             "Semantic_Scholar": 2,
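For context, a priority map like the one above (the hunk shows only its first entries; the real dict may contain more) yields a deterministic ordering when sorted by descending value, so both local mirrors now outrank their remote counterparts. A standalone illustration, not scitex code:

```python
# Entries as shown in the hunk above.
engine_priority = {
    "URL": 6,
    "CrossRefLocal": 5,
    "OpenAlexLocal": 5,
    "CrossRef": 4,
    "OpenAlex": 3,
    "Semantic_Scholar": 2,
}

# Higher value first; Python's stable sort keeps insertion order on
# ties, so CrossRefLocal precedes OpenAlexLocal (both priority 5).
ordered = sorted(engine_priority, key=engine_priority.get, reverse=True)
print(ordered)
# ['URL', 'CrossRefLocal', 'OpenAlexLocal', 'CrossRef', 'OpenAlex', 'Semantic_Scholar']
```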