crossref-local: crossref_local-0.3.1-py3-none-any.whl → crossref_local-0.5.0-py3-none-any.whl
This diff represents the content of publicly available package versions released to a supported registry, and is provided for informational purposes only.
- crossref_local/__init__.py +38 -16
- crossref_local/__main__.py +0 -0
- crossref_local/_aio/__init__.py +30 -0
- crossref_local/_aio/_impl.py +238 -0
- crossref_local/_cache/__init__.py +15 -0
- crossref_local/_cache/export.py +100 -0
- crossref_local/_cache/utils.py +93 -0
- crossref_local/_cache/viz.py +296 -0
- crossref_local/_cli/__init__.py +9 -0
- crossref_local/_cli/cache.py +179 -0
- crossref_local/_cli/cli.py +512 -0
- crossref_local/_cli/completion.py +245 -0
- crossref_local/_cli/main.py +20 -0
- crossref_local/_cli/mcp.py +351 -0
- crossref_local/_cli/mcp_server.py +413 -0
- crossref_local/_core/__init__.py +58 -0
- crossref_local/{api.py → _core/api.py} +130 -36
- crossref_local/{citations.py → _core/citations.py} +55 -26
- crossref_local/{config.py → _core/config.py} +57 -42
- crossref_local/{db.py → _core/db.py} +32 -26
- crossref_local/{fts.py → _core/fts.py} +18 -14
- crossref_local/{models.py → _core/models.py} +11 -6
- crossref_local/{impact_factor → _impact_factor}/__init__.py +0 -0
- crossref_local/{impact_factor → _impact_factor}/calculator.py +0 -0
- crossref_local/{impact_factor → _impact_factor}/journal_lookup.py +0 -0
- crossref_local/_remote/__init__.py +56 -0
- crossref_local/_remote/base.py +356 -0
- crossref_local/_remote/collections.py +175 -0
- crossref_local/_server/__init__.py +140 -0
- crossref_local/_server/middleware.py +25 -0
- crossref_local/_server/models.py +129 -0
- crossref_local/_server/routes_citations.py +98 -0
- crossref_local/_server/routes_collections.py +282 -0
- crossref_local/_server/routes_compat.py +102 -0
- crossref_local/_server/routes_works.py +128 -0
- crossref_local/_server/server.py +19 -0
- crossref_local/aio.py +30 -206
- crossref_local/cache.py +466 -0
- crossref_local/cli.py +5 -447
- crossref_local/jobs.py +169 -0
- crossref_local/mcp_server.py +5 -199
- crossref_local/remote.py +5 -261
- crossref_local/server.py +5 -349
- {crossref_local-0.3.1.dist-info → crossref_local-0.5.0.dist-info}/METADATA +88 -24
- crossref_local-0.5.0.dist-info/RECORD +47 -0
- crossref_local-0.3.1.dist-info/RECORD +0 -20
- {crossref_local-0.3.1.dist-info → crossref_local-0.5.0.dist-info}/WHEEL +0 -0
- {crossref_local-0.3.1.dist-info → crossref_local-0.5.0.dist-info}/entry_points.txt +0 -0
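The line counts above telegraph the restructuring: each 0.3.1 top-level module (aio.py +30 -206, cli.py +5 -447, mcp_server.py +5 -199, remote.py +5 -261, server.py +5 -349) shrinks to a few lines, while new underscore-prefixed packages (`_aio`, `_cli`, `_core`, `_remote`, `_server`) absorb the implementations. This suggests the old import paths survive as thin re-export shims over a privatized layout. The hunks below show the same privatization inside `_core`: imports gain `as _name` aliases and each module gains an explicit `__all__`.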
crossref_local/{db.py → _core/db.py}
(an ellipsis … marks removed-side text truncated in the source rendering)

@@ -1,13 +1,20 @@
 """Database connection handling for crossref_local."""
 
-import json
-import sqlite3
-import zlib
-from contextlib import contextmanager
-from pathlib import Path
-from typing import …
+import json as _json
+import sqlite3 as _sqlite3
+import zlib as _zlib
+from contextlib import contextmanager as _contextmanager
+from pathlib import Path as _Path
+from typing import Generator, Optional
 
-from .config import Config
+from .config import Config as _Config
+
+__all__ = [
+    "Database",
+    "get_db",
+    "close_db",
+    "connection",
+]
 
 
 class Database:

@@ -17,7 +24,7 @@ class Database:
     Supports both direct usage and context manager pattern.
     """
 
-    def __init__(self, db_path: Optional[str | Path] = None):
+    def __init__(self, db_path: Optional[str | _Path] = None):
        """
        Initialize database connection.
 

@@ -25,19 +32,19 @@
            db_path: Path to database. If None, auto-detects.
        """
        if db_path:
-            self.db_path = Path(db_path)
+            self.db_path = _Path(db_path)
        else:
-            self.db_path = Config.get_db_path()
+            self.db_path = _Config.get_db_path()
 
-        self.conn: Optional[sqlite3.Connection] = None
+        self.conn: Optional[_sqlite3.Connection] = None
        self._connect()
 
    def _connect(self) -> None:
        """Establish database connection."""
        # check_same_thread=False allows connection to be used across threads
        # Safe for read-only operations (which is our use case)
-        self.conn = sqlite3.connect(self.db_path, check_same_thread=False)
-        self.conn.row_factory = sqlite3.Row
+        self.conn = _sqlite3.connect(self.db_path, check_same_thread=False)
+        self.conn.row_factory = _sqlite3.Row
 
    def close(self) -> None:
        """Close database connection."""

@@ -51,11 +58,11 @@
    def __exit__(self, exc_type, exc_val, exc_tb) -> None:
        self.close()
 
-    def execute(self, query: str, params: tuple = ()) -> sqlite3.Cursor:
+    def execute(self, query: str, params: tuple = ()) -> _sqlite3.Cursor:
        """Execute SQL query."""
        return self.conn.execute(query, params)
 
-    def fetchone(self, query: str, params: tuple = ()) -> Optional[sqlite3.Row]:
+    def fetchone(self, query: str, params: tuple = ()) -> Optional[_sqlite3.Row]:
        """Execute query and fetch one result."""
        cursor = self.execute(query, params)
        return cursor.fetchone()

@@ -75,10 +82,7 @@
        Returns:
            Metadata dictionary or None
        """
-        row = self.fetchone(
-            "SELECT metadata FROM works WHERE doi = ?",
-            (doi,)
-        )
+        row = self.fetchone("SELECT metadata FROM works WHERE doi = ?", (doi,))
        if row and row["metadata"]:
            return self._decompress_metadata(row["metadata"])
        return None

@@ -87,15 +91,15 @@
        """Decompress and parse metadata (handles both compressed and plain JSON)."""
        # If it's already a string, parse directly
        if isinstance(data, str):
-            return json.loads(data)
+            return _json.loads(data)
 
        # If bytes, try decompression
        if isinstance(data, bytes):
            try:
-                decompressed = zlib.decompress(data)
-                return json.loads(decompressed)
-            except zlib.error:
-                return json.loads(data.decode("utf-8"))
+                decompressed = _zlib.decompress(data)
+                return _json.loads(decompressed)
+            except _zlib.error:
+                return _json.loads(data.decode("utf-8"))
 
        return data
 

@@ -120,8 +124,10 @@ def close_db() -> None:
    _db = None
 
 
-@contextmanager
-def connection(…
+@_contextmanager
+def connection(
+    db_path: Optional[str | _Path] = None,
+) -> Generator[Database, None, None]:
    """
    Context manager for database connection.
 
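As a concrete illustration of the fallback in the `_decompress_metadata` hunk, here is a minimal, self-contained sketch of the same decode logic; `decode_metadata` is a hypothetical standalone name, not part of the package API:

```python
import json
import zlib


def decode_metadata(data):
    """Mirror of the hunk's logic: accept a str, zlib-compressed bytes, or plain JSON bytes."""
    if isinstance(data, str):
        # Already plain JSON text: parse directly.
        return json.loads(data)
    if isinstance(data, bytes):
        try:
            # Most rows store zlib-compressed JSON.
            return json.loads(zlib.decompress(data))
        except zlib.error:
            # Fall back to uncompressed UTF-8 JSON bytes.
            return json.loads(data.decode("utf-8"))
    return data


blob = zlib.compress(json.dumps({"DOI": "10.1234/example"}).encode("utf-8"))
print(decode_metadata(blob))                    # {'DOI': '10.1234/example'}
print(decode_metadata('{"DOI": "10.1234/x"}'))  # plain-string path
```

The `except zlib.error` branch is what lets a single `metadata` column hold both compressed blobs and plain JSON.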
crossref_local/{fts.py → _core/fts.py}

@@ -1,11 +1,17 @@
 """Full-text search using FTS5."""
 
-import re
-import time
+import re as _re
+import time as _time
 from typing import List, Optional
 
 from .db import Database, get_db
-from .models import …
+from .models import SearchResult, Work
+
+__all__ = [
+    "search",
+    "count",
+    "search_dois",
+]
 
 
 def _sanitize_query(query: str) -> str:

@@ -24,13 +30,13 @@ def _sanitize_query(query: str) -> str:
 
    # Check for problematic patterns (hyphenated words, special chars)
    # But allow explicit FTS5 operators: AND, OR, NOT, NEAR
-    has_hyphenated_word = re.search(r"\w+-\w+", query)
-    has_special = re.search(r"[/\\@#$%^&]", query)
+    has_hyphenated_word = _re.search(r"\w+-\w+", query)
+    has_special = _re.search(r"[/\\@#$%^&]", query)
 
    if has_hyphenated_word or has_special:
        # Quote each word to treat as literal
        words = query.split()
-        quoted = …
+        quoted = " ".join(f'"{w}"' for w in words)
        return quoted
 
    return query

@@ -65,15 +71,14 @@ def search(
    if db is None:
        db = get_db()
 
-    start = time.perf_counter()
+    start = _time.perf_counter()
 
    # Sanitize query for FTS5
    safe_query = _sanitize_query(query)
 
    # Get total count
    count_row = db.fetchone(
-        "SELECT COUNT(*) as total FROM works_fts WHERE works_fts MATCH ?",
-        (safe_query,)
+        "SELECT COUNT(*) as total FROM works_fts WHERE works_fts MATCH ?", (safe_query,)
    )
    total = count_row["total"] if count_row else 0
 

@@ -86,10 +91,10 @@
        WHERE works_fts MATCH ?
        LIMIT ? OFFSET ?
        """,
-        (safe_query, limit, offset)
+        (safe_query, limit, offset),
    )
 
-    elapsed_ms = (time.perf_counter() - start) * 1000
+    elapsed_ms = (_time.perf_counter() - start) * 1000
 
    # Convert to Work objects
    works = []

@@ -121,8 +126,7 @@ def count(query: str, db: Optional[Database] = None) -> int:
 
    safe_query = _sanitize_query(query)
    row = db.fetchone(
-        "SELECT COUNT(*) as total FROM works_fts WHERE works_fts MATCH ?",
-        (safe_query,)
+        "SELECT COUNT(*) as total FROM works_fts WHERE works_fts MATCH ?", (safe_query,)
    )
    return row["total"] if row else 0
 

@@ -155,7 +159,7 @@ def search_dois(
        WHERE works_fts MATCH ?
        LIMIT ?
        """,
-        (safe_query, limit)
+        (safe_query, limit),
    )
 
    return [row["doi"] for row in rows]
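The sanitizer changed above is small enough to exercise in isolation. This sketch reproduces its visible logic under a hypothetical standalone name, `sanitize_query`:

```python
import re


def sanitize_query(query: str) -> str:
    """Quote terms when the query would trip up the FTS5 parser."""
    has_hyphenated_word = re.search(r"\w+-\w+", query)
    has_special = re.search(r"[/\\@#$%^&]", query)
    if has_hyphenated_word or has_special:
        # Quote each word so FTS5 treats it as a literal token.
        return " ".join(f'"{w}"' for w in query.split())
    return query


print(sanitize_query("machine-learning models"))  # "machine-learning" "models"
print(sanitize_query("epilepsy AND seizure"))     # unchanged, operators still work
```

Quoting every term trades FTS5 operator support for safety: an unquoted hyphen inside a token is a syntax error in an FTS5 MATCH expression, while a quoted string is matched literally.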
crossref_local/{models.py → _core/models.py}

@@ -1,11 +1,16 @@
 """Data models for crossref_local."""
 
-from dataclasses import dataclass
+from dataclasses import dataclass as _dataclass
+from dataclasses import field as _field
 from typing import List, Optional
-import json
 
+__all__ = [
+    "Work",
+    "SearchResult",
+]
 
-@dataclass
+
+@_dataclass
 class Work:
    """
    Represents a scholarly work from CrossRef.

@@ -30,7 +35,7 @@ class Work:
 
    doi: str
    title: Optional[str] = None
-    authors: List[str] = …
+    authors: List[str] = _field(default_factory=list)
    year: Optional[int] = None
    journal: Optional[str] = None
    issn: Optional[str] = None

@@ -42,7 +47,7 @@ class Work:
    abstract: Optional[str] = None
    url: Optional[str] = None
    citation_count: Optional[int] = None
-    references: List[str] = …
+    references: List[str] = _field(default_factory=list)
 
    @classmethod
    def from_metadata(cls, doi: str, metadata: dict) -> "Work":

@@ -159,7 +164,7 @@ class Work:
        return ". ".join(filter(None, parts))
 
 
-@dataclass
+@_dataclass
 class SearchResult:
    """
    Container for search results with metadata.
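The new `field` import earns its place in the changed attribute lines: dataclasses reject plain mutable defaults, so list-valued fields must go through `default_factory`. A condensed, hypothetical stand-in for `Work` (most fields omitted) shows why:

```python
from dataclasses import dataclass, field
from typing import List, Optional


@dataclass
class Work:
    doi: str
    title: Optional[str] = None
    # A bare `= []` here would raise ValueError at class-definition time.
    authors: List[str] = field(default_factory=list)
    references: List[str] = field(default_factory=list)


a = Work(doi="10.1234/a")
b = Work(doi="10.1234/b")
a.authors.append("Someone")
print(b.authors)  # [] -- each instance gets its own fresh list
```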
The three {impact_factor → _impact_factor} files (__init__.py, calculator.py, journal_lookup.py) were renamed without content changes.
crossref_local/_remote/__init__.py (new file)

@@ -0,0 +1,56 @@
+"""Remote API client package with collection support.
+
+Provides RemoteClient for connecting to CrossRef Local API server.
+"""
+
+from typing import Optional
+
+from .base import (
+    RemoteClient as _BaseClient,
+    DEFAULT_API_URL,
+)
+from .collections import CollectionsMixin
+
+
+class RemoteClient(CollectionsMixin, _BaseClient):
+    """Remote client with collection support.
+
+    Extends base RemoteClient with collection management methods.
+
+    Example:
+        >>> client = RemoteClient("http://localhost:31291")
+        >>> # Create a collection
+        >>> client.create_collection("epilepsy", query="epilepsy seizure")
+        >>> # Query collection
+        >>> papers = client.get_collection("epilepsy", fields=["doi", "title"])
+        >>> # Download as file
+        >>> client.download_collection("epilepsy", "papers.bib", format="bibtex")
+    """
+
+    pass
+
+
+# Module-level client singleton
+_client: Optional[RemoteClient] = None
+
+
+def get_client(base_url: str = DEFAULT_API_URL) -> RemoteClient:
+    """Get or create singleton remote client with collection support."""
+    global _client
+    if _client is None or _client.base_url != base_url:
+        _client = RemoteClient(base_url)
+    return _client
+
+
+def reset_client() -> None:
+    """Reset singleton client."""
+    global _client
+    _client = None
+
+
+__all__ = [
+    "RemoteClient",
+    "DEFAULT_API_URL",
+    "get_client",
+    "reset_client",
+]
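`RemoteClient(CollectionsMixin, _BaseClient)` leans on Python's method resolution order: the mixin is listed first so its methods win, yet it can still call base methods through `self`, which is why the subclass body is just `pass`. A hypothetical miniature of the pattern:

```python
class Base:
    def get(self, key: str) -> str:
        return f"base:{key}"


class ExtrasMixin:
    def get_twice(self, key: str) -> list:
        # Mixin code reaches Base.get via self at runtime.
        return [self.get(key), self.get(key)]


class Client(ExtrasMixin, Base):
    pass


c = Client()
print([cls.__name__ for cls in Client.__mro__])  # ['Client', 'ExtrasMixin', 'Base', 'object']
print(c.get_twice("x"))                          # ['base:x', 'base:x']
```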
crossref_local/_remote/base.py (new file)

@@ -0,0 +1,356 @@
+"""Remote API client for crossref_local.
+
+Connects to a CrossRef Local API server instead of direct database access.
+Use this when the database is on a remote server accessible via HTTP.
+"""
+
+import json
+import urllib.request
+import urllib.parse
+import urllib.error
+from typing import List, Optional, Dict, Any
+
+from .._core.models import Work, SearchResult
+from .._core.config import DEFAULT_PORT
+
+# Default URL uses SCITEX port convention
+DEFAULT_API_URL = f"http://localhost:{DEFAULT_PORT}"
+
+
+class RemoteClient:
+    """
+    HTTP client for CrossRef Local API server.
+
+    Provides the same interface as the local API but connects
+    to a remote server via HTTP.
+
+    Example:
+        >>> client = RemoteClient("http://localhost:31291")
+        >>> results = client.search(title="machine learning", limit=10)
+        >>> work = client.get("10.1038/nature12373")
+    """
+
+    def __init__(self, base_url: str = DEFAULT_API_URL, timeout: int = 30):
+        """
+        Initialize remote client.
+
+        Args:
+            base_url: API server URL (default: http://localhost:3333)
+            timeout: Request timeout in seconds
+        """
+        self.base_url = base_url.rstrip("/")
+        self.timeout = timeout
+
+    def _request(
+        self,
+        endpoint: str,
+        params: Optional[Dict[str, Any]] = None,
+        method: str = "GET",
+        data: Optional[Dict[str, Any]] = None,
+    ) -> Dict:
+        """Make HTTP request to API."""
+        url = f"{self.base_url}{endpoint}"
+        if params:
+            # Filter out None values
+            params = {k: v for k, v in params.items() if v is not None}
+            if params:
+                url = f"{url}?{urllib.parse.urlencode(params)}"
+
+        try:
+            req_data = None
+            if data is not None:
+                req_data = json.dumps(data).encode("utf-8")
+
+            req = urllib.request.Request(url, data=req_data, method=method)
+            req.add_header("Accept", "application/json")
+            if req_data:
+                req.add_header("Content-Type", "application/json")
+
+            with urllib.request.urlopen(req, timeout=self.timeout) as response:
+                return json.loads(response.read().decode("utf-8"))
+        except urllib.error.HTTPError as e:
+            if e.code == 404:
+                return None
+            raise ConnectionError(f"API request failed: {e.code} {e.reason}") from e
+        except urllib.error.URLError as e:
+            raise ConnectionError(
+                f"Cannot connect to API at {self.base_url}: {e.reason}"
+            ) from e
+
+    def health(self) -> Dict:
+        """Check API server health."""
+        return self._request("/health")
+
+    def info(self) -> Dict:
+        """Get database/API information."""
+        root = self._request("/")
+        info_data = self._request("/info")
+        return {
+            "api_url": self.base_url,
+            "api_version": root.get("version", "unknown"),
+            "status": root.get("status", "unknown"),
+            "mode": "remote",
+            "works": info_data.get("total_papers", 0) if info_data else 0,
+            "fts_indexed": info_data.get("fts_indexed", 0) if info_data else 0,
+            "citations": info_data.get("citations", 0) if info_data else 0,
+        }
+
+    def search(
+        self,
+        query: Optional[str] = None,
+        doi: Optional[str] = None,
+        title: Optional[str] = None,
+        authors: Optional[str] = None,
+        year: Optional[int] = None,
+        limit: int = 10,
+        offset: int = 0,
+    ) -> SearchResult:
+        """
+        Search for papers.
+
+        Args:
+            query: Full-text search query (searches title by default)
+            doi: Search by DOI
+            title: Search by title (explicit)
+            authors: Search by author name
+            year: Filter by publication year
+            limit: Maximum results (default: 10, max: 100)
+            offset: Skip first N results for pagination
+
+        Returns:
+            SearchResult with matching works
+        """
+        # Use new /works endpoint with FTS5 search
+        search_query = query or title
+
+        params = {
+            "q": search_query,
+            "limit": min(limit, 100),
+            "offset": offset,
+        }
+
+        data = self._request("/works", params)
+
+        if not data:
+            return SearchResult(works=[], total=0, query=query or "", elapsed_ms=0.0)
+
+        works = []
+        for item in data.get("results", []):
+            work = Work(
+                doi=item.get("doi", ""),
+                title=item.get("title", ""),
+                authors=item.get("authors", []),
+                year=item.get("year"),
+                journal=item.get("journal"),
+                volume=item.get("volume"),
+                issue=item.get("issue"),
+                page=item.get("page") or item.get("pages"),
+                abstract=item.get("abstract"),
+                citation_count=item.get("citation_count"),
+            )
+            works.append(work)
+
+        return SearchResult(
+            works=works,
+            total=data.get("total", len(works)),
+            query=query or title or doi or "",
+            elapsed_ms=data.get("elapsed_ms", 0.0),
+        )
+
+    def get(self, doi: str) -> Optional[Work]:
+        """
+        Get a work by DOI.
+
+        Args:
+            doi: Digital Object Identifier
+
+        Returns:
+            Work object or None if not found
+        """
+        # Use /works/{doi} endpoint directly
+        data = self._request(f"/works/{doi}")
+        if not data or "error" in data:
+            return None
+
+        return Work(
+            doi=data.get("doi", doi),
+            title=data.get("title", ""),
+            authors=data.get("authors", []),
+            year=data.get("year"),
+            journal=data.get("journal"),
+            volume=data.get("volume"),
+            issue=data.get("issue"),
+            page=data.get("page"),
+            abstract=data.get("abstract"),
+            citation_count=data.get("citation_count"),
+        )
+
+    def get_many(self, dois: List[str]) -> List[Work]:
+        """
+        Get multiple works by DOI using batch endpoint.
+
+        Args:
+            dois: List of DOIs
+
+        Returns:
+            List of Work objects
+        """
+        # Use batch endpoint if available
+        try:
+            data = {"dois": dois}
+            req_data = json.dumps(data).encode("utf-8")
+            req = urllib.request.Request(
+                f"{self.base_url}/works/batch", data=req_data, method="POST"
+            )
+            req.add_header("Content-Type", "application/json")
+            req.add_header("Accept", "application/json")
+
+            with urllib.request.urlopen(req, timeout=self.timeout) as response:
+                result = json.loads(response.read().decode("utf-8"))
+
+            works = []
+            for item in result.get("results", []):
+                work = Work(
+                    doi=item.get("doi", ""),
+                    title=item.get("title", ""),
+                    authors=item.get("authors", []),
+                    year=item.get("year"),
+                    journal=item.get("journal"),
+                    volume=item.get("volume"),
+                    issue=item.get("issue"),
+                    page=item.get("page"),
+                    abstract=item.get("abstract"),
+                    citation_count=item.get("citation_count"),
+                )
+                works.append(work)
+            return works
+        except Exception:
+            # Fallback to individual lookups
+            works = []
+            for doi in dois:
+                work = self.get(doi)
+                if work:
+                    works.append(work)
+            return works
+
+    def exists(self, doi: str) -> bool:
+        """Check if a DOI exists."""
+        return self.get(doi) is not None
+
+    def get_citations(self, doi: str, direction: str = "both") -> Dict:
+        """
+        Get citations for a paper (legacy endpoint).
+
+        Args:
+            doi: Paper DOI
+            direction: 'citing', 'cited_by', or 'both'
+
+        Returns:
+            Dict with citation information
+        """
+        params = {"doi": doi, "direction": direction}
+        return self._request("/api/citations/", params) or {}
+
+    def get_citing(self, doi: str, limit: int = 100) -> List[str]:
+        """
+        Get DOIs of papers that cite the given DOI.
+
+        Args:
+            doi: The DOI to find citations for
+            limit: Maximum number of citing papers to return
+
+        Returns:
+            List of DOIs that cite this paper
+        """
+        data = self._request(f"/citations/{doi}/citing", {"limit": limit})
+        if not data:
+            return []
+        return data.get("papers", [])
+
+    def get_cited(self, doi: str, limit: int = 100) -> List[str]:
+        """
+        Get DOIs of papers that the given DOI cites (references).
+
+        Args:
+            doi: The DOI to find references for
+            limit: Maximum number of referenced papers to return
+
+        Returns:
+            List of DOIs that this paper cites
+        """
+        data = self._request(f"/citations/{doi}/cited", {"limit": limit})
+        if not data:
+            return []
+        return data.get("papers", [])
+
+    def get_citation_count(self, doi: str) -> int:
+        """
+        Get the number of citations for a DOI.
+
+        Args:
+            doi: The DOI to count citations for
+
+        Returns:
+            Number of papers citing this DOI
+        """
+        data = self._request(f"/citations/{doi}/count")
+        if not data:
+            return 0
+        return data.get("citation_count", 0)
+
+    def get_citation_network(
+        self, doi: str, depth: int = 1, max_citing: int = 25, max_cited: int = 25
+    ) -> Dict:
+        """
+        Get citation network graph for a DOI.
+
+        Args:
+            doi: The DOI to build the network around
+            depth: How many levels of citations to include (1-3)
+            max_citing: Max papers citing each node to include
+            max_cited: Max papers each node cites to include
+
+        Returns:
+            Dict with nodes, edges, and stats
+        """
+        params = {
+            "depth": depth,
+            "max_citing": max_citing,
+            "max_cited": max_cited,
+        }
+        data = self._request(f"/citations/{doi}/network", params)
+        return data or {}
+
+    def get_journal(
+        self, issn: Optional[str] = None, name: Optional[str] = None
+    ) -> Dict:
+        """
+        Get journal information.
+
+        Args:
+            issn: Journal ISSN
+            name: Journal name
+
+        Returns:
+            Dict with journal information
+        """
+        params = {"issn": issn, "name": name}
+        return self._request("/api/journal/", params) or {}
+
+
+# Module-level client for convenience
+_client: Optional[RemoteClient] = None
+
+
+def get_client(base_url: str = DEFAULT_API_URL) -> RemoteClient:
+    """Get or create singleton remote client."""
+    global _client
+    if _client is None or _client.base_url != base_url:
+        _client = RemoteClient(base_url)
+    return _client
+
+
+def reset_client() -> None:
+    """Reset singleton client."""
+    global _client
+    _client = None
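Tying the new package together, a hedged end-to-end sketch: it assumes a CrossRef Local API server is actually listening on `DEFAULT_PORT`, and it imports from the private `_remote` package shown above (the slimmed-down `crossref_local/remote.py` in the file list presumably re-exports the same names):

```python
from crossref_local._remote import get_client

client = get_client()              # singleton; later calls reuse the instance
print(client.health())             # response shape depends on the server

result = client.search(query="epilepsy", limit=5)
print(result.total, f"{result.elapsed_ms:.1f} ms")
for work in result.works:
    print(work.doi, work.title)

doi = "10.1038/nature12373"        # illustrative DOI from the docstrings
if client.exists(doi):
    print(client.get_citation_count(doi))
    print(client.get_citing(doi, limit=10))
```

When the server is unreachable, every call raises `ConnectionError` (see `_request` above), so scripts should wrap this in a try/except rather than assume the API is up.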