PyPI - crossref-local - Versions diffs - 0.3.1__py3-none-any.whl - Mend

crossref-local 0.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (20) hide show

crossref_local/__init__.py +128 -0
crossref_local/__main__.py +6 -0
crossref_local/aio.py +236 -0
crossref_local/api.py +221 -0
crossref_local/citations.py +413 -0
crossref_local/cli.py +450 -0
crossref_local/config.py +171 -0
crossref_local/db.py +138 -0
crossref_local/fts.py +172 -0
crossref_local/impact_factor/__init__.py +20 -0
crossref_local/impact_factor/calculator.py +479 -0
crossref_local/impact_factor/journal_lookup.py +274 -0
crossref_local/mcp_server.py +202 -0
crossref_local/models.py +186 -0
crossref_local/remote.py +264 -0
crossref_local/server.py +352 -0
crossref_local-0.3.1.dist-info/METADATA +306 -0
crossref_local-0.3.1.dist-info/RECORD +20 -0
crossref_local-0.3.1.dist-info/WHEEL +4 -0
crossref_local-0.3.1.dist-info/entry_points.txt +3 -0

crossref_local/remote.py ADDED Viewed

@@ -0,0 +1,264 @@
+"""Remote API client for crossref_local.
+Connects to a CrossRef Local API server instead of direct database access.
+Use this when the database is on a remote server accessible via HTTP.
+"""
+import json
+import urllib.request
+import urllib.parse
+import urllib.error
+from typing import List, Optional, Dict, Any
+from .models import Work, SearchResult
class RemoteClient:
    """
    HTTP client for CrossRef Local API server.

    Provides the same interface as the local API but connects
    to a remote server via HTTP.

    Example:
        >>> client = RemoteClient("http://localhost:3333")
        >>> results = client.search(title="machine learning", limit=10)
        >>> work = client.get("10.1038/nature12373")
    """

    def __init__(self, base_url: str = "http://localhost:3333", timeout: int = 30):
        """
        Initialize remote client.

        Args:
            base_url: API server URL (default: http://localhost:3333).
                Trailing slashes are stripped before it is stored.
            timeout: Request timeout in seconds
        """
        self.base_url = base_url.rstrip("/")
        self.timeout = timeout

    @staticmethod
    def _work_path(doi: str) -> str:
        """Return the ``/works/<doi>`` endpoint path with the DOI percent-encoded."""
        # "/" stays literal because DOIs contain it; characters such as "#", "?"
        # or spaces must be escaped or they would cut off or corrupt the path.
        return "/works/" + urllib.parse.quote(doi, safe="/")

    @staticmethod
    def _to_work(item: Dict[str, Any], default_doi: str = "") -> "Work":
        """Build a Work from one JSON object returned by the API.

        Args:
            item: Decoded JSON object describing a single work.
            default_doi: DOI to use when the object carries no "doi" key.
        """
        return Work(
            doi=item.get("doi", default_doi),
            title=item.get("title", ""),
            authors=item.get("authors", []),
            year=item.get("year"),
            journal=item.get("journal"),
            volume=item.get("volume"),
            issue=item.get("issue"),
            # Accept either spelling of the page field.
            page=item.get("page") or item.get("pages"),
            abstract=item.get("abstract"),
            citation_count=item.get("citation_count"),
        )

    def _request(
        self, endpoint: str, params: Optional[Dict[str, Any]] = None
    ) -> Optional[Dict]:
        """Make HTTP GET request to API.

        Args:
            endpoint: Path appended to ``base_url`` (must start with "/").
            params: Query parameters; entries whose value is None are dropped.

        Returns:
            The decoded JSON body, or None when the server answers 404.

        Raises:
            ConnectionError: On any other HTTP error status or when the
                server cannot be reached.
        """
        url = f"{self.base_url}{endpoint}"
        if params:
            # Filter out None values
            params = {k: v for k, v in params.items() if v is not None}
            if params:
                url = f"{url}?{urllib.parse.urlencode(params)}"
        req = urllib.request.Request(url)
        req.add_header("Accept", "application/json")
        try:
            with urllib.request.urlopen(req, timeout=self.timeout) as response:
                return json.loads(response.read().decode("utf-8"))
        except urllib.error.HTTPError as e:
            if e.code == 404:
                return None
            raise ConnectionError(f"API request failed: {e.code} {e.reason}") from e
        except urllib.error.URLError as e:
            raise ConnectionError(
                f"Cannot connect to API at {self.base_url}: {e.reason}"
            ) from e

    def health(self) -> Dict:
        """Check API server health (returns the decoded ``/health`` response)."""
        return self._request("/health")

    def info(self) -> Dict:
        """Get database/API information.

        Combines the ``/`` and ``/info`` responses. Either response may be
        missing (404); the corresponding fields then fall back to
        "unknown" / 0 instead of raising.
        """
        root = self._request("/") or {}
        info_data = self._request("/info") or {}
        return {
            "api_url": self.base_url,
            "api_version": root.get("version", "unknown"),
            "status": root.get("status", "unknown"),
            "mode": "remote",
            "works": info_data.get("total_papers", 0),
            "fts_indexed": info_data.get("fts_indexed", 0),
            "citations": info_data.get("citations", 0),
        }

    def search(
        self,
        query: Optional[str] = None,
        doi: Optional[str] = None,
        title: Optional[str] = None,
        authors: Optional[str] = None,
        year: Optional[int] = None,
        limit: int = 10,
        offset: int = 0,
    ) -> "SearchResult":
        """
        Search for papers.

        Args:
            query: Full-text search query (searches title by default)
            doi: Search by DOI. Used only when neither ``query`` nor
                ``title`` is given; the DOI is then resolved with ``get``.
            title: Search by title (explicit)
            authors: Search by author name. Accepted for interface
                compatibility; not forwarded to the server by this client.
            year: Filter by publication year. Accepted for interface
                compatibility; not forwarded to the server by this client.
            limit: Maximum results (default: 10, max: 100)
            offset: Skip first N results for pagination

        Returns:
            SearchResult with matching works
        """
        search_query = query or title
        label = query or title or doi or ""

        if not search_query and doi:
            # A DOI-only search has no full-text term to send, so resolve it
            # as a direct lookup rather than issuing a request without "q".
            work = self.get(doi)
            found = [work] if work else []
            return SearchResult(
                works=found, total=len(found), query=label, elapsed_ms=0.0
            )

        # Use new /works endpoint with FTS5 search
        params = {
            "q": search_query,
            "limit": min(limit, 100),
            "offset": offset,
        }
        data = self._request("/works", params)
        if not data:
            return SearchResult(works=[], total=0, query=label, elapsed_ms=0.0)

        works = [self._to_work(item) for item in data.get("results", [])]
        return SearchResult(
            works=works,
            total=data.get("total", len(works)),
            query=label,
            elapsed_ms=data.get("elapsed_ms", 0.0),
        )

    def get(self, doi: str) -> Optional["Work"]:
        """
        Get a work by DOI.

        Args:
            doi: Digital Object Identifier

        Returns:
            Work object or None if not found
        """
        data = self._request(self._work_path(doi))
        if not data or "error" in data:
            return None
        return self._to_work(data, default_doi=doi)

    def get_many(self, dois: List[str]) -> List["Work"]:
        """
        Get multiple works by DOI using batch endpoint.

        Falls back to one ``get`` call per DOI when the batch request fails
        at the transport/HTTP level or returns a body that is not valid JSON.

        Args:
            dois: List of DOIs

        Returns:
            List of Work objects (DOIs that are not found are omitted)
        """
        import http.client

        if not dois:
            return []

        payload = json.dumps({"dois": dois}).encode("utf-8")
        req = urllib.request.Request(
            f"{self.base_url}/works/batch", data=payload, method="POST"
        )
        req.add_header("Content-Type", "application/json")
        req.add_header("Accept", "application/json")
        try:
            with urllib.request.urlopen(req, timeout=self.timeout) as response:
                result = json.loads(response.read().decode("utf-8"))
        except (OSError, ValueError, http.client.HTTPException):
            # OSError covers URLError/HTTPError and socket timeouts, ValueError
            # covers undecodable or non-JSON bodies. Programming errors are
            # deliberately not caught so they surface.
            found = (self.get(doi) for doi in dois)
            return [work for work in found if work]

        return [self._to_work(item) for item in result.get("results", [])]

    def exists(self, doi: str) -> bool:
        """Check if a DOI exists."""
        return self.get(doi) is not None

    def get_citations(self, doi: str, direction: str = "both") -> Dict:
        """
        Get citations for a paper.

        Args:
            doi: Paper DOI
            direction: 'citing', 'cited_by', or 'both'

        Returns:
            Dict with citation information; empty dict when the server
            answers 404 or returns an empty body.
        """
        # NOTE(review): confirm which backend serves /api/citations/.
        params = {"doi": doi, "direction": direction}
        return self._request("/api/citations/", params) or {}

    def get_journal(
        self, issn: Optional[str] = None, name: Optional[str] = None
    ) -> Dict:
        """
        Get journal information.

        Args:
            issn: Journal ISSN
            name: Journal name

        Returns:
            Dict with journal information; empty dict when the server
            answers 404 or returns an empty body.
        """
        # NOTE(review): confirm which backend serves /api/journal/.
        params = {"issn": issn, "name": name}
        return self._request("/api/journal/", params) or {}
# Module-level client for convenience
_client: Optional[RemoteClient] = None


def get_client(base_url: str = "http://localhost:3333") -> RemoteClient:
    """Get or create singleton remote client.

    The cached client is reused as long as it points at the same server;
    asking for a different ``base_url`` replaces it.

    Args:
        base_url: API server URL. A trailing slash is ignored when deciding
            whether the cached client can be reused.

    Returns:
        The module-level RemoteClient instance.
    """
    global _client
    # RemoteClient stores its URL without trailing slashes, so compare against
    # the same normalized form. Otherwise "http://host/" would never match and
    # a new client would be built on every call.
    wanted = base_url.rstrip("/")
    if _client is None or _client.base_url != wanted:
        _client = RemoteClient(base_url)
    return _client
def reset_client() -> None:
    """Discard the cached module-level client.

    After this call the module-level ``_client`` is None.
    """
    global _client
    _client = None

crossref_local/server.py ADDED Viewed

@@ -0,0 +1,352 @@
+"""FastAPI server for CrossRef Local with FTS5 search.
+This server provides proper full-text search using FTS5 index,
+unlike the Django API which only scans a limited subset.
+Usage:
+    crossref-local api                    # Run on default port 3333
+    crossref-local api --port 8080        # Custom port
+    # Or directly:
+    uvicorn crossref_local.server:app --host 0.0.0.0 --port 3333
+"""
+import time
+from typing import Optional, List
+from fastapi import FastAPI, Query, HTTPException
+from fastapi.middleware.cors import CORSMiddleware
+from pydantic import BaseModel
+from . import fts
+from .db import get_db
+from .models import Work
# ASGI application object; the route decorators in this module attach to it.
app = FastAPI(
    title="CrossRef Local API",
    description="Fast full-text search across 167M+ scholarly works",
    version="1.1.0",
)

# CORS middleware: every origin, method and header is allowed.
app.add_middleware(
    CORSMiddleware, allow_origins=["*"], allow_methods=["*"], allow_headers=["*"]
)
# JSON shape of a single work as returned by the endpoints in this module.
# Only `doi` is required; every other field has a default.
class WorkResponse(BaseModel):
    doi: str
    title: Optional[str] = None
    authors: List[str] = []
    year: Optional[int] = None
    journal: Optional[str] = None
    issn: Optional[str] = None
    volume: Optional[str] = None
    issue: Optional[str] = None
    page: Optional[str] = None
    abstract: Optional[str] = None
    citation_count: Optional[int] = None
# Response body of the /works search endpoint.
class SearchResponse(BaseModel):
    query: str  # the search string as received
    total: int  # total reported by the search backend
    returned: int  # number of entries in `results`
    elapsed_ms: float  # search duration in milliseconds
    results: List[WorkResponse]
# Response body of the /info endpoint: fixed API descriptors plus row counts.
class InfoResponse(BaseModel):
    name: str = "CrossRef Local API"
    version: str = "1.1.0"
    status: str = "running"
    mode: str = "local"
    total_papers: int  # row count of the `works` table
    fts_indexed: int  # row count of `works_fts`
    citations: int  # row count of `citations`
    database_path: str
@app.get("/")
def root():
    """API root with endpoint information."""
    # Map of logical operation name -> URL pattern.
    endpoints = dict(
        health="/health",
        info="/info",
        search="/works?q=<query>",
        get_by_doi="/works/{doi}",
        batch="/works/batch",
    )
    return dict(
        name="CrossRef Local API",
        version="1.1.0",
        status="running",
        endpoints=endpoints,
    )
@app.get("/health")
def health():
    """Health check endpoint."""
    db = get_db()
    # Path is reported only when a database handle is available.
    path = str(db.db_path) if db else None
    payload = {"status": "healthy"}
    payload["database_connected"] = db is not None
    payload["database_path"] = path
    return payload
@app.get("/info", response_model=InfoResponse)
def info():
    """Get database statistics."""
    db = get_db()

    def count(sql):
        # Run a COUNT query; a missing row counts as 0.
        row = db.fetchone(sql)
        return row["count"] if row else 0

    def count_or_zero(sql):
        # Same as count(), but any failure (e.g. the table is absent) yields 0.
        try:
            return count(sql)
        except Exception:
            return 0

    # The `works` count is mandatory; the other two tables are optional.
    work_count = count("SELECT COUNT(*) as count FROM works")
    fts_count = count_or_zero("SELECT COUNT(*) as count FROM works_fts")
    citation_count = count_or_zero("SELECT COUNT(*) as count FROM citations")

    return InfoResponse(
        total_papers=work_count,
        fts_indexed=fts_count,
        citations=citation_count,
        database_path=str(db.db_path),
    )
@app.get("/works", response_model=SearchResponse)
def search_works(
    q: str = Query(..., description="Search query (FTS5 syntax supported)"),
    limit: int = Query(10, ge=1, le=100, description="Max results"),
    offset: int = Query(0, ge=0, description="Skip first N results"),
):
    """
    Full-text search across works.
    Uses FTS5 index for fast searching across titles, abstracts, and authors.
    Supports FTS5 query syntax like AND, OR, NOT, "exact phrases".
    Examples:
        /works?q=machine learning
        /works?q="neural network" AND hippocampus
        /works?q=CRISPR&limit=20
    """
    # Time only the search call itself, not response serialization.
    start = time.perf_counter()
    try:
        results = fts.search(q, limit=limit, offset=offset)
    except Exception as e:
        # Any search failure is reported to the client as a 400. Chaining with
        # `from e` keeps the original traceback for server-side logs.
        raise HTTPException(status_code=400, detail=f"Search error: {e}") from e
    elapsed_ms = (time.perf_counter() - start) * 1000

    return SearchResponse(
        query=q,
        total=results.total,
        returned=len(results.works),
        elapsed_ms=round(elapsed_ms, 2),
        results=[
            WorkResponse(
                doi=w.doi,
                title=w.title,
                authors=w.authors,
                year=w.year,
                journal=w.journal,
                issn=w.issn,
                volume=w.volume,
                issue=w.issue,
                page=w.page,
                abstract=w.abstract,
                citation_count=w.citation_count,
            )
            for w in results.works
        ],
    )
@app.get("/works/{doi:path}", response_model=Optional[WorkResponse])
def get_work(doi: str):
    """
    Get work metadata by DOI.
    Examples:
        /works/10.1038/nature12373
        /works/10.1016/j.cell.2020.01.001
    """
    metadata = get_db().get_metadata(doi)
    if metadata is None:
        raise HTTPException(status_code=404, detail=f"DOI not found: {doi}")

    work = Work.from_metadata(doi, metadata)
    # Copy the response fields one-to-one from the Work object.
    field_names = (
        "doi",
        "title",
        "authors",
        "year",
        "journal",
        "issn",
        "volume",
        "issue",
        "page",
        "abstract",
        "citation_count",
    )
    return WorkResponse(**{name: getattr(work, name) for name in field_names})
# Request body of POST /works/batch.
class BatchRequest(BaseModel):
    dois: List[str]  # DOIs to look up
# Response body of POST /works/batch.
class BatchResponse(BaseModel):
    requested: int  # number of DOIs in the request
    found: int  # number of entries in `results`
    results: List[WorkResponse]
@app.post("/works/batch", response_model=BatchResponse)
def get_works_batch(request: BatchRequest):
    """
    Get multiple works by DOI.
    Request body: {"dois": ["10.1038/...", "10.1016/..."]}
    """
    db = get_db()
    results = []
    for doi in request.dois:
        metadata = db.get_metadata(doi)
        if not metadata:
            # Unknown DOIs are skipped; the response reports requested vs found.
            continue
        work = Work.from_metadata(doi, metadata)
        # Return the same full field set as /works and /works/{doi}, so batch
        # lookups do not lose issn/volume/issue/page.
        results.append(
            WorkResponse(
                doi=work.doi,
                title=work.title,
                authors=work.authors,
                year=work.year,
                journal=work.journal,
                issn=work.issn,
                volume=work.volume,
                issue=work.issue,
                page=work.page,
                abstract=work.abstract,
                citation_count=work.citation_count,
            )
        )
    return BatchResponse(
        requested=len(request.dois),
        found=len(results),
        results=results,
    )
# For backwards compatibility with existing API endpoints
@app.get("/api/search/")
def api_search_compat(
    title: Optional[str] = None,
    q: Optional[str] = None,
    doi: Optional[str] = None,
    limit: int = 10,
):
    """Backwards-compatible search endpoint."""
    query = title or q
    if doi:
        # DOI lookup takes precedence over any text query. A missing DOI is
        # reported as an empty result set rather than a 404.
        try:
            work = get_work(doi)
        except HTTPException:
            return {"query": {"doi": doi}, "results": [], "total": 0, "returned": 0}
        return {
            "query": {"doi": doi},
            "results": [work.model_dump()],
            "total": 1,
            "returned": 1,
        }
    if not query:
        raise HTTPException(
            status_code=400, detail="Specify q, title, or doi parameter"
        )
    # Call fts.search directly (not the endpoint function). Failures are mapped
    # to a 400 exactly as the /works endpoint does, instead of surfacing as an
    # unhandled 500.
    try:
        results = fts.search(query, limit=limit, offset=0)
    except Exception as e:
        raise HTTPException(status_code=400, detail=f"Search error: {e}") from e
    return {
        "query": {
            "title": query,
            "doi": None,
            "year": None,
            "authors": None,
            "limit": limit,
        },
        "results": [
            WorkResponse(
                doi=w.doi,
                title=w.title,
                authors=w.authors,
                year=w.year,
                journal=w.journal,
                issn=w.issn,
                volume=w.volume,
                issue=w.issue,
                page=w.page,
                abstract=w.abstract,
                citation_count=w.citation_count,
            ).model_dump()
            for w in results.works
        ],
        "total": results.total,
        "returned": len(results.works),
    }
@app.get("/api/stats/")
def api_stats_compat():
    """Backwards-compatible stats endpoint."""
    db = get_db()

    count_row = db.fetchone("SELECT COUNT(*) as count FROM works")
    work_count = count_row["count"] if count_row else 0

    # Table names from the SQLite catalog.
    table_rows = db.fetchall("SELECT name FROM sqlite_master WHERE type='table'")
    tables = [entry["name"] for entry in table_rows]

    # Index names from the SQLite catalog, skipping entries with an empty name.
    index_rows = db.fetchall("SELECT name FROM sqlite_master WHERE type='index'")
    indices = [entry["name"] for entry in index_rows if entry["name"]]

    # Statistics this server does not compute are returned as fixed placeholders.
    return {
        "total_papers": work_count,
        "database_size_mb": None,
        "year_range": None,
        "total_journals": 0,
        "total_citations": None,
        "tables": tables,
        "indices": indices,
    }
def run_server(host: str = "0.0.0.0", port: int = 3333):
    """Serve ``app`` with uvicorn on the given host and port.

    Args:
        host: Interface address to bind (default "0.0.0.0").
        port: TCP port to listen on (default 3333).
    """
    # uvicorn is imported here, so it is only loaded when the server is started.
    from uvicorn import run as serve

    serve(app, host=host, port=port)


if __name__ == "__main__":
    run_server()