PyPI - crossref-local - Versions diffs - 0.3.1__py3-none-any.whl → 0.5.0__py3-none-any.whl - Mend

crossref-local 0.3.1 py3-none-any.whl → 0.5.0 py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (48) hide show

crossref_local/__init__.py +38 -16
crossref_local/__main__.py +0 -0
crossref_local/_aio/__init__.py +30 -0
crossref_local/_aio/_impl.py +238 -0
crossref_local/_cache/__init__.py +15 -0
crossref_local/_cache/export.py +100 -0
crossref_local/_cache/utils.py +93 -0
crossref_local/_cache/viz.py +296 -0
crossref_local/_cli/__init__.py +9 -0
crossref_local/_cli/cache.py +179 -0
crossref_local/_cli/cli.py +512 -0
crossref_local/_cli/completion.py +245 -0
crossref_local/_cli/main.py +20 -0
crossref_local/_cli/mcp.py +351 -0
crossref_local/_cli/mcp_server.py +413 -0
crossref_local/_core/__init__.py +58 -0
crossref_local/{api.py → _core/api.py} +130 -36
crossref_local/{citations.py → _core/citations.py} +55 -26
crossref_local/{config.py → _core/config.py} +57 -42
crossref_local/{db.py → _core/db.py} +32 -26
crossref_local/{fts.py → _core/fts.py} +18 -14
crossref_local/{models.py → _core/models.py} +11 -6
crossref_local/{impact_factor → _impact_factor}/__init__.py +0 -0
crossref_local/{impact_factor → _impact_factor}/calculator.py +0 -0
crossref_local/{impact_factor → _impact_factor}/journal_lookup.py +0 -0
crossref_local/_remote/__init__.py +56 -0
crossref_local/_remote/base.py +356 -0
crossref_local/_remote/collections.py +175 -0
crossref_local/_server/__init__.py +140 -0
crossref_local/_server/middleware.py +25 -0
crossref_local/_server/models.py +129 -0
crossref_local/_server/routes_citations.py +98 -0
crossref_local/_server/routes_collections.py +282 -0
crossref_local/_server/routes_compat.py +102 -0
crossref_local/_server/routes_works.py +128 -0
crossref_local/_server/server.py +19 -0
crossref_local/aio.py +30 -206
crossref_local/cache.py +466 -0
crossref_local/cli.py +5 -447
crossref_local/jobs.py +169 -0
crossref_local/mcp_server.py +5 -199
crossref_local/remote.py +5 -261
crossref_local/server.py +5 -349
{crossref_local-0.3.1.dist-info → crossref_local-0.5.0.dist-info}/METADATA +88 -24
crossref_local-0.5.0.dist-info/RECORD +47 -0
crossref_local-0.3.1.dist-info/RECORD +0 -20
{crossref_local-0.3.1.dist-info → crossref_local-0.5.0.dist-info}/WHEEL +0 -0
{crossref_local-0.3.1.dist-info → crossref_local-0.5.0.dist-info}/entry_points.txt +0 -0

crossref_local/_remote/collections.py ADDED Viewed

@@ -0,0 +1,175 @@
+"""Collection methods mixin for RemoteClient."""
+import json
+import urllib.request
+import urllib.parse
+import urllib.error
+from typing import Dict, List, Optional, Any
+class CollectionsMixin:
+    """Mixin providing collection management methods for RemoteClient.
+    The mixin keeps no state of its own. Its methods use three members that
+    the host class must supply: ``self._request`` (called with an endpoint
+    path, optionally query parameters, and ``method``/``data`` keywords),
+    ``self.base_url`` and ``self.timeout``.
+    NOTE(review): apart from ``download_collection``, the methods place
+    ``name`` into the endpoint path unescaped and leave any encoding to
+    ``self._request``, which is not visible here - confirm it copes with
+    names containing spaces, "/", "?" or "#".
+    """
+    def list_collections(self) -> List[Dict]:
+        """
+        List all collections.
+        Returns:
+            The ``"collections"`` entry of the response, or an empty list
+            when the response is falsy or has no such entry.
+        """
+        data = self._request("/collections")
+        if not data:
+            return []
+        return data.get("collections", [])
+    def create_collection(
+        self,
+        name: str,
+        query: Optional[str] = None,
+        dois: Optional[List[str]] = None,
+        limit: int = 1000,
+    ) -> Dict:
+        """
+        Create a new collection from search query or DOI list.
+        ``name`` and ``limit`` are always sent; ``query`` and ``dois`` are
+        added to the request body only when they are truthy.
+        Args:
+            name: Collection name
+            query: FTS search query (if dois not provided)
+            dois: Explicit list of DOIs
+            limit: Max papers for query mode
+        Returns:
+            Collection info dictionary, or an empty dict when the response
+            is falsy
+        """
+        body = {"name": name, "limit": limit}
+        if query:
+            body["query"] = query
+        if dois:
+            body["dois"] = dois
+        result = self._request("/collections", method="POST", data=body)
+        return result or {}
+    def get_collection(
+        self,
+        name: str,
+        fields: Optional[List[str]] = None,
+        include_abstract: bool = False,
+        include_references: bool = False,
+        include_citations: bool = False,
+        year_min: Optional[int] = None,
+        year_max: Optional[int] = None,
+        journal: Optional[str] = None,
+        limit: Optional[int] = None,
+    ) -> Dict:
+        """
+        Query a collection with field filtering.
+        Args:
+            name: Collection name
+            fields: Explicit field list (sent as one comma-joined string)
+            include_abstract: Include abstracts
+            include_references: Include references
+            include_citations: Include citation counts
+            year_min: Filter by min year
+            year_max: Filter by max year
+            journal: Filter by journal
+            limit: Max results
+        Returns:
+            Dict with collection name, count, and papers, or an empty dict
+            when the response is falsy
+        """
+        # Filters left at None are still handed to self._request; presumably
+        # they are dropped there before the query string is built - TODO confirm.
+        params = {
+            "include_abstract": include_abstract,
+            "include_references": include_references,
+            "include_citations": include_citations,
+            "year_min": year_min,
+            "year_max": year_max,
+            "journal": journal,
+            "limit": limit,
+        }
+        if fields:
+            params["fields"] = ",".join(fields)
+        data = self._request(f"/collections/{name}", params)
+        return data or {}
+    def get_collection_stats(self, name: str) -> Dict:
+        """
+        Get collection statistics.
+        Args:
+            name: Collection name
+        Returns:
+            Dict with year distribution, top journals, citation stats, or an
+            empty dict when the response is falsy
+        """
+        data = self._request(f"/collections/{name}/stats")
+        return data or {}
+    def download_collection(
+        self,
+        name: str,
+        output_path: str,
+        format: str = "json",
+        fields: Optional[List[str]] = None,
+    ) -> str:
+        """
+        Download collection as a file.
+        The request is issued directly with urllib (not via ``self._request``)
+        and the whole response body is read before it is written to disk.
+        Args:
+            name: Collection name
+            output_path: Local file path to save to
+            format: Export format (json, csv, bibtex, dois)
+            fields: Fields to include (json/csv)
+        Returns:
+            Output file path
+        Raises:
+            ConnectionError: On an HTTP error status or when the server
+                cannot be reached.
+        """
+        params = {"format": format}
+        if fields:
+            params["fields"] = ",".join(fields)
+        # Percent-encode the name so that spaces, "/", "?" or "#" in it cannot
+        # corrupt the path or leak into the query string.
+        quoted_name = urllib.parse.quote(str(name), safe="")
+        # params always contains "format", so the query string is never empty.
+        url = (
+            f"{self.base_url}/collections/{quoted_name}/download"
+            f"?{urllib.parse.urlencode(params)}"
+        )
+        try:
+            req = urllib.request.Request(url)
+            with urllib.request.urlopen(req, timeout=self.timeout) as response:
+                content = response.read()
+        except urllib.error.HTTPError as e:
+            raise ConnectionError(f"Download failed: {e.code} {e.reason}") from e
+        except urllib.error.URLError as e:
+            raise ConnectionError(f"Cannot connect: {e.reason}") from e
+        # Written only after a complete read, so a failed request leaves no file.
+        with open(output_path, "wb") as f:
+            f.write(content)
+        return output_path
+    def delete_collection(self, name: str) -> bool:
+        """
+        Delete a collection.
+        Args:
+            name: Collection name
+        Returns:
+            The ``"deleted"`` entry of the response; False when the response
+            is falsy or has no such entry
+        """
+        data = self._request(f"/collections/{name}", method="DELETE")
+        if not data:
+            return False
+        return data.get("deleted", False)
+    def collection_exists(self, name: str) -> bool:
+        """
+        Check if a collection exists.
+        Probes the stats endpoint; any response other than None (an empty
+        dict included) counts as existing.
+        Args:
+            name: Collection name
+        Returns:
+            True if exists
+        """
+        data = self._request(f"/collections/{name}/stats")
+        return data is not None

crossref_local/_server/__init__.py ADDED Viewed

@@ -0,0 +1,140 @@
+"""FastAPI server for CrossRef Local with FTS5 search.
+Modular server structure:
+- routes_works.py: /works endpoints
+- routes_citations.py: /citations endpoints
+- routes_collections.py: /collections endpoints
+- routes_compat.py: Legacy /api/* endpoints
+- models.py: Pydantic response models
+- middleware.py: Request middleware
+"""
+import os
+from fastapi import FastAPI
+from fastapi.middleware.cors import CORSMiddleware
+from .. import __version__
+from .middleware import UserContextMiddleware
+from .routes_works import router as works_router
+from .routes_citations import router as citations_router
+from .routes_collections import router as collections_router
+from .routes_compat import router as compat_router
+# Application object; its advertised version follows the package version.
+app = FastAPI(
+    version=__version__,
+    title="CrossRef Local API",
+    description="Fast full-text search across 167M+ scholarly works",
+)
+# Middleware, registered in this order: user context first, then CORS.
+app.add_middleware(UserContextMiddleware)
+app.add_middleware(
+    CORSMiddleware,
+    allow_headers=["*"],
+    allow_methods=["*"],
+    allow_origins=["*"],
+)
+# Routers are mounted in a fixed order: works, citations, collections, compat.
+for _router in (works_router, citations_router, collections_router, compat_router):
+    app.include_router(_router)
+del _router  # keep the loop variable out of the module namespace
+@app.get("/")
+def root():
+    """API root with endpoint information."""
+    # Human-readable map of endpoint labels to path templates.
+    endpoint_map = {
+        "health": "/health",
+        "info": "/info",
+        "search": "/works?q=<query>",
+        "get_by_doi": "/works/{doi}",
+        "batch": "/works/batch",
+        "citations_citing": "/citations/{doi}/citing",
+        "citations_cited": "/citations/{doi}/cited",
+        "citations_count": "/citations/{doi}/count",
+        "citations_network": "/citations/{doi}/network",
+        "collections_list": "/collections",
+        "collections_create": "/collections (POST)",
+        "collections_get": "/collections/{name}",
+        "collections_stats": "/collections/{name}/stats",
+        "collections_download": "/collections/{name}/download",
+        "collections_delete": "/collections/{name} (DELETE)",
+    }
+    payload = {
+        "name": "CrossRef Local API",
+        "version": __version__,
+        "status": "running",
+        "endpoints": endpoint_map,
+    }
+    return payload
+@app.get("/health")
+def health():
+    """Health check endpoint."""
+    # Imported at call time rather than at module import.
+    from .._core.db import get_db
+    handle = get_db()
+    connected = handle is not None
+    # The path is reported only when the handle is truthy.
+    path = str(handle.db_path) if handle else None
+    return {
+        "status": "healthy",
+        "database_connected": connected,
+        "database_path": path,
+    }
+@app.get("/info")
+def info():
+    """Get database statistics."""
+    from .._core.db import get_db
+    from .models import InfoResponse
+    db = get_db()
+    def _best_effort_count(sql):
+        # Any exception raised while running the query is reported as 0.
+        try:
+            found = db.fetchone(sql)
+            return found["count"] if found else 0
+        except Exception:
+            return 0
+    # The works count is not guarded: a failure here propagates to the caller.
+    works_row = db.fetchone("SELECT COUNT(*) as count FROM works")
+    if works_row:
+        work_count = works_row["count"]
+    else:
+        work_count = 0
+    fts_count = _best_effort_count("SELECT COUNT(*) as count FROM works_fts")
+    citation_count = _best_effort_count("SELECT COUNT(*) as count FROM citations")
+    return InfoResponse(
+        total_papers=work_count,
+        fts_indexed=fts_count,
+        citations=citation_count,
+        database_path=str(db.db_path),
+    )
+# Default port: SCITEX convention (3129X scheme).
+# Lookup order: SCITEX_SCHOLAR_CROSSREF_PORT, then CROSSREF_LOCAL_PORT, then "31291".
+DEFAULT_PORT = int(
+    os.getenv(
+        "SCITEX_SCHOLAR_CROSSREF_PORT",
+        os.getenv("CROSSREF_LOCAL_PORT", "31291"),
+    )
+)
+# Lookup order: SCITEX_SCHOLAR_CROSSREF_HOST, then CROSSREF_LOCAL_HOST, then "0.0.0.0".
+DEFAULT_HOST = os.getenv(
+    "SCITEX_SCHOLAR_CROSSREF_HOST",
+    os.getenv("CROSSREF_LOCAL_HOST", "0.0.0.0"),
+)
+def run_server(host: str = None, port: int = None):
+    """Run the FastAPI server with uvicorn.
+    Args:
+        host: Bind address; any falsy value selects DEFAULT_HOST.
+        port: Bind port; any falsy value (0 included) selects DEFAULT_PORT.
+    """
+    import uvicorn
+    bind_host = host if host else DEFAULT_HOST
+    bind_port = port if port else DEFAULT_PORT
+    uvicorn.run(app, host=bind_host, port=bind_port)
+# Names exported by ``from crossref_local._server import *``.
+__all__ = [
+    "app",
+    "run_server",
+    "DEFAULT_PORT",
+    "DEFAULT_HOST",
+]

crossref_local/_server/middleware.py ADDED Viewed

@@ -0,0 +1,25 @@
+"""Request middleware for CrossRef Local API."""
+from fastapi import Request
+from starlette.middleware.base import BaseHTTPMiddleware
+class UserContextMiddleware(BaseHTTPMiddleware):
+    """Expose the X-User-ID request header as ``request.state.user_id``.
+    Intended for multi-tenant collection scoping: the scitex-cloud gateway
+    forwards the authenticated user's ID in the X-User-ID header, and this
+    middleware copies it onto the request state before the endpoint runs.
+    NOTE(review): the header value is copied as received; nothing in this
+    class validates or authenticates it.
+    Usage in endpoints:
+        @app.get("/collections")
+        def list_collections(request: Request):
+            user_id = request.state.user_id  # None for local, set for cloud
+            ...
+    """
+    async def dispatch(self, request: Request, call_next):
+        # Store the header value, then hand the request on unchanged.
+        request.state.user_id = request.headers.get("X-User-ID")
+        return await call_next(request)

crossref_local/_server/models.py ADDED Viewed

@@ -0,0 +1,129 @@
+"""Pydantic models for API responses."""
+from typing import Optional, List
+from pydantic import BaseModel
+from .. import __version__
+class WorkResponse(BaseModel):  # one work's metadata; only doi is required
+    """Work metadata response."""
+    doi: str  # required
+    title: Optional[str] = None
+    authors: List[str] = []  # NOTE(review): mutable default; presumably copied per instance by pydantic - confirm
+    year: Optional[int] = None
+    journal: Optional[str] = None
+    issn: Optional[str] = None
+    volume: Optional[str] = None  # string, not int
+    issue: Optional[str] = None  # string, not int
+    page: Optional[str] = None  # string, not int
+    abstract: Optional[str] = None
+    citation_count: Optional[int] = None  # None when no count is supplied
+class SearchResponse(BaseModel):  # envelope around a list of WorkResponse; all fields required
+    """Search results response."""
+    query: str
+    total: int  # presumably the overall match count - confirm against the /works route
+    returned: int  # presumably len(results) - confirm against the /works route
+    elapsed_ms: float
+    results: List[WorkResponse]
+class InfoResponse(BaseModel):  # returned by the /info endpoint
+    """Database info response."""
+    name: str = "CrossRef Local API"
+    version: str = __version__  # package version, evaluated when this module is imported
+    status: str = "running"
+    mode: str = "local"
+    total_papers: int  # /info fills this with COUNT(*) of the works table
+    fts_indexed: int  # /info fills this with COUNT(*) of works_fts, or 0 if that query fails
+    citations: int  # /info fills this with COUNT(*) of citations, or 0 if that query fails
+    database_path: str  # /info fills this with str(db.db_path)
+class BatchRequest(BaseModel):  # request body carrying the DOIs to look up
+    """Batch DOI lookup request."""
+    dois: List[str]  # required; no length limit is declared on this model
+class BatchResponse(BaseModel):  # all fields required
+    """Batch DOI lookup response."""
+    requested: int  # presumably the number of DOIs asked for - confirm against the batch route
+    found: int  # presumably the number of DOIs resolved - confirm against the batch route
+    results: List[WorkResponse]
+# Citation models
+class CitingResponse(BaseModel):  # response_model of GET /citations/{doi}/citing
+    """Papers citing a DOI."""
+    doi: str  # the DOI that was queried
+    citing_count: int  # the /citing route sets this to len(papers)
+    papers: List[str]  # the /citing route fills this with the result of get_citing
+class CitedResponse(BaseModel):  # response_model of GET /citations/{doi}/cited
+    """Papers cited by a DOI."""
+    doi: str  # the DOI that was queried
+    cited_count: int  # the /cited route sets this to len(papers)
+    papers: List[str]  # the /cited route fills this with the result of get_cited
+class CitationCountResponse(BaseModel):  # response_model of GET /citations/{doi}/count
+    """Citation count for a DOI."""
+    doi: str  # the DOI that was queried
+    citation_count: int  # the /count route fills this from get_citation_count(doi)
+class CitationNetworkResponse(BaseModel):  # response_model of GET /citations/{doi}/network
+    """Citation network graph."""
+    center_doi: str
+    depth: int  # the /network route accepts 1 to 3
+    total_nodes: int  # copied by the /network route from the network's "stats" entry
+    total_edges: int  # copied by the /network route from the network's "stats" entry
+    nodes: List[dict]  # plain dicts; their keys are not constrained by this model
+    edges: List[dict]  # plain dicts; their keys are not constrained by this model
+# Collection models
+class CollectionCreateRequest(BaseModel):  # same fields as the body sent by CollectionsMixin.create_collection
+    """Create collection request."""
+    name: str  # the only required field
+    query: Optional[str] = None
+    dois: Optional[List[str]] = None
+    limit: int = 1000  # same default as CollectionsMixin.create_collection
+class CollectionInfo(BaseModel):  # every field except query is required
+    """Collection information."""
+    name: str
+    path: str
+    size_bytes: int
+    size_mb: float  # presumably size_bytes expressed in megabytes - confirm
+    paper_count: int
+    created_at: str  # a string; its format is not constrained by this model
+    query: Optional[str] = None  # the only optional field
+class CollectionQueryRequest(BaseModel):  # names and defaults parallel the filters of CollectionsMixin.get_collection
+    """Query collection request."""
+    fields: Optional[List[str]] = None
+    include_abstract: bool = False
+    include_references: bool = False
+    include_citations: bool = False
+    year_min: Optional[int] = None
+    year_max: Optional[int] = None
+    journal: Optional[str] = None
+    limit: Optional[int] = None  # None means no limit is requested by this model

crossref_local/_server/routes_citations.py ADDED Viewed

@@ -0,0 +1,98 @@
+"""Citation network endpoints."""
+from fastapi import APIRouter, Query
+from .._core.citations import get_citing, get_cited, get_citation_count, CitationNetwork
+from .models import (
+    CitingResponse,
+    CitedResponse,
+    CitationCountResponse,
+    CitationNetworkResponse,
+)
+# Router for the citation endpoints; each route below is declared relative to /citations.
+router = APIRouter(
+    prefix="/citations",
+    tags=["citations"],
+)
+@router.get("/{doi:path}/citing", response_model=CitingResponse)
+def get_citing_papers(
+    doi: str,
+    limit: int = Query(100, ge=1, le=1000, description="Max papers to return"),
+):
+    """
+    Get papers that cite this DOI.
+    Examples:
+        /citations/10.1038/nature12373/citing
+        /citations/10.1038/nature12373/citing?limit=50
+    """
+    papers = get_citing(doi, limit=limit)
+    # The count is the size of the list returned for this limit.
+    count = len(papers)
+    return CitingResponse(doi=doi, citing_count=count, papers=papers)
+@router.get("/{doi:path}/cited", response_model=CitedResponse)
+def get_cited_papers(
+    doi: str,
+    limit: int = Query(100, ge=1, le=1000, description="Max papers to return"),
+):
+    """
+    Get papers cited by this DOI (references).
+    Examples:
+        /citations/10.1038/nature12373/cited
+        /citations/10.1038/nature12373/cited?limit=50
+    """
+    references = get_cited(doi, limit=limit)
+    # The count is the size of the list returned for this limit.
+    count = len(references)
+    return CitedResponse(doi=doi, cited_count=count, papers=references)
+@router.get("/{doi:path}/count", response_model=CitationCountResponse)
+def get_citation_count_endpoint(doi: str):
+    """
+    Get citation count for a DOI.
+    Examples:
+        /citations/10.1038/nature12373/count
+    """
+    # Delegates to the core helper and wraps its result in the response model.
+    return CitationCountResponse(
+        doi=doi,
+        citation_count=get_citation_count(doi),
+    )
+@router.get("/{doi:path}/network", response_model=CitationNetworkResponse)
+def get_citation_network(
+    doi: str,
+    depth: int = Query(1, ge=1, le=3, description="Network depth (1-3)"),
+    max_citing: int = Query(25, ge=1, le=100, description="Max citing per node"),
+    max_cited: int = Query(25, ge=1, le=100, description="Max cited per node"),
+):
+    """
+    Get citation network graph for a DOI.
+    Returns nodes (papers) and edges (citation relationships).
+    Examples:
+        /citations/10.1038/nature12373/network
+        /citations/10.1038/nature12373/network?depth=2&max_citing=50
+    """
+    build_options = {
+        "depth": depth,
+        "max_citing": max_citing,
+        "max_cited": max_cited,
+    }
+    graph = CitationNetwork(doi, **build_options).to_dict()
+    # Flatten the nested "stats" entry into the top-level totals of the response.
+    payload = {
+        "center_doi": graph["center_doi"],
+        "depth": graph["depth"],
+        "total_nodes": graph["stats"]["total_nodes"],
+        "total_edges": graph["stats"]["total_edges"],
+        "nodes": graph["nodes"],
+        "edges": graph["edges"],
+    }
+    return CitationNetworkResponse(**payload)

crossref-local 0.3.1__py3-none-any.whl → 0.5.0__py3-none-any.whl

crossref-local 0.3.1 py3-none-any.whl → 0.5.0 py3-none-any.whl