crossref-local 0.4.0__py3-none-any.whl → 0.5.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- crossref_local/__init__.py +24 -10
- crossref_local/_aio/__init__.py +30 -0
- crossref_local/_aio/_impl.py +238 -0
- crossref_local/_cache/__init__.py +15 -0
- crossref_local/{cache_export.py → _cache/export.py} +27 -10
- crossref_local/_cache/utils.py +93 -0
- crossref_local/_cli/__init__.py +9 -0
- crossref_local/_cli/cli.py +389 -0
- crossref_local/_cli/mcp.py +351 -0
- crossref_local/_cli/mcp_server.py +457 -0
- crossref_local/_cli/search.py +199 -0
- crossref_local/_core/__init__.py +62 -0
- crossref_local/{api.py → _core/api.py} +26 -5
- crossref_local/{citations.py → _core/citations.py} +55 -26
- crossref_local/{config.py → _core/config.py} +40 -22
- crossref_local/{db.py → _core/db.py} +32 -26
- crossref_local/_core/export.py +344 -0
- crossref_local/{fts.py → _core/fts.py} +37 -14
- crossref_local/{models.py → _core/models.py} +120 -6
- crossref_local/_remote/__init__.py +56 -0
- crossref_local/_remote/base.py +378 -0
- crossref_local/_remote/collections.py +175 -0
- crossref_local/_server/__init__.py +140 -0
- crossref_local/_server/middleware.py +25 -0
- crossref_local/_server/models.py +143 -0
- crossref_local/_server/routes_citations.py +98 -0
- crossref_local/_server/routes_collections.py +282 -0
- crossref_local/_server/routes_compat.py +102 -0
- crossref_local/_server/routes_works.py +178 -0
- crossref_local/_server/server.py +19 -0
- crossref_local/aio.py +30 -206
- crossref_local/cache.py +100 -100
- crossref_local/cli.py +5 -515
- crossref_local/jobs.py +169 -0
- crossref_local/mcp_server.py +5 -410
- crossref_local/remote.py +5 -266
- crossref_local/server.py +5 -349
- {crossref_local-0.4.0.dist-info → crossref_local-0.5.1.dist-info}/METADATA +36 -11
- crossref_local-0.5.1.dist-info/RECORD +49 -0
- {crossref_local-0.4.0.dist-info → crossref_local-0.5.1.dist-info}/entry_points.txt +1 -1
- crossref_local/cli_mcp.py +0 -275
- crossref_local-0.4.0.dist-info/RECORD +0 -27
- /crossref_local/{cache_viz.py → _cache/viz.py} +0 -0
- /crossref_local/{cli_cache.py → _cli/cache.py} +0 -0
- /crossref_local/{cli_completion.py → _cli/completion.py} +0 -0
- /crossref_local/{cli_main.py → _cli/main.py} +0 -0
- /crossref_local/{impact_factor → _impact_factor}/__init__.py +0 -0
- /crossref_local/{impact_factor → _impact_factor}/calculator.py +0 -0
- /crossref_local/{impact_factor → _impact_factor}/journal_lookup.py +0 -0
- {crossref_local-0.4.0.dist-info → crossref_local-0.5.1.dist-info}/WHEEL +0 -0
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
"""Request middleware for CrossRef Local API."""
|
|
2
|
+
|
|
3
|
+
from fastapi import Request
|
|
4
|
+
from starlette.middleware.base import BaseHTTPMiddleware
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class UserContextMiddleware(BaseHTTPMiddleware):
    """Extract X-User-ID header for multi-tenant collection scoping.

    When requests come through scitex-cloud gateway, it passes the
    authenticated user's ID via X-User-ID header. This middleware
    extracts it and makes it available via request.state.user_id.

    Usage in endpoints:
        @app.get("/collections")
        def list_collections(request: Request):
            user_id = request.state.user_id  # None for local, set for cloud
            ...
    """

    async def dispatch(self, request: Request, call_next):
        # Header absent (local mode) -> user_id is None; downstream handlers
        # use that to skip per-user scoping.
        request.state.user_id = request.headers.get("X-User-ID")
        return await call_next(request)
|
|
@@ -0,0 +1,143 @@
|
|
|
1
|
+
"""Pydantic models for API responses."""
|
|
2
|
+
|
|
3
|
+
from typing import Optional, List
|
|
4
|
+
from pydantic import BaseModel
|
|
5
|
+
|
|
6
|
+
from .. import __version__
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class WorkResponse(BaseModel):
    """Work metadata response."""

    doi: str  # canonical DOI; primary identifier throughout the API
    title: Optional[str] = None
    authors: List[str] = []  # pydantic deep-copies mutable defaults, so this is safe
    year: Optional[int] = None
    journal: Optional[str] = None
    issn: Optional[str] = None
    volume: Optional[str] = None
    issue: Optional[str] = None
    page: Optional[str] = None
    abstract: Optional[str] = None
    citation_count: Optional[int] = None
    impact_factor: Optional[float] = None
    impact_factor_source: Optional[str] = None  # presumably names the IF dataset used — TODO confirm


class LimitInfoResponse(BaseModel):
    """Information about result limiting."""

    requested: int  # limit the client asked for
    returned: int  # rows actually returned
    total_available: int  # total matches before limiting
    capped: bool = False  # True when the server reduced the requested limit
    capped_reason: Optional[str] = None
    stage: str = "crossref-local"  # which pipeline stage applied the cap


class SearchResponse(BaseModel):
    """Search results response."""

    query: str
    total: int  # total matches in the index
    returned: int  # len(results)
    elapsed_ms: float  # server-side search time
    results: List[WorkResponse]
    limit_info: Optional[LimitInfoResponse] = None


class InfoResponse(BaseModel):
    """Database info response."""

    name: str = "CrossRef Local API"
    version: str = __version__  # package version captured at import time
    status: str = "running"
    mode: str = "local"  # NOTE(review): other values (e.g. remote) not visible here — confirm
    total_papers: int
    fts_indexed: int  # rows covered by the full-text index
    citations: int
    database_path: str
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
class BatchRequest(BaseModel):
    """Batch DOI lookup request."""

    dois: List[str]  # DOIs to resolve in a single call


class BatchResponse(BaseModel):
    """Batch DOI lookup response."""

    requested: int  # number of DOIs in the request
    found: int  # how many resolved successfully
    results: List[WorkResponse]
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
# Citation models
class CitingResponse(BaseModel):
    """Papers citing a DOI."""

    doi: str  # the cited (target) DOI
    citing_count: int  # len(papers)
    papers: List[str]  # DOIs of the citing works


class CitedResponse(BaseModel):
    """Papers cited by a DOI."""

    doi: str  # the citing (source) DOI
    cited_count: int  # len(papers)
    papers: List[str]  # DOIs of the referenced works


class CitationCountResponse(BaseModel):
    """Citation count for a DOI."""

    doi: str
    citation_count: int


class CitationNetworkResponse(BaseModel):
    """Citation network graph."""

    center_doi: str  # DOI the network was expanded from
    depth: int  # expansion depth used for the traversal
    total_nodes: int
    total_edges: int
    nodes: List[dict]  # free-form node payloads from CitationNetwork.to_dict()
    edges: List[dict]  # free-form edge payloads from CitationNetwork.to_dict()
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
# Collection models
class CollectionCreateRequest(BaseModel):
    """Create collection request."""

    name: str  # collection name; validated server-side via sanitize_name()
    query: Optional[str] = None  # FTS query; endpoint requires query or dois
    dois: Optional[List[str]] = None  # explicit DOI list alternative to query
    limit: int = 1000  # max papers pulled when building from a query


class CollectionInfo(BaseModel):
    """Collection information."""

    name: str
    path: str  # filesystem location of the collection database
    size_bytes: int
    size_mb: float  # presumably size_bytes converted to MB — set by producer, verify
    paper_count: int
    created_at: str  # timestamp string; exact format set by the cache layer — TODO confirm
    query: Optional[str] = None  # original query, when built from one


class CollectionQueryRequest(BaseModel):
    """Query collection request."""

    fields: Optional[List[str]] = None  # subset of fields to return; None = default set
    include_abstract: bool = False
    include_references: bool = False
    include_citations: bool = False
    year_min: Optional[int] = None
    year_max: Optional[int] = None
    journal: Optional[str] = None
    limit: Optional[int] = None  # None = no explicit cap
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
"""Citation network endpoints."""
|
|
2
|
+
|
|
3
|
+
from fastapi import APIRouter, Query
|
|
4
|
+
|
|
5
|
+
from .._core.citations import get_citing, get_cited, get_citation_count, CitationNetwork
|
|
6
|
+
from .models import (
|
|
7
|
+
CitingResponse,
|
|
8
|
+
CitedResponse,
|
|
9
|
+
CitationCountResponse,
|
|
10
|
+
CitationNetworkResponse,
|
|
11
|
+
)
|
|
12
|
+
|
|
13
|
+
router = APIRouter(prefix="/citations", tags=["citations"])  # mounted by the server app
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
@router.get("/{doi:path}/citing", response_model=CitingResponse)
def get_citing_papers(
    doi: str,
    limit: int = Query(100, ge=1, le=1000, description="Max papers to return"),
):
    """
    Get papers that cite this DOI.

    Examples:
        /citations/10.1038/nature12373/citing
        /citations/10.1038/nature12373/citing?limit=50
    """
    # {doi:path} lets slashes in the DOI reach us un-split.
    citing = get_citing(doi, limit=limit)
    return CitingResponse(doi=doi, citing_count=len(citing), papers=citing)
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
@router.get("/{doi:path}/cited", response_model=CitedResponse)
def get_cited_papers(
    doi: str,
    limit: int = Query(100, ge=1, le=1000, description="Max papers to return"),
):
    """
    Get papers cited by this DOI (references).

    Examples:
        /citations/10.1038/nature12373/cited
        /citations/10.1038/nature12373/cited?limit=50
    """
    references = get_cited(doi, limit=limit)
    return CitedResponse(doi=doi, cited_count=len(references), papers=references)
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
@router.get("/{doi:path}/count", response_model=CitationCountResponse)
def get_citation_count_endpoint(doi: str):
    """
    Get citation count for a DOI.

    Examples:
        /citations/10.1038/nature12373/count
    """
    return CitationCountResponse(doi=doi, citation_count=get_citation_count(doi))
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
@router.get("/{doi:path}/network", response_model=CitationNetworkResponse)
def get_citation_network(
    doi: str,
    depth: int = Query(1, ge=1, le=3, description="Network depth (1-3)"),
    max_citing: int = Query(25, ge=1, le=100, description="Max citing per node"),
    max_cited: int = Query(25, ge=1, le=100, description="Max cited per node"),
):
    """
    Get citation network graph for a DOI.

    Returns nodes (papers) and edges (citation relationships).

    Examples:
        /citations/10.1038/nature12373/network
        /citations/10.1038/nature12373/network?depth=2&max_citing=50
    """
    # Build the graph eagerly, then map its dict form onto the response model.
    graph = CitationNetwork(
        doi,
        depth=depth,
        max_citing=max_citing,
        max_cited=max_cited,
    ).to_dict()
    stats = graph["stats"]
    return CitationNetworkResponse(
        center_doi=graph["center_doi"],
        depth=graph["depth"],
        total_nodes=stats["total_nodes"],
        total_edges=stats["total_edges"],
        nodes=graph["nodes"],
        edges=graph["edges"],
    )
|
|
@@ -0,0 +1,282 @@
|
|
|
1
|
+
"""Collection management endpoints with file download support."""
|
|
2
|
+
|
|
3
|
+
import tempfile
|
|
4
|
+
from typing import Optional
|
|
5
|
+
|
|
6
|
+
from fastapi import APIRouter, Query, HTTPException, Request
|
|
7
|
+
from fastapi.responses import FileResponse
|
|
8
|
+
|
|
9
|
+
from .. import cache
|
|
10
|
+
from .._cache.utils import sanitize_name
|
|
11
|
+
from .models import CollectionCreateRequest, CollectionInfo
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
# Allowed fields for field filtering (whitelist)
ALLOWED_FIELDS = {
    "doi",
    "title",
    "authors",
    "year",
    "journal",
    "volume",
    "issue",
    "page",
    "abstract",
    "citation_count",
    "references",
    "issn",
    "publisher",
}

# Maximum limits
MAX_LIMIT = 10000  # largest collection that may be created via the API
MAX_DOIS = 1000  # largest explicit DOI list accepted in one request

router = APIRouter(prefix="/collections", tags=["collections"])
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def _get_user_id(request: Request) -> Optional[str]:
    """Get user ID from request state (set by middleware)."""
    # EAFP: the attribute is absent only when the middleware isn't installed.
    try:
        return request.state.user_id
    except AttributeError:
        return None
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
@router.get("")
def list_collections(request: Request):
    """
    List all collections.

    For cloud API (with X-User-ID header), returns only user's collections.
    For local API, returns all collections.
    """
    found = cache.list_caches(user_id=_get_user_id(request))
    return {
        "count": len(found),
        "collections": [entry.to_dict() for entry in found],
    }
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
@router.post("", response_model=CollectionInfo)
def create_collection(request: Request, body: CollectionCreateRequest):
    """
    Create a new collection from search query or DOI list.

    Request body:
        {"name": "epilepsy", "query": "epilepsy seizure", "limit": 500}
    or
        {"name": "my_papers", "dois": ["10.1038/...", "10.1016/..."]}

    Raises:
        HTTPException 400: invalid name, neither query nor dois given,
            or a limit exceeded.
        HTTPException 500: unexpected failure while building the collection.
    """
    user_id = _get_user_id(request)

    # Validate collection name (rejects illegal / path-escaping names).
    try:
        sanitize_name(body.name)
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e)) from e

    if not body.query and not body.dois:
        raise HTTPException(
            status_code=400,
            detail="Must provide 'query' or 'dois'",
        )

    # Validate limits
    if body.limit > MAX_LIMIT:
        raise HTTPException(
            status_code=400,
            detail=f"Limit exceeds maximum ({MAX_LIMIT})",
        )

    if body.dois and len(body.dois) > MAX_DOIS:
        raise HTTPException(
            status_code=400,
            detail=f"Too many DOIs ({len(body.dois)}). Maximum: {MAX_DOIS}",
        )

    try:
        info = cache.create(
            body.name,
            query=body.query,
            dois=body.dois,
            limit=body.limit,
            user_id=user_id,
        )
        return CollectionInfo(**info.to_dict())
    except ValueError as e:
        # Bad input detected by the cache layer (e.g. duplicate name).
        # Chain the cause so the original traceback survives in logs.
        raise HTTPException(status_code=400, detail=str(e)) from e
    except HTTPException:
        # Don't let the broad handler below downgrade an intentional
        # HTTP error to a 500.
        raise
    except Exception as e:
        # NOTE(review): str(e) may leak internal details to the client;
        # consider a generic message plus server-side logging.
        raise HTTPException(status_code=500, detail=str(e)) from e
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
@router.get("/{name}")
def query_collection(
    name: str,
    request: Request,
    fields: Optional[str] = Query(None, description="Comma-separated field list"),
    include_abstract: bool = Query(False, description="Include abstracts"),
    include_references: bool = Query(False, description="Include references"),
    include_citations: bool = Query(False, description="Include citation counts"),
    year_min: Optional[int] = Query(None, description="Filter by min year"),
    year_max: Optional[int] = Query(None, description="Filter by max year"),
    journal: Optional[str] = Query(None, description="Filter by journal"),
    limit: Optional[int] = Query(None, description="Max results"),
):
    """
    Query a collection with field filtering.

    Returns minimal data to reduce response size.
    Use 'fields' parameter to specify exactly which fields to return.

    Examples:
        /collections/epilepsy?fields=doi,title,year
        /collections/epilepsy?year_min=2020&include_citations=true
    """
    user_id = _get_user_id(request)

    # Reject names that could escape the collections directory.
    try:
        sanitize_name(name)
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e))

    if not cache.exists(name, user_id=user_id):
        raise HTTPException(status_code=404, detail=f"Collection not found: {name}")

    # Parse the comma-separated field list and enforce the whitelist.
    field_list = None
    if fields:
        field_list = [part.strip() for part in fields.split(",")]
        invalid_fields = set(field_list) - ALLOWED_FIELDS
        if invalid_fields:
            raise HTTPException(
                status_code=400,
                detail=f"Invalid fields: {invalid_fields}. Allowed: {ALLOWED_FIELDS}",
            )

    papers = cache.query(
        name,
        fields=field_list,
        include_abstract=include_abstract,
        include_references=include_references,
        include_citations=include_citations,
        year_min=year_min,
        year_max=year_max,
        journal=journal,
        limit=limit,
        user_id=user_id,
    )

    return {"name": name, "count": len(papers), "papers": papers}
|
|
174
|
+
|
|
175
|
+
|
|
176
|
+
@router.get("/{name}/stats")
def collection_stats(name: str, request: Request):
    """
    Get collection statistics.

    Returns year distribution, top journals, citation stats.
    """
    user_id = _get_user_id(request)

    # Name validation mirrors the other collection endpoints.
    try:
        sanitize_name(name)
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e))

    if not cache.exists(name, user_id=user_id):
        raise HTTPException(status_code=404, detail=f"Collection not found: {name}")

    return {"name": name, **cache.stats(name, user_id=user_id)}
|
|
195
|
+
|
|
196
|
+
|
|
197
|
+
@router.get("/{name}/download")
def download_collection(
    name: str,
    request: Request,
    format: str = Query("json", description="Export format: json, csv, bibtex, dois"),
    fields: Optional[str] = Query(None, description="Fields to include (json/csv)"),
):
    """
    Download collection as a file.

    Supports multiple formats:
    - json: Full JSON with all fields or specified fields
    - csv: CSV format with specified fields
    - bibtex: BibTeX format for bibliography
    - dois: Plain text list of DOIs

    Examples:
        /collections/epilepsy/download?format=json
        /collections/epilepsy/download?format=bibtex
        /collections/epilepsy/download?format=csv&fields=doi,title,year

    Raises:
        HTTPException 400: invalid name or unsupported format.
        HTTPException 404: collection does not exist.
    """
    user_id = _get_user_id(request)

    try:
        sanitize_name(name)
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e)) from e

    if not cache.exists(name, user_id=user_id):
        raise HTTPException(status_code=404, detail=f"Collection not found: {name}")

    # Determine file extension and media type
    format_info = {
        "json": ("application/json", ".json"),
        "csv": ("text/csv", ".csv"),
        "bibtex": ("application/x-bibtex", ".bib"),
        "dois": ("text/plain", ".txt"),
    }

    if format not in format_info:
        raise HTTPException(
            status_code=400,
            detail=f"Unsupported format: {format}. Use: json, csv, bibtex, dois",
        )

    media_type, ext = format_info[format]
    filename = f"{name}{ext}"

    # Export to a temporary file. delete=False so FileResponse can stream it
    # after this handler returns.
    # TODO(review): the temp file is never removed; pass a starlette
    # BackgroundTask that unlinks it once the response has been sent.
    with tempfile.NamedTemporaryFile(mode="w", suffix=ext, delete=False) as tmp:
        # Strip whitespace so "doi, title" behaves like "doi,title"
        # (consistent with query_collection's parsing).
        field_list = [f.strip() for f in fields.split(",")] if fields else None
        cache.export(
            name,
            tmp.name,
            format=format,
            fields=field_list,
            user_id=user_id,
        )
        tmp_path = tmp.name

    # BUG FIX: the Content-Disposition header previously hard-coded a literal
    # placeholder instead of the computed file name (and overrode the header
    # FileResponse builds from filename=), so every download saved under the
    # wrong name. Use the real filename in both places.
    return FileResponse(
        tmp_path,
        media_type=media_type,
        filename=filename,
        headers={"Content-Disposition": f'attachment; filename="{filename}"'},
    )
|
|
263
|
+
|
|
264
|
+
|
|
265
|
+
@router.delete("/{name}")
def delete_collection(name: str, request: Request):
    """
    Delete a collection.
    """
    user_id = _get_user_id(request)

    # Same name validation as the read endpoints.
    try:
        sanitize_name(name)
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e))

    if not cache.exists(name, user_id=user_id):
        raise HTTPException(status_code=404, detail=f"Collection not found: {name}")

    return {"deleted": cache.delete(name, user_id=user_id), "name": name}
|
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
"""Backwards-compatible legacy API endpoints."""
|
|
2
|
+
|
|
3
|
+
from typing import Optional
|
|
4
|
+
|
|
5
|
+
from fastapi import APIRouter, HTTPException
|
|
6
|
+
|
|
7
|
+
from .._core import fts
|
|
8
|
+
from .._core.db import get_db
|
|
9
|
+
from .._core.models import Work
|
|
10
|
+
from .models import WorkResponse
|
|
11
|
+
from .routes_works import get_work
|
|
12
|
+
|
|
13
|
+
router = APIRouter(prefix="/api", tags=["legacy"])  # pre-0.5 /api/* paths kept for old clients
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
@router.get("/search/")
def api_search_compat(
    title: Optional[str] = None,
    q: Optional[str] = None,
    doi: Optional[str] = None,
    limit: int = 10,
):
    """Backwards-compatible search endpoint."""
    query = title or q

    # DOI lookup takes precedence over text search.
    if doi:
        try:
            work = get_work(doi)
            return {
                "query": {"doi": doi},
                "results": [work.model_dump()],
                "total": 1,
                "returned": 1,
            }
        except HTTPException:
            # Unknown DOI -> empty result set rather than an error.
            return {"query": {"doi": doi}, "results": [], "total": 0, "returned": 0}

    if not query:
        raise HTTPException(
            status_code=400, detail="Specify q, title, or doi parameter"
        )

    # Call fts.search directly (not the endpoint function)
    results = fts.search(query, limit=limit, offset=0)
    hits = [
        WorkResponse(
            doi=w.doi,
            title=w.title,
            authors=w.authors,
            year=w.year,
            journal=w.journal,
            issn=w.issn,
            volume=w.volume,
            issue=w.issue,
            page=w.page,
            abstract=w.abstract,
            citation_count=w.citation_count,
        ).model_dump()
        for w in results.works
    ]
    return {
        "query": {
            "title": query,
            "doi": None,
            "year": None,
            "authors": None,
            "limit": limit,
        },
        "results": hits,
        "total": results.total,
        "returned": len(hits),
    }
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
@router.get("/stats/")
def api_stats_compat():
    """Backwards-compatible stats endpoint."""
    db = get_db()

    count_row = db.fetchone("SELECT COUNT(*) as count FROM works")
    work_count = count_row["count"] if count_row else 0

    # Schema introspection via sqlite_master.
    tables = [
        r["name"]
        for r in db.fetchall("SELECT name FROM sqlite_master WHERE type='table'")
    ]
    indices = [
        r["name"]
        for r in db.fetchall("SELECT name FROM sqlite_master WHERE type='index'")
        if r["name"]  # auto-indices can have NULL names
    ]

    # Several fields are placeholders kept only for response-shape
    # compatibility with the pre-0.5 API.
    return {
        "total_papers": work_count,
        "database_size_mb": None,
        "year_range": None,
        "total_journals": 0,
        "total_citations": None,
        "tables": tables,
        "indices": indices,
    }
|