crossref-local 0.4.0-py3-none-any.whl → 0.5.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48)
  1. crossref_local/__init__.py +18 -10
  2. crossref_local/_aio/__init__.py +30 -0
  3. crossref_local/_aio/_impl.py +238 -0
  4. crossref_local/_cache/__init__.py +15 -0
  5. crossref_local/{cache_export.py → _cache/export.py} +27 -10
  6. crossref_local/_cache/utils.py +93 -0
  7. crossref_local/_cli/__init__.py +9 -0
  8. crossref_local/_cli/cli.py +512 -0
  9. crossref_local/_cli/mcp.py +351 -0
  10. crossref_local/_cli/mcp_server.py +413 -0
  11. crossref_local/_core/__init__.py +58 -0
  12. crossref_local/{api.py → _core/api.py} +24 -5
  13. crossref_local/{citations.py → _core/citations.py} +55 -26
  14. crossref_local/{config.py → _core/config.py} +40 -22
  15. crossref_local/{db.py → _core/db.py} +32 -26
  16. crossref_local/{fts.py → _core/fts.py} +18 -14
  17. crossref_local/{models.py → _core/models.py} +11 -6
  18. crossref_local/_remote/__init__.py +56 -0
  19. crossref_local/_remote/base.py +356 -0
  20. crossref_local/_remote/collections.py +175 -0
  21. crossref_local/_server/__init__.py +140 -0
  22. crossref_local/_server/middleware.py +25 -0
  23. crossref_local/_server/models.py +129 -0
  24. crossref_local/_server/routes_citations.py +98 -0
  25. crossref_local/_server/routes_collections.py +282 -0
  26. crossref_local/_server/routes_compat.py +102 -0
  27. crossref_local/_server/routes_works.py +128 -0
  28. crossref_local/_server/server.py +19 -0
  29. crossref_local/aio.py +30 -206
  30. crossref_local/cache.py +100 -100
  31. crossref_local/cli.py +5 -515
  32. crossref_local/jobs.py +169 -0
  33. crossref_local/mcp_server.py +5 -410
  34. crossref_local/remote.py +5 -266
  35. crossref_local/server.py +5 -349
  36. {crossref_local-0.4.0.dist-info → crossref_local-0.5.0.dist-info}/METADATA +36 -11
  37. crossref_local-0.5.0.dist-info/RECORD +47 -0
  38. {crossref_local-0.4.0.dist-info → crossref_local-0.5.0.dist-info}/entry_points.txt +1 -1
  39. crossref_local/cli_mcp.py +0 -275
  40. crossref_local-0.4.0.dist-info/RECORD +0 -27
  41. /crossref_local/{cache_viz.py → _cache/viz.py} +0 -0
  42. /crossref_local/{cli_cache.py → _cli/cache.py} +0 -0
  43. /crossref_local/{cli_completion.py → _cli/completion.py} +0 -0
  44. /crossref_local/{cli_main.py → _cli/main.py} +0 -0
  45. /crossref_local/{impact_factor → _impact_factor}/__init__.py +0 -0
  46. /crossref_local/{impact_factor → _impact_factor}/calculator.py +0 -0
  47. /crossref_local/{impact_factor → _impact_factor}/journal_lookup.py +0 -0
  48. {crossref_local-0.4.0.dist-info → crossref_local-0.5.0.dist-info}/WHEEL +0 -0
@@ -64,10 +64,11 @@ Modules:
64
64
  aio - Async versions of all API functions
65
65
  """
66
66
 
67
- __version__ = "0.3.1"
67
+ __version__ = "0.5.0"
68
68
 
69
- # Core API (public functions)
70
- from .api import (
69
+ # Core API (from _core package)
70
+ from ._core import (
71
+ # Functions
71
72
  search,
72
73
  count,
73
74
  get,
@@ -77,23 +78,28 @@ from .api import (
77
78
  enrich_dois,
78
79
  configure,
79
80
  configure_http,
80
- configure_remote, # Backward compatibility alias
81
+ configure_remote,
81
82
  get_mode,
82
83
  info,
84
+ # Models
85
+ Work,
86
+ SearchResult,
87
+ # Citations
88
+ get_citing,
89
+ get_cited,
90
+ get_citation_count,
91
+ CitationNetwork,
83
92
  )
84
93
 
85
- # Models (public classes)
86
- from .models import Work, SearchResult
87
-
88
94
  # Async API (public module)
89
95
  from . import aio
90
96
 
91
- # Citation network (public functions and classes)
92
- from .citations import get_citing, get_cited, get_citation_count, CitationNetwork
93
-
94
97
  # Cache module (public)
95
98
  from . import cache
96
99
 
100
+ # Jobs module (public)
101
+ from . import jobs
102
+
97
103
 
98
104
  # Public API - what users should import
99
105
  __all__ = [
@@ -121,6 +127,8 @@ __all__ = [
121
127
  "aio",
122
128
  # Cache module
123
129
  "cache",
130
+ # Jobs module
131
+ "jobs",
124
132
  # Citation network
125
133
  "get_citing",
126
134
  "get_cited",
@@ -0,0 +1,30 @@
1
+ #!/usr/bin/env python3
2
+ """Async API module."""
3
+
4
+ from ._impl import (
5
+ SearchResult,
6
+ Work,
7
+ count,
8
+ count_many,
9
+ exists,
10
+ get,
11
+ get_many,
12
+ info,
13
+ search,
14
+ search_many,
15
+ )
16
+
17
+ __all__ = [
18
+ "search",
19
+ "count",
20
+ "get",
21
+ "get_many",
22
+ "exists",
23
+ "info",
24
+ "search_many",
25
+ "count_many",
26
+ "SearchResult",
27
+ "Work",
28
+ ]
29
+
30
+ # EOF
@@ -0,0 +1,238 @@
1
+ """
2
+ Async API for crossref_local.
3
+
4
+ Provides async versions of all API functions. Uses thread pool execution
5
+ with per-thread database connections for thread safety.
6
+
7
+ Usage:
8
+ from crossref_local import aio
9
+
10
+ async def main():
11
+ results = await aio.search("machine learning")
12
+ work = await aio.get("10.1038/nature12373")
13
+ n = await aio.count("CRISPR")
14
+
15
+ # Or import individual functions
16
+ from crossref_local.aio import search, get, count
17
+
18
+ # Concurrent operations
19
+ counts = await aio.count_many(["CRISPR", "machine learning"])
20
+ """
21
+
22
+ import asyncio as _asyncio
23
+ import threading as _threading
24
+ from typing import List, Optional
25
+
26
+ from .._core.config import Config as _Config
27
+ from .._core.db import Database as _Database
28
+ from .._core.models import SearchResult, Work
29
+
30
+ __all__ = [
31
+ "search",
32
+ "count",
33
+ "get",
34
+ "get_many",
35
+ "exists",
36
+ "info",
37
+ "search_many",
38
+ "count_many",
39
+ # Public types for type hints
40
+ "SearchResult",
41
+ "Work",
42
+ ]
43
+
44
+ # Thread-local storage for database connections
45
+ _thread_local = _threading.local()
46
+
47
+
48
+ def _get_thread_db() -> _Database:
49
+ """Get thread-local database connection."""
50
+ if not hasattr(_thread_local, "db"):
51
+ _thread_local.db = _Database(_Config.get_db_path())
52
+ return _thread_local.db
53
+
54
+
55
+ def _search_sync(query: str, limit: int, offset: int) -> SearchResult:
56
+ """Thread-safe sync search."""
57
+ from .._core import fts
58
+
59
+ db = _get_thread_db()
60
+ return fts._search_with_db(db, query, limit, offset)
61
+
62
+
63
+ def _count_sync(query: str) -> int:
64
+ """Thread-safe sync count."""
65
+ from .._core import fts
66
+
67
+ db = _get_thread_db()
68
+ return fts._count_with_db(db, query)
69
+
70
+
71
+ def _get_sync(doi: str) -> Optional[Work]:
72
+ """Thread-safe sync get."""
73
+ db = _get_thread_db()
74
+ metadata = db.get_metadata(doi)
75
+ if metadata:
76
+ return Work.from_metadata(doi, metadata)
77
+ return None
78
+
79
+
80
+ def _get_many_sync(dois: List[str]) -> List[Work]:
81
+ """Thread-safe sync get_many."""
82
+ db = _get_thread_db()
83
+ works = []
84
+ for doi in dois:
85
+ metadata = db.get_metadata(doi)
86
+ if metadata:
87
+ works.append(Work.from_metadata(doi, metadata))
88
+ return works
89
+
90
+
91
+ def _exists_sync(doi: str) -> bool:
92
+ """Thread-safe sync exists."""
93
+ db = _get_thread_db()
94
+ row = db.fetchone("SELECT 1 FROM works WHERE doi = ?", (doi,))
95
+ return row is not None
96
+
97
+
98
+ def _info_sync() -> dict:
99
+ """Thread-safe sync info."""
100
+ db = _get_thread_db()
101
+
102
+ row = db.fetchone("SELECT COUNT(*) as count FROM works")
103
+ work_count = row["count"] if row else 0
104
+
105
+ try:
106
+ row = db.fetchone("SELECT COUNT(*) as count FROM works_fts")
107
+ fts_count = row["count"] if row else 0
108
+ except Exception:
109
+ fts_count = 0
110
+
111
+ try:
112
+ row = db.fetchone("SELECT COUNT(*) as count FROM citations")
113
+ citation_count = row["count"] if row else 0
114
+ except Exception:
115
+ citation_count = 0
116
+
117
+ return {
118
+ "db_path": str(_Config.get_db_path()),
119
+ "works": work_count,
120
+ "fts_indexed": fts_count,
121
+ "citations": citation_count,
122
+ }
123
+
124
+
125
+ async def search(
126
+ query: str,
127
+ limit: int = 10,
128
+ offset: int = 0,
129
+ ) -> SearchResult:
130
+ """
131
+ Async full-text search across works.
132
+
133
+ Args:
134
+ query: Search query (supports FTS5 syntax)
135
+ limit: Maximum results to return
136
+ offset: Skip first N results (for pagination)
137
+
138
+ Returns:
139
+ SearchResult with matching works
140
+ """
141
+ return await _asyncio.to_thread(_search_sync, query, limit, offset)
142
+
143
+
144
+ async def count(query: str) -> int:
145
+ """
146
+ Async count matching works without fetching results.
147
+
148
+ Args:
149
+ query: FTS5 search query
150
+
151
+ Returns:
152
+ Number of matching works
153
+ """
154
+ return await _asyncio.to_thread(_count_sync, query)
155
+
156
+
157
+ async def get(doi: str) -> Optional[Work]:
158
+ """
159
+ Async get a work by DOI.
160
+
161
+ Args:
162
+ doi: Digital Object Identifier
163
+
164
+ Returns:
165
+ Work object or None if not found
166
+ """
167
+ return await _asyncio.to_thread(_get_sync, doi)
168
+
169
+
170
+ async def get_many(dois: List[str]) -> List[Work]:
171
+ """
172
+ Async get multiple works by DOI.
173
+
174
+ Args:
175
+ dois: List of DOIs
176
+
177
+ Returns:
178
+ List of Work objects (missing DOIs are skipped)
179
+ """
180
+ return await _asyncio.to_thread(_get_many_sync, dois)
181
+
182
+
183
+ async def exists(doi: str) -> bool:
184
+ """
185
+ Async check if a DOI exists in the database.
186
+
187
+ Args:
188
+ doi: Digital Object Identifier
189
+
190
+ Returns:
191
+ True if DOI exists
192
+ """
193
+ return await _asyncio.to_thread(_exists_sync, doi)
194
+
195
+
196
+ async def info() -> dict:
197
+ """
198
+ Async get database information.
199
+
200
+ Returns:
201
+ Dictionary with database stats
202
+ """
203
+ return await _asyncio.to_thread(_info_sync)
204
+
205
+
206
+ async def search_many(queries: List[str], limit: int = 10) -> List[SearchResult]:
207
+ """
208
+ Run multiple searches concurrently.
209
+
210
+ Args:
211
+ queries: List of search queries
212
+ limit: Maximum results per query
213
+
214
+ Returns:
215
+ List of SearchResult objects
216
+ """
217
+ tasks = [search(q, limit=limit) for q in queries]
218
+ return await _asyncio.gather(*tasks)
219
+
220
+
221
+ async def count_many(queries: List[str]) -> dict:
222
+ """
223
+ Count matches for multiple queries concurrently.
224
+
225
+ Args:
226
+ queries: List of search queries
227
+
228
+ Returns:
229
+ Dict mapping query -> count
230
+
231
+ Example:
232
+ >>> counts = await count_many(["CRISPR", "machine learning"])
233
+ >>> print(counts)
234
+ {'CRISPR': 45000, 'machine learning': 477922}
235
+ """
236
+ tasks = [count(q) for q in queries]
237
+ results = await _asyncio.gather(*tasks)
238
+ return dict(zip(queries, results))
@@ -0,0 +1,15 @@
1
+ #!/usr/bin/env python3
2
+ """Internal cache helper modules."""
3
+
4
+ from .export import export
5
+ from .utils import cache_path, get_cache_dir, meta_path, sanitize_name
6
+
7
+ __all__ = [
8
+ "export",
9
+ "cache_path",
10
+ "get_cache_dir",
11
+ "meta_path",
12
+ "sanitize_name",
13
+ ]
14
+
15
+ # EOF
@@ -1,10 +1,22 @@
1
1
  """Export functionality for cache module."""
2
2
 
3
- import json
4
- from pathlib import Path
5
- from typing import Any, Dict, List, Optional
3
+ import csv as _csv
4
+ import json as _json
5
+ from pathlib import Path as _Path
6
+ from typing import List, Optional
6
7
 
7
- from .cache import load
8
+ from .utils import sanitize_name as _sanitize_name
9
+
10
+ __all__ = [
11
+ "export",
12
+ ]
13
+
14
+
15
+ def _load_cache(name: str, user_id: Optional[str] = None):
16
+ """Load cache data (lazy import to avoid circular dependency)."""
17
+ from ..cache import load
18
+
19
+ return load(name, user_id=user_id)
8
20
 
9
21
 
10
22
  def export(
@@ -12,6 +24,7 @@ def export(
12
24
  output_path: str,
13
25
  format: str = "json",
14
26
  fields: Optional[List[str]] = None,
27
+ user_id: Optional[str] = None,
15
28
  ) -> str:
16
29
  """Export cache to file.
17
30
 
@@ -20,26 +33,30 @@ def export(
20
33
  output_path: Output file path
21
34
  format: Export format (json, csv, bibtex, dois)
22
35
  fields: Fields to include (for json/csv)
36
+ user_id: Optional user ID for multi-tenant scoping
23
37
 
24
38
  Returns:
25
39
  Output file path
40
+
41
+ Raises:
42
+ ValueError: If cache name contains invalid characters
26
43
  """
27
- papers = load(name)
28
- output = Path(output_path)
44
+ # Validate cache name
45
+ _sanitize_name(name)
46
+ papers = _load_cache(name, user_id=user_id)
47
+ output = _Path(output_path)
29
48
 
30
49
  if format == "json":
31
50
  if fields:
32
51
  papers = [{k: p.get(k) for k in fields} for p in papers]
33
52
  with open(output, "w") as f:
34
- json.dump(papers, f, indent=2)
53
+ _json.dump(papers, f, indent=2)
35
54
 
36
55
  elif format == "csv":
37
- import csv
38
-
39
56
  if fields is None:
40
57
  fields = ["doi", "title", "authors", "year", "journal"]
41
58
  with open(output, "w", newline="") as f:
42
- writer = csv.DictWriter(f, fieldnames=fields, extrasaction="ignore")
59
+ writer = _csv.DictWriter(f, fieldnames=fields, extrasaction="ignore")
43
60
  writer.writeheader()
44
61
  for p in papers:
45
62
  row = dict(p)
@@ -0,0 +1,93 @@
1
+ """Cache utility functions for crossref-local.
2
+
3
+ Provides path handling and validation utilities for the cache module.
4
+ """
5
+
6
+ import os as _os
7
+ import re as _re
8
+ from pathlib import Path as _Path
9
+ from typing import Optional
10
+
11
+ __all__ = [
12
+ "sanitize_name",
13
+ "get_cache_dir",
14
+ "cache_path",
15
+ "meta_path",
16
+ ]
17
+
18
+
19
+ # Valid cache name pattern: alphanumeric, underscores, hyphens only
20
+ _CACHE_NAME_PATTERN = _re.compile(r"^[a-zA-Z0-9_-]+$")
21
+
22
+
23
+ def sanitize_name(name: str) -> str:
24
+ """Sanitize cache name to prevent path traversal.
25
+
26
+ Args:
27
+ name: Cache name to sanitize
28
+
29
+ Returns:
30
+ Sanitized name
31
+
32
+ Raises:
33
+ ValueError: If name contains invalid characters
34
+ """
35
+ if not name:
36
+ raise ValueError("Cache name cannot be empty")
37
+ if not _CACHE_NAME_PATTERN.match(name):
38
+ raise ValueError(
39
+ f"Invalid cache name '{name}': only alphanumeric, underscores, and hyphens allowed"
40
+ )
41
+ if len(name) > 64:
42
+ raise ValueError(f"Cache name too long: {len(name)} chars (max 64)")
43
+ return name
44
+
45
+
46
+ def get_cache_dir(user_id: Optional[str] = None) -> _Path:
47
+ """Get cache directory, creating if needed.
48
+
49
+ Args:
50
+ user_id: Optional user ID for multi-tenant scoping.
51
+ If provided, creates a user-specific subdirectory.
52
+ """
53
+ cache_dir = _Path(
54
+ _os.environ.get(
55
+ "CROSSREF_LOCAL_CACHE_DIR", _Path.home() / ".cache" / "crossref-local"
56
+ )
57
+ )
58
+ # Add user subdirectory for multi-tenant support
59
+ if user_id:
60
+ # Sanitize user_id as well
61
+ safe_user_id = _re.sub(r"[^a-zA-Z0-9_-]", "", user_id[:16])
62
+ if safe_user_id:
63
+ cache_dir = cache_dir / safe_user_id
64
+ cache_dir.mkdir(parents=True, exist_ok=True)
65
+ return cache_dir
66
+
67
+
68
+ def cache_path(name: str, user_id: Optional[str] = None) -> _Path:
69
+ """Get path for a named cache.
70
+
71
+ Args:
72
+ name: Cache name (will be sanitized)
73
+ user_id: Optional user ID for multi-tenant scoping
74
+
75
+ Returns:
76
+ Path to cache file
77
+ """
78
+ safe_name = sanitize_name(name)
79
+ return get_cache_dir(user_id) / f"{safe_name}.json"
80
+
81
+
82
+ def meta_path(name: str, user_id: Optional[str] = None) -> _Path:
83
+ """Get path for cache metadata.
84
+
85
+ Args:
86
+ name: Cache name (will be sanitized)
87
+ user_id: Optional user ID for multi-tenant scoping
88
+
89
+ Returns:
90
+ Path to metadata file
91
+ """
92
+ safe_name = sanitize_name(name)
93
+ return get_cache_dir(user_id) / f"{safe_name}.meta.json"
@@ -0,0 +1,9 @@
1
+ #!/usr/bin/env python3
2
+ """Internal CLI modules."""
3
+
4
+ from .cli import cli, main
5
+ from .mcp import mcp, run_mcp_server
6
+
7
+ __all__ = ["cli", "main", "mcp", "run_mcp_server"]
8
+
9
+ # EOF