crossref-local 0.4.0-py3-none-any.whl → 0.5.1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- crossref_local/__init__.py +24 -10
- crossref_local/_aio/__init__.py +30 -0
- crossref_local/_aio/_impl.py +238 -0
- crossref_local/_cache/__init__.py +15 -0
- crossref_local/{cache_export.py → _cache/export.py} +27 -10
- crossref_local/_cache/utils.py +93 -0
- crossref_local/_cli/__init__.py +9 -0
- crossref_local/_cli/cli.py +389 -0
- crossref_local/_cli/mcp.py +351 -0
- crossref_local/_cli/mcp_server.py +457 -0
- crossref_local/_cli/search.py +199 -0
- crossref_local/_core/__init__.py +62 -0
- crossref_local/{api.py → _core/api.py} +26 -5
- crossref_local/{citations.py → _core/citations.py} +55 -26
- crossref_local/{config.py → _core/config.py} +40 -22
- crossref_local/{db.py → _core/db.py} +32 -26
- crossref_local/_core/export.py +344 -0
- crossref_local/{fts.py → _core/fts.py} +37 -14
- crossref_local/{models.py → _core/models.py} +120 -6
- crossref_local/_remote/__init__.py +56 -0
- crossref_local/_remote/base.py +378 -0
- crossref_local/_remote/collections.py +175 -0
- crossref_local/_server/__init__.py +140 -0
- crossref_local/_server/middleware.py +25 -0
- crossref_local/_server/models.py +143 -0
- crossref_local/_server/routes_citations.py +98 -0
- crossref_local/_server/routes_collections.py +282 -0
- crossref_local/_server/routes_compat.py +102 -0
- crossref_local/_server/routes_works.py +178 -0
- crossref_local/_server/server.py +19 -0
- crossref_local/aio.py +30 -206
- crossref_local/cache.py +100 -100
- crossref_local/cli.py +5 -515
- crossref_local/jobs.py +169 -0
- crossref_local/mcp_server.py +5 -410
- crossref_local/remote.py +5 -266
- crossref_local/server.py +5 -349
- {crossref_local-0.4.0.dist-info → crossref_local-0.5.1.dist-info}/METADATA +36 -11
- crossref_local-0.5.1.dist-info/RECORD +49 -0
- {crossref_local-0.4.0.dist-info → crossref_local-0.5.1.dist-info}/entry_points.txt +1 -1
- crossref_local/cli_mcp.py +0 -275
- crossref_local-0.4.0.dist-info/RECORD +0 -27
- crossref_local/{cache_viz.py → _cache/viz.py} +0 -0
- crossref_local/{cli_cache.py → _cli/cache.py} +0 -0
- crossref_local/{cli_completion.py → _cli/completion.py} +0 -0
- crossref_local/{cli_main.py → _cli/main.py} +0 -0
- crossref_local/{impact_factor → _impact_factor}/__init__.py +0 -0
- crossref_local/{impact_factor → _impact_factor}/calculator.py +0 -0
- crossref_local/{impact_factor → _impact_factor}/journal_lookup.py +0 -0
- {crossref_local-0.4.0.dist-info → crossref_local-0.5.1.dist-info}/WHEEL +0 -0
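Reading the counts: the old top-level modules are reduced to thin re-export shims (`cli.py` +5 -515, `mcp_server.py` +5 -410, `remote.py` +5 -266, `server.py` +5 -349), with the implementations moved into new private subpackages (`_core`, `_cli`, `_cache`, `_server`, `_remote`, `_impact_factor`). The hunks below show the substantive new code: `_core/export.py`, `_core/fts.py`, `_core/models.py`, and `_remote/__init__.py`.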
crossref_local/_core/export.py (new file)
@@ -0,0 +1,344 @@
+"""Export functionality for Work and SearchResult objects.
+
+Supports multiple output formats:
+- text: Human-readable formatted text
+- json: JSON format with all fields
+- bibtex: BibTeX bibliography format
+"""
+
+import json as _json
+from pathlib import Path as _Path
+from typing import TYPE_CHECKING, List, Optional, Union
+
+if TYPE_CHECKING:
+    from .models import SearchResult, Work
+
+__all__ = [
+    "save",
+    "export_text",
+    "export_json",
+    "export_bibtex",
+    "SUPPORTED_FORMATS",
+]
+
+SUPPORTED_FORMATS = ["text", "json", "bibtex"]
+
+
+def _sanitize_bibtex_key(doi: str) -> str:
+    """Convert DOI to valid BibTeX key."""
+    return doi.replace("/", "_").replace(".", "_").replace("-", "_")
+
+
+def _escape_bibtex(text: str) -> str:
+    """Escape special characters for BibTeX."""
+    if not text:
+        return ""
+    # Escape special LaTeX characters
+    replacements = [
+        ("&", r"\&"),
+        ("%", r"\%"),
+        ("$", r"\$"),
+        ("#", r"\#"),
+        ("_", r"\_"),
+        ("{", r"\{"),
+        ("}", r"\}"),
+    ]
+    for old, new in replacements:
+        text = text.replace(old, new)
+    return text
+
+
+def work_to_text(work: "Work", include_abstract: bool = False) -> str:
+    """Convert a Work to human-readable text format.
+
+    Args:
+        work: Work object to convert
+        include_abstract: Whether to include abstract
+
+    Returns:
+        Formatted text string
+    """
+    lines = []
+
+    # Title
+    title = work.title or "Untitled"
+    year = f"({work.year})" if work.year else ""
+    lines.append(f"{title} {year}".strip())
+
+    # Authors
+    if work.authors:
+        authors_str = ", ".join(work.authors[:5])
+        if len(work.authors) > 5:
+            authors_str += f" et al. ({len(work.authors)} authors)"
+        lines.append(f"Authors: {authors_str}")
+
+    # Journal and DOI
+    if work.journal:
+        journal_line = f"Journal: {work.journal}"
+        if work.volume:
+            journal_line += f", {work.volume}"
+        if work.issue:
+            journal_line += f"({work.issue})"
+        if work.page:
+            journal_line += f", {work.page}"
+        lines.append(journal_line)
+
+    lines.append(f"DOI: {work.doi}")
+
+    # Impact factor
+    if work.impact_factor:
+        lines.append(
+            f"Impact Factor: {work.impact_factor:.2f} ({work.impact_factor_source or 'unknown'})"
+        )
+
+    # Citation count
+    if work.citation_count is not None:
+        lines.append(f"Citations: {work.citation_count}")
+
+    # Abstract
+    if include_abstract and work.abstract:
+        # Strip XML tags
+        import re
+
+        abstract = re.sub(r"<[^>]+>", " ", work.abstract)
+        abstract = re.sub(r"\s+", " ", abstract).strip()
+        lines.append(f"Abstract: {abstract}")
+
+    return "\n".join(lines)
+
+
+def work_to_bibtex(work: "Work") -> str:
+    """Convert a Work to BibTeX format.
+
+    Args:
+        work: Work object to convert
+
+    Returns:
+        BibTeX entry string
+    """
+    key = _sanitize_bibtex_key(work.doi) if work.doi else "unknown"
+    work_type = work.type or "article"
+
+    # Map CrossRef types to BibTeX types
+    bibtex_type_map = {
+        "journal-article": "article",
+        "book-chapter": "incollection",
+        "book": "book",
+        "proceedings-article": "inproceedings",
+        "dissertation": "phdthesis",
+        "report": "techreport",
+    }
+    bibtex_type = bibtex_type_map.get(work_type, "misc")
+
+    lines = [f"@{bibtex_type}{{{key},"]
+
+    if work.title:
+        lines.append(f" title = {{{_escape_bibtex(work.title)}}},")
+
+    if work.authors:
+        authors = " and ".join(work.authors)
+        lines.append(f" author = {{{_escape_bibtex(authors)}}},")
+
+    if work.year:
+        lines.append(f" year = {{{work.year}}},")
+
+    if work.journal:
+        lines.append(f" journal = {{{_escape_bibtex(work.journal)}}},")
+
+    if work.volume:
+        lines.append(f" volume = {{{work.volume}}},")
+
+    if work.issue:
+        lines.append(f" number = {{{work.issue}}},")
+
+    if work.page:
+        lines.append(f" pages = {{{work.page}}},")
+
+    if work.publisher:
+        lines.append(f" publisher = {{{_escape_bibtex(work.publisher)}}},")
+
+    if work.doi:
+        lines.append(f" doi = {{{work.doi}}},")
+
+    if work.url:
+        lines.append(f" url = {{{work.url}}},")
+
+    if work.issn:
+        lines.append(f" issn = {{{work.issn}}},")
+
+    lines.append("}")
+
+    return "\n".join(lines)
+
+
+def export_text(
+    works: List["Work"],
+    include_abstract: bool = False,
+    query: Optional[str] = None,
+    total: Optional[int] = None,
+    elapsed_ms: Optional[float] = None,
+) -> str:
+    """Export works to text format.
+
+    Args:
+        works: List of Work objects
+        include_abstract: Whether to include abstracts
+        query: Original search query (for header)
+        total: Total number of matches
+        elapsed_ms: Search time in milliseconds
+
+    Returns:
+        Formatted text string
+    """
+    lines = []
+
+    # Header
+    if query is not None:
+        lines.append(f"Search: {query}")
+    if total is not None:
+        lines.append(f"Found: {total:,} matches")
+    if elapsed_ms is not None:
+        lines.append(f"Time: {elapsed_ms:.1f}ms")
+    lines.append("")
+    lines.append("=" * 60)
+    lines.append("")
+
+    # Works
+    for i, work in enumerate(works, 1):
+        lines.append(f"[{i}]")
+        lines.append(work_to_text(work, include_abstract=include_abstract))
+        lines.append("")
+        lines.append("-" * 40)
+        lines.append("")
+
+    return "\n".join(lines)
+
+
+def export_json(
+    works: List["Work"],
+    query: Optional[str] = None,
+    total: Optional[int] = None,
+    elapsed_ms: Optional[float] = None,
+    indent: int = 2,
+) -> str:
+    """Export works to JSON format.
+
+    Args:
+        works: List of Work objects
+        query: Original search query
+        total: Total number of matches
+        elapsed_ms: Search time in milliseconds
+        indent: JSON indentation
+
+    Returns:
+        JSON string
+    """
+    data = {
+        "works": [w.to_dict() for w in works],
+    }
+
+    if query is not None:
+        data["query"] = query
+    if total is not None:
+        data["total"] = total
+    if elapsed_ms is not None:
+        data["elapsed_ms"] = elapsed_ms
+
+    return _json.dumps(data, indent=indent, ensure_ascii=False)
+
+
+def export_bibtex(works: List["Work"]) -> str:
+    """Export works to BibTeX format.
+
+    Args:
+        works: List of Work objects
+
+    Returns:
+        BibTeX string with all entries
+    """
+    entries = [work_to_bibtex(w) for w in works]
+    return "\n\n".join(entries)
+
+
+def save(
+    data: Union["Work", "SearchResult", List["Work"]],
+    path: Union[str, _Path],
+    format: str = "json",
+    include_abstract: bool = True,
+) -> str:
+    """Save Work(s) or SearchResult to a file.
+
+    Args:
+        data: Work, SearchResult, or list of Works to save
+        path: Output file path
+        format: Output format ("text", "json", "bibtex")
+        include_abstract: Include abstracts in text format
+
+    Returns:
+        Path to saved file
+
+    Raises:
+        ValueError: If format is not supported
+
+    Examples:
+        >>> from crossref_local import search, save
+        >>> results = search("machine learning", limit=10)
+        >>> save(results, "results.json")
+        >>> save(results, "results.bib", format="bibtex")
+        >>> save(results, "results.txt", format="text")
+    """
+    from .models import SearchResult, Work
+
+    if format not in SUPPORTED_FORMATS:
+        raise ValueError(
+            f"Unsupported format: {format}. "
+            f"Supported formats: {', '.join(SUPPORTED_FORMATS)}"
+        )
+
+    path = _Path(path)
+
+    # Extract works and metadata
+    if isinstance(data, Work):
+        works = [data]
+        query = None
+        total = None
+        elapsed_ms = None
+    elif isinstance(data, SearchResult):
+        works = data.works
+        query = data.query
+        total = data.total
+        elapsed_ms = data.elapsed_ms
+    elif isinstance(data, list):
+        works = data
+        query = None
+        total = len(data)
+        elapsed_ms = None
+    else:
+        raise TypeError(f"Unsupported data type: {type(data)}")

+    # Generate content
+    if format == "text":
+        content = export_text(
+            works,
+            include_abstract=include_abstract,
+            query=query,
+            total=total,
+            elapsed_ms=elapsed_ms,
+        )
+    elif format == "json":
+        content = export_json(
+            works,
+            query=query,
+            total=total,
+            elapsed_ms=elapsed_ms,
+        )
+    elif format == "bibtex":
+        content = export_bibtex(works)
+    else:
+        raise ValueError(f"Unsupported format: {format}")
+
+    # Write to file
+    path.parent.mkdir(parents=True, exist_ok=True)
+    path.write_text(content, encoding="utf-8")
+
+    return str(path)
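The module's one public entry point is `save()`: it validates `format` against `SUPPORTED_FORMATS`, normalizes the input (a single `Work`, a `SearchResult`, or a plain list of `Work`s), then dispatches to the matching exporter. A minimal usage sketch, following the docstring examples and assuming a configured local database:

```python
from crossref_local import search, save

results = search("machine learning", limit=10)  # SearchResult

save(results, "results.json")                   # default format; keeps query/total/elapsed_ms
save(results, "results.bib", format="bibtex")   # one entry per work, via work_to_bibtex()
save(results, "results.txt", format="text", include_abstract=False)
```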
crossref_local/{fts.py → _core/fts.py}
@@ -1,11 +1,17 @@
 """Full-text search using FTS5."""
 
-import re
-import time
+import re as _re
+import time as _time
 from typing import List, Optional
 
 from .db import Database, get_db
-from .models import SearchResult, Work
+from .models import LimitInfo, SearchResult, Work
+
+__all__ = [
+    "search",
+    "count",
+    "search_dois",
+]
 
 
 def _sanitize_query(query: str) -> str:
@@ -24,13 +30,13 @@ def _sanitize_query(query: str) -> str:
 
     # Check for problematic patterns (hyphenated words, special chars)
     # But allow explicit FTS5 operators: AND, OR, NOT, NEAR
-    has_hyphenated_word = re.search(r"\w+-\w+", query)
-    has_special = re.search(r"[/\\@#$%^&]", query)
+    has_hyphenated_word = _re.search(r"\w+-\w+", query)
+    has_special = _re.search(r"[/\\@#$%^&]", query)
 
     if has_hyphenated_word or has_special:
         # Quote each word to treat as literal
         words = query.split()
-        quoted =
+        quoted = " ".join(f'"{w}"' for w in words)
         return quoted
 
     return query
@@ -65,15 +71,14 @@ def search(
     if db is None:
         db = get_db()
 
-    start = time.perf_counter()
+    start = _time.perf_counter()
 
     # Sanitize query for FTS5
     safe_query = _sanitize_query(query)
 
     # Get total count
     count_row = db.fetchone(
-        "SELECT COUNT(*) as total FROM works_fts WHERE works_fts MATCH ?",
-        (safe_query,)
+        "SELECT COUNT(*) as total FROM works_fts WHERE works_fts MATCH ?", (safe_query,)
     )
     total = count_row["total"] if count_row else 0
 
@@ -86,10 +91,10 @@ def search(
         WHERE works_fts MATCH ?
         LIMIT ? OFFSET ?
         """,
-        (safe_query, limit, offset)
+        (safe_query, limit, offset),
     )
 
-    elapsed_ms = (time.perf_counter() - start) * 1000
+    elapsed_ms = (_time.perf_counter() - start) * 1000
 
     # Convert to Work objects
     works = []
@@ -97,11 +102,30 @@ def search(
         metadata = db._decompress_metadata(row["metadata"])
         works.append(Work.from_metadata(row["doi"], metadata))
 
+    # Build limit info
+    returned = len(works)
+    capped = returned < total and returned == limit
+    capped_reason = None
+    if capped:
+        capped_reason = (
+            f"crossref-local: Limited to {limit} results (total available: {total})"
+        )
+
+    limit_info = LimitInfo(
+        requested=limit,
+        returned=returned,
+        total_available=total,
+        capped=capped,
+        capped_reason=capped_reason,
+        stage="crossref-local",
+    )
+
     return SearchResult(
         works=works,
         total=total,
         query=query,
         elapsed_ms=elapsed_ms,
+        limit_info=limit_info,
     )
 
 
@@ -121,8 +145,7 @@ def count(query: str, db: Optional[Database] = None) -> int:
 
     safe_query = _sanitize_query(query)
     row = db.fetchone(
-        "SELECT COUNT(*) as total FROM works_fts WHERE works_fts MATCH ?",
-        (safe_query,)
+        "SELECT COUNT(*) as total FROM works_fts WHERE works_fts MATCH ?", (safe_query,)
    )
    return row["total"] if row else 0
 
@@ -155,7 +178,7 @@ def search_dois(
         WHERE works_fts MATCH ?
         LIMIT ?
         """,
-        (safe_query, limit)
+        (safe_query, limit),
     )
 
     return [row["doi"] for row in rows]
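Net effect of these hunks: `search()` now attaches a `LimitInfo` describing whether the caller's `limit` truncated the result set, alongside the formatting-only changes (trailing commas, underscore-aliased imports). A sketch of the new field in use (assumes a populated local database; `search` is imported at the top level as in the export docstring):

```python
from crossref_local import search

result = search("epilepsy seizure", limit=5)

info = result.limit_info  # LimitInfo (Optional on the dataclass for compatibility)
if info is not None and info.capped:
    # e.g. "crossref-local: Limited to 5 results (total available: 12345)"
    print(info.capped_reason)
    print(info.to_dict())  # {"requested": 5, "returned": 5, "total_available": ..., ...}
```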
crossref_local/{models.py → _core/models.py}
@@ -1,11 +1,17 @@
 """Data models for crossref_local."""
 
-from dataclasses import dataclass, field
+from dataclasses import dataclass as _dataclass
+from dataclasses import field as _field
 from typing import List, Optional
-import json
 
+__all__ = [
+    "Work",
+    "SearchResult",
+    "LimitInfo",
+]
 
-@dataclass
+
+@_dataclass
 class Work:
     """
     Represents a scholarly work from CrossRef.
@@ -30,7 +36,7 @@ class Work:
 
     doi: str
     title: Optional[str] = None
-    authors: List[str] = field(default_factory=list)
+    authors: List[str] = _field(default_factory=list)
     year: Optional[int] = None
     journal: Optional[str] = None
     issn: Optional[str] = None
@@ -42,7 +48,9 @@ class Work:
     abstract: Optional[str] = None
     url: Optional[str] = None
     citation_count: Optional[int] = None
-    references: List[str] = field(default_factory=list)
+    references: List[str] = _field(default_factory=list)
+    impact_factor: Optional[float] = None
+    impact_factor_source: Optional[str] = None
 
     @classmethod
     def from_metadata(cls, doi: str, metadata: dict) -> "Work":
@@ -125,6 +133,8 @@ class Work:
             "url": self.url,
             "citation_count": self.citation_count,
             "references": self.references,
+            "impact_factor": self.impact_factor,
+            "impact_factor_source": self.impact_factor_source,
         }
 
     def citation(self, style: str = "apa") -> str:
@@ -158,8 +168,86 @@ class Work:
 
         return ". ".join(filter(None, parts))
 
+    def to_text(self, include_abstract: bool = False) -> str:
+        """
+        Format as human-readable text.
+
+        Args:
+            include_abstract: Include abstract in output
+
+        Returns:
+            Formatted text string
+        """
+        from .export import work_to_text
+
+        return work_to_text(self, include_abstract=include_abstract)
+
+    def to_bibtex(self) -> str:
+        """
+        Format as BibTeX entry.
+
+        Returns:
+            BibTeX string
+        """
+        from .export import work_to_bibtex
+
+        return work_to_bibtex(self)
+
+    def save(self, path: str, format: str = "json") -> str:
+        """
+        Save work to file.
+
+        Args:
+            path: Output file path
+            format: Output format ("text", "json", "bibtex")
+
+        Returns:
+            Path to saved file
+
+        Examples:
+            >>> work = get("10.1038/nature12373")
+            >>> work.save("paper.json")
+            >>> work.save("paper.bib", format="bibtex")
+        """
+        from .export import save
+
+        return save(self, path, format=format)
 
-@dataclass
+
+@_dataclass
+class LimitInfo:
+    """
+    Information about result limiting at each stage.
+
+    Attributes:
+        requested: Number of results requested
+        returned: Number of results actually returned
+        total_available: Total matches in database
+        capped: Whether results were capped
+        capped_reason: Why results were capped (if applicable)
+        stage: Which stage applied this limit (e.g., "crossref-local", "scitex", "django")
+    """
+
+    requested: int
+    returned: int
+    total_available: int
+    capped: bool = False
+    capped_reason: Optional[str] = None
+    stage: str = "crossref-local"
+
+    def to_dict(self) -> dict:
+        """Convert to dictionary."""
+        return {
+            "requested": self.requested,
+            "returned": self.returned,
+            "total_available": self.total_available,
+            "capped": self.capped,
+            "capped_reason": self.capped_reason,
+            "stage": self.stage,
+        }
+
+
+@_dataclass
 class SearchResult:
     """
     Container for search results with metadata.
@@ -169,12 +257,14 @@ class SearchResult:
         total: Total number of matches
        query: Original search query
         elapsed_ms: Search time in milliseconds
+        limit_info: Information about result limiting
     """
 
     works: List[Work]
     total: int
     query: str
     elapsed_ms: float
+    limit_info: Optional[LimitInfo] = None
 
     def __len__(self) -> int:
         return len(self.works)
@@ -184,3 +274,27 @@ class SearchResult:
 
     def __getitem__(self, idx):
         return self.works[idx]
+
+    def save(
+        self, path: str, format: str = "json", include_abstract: bool = True
+    ) -> str:
+        """
+        Save search results to file.
+
+        Args:
+            path: Output file path
+            format: Output format ("text", "json", "bibtex")
+            include_abstract: Include abstracts in text format
+
+        Returns:
+            Path to saved file
+
+        Examples:
+            >>> results = search("machine learning", limit=10)
+            >>> results.save("results.json")
+            >>> results.save("results.bib", format="bibtex")
+            >>> results.save("results.txt", format="text")
+        """
+        from .export import save
+
+        return save(self, path, format=format, include_abstract=include_abstract)
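The new `Work.to_text`/`to_bibtex`/`save` methods (and `SearchResult.save`) are thin delegates into `_core/export.py`, imported lazily inside each method to avoid a circular import. A sketch based on the docstring examples; `get` is assumed to be the package's DOI lookup, as in the `Work.save` docstring:

```python
from crossref_local import get

work = get("10.1038/nature12373")           # DOI from the docstring example

text = work.to_text(include_abstract=True)  # delegates to export.work_to_text
bib = work.to_bibtex()                      # delegates to export.work_to_bibtex
work.save("paper.bib", format="bibtex")     # delegates to export.save
```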
crossref_local/_remote/__init__.py (new file)
@@ -0,0 +1,56 @@
+"""Remote API client package with collection support.
+
+Provides RemoteClient for connecting to CrossRef Local API server.
+"""
+
+from typing import Optional
+
+from .base import (
+    RemoteClient as _BaseClient,
+    DEFAULT_API_URL,
+)
+from .collections import CollectionsMixin
+
+
+class RemoteClient(CollectionsMixin, _BaseClient):
+    """Remote client with collection support.
+
+    Extends base RemoteClient with collection management methods.
+
+    Example:
+        >>> client = RemoteClient("http://localhost:31291")
+        >>> # Create a collection
+        >>> client.create_collection("epilepsy", query="epilepsy seizure")
+        >>> # Query collection
+        >>> papers = client.get_collection("epilepsy", fields=["doi", "title"])
+        >>> # Download as file
+        >>> client.download_collection("epilepsy", "papers.bib", format="bibtex")
+    """
+
+    pass
+
+
+# Module-level client singleton
+_client: Optional[RemoteClient] = None
+
+
+def get_client(base_url: str = DEFAULT_API_URL) -> RemoteClient:
+    """Get or create singleton remote client with collection support."""
+    global _client
+    if _client is None or _client.base_url != base_url:
+        _client = RemoteClient(base_url)
+    return _client
+
+
+def reset_client() -> None:
+    """Reset singleton client."""
+    global _client
+    _client = None
+
+
+__all__ = [
+    "RemoteClient",
+    "DEFAULT_API_URL",
+    "get_client",
+    "reset_client",
+]
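Because `CollectionsMixin` precedes `_BaseClient` in the bases, collection methods resolve first in the MRO while transport and `base_url` handling come from the base client; `get_client` caches one instance per `base_url`. Usage, following the class docstring (the URL and collection name are the docstring's own placeholders):

```python
from crossref_local._remote import get_client, reset_client

client = get_client("http://localhost:31291")  # cached singleton per base_url
client.create_collection("epilepsy", query="epilepsy seizure")
papers = client.get_collection("epilepsy", fields=["doi", "title"])
client.download_collection("epilepsy", "papers.bib", format="bibtex")

reset_client()  # drop the cached client (e.g. between tests)
```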