PyPI - pyzotero - Versions diffs - 1.7.6__py3-none-any.whl → 1.8.0__py3-none-any.whl - Mend

pyzotero 1.7.6__py3-none-any.whl → 1.8.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (17) hide show

pyzotero/__init__.py +60 -0
pyzotero/_client.py +1402 -0
pyzotero/_decorators.py +195 -0
pyzotero/_search.py +190 -0
pyzotero/_upload.py +241 -0
pyzotero/_utils.py +86 -0
pyzotero/cli.py +420 -1
pyzotero/errors.py +185 -0
pyzotero/filetransport.py +2 -2
pyzotero/semantic_scholar.py +441 -0
pyzotero/zotero.py +62 -2035
pyzotero/zotero_errors.py +53 -136
{pyzotero-1.7.6.dist-info → pyzotero-1.8.0.dist-info}/METADATA +3 -3
pyzotero-1.8.0.dist-info/RECORD +16 -0
pyzotero-1.7.6.dist-info/RECORD +0 -9
{pyzotero-1.7.6.dist-info → pyzotero-1.8.0.dist-info}/WHEEL +0 -0
{pyzotero-1.7.6.dist-info → pyzotero-1.8.0.dist-info}/entry_points.txt +0 -0

pyzotero/_utils.py ADDED Viewed

@@ -0,0 +1,86 @@
+"""Utility functions for Pyzotero.
+This module contains helper functions used throughout the library.
+"""
+from __future__ import annotations
+import uuid
+from collections.abc import Iterator
+from pathlib import PurePosixPath
+from typing import TypeVar
+from urllib.parse import parse_qs, urlencode, urlparse, urlunparse
+# Avoid hanging the application if there's no server response
+DEFAULT_TIMEOUT = 30  # presumably seconds -- confirm where the HTTP client is configured
+ONE_HOUR = 3600  # one hour expressed in seconds (60 * 60)
+DEFAULT_NUM_ITEMS = 50  # NOTE(review): not used in this module; confirm meaning against callers
+DEFAULT_ITEM_LIMIT = 100  # NOTE(review): not used in this module; confirm meaning against callers
+# Element type of the lists handled by chunks()
+T = TypeVar("T")
+def build_url(base_url: str, path: str, args_dict: dict | None = None) -> str:
+    """Build a valid URL from base, path, and optional query parameters.
+    This avoids string concatenation errors and leading/trailing slash issues.
+    Args:
+        base_url: Scheme, host and optional path prefix, e.g. "https://api.zotero.org"
+        path: Path appended beneath the base URL's own path; leading slashes are ignored
+        args_dict: Optional query parameters, encoded with urllib.parse.urlencode
+    Returns:
+        The assembled URL as a string
+    """
+    parsed = urlparse(base_url.removesuffix("/"))
+    # Strip *all* leading slashes: PurePosixPath treats a right-hand operand that
+    # still starts with "/" as absolute and would silently discard the base path
+    # (e.g. "/api" / "/items" == "/items").
+    new_path = str(PurePosixPath(parsed.path) / path.lstrip("/"))
+    if args_dict:
+        return urlunparse(parsed._replace(path=new_path, query=urlencode(args_dict)))
+    return urlunparse(parsed._replace(path=new_path))
+def merge_params(url: str, params: dict) -> tuple[str, dict]:
+    """Split the query string off a URL and fold it into the given params.
+    Only the first value of each query parameter found in the URL is kept,
+    and an entry in params overrides a URL parameter of the same name.
+    Returns a tuple of (url_without_query, merged_params).
+    """
+    parts = urlparse(url)
+    # parse_qs maps each name to a list of values; keep only the first one
+    merged = {name: values[0] for name, values in parse_qs(parts.query).items()}
+    # Explicit params win over whatever was embedded in the URL
+    merged.update(params)
+    stripped_url = urlunparse(parts._replace(query=""))
+    return stripped_url, merged
+def token() -> str:
+    """Generate a 32-character hexadecimal write-token from a random UUID."""
+    # UUID.hex is already a str of 32 hex digits
+    write_token = uuid.uuid4().hex
+    return write_token
+def chunks(iterable: list[T], n: int) -> Iterator[list[T]]:
+    """Lazily split a list into consecutive slices of at most n elements.
+    The final slice is shorter when len(iterable) is not a multiple of n.
+    """
+    starts = range(0, len(iterable), n)
+    yield from (iterable[start : start + n] for start in starts)
+def get_backoff_duration(headers) -> str | None:
+    """Return the server's backoff hint from response headers, if any.
+    The 'backoff' header takes precedence; 'retry-after' is consulted only
+    when 'backoff' is missing or empty.
+    """
+    backoff = headers.get("backoff")
+    if backoff:
+        return backoff
+    return headers.get("retry-after")
+# Names exported by a star-import of this module; the TypeVar T is not listed.
+__all__ = [
+    "DEFAULT_ITEM_LIMIT",
+    "DEFAULT_NUM_ITEMS",
+    "DEFAULT_TIMEOUT",
+    "ONE_HOUR",
+    "build_url",
+    "chunks",
+    "get_backoff_duration",
+    "merge_params",
+    "token",
+]

pyzotero/cli.py CHANGED Viewed

@@ -3,10 +3,20 @@
 import json
 import sys
-import click
+import click  # ty:ignore[unresolved-import]
 import httpx
 from pyzotero import __version__, zotero
+from pyzotero.semantic_scholar import (
+    PaperNotFoundError,
+    RateLimitError,
+    SemanticScholarError,
+    filter_by_citations,
+    get_citations,
+    get_recommendations,
+    get_references,
+    search_papers,
+)
 from pyzotero.zotero import chunks
@@ -513,5 +523,414 @@ def alldoi(ctx, dois, output_json):  # noqa: PLR0912
         sys.exit(1)
+def _build_doi_index(zot):
+    """Index the whole Zotero library by normalised DOI.
+    Every item is fetched via zot.everything(zot.items()); items without a
+    DOI or without a key are left out. If several items share a DOI, the one
+    seen last wins.
+    Returns:
+        Dict mapping normalised DOIs to item keys
+    """
+    index = {}
+    for entry in zot.everything(zot.items()):
+        fields = entry.get("data", {})
+        raw_doi = fields.get("DOI", "")
+        if not raw_doi:
+            continue
+        doi_key = _normalize_doi(raw_doi)
+        zotero_key = fields.get("key", "")
+        if doi_key and zotero_key:
+            index[doi_key] = zotero_key
+    return index
+def _format_s2_paper(paper, in_library=None):
+    """Project a Semantic Scholar paper onto the fields the CLI prints.
+    Args:
+        paper: Normalised paper dict from semantic_scholar module
+        in_library: Optional flag; emitted as "inLibrary" unless it is None
+    Returns:
+        New dict with a fixed key order; fields missing from paper become None
+    """
+    formatted = {field: paper.get(field) for field in ("paperId", "doi", "title")}
+    # "authors" may be absent or None; either way it becomes a list of names
+    formatted["authors"] = [
+        author.get("name") for author in (paper.get("authors") or [])
+    ]
+    for field in (
+        "year",
+        "venue",
+        "citationCount",
+        "referenceCount",
+        "isOpenAccess",
+        "openAccessPdfUrl",
+    ):
+        formatted[field] = paper.get(field)
+    if in_library is None:
+        return formatted
+    formatted["inLibrary"] = in_library
+    return formatted
+def _annotate_with_library(papers, doi_map):
+    """Format papers and flag which of them are already in the library.
+    A paper counts as present when its normalised DOI is a key of doi_map;
+    papers without a DOI are always flagged False.
+    Args:
+        papers: List of normalised paper dicts
+        doi_map: Dict mapping normalised DOIs to Zotero item keys
+    Returns:
+        List of formatted paper dicts with inLibrary field
+    """
+    annotated = []
+    for entry in papers:
+        raw_doi = entry.get("doi")
+        # Short-circuits to False for a missing/empty DOI without normalising it
+        present = bool(raw_doi) and _normalize_doi(raw_doi) in doi_map
+        annotated.append(_format_s2_paper(entry, present))
+    return annotated
+# CLI command `related`: Semantic Scholar recommendations for one DOI, printed as JSON.
+# NOTE: click uses the function docstring as the command's --help text, so it is runtime output.
+@main.command()
+@click.option(
+    "--doi",
+    required=True,
+    help="DOI of the paper to find related papers for",
+)
+@click.option(
+    "--limit",
+    type=int,
+    default=20,
+    help="Maximum number of results to return (default: 20, max: 500)",
+)
+@click.option(
+    "--min-citations",
+    type=int,
+    default=0,
+    help="Minimum citation count filter (default: 0)",
+)
+@click.option(
+    "--check-library/--no-check-library",
+    default=True,
+    help="Check if papers exist in local Zotero (default: True)",
+)
+@click.pass_context
+def related(ctx, doi, limit, min_citations, check_library):
+    """Find papers related to a given paper using Semantic Scholar.
+    Uses SPECTER2 embeddings to find semantically similar papers.
+    Examples:
+        pyzotero related --doi "10.1038/nature12373"
+        pyzotero related --doi "10.1038/nature12373" --limit 50
+        pyzotero related --doi "10.1038/nature12373" --min-citations 100
+    """
+    try:
+        # Get recommendations from Semantic Scholar
+        # Progress messages go to stderr (err=True) so stdout carries only the JSON result
+        click.echo(f"Fetching related papers for DOI: {doi}...", err=True)
+        # NOTE(review): the "max: 500" quoted in the --limit help is not enforced here;
+        # presumably get_recommendations clamps or rejects larger values -- confirm
+        result = get_recommendations(doi, id_type="doi", limit=limit)
+        papers = result.get("papers", [])
+        # Apply citation filter
+        # (skipped entirely when the threshold is 0 or negative)
+        if min_citations > 0:
+            papers = filter_by_citations(papers, min_citations)
+        # Nothing left: print an empty result and return before contacting Zotero
+        if not papers:
+            click.echo(json.dumps({"count": 0, "papers": []}))
+            return
+        # Optionally annotate with library status
+        if check_library:
+            click.echo("Checking local Zotero library...", err=True)
+            locale = ctx.obj.get("locale", "en-US")
+            zot = _get_zotero_client(locale)
+            # _build_doi_index fetches every item in the library to build the lookup
+            doi_map = _build_doi_index(zot)
+            output_papers = _annotate_with_library(papers, doi_map)
+        else:
+            # Without the library check the output has no "inLibrary" field
+            output_papers = [_format_s2_paper(p) for p in papers]
+        click.echo(
+            json.dumps({"count": len(output_papers), "papers": output_papers}, indent=2)
+        )
+    # Every failure path prints to stderr and exits with status 1.
+    # NOTE(review): PaperNotFoundError and RateLimitError presumably subclass
+    # SemanticScholarError, in which case they must stay ahead of it -- confirm
+    except PaperNotFoundError:
+        click.echo("Error: Paper not found in Semantic Scholar.", err=True)
+        sys.exit(1)
+    except RateLimitError:
+        click.echo("Error: Rate limit exceeded. Please wait and try again.", err=True)
+        sys.exit(1)
+    except SemanticScholarError as e:
+        click.echo(f"Error: {e!s}", err=True)
+        sys.exit(1)
+    # Catch-all boundary for anything else (e.g. errors raised while querying Zotero)
+    except Exception as e:
+        click.echo(f"Error: {e!s}", err=True)
+        sys.exit(1)
+# CLI command `citations`: papers citing one DOI according to Semantic Scholar, printed as JSON.
+# NOTE: click uses the function docstring as the command's --help text, so it is runtime output.
+@main.command()
+@click.option(
+    "--doi",
+    required=True,
+    help="DOI of the paper to find citations for",
+)
+@click.option(
+    "--limit",
+    type=int,
+    default=100,
+    help="Maximum number of results to return (default: 100, max: 1000)",
+)
+@click.option(
+    "--min-citations",
+    type=int,
+    default=0,
+    help="Minimum citation count filter (default: 0)",
+)
+@click.option(
+    "--check-library/--no-check-library",
+    default=True,
+    help="Check if papers exist in local Zotero (default: True)",
+)
+@click.pass_context
+def citations(ctx, doi, limit, min_citations, check_library):
+    """Find papers that cite a given paper using Semantic Scholar.
+    Examples:
+        pyzotero citations --doi "10.1038/nature12373"
+        pyzotero citations --doi "10.1038/nature12373" --limit 50
+        pyzotero citations --doi "10.1038/nature12373" --min-citations 50
+    """
+    try:
+        # Get citations from Semantic Scholar
+        # Progress messages go to stderr (err=True) so stdout carries only the JSON result
+        click.echo(f"Fetching citations for DOI: {doi}...", err=True)
+        # NOTE(review): the "max: 1000" quoted in the --limit help is not enforced here;
+        # presumably get_citations clamps or rejects larger values -- confirm
+        result = get_citations(doi, id_type="doi", limit=limit)
+        papers = result.get("papers", [])
+        # Apply citation filter
+        # (skipped entirely when the threshold is 0 or negative)
+        if min_citations > 0:
+            papers = filter_by_citations(papers, min_citations)
+        # Nothing left: print an empty result and return before contacting Zotero
+        if not papers:
+            click.echo(json.dumps({"count": 0, "papers": []}))
+            return
+        # Optionally annotate with library status
+        if check_library:
+            click.echo("Checking local Zotero library...", err=True)
+            locale = ctx.obj.get("locale", "en-US")
+            zot = _get_zotero_client(locale)
+            # _build_doi_index fetches every item in the library to build the lookup
+            doi_map = _build_doi_index(zot)
+            output_papers = _annotate_with_library(papers, doi_map)
+        else:
+            # Without the library check the output has no "inLibrary" field
+            output_papers = [_format_s2_paper(p) for p in papers]
+        click.echo(
+            json.dumps({"count": len(output_papers), "papers": output_papers}, indent=2)
+        )
+    # Every failure path prints to stderr and exits with status 1.
+    # NOTE(review): PaperNotFoundError and RateLimitError presumably subclass
+    # SemanticScholarError, in which case they must stay ahead of it -- confirm
+    except PaperNotFoundError:
+        click.echo("Error: Paper not found in Semantic Scholar.", err=True)
+        sys.exit(1)
+    except RateLimitError:
+        click.echo("Error: Rate limit exceeded. Please wait and try again.", err=True)
+        sys.exit(1)
+    except SemanticScholarError as e:
+        click.echo(f"Error: {e!s}", err=True)
+        sys.exit(1)
+    # Catch-all boundary for anything else (e.g. errors raised while querying Zotero)
+    except Exception as e:
+        click.echo(f"Error: {e!s}", err=True)
+        sys.exit(1)
+# CLI command `references`: papers referenced by one DOI according to Semantic Scholar, printed as JSON.
+# NOTE: click uses the function docstring as the command's --help text, so it is runtime output.
+@main.command()
+@click.option(
+    "--doi",
+    required=True,
+    help="DOI of the paper to find references for",
+)
+@click.option(
+    "--limit",
+    type=int,
+    default=100,
+    help="Maximum number of results to return (default: 100, max: 1000)",
+)
+@click.option(
+    "--min-citations",
+    type=int,
+    default=0,
+    help="Minimum citation count filter (default: 0)",
+)
+@click.option(
+    "--check-library/--no-check-library",
+    default=True,
+    help="Check if papers exist in local Zotero (default: True)",
+)
+@click.pass_context
+def references(ctx, doi, limit, min_citations, check_library):
+    """Find papers referenced by a given paper using Semantic Scholar.
+    Examples:
+        pyzotero references --doi "10.1038/nature12373"
+        pyzotero references --doi "10.1038/nature12373" --limit 50
+        pyzotero references --doi "10.1038/nature12373" --min-citations 100
+    """
+    try:
+        # Get references from Semantic Scholar
+        # Progress messages go to stderr (err=True) so stdout carries only the JSON result
+        click.echo(f"Fetching references for DOI: {doi}...", err=True)
+        # NOTE(review): the "max: 1000" quoted in the --limit help is not enforced here;
+        # presumably get_references clamps or rejects larger values -- confirm
+        result = get_references(doi, id_type="doi", limit=limit)
+        papers = result.get("papers", [])
+        # Apply citation filter
+        # (skipped entirely when the threshold is 0 or negative)
+        if min_citations > 0:
+            papers = filter_by_citations(papers, min_citations)
+        # Nothing left: print an empty result and return before contacting Zotero
+        if not papers:
+            click.echo(json.dumps({"count": 0, "papers": []}))
+            return
+        # Optionally annotate with library status
+        if check_library:
+            click.echo("Checking local Zotero library...", err=True)
+            locale = ctx.obj.get("locale", "en-US")
+            zot = _get_zotero_client(locale)
+            # _build_doi_index fetches every item in the library to build the lookup
+            doi_map = _build_doi_index(zot)
+            output_papers = _annotate_with_library(papers, doi_map)
+        else:
+            # Without the library check the output has no "inLibrary" field
+            output_papers = [_format_s2_paper(p) for p in papers]
+        click.echo(
+            json.dumps({"count": len(output_papers), "papers": output_papers}, indent=2)
+        )
+    # Every failure path prints to stderr and exits with status 1.
+    # NOTE(review): PaperNotFoundError and RateLimitError presumably subclass
+    # SemanticScholarError, in which case they must stay ahead of it -- confirm
+    except PaperNotFoundError:
+        click.echo("Error: Paper not found in Semantic Scholar.", err=True)
+        sys.exit(1)
+    except RateLimitError:
+        click.echo("Error: Rate limit exceeded. Please wait and try again.", err=True)
+        sys.exit(1)
+    except SemanticScholarError as e:
+        click.echo(f"Error: {e!s}", err=True)
+        sys.exit(1)
+    # Catch-all boundary for anything else (e.g. errors raised while querying Zotero)
+    except Exception as e:
+        click.echo(f"Error: {e!s}", err=True)
+        sys.exit(1)
+# CLI command `s2search`: free-text Semantic Scholar search, printed as JSON.
+# NOTE: click uses the function docstring as the command's --help text, so it is runtime output.
+@main.command()
+@click.option(
+    "-q",
+    "--query",
+    required=True,
+    help="Search query string",
+)
+@click.option(
+    "--limit",
+    type=int,
+    default=20,
+    help="Maximum number of results to return (default: 20, max: 100)",
+)
+@click.option(
+    "--year",
+    help="Year filter (e.g., '2020', '2018-2022', '2020-')",
+)
+@click.option(
+    "--open-access/--no-open-access",
+    default=False,
+    help="Only return open access papers (default: False)",
+)
+@click.option(
+    "--sort",
+    type=click.Choice(["citations", "year"], case_sensitive=False),
+    help="Sort results by citation count or year (descending)",
+)
+@click.option(
+    "--min-citations",
+    type=int,
+    default=0,
+    help="Minimum citation count filter (default: 0)",
+)
+@click.option(
+    "--check-library/--no-check-library",
+    default=True,
+    help="Check if papers exist in local Zotero (default: True)",
+)
+@click.pass_context
+def s2search(ctx, query, limit, year, open_access, sort, min_citations, check_library):
+    """Search for papers on Semantic Scholar.
+    Search across Semantic Scholar's index of over 200M papers.
+    Examples:
+        pyzotero s2search -q "climate adaptation"
+        pyzotero s2search -q "machine learning" --year 2020-2024
+        pyzotero s2search -q "neural networks" --open-access --limit 50
+        pyzotero s2search -q "deep learning" --sort citations --min-citations 100
+    """
+    try:
+        # Search Semantic Scholar
+        # Progress messages go to stderr (err=True) so stdout carries only the JSON result
+        click.echo(f'Searching Semantic Scholar for: "{query}"...', err=True)
+        # Unlike the DOI-based commands, the citation threshold is handed to
+        # search_papers rather than applied here with filter_by_citations.
+        # NOTE(review): the "max: 100" quoted in the --limit help is not enforced here -- confirm
+        result = search_papers(
+            query,
+            limit=limit,
+            year=year,
+            open_access_only=open_access,
+            sort=sort,
+            min_citations=min_citations,
+        )
+        papers = result.get("papers", [])
+        # "total" falls back to the number of papers returned when the result lacks it
+        total = result.get("total", len(papers))
+        # Nothing found: print an empty result and return before contacting Zotero
+        if not papers:
+            click.echo(json.dumps({"count": 0, "total": total, "papers": []}))
+            return
+        # Optionally annotate with library status
+        if check_library:
+            click.echo("Checking local Zotero library...", err=True)
+            locale = ctx.obj.get("locale", "en-US")
+            zot = _get_zotero_client(locale)
+            # _build_doi_index fetches every item in the library to build the lookup
+            doi_map = _build_doi_index(zot)
+            output_papers = _annotate_with_library(papers, doi_map)
+        else:
+            # Without the library check the output has no "inLibrary" field
+            output_papers = [_format_s2_paper(p) for p in papers]
+        click.echo(
+            json.dumps(
+                {"count": len(output_papers), "total": total, "papers": output_papers},
+                indent=2,
+            )
+        )
+    # Every failure path prints to stderr and exits with status 1.
+    # NOTE(review): RateLimitError presumably subclasses SemanticScholarError,
+    # in which case it must stay ahead of it -- confirm
+    except RateLimitError:
+        click.echo("Error: Rate limit exceeded. Please wait and try again.", err=True)
+        sys.exit(1)
+    except SemanticScholarError as e:
+        click.echo(f"Error: {e!s}", err=True)
+        sys.exit(1)
+    # Catch-all boundary for anything else (e.g. errors raised while querying Zotero)
+    except Exception as e:
+        click.echo(f"Error: {e!s}", err=True)
+        sys.exit(1)
 if __name__ == "__main__":
     main()

pyzotero/errors.py ADDED Viewed

@@ -0,0 +1,185 @@
+"""Exception classes and error handling for Pyzotero.
+This module defines all custom exceptions used by the library
+and the error_handler function for processing HTTP errors.
+"""
+from __future__ import annotations
+from typing import TYPE_CHECKING
+import httpx
+from ._utils import get_backoff_duration
+if TYPE_CHECKING:
+    from typing import Any
+class PyZoteroError(Exception):
+    """Generic parent exception for all Pyzotero errors.
+    Catching this class catches every exception defined in this module.
+    """
+class ParamNotPassedError(PyZoteroError):
+    """Raised if a parameter which is required isn't passed."""
+class CallDoesNotExistError(PyZoteroError):
+    """Raised if the specified API call doesn't exist."""
+class UnsupportedParamsError(PyZoteroError):
+    """Raised when unsupported parameters are passed (mapped from HTTP 400)."""
+class UserNotAuthorisedError(PyZoteroError):
+    """Raised when the user is not allowed to retrieve the resource
+    (mapped from both HTTP 401 and HTTP 403).
+    """
+class TooManyItemsError(PyZoteroError):
+    """Raised when too many items are passed to a Write API method."""
+class MissingCredentialsError(PyZoteroError):
+    """Raised when an attempt is made to create a Zotero instance
+    without providing both the user ID and the user key.
+    """
+class InvalidItemFieldsError(PyZoteroError):
+    """Raised when an attempt is made to create or update items with invalid fields."""
+class ResourceNotFoundError(PyZoteroError):
+    """Raised when a resource (item, collection etc.) could not be found
+    (mapped from HTTP 404).
+    """
+class HTTPError(PyZoteroError):
+    """Raised for miscellaneous HTTP errors, i.e. by error_handler for any
+    status code that has no entry in ERROR_CODES.
+    """
+class CouldNotReachURLError(PyZoteroError):
+    """Raised when we can't reach a URL."""
+class ConflictError(PyZoteroError):
+    """409 - Raised when the target library is locked."""
+class PreConditionFailedError(PyZoteroError):
+    """412 - Raised when the provided X-Zotero-Write-Token has already been
+    submitted.
+    """
+class RequestEntityTooLargeError(PyZoteroError):
+    """413 - Raised when the upload would exceed the storage quota of the
+    library owner.
+    """
+class PreConditionRequiredError(PyZoteroError):
+    """428 - Raised when If-Match or If-None-Match was not provided."""
+class TooManyRequestsError(PyZoteroError):
+    """429 - Raised when there are too many unfinished uploads.
+    Try again after the number of seconds specified in the Retry-After header.
+    Note that error_handler in this module never raises this class itself:
+    for a 429 it either records the backoff or raises TooManyRetriesError.
+    """
+class FileDoesNotExistError(PyZoteroError):
+    """Raised when a file path to be attached can't be opened (or doesn't exist)."""
+class TooManyRetriesError(PyZoteroError):
+    """Raised after the backoff period for new requests exceeds 32s.
+    error_handler also raises it when a 429 response carries neither a
+    Backoff nor a Retry-After header.
+    """
+class UploadError(PyZoteroError):
+    """Raised if the connection drops during upload or some other non-HTTP
+    error code is returned.
+    """
+# Mapping of HTTP status codes to exception classes
+# Consulted by error_handler; codes absent from this table are raised as HTTPError.
+ERROR_CODES: dict[int, type[PyZoteroError]] = {
+    400: UnsupportedParamsError,
+    # 401 and 403 deliberately share one exception class
+    401: UserNotAuthorisedError,
+    403: UserNotAuthorisedError,
+    404: ResourceNotFoundError,
+    409: ConflictError,
+    412: PreConditionFailedError,
+    413: RequestEntityTooLargeError,
+    428: PreConditionRequiredError,
+    # error_handler special-cases 429 and never raises this mapped class for it
+    429: TooManyRequestsError,
+}
+def error_handler(
+    zot: Any, req: httpx.Response, exc: BaseException | None = None
+) -> None:
+    """Translate an unsuccessful HTTP response into a Pyzotero exception.
+    A 429 response that carries a backoff hint is not raised: the hint is
+    passed to zot._set_backoff() and the function returns normally.
+    Args:
+        zot: A Zotero instance (or any object with _set_backoff method)
+        req: The HTTP response object
+        exc: Optional exception that triggered this handler; when given it
+            is chained as the cause of the raised error
+    Raises:
+        TooManyRetriesError: 429 response with neither a Backoff nor a
+            Retry-After header
+        PyZoteroError: the class ERROR_CODES maps the status code to, or
+            HTTPError when the code has no mapping
+    """
+    status = req.status_code
+    mapped = ERROR_CODES.get(status)
+    if mapped and status == httpx.codes.TOO_MANY_REQUESTS:
+        # Rate-limited: honour the server's hint instead of raising
+        delay = get_backoff_duration(req.headers)
+        if not delay:
+            msg = (
+                "You are being rate-limited and no backoff or retry duration "
+                "has been received from the server. Try again later"
+            )
+            raise TooManyRetriesError(msg)
+        zot._set_backoff(delay)
+        return
+    error_cls = mapped if mapped else HTTPError
+    # Built only on the raising path, since it reads req.request
+    details = (
+        f"\nCode: {status}\n"
+        f"URL: {req.url!s}\n"
+        f"Method: {req.request.method}\n"
+        f"Response: {req.text}"
+    )
+    error = error_cls(details)
+    # Keep a plain raise when there is no cause: "raise ... from None" would
+    # suppress the implicit exception context
+    if exc:
+        raise error from exc
+    raise error
+# Names exported by a star-import of this module: the status-code table,
+# every exception class, and error_handler.
+__all__ = [
+    "ERROR_CODES",
+    "CallDoesNotExistError",
+    "ConflictError",
+    "CouldNotReachURLError",
+    "FileDoesNotExistError",
+    "HTTPError",
+    "InvalidItemFieldsError",
+    "MissingCredentialsError",
+    "ParamNotPassedError",
+    "PreConditionFailedError",
+    "PreConditionRequiredError",
+    "PyZoteroError",
+    "RequestEntityTooLargeError",
+    "ResourceNotFoundError",
+    "TooManyItemsError",
+    "TooManyRequestsError",
+    "TooManyRetriesError",
+    "UnsupportedParamsError",
+    "UploadError",
+    "UserNotAuthorisedError",
+    "error_handler",
+]

pyzotero 1.7.6__py3-none-any.whl → 1.8.0__py3-none-any.whl

pyzotero 1.7.6__py3-none-any.whl → 1.8.0__py3-none-any.whl