pyzotero 1.7.5__py3-none-any.whl → 1.8.0__py3-none-any.whl

@@ -0,0 +1,195 @@
+ """Decorator functions for Pyzotero.
+
+ These decorators handle caching, backoff, and response processing for API calls.
+ They are tightly coupled with the Zotero class and are internal implementation details.
+ """
+
+ from __future__ import annotations
+
+ import io
+ import zipfile
+ from functools import wraps
+ from typing import TYPE_CHECKING, Any
+ from urllib.parse import urlparse
+
+ import bibtexparser
+ import feedparser
+ import httpx
+ from httpx import Request
+
+ from ._utils import DEFAULT_TIMEOUT, build_url, get_backoff_duration
+ from .errors import error_handler
+
+ if TYPE_CHECKING:
+     from collections.abc import Callable
+
+
+ def cleanwrap(func: Callable) -> Callable:
+     """Wrap Zotero._cleanup to process multiple items."""
+
+     @wraps(func)
+     def enc(self, *args, **kwargs):
+         """Send each item to _cleanup()."""
+         return (func(self, item, **kwargs) for item in args)
+
+     return enc
+
+
+ def tcache(func: Callable) -> Callable:
+     """Handle URL building and caching for template functions."""
+
+     @wraps(func)
+     def wrapped_f(self, *args, **kwargs):
+         """Call the decorated function to get the query string and params, then
+         build the URL, retrieve the template, cache the result, and return the template.
+         """
+         query_string, params = func(self, *args, **kwargs)
+         params["timeout"] = DEFAULT_TIMEOUT
+         r = Request(
+             "GET",
+             build_url(self.endpoint, query_string),
+             params=params,
+         )
+         response = self.client.send(r)
+
+         # now split up the URL
+         result = urlparse(str(response.url))
+         # construct cache key
+         cachekey = f"{result.path}_{result.query}"
+         if self.templates.get(cachekey) and not self._updated(
+             query_string,
+             self.templates[cachekey],
+             cachekey,
+         ):
+             return self.templates[cachekey]["tmplt"]
+         # otherwise perform a normal request and cache the response
+         retrieved = self._retrieve_data(query_string, params=params)
+         return self._cache(retrieved, cachekey)
+
+     return wrapped_f
+
+
+ def backoff_check(func: Callable) -> Callable:
+     """Perform backoff processing for write operations.
+
+     func must return an httpx GET / POST / PUT / PATCH / DELETE response.
+     This is intercepted: we first check for an active backoff
+     and wait if need be.
+     After the response is received, we do normal error checking
+     and set a new backoff if necessary, before returning.
+
+     Use with functions that are intended to return True.
+     """
+
+     @wraps(func)
+     def wrapped_f(self, *args, **kwargs):
+         self._check_backoff()
+         # resp is an httpx response object
+         resp = func(self, *args, **kwargs)
+         try:
+             resp.raise_for_status()
+         except httpx.HTTPError as exc:
+             error_handler(self, resp, exc)
+         self.request = resp
+         backoff = get_backoff_duration(resp.headers)
+         if backoff:
+             self._set_backoff(backoff)
+
+         return True
+
+     return wrapped_f
+
+
+ def retrieve(func: Callable) -> Callable:
+     """Call _retrieve_data() and pass the result to the correct processor."""
+
+     @wraps(func)
+     def wrapped_f(self, *args, **kwargs) -> Any:
+         """Return result of _retrieve_data().
+
+         func's return value is part of a URI, and it's this
+         which is intercepted and passed to _retrieve_data:
+         '/users/123/items?key=abc123'
+         """
+         if kwargs:
+             self.add_parameters(**kwargs)
+         retrieved = self._retrieve_data(func(self, *args))
+         # we now always have links in the header response
+         self.links = self._extract_links()
+         # determine content and format, based on url params
+         content = (
+             self.content.search(str(self.request.url))
+             and self.content.search(str(self.request.url)).group(0)
+         ) or "bib"
+         # select format, or assume JSON
+         content_type_header = self.request.headers["Content-Type"].lower() + ";"
+         fmt = self.formats.get(
+             # strip "; charset=..." segment
+             content_type_header[0 : content_type_header.index(";")],
+             "json",
+         )
+         # clear all query parameters
+         self.url_params = None
+         # Zotero API returns plain-text attachments as zipped content
+         # We can inspect the redirect header to check whether Zotero compressed the file
+         if fmt == "zip":
+             if (
+                 self.request.history
+                 and self.request.history[0].headers.get("Zotero-File-Compressed")
+                 == "Yes"
+             ):
+                 z = zipfile.ZipFile(io.BytesIO(retrieved.content))
+                 namelist = z.namelist()
+                 file = z.read(namelist[0])
+             else:
+                 file = retrieved.content
+             return file
+         # check to see whether it's tag data
+         if "tags" in str(self.request.url):
+             self.tag_data = False
+             return self._tags_data(retrieved.json())
+         if fmt == "atom":
+             parsed = feedparser.parse(retrieved.text)
+             # select the correct processor
+             processor = self.processors.get(content)
+             # process the content correctly with a custom rule
+             return processor(parsed)
+         if fmt == "snapshot":
+             # we need to dump as a zip!
+             self.snapshot = True
+         if fmt == "bibtex":
+             parser = bibtexparser.bparser.BibTexParser(
+                 common_strings=True,
+                 ignore_nonstandard_types=False,
+             )
+             return parser.parse(retrieved.text)
+         # it's binary, so return raw content
+         if fmt != "json":
+             return retrieved.content
+         # no need to do anything special, return JSON
+         return retrieved.json()
+
+     return wrapped_f
+
+
+ def ss_wrap(func: Callable) -> Callable:
+     """Ensure that a SavedSearch object exists before method execution."""
+
+     def wrapper(self, *args, **kwargs):
+         if not self.savedsearch:
+             # Import here to avoid circular imports
+             from ._search import SavedSearch  # noqa: PLC0415
+
+             self.savedsearch = SavedSearch(self)
+         return func(self, *args, **kwargs)
+
+     return wrapper
+
+
+ __all__ = [
+     "backoff_check",
+     "cleanwrap",
+     "retrieve",
+     "ss_wrap",
+     "tcache",
+ ]
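
For context, these decorators wrap the read methods of the Zotero client, so they are exercised indirectly through the public API. Below is a minimal usage sketch, assuming standard pyzotero calls; the library ID and API key are placeholders, and the mapping of specific methods to specific decorators (items() via retrieve, item_types() via tcache) is inferred from the behaviour above rather than shown in this hunk.

from pyzotero import zotero

# Placeholder credentials: substitute a real library ID and API key.
zot = zotero.Zotero("1234567", "user", "your-api-key")

# A read call like items() goes through a wrapper like @retrieve above:
# the wrapped method returns a URI fragment, the decorator fetches it,
# inspects the Content-Type header, and returns parsed JSON / Atom / BibTeX.
recent = zot.items(limit=5)

# Template helpers such as item_types() follow the @tcache pattern: the first
# call hits the API and caches the result; later calls reuse the cached copy
# unless the server reports it has changed.
types = zot.item_types()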
pyzotero/_search.py ADDED
@@ -0,0 +1,190 @@
+ """Saved search functionality for Pyzotero.
+
+ This module contains the SavedSearch class for creating and managing
+ Zotero saved searches.
+ """
+
+ from __future__ import annotations
+
+ from typing import TYPE_CHECKING
+
+ from . import errors as ze
+
+ if TYPE_CHECKING:
+     from ._client import Zotero
+
+
+ class SavedSearch:
+     """Saved search functionality.
+
+     See https://github.com/zotero/zotero/blob/master/chrome/content/zotero/xpcom/data/searchConditions.js
+     """
+
+     def __init__(self, zinstance: Zotero) -> None:
+         super().__init__()
+         self.zinstance = zinstance
+         self.searchkeys = ("condition", "operator", "value")
+         # always exclude these fields from zotero.item_keys()
+         self.excluded_items = (
+             "accessDate",
+             "date",
+             "pages",
+             "section",
+             "seriesNumber",
+             "issue",
+         )
+         self.operators = {
+             "is": "is",
+             "isNot": "isNot",
+             "beginsWith": "beginsWith",
+             "contains": "contains",
+             "doesNotContain": "doesNotContain",
+             "isLessThan": "isLessThan",
+             "isGreaterThan": "isGreaterThan",
+             "isBefore": "isBefore",
+             "isAfter": "isAfter",
+             "isInTheLast": "isInTheLast",
+             "any": "any",
+             "all": "all",
+             "true": "true",
+             "false": "false",
+         }
+         # common groupings of operators
+         self.groups = {
+             "A": (self.operators["true"], self.operators["false"]),
+             "B": (self.operators["any"], self.operators["all"]),
+             "C": (
+                 self.operators["is"],
+                 self.operators["isNot"],
+                 self.operators["contains"],
+                 self.operators["doesNotContain"],
+             ),
+             "D": (self.operators["is"], self.operators["isNot"]),
+             "E": (
+                 self.operators["is"],
+                 self.operators["isNot"],
+                 self.operators["isBefore"],
+                 self.operators["isInTheLast"],
+             ),
+             "F": (self.operators["contains"], self.operators["doesNotContain"]),
+             "G": (
+                 self.operators["is"],
+                 self.operators["isNot"],
+                 self.operators["contains"],
+                 self.operators["doesNotContain"],
+                 self.operators["isLessThan"],
+                 self.operators["isGreaterThan"],
+             ),
+             "H": (
+                 self.operators["is"],
+                 self.operators["isNot"],
+                 self.operators["beginsWith"],
+             ),
+             "I": (self.operators["is"],),  # trailing comma keeps this a one-element tuple
+         }
+         self.conditions_operators = {
+             "deleted": self.groups["A"],
+             "noChildren": self.groups["A"],
+             "unfiled": self.groups["A"],
+             "publications": self.groups["A"],
+             "retracted": self.groups["A"],
+             "includeParentsAndChildren": self.groups["A"],
+             "includeParents": self.groups["A"],
+             "includeChildren": self.groups["A"],
+             "recursive": self.groups["A"],
+             "joinMode": self.groups["B"],
+             "quicksearch-titleCreatorYear": self.groups["C"],
+             "quicksearch-titleCreatorYearNote": self.groups["C"],
+             "quicksearch-fields": self.groups["C"],
+             "quicksearch-everything": self.groups["C"],
+             "collectionID": self.groups["D"],
+             "savedSearchID": self.groups["D"],
+             "collection": self.groups["D"],
+             "savedSearch": self.groups["D"],
+             "dateAdded": self.groups["E"],
+             "dateModified": self.groups["E"],
+             "itemType": self.groups["D"],
+             "fileTypeID": self.groups["D"],
+             "tagID": self.groups["D"],
+             "tag": self.groups["C"],
+             "note": self.groups["F"],
+             "childNote": self.groups["F"],
+             "creator": self.groups["C"],
+             "lastName": self.groups["C"],
+             "field": self.groups["C"],
+             "datefield": self.groups["E"],
+             "year": self.groups["C"],
+             "numberfield": self.groups["G"],
+             "libraryID": self.groups["D"],
+             "key": self.groups["H"],
+             "itemID": self.groups["D"],
+             "annotationText": self.groups["F"],
+             "annotationComment": self.groups["F"],
+             "fulltextWord": self.groups["F"],
+             "fulltextContent": self.groups["F"],
+             "tempTable": self.groups["I"],
+         }
+         ###########
+         # ALIASES #
+         ###########
+         # aliases for numberfield
+         pagefields = (
+             "pages",
+             "numPages",
+             "numberOfVolumes",
+             "section",
+             "seriesNumber",
+             "issue",
+         )
+         for pf in pagefields:
+             self.conditions_operators[pf] = self.conditions_operators.get("numberfield")
+         # aliases for datefield
+         datefields = ("accessDate", "date", "dateDue", "accepted")
+         for df in datefields:
+             self.conditions_operators[df] = self.conditions_operators.get("datefield")
+         # aliases for field - this makes a blocking API call unless item types have been cached
+         item_fields = [
+             itm["field"]
+             for itm in self.zinstance.item_fields()
+             if itm["field"] not in set(self.excluded_items)
+         ]
+         for itf in item_fields:
+             self.conditions_operators[itf] = self.conditions_operators.get("field")
+
+     def _validate(self, conditions: list[dict]) -> None:
+         """Validate saved search conditions.
+
+         Raises an error if any contain invalid operators.
+         """
+         allowed_keys = set(self.searchkeys)
+         operators_set = set(self.operators.keys())
+         for condition in conditions:
+             if set(condition.keys()) != allowed_keys:
+                 msg = f"Keys must be all of: {', '.join(self.searchkeys)}"
+                 raise ze.ParamNotPassedError(msg)
+             if condition.get("operator") not in operators_set:
+                 msg = f"You have specified an unknown operator: {condition.get('operator')}"
+                 raise ze.ParamNotPassedError(msg)
+             # dict keys of allowed operators for the current condition
+             permitted_operators = self.conditions_operators.get(
+                 condition.get("condition"),
+             )
+             if permitted_operators is None:
+                 msg = f"Unknown condition: {condition.get('condition')}"
+                 raise ze.ParamNotPassedError(msg)
+             # transform these into values
+             permitted_operators_list = {
+                 op_value
+                 for op in permitted_operators
+                 if (op_value := self.operators.get(op)) is not None
+             }
+             if condition.get("operator") not in permitted_operators_list:
+                 msg = (
+                     f"You may not use the '{condition.get('operator')}' operator when "
+                     f"selecting the '{condition.get('condition')}' condition. \n"
+                     f"Allowed operators: {', '.join(list(permitted_operators_list))}"
+                 )
+                 raise ze.ParamNotPassedError(msg)
+
+
+ __all__ = ["SavedSearch"]
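
The validation above expects each condition to be a dict with exactly the keys "condition", "operator", and "value", and checks the operator against the group permitted for that condition; it does not validate values. A hedged sketch of a payload that passes _validate is below. saved_search() is the documented public pyzotero method for creating saved searches; its routing through SavedSearch is assumed from the ss_wrap decorator above, and the credentials and values are placeholders.

from pyzotero import zotero

zot = zotero.Zotero("1234567", "user", "your-api-key")  # placeholder credentials

conditions = [
    # "itemType" is in group D, so only "is" / "isNot" are allowed
    {"condition": "itemType", "operator": "is", "value": "book"},
    # "tag" is in group C, which also permits "contains" / "doesNotContain"
    {"condition": "tag", "operator": "contains", "value": "to-read"},
    # "dateAdded" is in group E, which permits "isInTheLast";
    # the value format is left to the Zotero API, _validate does not check it
    {"condition": "dateAdded", "operator": "isInTheLast", "value": "30"},
]
zot.saved_search("Recently added books to read", conditions)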
pyzotero/_upload.py ADDED
@@ -0,0 +1,241 @@
+ """File upload functionality for Pyzotero.
+
+ This module contains the Zupload class for handling file attachments
+ and uploads to the Zotero API.
+ """
+
+ from __future__ import annotations
+
+ import hashlib
+ import json
+ import mimetypes
+ from pathlib import Path
+ from typing import TYPE_CHECKING, Any
+
+ import httpx
+
+ import pyzotero as pz
+
+ from . import errors as ze
+ from ._utils import build_url, get_backoff_duration, token
+ from .errors import error_handler
+
+ if TYPE_CHECKING:
+     from ._client import Zotero
+
+
+ class Zupload:
+     """Zotero file attachment helper.
+
+     Receives a Zotero instance, file(s) to upload, and optional parent ID.
+     """
+
+     def __init__(
+         self,
+         zinstance: Zotero,
+         payload: list[dict],
+         parentid: str | None = None,
+         basedir: str | Path | None = None,
+     ) -> None:
+         super().__init__()
+         self.zinstance = zinstance
+         self.payload = payload
+         self.parentid = parentid
+         if basedir is None:
+             self.basedir = Path()
+         elif isinstance(basedir, Path):
+             self.basedir = basedir
+         else:
+             self.basedir = Path(basedir)
+
+     def _verify(self, payload: list[dict]) -> None:
+         """Ensure that all files to be attached exist.
+
+         open() is better than exists() because it avoids a race condition.
+         """
+         if not payload:  # Check payload has nonzero length
+             raise ze.ParamNotPassedError
+         for templt in payload:
+             filepath = self.basedir.joinpath(templt["filename"])
+             try:
+                 with filepath.open():
+                     pass
+             except OSError:
+                 msg = f"The file at {filepath!s} couldn't be opened or found."
+                 raise ze.FileDoesNotExistError(msg) from None
+
+     def _create_prelim(self) -> dict | None:
+         """Step 0: Register intent to upload files."""
+         self._verify(self.payload)
+         if "key" in self.payload[0] and self.payload[0]["key"]:
+             if next((i for i in self.payload if "key" not in i), False):
+                 msg = "Can't pass payload entries with and without keys to Zupload"
+                 raise ze.UnsupportedParamsError(msg)
+             return None  # Don't do anything if payload comes with keys
+         # Set contentType for each attachment if not already provided
+         for item in self.payload:
+             if not item.get("contentType"):
+                 filepath = str(self.basedir.joinpath(item["filename"]))
+                 detected_type = mimetypes.guess_type(filepath)[0]
+                 item["contentType"] = detected_type or "application/octet-stream"
+         liblevel = "/{t}/{u}/items"
+         # Create one or more new attachments
+         headers = {"Zotero-Write-Token": token(), "Content-Type": "application/json"}
+         # If we have a Parent ID, add it as a parentItem
+         if self.parentid:
+             for child in self.payload:
+                 child["parentItem"] = self.parentid
+         to_send = json.dumps(self.payload)
+         self.zinstance._check_backoff()
+         req = self.zinstance.client.post(
+             url=build_url(
+                 self.zinstance.endpoint,
+                 liblevel.format(
+                     t=self.zinstance.library_type,
+                     u=self.zinstance.library_id,
+                 ),
+             ),
+             content=to_send,
+             headers=headers,
+         )
+         try:
+             req.raise_for_status()
+         except httpx.HTTPError as exc:
+             error_handler(self.zinstance, req, exc)
+         backoff = get_backoff_duration(req.headers)
+         if backoff:
+             self.zinstance._set_backoff(backoff)
+         data = req.json()
+         for k in data["success"]:
+             self.payload[int(k)]["key"] = data["success"][k]
+         return data
+
+     def _get_auth(
+         self, attachment: str, reg_key: str, md5: str | None = None
+     ) -> dict[str, Any]:
+         """Step 1: get upload authorisation for a file."""
+         mtypes = mimetypes.guess_type(attachment)
+         digest = hashlib.md5()  # noqa: S324
+         with Path(attachment).open("rb") as att:
+             for chunk in iter(lambda: att.read(8192), b""):
+                 digest.update(chunk)
+         auth_headers = {"Content-Type": "application/x-www-form-urlencoded"}
+         if not md5:
+             auth_headers["If-None-Match"] = "*"
+         else:
+             # docs specify that for existing file we use this
+             auth_headers["If-Match"] = md5
+         data = {
+             "md5": digest.hexdigest(),
+             "filename": Path(attachment).name,
+             "filesize": Path(attachment).stat().st_size,
+             "mtime": str(int(Path(attachment).stat().st_mtime * 1000)),
+             "contentType": mtypes[0] or "application/octet-stream",
+             "charset": mtypes[1],
+             "params": 1,
+         }
+         self.zinstance._check_backoff()
+         auth_req = self.zinstance.client.post(
+             url=build_url(
+                 self.zinstance.endpoint,
+                 f"/{self.zinstance.library_type}/{self.zinstance.library_id}/items/{reg_key}/file",
+             ),
+             data=data,
+             headers=auth_headers,
+         )
+         try:
+             auth_req.raise_for_status()
+         except httpx.HTTPError as exc:
+             error_handler(self.zinstance, auth_req, exc)
+         backoff = get_backoff_duration(auth_req.headers)
+         if backoff:
+             self.zinstance._set_backoff(backoff)
+         return auth_req.json()
+
+     def _upload_file(
+         self, authdata: dict[str, Any], attachment: str, reg_key: str
+     ) -> None:
+         """Step 2: auth successful, and file not on server.
+
+         See zotero.org/support/dev/server_api/file_upload#a_full_upload
+
+         reg_key isn't used, but we need to pass it through to Step 3.
+         """
+         upload_dict = authdata["params"]
+         # pass tuple of tuples (not dict!), to ensure key comes first
+         upload_list = [("key", upload_dict.pop("key"))]
+         for key, value in upload_dict.items():
+             upload_list.append((key, value))
+         upload_list.append(("file", Path(attachment).open("rb").read()))
+         upload_pairs = tuple(upload_list)
+         try:
+             self.zinstance._check_backoff()
+             # We use a fresh httpx POST because we don't want our existing Pyzotero headers
+             # for a call to the storage upload URL (currently S3)
+             upload = httpx.post(
+                 url=authdata["url"],
+                 files=upload_pairs,
+                 headers={"User-Agent": f"Pyzotero/{pz.__version__}"},
+             )
+         except httpx.ConnectError:
+             msg = "ConnectionError"
+             raise ze.UploadError(msg) from None
+         try:
+             upload.raise_for_status()
+         except httpx.HTTPError as exc:
+             error_handler(self.zinstance, upload, exc)
+         backoff = get_backoff_duration(upload.headers)
+         if backoff:
+             self.zinstance._set_backoff(backoff)
+         # now check the responses
+         return self._register_upload(authdata, reg_key)
+
+     def _register_upload(self, authdata: dict[str, Any], reg_key: str) -> None:
+         """Step 3: upload successful, so register it."""
+         reg_headers = {
+             "Content-Type": "application/x-www-form-urlencoded",
+             "If-None-Match": "*",
+         }
+         reg_data = {"upload": authdata.get("uploadKey")}
+         self.zinstance._check_backoff()
+         upload_reg = self.zinstance.client.post(
+             url=build_url(
+                 self.zinstance.endpoint,
+                 f"/{self.zinstance.library_type}/{self.zinstance.library_id}/items/{reg_key}/file",
+             ),
+             data=reg_data,
+             headers=reg_headers,
+         )
+         try:
+             upload_reg.raise_for_status()
+         except httpx.HTTPError as exc:
+             error_handler(self.zinstance, upload_reg, exc)
+         backoff = get_backoff_duration(upload_reg.headers)
+         if backoff:
+             self.zinstance._set_backoff(backoff)
+
+     def upload(self) -> dict[str, list]:
+         """Run upload steps 0-3 (the private methods above).
+
+         Returns a dict with "success", "failure", and "unchanged" keys,
+         each mapping to the list of payload entries that ended up with
+         that status.
+         """
+         result: dict[str, list] = {"success": [], "failure": [], "unchanged": []}
+         self._create_prelim()
+         for item in self.payload:
+             if "key" not in item:
+                 result["failure"].append(item)
+                 continue
+             attach = str(self.basedir.joinpath(item["filename"]))
+             authdata = self._get_auth(attach, item["key"], md5=item.get("md5", None))
+             # no need to keep going if the file exists
+             if authdata.get("exists"):
+                 result["unchanged"].append(item)
+                 continue
+             self._upload_file(authdata, attach, item["key"])
+             result["success"].append(item)
+         return result
+
+
+ __all__ = ["Zupload"]
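
A minimal sketch of driving Zupload directly, using the constructor and payload shape defined above (dicts keyed by "filename", resolved against basedir). The credentials, file path, and parent item key are placeholders; in practice the class is normally reached through the client's public attachment methods rather than instantiated by hand.

from pyzotero import zotero
from pyzotero._upload import Zupload

zot = zotero.Zotero("1234567", "user", "your-api-key")  # placeholder credentials

# One payload entry per file; "contentType" is optional and is guessed in
# _create_prelim() if omitted. "ABCD2345" is a placeholder parent item key.
payload = [{"filename": "report.pdf"}]
uploader = Zupload(zot, payload, parentid="ABCD2345", basedir="/path/to/files")

# upload() runs steps 0-3 and reports each entry under exactly one status.
result = uploader.upload()
print(result["success"], result["failure"], result["unchanged"])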