pyzotero 1.7.5__py3-none-any.whl → 1.8.0__py3-none-any.whl

@@ -0,0 +1,195 @@
+ """Decorator functions for Pyzotero.
+
+ These decorators handle caching, backoff, and response processing for API calls.
+ They are tightly coupled with the Zotero class and are internal implementation details.
+ """
+
+ from __future__ import annotations
+
+ import io
+ import zipfile
+ from functools import wraps
+ from typing import TYPE_CHECKING, Any
+ from urllib.parse import urlparse
+
+ import bibtexparser
+ import feedparser
+ import httpx
+ from httpx import Request
+
+ from ._utils import DEFAULT_TIMEOUT, build_url, get_backoff_duration
+ from .errors import error_handler
+
+ if TYPE_CHECKING:
+     from collections.abc import Callable
+
+
+ def cleanwrap(func: Callable) -> Callable:
+     """Wrap Zotero._cleanup to process multiple items."""
+
+     @wraps(func)
+     def enc(self, *args, **kwargs):
+         """Send each item to _cleanup()."""
+         return (func(self, item, **kwargs) for item in args)
+
+     return enc
+
+
+ def tcache(func: Callable) -> Callable:
+     """Handle URL building and caching for template functions."""
+
+     @wraps(func)
+     def wrapped_f(self, *args, **kwargs):
+         """Call the decorated function to get the query string and params, then
+         build the URL, retrieve the template, cache the result, and return the template.
+         """
+         query_string, params = func(self, *args, **kwargs)
+         params["timeout"] = DEFAULT_TIMEOUT
+         r = Request(
+             "GET",
+             build_url(self.endpoint, query_string),
+             params=params,
+         )
+         response = self.client.send(r)
+
+         # now split up the URL
+         result = urlparse(str(response.url))
+         # construct cache key
+         cachekey = f"{result.path}_{result.query}"
+         if self.templates.get(cachekey) and not self._updated(
+             query_string,
+             self.templates[cachekey],
+             cachekey,
+         ):
+             return self.templates[cachekey]["tmplt"]
+         # otherwise perform a normal request and cache the response
+         retrieved = self._retrieve_data(query_string, params=params)
+         return self._cache(retrieved, cachekey)
+
+     return wrapped_f
+
+
+ def backoff_check(func: Callable) -> Callable:
+     """Perform backoff processing for write operations.
+
+     func must return an httpx GET / POST / PUT / PATCH / DELETE response.
+     This is intercepted: we first check for an active backoff
+     and wait if need be.
+     After the response is received, we do normal error checking
+     and set a new backoff if necessary, before returning.
+
+     Use with functions that are intended to return True.
+     """
+
+     @wraps(func)
+     def wrapped_f(self, *args, **kwargs):
+         self._check_backoff()
+         # resp is an httpx response object
+         resp = func(self, *args, **kwargs)
+         try:
+             resp.raise_for_status()
+         except httpx.HTTPError as exc:
+             error_handler(self, resp, exc)
+         self.request = resp
+         backoff = get_backoff_duration(resp.headers)
+         if backoff:
+             self._set_backoff(backoff)
+
+         return True
+
+     return wrapped_f
+
+
+ def retrieve(func: Callable) -> Callable:
+     """Call _retrieve_data() and pass the result to the correct processor."""
+
+     @wraps(func)
+     def wrapped_f(self, *args, **kwargs) -> Any:
+         """Return result of _retrieve_data().
+
+         func's return value is part of a URI, and it's this
+         which is intercepted and passed to _retrieve_data:
+         '/users/123/items?key=abc123'
+         """
+         if kwargs:
+             self.add_parameters(**kwargs)
+         retrieved = self._retrieve_data(func(self, *args))
+         # we now always have links in the header response
+         self.links = self._extract_links()
+         # determine content and format, based on url params
+         content = (
+             self.content.search(str(self.request.url))
+             and self.content.search(str(self.request.url)).group(0)
+         ) or "bib"
+         # select format, or assume JSON
+         content_type_header = self.request.headers["Content-Type"].lower() + ";"
+         fmt = self.formats.get(
+             # strip "; charset=..." segment
+             content_type_header[0 : content_type_header.index(";")],
+             "json",
+         )
+         # clear all query parameters
+         self.url_params = None
+         # Zotero API returns plain-text attachments as zipped content
+         # We can inspect the redirect header to check whether Zotero compressed the file
+         if fmt == "zip":
+             if (
+                 self.request.history
+                 and self.request.history[0].headers.get("Zotero-File-Compressed")
+                 == "Yes"
+             ):
+                 z = zipfile.ZipFile(io.BytesIO(retrieved.content))
+                 namelist = z.namelist()
+                 file = z.read(namelist[0])
+             else:
+                 file = retrieved.content
+             return file
+         # check to see whether it's tag data
+         if "tags" in str(self.request.url):
+             self.tag_data = False
+             return self._tags_data(retrieved.json())
+         if fmt == "atom":
+             parsed = feedparser.parse(retrieved.text)
+             # select the correct processor
+             processor = self.processors.get(content)
+             # process the content correctly with a custom rule
+             return processor(parsed)
+         if fmt == "snapshot":
+             # we need to dump as a zip!
+             self.snapshot = True
+         if fmt == "bibtex":
+             parser = bibtexparser.bparser.BibTexParser(
+                 common_strings=True,
+                 ignore_nonstandard_types=False,
+             )
+             return parser.parse(retrieved.text)
+         # it's binary, so return raw content
+         if fmt != "json":
+             return retrieved.content
+         # no need to do anything special, return JSON
+         return retrieved.json()
+
+     return wrapped_f
+
+
+ def ss_wrap(func: Callable) -> Callable:
+     """Ensure that a SavedSearch object exists before method execution."""
+
+     def wrapper(self, *args, **kwargs):
+         if not self.savedsearch:
+             # Import here to avoid circular imports
+             from ._search import SavedSearch  # noqa: PLC0415
+
+             self.savedsearch = SavedSearch(self)
+         return func(self, *args, **kwargs)
+
+     return wrapper
+
+
+ __all__ = [
+     "backoff_check",
+     "cleanwrap",
+     "retrieve",
+     "ss_wrap",
+     "tcache",
+ ]
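
For context, these decorators wrap the read methods of the Zotero client, so they are exercised indirectly through the public API. Below is a minimal usage sketch, assuming standard pyzotero calls; the library ID and API key are placeholders, and the mapping of specific methods to specific decorators (items() via retrieve, item_types() via tcache) is inferred from the behaviour above rather than shown in this hunk.

from pyzotero import zotero

# Placeholder credentials: substitute a real library ID and API key.
zot = zotero.Zotero("1234567", "user", "your-api-key")

# A read call like items() goes through a wrapper like @retrieve above:
# the wrapped method returns a URI fragment, the decorator fetches it,
# inspects the Content-Type header, and returns parsed JSON / Atom / BibTeX.
recent = zot.items(limit=5)

# Template helpers such as item_types() follow the @tcache pattern: the first
# call hits the API and caches the result; later calls reuse the cached copy
# unless the server reports it has changed.
types = zot.item_types()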
pyzotero/_search.py ADDED
@@ -0,0 +1,190 @@
+ """Saved search functionality for Pyzotero.
+
+ This module contains the SavedSearch class for creating and managing
+ Zotero saved searches.
+ """
+
+ from __future__ import annotations
+
+ from typing import TYPE_CHECKING
+
+ from . import errors as ze
+
+ if TYPE_CHECKING:
+     from ._client import Zotero
+
+
+ class SavedSearch:
+     """Saved search functionality.
+
+     See https://github.com/zotero/zotero/blob/master/chrome/content/zotero/xpcom/data/searchConditions.js
+     """
+
+     def __init__(self, zinstance: Zotero) -> None:
+         super().__init__()
+         self.zinstance = zinstance
+         self.searchkeys = ("condition", "operator", "value")
+         # always exclude these fields from zotero.item_keys()
+         self.excluded_items = (
+             "accessDate",
+             "date",
+             "pages",
+             "section",
+             "seriesNumber",
+             "issue",
+         )
+         self.operators = {
+             "is": "is",
+             "isNot": "isNot",
+             "beginsWith": "beginsWith",
+             "contains": "contains",
+             "doesNotContain": "doesNotContain",
+             "isLessThan": "isLessThan",
+             "isGreaterThan": "isGreaterThan",
+             "isBefore": "isBefore",
+             "isAfter": "isAfter",
+             "isInTheLast": "isInTheLast",
+             "any": "any",
+             "all": "all",
+             "true": "true",
+             "false": "false",
+         }
+         # common groupings of operators
+         self.groups = {
+             "A": (self.operators["true"], self.operators["false"]),
+             "B": (self.operators["any"], self.operators["all"]),
+             "C": (
+                 self.operators["is"],
+                 self.operators["isNot"],
+                 self.operators["contains"],
+                 self.operators["doesNotContain"],
+             ),
+             "D": (self.operators["is"], self.operators["isNot"]),
+             "E": (
+                 self.operators["is"],
+                 self.operators["isNot"],
+                 self.operators["isBefore"],
+                 self.operators["isInTheLast"],
+             ),
+             "F": (self.operators["contains"], self.operators["doesNotContain"]),
+             "G": (
+                 self.operators["is"],
+                 self.operators["isNot"],
+                 self.operators["contains"],
+                 self.operators["doesNotContain"],
+                 self.operators["isLessThan"],
+                 self.operators["isGreaterThan"],
+             ),
+             "H": (
+                 self.operators["is"],
+                 self.operators["isNot"],
+                 self.operators["beginsWith"],
+             ),
+             "I": (self.operators["is"],),  # trailing comma keeps this a one-element tuple
+         }
+         self.conditions_operators = {
+             "deleted": self.groups["A"],
+             "noChildren": self.groups["A"],
+             "unfiled": self.groups["A"],
+             "publications": self.groups["A"],
+             "retracted": self.groups["A"],
+             "includeParentsAndChildren": self.groups["A"],
+             "includeParents": self.groups["A"],
+             "includeChildren": self.groups["A"],
+             "recursive": self.groups["A"],
+             "joinMode": self.groups["B"],
+             "quicksearch-titleCreatorYear": self.groups["C"],
+             "quicksearch-titleCreatorYearNote": self.groups["C"],
+             "quicksearch-fields": self.groups["C"],
+             "quicksearch-everything": self.groups["C"],
+             "collectionID": self.groups["D"],
+             "savedSearchID": self.groups["D"],
+             "collection": self.groups["D"],
+             "savedSearch": self.groups["D"],
+             "dateAdded": self.groups["E"],
+             "dateModified": self.groups["E"],
+             "itemType": self.groups["D"],
+             "fileTypeID": self.groups["D"],
+             "tagID": self.groups["D"],
+             "tag": self.groups["C"],
+             "note": self.groups["F"],
+             "childNote": self.groups["F"],
+             "creator": self.groups["C"],
+             "lastName": self.groups["C"],
+             "field": self.groups["C"],
+             "datefield": self.groups["E"],
+             "year": self.groups["C"],
+             "numberfield": self.groups["G"],
+             "libraryID": self.groups["D"],
+             "key": self.groups["H"],
+             "itemID": self.groups["D"],
+             "annotationText": self.groups["F"],
+             "annotationComment": self.groups["F"],
+             "fulltextWord": self.groups["F"],
+             "fulltextContent": self.groups["F"],
+             "tempTable": self.groups["I"],
+         }
+         ###########
+         # ALIASES #
+         ###########
+         # aliases for numberfield
+         pagefields = (
+             "pages",
+             "numPages",
+             "numberOfVolumes",
+             "section",
+             "seriesNumber",
+             "issue",
+         )
+         for pf in pagefields:
+             self.conditions_operators[pf] = self.conditions_operators.get("numberfield")
+         # aliases for datefield
+         datefields = ("accessDate", "date", "dateDue", "accepted")
+         for df in datefields:
+             self.conditions_operators[df] = self.conditions_operators.get("datefield")
+         # aliases for field - this makes a blocking API call unless item types have been cached
+         item_fields = [
+             itm["field"]
+             for itm in self.zinstance.item_fields()
+             if itm["field"] not in set(self.excluded_items)
+         ]
+         for itf in item_fields:
+             self.conditions_operators[itf] = self.conditions_operators.get("field")
+
+     def _validate(self, conditions: list[dict]) -> None:
+         """Validate saved search conditions.
+
+         Raises an error if any contain invalid operators.
+         """
+         allowed_keys = set(self.searchkeys)
+         operators_set = set(self.operators.keys())
+         for condition in conditions:
+             if set(condition.keys()) != allowed_keys:
+                 msg = f"Keys must be all of: {', '.join(self.searchkeys)}"
+                 raise ze.ParamNotPassedError(msg)
+             if condition.get("operator") not in operators_set:
+                 msg = f"You have specified an unknown operator: {condition.get('operator')}"
+                 raise ze.ParamNotPassedError(msg)
+             # dict keys of allowed operators for the current condition
+             permitted_operators = self.conditions_operators.get(
+                 condition.get("condition"),
+             )
+             if permitted_operators is None:
+                 msg = f"Unknown condition: {condition.get('condition')}"
+                 raise ze.ParamNotPassedError(msg)
+             # transform these into values
+             permitted_operators_list = {
+                 op_value
+                 for op in permitted_operators
+                 if (op_value := self.operators.get(op)) is not None
+             }
+             if condition.get("operator") not in permitted_operators_list:
+                 msg = (
+                     f"You may not use the '{condition.get('operator')}' operator when "
+                     f"selecting the '{condition.get('condition')}' condition. \n"
+                     f"Allowed operators: {', '.join(list(permitted_operators_list))}"
+                 )
+                 raise ze.ParamNotPassedError(msg)
+
+
+ __all__ = ["SavedSearch"]
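
The validation above expects each condition to be a dict with exactly the keys "condition", "operator", and "value", and checks the operator against the group permitted for that condition; it does not validate values. A hedged sketch of a payload that passes _validate is below. saved_search() is the documented public pyzotero method for creating saved searches; its routing through SavedSearch is assumed from the ss_wrap decorator above, and the credentials and values are placeholders.

from pyzotero import zotero

zot = zotero.Zotero("1234567", "user", "your-api-key")  # placeholder credentials

conditions = [
    # "itemType" is in group D, so only "is" / "isNot" are allowed
    {"condition": "itemType", "operator": "is", "value": "book"},
    # "tag" is in group C, which also permits "contains" / "doesNotContain"
    {"condition": "tag", "operator": "contains", "value": "to-read"},
    # "dateAdded" is in group E, which permits "isInTheLast";
    # the value format is left to the Zotero API, _validate does not check it
    {"condition": "dateAdded", "operator": "isInTheLast", "value": "30"},
]
zot.saved_search("Recently added books to read", conditions)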
pyzotero/_upload.py ADDED
@@ -0,0 +1,241 @@
+ """File upload functionality for Pyzotero.
+
+ This module contains the Zupload class for handling file attachments
+ and uploads to the Zotero API.
+ """
+
+ from __future__ import annotations
+
+ import hashlib
+ import json
+ import mimetypes
+ from pathlib import Path
+ from typing import TYPE_CHECKING, Any
+
+ import httpx
+
+ import pyzotero as pz
+
+ from . import errors as ze
+ from ._utils import build_url, get_backoff_duration, token
+ from .errors import error_handler
+
+ if TYPE_CHECKING:
+     from ._client import Zotero
+
+
+ class Zupload:
+     """Zotero file attachment helper.
+
+     Receives a Zotero instance, file(s) to upload, and optional parent ID.
+     """
+
+     def __init__(
+         self,
+         zinstance: Zotero,
+         payload: list[dict],
+         parentid: str | None = None,
+         basedir: str | Path | None = None,
+     ) -> None:
+         super().__init__()
+         self.zinstance = zinstance
+         self.payload = payload
+         self.parentid = parentid
+         if basedir is None:
+             self.basedir = Path()
+         elif isinstance(basedir, Path):
+             self.basedir = basedir
+         else:
+             self.basedir = Path(basedir)
+
+     def _verify(self, payload: list[dict]) -> None:
+         """Ensure that all files to be attached exist.
+
+         open() is better than exists() because it avoids a race condition.
+         """
+         if not payload:  # Check payload has nonzero length
+             raise ze.ParamNotPassedError
+         for templt in payload:
+             filepath = self.basedir.joinpath(templt["filename"])
+             try:
+                 with filepath.open():
+                     pass
+             except OSError:
+                 msg = f"The file at {filepath!s} couldn't be opened or found."
+                 raise ze.FileDoesNotExistError(msg) from None
+
+     def _create_prelim(self) -> dict | None:
+         """Step 0: Register intent to upload files."""
+         self._verify(self.payload)
+         if "key" in self.payload[0] and self.payload[0]["key"]:
+             if next((i for i in self.payload if "key" not in i), False):
+                 msg = "Can't pass payload entries with and without keys to Zupload"
+                 raise ze.UnsupportedParamsError(msg)
+             return None  # Don't do anything if payload comes with keys
+         # Set contentType for each attachment if not already provided
+         for item in self.payload:
+             if not item.get("contentType"):
+                 filepath = str(self.basedir.joinpath(item["filename"]))
+                 detected_type = mimetypes.guess_type(filepath)[0]
+                 item["contentType"] = detected_type or "application/octet-stream"
+         liblevel = "/{t}/{u}/items"
+         # Create one or more new attachments
+         headers = {"Zotero-Write-Token": token(), "Content-Type": "application/json"}
+         # If we have a Parent ID, add it as a parentItem
+         if self.parentid:
+             for child in self.payload:
+                 child["parentItem"] = self.parentid
+         to_send = json.dumps(self.payload)
+         self.zinstance._check_backoff()
+         req = self.zinstance.client.post(
+             url=build_url(
+                 self.zinstance.endpoint,
+                 liblevel.format(
+                     t=self.zinstance.library_type,
+                     u=self.zinstance.library_id,
+                 ),
+             ),
+             content=to_send,
+             headers=headers,
+         )
+         try:
+             req.raise_for_status()
+         except httpx.HTTPError as exc:
+             error_handler(self.zinstance, req, exc)
+         backoff = get_backoff_duration(req.headers)
+         if backoff:
+             self.zinstance._set_backoff(backoff)
+         data = req.json()
+         for k in data["success"]:
+             self.payload[int(k)]["key"] = data["success"][k]
+         return data
+
+     def _get_auth(
+         self, attachment: str, reg_key: str, md5: str | None = None
+     ) -> dict[str, Any]:
+         """Step 1: get upload authorisation for a file."""
+         mtypes = mimetypes.guess_type(attachment)
+         digest = hashlib.md5()  # noqa: S324
+         with Path(attachment).open("rb") as att:
+             for chunk in iter(lambda: att.read(8192), b""):
+                 digest.update(chunk)
+         auth_headers = {"Content-Type": "application/x-www-form-urlencoded"}
+         if not md5:
+             auth_headers["If-None-Match"] = "*"
+         else:
+             # docs specify that for existing file we use this
+             auth_headers["If-Match"] = md5
+         data = {
+             "md5": digest.hexdigest(),
+             "filename": Path(attachment).name,
+             "filesize": Path(attachment).stat().st_size,
+             "mtime": str(int(Path(attachment).stat().st_mtime * 1000)),
+             "contentType": mtypes[0] or "application/octet-stream",
+             "charset": mtypes[1],
+             "params": 1,
+         }
+         self.zinstance._check_backoff()
+         auth_req = self.zinstance.client.post(
+             url=build_url(
+                 self.zinstance.endpoint,
+                 f"/{self.zinstance.library_type}/{self.zinstance.library_id}/items/{reg_key}/file",
+             ),
+             data=data,
+             headers=auth_headers,
+         )
+         try:
+             auth_req.raise_for_status()
+         except httpx.HTTPError as exc:
+             error_handler(self.zinstance, auth_req, exc)
+         backoff = get_backoff_duration(auth_req.headers)
+         if backoff:
+             self.zinstance._set_backoff(backoff)
+         return auth_req.json()
+
+     def _upload_file(
+         self, authdata: dict[str, Any], attachment: str, reg_key: str
+     ) -> None:
+         """Step 2: auth successful, and file not on server.
+
+         See zotero.org/support/dev/server_api/file_upload#a_full_upload
+
+         reg_key isn't used, but we need to pass it through to Step 3.
+         """
+         upload_dict = authdata["params"]
+         # pass tuple of tuples (not dict!), to ensure key comes first
+         upload_list = [("key", upload_dict.pop("key"))]
+         for key, value in upload_dict.items():
+             upload_list.append((key, value))
+         upload_list.append(("file", Path(attachment).open("rb").read()))
+         upload_pairs = tuple(upload_list)
+         try:
+             self.zinstance._check_backoff()
+             # We use a fresh httpx POST because we don't want our existing Pyzotero headers
+             # for a call to the storage upload URL (currently S3)
+             upload = httpx.post(
+                 url=authdata["url"],
+                 files=upload_pairs,
+                 headers={"User-Agent": f"Pyzotero/{pz.__version__}"},
+             )
+         except httpx.ConnectError:
+             msg = "ConnectionError"
+             raise ze.UploadError(msg) from None
+         try:
+             upload.raise_for_status()
+         except httpx.HTTPError as exc:
+             error_handler(self.zinstance, upload, exc)
+         backoff = get_backoff_duration(upload.headers)
+         if backoff:
+             self.zinstance._set_backoff(backoff)
+         # now check the responses
+         return self._register_upload(authdata, reg_key)
+
+     def _register_upload(self, authdata: dict[str, Any], reg_key: str) -> None:
+         """Step 3: upload successful, so register it."""
+         reg_headers = {
+             "Content-Type": "application/x-www-form-urlencoded",
+             "If-None-Match": "*",
+         }
+         reg_data = {"upload": authdata.get("uploadKey")}
+         self.zinstance._check_backoff()
+         upload_reg = self.zinstance.client.post(
+             url=build_url(
+                 self.zinstance.endpoint,
+                 f"/{self.zinstance.library_type}/{self.zinstance.library_id}/items/{reg_key}/file",
+             ),
+             data=reg_data,
+             headers=reg_headers,
+         )
+         try:
+             upload_reg.raise_for_status()
+         except httpx.HTTPError as exc:
+             error_handler(self.zinstance, upload_reg, exc)
+         backoff = get_backoff_duration(upload_reg.headers)
+         if backoff:
+             self.zinstance._set_backoff(backoff)
+
+     def upload(self) -> dict[str, list]:
+         """Run upload steps 0-3 (the private methods above).
+
+         Returns a dict with "success", "failure", and "unchanged" keys,
+         each mapping to the list of payload entries that ended up with
+         that status.
+         """
+         result: dict[str, list] = {"success": [], "failure": [], "unchanged": []}
+         self._create_prelim()
+         for item in self.payload:
+             if "key" not in item:
+                 result["failure"].append(item)
+                 continue
+             attach = str(self.basedir.joinpath(item["filename"]))
+             authdata = self._get_auth(attach, item["key"], md5=item.get("md5", None))
+             # no need to keep going if the file exists
+             if authdata.get("exists"):
+                 result["unchanged"].append(item)
+                 continue
+             self._upload_file(authdata, attach, item["key"])
+             result["success"].append(item)
+         return result
+
+
+ __all__ = ["Zupload"]
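
A minimal sketch of driving Zupload directly, using the constructor and payload shape defined above (dicts keyed by "filename", resolved against basedir). The credentials, file path, and parent item key are placeholders; in practice the class is normally reached through the client's public attachment methods rather than instantiated by hand.

from pyzotero import zotero
from pyzotero._upload import Zupload

zot = zotero.Zotero("1234567", "user", "your-api-key")  # placeholder credentials

# One payload entry per file; "contentType" is optional and is guessed in
# _create_prelim() if omitted. "ABCD2345" is a placeholder parent item key.
payload = [{"filename": "report.pdf"}]
uploader = Zupload(zot, payload, parentid="ABCD2345", basedir="/path/to/files")

# upload() runs steps 0-3 and reports each entry under exactly one status.
result = uploader.upload()
print(result["success"], result["failure"], result["unchanged"])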