pyzotero-1.7.6-py3-none-any.whl → pyzotero-1.9.0-py3-none-any.whl
This diff shows the content of publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in the public registry.
- pyzotero/__init__.py +60 -0
- pyzotero/_client.py +1402 -0
- pyzotero/_decorators.py +195 -0
- pyzotero/_search.py +190 -0
- pyzotero/_upload.py +241 -0
- pyzotero/_utils.py +86 -0
- pyzotero/cli.py +789 -4
- pyzotero/errors.py +185 -0
- pyzotero/filetransport.py +2 -2
- pyzotero/semantic_scholar.py +441 -0
- pyzotero/zotero.py +62 -2035
- pyzotero/zotero_errors.py +53 -136
- {pyzotero-1.7.6.dist-info → pyzotero-1.9.0.dist-info}/METADATA +3 -3
- pyzotero-1.9.0.dist-info/RECORD +16 -0
- pyzotero-1.7.6.dist-info/RECORD +0 -9
- {pyzotero-1.7.6.dist-info → pyzotero-1.9.0.dist-info}/WHEEL +0 -0
- {pyzotero-1.7.6.dist-info → pyzotero-1.9.0.dist-info}/entry_points.txt +0 -0
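
The headline change is pyzotero/zotero.py, which shrinks from a roughly 2,000-line implementation to a thin compatibility shim; the logic moves into the new private modules (_client.py, _decorators.py, _search.py, _upload.py, _utils.py) listed above. Per the new module docstring visible in the diff below, both import styles keep working. A minimal sketch of what that guarantee means for callers (the identity assertion is an assumption based on the docstring, since the actual re-export lines fall outside the truncated diff):

    # Old-style and new-style imports should resolve to the same class
    # after the 1.9.0 refactor (assumption: zotero.py re-exports the
    # implementation now housed in the new private modules).
    from pyzotero.zotero import Zotero as LegacyZotero
    from pyzotero import Zotero

    assert LegacyZotero is Zotero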
pyzotero/zotero.py
CHANGED
|
@@ -1,2042 +1,69 @@
|
|
|
1
|
-
"""
|
|
1
|
+
"""Backwards-compatible re-exports for pyzotero.zotero module.
|
|
2
2
|
|
|
3
|
-
This
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
__author__ = "Stephan Hügel"
|
|
7
|
-
__api_version__ = "3"
|
|
8
|
-
|
|
9
|
-
import copy
|
|
10
|
-
import hashlib
|
|
11
|
-
import io
|
|
12
|
-
import json
|
|
13
|
-
import mimetypes
|
|
14
|
-
import re
|
|
15
|
-
import threading
|
|
16
|
-
import time
|
|
17
|
-
import uuid
|
|
18
|
-
import zipfile
|
|
19
|
-
from collections import OrderedDict
|
|
20
|
-
from functools import wraps
|
|
21
|
-
from pathlib import Path, PurePosixPath
|
|
22
|
-
from urllib.parse import (
|
|
23
|
-
parse_qs,
|
|
24
|
-
parse_qsl,
|
|
25
|
-
quote,
|
|
26
|
-
unquote,
|
|
27
|
-
urlencode,
|
|
28
|
-
urlparse,
|
|
29
|
-
urlunparse,
|
|
30
|
-
)
|
|
31
|
-
|
|
32
|
-
import bibtexparser
|
|
33
|
-
import feedparser
|
|
34
|
-
import httpx
|
|
35
|
-
import whenever
|
|
36
|
-
from httpx import Request
|
|
37
|
-
|
|
38
|
-
import pyzotero as pz
|
|
39
|
-
|
|
40
|
-
from . import zotero_errors as ze
|
|
41
|
-
from .filetransport import Client as File_Client
|
|
42
|
-
|
|
43
|
-
# Avoid hanging the application if there's no server response
|
|
44
|
-
timeout = 30
|
|
45
|
-
|
|
46
|
-
ONE_HOUR = 3600
|
|
47
|
-
DEFAULT_NUM_ITEMS = 50
|
|
48
|
-
DEFAULT_ITEM_LIMIT = 100
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
def build_url(base_url, path, args_dict=None):
|
|
52
|
-
"""Build a valid URL so we don't have to worry about string concatenation errors and
|
|
53
|
-
leading / trailing slashes etc.
|
|
54
|
-
"""
|
|
55
|
-
base_url = base_url.removesuffix("/")
|
|
56
|
-
parsed = urlparse(base_url)
|
|
57
|
-
new_path = str(PurePosixPath(parsed.path) / path.removeprefix("/"))
|
|
58
|
-
if args_dict:
|
|
59
|
-
return urlunparse(parsed._replace(path=new_path, query=urlencode(args_dict)))
|
|
60
|
-
return urlunparse(parsed._replace(path=new_path))
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
def merge_params(url, params):
|
|
64
|
-
"""Strip query parameters, extracting them into a dict, then merging it with
|
|
65
|
-
the "params" dict, returning the truncated url and merged query params dict
|
|
66
|
-
"""
|
|
67
|
-
parsed = urlparse(url)
|
|
68
|
-
# Extract query parameters from URL
|
|
69
|
-
incoming = parse_qs(parsed.query)
|
|
70
|
-
incoming = {k: v[0] for k, v in incoming.items()}
|
|
71
|
-
|
|
72
|
-
# Create new params dict by merging
|
|
73
|
-
merged = {**incoming, **params}
|
|
74
|
-
|
|
75
|
-
# Get base URL by zeroing out the query component
|
|
76
|
-
base_url = urlunparse(parsed._replace(query=""))
|
|
77
|
-
|
|
78
|
-
return base_url, merged
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
def token():
|
|
82
|
-
"""Return a unique 32-char write-token"""
|
|
83
|
-
return str(uuid.uuid4().hex)
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
def cleanwrap(func):
|
|
87
|
-
"""Wrap for Zotero._cleanup"""
|
|
88
|
-
|
|
89
|
-
def enc(self, *args, **kwargs):
|
|
90
|
-
"""Send each item to _cleanup()"""
|
|
91
|
-
return (func(self, item, **kwargs) for item in args)
|
|
92
|
-
|
|
93
|
-
return enc
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
def chunks(iterable, n):
|
|
97
|
-
"""Yield successive n-sized chunks from l."""
|
|
98
|
-
for i in range(0, len(iterable), n):
|
|
99
|
-
yield iterable[i : i + n]
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
def tcache(func):
|
|
103
|
-
"""Take care of the URL building and caching for template functions"""
|
|
104
|
-
|
|
105
|
-
@wraps(func)
|
|
106
|
-
def wrapped_f(self, *args, **kwargs):
|
|
107
|
-
"""Call the decorated function to get query string and params,
|
|
108
|
-
builds URL, retrieves template, caches result, and returns template
|
|
109
|
-
"""
|
|
110
|
-
query_string, params = func(self, *args, **kwargs)
|
|
111
|
-
params["timeout"] = timeout
|
|
112
|
-
r = Request(
|
|
113
|
-
"GET",
|
|
114
|
-
build_url(self.endpoint, query_string),
|
|
115
|
-
params=params,
|
|
116
|
-
)
|
|
117
|
-
with httpx.Client() as client:
|
|
118
|
-
response = client.send(r)
|
|
119
|
-
|
|
120
|
-
# now split up the URL
|
|
121
|
-
result = urlparse(str(response.url))
|
|
122
|
-
# construct cache key
|
|
123
|
-
cachekey = f"{result.path}_{result.query}"
|
|
124
|
-
if self.templates.get(cachekey) and not self._updated(
|
|
125
|
-
query_string,
|
|
126
|
-
self.templates[cachekey],
|
|
127
|
-
cachekey,
|
|
128
|
-
):
|
|
129
|
-
return self.templates[cachekey]["tmplt"]
|
|
130
|
-
# otherwise perform a normal request and cache the response
|
|
131
|
-
retrieved = self._retrieve_data(query_string, params=params)
|
|
132
|
-
return self._cache(retrieved, cachekey)
|
|
133
|
-
|
|
134
|
-
return wrapped_f
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
def backoff_check(func):
|
|
138
|
-
"""Perform backoff processing
|
|
139
|
-
func must return a Requests GET / POST / PUT / PATCH / DELETE etc
|
|
140
|
-
This is is intercepted: we first check for an active backoff
|
|
141
|
-
and wait if need be.
|
|
142
|
-
After the response is received, we do normal error checking
|
|
143
|
-
and set a new backoff if necessary, before returning
|
|
144
|
-
|
|
145
|
-
Use with functions that are intended to return True
|
|
146
|
-
"""
|
|
147
|
-
|
|
148
|
-
@wraps(func)
|
|
149
|
-
def wrapped_f(self, *args, **kwargs):
|
|
150
|
-
self._check_backoff()
|
|
151
|
-
# resp is a Requests response object
|
|
152
|
-
resp = func(self, *args, **kwargs)
|
|
153
|
-
try:
|
|
154
|
-
resp.raise_for_status()
|
|
155
|
-
except httpx.HTTPError as exc:
|
|
156
|
-
error_handler(self, resp, exc)
|
|
157
|
-
self.request = resp
|
|
158
|
-
backoff = resp.headers.get("backoff") or resp.headers.get("retry-after")
|
|
159
|
-
if backoff:
|
|
160
|
-
self._set_backoff(backoff)
|
|
161
|
-
|
|
162
|
-
return True
|
|
163
|
-
|
|
164
|
-
return wrapped_f
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
def retrieve(func):
|
|
168
|
-
"""Call _retrieve_data() and passes
|
|
169
|
-
the result to the correct processor, based on a lookup
|
|
170
|
-
"""
|
|
171
|
-
|
|
172
|
-
@wraps(func)
|
|
173
|
-
def wrapped_f(self, *args, **kwargs):
|
|
174
|
-
"""Return result of _retrieve_data()
|
|
175
|
-
|
|
176
|
-
func's return value is part of a URI, and it's this
|
|
177
|
-
which is intercepted and passed to _retrieve_data:
|
|
178
|
-
'/users/123/items?key=abc123'
|
|
179
|
-
"""
|
|
180
|
-
if kwargs:
|
|
181
|
-
self.add_parameters(**kwargs)
|
|
182
|
-
retrieved = self._retrieve_data(func(self, *args))
|
|
183
|
-
# we now always have links in the header response
|
|
184
|
-
self.links = self._extract_links()
|
|
185
|
-
# determine content and format, based on url params
|
|
186
|
-
content = (
|
|
187
|
-
self.content.search(str(self.request.url))
|
|
188
|
-
and self.content.search(str(self.request.url)).group(0)
|
|
189
|
-
) or "bib"
|
|
190
|
-
# select format, or assume JSON
|
|
191
|
-
content_type_header = self.request.headers["Content-Type"].lower() + ";"
|
|
192
|
-
fmt = self.formats.get(
|
|
193
|
-
# strip "; charset=..." segment
|
|
194
|
-
content_type_header[0 : content_type_header.index(";")],
|
|
195
|
-
"json",
|
|
196
|
-
)
|
|
197
|
-
# clear all query parameters
|
|
198
|
-
self.url_params = None
|
|
199
|
-
# Zotero API returns plain-text attachments as zipped content
|
|
200
|
-
# We can inspect the redirect header to check whether Zotero compressed the file
|
|
201
|
-
if fmt == "zip":
|
|
202
|
-
if (
|
|
203
|
-
self.request.history
|
|
204
|
-
and self.request.history[0].headers.get("Zotero-File-Compressed")
|
|
205
|
-
== "Yes"
|
|
206
|
-
):
|
|
207
|
-
z = zipfile.ZipFile(io.BytesIO(retrieved.content))
|
|
208
|
-
namelist = z.namelist()
|
|
209
|
-
file = z.read(namelist[0])
|
|
210
|
-
else:
|
|
211
|
-
file = retrieved.content
|
|
212
|
-
return file
|
|
213
|
-
# check to see whether it's tag data
|
|
214
|
-
if "tags" in str(self.request.url):
|
|
215
|
-
self.tag_data = False
|
|
216
|
-
return self._tags_data(retrieved.json())
|
|
217
|
-
if fmt == "atom":
|
|
218
|
-
parsed = feedparser.parse(retrieved.text)
|
|
219
|
-
# select the correct processor
|
|
220
|
-
processor = self.processors.get(content)
|
|
221
|
-
# process the content correctly with a custom rule
|
|
222
|
-
return processor(parsed)
|
|
223
|
-
if fmt == "snapshot":
|
|
224
|
-
# we need to dump as a zip!
|
|
225
|
-
self.snapshot = True
|
|
226
|
-
if fmt == "bibtex":
|
|
227
|
-
parser = bibtexparser.bparser.BibTexParser(
|
|
228
|
-
common_strings=True,
|
|
229
|
-
ignore_nonstandard_types=False,
|
|
230
|
-
)
|
|
231
|
-
return parser.parse(retrieved.text)
|
|
232
|
-
# it's binary, so return raw content
|
|
233
|
-
if fmt != "json":
|
|
234
|
-
return retrieved.content
|
|
235
|
-
# no need to do anything special, return JSON
|
|
236
|
-
return retrieved.json()
|
|
237
|
-
|
|
238
|
-
return wrapped_f
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
def ss_wrap(func):
|
|
242
|
-
"""Ensure that a SavedSearch object exists"""
|
|
243
|
-
|
|
244
|
-
def wrapper(self, *args, **kwargs):
|
|
245
|
-
if not self.savedsearch:
|
|
246
|
-
self.savedsearch = SavedSearch(self)
|
|
247
|
-
return func(self, *args, **kwargs)
|
|
248
|
-
|
|
249
|
-
return wrapper
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
class Zotero:
|
|
253
|
-
"""Zotero API methods
|
|
254
|
-
A full list of methods can be found here:
|
|
255
|
-
http://www.zotero.org/support/dev/server_api
|
|
256
|
-
"""
|
|
257
|
-
|
|
258
|
-
def __init__(
|
|
259
|
-
self,
|
|
260
|
-
library_id=None,
|
|
261
|
-
library_type=None,
|
|
262
|
-
api_key=None,
|
|
263
|
-
preserve_json_order=False,
|
|
264
|
-
locale="en-US",
|
|
265
|
-
local=False,
|
|
266
|
-
):
|
|
267
|
-
self.client = None
|
|
268
|
-
"""Store Zotero credentials"""
|
|
269
|
-
if not local:
|
|
270
|
-
self.endpoint = "https://api.zotero.org"
|
|
271
|
-
self.local = False
|
|
272
|
-
else:
|
|
273
|
-
self.endpoint = "http://localhost:23119/api"
|
|
274
|
-
self.local = True
|
|
275
|
-
if library_id is not None and library_type:
|
|
276
|
-
self.library_id = library_id
|
|
277
|
-
# library_type determines whether query begins w. /users or /groups
|
|
278
|
-
self.library_type = library_type + "s"
|
|
279
|
-
else:
|
|
280
|
-
err = "Please provide both the library ID and the library type"
|
|
281
|
-
raise ze.MissingCredentialsError(err)
|
|
282
|
-
# api_key is not required for public individual or group libraries
|
|
283
|
-
self.api_key = api_key
|
|
284
|
-
self.preserve_json_order = preserve_json_order
|
|
285
|
-
self.locale = locale
|
|
286
|
-
self.url_params = None
|
|
287
|
-
self.tag_data = False
|
|
288
|
-
self.request = None
|
|
289
|
-
self.snapshot = False
|
|
290
|
-
self.client = httpx.Client(
|
|
291
|
-
headers=self.default_headers(),
|
|
292
|
-
follow_redirects=True,
|
|
293
|
-
)
|
|
294
|
-
# these aren't valid item fields, so never send them to the server
|
|
295
|
-
self.temp_keys = {"key", "etag", "group_id", "updated"}
|
|
296
|
-
# determine which processor to use for the parsed content
|
|
297
|
-
self.fmt = re.compile(r"(?<=format=)\w+")
|
|
298
|
-
self.content = re.compile(r"(?<=content=)\w+")
|
|
299
|
-
# JSON by default
|
|
300
|
-
self.formats = {
|
|
301
|
-
"application/atom+xml": "atom",
|
|
302
|
-
"application/x-bibtex": "bibtex",
|
|
303
|
-
"application/json": "json",
|
|
304
|
-
"text/html": "snapshot",
|
|
305
|
-
"text/plain": "plain",
|
|
306
|
-
"text/markdown": "plain",
|
|
307
|
-
"application/pdf; charset=utf-8": "pdf",
|
|
308
|
-
"application/pdf": "pdf",
|
|
309
|
-
"application/msword": "doc",
|
|
310
|
-
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": "xlsx",
|
|
311
|
-
"application/vnd.openxmlformats-officedocument.wordprocessingml.document": "docx",
|
|
312
|
-
"application/vnd.openxmlformats-officedocument.presentationml.presentation": "pptx",
|
|
313
|
-
"application/zip": "zip",
|
|
314
|
-
"application/epub+zip": "zip",
|
|
315
|
-
"audio/mpeg": "mp3",
|
|
316
|
-
"video/mp4": "mp4",
|
|
317
|
-
"audio/x-wav": "wav",
|
|
318
|
-
"video/x-msvideo": "avi",
|
|
319
|
-
"application/octet-stream": "octet",
|
|
320
|
-
"application/x-tex": "tex",
|
|
321
|
-
"application/x-texinfo": "texinfo",
|
|
322
|
-
"image/jpeg": "jpeg",
|
|
323
|
-
"image/png": "png",
|
|
324
|
-
"image/gif": "gif",
|
|
325
|
-
"image/tiff": "tiff",
|
|
326
|
-
"application/postscript": "postscript",
|
|
327
|
-
"application/rtf": "rtf",
|
|
328
|
-
}
|
|
329
|
-
self.processors = {
|
|
330
|
-
"bib": self._bib_processor,
|
|
331
|
-
"citation": self._citation_processor,
|
|
332
|
-
"bibtex": self._bib_processor,
|
|
333
|
-
"bookmarks": self._bib_processor,
|
|
334
|
-
"coins": self._bib_processor,
|
|
335
|
-
"csljson": self._csljson_processor,
|
|
336
|
-
"mods": self._bib_processor,
|
|
337
|
-
"refer": self._bib_processor,
|
|
338
|
-
"rdf_bibliontology": self._bib_processor,
|
|
339
|
-
"rdf_dc": self._bib_processor,
|
|
340
|
-
"rdf_zotero": self._bib_processor,
|
|
341
|
-
"ris": self._bib_processor,
|
|
342
|
-
"tei": self._bib_processor,
|
|
343
|
-
"wikipedia": self._bib_processor,
|
|
344
|
-
"json": self._json_processor,
|
|
345
|
-
"html": self._bib_processor,
|
|
346
|
-
}
|
|
347
|
-
self.links = None
|
|
348
|
-
self.self_link = {}
|
|
349
|
-
self.templates = {}
|
|
350
|
-
self.savedsearch = None
|
|
351
|
-
# these are required for backoff handling
|
|
352
|
-
self.backoff = False
|
|
353
|
-
self.backoff_duration = 0.0
|
|
354
|
-
|
|
355
|
-
def __del__(self):
|
|
356
|
-
"""Remove client before cleanup"""
|
|
357
|
-
# this isn't guaranteed to run, but that's OK
|
|
358
|
-
if c := self.client:
|
|
359
|
-
c.close()
|
|
360
|
-
|
|
361
|
-
@property
|
|
362
|
-
def __version__(self):
|
|
363
|
-
"""Return the version of the pyzotero library"""
|
|
364
|
-
return pz.__version__
|
|
365
|
-
|
|
366
|
-
def _check_for_component(self, url, component):
|
|
367
|
-
"""Check a url path query fragment for a specific query parameter"""
|
|
368
|
-
return bool(parse_qs(url).get(component))
|
|
369
|
-
|
|
370
|
-
def _striplocal(self, url):
|
|
371
|
-
"""We need to remve the leading "/api" substring from urls if we're running in local mode"""
|
|
372
|
-
if self.local:
|
|
373
|
-
parsed = urlparse(url)
|
|
374
|
-
purepath = PurePosixPath(unquote(parsed.path))
|
|
375
|
-
newpath = "/".join(purepath.parts[2:])
|
|
376
|
-
replaced = parsed._replace(path="/" + newpath)
|
|
377
|
-
return urlunparse(replaced)
|
|
378
|
-
return url
|
|
379
|
-
|
|
380
|
-
def _set_backoff(self, duration):
|
|
381
|
-
"""Set a backoff
|
|
382
|
-
Spins up a timer in a background thread which resets the backoff logic
|
|
383
|
-
when it expires, then sets the time at which the backoff will expire.
|
|
384
|
-
The latter step is required so that other calls can check whether there's
|
|
385
|
-
an active backoff, because the threading.Timer method has no way
|
|
386
|
-
of returning a duration
|
|
387
|
-
"""
|
|
388
|
-
duration = float(duration)
|
|
389
|
-
self.backoff = True
|
|
390
|
-
threading.Timer(duration, self._reset_backoff).start()
|
|
391
|
-
self.backoff_duration = time.time() + duration
|
|
392
|
-
|
|
393
|
-
def _reset_backoff(self):
|
|
394
|
-
self.backoff = False
|
|
395
|
-
self.backoff_duration = 0.0
|
|
396
|
-
|
|
397
|
-
def _check_backoff(self):
|
|
398
|
-
"""Before an API call is made, we check whether there's an active backoff.
|
|
399
|
-
If there is, we check whether there's any time left on the backoff.
|
|
400
|
-
If there is, we sleep for the remainder before returning
|
|
401
|
-
"""
|
|
402
|
-
if self.backoff:
|
|
403
|
-
remainder = self.backoff_duration - time.time()
|
|
404
|
-
if remainder > 0.0:
|
|
405
|
-
time.sleep(remainder)
|
|
406
|
-
|
|
407
|
-
def default_headers(self):
|
|
408
|
-
"""It's always OK to include these headers"""
|
|
409
|
-
_headers = {
|
|
410
|
-
"User-Agent": f"Pyzotero/{pz.__version__}",
|
|
411
|
-
"Zotero-API-Version": f"{__api_version__}",
|
|
412
|
-
}
|
|
413
|
-
if self.api_key:
|
|
414
|
-
_headers["Authorization"] = f"Bearer {self.api_key}"
|
|
415
|
-
return _headers
|
|
416
|
-
|
|
417
|
-
def _cache(self, response, key):
|
|
418
|
-
"""Add a retrieved template to the cache for 304 checking
|
|
419
|
-
accepts a dict and key name, adds the retrieval time, and adds both
|
|
420
|
-
to self.templates as a new dict using the specified key
|
|
421
|
-
"""
|
|
422
|
-
# cache template and retrieval time for subsequent calls
|
|
423
|
-
try:
|
|
424
|
-
thetime = whenever.ZonedDateTime.now("Europe/London").py_datetime()
|
|
425
|
-
except AttributeError:
|
|
426
|
-
thetime = whenever.ZonedDateTime.now("Europe/London").py_datetime()
|
|
427
|
-
self.templates[key] = {"tmplt": response.json(), "updated": thetime}
|
|
428
|
-
return copy.deepcopy(response.json())
|
|
429
|
-
|
|
430
|
-
@cleanwrap
|
|
431
|
-
def _cleanup(self, to_clean, allow=()):
|
|
432
|
-
"""Remove keys we added for internal use"""
|
|
433
|
-
# this item's been retrieved from the API, we only need the 'data'
|
|
434
|
-
# entry
|
|
435
|
-
if to_clean.keys() == ["links", "library", "version", "meta", "key", "data"]:
|
|
436
|
-
to_clean = to_clean["data"]
|
|
437
|
-
return dict(
|
|
438
|
-
[
|
|
439
|
-
[k, v]
|
|
440
|
-
for k, v in list(to_clean.items())
|
|
441
|
-
if (k in allow or k not in self.temp_keys)
|
|
442
|
-
],
|
|
443
|
-
)
|
|
444
|
-
|
|
445
|
-
def _retrieve_data(self, request=None, params=None):
|
|
446
|
-
"""Retrieve Zotero items via the API
|
|
447
|
-
Combine endpoint and request to access the specific resource
|
|
448
|
-
Returns a JSON document
|
|
449
|
-
"""
|
|
450
|
-
full_url = build_url(self.endpoint, request)
|
|
451
|
-
# ensure that we wait if there's an active backoff
|
|
452
|
-
self._check_backoff()
|
|
453
|
-
# don't set locale if the url already contains it
|
|
454
|
-
# we always add a locale if it's a "standalone" or first call
|
|
455
|
-
needs_locale = not self.links or not self._check_for_component(
|
|
456
|
-
self.links.get("next"),
|
|
457
|
-
"locale",
|
|
458
|
-
)
|
|
459
|
-
if needs_locale:
|
|
460
|
-
if params:
|
|
461
|
-
params["locale"] = self.locale
|
|
462
|
-
else:
|
|
463
|
-
params = {"locale": self.locale}
|
|
464
|
-
# we now have to merge self.url_params (default params, and those supplied by the user)
|
|
465
|
-
if not params:
|
|
466
|
-
params = {}
|
|
467
|
-
if not self.url_params:
|
|
468
|
-
self.url_params = {}
|
|
469
|
-
merged_params = {**self.url_params, **params}
|
|
470
|
-
# our incoming url might be from the "links" dict, in which case it will contain url parameters.
|
|
471
|
-
# Unfortunately, httpx doesn't like to merge query paramaters in the url string and passed params
|
|
472
|
-
# so we strip the url params, combining them with our existing url_params
|
|
473
|
-
final_url, final_params = merge_params(full_url, merged_params)
|
|
474
|
-
# file URI errors are raised immediately so we have to try here
|
|
475
|
-
try:
|
|
476
|
-
self.request = self.client.get(
|
|
477
|
-
url=final_url,
|
|
478
|
-
params=final_params,
|
|
479
|
-
headers=self.default_headers(),
|
|
480
|
-
timeout=timeout,
|
|
481
|
-
)
|
|
482
|
-
self.request.encoding = "utf-8"
|
|
483
|
-
# The API doesn't return this any more, so we have to cheat
|
|
484
|
-
self.self_link = self.request.url
|
|
485
|
-
except httpx.UnsupportedProtocol:
|
|
486
|
-
# File URI handler logic
|
|
487
|
-
fc = File_Client()
|
|
488
|
-
request = fc.get(
|
|
489
|
-
url=final_url,
|
|
490
|
-
params=final_params,
|
|
491
|
-
headers=self.default_headers(),
|
|
492
|
-
timeout=timeout,
|
|
493
|
-
follow_redirects=True,
|
|
494
|
-
)
|
|
495
|
-
self.request = request
|
|
496
|
-
# since we'll be writing bytes, we need to set this to a type that will trigger the bytes processor
|
|
497
|
-
self.request.headers["Content-Type"] = "text/plain"
|
|
498
|
-
try:
|
|
499
|
-
self.request.raise_for_status()
|
|
500
|
-
except httpx.HTTPError as exc:
|
|
501
|
-
error_handler(self, self.request, exc)
|
|
502
|
-
backoff = self.request.headers.get("backoff") or self.request.headers.get(
|
|
503
|
-
"retry-after",
|
|
504
|
-
)
|
|
505
|
-
if backoff:
|
|
506
|
-
self._set_backoff(backoff)
|
|
507
|
-
return self.request
|
|
508
|
-
|
|
509
|
-
def _extract_links(self):
|
|
510
|
-
"""Extract self, first, next, last links from a request response"""
|
|
511
|
-
extracted = {}
|
|
512
|
-
try:
|
|
513
|
-
for key, value in self.request.links.items():
|
|
514
|
-
parsed = urlparse(value["url"])
|
|
515
|
-
fragment = urlunparse(("", "", parsed.path, "", parsed.query, ""))
|
|
516
|
-
extracted[key] = fragment
|
|
517
|
-
# add a 'self' link
|
|
518
|
-
parsed = urlparse(str(self.self_link))
|
|
519
|
-
# strip 'format' query parameter and rebuild query string
|
|
520
|
-
query_params = [(k, v) for k, v in parse_qsl(parsed.query) if k != "format"]
|
|
521
|
-
# rebuild url fragment with just path and query (consistent with other links)
|
|
522
|
-
extracted["self"] = urlunparse(
|
|
523
|
-
("", "", parsed.path, "", urlencode(query_params), "")
|
|
524
|
-
)
|
|
525
|
-
except KeyError:
|
|
526
|
-
# No links present, because it's a single item
|
|
527
|
-
return None
|
|
528
|
-
else:
|
|
529
|
-
return extracted
|
|
530
|
-
|
|
531
|
-
def _updated(self, url, payload, template=None):
|
|
532
|
-
"""Call to see if a template request returns 304
|
|
533
|
-
accepts:
|
|
534
|
-
- a string to combine with the API endpoint
|
|
535
|
-
- a dict of format values, in case they're required by 'url'
|
|
536
|
-
- a template name to check for
|
|
537
|
-
As per the API docs, a template less than 1 hour old is
|
|
538
|
-
assumed to be fresh, and will immediately return False if found
|
|
539
|
-
"""
|
|
540
|
-
# If the template is more than an hour old, try a 304
|
|
541
|
-
if (
|
|
542
|
-
abs(
|
|
543
|
-
whenever.ZonedDateTime.now("Europe/London").py_datetime()
|
|
544
|
-
- self.templates[template]["updated"],
|
|
545
|
-
).seconds
|
|
546
|
-
> ONE_HOUR
|
|
547
|
-
):
|
|
548
|
-
query = build_url(
|
|
549
|
-
self.endpoint,
|
|
550
|
-
url.format(u=self.library_id, t=self.library_type, **payload),
|
|
551
|
-
)
|
|
552
|
-
headers = {
|
|
553
|
-
"If-Modified-Since": payload["updated"].strftime(
|
|
554
|
-
"%a, %d %b %Y %H:%M:%S %Z",
|
|
555
|
-
),
|
|
556
|
-
}
|
|
557
|
-
# perform the request, and check whether the response returns 304
|
|
558
|
-
self._check_backoff()
|
|
559
|
-
req = self.client.get(query, headers=headers)
|
|
560
|
-
try:
|
|
561
|
-
req.raise_for_status()
|
|
562
|
-
except httpx.HTTPError as exc:
|
|
563
|
-
error_handler(self, req, exc)
|
|
564
|
-
backoff = self.request.headers.get("backoff") or self.request.headers.get(
|
|
565
|
-
"retry-after",
|
|
566
|
-
)
|
|
567
|
-
if backoff:
|
|
568
|
-
self._set_backoff(backoff)
|
|
569
|
-
return req.status_code == httpx.codes.NOT_MODIFIED
|
|
570
|
-
# Still plenty of life left in't
|
|
571
|
-
return False
|
|
572
|
-
|
|
573
|
-
def add_parameters(self, **params):
|
|
574
|
-
"""Add URL parameters.
|
|
575
|
-
|
|
576
|
-
Also ensure that only valid format/content combinations are requested
|
|
577
|
-
"""
|
|
578
|
-
# Preserve constructor-level parameters (like locale) while allowing method-level overrides
|
|
579
|
-
if self.url_params is None:
|
|
580
|
-
self.url_params = {}
|
|
581
|
-
|
|
582
|
-
# Store existing params to preserve things like locale
|
|
583
|
-
preserved_params = self.url_params.copy()
|
|
584
|
-
|
|
585
|
-
# we want JSON by default
|
|
586
|
-
if not params.get("format"):
|
|
587
|
-
params["format"] = "json"
|
|
588
|
-
# non-standard content must be retrieved as Atom
|
|
589
|
-
if params.get("content"):
|
|
590
|
-
params["format"] = "atom"
|
|
591
|
-
# TODO: rewrite format=atom, content=json request
|
|
592
|
-
if "limit" not in params or params.get("limit") == 0:
|
|
593
|
-
params["limit"] = DEFAULT_ITEM_LIMIT
|
|
594
|
-
# Need ability to request arbitrary number of results for version
|
|
595
|
-
# response
|
|
596
|
-
# -1 value is hack that works with current version
|
|
597
|
-
elif params["limit"] == -1 or params["limit"] is None:
|
|
598
|
-
del params["limit"]
|
|
599
|
-
# bib format can't have a limit
|
|
600
|
-
if params.get("format") == "bib":
|
|
601
|
-
params.pop("limit", None)
|
|
602
|
-
|
|
603
|
-
# Merge preserved params with new params (new params override existing ones)
|
|
604
|
-
self.url_params = {**preserved_params, **params}
|
|
605
|
-
|
|
606
|
-
def _build_query(self, query_string, no_params=False):
|
|
607
|
-
"""Set request parameters. Will always add the user ID if it hasn't
|
|
608
|
-
been specifically set by an API method
|
|
609
|
-
"""
|
|
610
|
-
try:
|
|
611
|
-
query = quote(query_string.format(u=self.library_id, t=self.library_type))
|
|
612
|
-
except KeyError as err:
|
|
613
|
-
errmsg = f"There's a request parameter missing: {err}"
|
|
614
|
-
raise ze.ParamNotPassedError(errmsg) from None
|
|
615
|
-
# Add the URL parameters and the user key, if necessary
|
|
616
|
-
if no_params is False and not self.url_params:
|
|
617
|
-
self.add_parameters()
|
|
618
|
-
return query
|
|
619
|
-
|
|
620
|
-
@retrieve
|
|
621
|
-
def publications(self):
|
|
622
|
-
"""Return the contents of My Publications."""
|
|
623
|
-
if self.library_type != "users":
|
|
624
|
-
msg = "This API call does not exist for group libraries"
|
|
625
|
-
raise ze.CallDoesNotExistError(
|
|
626
|
-
msg,
|
|
627
|
-
)
|
|
628
|
-
query_string = "/{t}/{u}/publications/items"
|
|
629
|
-
return self._build_query(query_string)
|
|
630
|
-
|
|
631
|
-
# The following methods are Zotero Read API calls
|
|
632
|
-
def num_items(self):
|
|
633
|
-
"""Return the total number of top-level items in the library"""
|
|
634
|
-
query = "/{t}/{u}/items/top"
|
|
635
|
-
return self._totals(query)
|
|
636
|
-
|
|
637
|
-
def count_items(self):
|
|
638
|
-
"""Return the count of all items in a group / library"""
|
|
639
|
-
query = "/{t}/{u}/items"
|
|
640
|
-
return self._totals(query)
|
|
641
|
-
|
|
642
|
-
def num_collectionitems(self, collection):
|
|
643
|
-
"""Return the total number of items in the specified collection"""
|
|
644
|
-
query = f"/{self.library_type}/{self.library_id}/collections/{collection.upper()}/items"
|
|
645
|
-
return self._totals(query)
|
|
646
|
-
|
|
647
|
-
def _totals(self, query):
|
|
648
|
-
"""General method for returning total counts"""
|
|
649
|
-
self.add_parameters(limit=1)
|
|
650
|
-
query = self._build_query(query)
|
|
651
|
-
self._retrieve_data(query)
|
|
652
|
-
self.url_params = None
|
|
653
|
-
# extract the 'total items' figure
|
|
654
|
-
return int(self.request.headers["Total-Results"])
|
|
655
|
-
|
|
656
|
-
@retrieve
|
|
657
|
-
def key_info(self, **kwargs):
|
|
658
|
-
"""Retrieve info about the permissions associated with the
|
|
659
|
-
key associated to the given Zotero instance
|
|
660
|
-
"""
|
|
661
|
-
query_string = f"/keys/{self.api_key}"
|
|
662
|
-
return self._build_query(query_string)
|
|
663
|
-
|
|
664
|
-
@retrieve
|
|
665
|
-
def items(self, **kwargs):
|
|
666
|
-
"""Get user items"""
|
|
667
|
-
query_string = "/{t}/{u}/items"
|
|
668
|
-
return self._build_query(query_string)
|
|
669
|
-
|
|
670
|
-
@retrieve
|
|
671
|
-
def settings(self, **kwargs):
|
|
672
|
-
"""Get synced user settings"""
|
|
673
|
-
query_string = "/{t}/{u}/settings"
|
|
674
|
-
return self._build_query(query_string)
|
|
675
|
-
|
|
676
|
-
@retrieve
|
|
677
|
-
def fulltext_item(self, itemkey, **kwargs):
|
|
678
|
-
"""Get full-text content for an item"""
|
|
679
|
-
query_string = (
|
|
680
|
-
f"/{self.library_type}/{self.library_id}/items/{itemkey}/fulltext"
|
|
681
|
-
)
|
|
682
|
-
return self._build_query(query_string)
|
|
683
|
-
|
|
684
|
-
@backoff_check
|
|
685
|
-
def set_fulltext(self, itemkey, payload):
|
|
686
|
-
"""Set full-text data for an item
|
|
687
|
-
<itemkey> should correspond to an existing attachment item.
|
|
688
|
-
payload should be a dict containing three keys:
|
|
689
|
-
'content': the full-text content and either
|
|
690
|
-
For text documents, 'indexedChars' and 'totalChars' OR
|
|
691
|
-
For PDFs, 'indexedPages' and 'totalPages'.
|
|
692
|
-
"""
|
|
693
|
-
headers = {}
|
|
694
|
-
headers.update({"Content-Type": "application/json"})
|
|
695
|
-
return self.client.put(
|
|
696
|
-
url=build_url(
|
|
697
|
-
self.endpoint,
|
|
698
|
-
f"/{self.library_type}/{self.library_id}/items/{itemkey}/fulltext",
|
|
699
|
-
),
|
|
700
|
-
headers=headers,
|
|
701
|
-
data=json.dumps(payload),
|
|
702
|
-
)
|
|
703
|
-
|
|
704
|
-
def new_fulltext(self, since):
|
|
705
|
-
"""Retrieve list of full-text content items and versions which are newer
|
|
706
|
-
than <since>
|
|
707
|
-
"""
|
|
708
|
-
query_string = f"/{self.library_type}/{self.library_id}/fulltext"
|
|
709
|
-
headers = {}
|
|
710
|
-
params = {"since": since}
|
|
711
|
-
self._check_backoff()
|
|
712
|
-
resp = self.client.get(
|
|
713
|
-
build_url(self.endpoint, query_string),
|
|
714
|
-
params=params,
|
|
715
|
-
headers=headers,
|
|
716
|
-
)
|
|
717
|
-
try:
|
|
718
|
-
resp.raise_for_status()
|
|
719
|
-
except httpx.HTTPError as exc:
|
|
720
|
-
error_handler(self, resp, exc)
|
|
721
|
-
backoff = self.request.headers.get("backoff") or self.request.headers.get(
|
|
722
|
-
"retry-after",
|
|
723
|
-
)
|
|
724
|
-
if backoff:
|
|
725
|
-
self._set_backoff(backoff)
|
|
726
|
-
return resp.json()
|
|
727
|
-
|
|
728
|
-
def item_versions(self, **kwargs):
|
|
729
|
-
"""Return dict associating items keys (all no limit by default) to versions.
|
|
730
|
-
Accepts a since= parameter in kwargs to limit the data to those updated since since=
|
|
731
|
-
"""
|
|
732
|
-
if "limit" not in kwargs:
|
|
733
|
-
kwargs["limit"] = None
|
|
734
|
-
kwargs["format"] = "versions"
|
|
735
|
-
return self.items(**kwargs)
|
|
736
|
-
|
|
737
|
-
def collection_versions(self, **kwargs):
|
|
738
|
-
"""Return dict associating collection keys (all no limit by default) to versions.
|
|
739
|
-
Accepts a since= parameter in kwargs to limit the data to those updated since since=
|
|
740
|
-
"""
|
|
741
|
-
if "limit" not in kwargs:
|
|
742
|
-
kwargs["limit"] = None
|
|
743
|
-
kwargs["format"] = "versions"
|
|
744
|
-
return self.collections(**kwargs)
|
|
745
|
-
|
|
746
|
-
def last_modified_version(self, **kwargs):
|
|
747
|
-
"""Get the last modified user or group library version"""
|
|
748
|
-
# This MUST be a multiple-object request, limit param notwithstanding
|
|
749
|
-
self.items(limit=1)
|
|
750
|
-
lmv = self.request.headers.get("last-modified-version", 0)
|
|
751
|
-
return int(lmv)
|
|
752
|
-
|
|
753
|
-
@retrieve
|
|
754
|
-
def top(self, **kwargs):
|
|
755
|
-
"""Get user top-level items"""
|
|
756
|
-
query_string = "/{t}/{u}/items/top"
|
|
757
|
-
return self._build_query(query_string)
|
|
3
|
+
This module maintains backwards compatibility for code that imports from
|
|
4
|
+
pyzotero.zotero. New code should import directly from pyzotero.
|
|
758
5
|
|
|
759
|
-
|
|
760
|
-
|
|
761
|
-
|
|
762
|
-
query_string = "/{t}/{u}/items/trash"
|
|
763
|
-
return self._build_query(query_string)
|
|
6
|
+
Example:
|
|
7
|
+
# Old style (still works)
|
|
8
|
+
from pyzotero.zotero import Zotero
|
|
764
9
|
|
|
765
|
-
|
|
766
|
-
|
|
767
|
-
"""Get saved searches"""
|
|
768
|
-
query_string = "/{t}/{u}/searches"
|
|
769
|
-
return self._build_query(query_string)
|
|
10
|
+
# New style (preferred)
|
|
11
|
+
from pyzotero import Zotero
|
|
770
12
|
|
|
771
|
-
|
|
772
|
-
def deleted(self, **kwargs):
|
|
773
|
-
"""Get all deleted items (requires since= parameter)"""
|
|
774
|
-
if "limit" not in kwargs:
|
|
775
|
-
# Currently deleted API doesn't respect limit leaving it out by
|
|
776
|
-
# default preserves compat
|
|
777
|
-
kwargs["limit"] = None
|
|
778
|
-
query_string = "/{t}/{u}/deleted"
|
|
779
|
-
return self._build_query(query_string)
|
|
780
|
-
|
|
781
|
-
@retrieve
|
|
782
|
-
def item(self, item, **kwargs):
|
|
783
|
-
"""Get a specific item"""
|
|
784
|
-
query_string = f"/{self.library_type}/{self.library_id}/items/{item.upper()}"
|
|
785
|
-
return self._build_query(query_string)
|
|
786
|
-
|
|
787
|
-
@retrieve
|
|
788
|
-
def file(self, item, **kwargs):
|
|
789
|
-
"""Get the file from a specific item"""
|
|
790
|
-
query_string = (
|
|
791
|
-
f"/{self.library_type}/{self.library_id}/items/{item.upper()}/file"
|
|
792
|
-
)
|
|
793
|
-
return self._build_query(query_string, no_params=True)
|
|
794
|
-
|
|
795
|
-
def dump(self, itemkey, filename=None, path=None):
|
|
796
|
-
"""Dump a file attachment to disk, with optional filename and path"""
|
|
797
|
-
if not filename:
|
|
798
|
-
filename = self.item(itemkey)["data"]["filename"]
|
|
799
|
-
pth = Path(path) / filename if path else Path(filename)
|
|
800
|
-
file = self.file(itemkey)
|
|
801
|
-
if self.snapshot:
|
|
802
|
-
self.snapshot = False
|
|
803
|
-
pth = pth.parent / (pth.name + ".zip")
|
|
804
|
-
with pth.open("wb") as f:
|
|
805
|
-
f.write(file)
|
|
806
|
-
|
|
807
|
-
@retrieve
|
|
808
|
-
def children(self, item, **kwargs):
|
|
809
|
-
"""Get a specific item's child items"""
|
|
810
|
-
query_string = (
|
|
811
|
-
f"/{self.library_type}/{self.library_id}/items/{item.upper()}/children"
|
|
812
|
-
)
|
|
813
|
-
return self._build_query(query_string)
|
|
814
|
-
|
|
815
|
-
@retrieve
|
|
816
|
-
def collection_items(self, collection, **kwargs):
|
|
817
|
-
"""Get a specific collection's items"""
|
|
818
|
-
query_string = f"/{self.library_type}/{self.library_id}/collections/{collection.upper()}/items"
|
|
819
|
-
return self._build_query(query_string)
|
|
820
|
-
|
|
821
|
-
@retrieve
|
|
822
|
-
def collection_items_top(self, collection, **kwargs):
|
|
823
|
-
"""Get a specific collection's top-level items"""
|
|
824
|
-
query_string = f"/{self.library_type}/{self.library_id}/collections/{collection.upper()}/items/top"
|
|
825
|
-
return self._build_query(query_string)
|
|
826
|
-
|
|
827
|
-
@retrieve
|
|
828
|
-
def collection_tags(self, collection, **kwargs):
|
|
829
|
-
"""Get a specific collection's tags"""
|
|
830
|
-
query_string = f"/{self.library_type}/{self.library_id}/collections/{collection.upper()}/tags"
|
|
831
|
-
return self._build_query(query_string)
|
|
832
|
-
|
|
833
|
-
@retrieve
|
|
834
|
-
def collection(self, collection, **kwargs):
|
|
835
|
-
"""Get user collection"""
|
|
836
|
-
query_string = (
|
|
837
|
-
f"/{self.library_type}/{self.library_id}/collections/{collection.upper()}"
|
|
838
|
-
)
|
|
839
|
-
return self._build_query(query_string)
|
|
840
|
-
|
|
841
|
-
@retrieve
|
|
842
|
-
def collections(self, **kwargs):
|
|
843
|
-
"""Get user collections"""
|
|
844
|
-
query_string = "/{t}/{u}/collections"
|
|
845
|
-
return self._build_query(query_string)
|
|
846
|
-
|
|
847
|
-
def all_collections(self, collid=None):
|
|
848
|
-
"""Retrieve all collections and subcollections. Works for top-level collections
|
|
849
|
-
or for a specific collection. Works at all collection depths.
|
|
850
|
-
"""
|
|
851
|
-
all_collections = []
|
|
852
|
-
|
|
853
|
-
def subcoll(clct):
|
|
854
|
-
"""Recursively add collections to a flat master list"""
|
|
855
|
-
all_collections.append(clct)
|
|
856
|
-
if clct["meta"].get("numCollections", 0) > 0:
|
|
857
|
-
# add collection to master list & recur with all child
|
|
858
|
-
# collections
|
|
859
|
-
[
|
|
860
|
-
subcoll(c)
|
|
861
|
-
for c in self.everything(self.collections_sub(clct["data"]["key"]))
|
|
862
|
-
]
|
|
863
|
-
|
|
864
|
-
# select all top-level collections or a specific collection and
|
|
865
|
-
# children
|
|
866
|
-
if collid:
|
|
867
|
-
toplevel = [self.collection(collid)]
|
|
868
|
-
else:
|
|
869
|
-
toplevel = self.everything(self.collections_top())
|
|
870
|
-
[subcoll(collection) for collection in toplevel]
|
|
871
|
-
return all_collections
|
|
872
|
-
|
|
873
|
-
@retrieve
|
|
874
|
-
def collections_top(self, **kwargs):
|
|
875
|
-
"""Get top-level user collections"""
|
|
876
|
-
query_string = "/{t}/{u}/collections/top"
|
|
877
|
-
return self._build_query(query_string)
|
|
878
|
-
|
|
879
|
-
@retrieve
|
|
880
|
-
def collections_sub(self, collection, **kwargs):
|
|
881
|
-
"""Get subcollections for a specific collection"""
|
|
882
|
-
query_string = f"/{self.library_type}/{self.library_id}/collections/{collection.upper()}/collections"
|
|
883
|
-
return self._build_query(query_string)
|
|
884
|
-
|
|
885
|
-
@retrieve
|
|
886
|
-
def groups(self, **kwargs):
|
|
887
|
-
"""Get user groups"""
|
|
888
|
-
query_string = "/users/{u}/groups"
|
|
889
|
-
return self._build_query(query_string)
|
|
890
|
-
|
|
891
|
-
@retrieve
|
|
892
|
-
def tags(self, **kwargs):
|
|
893
|
-
"""Get tags"""
|
|
894
|
-
query_string = "/{t}/{u}/tags"
|
|
895
|
-
self.tag_data = True
|
|
896
|
-
return self._build_query(query_string)
|
|
897
|
-
|
|
898
|
-
@retrieve
|
|
899
|
-
def item_tags(self, item, **kwargs):
|
|
900
|
-
"""Get tags for a specific item"""
|
|
901
|
-
query_string = (
|
|
902
|
-
f"/{self.library_type}/{self.library_id}/items/{item.upper()}/tags"
|
|
903
|
-
)
|
|
904
|
-
self.tag_data = True
|
|
905
|
-
return self._build_query(query_string)
|
|
906
|
-
|
|
907
|
-
def all_top(self, **kwargs):
|
|
908
|
-
"""Retrieve all top-level items"""
|
|
909
|
-
return self.everything(self.top(**kwargs))
|
|
910
|
-
|
|
911
|
-
@retrieve
|
|
912
|
-
def follow(self):
|
|
913
|
-
"""Return the result of the call to the URL in the 'Next' link"""
|
|
914
|
-
if n := self.links.get("next"):
|
|
915
|
-
return self._striplocal(n)
|
|
916
|
-
return None
|
|
917
|
-
|
|
918
|
-
def iterfollow(self):
|
|
919
|
-
"""Return generator for self.follow()"""
|
|
920
|
-
# use same criterion as self.follow()
|
|
921
|
-
while True:
|
|
922
|
-
if self.links.get("next"):
|
|
923
|
-
yield self.follow()
|
|
924
|
-
else:
|
|
925
|
-
return
|
|
926
|
-
|
|
927
|
-
def makeiter(self, func):
|
|
928
|
-
"""Return a generator of func's results"""
|
|
929
|
-
# reset the link. This results in an extra API call, yes
|
|
930
|
-
self.links["next"] = self.links["self"]
|
|
931
|
-
return self.iterfollow()
|
|
932
|
-
|
|
933
|
-
def everything(self, query):
|
|
934
|
-
"""Retrieve all items in the library for a particular query
|
|
935
|
-
This method will override the 'limit' parameter if it's been set
|
|
936
|
-
"""
|
|
937
|
-
try:
|
|
938
|
-
items = []
|
|
939
|
-
items.extend(query)
|
|
940
|
-
while self.links.get("next"):
|
|
941
|
-
items.extend(self.follow())
|
|
942
|
-
except TypeError:
|
|
943
|
-
# we have a bibliography object ughh
|
|
944
|
-
items = copy.deepcopy(query)
|
|
945
|
-
while self.links.get("next"):
|
|
946
|
-
items.entries.extend(self.follow().entries)
|
|
947
|
-
return items
|
|
948
|
-
|
|
949
|
-
def get_subset(self, subset):
|
|
950
|
-
"""Retrieve a subset of items
|
|
951
|
-
Accepts a single argument: a list of item IDs
|
|
952
|
-
"""
|
|
953
|
-
if len(subset) > DEFAULT_NUM_ITEMS:
|
|
954
|
-
err = f"You may only retrieve {DEFAULT_NUM_ITEMS} items per call"
|
|
955
|
-
raise ze.TooManyItemsError(err)
|
|
956
|
-
# remember any url parameters that have been set
|
|
957
|
-
params = self.url_params
|
|
958
|
-
retr = []
|
|
959
|
-
for itm in subset:
|
|
960
|
-
retr.append(self.item(itm))
|
|
961
|
-
self.url_params = params
|
|
962
|
-
# clean up URL params when we're finished
|
|
963
|
-
self.url_params = None
|
|
964
|
-
return retr
|
|
965
|
-
|
|
966
|
-
# The following methods process data returned by Read API calls
|
|
967
|
-
def _json_processor(self, retrieved):
|
|
968
|
-
"""Format and return data from API calls which return Items"""
|
|
969
|
-
json_kwargs = {}
|
|
970
|
-
if self.preserve_json_order:
|
|
971
|
-
json_kwargs["object_pairs_hook"] = OrderedDict
|
|
972
|
-
# send entries to _tags_data if there's no JSON
|
|
973
|
-
try:
|
|
974
|
-
items = [
|
|
975
|
-
json.loads(e["content"][0]["value"], **json_kwargs)
|
|
976
|
-
for e in retrieved.entries
|
|
977
|
-
]
|
|
978
|
-
except KeyError:
|
|
979
|
-
return self._tags_data(retrieved)
|
|
980
|
-
return items
|
|
981
|
-
|
|
982
|
-
def _csljson_processor(self, retrieved):
|
|
983
|
-
"""Return a list of dicts which are dumped CSL JSON"""
|
|
984
|
-
items = []
|
|
985
|
-
json_kwargs = {}
|
|
986
|
-
if self.preserve_json_order:
|
|
987
|
-
json_kwargs["object_pairs_hook"] = OrderedDict
|
|
988
|
-
items = [
|
|
989
|
-
json.loads(entry["content"][0]["value"], **json_kwargs)
|
|
990
|
-
for entry in retrieved.entries
|
|
991
|
-
]
|
|
992
|
-
self.url_params = None
|
|
993
|
-
return items
|
|
994
|
-
|
|
995
|
-
def _bib_processor(self, retrieved):
|
|
996
|
-
"""Return a list of strings formatted as HTML bibliography entries"""
|
|
997
|
-
items = [bib["content"][0]["value"] for bib in retrieved.entries]
|
|
998
|
-
self.url_params = None
|
|
999
|
-
return items
|
|
1000
|
-
|
|
1001
|
-
def _citation_processor(self, retrieved):
|
|
1002
|
-
"""Return a list of strings formatted as HTML citation entries"""
|
|
1003
|
-
items = [cit["content"][0]["value"] for cit in retrieved.entries]
|
|
1004
|
-
self.url_params = None
|
|
1005
|
-
return items
|
|
1006
|
-
|
|
1007
|
-
def _tags_data(self, retrieved):
|
|
1008
|
-
"""Format and return data from API calls which return Tags"""
|
|
1009
|
-
self.url_params = None
|
|
1010
|
-
return [t["tag"] for t in retrieved]
|
|
1011
|
-
|
|
1012
|
-
# The following methods are Write API calls
|
|
1013
|
-
def item_template(self, itemtype, linkmode=None):
|
|
1014
|
-
"""Get a template for a new item"""
|
|
1015
|
-
# if we have a template and it hasn't been updated since we stored it
|
|
1016
|
-
template_name = f"item_template_{itemtype}_{linkmode or ''}"
|
|
1017
|
-
params = {"itemType": itemtype}
|
|
1018
|
-
# Set linkMode parameter for API request if itemtype is attachment
|
|
1019
|
-
if itemtype == "attachment":
|
|
1020
|
-
params["linkMode"] = linkmode
|
|
1021
|
-
self.add_parameters(**params)
|
|
1022
|
-
query_string = "/items/new"
|
|
1023
|
-
if self.templates.get(template_name) and not self._updated(
|
|
1024
|
-
query_string,
|
|
1025
|
-
self.templates[template_name],
|
|
1026
|
-
template_name,
|
|
1027
|
-
):
|
|
1028
|
-
return copy.deepcopy(self.templates[template_name]["tmplt"])
|
|
1029
|
-
# otherwise perform a normal request and cache the response
|
|
1030
|
-
retrieved = self._retrieve_data(query_string)
|
|
1031
|
-
return self._cache(retrieved, template_name)
|
|
1032
|
-
|
|
1033
|
-
def _attachment_template(self, attachment_type):
|
|
1034
|
-
"""Return a new attachment template of the required type:
|
|
1035
|
-
imported_file
|
|
1036
|
-
imported_url
|
|
1037
|
-
linked_file
|
|
1038
|
-
linked_url
|
|
1039
|
-
"""
|
|
1040
|
-
return self.item_template("attachment", linkmode=attachment_type)
|
|
1041
|
-
|
|
1042
|
-
def _attachment(self, payload, parentid=None):
|
|
1043
|
-
"""Create attachments
|
|
1044
|
-
accepts a list of one or more attachment template dicts
|
|
1045
|
-
and an optional parent Item ID. If this is specified,
|
|
1046
|
-
attachments are created under this ID
|
|
1047
|
-
"""
|
|
1048
|
-
attachment = Zupload(self, payload, parentid)
|
|
1049
|
-
return attachment.upload()
|
|
1050
|
-
|
|
1051
|
-
@ss_wrap
|
|
1052
|
-
def show_operators(self):
|
|
1053
|
-
"""Show available saved search operators"""
|
|
1054
|
-
return self.savedsearch.operators
|
|
1055
|
-
|
|
1056
|
-
@ss_wrap
|
|
1057
|
-
def show_conditions(self):
|
|
1058
|
-
"""Show available saved search conditions"""
|
|
1059
|
-
return self.savedsearch.conditions_operators.keys()
|
|
1060
|
-
|
|
1061
|
-
@ss_wrap
|
|
1062
|
-
def show_condition_operators(self, condition):
|
|
1063
|
-
"""Show available operators for a given saved search condition"""
|
|
1064
|
-
# dict keys of allowed operators for the current condition
|
|
1065
|
-
permitted_operators = self.savedsearch.conditions_operators.get(condition)
|
|
1066
|
-
# transform these into values
|
|
1067
|
-
return {self.savedsearch.operators.get(op) for op in permitted_operators}
|
|
1068
|
-
|
|
1069
|
-
@ss_wrap
|
|
1070
|
-
def saved_search(self, name, conditions):
|
|
1071
|
-
"""Create a saved search. conditions is a list of dicts
|
|
1072
|
-
containing search conditions and must contain the following str keys:
|
|
1073
|
-
condition, operator, value
|
|
1074
|
-
"""
|
|
1075
|
-
self.savedsearch._validate(conditions)
|
|
1076
|
-
payload = [{"name": name, "conditions": conditions}]
|
|
1077
|
-
headers = {"Zotero-Write-Token": token()}
|
|
1078
|
-
self._check_backoff()
|
|
1079
|
-
req = self.client.post(
|
|
1080
|
-
url=build_url(
|
|
1081
|
-
self.endpoint,
|
|
1082
|
-
f"/{self.library_type}/{self.library_id}/searches",
|
|
1083
|
-
),
|
|
1084
|
-
headers=headers,
|
|
1085
|
-
data=json.dumps(payload),
|
|
1086
|
-
)
|
|
1087
|
-
self.request = req
|
|
1088
|
-
try:
|
|
1089
|
-
req.raise_for_status()
|
|
1090
|
-
except httpx.HTTPError as exc:
|
|
1091
|
-
error_handler(self, req, exc)
|
|
1092
|
-
backoff = self.request.headers.get("backoff") or self.request.headers.get(
|
|
1093
|
-
"retry-after",
|
|
1094
|
-
)
|
|
1095
|
-
if backoff:
|
|
1096
|
-
self._set_backoff(backoff)
|
|
1097
|
-
return req.json()
|
|
1098
|
-
|
|
1099
|
-
@ss_wrap
|
|
1100
|
-
def delete_saved_search(self, keys):
|
|
1101
|
-
"""Delete one or more saved searches by passing a list of one or more
|
|
1102
|
-
unique search keys
|
|
1103
|
-
"""
|
|
1104
|
-
headers = {"Zotero-Write-Token": token()}
|
|
1105
|
-
self._check_backoff()
|
|
1106
|
-
req = self.client.delete(
|
|
1107
|
-
url=build_url(
|
|
1108
|
-
self.endpoint,
|
|
1109
|
-
f"/{self.library_type}/{self.library_id}/searches",
|
|
1110
|
-
),
|
|
1111
|
-
headers=headers,
|
|
1112
|
-
params={"searchKey": ",".join(keys)},
|
|
1113
|
-
)
|
|
1114
|
-
self.request = req
|
|
1115
|
-
try:
|
|
1116
|
-
req.raise_for_status()
|
|
1117
|
-
except httpx.HTTPError as exc:
|
|
1118
|
-
error_handler(self, req, exc)
|
|
1119
|
-
backoff = self.request.headers.get("backoff") or self.request.headers.get(
|
|
1120
|
-
"retry-after",
|
|
1121
|
-
)
|
|
1122
|
-
if backoff:
|
|
1123
|
-
self._set_backoff(backoff)
|
|
1124
|
-
return req.status_code
|
|
1125
|
-
|
|
1126
|
-
def upload_attachments(self, attachments, parentid=None, basedir=None):
|
|
1127
|
-
"""Upload files to the already created (but never uploaded) attachments"""
|
|
1128
|
-
return Zupload(self, attachments, parentid, basedir=basedir).upload()
|
|
1129
|
-
|
|
1130
|
-
def add_tags(self, item, *tags):
|
|
1131
|
-
"""Add one or more tags to a retrieved item,
|
|
1132
|
-
then update it on the server
|
|
1133
|
-
Accepts a dict, and one or more tags to add to it
|
|
1134
|
-
Returns the updated item from the server
|
|
1135
|
-
"""
|
|
1136
|
-
# Make sure there's a tags field, or add one
|
|
1137
|
-
if not item.get("data", {}).get("tags"):
|
|
1138
|
-
item["data"]["tags"] = []
|
|
1139
|
-
for tag in tags:
|
|
1140
|
-
item["data"]["tags"].append({"tag": f"{tag}"})
|
|
1141
|
-
# make sure everything's OK
|
|
1142
|
-
self.check_items([item])
|
|
1143
|
-
return self.update_item(item)
|
|
1144
|
-
|
|
1145
|
-
def check_items(self, items):
|
|
1146
|
-
"""Check that items to be created contain no invalid dict keys
|
|
1147
|
-
Accepts a single argument: a list of one or more dicts
|
|
1148
|
-
The retrieved fields are cached and re-used until a 304 call fails
|
|
1149
|
-
"""
|
|
1150
|
-
params = {"locale": self.locale, "timeout": timeout}
|
|
1151
|
-
query_string = "/itemFields"
|
|
1152
|
-
r = Request(
|
|
1153
|
-
"GET",
|
|
1154
|
-
build_url(self.endpoint, query_string),
|
|
1155
|
-
params=params,
|
|
1156
|
-
)
|
|
1157
|
-
with httpx.Client() as client:
|
|
1158
|
-
response = client.send(r)
|
|
1159
|
-
# now split up the URL
|
|
1160
|
-
result = urlparse(str(response.url))
|
|
1161
|
-
# construct cache key
|
|
1162
|
-
cachekey = result.path + "_" + result.query
|
|
1163
|
-
if self.templates.get(cachekey) and not self._updated(
|
|
1164
|
-
query_string,
|
|
1165
|
-
self.templates[cachekey],
|
|
1166
|
-
cachekey,
|
|
1167
|
-
):
|
|
1168
|
-
template = {t["field"] for t in self.templates[cachekey]["tmplt"]}
|
|
1169
|
-
else:
|
|
1170
|
-
template = {t["field"] for t in self.item_fields()}
|
|
1171
|
-
# add fields we know to be OK
|
|
1172
|
-
template |= {
|
|
1173
|
-
"path",
|
|
1174
|
-
"tags",
|
|
1175
|
-
"notes",
|
|
1176
|
-
"itemType",
|
|
1177
|
-
"creators",
|
|
1178
|
-
"mimeType",
|
|
1179
|
-
"linkMode",
|
|
1180
|
-
"note",
|
|
1181
|
-
"charset",
|
|
1182
|
-
"dateAdded",
|
|
1183
|
-
"version",
|
|
1184
|
-
"collections",
|
|
1185
|
-
"dateModified",
|
|
1186
|
-
"relations",
|
|
1187
|
-
# attachment items
|
|
1188
|
-
"parentItem",
|
|
1189
|
-
"mtime",
|
|
1190
|
-
"contentType",
|
|
1191
|
-
"md5",
|
|
1192
|
-
"filename",
|
|
1193
|
-
"inPublications",
|
|
1194
|
-
# annotation fields
|
|
1195
|
-
"annotationText",
|
|
1196
|
-
"annotationColor",
|
|
1197
|
-
"annotationType",
|
|
1198
|
-
"annotationPageLabel",
|
|
1199
|
-
"annotationPosition",
|
|
1200
|
-
"annotationSortIndex",
|
|
1201
|
-
"annotationComment",
|
|
1202
|
-
"annotationAuthorName",
|
|
1203
|
-
}
|
|
1204
|
-
template |= set(self.temp_keys)
|
|
1205
|
-
processed_items = []
|
|
1206
|
-
for pos, item in enumerate(items):
|
|
1207
|
-
if set(item) == {"links", "library", "version", "meta", "key", "data"}:
|
|
1208
|
-
itm = item["data"]
|
|
1209
|
-
else:
|
|
1210
|
-
itm = item
|
|
1211
|
-
to_check = set(itm.keys())
|
|
1212
|
-
difference = to_check.difference(template)
|
|
1213
|
-
if difference:
|
|
1214
|
-
err = f"Invalid keys present in item {pos + 1}: {' '.join(i for i in difference)}"
|
|
1215
|
-
raise ze.InvalidItemFieldsError(
|
|
1216
|
-
err,
|
|
1217
|
-
)
|
|
1218
|
-
processed_items.append(itm)
|
|
1219
|
-
return processed_items
|
|
1220
|
-
|
|
1221
|
-
@tcache
|
|
1222
|
-
def item_types(self):
|
|
1223
|
-
"""Get all available item types"""
|
|
1224
|
-
# Check for a valid cached version
|
|
1225
|
-
params = {"locale": self.locale}
|
|
1226
|
-
query_string = "/itemTypes"
|
|
1227
|
-
return query_string, params
|
|
1228
|
-
|
|
1229
|
-
@tcache
|
|
1230
|
-
def creator_fields(self):
|
|
1231
|
-
"""Get localised creator fields"""
|
|
1232
|
-
# Check for a valid cached version
|
|
1233
|
-
params = {"locale": self.locale}
|
|
1234
|
-
query_string = "/creatorFields"
|
|
1235
|
-
return query_string, params
|
|
1236
|
-
|
|
1237
|
-
@tcache
|
|
1238
|
-
def item_type_fields(self, itemtype):
|
|
1239
|
-
"""Get all valid fields for an item"""
|
|
1240
|
-
params = {"itemType": itemtype, "locale": self.locale}
|
|
1241
|
-
query_string = "/itemTypeFields"
|
|
1242
|
-
return query_string, params
|
|
1243
|
-
|
|
1244
|
-
@tcache
|
|
1245
|
-
def item_creator_types(self, itemtype):
|
|
1246
|
-
"""Get all available creator types for an item"""
|
|
1247
|
-
params = {"itemType": itemtype, "locale": self.locale}
|
|
1248
|
-
query_string = "/itemTypeCreatorTypes"
|
|
1249
|
-
return query_string, params
|
|
1250
|
-
|
|
1251
|
-
@tcache
|
|
1252
|
-
def item_fields(self):
|
|
1253
|
-
"""Get all available item fields"""
|
|
1254
|
-
# Check for a valid cached version
|
|
1255
|
-
params = {"locale": self.locale}
|
|
1256
|
-
query_string = "/itemFields"
|
|
1257
|
-
return query_string, params
|
|
1258
|
-
|
|
1259
|
-
def item_attachment_link_modes():
|
|
1260
|
-
"""Get all available link mode types.
|
|
1261
|
-
Note: No viable REST API route was found for this, so I tested and built a list from documentation found
|
|
1262
|
-
here - https://www.zotero.org/support/dev/web_api/json
|
|
1263
|
-
"""
|
|
1264
|
-
return ["imported_file", "imported_url", "linked_file", "linked_url"]
|
|
1265
|
-
|
|
1266
|
-
def create_items(self, payload, parentid=None, last_modified=None):
|
|
1267
|
-
"""Create new Zotero items
|
|
1268
|
-
Accepts two arguments:
|
|
1269
|
-
a list containing one or more item dicts
|
|
1270
|
-
an optional parent item ID.
|
|
1271
|
-
Note that this can also be used to update existing items
|
|
1272
|
-
"""
|
|
1273
|
-
if len(payload) > DEFAULT_NUM_ITEMS:
|
|
1274
|
-
msg = f"You may only create up to {DEFAULT_NUM_ITEMS} items per call"
|
|
1275
|
-
raise ze.TooManyItemsError(msg)
|
|
1276
|
-
# TODO: strip extra data if it's an existing item
|
|
1277
|
-
headers = {"Zotero-Write-Token": token(), "Content-Type": "application/json"}
|
|
1278
|
-
if last_modified is not None:
|
|
1279
|
-
headers["If-Unmodified-Since-Version"] = str(last_modified)
|
|
1280
|
-
to_send = list(self._cleanup(*payload, allow=("key")))
|
|
1281
|
-
self._check_backoff()
|
|
1282
|
-
req = self.client.post(
|
|
1283
|
-
url=build_url(
|
|
1284
|
-
self.endpoint,
|
|
1285
|
-
f"/{self.library_type}/{self.library_id}/items",
|
|
1286
|
-
),
|
|
1287
|
-
content=json.dumps(to_send),
|
|
1288
|
-
headers=dict(headers),
|
|
1289
|
-
)
|
|
1290
|
-
self.request = req
|
|
1291
|
-
try:
|
|
1292
|
-
req.raise_for_status()
|
|
1293
|
-
except httpx.HTTPError as exc:
|
|
1294
|
-
error_handler(self, req, exc)
|
|
1295
|
-
resp = req.json()
|
|
1296
|
-
backoff = self.request.headers.get("backoff") or self.request.headers.get(
|
|
1297
|
-
"retry-after",
|
|
1298
|
-
)
|
|
1299
|
-
if backoff:
|
|
1300
|
-
self._set_backoff(backoff)
|
|
1301
|
-
if parentid:
|
|
1302
|
-
# we need to create child items using PATCH
|
|
1303
|
-
# TODO: handle possibility of item creation + failed parent
|
|
1304
|
-
# attachment
|
|
1305
|
-
uheaders = {
|
|
1306
|
-
"If-Unmodified-Since-Version": req.headers["last-modified-version"],
|
|
1307
|
-
}
|
|
1308
|
-
for value in resp["success"].values():
|
|
1309
|
-
payload = json.dumps({"parentItem": parentid})
|
|
1310
|
-
self._check_backoff()
|
|
1311
|
-
presp = self.client.patch(
|
|
1312
|
-
url=build_url(
|
|
1313
|
-
self.endpoint,
|
|
1314
|
-
f"/{self.library_type}/{self.library_id}/items/{value}",
|
|
1315
|
-
),
|
|
1316
|
-
data=payload,
|
|
1317
|
-
headers=dict(uheaders),
|
|
1318
|
-
)
|
|
1319
|
-
self.request = presp
|
|
1320
|
-
try:
|
|
1321
|
-
presp.raise_for_status()
|
|
1322
|
-
except httpx.HTTPError as exc:
|
|
1323
|
-
error_handler(self, presp, exc)
|
|
1324
|
-
backoff = presp.headers.get("backoff") or presp.headers.get(
|
|
1325
|
-
"retry-after",
|
|
1326
|
-
)
|
|
1327
|
-
if backoff:
|
|
1328
|
-
self._set_backoff(backoff)
|
|
1329
|
-
return resp
|
|
1330
|
-
|
|
1331
|
-
-    def create_collection(self, payload, last_modified=None):
-        """Alias for create_collections to preserve backward compatibility"""
-        return self.create_collections(payload, last_modified)
-
-    def create_collections(self, payload, last_modified=None):
-        """Create new Zotero collections
-        Accepts one argument, a list of dicts containing the following keys:
-
-        'name': the name of the collection
-        'parentCollection': OPTIONAL, the parent collection to which you wish to add this
-        """
-        # no point in proceeding if there's no 'name' key
-        for item in payload:
-            if "name" not in item:
-                msg = "The dict you pass must include a 'name' key"
-                raise ze.ParamNotPassedError(msg)
-            # add a blank 'parentCollection' key if it hasn't been passed
-            if "parentCollection" not in item:
-                item["parentCollection"] = ""
-        headers = {"Zotero-Write-Token": token()}
-        if last_modified is not None:
-            headers["If-Unmodified-Since-Version"] = str(last_modified)
-        self._check_backoff()
-        req = self.client.post(
-            url=build_url(
-                self.endpoint,
-                f"/{self.library_type}/{self.library_id}/collections",
-            ),
-            headers=headers,
-            content=json.dumps(payload),
-        )
-        self.request = req
-        try:
-            req.raise_for_status()
-        except httpx.HTTPError as exc:
-            error_handler(self, req, exc)
-        backoff = req.headers.get("backoff") or req.headers.get("retry-after")
-        if backoff:
-            self._set_backoff(backoff)
-        return req.json()
-
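create_collections validates that every dict carries a 'name' and back-fills an empty 'parentCollection'. A hedged payload sketch (the parent collection key is hypothetical):

    payload = [
        {"name": "Reading list"},                                 # top-level collection
        {"name": "2024 papers", "parentCollection": "ABCD2345"},  # hypothetical parent key
    ]
    resp = zot.create_collections(payload)  # returns the API's JSON response as shown above
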
-    @backoff_check
-    def update_collection(self, payload, last_modified=None):
-        """Update a Zotero collection property such as 'name'
-        Accepts one argument, a dict containing collection data retrieved
-        using e.g. 'collections()'
-        """
-        modified = payload["version"]
-        if last_modified is not None:
-            modified = last_modified
-        key = payload["key"]
-        headers = {"If-Unmodified-Since-Version": str(modified)}
-        headers.update({"Content-Type": "application/json"})
-        return self.client.put(
-            url=build_url(
-                self.endpoint,
-                f"/{self.library_type}/{self.library_id}/collections/{key}",
-            ),
-            headers=headers,
-            content=json.dumps(payload),
-        )
-
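update_collection performs a conditional PUT: the payload's own 'version' (or an explicit last_modified) becomes the If-Unmodified-Since-Version header, so stale writes fail server-side. A fetch-modify-write sketch, assuming a hypothetical collection key:

    coll = zot.collection("ABCD2345")         # hypothetical key
    coll["data"]["name"] = "Renamed collection"
    zot.update_collection(coll["data"])       # 'data' carries both 'key' and 'version'
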
-    def attachment_simple(self, files, parentid=None):
-        """Add attachments using filenames as title
-        Arguments:
-        One or more file paths to add as attachments:
-        An optional Item ID, which will create child attachments
-        """
-        orig = self._attachment_template("imported_file")
-        to_add = [orig.copy() for fls in files]
-        for idx, tmplt in enumerate(to_add):
-            tmplt["title"] = Path(files[idx]).name
-            tmplt["filename"] = files[idx]
-        if parentid:
-            return self._attachment(to_add, parentid)
-        return self._attachment(to_add)
-
-    def attachment_both(self, files, parentid=None):
-        """Add child attachments using title, filename
-        Arguments:
-        One or more lists or tuples containing title, file path
-        An optional Item ID, which will create child attachments
-        """
-        orig = self._attachment_template("imported_file")
-        to_add = [orig.copy() for f in files]
-        for idx, tmplt in enumerate(to_add):
-            tmplt["title"] = files[idx][0]
-            tmplt["filename"] = files[idx][1]
-        if parentid:
-            return self._attachment(to_add, parentid)
-        return self._attachment(to_add)
-
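Both helpers build 'imported_file' attachment templates and hand them to the internal _attachment machinery; attachment_simple titles each attachment with its basename, while attachment_both takes explicit (title, path) pairs. A sketch with hypothetical paths and parent key:

    zot.attachment_simple(["/tmp/paper.pdf"])                               # title: "paper.pdf"
    zot.attachment_both([("Final draft", "/tmp/draft-v2.pdf")], "ITEMKEY1")  # hypothetical key
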
-    @backoff_check
-    def update_item(self, payload, last_modified=None):
-        """Update an existing item
-        Accepts one argument, a dict containing Item data
-        """
-        to_send = self.check_items([payload])[0]
-        modified = payload["version"] if last_modified is None else last_modified
-        ident = payload["key"]
-        headers = {"If-Unmodified-Since-Version": str(modified)}
-        return self.client.patch(
-            url=build_url(
-                self.endpoint,
-                f"/{self.library_type}/{self.library_id}/items/{ident}",
-            ),
-            headers=headers,
-            content=json.dumps(to_send),
-        )
-
-    def update_items(self, payload):
-        """Update existing items
-        Accepts one argument, a list of dicts containing Item data
-        """
-        to_send = [self.check_items([p])[0] for p in payload]
-        # the API only accepts 50 items at a time, so we have to split
-        # anything longer
-        for chunk in chunks(to_send, DEFAULT_NUM_ITEMS):
-            self._check_backoff()
-            req = self.client.post(
-                url=build_url(
-                    self.endpoint,
-                    f"/{self.library_type}/{self.library_id}/items/",
-                ),
-                data=json.dumps(chunk),
-            )
-            self.request = req
-            try:
-                req.raise_for_status()
-            except httpx.HTTPError as exc:
-                error_handler(self, req, exc)
-            backoff = req.headers.get("backoff") or req.headers.get("retry-after")
-            if backoff:
-                self._set_backoff(backoff)
-        return True
-
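update_items relies on the module's chunks helper to respect the API's 50-items-per-write limit (DEFAULT_NUM_ITEMS). A sketch of the assumed batching behaviour; this is the conventional implementation, not copied from the diff:

    def chunks(iterable, n):
        """Yield successive n-sized slices, e.g. 120 items -> batches of 50, 50, 20."""
        for i in range(0, len(iterable), n):
            yield iterable[i : i + n]
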
-    def update_collections(self, payload):
-        """Update existing collections
-        Accepts one argument, a list of dicts containing Collection data
-        """
-        to_send = [self.check_items([p])[0] for p in payload]
-        # the API only accepts 50 items at a time, so we have to split
-        # anything longer
-        for chunk in chunks(to_send, DEFAULT_NUM_ITEMS):
-            self._check_backoff()
-            req = self.client.post(
-                url=build_url(
-                    self.endpoint,
-                    f"/{self.library_type}/{self.library_id}/collections/",
-                ),
-                data=json.dumps(chunk),
-            )
-            self.request = req
-            try:
-                req.raise_for_status()
-            except httpx.HTTPError as exc:
-                error_handler(self, req, exc)
-            backoff = req.headers.get("backoff") or req.headers.get("retry-after")
-            if backoff:
-                self._set_backoff(backoff)
-        return True
-
-    @backoff_check
-    def addto_collection(self, collection, payload):
-        """Add item to a collection
-        Accepts two arguments:
-        The collection ID, and an item dict
-        """
-        ident = payload["key"]
-        modified = payload["version"]
-        # add the collection data to the item
-        modified_collections = payload["data"]["collections"] + [collection]
-        headers = {"If-Unmodified-Since-Version": str(modified)}
-        return self.client.patch(
-            url=build_url(
-                self.endpoint,
-                f"/{self.library_type}/{self.library_id}/items/{ident}",
-            ),
-            data=json.dumps({"collections": modified_collections}),
-            headers=headers,
-        )
-
-    @backoff_check
-    def deletefrom_collection(self, collection, payload):
-        """Delete an item from a collection
-        Accepts two arguments:
-        The collection ID, and an item dict
-        """
-        ident = payload["key"]
-        modified = payload["version"]
-        # strip the collection data from the item
-        modified_collections = [
-            c for c in payload["data"]["collections"] if c != collection
-        ]
-        headers = {"If-Unmodified-Since-Version": str(modified)}
-        return self.client.patch(
-            url=build_url(
-                self.endpoint,
-                f"/{self.library_type}/{self.library_id}/items/{ident}",
-            ),
-            data=json.dumps({"collections": modified_collections}),
-            headers=headers,
-        )
-
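addto_collection and deletefrom_collection are symmetric: each PATCHes the item's 'collections' array, one appending the collection key, the other filtering it out, and each is guarded by the item's version. A round-trip sketch with hypothetical keys:

    item = zot.item("ITEMKEY1")                  # hypothetical item key
    zot.addto_collection("ABCD2345", item)       # appends to item["data"]["collections"]
    item = zot.item("ITEMKEY1")                  # re-fetch to pick up the new version
    zot.deletefrom_collection("ABCD2345", item)  # filters the key back out
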
-    @backoff_check
-    def delete_tags(self, *payload):
-        """Delete a group of tags
-        pass in up to 50 tags, or use *[tags]
-
-        """
-        if len(payload) > DEFAULT_NUM_ITEMS:
-            msg = f"Only {DEFAULT_NUM_ITEMS} tags or fewer may be deleted"
-            raise ze.TooManyItemsError(msg)
-        modified_tags = " || ".join(list(payload))
-        # first, get version data by getting one tag
-        self.tags(limit=1)
-        headers = {
-            "If-Unmodified-Since-Version": self.request.headers[
-                "last-modified-version"
-            ],
-        }
-        return self.client.delete(
-            url=build_url(
-                self.endpoint,
-                f"/{self.library_type}/{self.library_id}/tags",
-            ),
-            params={"tag": modified_tags},
-            headers=headers,
-        )
-
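delete_tags first fetches a single tag purely to learn the library's current version (read from the last-modified-version response header), then issues one DELETE with the tags joined by ' || '. A usage sketch, noting the 50-tag ceiling enforced above:

    zot.delete_tags("to-read", "duplicate")  # up to 50 tags per call
    # or unpack a list:
    stale = ["temp1", "temp2", "temp3"]
    zot.delete_tags(*stale)
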
-    @backoff_check
-    def delete_item(self, payload, last_modified=None):
-        """Delete Items from a Zotero library
-        Accepts a single argument:
-        a dict containing item data
-        OR a list of dicts containing item data
-        """
-        params = None
-        if isinstance(payload, list):
-            params = {"itemKey": ",".join([p["key"] for p in payload])}
-            if last_modified is not None:
-                modified = last_modified
-            else:
-                modified = payload[0]["version"]
-            url = build_url(
-                self.endpoint,
-                f"/{self.library_type}/{self.library_id}/items",
-            )
-        else:
-            ident = payload["key"]
-            if last_modified is not None:
-                modified = last_modified
-            else:
-                modified = payload["version"]
-            url = build_url(
-                self.endpoint,
-                f"/{self.library_type}/{self.library_id}/items/{ident}",
-            )
-        headers = {"If-Unmodified-Since-Version": str(modified)}
-        return self.client.delete(url=url, params=params, headers=headers)
-
-    @backoff_check
-    def delete_collection(self, payload, last_modified=None):
-        """Delete a Collection from a Zotero library
-        Accepts a single argument:
-        a dict containing collection data
-        OR a list of dicts containing collection data
-        """
-        params = None
-        if isinstance(payload, list):
-            params = {"collectionKey": ",".join([p["key"] for p in payload])}
-            if last_modified is not None:
-                modified = last_modified
-            else:
-                modified = payload[0]["version"]
-            url = build_url(
-                self.endpoint,
-                f"/{self.library_type}/{self.library_id}/collections",
-            )
-        else:
-            ident = payload["key"]
-            if last_modified is not None:
-                modified = last_modified
-            else:
-                modified = payload["version"]
-            url = build_url(
-                self.endpoint,
-                f"/{self.library_type}/{self.library_id}/collections/{ident}",
-            )
-        headers = {"If-Unmodified-Since-Version": str(modified)}
-        return self.client.delete(url=url, params=params, headers=headers)
-
-
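Both delete methods accept either a single dict or a list; a list collapses into one DELETE with an itemKey or collectionKey query parameter, using the first payload's version for the If-Unmodified-Since-Version guard. A sketch with hypothetical keys:

    item = zot.item("ITEMKEY1")   # hypothetical key
    zot.delete_item(item)         # single deletion
    colls = zot.collections(limit=3)
    zot.delete_collection(colls)  # batched into ?collectionKey=K1,K2,K3
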
-def error_handler(zot, req, exc=None):
-    """Error handler for HTTP requests"""
-    error_codes = {
-        400: ze.UnsupportedParamsError,
-        401: ze.UserNotAuthorisedError,
-        403: ze.UserNotAuthorisedError,
-        404: ze.ResourceNotFoundError,
-        409: ze.ConflictError,
-        412: ze.PreConditionFailedError,
-        413: ze.RequestEntityTooLargeError,
-        428: ze.PreConditionRequiredError,
-        429: ze.TooManyRequestsError,
-    }
-
-    def err_msg(req):
-        """Return a nicely-formatted error message"""
-        return f"\nCode: {req.status_code}\nURL: {req.url!s}\nMethod: {req.request.method}\nResponse: {req.text}"
-
-    if error_codes.get(req.status_code):
-        # check to see whether it's a 429
-        if req.status_code == httpx.codes.TOO_MANY_REQUESTS:
-            # try to get backoff or delay duration
-            delay = req.headers.get("backoff") or req.headers.get("retry-after")
-            if not delay:
-                msg = "You are being rate-limited and no backoff or retry duration has been received from the server. Try again later"
-                raise ze.TooManyRetriesError(
-                    msg,
-                )
-            zot._set_backoff(delay)
-        elif not exc:
-            raise error_codes.get(req.status_code)(err_msg(req))
-        else:
-            raise error_codes.get(req.status_code)(err_msg(req)) from exc
-    elif not exc:
-        raise ze.HTTPError(err_msg(req))
-    else:
-        raise ze.HTTPError(err_msg(req)) from exc
-
-
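Because error_handler converts HTTP failures into pyzotero's typed exceptions, callers can catch them by class. A hedged sketch (the key and credentials are placeholders):

    from pyzotero import zotero, zotero_errors

    zot = zotero.Zotero("123456", "user", "hypothetical-api-key")
    try:
        zot.item("NOSUCHKEY")  # hypothetical item key
    except zotero_errors.ResourceNotFoundError as err:
        print(f"Item not found: {err}")
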
-class SavedSearch:
-    """Saved search functionality
-    See https://github.com/zotero/zotero/blob/master/chrome/content/zotero/xpcom/data/searchConditions.js
-    """
-
-    def __init__(self, zinstance):
-        super().__init__()
-        self.zinstance = zinstance
-        self.searchkeys = ("condition", "operator", "value")
-        # always exclude these fields from zotero.item_keys()
-        self.excluded_items = (
-            "accessDate",
-            "date",
-            "pages",
-            "section",
-            "seriesNumber",
-            "issue",
-        )
-        self.operators = {
-            # this is a bit hacky, but I can't be bothered with Python's enums
-            "is": "is",
-            "isNot": "isNot",
-            "beginsWith": "beginsWith",
-            "contains": "contains",
-            "doesNotContain": "doesNotContain",
-            "isLessThan": "isLessThan",
-            "isGreaterThan": "isGreaterThan",
-            "isBefore": "isBefore",
-            "isAfter": "isAfter",
-            "isInTheLast": "isInTheLast",
-            "any": "any",
-            "all": "all",
-            "true": "true",
-            "false": "false",
-        }
-        # common groupings of operators
-        self.groups = {
-            "A": (self.operators["true"], self.operators["false"]),
-            "B": (self.operators["any"], self.operators["all"]),
-            "C": (
-                self.operators["is"],
-                self.operators["isNot"],
-                self.operators["contains"],
-                self.operators["doesNotContain"],
-            ),
-            "D": (self.operators["is"], self.operators["isNot"]),
-            "E": (
-                self.operators["is"],
-                self.operators["isNot"],
-                self.operators["isBefore"],
-                self.operators["isInTheLast"],
-            ),
-            "F": (self.operators["contains"], self.operators["doesNotContain"]),
-            "G": (
-                self.operators["is"],
-                self.operators["isNot"],
-                self.operators["contains"],
-                self.operators["doesNotContain"],
-                self.operators["isLessThan"],
-                self.operators["isGreaterThan"],
-            ),
-            "H": (
-                self.operators["is"],
-                self.operators["isNot"],
-                self.operators["beginsWith"],
-            ),
-            "I": (self.operators["is"]),
-        }
-        self.conditions_operators = {
-            "deleted": self.groups["A"],
-            "noChildren": self.groups["A"],
-            "unfiled": self.groups["A"],
-            "publications": self.groups["A"],
-            "retracted": self.groups["A"],
-            "includeParentsAndChildren": self.groups["A"],
-            "includeParents": self.groups["A"],
-            "includeChildren": self.groups["A"],
-            "recursive": self.groups["A"],
-            "joinMode": self.groups["B"],
-            "quicksearch-titleCreatorYear": self.groups["C"],
-            "quicksearch-titleCreatorYearNote": self.groups["C"],
-            "quicksearch-fields": self.groups["C"],
-            "quicksearch-everything": self.groups["C"],
-            "collectionID": self.groups["D"],
-            "savedSearchID": self.groups["D"],
-            "collection": self.groups["D"],
-            "savedSearch": self.groups["D"],
-            "dateAdded": self.groups["E"],
-            "dateModified": self.groups["E"],
-            "itemType": self.groups["D"],
-            "fileTypeID": self.groups["D"],
-            "tagID": self.groups["D"],
-            "tag": self.groups["C"],
-            "note": self.groups["F"],
-            "childNote": self.groups["F"],
-            "creator": self.groups["C"],
-            "lastName": self.groups["C"],
-            "field": self.groups["C"],
-            "datefield": self.groups["E"],
-            "year": self.groups["C"],
-            "numberfield": self.groups["G"],
-            "libraryID": self.groups["D"],
-            "key": self.groups["H"],
-            "itemID": self.groups["D"],
-            "annotationText": self.groups["F"],
-            "annotationComment": self.groups["F"],
-            "fulltextWord": self.groups["F"],
-            "fulltextContent": self.groups["F"],
-            "tempTable": self.groups["I"],
-        }
-        ###########
-        # ALIASES #
-        ###########
-        # aliases for numberfield
-        pagefields = (
-            "pages",
-            "numPages",
-            "numberOfVolumes",
-            "section",
-            "seriesNumber",
-            "issue",
-        )
-        for pf in pagefields:
-            self.conditions_operators[pf] = self.conditions_operators.get("numberfield")
-        # aliases for datefield
-        datefields = ("accessDate", "date", "dateDue", "accepted")
-        for df in datefields:
-            self.conditions_operators[df] = self.conditions_operators.get("datefield")
-        # aliases for field - this makes a blocking API call unless item types have been cached
-        item_fields = [
-            itm["field"]
-            for itm in self.zinstance.item_fields()
-            if itm["field"] not in set(self.excluded_items)
-        ]
-        for itf in item_fields:
-            self.conditions_operators[itf] = self.conditions_operators.get("field")
-
-    def _validate(self, conditions):
-        """Validate saved search conditions, raising an error if any contain invalid operators"""
-        allowed_keys = set(self.searchkeys)
-        operators_set = set(self.operators.keys())
-        for condition in conditions:
-            if set(condition.keys()) != allowed_keys:
-                msg = f"Keys must be all of: {', '.join(self.searchkeys)}"
-                raise ze.ParamNotPassedError(
-                    msg,
-                )
-            if condition.get("operator") not in operators_set:
-                msg = f"You have specified an unknown operator: {condition.get('operator')}"
-                raise ze.ParamNotPassedError(
-                    msg,
-                )
-            # dict keys of allowed operators for the current condition
-            permitted_operators = self.conditions_operators.get(
-                condition.get("condition"),
-            )
-            # transform these into values
-            permitted_operators_list = {
-                self.operators.get(op) for op in permitted_operators
-            }
-            if condition.get("operator") not in permitted_operators_list:
-                msg = f"You may not use the '{condition.get('operator')}' operator when selecting the '{condition.get('condition')}' condition. \nAllowed operators: {', '.join(list(permitted_operators_list))}"
-                raise ze.ParamNotPassedError(
-                    msg,
-                )
-
-
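SavedSearch._validate expects a list of condition dicts, each with exactly the keys 'condition', 'operator', and 'value', and checks the operator against the groups table above (for example, 'tag' takes the group C operators such as 'contains'). A sketch of a conforming payload, assuming the public saved_search method as the entry point:

    conditions = [
        {"condition": "joinMode", "operator": "all", "value": ""},
        {"condition": "tag", "operator": "contains", "value": "geology"},
    ]
    zot.saved_search("Geology items", conditions)
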
-class Zupload:
-    """Zotero file attachment helper
-    Receives a Zotero instance, file(s) to upload, and optional parent ID
-
-    """
-
-    def __init__(self, zinstance, payload, parentid=None, basedir=None):
-        super().__init__()
-        self.zinstance = zinstance
-        self.payload = payload
-        self.parentid = parentid
-        if basedir is None:
-            self.basedir = Path()
-        elif isinstance(basedir, Path):
-            self.basedir = basedir
-        else:
-            self.basedir = Path(basedir)
-
-    def _verify(self, payload):
-        """Ensure that all files to be attached exist
-        open()'s better than exists(), cos it avoids a race condition
-        """
-        if not payload:  # Check payload has nonzero length
-            raise ze.ParamNotPassedError
-        for templt in payload:
-            if Path(str(self.basedir.joinpath(templt["filename"]))).is_file():
-                try:
-                    # if it is a file, try to open it, and catch the error
-                    with Path(str(self.basedir.joinpath(templt["filename"]))).open():
-                        pass
-                except OSError:
-                    msg = f"The file at {self.basedir.joinpath(templt['filename'])!s} couldn't be opened or found."
-                    raise ze.FileDoesNotExistError(
-                        msg,
-                    ) from None
-            # no point in continuing if the file isn't a file
-            else:
-                msg = f"The file at {self.basedir.joinpath(templt['filename'])!s} couldn't be opened or found."
-                raise ze.FileDoesNotExistError(
-                    msg,
-                )
-
-    def _create_prelim(self):
-        """Step 0: Register intent to upload files"""
-        self._verify(self.payload)
-        if "key" in self.payload[0] and self.payload[0]["key"]:
-            if next((i for i in self.payload if "key" not in i), False):
-                msg = "Can't pass payload entries with and without keys to Zupload"
-                raise ze.UnsupportedParamsError(
-                    msg,
-                )
-            return None  # Don't do anything if payload comes with keys
-        # Set contentType for each attachment if not already provided
-        for item in self.payload:
-            if not item.get("contentType"):
-                filepath = str(self.basedir.joinpath(item["filename"]))
-                detected_type = mimetypes.guess_type(filepath)[0]
-                item["contentType"] = detected_type or "application/octet-stream"
-        liblevel = "/{t}/{u}/items"
-        # Create one or more new attachments
-        headers = {"Zotero-Write-Token": token(), "Content-Type": "application/json"}
-        # If we have a Parent ID, add it as a parentItem
-        if self.parentid:
-            for child in self.payload:
-                child["parentItem"] = self.parentid
-        to_send = json.dumps(self.payload)
-        self.zinstance._check_backoff()
-        req = self.zinstance.client.post(
-            url=build_url(
-                self.zinstance.endpoint,
-                liblevel.format(
-                    t=self.zinstance.library_type,
-                    u=self.zinstance.library_id,
-                ),
-            ),
-            data=to_send,
-            headers=headers,
-        )
-        try:
-            req.raise_for_status()
-        except httpx.HTTPError as exc:
-            error_handler(self.zinstance, req, exc)
-        backoff = req.headers.get("backoff") or req.headers.get("retry-after")
-        if backoff:
-            self.zinstance._set_backoff(backoff)
-        data = req.json()
-        for k in data["success"]:
-            self.payload[int(k)]["key"] = data["success"][k]
-        return data
-
-    def _get_auth(self, attachment, reg_key, md5=None):
-        """Step 1: get upload authorisation for a file"""
-        mtypes = mimetypes.guess_type(attachment)
-        digest = hashlib.md5()  # noqa: S324
-        with Path(attachment).open("rb") as att:
-            for chunk in iter(lambda: att.read(8192), b""):
-                digest.update(chunk)
-        auth_headers = {"Content-Type": "application/x-www-form-urlencoded"}
-        if not md5:
-            auth_headers["If-None-Match"] = "*"
-        else:
-            # docs specify that for existing file we use this
-            auth_headers["If-Match"] = md5
-        data = {
-            "md5": digest.hexdigest(),
-            "filename": Path(attachment).name,
-            "filesize": Path(attachment).stat().st_size,
-            "mtime": str(int(Path(attachment).stat().st_mtime * 1000)),
-            "contentType": mtypes[0] or "application/octet-stream",
-            "charset": mtypes[1],
-            "params": 1,
-        }
-        self.zinstance._check_backoff()
-        auth_req = self.zinstance.client.post(
-            url=build_url(
-                self.zinstance.endpoint,
-                f"/{self.zinstance.library_type}/{self.zinstance.library_id}/items/{reg_key}/file",
-            ),
-            data=data,
-            headers=auth_headers,
-        )
-        try:
-            auth_req.raise_for_status()
-        except httpx.HTTPError as exc:
-            error_handler(self.zinstance, auth_req, exc)
-        backoff = auth_req.headers.get("backoff") or auth_req.headers.get("retry-after")
-        if backoff:
-            self.zinstance._set_backoff(backoff)
-        return auth_req.json()
-
-    def _upload_file(self, authdata, attachment, reg_key):
-        """Step 2: auth successful, and file not on server
-        zotero.org/support/dev/server_api/file_upload#a_full_upload
-
-        reg_key isn't used, but we need to pass it through to Step 3
-        """
-        upload_dict = authdata["params"]
-        # pass tuple of tuples (not dict!), to ensure key comes first
-        upload_list = [("key", upload_dict.pop("key"))]
-        for key, value in upload_dict.items():
-            upload_list.append((key, value))
-        upload_list.append(("file", Path(attachment).open("rb").read()))
-        upload_pairs = tuple(upload_list)
-        try:
-            self.zinstance._check_backoff()
-            # We use a fresh httpx POST because we don't want our existing Pyzotero headers
-            # for a call to the storage upload URL (currently S3)
-            upload = httpx.post(
-                url=authdata["url"],
-                files=upload_pairs,
-                headers={"User-Agent": f"Pyzotero/{pz.__version__}"},
-            )
-        except httpx.ConnectionError:
-            msg = "ConnectionError"
-            raise ze.UploadError(msg) from None
-        try:
-            upload.raise_for_status()
-        except httpx.HTTPError as exc:
-            error_handler(self.zinstance, upload, exc)
-        backoff = upload.headers.get("backoff") or upload.headers.get("retry-after")
-        if backoff:
-            self.zinstance._set_backoff(backoff)
-        # now check the responses
-        return self._register_upload(authdata, reg_key)
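
The _get_auth step above digests the file in 8 KB chunks, so even large attachments are hashed without being read into memory whole; the resulting MD5 is what the upload authorisation request sends to identify file content. The same pattern in isolation:

    import hashlib
    from pathlib import Path

    def file_md5(path):
        digest = hashlib.md5()  # the auth request keys files by MD5, per the code above
        with Path(path).open("rb") as fh:
            for chunk in iter(lambda: fh.read(8192), b""):
                digest.update(chunk)
        return digest.hexdigest()
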
+"""

-    [old lines 1995-2012 render blank in this diff view; content not recoverable]
-        except httpx.HTTPError as exc:
-            error_handler(self.zinstance, upload_reg, exc)
-        backoff = upload_reg.headers.get("backoff") or upload_reg.headers.get(
-            "retry-after",
-        )
-        if backoff:
-            self._set_backoff(backoff)
+# Re-export everything for backwards compatibility
+# Also import the errors module for backwards compat
+from pyzotero import zotero_errors as ze
+from pyzotero._client import Zotero
+from pyzotero._decorators import backoff_check, cleanwrap, retrieve, ss_wrap, tcache
+from pyzotero._search import SavedSearch
+from pyzotero._upload import Zupload
+from pyzotero._utils import (
+    DEFAULT_ITEM_LIMIT,
+    DEFAULT_NUM_ITEMS,
+    DEFAULT_TIMEOUT,
+    ONE_HOUR,
+    build_url,
+    chunks,
+    merge_params,
+    token,
+)
+from pyzotero.errors import error_handler

-    [old lines 2021-2022 render blank in this diff view; content not recoverable]
+# Preserve original module-level attributes
+__author__ = "Stephan Hügel"
+__api_version__ = "3"

-    [old lines 2024-2042 render blank in this diff view; content not recoverable]
+# Backwards compatibility: the old 'timeout' variable name
+timeout = DEFAULT_TIMEOUT
+
+__all__ = [
+    # Constants
+    "DEFAULT_ITEM_LIMIT",
+    "DEFAULT_NUM_ITEMS",
+    "DEFAULT_TIMEOUT",
+    "ONE_HOUR",
+    "SavedSearch",
+    # Classes
+    "Zotero",
+    "Zupload",
+    # Module attributes
+    "__api_version__",
+    "__author__",
+    # Decorators
+    "backoff_check",
+    # Utility functions
+    "build_url",
+    "chunks",
+    "cleanwrap",
+    "error_handler",
+    "merge_params",
+    "retrieve",
+    "ss_wrap",
+    "tcache",
+    "timeout",
+    "token",
+    # Backwards compat
+    "ze",
+]
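
The net effect of the rewrite: pyzotero/zotero.py shrinks from a 2,000-line implementation to a thin shim, and code written against 1.7.6 should keep working unchanged because every public name is re-exported. A hedged sketch (credentials are placeholders):

    from pyzotero.zotero import Zotero, SavedSearch, Zupload, build_url, timeout

    zot = Zotero(library_id="123456", library_type="user", api_key="hypothetical-key")
    print(zot.count_items())  # existing public API, now served by pyzotero._client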