arkindex-client 1.0.15__py3-none-any.whl → 1.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
apistar/__init__.py ADDED
@@ -0,0 +1,9 @@
+# -*- coding: utf-8 -*-
+import warnings
+
+warnings.warn(
+    "The Arkindex API client no longer depends on APIStar. "
+    "Please update your `apistar` imports to use the `arkindex` package.",
+    FutureWarning,
+    stacklevel=2,
+)
apistar/exceptions.py ADDED
@@ -0,0 +1,13 @@
+# -*- coding: utf-8 -*-
+import warnings
+
+from arkindex.exceptions import ErrorResponse
+
+__all__ = ["ErrorResponse"]
+
+warnings.warn(
+    "The Arkindex API client no longer depends on APIStar. "
+    "Please update your `apistar.exceptions` imports to use the `arkindex.exceptions` module.",
+    FutureWarning,
+    stacklevel=2,
+)
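Both compatibility shims only emit a FutureWarning and, in the case of `apistar.exceptions`, re-export `ErrorResponse` from `arkindex.exceptions`. A minimal sketch of what importing the shim does once this wheel is installed (assuming the real APIStar package is absent, so `apistar` resolves to the shim):

import warnings

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    from apistar.exceptions import ErrorResponse  # noqa: F401  # resolves to the shim module

print(caught[0].category)  # <class 'FutureWarning'>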
arkindex/auth.py CHANGED
@@ -1,11 +1,10 @@
 # -*- coding: utf-8 -*-
-from apistar.client.auth import SessionAuthentication, TokenAuthentication
+from requests.auth import AuthBase
 
 
-class TokenSessionAuthentication(TokenAuthentication):
-    """
-    A token authentication that takes care of CSRF tokens.
-    """
+class TokenSessionAuthentication(AuthBase):
+
+    safe_methods = ("GET", "HEAD", "OPTIONS", "TRACE")
 
     def __init__(
         self,
@@ -20,14 +19,30 @@ class TokenSessionAuthentication(TokenAuthentication):
         :param str csrf_cookie_name: Name of the CSRF token cookie.
         :param str csrf_header_name: Name of the CSRF request header.
         """
-        self.session_auth = SessionAuthentication(
-            csrf_cookie_name=csrf_cookie_name,
-            csrf_header_name=csrf_header_name,
-        )
-        super().__init__(token, scheme=scheme)
+        self.token = token
+        self.scheme = scheme
+        self.csrf_cookie_name = csrf_cookie_name
+        self.csrf_header_name = csrf_header_name
+        self.csrf_token = None
+
+    def store_csrf_token(self, response, **kwargs):
+        if self.csrf_cookie_name in response.cookies:
+            self.csrf_token = response.cookies[self.csrf_cookie_name]
 
     def __call__(self, request):
-        request = self.session_auth(request)
-        if self.token is None:
-            return request
-        return super().__call__(request)
+        # Add CSRF token
+        if (
+            self.csrf_token
+            and self.csrf_header_name is not None
+            and (request.method not in self.safe_methods)
+        ):
+            request.headers[self.csrf_header_name] = self.csrf_token
+
+        if self.csrf_cookie_name is not None:
+            request.register_hook("response", self.store_csrf_token)
+
+        # Add API token
+        if self.token is not None:
+            request.headers["Authorization"] = f"{self.scheme} {self.token}"
+
+        return request
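`TokenSessionAuthentication` is now a plain `requests` `AuthBase`, so it can be attached directly to a `requests.Session`. A usage sketch; the token value, cookie name and header name below are illustrative, since the `__init__` defaults are not visible in this hunk:

import requests

from arkindex.auth import TokenSessionAuthentication

session = requests.Session()
session.auth = TokenSessionAuthentication(
    "my-api-token",                    # illustrative token value
    csrf_cookie_name="arkindex.csrf",  # assumed cookie name
    csrf_header_name="X-CSRFToken",    # assumed header name
)

# GET is a safe method: only the Authorization header is added.
session.get("https://arkindex.teklia.com/api/v1/corpus/")
# Once a CSRF cookie has been received, unsafe methods also carry the CSRF header.
session.post("https://arkindex.teklia.com/api/v1/corpus/", json={})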
@@ -0,0 +1,4 @@
+# -*- coding: utf-8 -*-
+from arkindex.client.client import ArkindexClient, options_from_env
+
+__all__ = ["ArkindexClient", "options_from_env"]
@@ -0,0 +1,98 @@
+# -*- coding: utf-8 -*-
+from urllib.parse import quote, urljoin, urlparse
+
+import typesystem
+
+from arkindex import exceptions
+from arkindex.client.transports import HTTPTransport
+from arkindex.schema.validator import validate
+
+
+class BaseClient:
+    def __init__(
+        self,
+        schema,
+        encoding=None,
+        auth=None,
+        decoders=None,
+        headers=None,
+        session=None,
+        allow_cookies=True,
+        verify=True,
+    ):
+        self.document = validate(schema, encoding=encoding)
+        self.transport = HTTPTransport(
+            auth=auth,
+            decoders=decoders,
+            headers=headers,
+            session=session,
+            allow_cookies=allow_cookies,
+            verify=verify,
+        )
+
+    def lookup_operation(self, operation_id: str):
+        for item in self.document.walk_links():
+            if item.link.name == operation_id:
+                return item.link
+        text = 'Operation ID "%s" not found in schema.' % operation_id
+        message = exceptions.ErrorMessage(text=text, code="invalid-operation")
+        raise exceptions.ClientError(messages=[message])
+
+    def get_url(self, link, params):
+        url = urljoin(self.document.url, link.url)
+
+        scheme = urlparse(url).scheme.lower()
+
+        if not scheme:
+            text = "URL missing scheme '%s'." % url
+            message = exceptions.ErrorMessage(text=text, code="invalid-url")
+            raise exceptions.ClientError(messages=[message])
+
+        if scheme not in self.transport.schemes:
+            text = "Unsupported URL scheme '%s'." % scheme
+            message = exceptions.ErrorMessage(text=text, code="invalid-url")
+            raise exceptions.ClientError(messages=[message])
+
+        for field in link.get_path_fields():
+            value = str(params[field.name])
+            if "{%s}" % field.name in url:
+                url = url.replace("{%s}" % field.name, quote(value, safe=""))
+            elif "{+%s}" % field.name in url:
+                url = url.replace("{+%s}" % field.name, quote(value, safe="/"))
+
+        return url
+
+    def get_query_params(self, link, params):
+        return {
+            field.name: params[field.name]
+            for field in link.get_query_fields()
+            if field.name in params
+        }
+
+    def get_content_and_encoding(self, link, params):
+        body_field = link.get_body_field()
+        if body_field and body_field.name in params:
+            return (params[body_field.name], link.encoding)
+        return (None, None)
+
+    def request(self, operation_id: str, **params):
+        link = self.lookup_operation(operation_id)
+
+        validator = typesystem.Object(
+            properties={field.name: typesystem.Any() for field in link.fields},
+            required=[field.name for field in link.fields if field.required],
+            additional_properties=False,
+        )
+        try:
+            validator.validate(params)
+        except typesystem.ValidationError as exc:
+            raise exceptions.ClientError(messages=exc.messages()) from None
+
+        method = link.method
+        url = self.get_url(link, params)
+        query_params = self.get_query_params(link, params)
+        (content, encoding) = self.get_content_and_encoding(link, params)
+
+        return self.transport.send(
+            method, url, query_params=query_params, content=content, encoding=encoding
+        )
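`get_url()` implements the OpenAPI path templating directly: plain `{field}` placeholders are fully percent-encoded, while `{+field}` placeholders keep `/` unescaped. A standalone illustration using only the standard library (the endpoint path is illustrative):

from urllib.parse import quote

url = "https://arkindex.teklia.com/api/v1/element/{id}/"
value = "volume 1/page 2"

# {id} placeholder: everything is escaped, including "/"
print(url.replace("{id}", quote(value, safe="")))   # .../element/volume%201%2Fpage%202/
# a {+id} placeholder would keep "/" intact
print(url.replace("{id}", quote(value, safe="/")))  # .../element/volume%201/page%202/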
@@ -8,14 +8,31 @@ import warnings
 from time import sleep
 from urllib.parse import urljoin, urlsplit, urlunsplit
 
-import apistar
 import requests
 import yaml
+from tenacity import (
+    before_sleep_log,
+    retry,
+    retry_if_exception,
+    stop_after_attempt,
+    wait_exponential,
+)
 
 from arkindex.auth import TokenSessionAuthentication
-from arkindex.exceptions import SchemaError
+from arkindex.client.base import BaseClient
+from arkindex.exceptions import ErrorResponse, SchemaError
 from arkindex.pagination import ResponsePaginator
-from arkindex.transports import ArkindexHTTPTransport
+
+logger = logging.getLogger(__name__)
+
+try:
+    from yaml import CSafeLoader as SafeLoader
+
+    logger.debug("Using LibYAML-based parser")
+except ImportError:
+    from yaml import SafeLoader
+
+    logger.debug("Using default PyYAML parser")
 
 BASE_DIR = os.path.dirname(os.path.abspath(__file__))
 
@@ -24,7 +41,16 @@ DEFAULT_BASE_URL = "https://arkindex.teklia.com/"
 # Endpoint accessed by the client on instantiation to retrieve the OpenAPI schema
 SCHEMA_ENDPOINT = "/api/v1/openapi/?format=json"
 
-logger = logging.getLogger(__name__)
+
+def _is_500_error(exc: Exception) -> bool:
+    """
+    Check if an Arkindex API error is a 50x
+    This is used to retry most API calls implemented here
+    """
+    if not isinstance(exc, ErrorResponse):
+        return False
+
+    return 500 <= exc.status_code < 600
 
 
 def options_from_env():
@@ -71,7 +97,7 @@ def _find_param(operation, param_name):
     raise KeyError("Parameter '{}' not found".format(param_name))
 
 
-class ArkindexClient(apistar.Client):
+class ArkindexClient(BaseClient):
     """
     An Arkindex API client.
     """
@@ -99,7 +125,7 @@ class ArkindexClient(apistar.Client):
         :type csrf_cookie: str or None
         :param float sleep: Number of seconds to wait before sending each API request,
            as a simple means of throttling.
-        :param \**kwargs: Keyword arguments to send to ``apistar.Client``.
+        :param \**kwargs: Keyword arguments to send to ``arkindex.client.base.BaseClient``.
         """
         if not schema_url:
             schema_url = urljoin(base_url, SCHEMA_ENDPOINT)
@@ -110,11 +136,11 @@ class ArkindexClient(apistar.Client):
             if split.scheme == "file" or not (split.scheme or split.netloc):
                 # This is a local path
                 with open(schema_url) as f:
-                    schema = yaml.safe_load(f)
+                    schema = yaml.load(f, Loader=SafeLoader)
             else:
                 resp = requests.get(schema_url, verify=self.verify)
                 resp.raise_for_status()
-                schema = yaml.safe_load(resp.content)
+                schema = yaml.load(resp.content, Loader=SafeLoader)
         except Exception as e:
             raise SchemaError(
                 f"Could not retrieve a proper OpenAPI schema from {schema_url}"
@@ -122,7 +148,7 @@ class ArkindexClient(apistar.Client):
 
         super().__init__(schema, **kwargs)
 
-        # APIStar will treat a schema as valid even when there are no endpoints, making the client completely useless.
+        # An OpenAPI schema is considered valid even when there are no endpoints, making the client completely useless.
         if not len(self.document.walk_links()):
             raise SchemaError(
                 f"The OpenAPI schema from {base_url} has no defined endpoints"
@@ -142,7 +168,7 @@ class ArkindexClient(apistar.Client):
             if "x-paginated" in operation:
                 link_info.link._paginated = operation["x-paginated"]
 
-            # Remove domains from each endpoint; allows APIStar to properly handle our base URL
+            # Remove domains from each endpoint; allows to properly handle our base URL
             # https://github.com/encode/apistar/issues/657
             original_url = urlsplit(link_info.link.url)
             # Removes the scheme and netloc
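The domain stripping mentioned above amounts to rebuilding each link URL without its scheme and network location, so that the client's base URL is used instead. A sketch of the equivalent operation (the assignment back to the link is outside this hunk):

from urllib.parse import urlsplit, urlunsplit

original_url = urlsplit("https://arkindex.teklia.com/api/v1/corpus/")
relative_url = urlunsplit(("", "") + original_url[2:])
print(relative_url)  # /api/v1/corpus/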
@@ -174,9 +200,6 @@ class ArkindexClient(apistar.Client):
             self.document.url if hasattr(self, "document") else "",
         )
 
-    def init_transport(self, *args, **kwargs):
-        return ArkindexHTTPTransport(self.verify, *args, **kwargs)
-
     def configure(
         self,
         token=None,
@@ -223,7 +246,7 @@ class ArkindexClient(apistar.Client):
 
     def paginate(self, operation_id, *args, **kwargs):
         """
-        Perform a usual request as done by APIStar, but handle paginated endpoints.
+        Perform a usual API request, but handle paginated endpoints.
 
         :return: An iterator for a paginated endpoint.
         :rtype: Union[arkindex.pagination.ResponsePaginator, dict, list]
@@ -245,11 +268,11 @@ class ArkindexClient(apistar.Client):
             self.transport.session.auth.token = resp["auth_token"]
         return resp
 
-    def request(self, operation_id, *args, **kwargs):
+    def single_request(self, operation_id, *args, **kwargs):
         """
         Perform an API request.
-        :param args: Arguments passed to the APIStar client.
-        :param kwargs: Keyword arguments passed to the APIStar client.
+        :param args: Arguments passed to the BaseClient.
+        :param kwargs: Keyword arguments passed to the BaseClient.
         """
         link = self.lookup_operation(operation_id)
         if link.deprecated:
@@ -274,3 +297,23 @@ class ArkindexClient(apistar.Client):
             )
         sleep(self.sleep_duration)
         return super().request(operation_id, *args, **kwargs)
+
+    @retry(
+        retry=retry_if_exception(_is_500_error),
+        wait=wait_exponential(multiplier=2, min=3),
+        reraise=True,
+        stop=stop_after_attempt(5),
+        before_sleep=before_sleep_log(logger, logging.INFO),
+    )
+    def request(self, operation_id, *args, **kwargs):
+        """
+        Proxy all Arkindex API requests with a retry mechanism in case of 50X errors.
+        The same API call will be retried 5 times, with an exponential sleep time
+        going through 3, 4, 8 and 16 seconds of wait between call.
+        If the 5th call still gives a 50x, the exception is re-raised and the caller should catch it.
+        Log messages are displayed before sleeping (when at least one exception occurred).
+
+        :param args: Arguments passed to the BaseClient.
+        :param kwargs: Keyword arguments passed to the BaseClient.
+        """
+        return self.single_request(operation_id, *args, **kwargs)
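Because the decorator uses `reraise=True`, the last `ErrorResponse` escapes `request()` once the five attempts are exhausted, so callers can still handle it. A usage sketch; the operation ID and constructor argument are illustrative and follow the docstring above:

from arkindex.client import ArkindexClient
from arkindex.exceptions import ErrorResponse

client = ArkindexClient(token="my-api-token")

try:
    corpora = client.request("ListCorpus")
except ErrorResponse as e:
    # Raised after the fifth attempt if the API kept answering with a 5xx status
    print("API call failed with HTTP", e.status_code)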
@@ -0,0 +1,252 @@
+# -*- coding: utf-8 -*-
+import cgi
+import json
+import os
+import posixpath
+import shutil
+import tempfile
+from urllib.parse import urlparse
+
+from arkindex.compat import DownloadedFile
+
+
+class BaseDecoder:
+    media_type = None
+
+    def decode(self, bytestring, **options):
+        raise NotImplementedError()
+
+
+class JSONDecoder(BaseDecoder):
+    media_type = "application/json"
+
+    def decode(self, response):
+        """
+        Return raw JSON data.
+        """
+        content = response.content.decode("utf-8")
+        return json.loads(content)
+
+
+class TextDecoder(BaseDecoder):
+    media_type = "text/*"
+
+    def decode(self, response):
+        return response.text
+
+
+class DownloadDecoder(BaseDecoder):
+    """
+    A codec to handle raw file downloads, such as images and other media.
+    """
+
+    media_type = "*/*"
+
+    def __init__(self, download_dir=None):
+        """
+        `download_dir` - If `None` then downloaded files will be temporary files
+        that are deleted on close. If set to a value, then downloaded files
+        will be saved to this directory, and will not be automatically deleted.
+        """
+        self._delete_on_close = download_dir is None
+        self.download_dir = download_dir
+
+    def decode(self, response):
+        base_url = response.url
+        content_type = response.headers.get("content-type")
+        content_disposition = response.headers.get("content-disposition")
+
+        # Write the download to a temporary .download file.
+        fd, temp_path = tempfile.mkstemp(suffix=".download")
+        with os.fdopen(fd, "wb") as file_handle:
+            for chunk in response.iter_content(chunk_size=4096):
+                file_handle.write(chunk)
+
+        # Determine the output filename.
+        output_filename = _get_filename(base_url, content_type, content_disposition)
+
+        # Determine the output directory.
+        output_dir = self.download_dir
+        if output_dir is None:
+            output_dir = os.path.dirname(temp_path)
+
+        # Determine the full output path.
+        output_path = os.path.join(output_dir, output_filename)
+
+        # Move the temporary download file to the final location.
+        if output_path != temp_path:
+            output_path = _unique_output_path(output_path)
+            shutil.move(temp_path, output_path)
+
+        # Open the file and return the file object.
+        output_file = open(output_path, "rb")
+        downloaded = DownloadedFile(
+            output_file, output_path, delete=self._delete_on_close
+        )
+        downloaded.basename = output_filename
+        return downloaded
+
+
+def _guess_extension(content_type):
+    """
+    Python's `mimetypes.guess_extension` is no use because it simply returns
+    the first of an unordered set. We use the same set of media types here,
+    but take a reasonable preference on what extension to map to.
+    """
+    return {
+        "application/javascript": ".js",
+        "application/msword": ".doc",
+        "application/octet-stream": ".bin",
+        "application/oda": ".oda",
+        "application/pdf": ".pdf",
+        "application/pkcs7-mime": ".p7c",
+        "application/postscript": ".ps",
+        "application/vnd.apple.mpegurl": ".m3u",
+        "application/vnd.ms-excel": ".xls",
+        "application/vnd.ms-powerpoint": ".ppt",
+        "application/x-bcpio": ".bcpio",
+        "application/x-cpio": ".cpio",
+        "application/x-csh": ".csh",
+        "application/x-dvi": ".dvi",
+        "application/x-gtar": ".gtar",
+        "application/x-hdf": ".hdf",
+        "application/x-latex": ".latex",
+        "application/x-mif": ".mif",
+        "application/x-netcdf": ".nc",
+        "application/x-pkcs12": ".p12",
+        "application/x-pn-realaudio": ".ram",
+        "application/x-python-code": ".pyc",
+        "application/x-sh": ".sh",
+        "application/x-shar": ".shar",
+        "application/x-shockwave-flash": ".swf",
+        "application/x-sv4cpio": ".sv4cpio",
+        "application/x-sv4crc": ".sv4crc",
+        "application/x-tar": ".tar",
+        "application/x-tcl": ".tcl",
+        "application/x-tex": ".tex",
+        "application/x-texinfo": ".texinfo",
+        "application/x-troff": ".tr",
+        "application/x-troff-man": ".man",
+        "application/x-troff-me": ".me",
+        "application/x-troff-ms": ".ms",
+        "application/x-ustar": ".ustar",
+        "application/x-wais-source": ".src",
+        "application/xml": ".xml",
+        "application/zip": ".zip",
+        "audio/basic": ".au",
+        "audio/mpeg": ".mp3",
+        "audio/x-aiff": ".aif",
+        "audio/x-pn-realaudio": ".ra",
+        "audio/x-wav": ".wav",
+        "image/gif": ".gif",
+        "image/ief": ".ief",
+        "image/jpeg": ".jpe",
+        "image/png": ".png",
+        "image/svg+xml": ".svg",
+        "image/tiff": ".tiff",
+        "image/vnd.microsoft.icon": ".ico",
+        "image/x-cmu-raster": ".ras",
+        "image/x-ms-bmp": ".bmp",
+        "image/x-portable-anymap": ".pnm",
+        "image/x-portable-bitmap": ".pbm",
+        "image/x-portable-graymap": ".pgm",
+        "image/x-portable-pixmap": ".ppm",
+        "image/x-rgb": ".rgb",
+        "image/x-xbitmap": ".xbm",
+        "image/x-xpixmap": ".xpm",
+        "image/x-xwindowdump": ".xwd",
+        "message/rfc822": ".eml",
+        "text/css": ".css",
+        "text/csv": ".csv",
+        "text/html": ".html",
+        "text/plain": ".txt",
+        "text/richtext": ".rtx",
+        "text/tab-separated-values": ".tsv",
+        "text/x-python": ".py",
+        "text/x-setext": ".etx",
+        "text/x-sgml": ".sgml",
+        "text/x-vcard": ".vcf",
+        "text/xml": ".xml",
+        "video/mp4": ".mp4",
+        "video/mpeg": ".mpeg",
+        "video/quicktime": ".mov",
+        "video/webm": ".webm",
+        "video/x-msvideo": ".avi",
+        "video/x-sgi-movie": ".movie",
+    }.get(content_type, "")
+
+
+def _unique_output_path(path):
+    """
+    Given a path like '/a/b/c.txt'
+
+    Return the first available filename that doesn't already exist,
+    using an incrementing suffix if needed.
+
+    For example: '/a/b/c.txt' or '/a/b/c (1).txt' or '/a/b/c (2).txt'...
+    """
+    basename, ext = os.path.splitext(path)
+    idx = 0
+    while os.path.exists(path):
+        idx += 1
+        path = "%s (%d)%s" % (basename, idx, ext)
+    return path
+
+
+def _safe_filename(filename):
+    """
+    Sanitize output filenames, to remove any potentially unsafe characters.
+    """
+    filename = os.path.basename(filename)
+
+    keepcharacters = (" ", ".", "_", "-")
+    filename = (
+        "".join(char for char in filename if char.isalnum() or char in keepcharacters)
+        .strip()
+        .strip(".")
+    )
+
+    return filename
+
+
+def _get_filename_from_content_disposition(content_disposition):
+    """
+    Determine an output filename based on the `Content-Disposition` header.
+    """
+    value, params = cgi.parse_header(content_disposition)
+
+    if "filename" in params:
+        filename = params["filename"]
+        return _safe_filename(filename)
+
+    return None
+
+
+def _get_filename_from_url(url, content_type=None):
+    """
+    Determine an output filename based on the download URL.
+    """
+    parsed = urlparse(url)
+    final_path_component = posixpath.basename(parsed.path.rstrip("/"))
+    filename = _safe_filename(final_path_component)
+    suffix = _guess_extension(content_type or "")
+
+    if filename:
+        if "." not in filename:
+            return filename + suffix
+        return filename
+
+    return "download" + suffix
+
+
+def _get_filename(base_url, content_type=None, content_disposition=None):
+    """
+    Determine an output filename to use for the download.
+    """
+    filename = None
+    if content_disposition:
+        filename = _get_filename_from_content_disposition(content_disposition)
+    if filename is not None:
+        return filename
+
+    return _get_filename_from_url(base_url, content_type)
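Filename resolution prefers the `Content-Disposition` header over the URL, and the header parsing itself is delegated to the standard library (note that the `cgi` module used above was removed from the standard library in Python 3.13). For example:

import cgi

value, params = cgi.parse_header('attachment; filename="page-001.tiff"')
print(value)               # attachment
print(params["filename"])  # page-001.tiff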