arkindex-client 1.1.0__py3-none-any.whl → 1.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
arkindex/client/client.py CHANGED
@@ -5,10 +5,12 @@ Arkindex API Client
5
5
  import logging
6
6
  import os
7
7
  import warnings
8
+ from importlib.metadata import version
8
9
  from time import sleep
9
- from urllib.parse import urljoin, urlsplit, urlunsplit
10
+ from urllib.parse import quote, urljoin, urlparse, urlsplit
10
11
 
11
12
  import requests
13
+ import typesystem
12
14
  import yaml
13
15
  from tenacity import (
14
16
  before_sleep_log,
@@ -19,12 +21,15 @@ from tenacity import (
19
21
  )
20
22
 
21
23
  from arkindex.auth import TokenSessionAuthentication
22
- from arkindex.client.base import BaseClient
23
- from arkindex.exceptions import ErrorResponse, SchemaError
24
+ from arkindex.client import decoders
25
+ from arkindex.exceptions import ClientError, ErrorMessage, ErrorResponse, SchemaError
24
26
  from arkindex.pagination import ResponsePaginator
27
+ from arkindex.schema.validator import validate
25
28
 
26
29
  logger = logging.getLogger(__name__)
27
30
 
31
+ REQUEST_TIMEOUT = (30, 60)
32
+
28
33
  try:
29
34
  from yaml import CSafeLoader as SafeLoader
30
35
 
@@ -82,22 +87,7 @@ def options_from_env():
82
87
  return options
83
88
 
84
89
 
85
- def _find_operation(schema, operation_id):
86
- for path_object in schema["paths"].values():
87
- for operation in path_object.values():
88
- if operation["operationId"] == operation_id:
89
- return operation
90
- raise KeyError("Operation '{}' not found".format(operation_id))
91
-
92
-
93
- def _find_param(operation, param_name):
94
- for parameter in operation.get("parameters", []):
95
- if parameter["name"] == param_name:
96
- return parameter
97
- raise KeyError("Parameter '{}' not found".format(param_name))
98
-
99
-
100
- class ArkindexClient(BaseClient):
90
+ class ArkindexClient:
101
91
  """
102
92
  An Arkindex API client.
103
93
  """
@@ -111,7 +101,6 @@ class ArkindexClient(BaseClient):
111
101
  csrf_cookie=None,
112
102
  sleep=0,
113
103
  verify=True,
114
- **kwargs,
115
104
  ):
116
105
  r"""
117
106
  :param token: An API token to use. If omitted, access is restricted to public endpoints.
@@ -125,12 +114,27 @@ class ArkindexClient(BaseClient):
125
114
  :type csrf_cookie: str or None
126
115
  :param float sleep: Number of seconds to wait before sending each API request,
127
116
  as a simple means of throttling.
128
- :param \**kwargs: Keyword arguments to send to ``arkindex.client.base.BaseClient``.
117
+ :param bool verify: Whether to verify the SSL certificate on each request. Enabled by default.
129
118
  """
119
+ self.decoders = [
120
+ decoders.JSONDecoder(),
121
+ decoders.TextDecoder(),
122
+ decoders.DownloadDecoder(),
123
+ ]
124
+
125
+ self.session = requests.Session()
126
+ self.session.verify = verify
127
+ client_version = version("arkindex-client")
128
+ self.session.headers.update(
129
+ {
130
+ "accept": ", ".join([decoder.media_type for decoder in self.decoders]),
131
+ "user-agent": f"arkindex-client/{client_version}",
132
+ }
133
+ )
134
+
130
135
  if not schema_url:
131
136
  schema_url = urljoin(base_url, SCHEMA_ENDPOINT)
132
137
 
133
- self.verify = verify
134
138
  try:
135
139
  split = urlsplit(schema_url)
136
140
  if split.scheme == "file" or not (split.scheme or split.netloc):
@@ -138,7 +142,7 @@ class ArkindexClient(BaseClient):
138
142
  with open(schema_url) as f:
139
143
  schema = yaml.load(f, Loader=SafeLoader)
140
144
  else:
141
- resp = requests.get(schema_url, verify=self.verify)
145
+ resp = self.session.get(schema_url)
142
146
  resp.raise_for_status()
143
147
  schema = yaml.load(resp.content, Loader=SafeLoader)
144
148
  except Exception as e:
@@ -146,34 +150,7 @@ class ArkindexClient(BaseClient):
146
150
  f"Could not retrieve a proper OpenAPI schema from {schema_url}"
147
151
  ) from e
148
152
 
149
- super().__init__(schema, **kwargs)
150
-
151
- # An OpenAPI schema is considered valid even when there are no endpoints, making the client completely useless.
152
- if not len(self.document.walk_links()):
153
- raise SchemaError(
154
- f"The OpenAPI schema from {base_url} has no defined endpoints"
155
- )
156
-
157
- # Post-processing of the parsed schema
158
- for link_info in self.document.walk_links():
159
- # Look for deprecated links
160
- # https://github.com/encode/apistar/issues/664
161
- operation = _find_operation(schema, link_info.link.name)
162
- link_info.link.deprecated = operation.get("deprecated", False)
163
- for item in link_info.link.get_query_fields():
164
- parameter = _find_param(operation, item.name)
165
- item.deprecated = parameter.get("deprecated", False)
166
-
167
- # Detect paginated links
168
- if "x-paginated" in operation:
169
- link_info.link._paginated = operation["x-paginated"]
170
-
171
- # Remove domains from each endpoint; allows to properly handle our base URL
172
- # https://github.com/encode/apistar/issues/657
173
- original_url = urlsplit(link_info.link.url)
174
- # Removes the scheme and netloc
175
- new_url = ("", "", *original_url[2:])
176
- link_info.link.url = urlunsplit(new_url)
153
+ self.document = validate(schema)
177
154
 
178
155
  # Try to autodetect the CSRF cookie:
179
156
  # - Try to find a matching server for this base URL and look for the x-csrf-cookie extension
@@ -228,7 +205,7 @@ class ArkindexClient(BaseClient):
228
205
  """
229
206
  if not csrf_cookie:
230
207
  csrf_cookie = "arkindex.csrf"
231
- self.transport.session.auth = TokenSessionAuthentication(
208
+ self.session.auth = TokenSessionAuthentication(
232
209
  token,
233
210
  csrf_cookie_name=csrf_cookie,
234
211
  scheme=auth_scheme,
@@ -242,7 +219,15 @@ class ArkindexClient(BaseClient):
242
219
  self.document.url = base_url
243
220
 
244
221
  # Add the Referer header to allow Django CSRF to function
245
- self.transport.headers.setdefault("Referer", self.document.url)
222
+ self.session.headers.setdefault("Referer", self.document.url)
223
+
224
+ def lookup_operation(self, operation_id: str):
225
+ if operation_id in self.document.links:
226
+ return self.document.links[operation_id]
227
+
228
+ text = 'Operation ID "%s" not found in schema.' % operation_id
229
+ message = ErrorMessage(text=text, code="invalid-operation")
230
+ raise ClientError(messages=[message])
246
231
 
247
232
  def paginate(self, operation_id, *args, **kwargs):
248
233
  """
@@ -251,9 +236,10 @@ class ArkindexClient(BaseClient):
251
236
  :return: An iterator for a paginated endpoint.
252
237
  :rtype: Union[arkindex.pagination.ResponsePaginator, dict, list]
253
238
  """
239
+
254
240
  link = self.lookup_operation(operation_id)
255
241
  # If there was no x-paginated, trust the caller and assume the endpoint is paginated
256
- if getattr(link, "_paginated", True):
242
+ if link.paginated is not False:
257
243
  return ResponsePaginator(self, operation_id, *args, **kwargs)
258
244
  return self.request(operation_id, *args, **kwargs)
259
245
 
@@ -264,15 +250,77 @@ class ArkindexClient(BaseClient):
264
250
  """
265
251
  resp = self.request("Login", body={"email": email, "password": password})
266
252
  if "auth_token" in resp:
267
- self.transport.session.auth.scheme = "Token"
268
- self.transport.session.auth.token = resp["auth_token"]
253
+ self.session.auth.scheme = "Token"
254
+ self.session.auth.token = resp["auth_token"]
269
255
  return resp
270
256
 
271
- def single_request(self, operation_id, *args, **kwargs):
257
+ def get_query_params(self, link, params):
258
+ return {
259
+ field.name: params[field.name]
260
+ for field in link.get_query_fields()
261
+ if field.name in params
262
+ }
263
+
264
+ def get_url(self, link, params):
265
+ url = urljoin(self.document.url, link.url)
266
+
267
+ scheme = urlparse(url).scheme.lower()
268
+
269
+ if not scheme:
270
+ text = "URL missing scheme '%s'." % url
271
+ message = ErrorMessage(text=text, code="invalid-url")
272
+ raise ClientError(messages=[message])
273
+
274
+ if scheme not in ("http", "https"):
275
+ text = "Unsupported URL scheme '%s'." % scheme
276
+ message = ErrorMessage(text=text, code="invalid-url")
277
+ raise ClientError(messages=[message])
278
+
279
+ for field in link.get_path_fields():
280
+ value = str(params[field.name])
281
+ if "{%s}" % field.name in url:
282
+ url = url.replace("{%s}" % field.name, quote(value, safe=""))
283
+ elif "{+%s}" % field.name in url:
284
+ url = url.replace("{+%s}" % field.name, quote(value, safe="/"))
285
+
286
+ return url
287
+
288
+ def get_content(self, link, params):
289
+ body_field = link.get_body_field()
290
+ if body_field and body_field.name in params:
291
+ assert (
292
+ link.encoding == "application/json"
293
+ ), "Only JSON request bodies are supported"
294
+ return params[body_field.name]
295
+
296
+ def get_decoder(self, content_type=None):
297
+ """
298
+ Given the value of a 'Content-Type' header, return the appropriate
299
+ decoder for handling the response content.
300
+ """
301
+ if content_type is None:
302
+ return self.decoders[0]
303
+
304
+ content_type = content_type.split(";")[0].strip().lower()
305
+ main_type = content_type.split("/")[0] + "/*"
306
+ wildcard_type = "*/*"
307
+
308
+ for codec in self.decoders:
309
+ if codec.media_type in (content_type, main_type, wildcard_type):
310
+ return codec
311
+
312
+ text = (
313
+ "Unsupported encoding '%s' in response Content-Type header." % content_type
314
+ )
315
+ message = ErrorMessage(text=text, code="cannot-decode-response")
316
+ raise ClientError(messages=[message])
317
+
318
+ def single_request(self, operation_id, **parameters):
272
319
  """
273
320
  Perform an API request.
274
- :param args: Arguments passed to the BaseClient.
275
- :param kwargs: Keyword arguments passed to the BaseClient.
321
+
322
+ :param str operation_id: Name of the API endpoint.
323
+ :param path_parameters: Path parameters for this endpoint.
276
324
  """
277
325
  link = self.lookup_operation(operation_id)
278
326
  if link.deprecated:
@@ -282,8 +330,23 @@ class ArkindexClient(BaseClient):
282
330
  stacklevel=2,
283
331
  )
284
332
 
285
- query_params = self.get_query_params(link, kwargs)
333
+ validator = typesystem.Object(
334
+ properties={field.name: typesystem.Any() for field in link.fields},
335
+ required=[field.name for field in link.fields if field.required],
336
+ additional_properties=False,
337
+ )
338
+ try:
339
+ validator.validate(parameters)
340
+ except typesystem.ValidationError as exc:
341
+ raise ClientError(messages=exc.messages()) from None
342
+
343
+ method = link.method
344
+ url = self.get_url(link, parameters)
345
+
346
+ content = self.get_content(link, parameters)
347
+ query_params = self.get_query_params(link, parameters)
286
348
  fields = link.get_query_fields()
349
+
287
350
  for field in fields:
288
351
  if field.deprecated and field.name in query_params:
289
352
  warnings.warn(
@@ -291,12 +354,41 @@ class ArkindexClient(BaseClient):
291
354
  DeprecationWarning,
292
355
  stacklevel=2,
293
356
  )
357
+
294
358
  if self.sleep_duration:
295
359
  logger.debug(
296
360
  "Delaying request by {:f} seconds...".format(self.sleep_duration)
297
361
  )
298
362
  sleep(self.sleep_duration)
299
- return super().request(operation_id, *args, **kwargs)
363
+
364
+ return self._send_request(
365
+ method, url, query_params=query_params, content=content
366
+ )
367
+
368
+ def _send_request(self, method, url, query_params=None, content=None):
369
+ options = {
370
+ "params": query_params,
371
+ "timeout": REQUEST_TIMEOUT,
372
+ }
373
+ if content is not None:
374
+ options["json"] = content
375
+
376
+ response = self.session.request(method, url, **options)
377
+
378
+ # Given an HTTP response, return the decoded data.
379
+ result = None
380
+ if response.content:
381
+ content_type = response.headers.get("content-type")
382
+ decoder = self.get_decoder(content_type)
383
+ result = decoder.decode(response)
384
+
385
+ if 400 <= response.status_code <= 599:
386
+ title = "%d %s" % (response.status_code, response.reason)
387
+ raise ErrorResponse(
388
+ title=title, status_code=response.status_code, content=result
389
+ )
390
+
391
+ return result
300
392
 
301
393
  @retry(
302
394
  retry=retry_if_exception(_is_500_error),
@@ -305,15 +397,34 @@ class ArkindexClient(BaseClient):
305
397
  stop=stop_after_attempt(5),
306
398
  before_sleep=before_sleep_log(logger, logging.INFO),
307
399
  )
308
- def request(self, operation_id, *args, **kwargs):
400
+ def request(self, operation_id, **parameters):
309
401
  """
310
- Proxy all Arkindex API requests with a retry mechanism in case of 50X errors.
311
- The same API call will be retried 5 times, with an exponential sleep time
312
- going through 3, 4, 8 and 16 seconds of wait between call.
402
+ Perform an API request with an automatic retry mechanism in case of 50X errors.
403
+ A failing API call will be retried 5 times, with an exponential sleep time going
404
+ through 3, 4, 8 and 16 seconds of wait between call.
313
405
  If the 5th call still gives a 50x, the exception is re-raised and the caller should catch it.
314
406
  Log messages are displayed before sleeping (when at least one exception occurred).
315
407
 
316
- :param args: Arguments passed to the BaseClient.
317
- :param kwargs: Keyword arguments passed to the BaseClient.
408
+ :param str operation_id: Name of the API endpoint.
409
+ :param parameters: Body, Path or Query parameters passed as kwargs.
410
+ Body parameters must be passed using the `body` keyword argument, others can be set directly.
411
+
412
+ Example usage for POST and unpaginated GET requests:
413
+
414
+ >>> request(
415
+ ... "CreateMetaDataBulk",
416
+ ... id="8f8f196f-49bc-444e-9cfe-c705c3cd01ae",
417
+ ... body={
418
+ ... "worker_run_id": "50e1f2d4-2087-41ed-a862-d17576bae480",
419
+ ... "metadata_list": [
420
+ ... …
421
+ ... ],
422
+ ... },
423
+ ... )
424
+ >>> request(
425
+ ... "ListElements",
426
+ ... corpus="7358ab03-cc36-4160-86ce-98f70e993a0f",
427
+ ... top_level=True,
428
+ ... )
318
429
  """
319
- return self.single_request(operation_id, *args, **kwargs)
430
+ return self.single_request(operation_id, **parameters)
arkindex/document.py CHANGED
@@ -1,103 +1,32 @@
1
1
  # -*- coding: utf-8 -*-
2
- import collections
3
2
  import re
4
3
  import typing
5
4
 
6
- LinkInfo = collections.namedtuple("LinkInfo", ["link", "name", "sections"])
5
+ from arkindex.exceptions import SchemaError
7
6
 
8
7
 
9
8
  class Document:
10
9
  def __init__(
11
10
  self,
12
- content: typing.Sequence[typing.Union["Section", "Link"]] = None,
11
+ links: typing.Sequence["Link"],
13
12
  url: str = "",
14
- title: str = "",
15
- description: str = "",
16
- version: str = "",
17
13
  ):
18
- content = [] if (content is None) else list(content)
19
-
20
- # Ensure all names within a document are unique.
21
- seen_fields = set()
22
- seen_sections = set()
23
- for item in content:
24
- if isinstance(item, Link):
25
- msg = 'Link "%s" in Document must have a unique name.'
26
- assert item.name not in seen_fields, msg % item.name
27
- seen_fields.add(item.name)
28
- else:
29
- msg = 'Section "%s" in Document must have a unique name.'
30
- assert item.name not in seen_sections, msg % item.name
31
- seen_sections.add(item.name)
32
-
33
- self.content = content
34
- self.url = url
35
- self.title = title
36
- self.description = description
37
- self.version = version
38
-
39
- def get_links(self):
40
- return [item for item in self.content if isinstance(item, Link)]
41
-
42
- def get_sections(self):
43
- return [item for item in self.content if isinstance(item, Section)]
44
-
45
- def walk_links(self):
46
- link_info_list = []
47
- for item in self.content:
48
- if isinstance(item, Link):
49
- link_info = LinkInfo(link=item, name=item.name, sections=())
50
- link_info_list.append(link_info)
51
- else:
52
- link_info_list.extend(item.walk_links())
53
- return link_info_list
14
+ if not len(links):
15
+ raise SchemaError(
16
+ "An OpenAPI document must contain at least one valid operation."
17
+ )
54
18
 
19
+ links_by_name = {}
55
20
 
56
- class Section:
57
- def __init__(
58
- self,
59
- name: str,
60
- content: typing.Sequence[typing.Union["Section", "Link"]] = None,
61
- title: str = "",
62
- description: str = "",
63
- ):
64
- content = [] if (content is None) else list(content)
65
-
66
- # Ensure all names within a section are unique.
67
- seen_fields = set()
68
- seen_sections = set()
69
- for item in content:
70
- if isinstance(item, Link):
71
- msg = 'Link "%s" in Section "%s" must have a unique name.'
72
- assert item.name not in seen_fields, msg % (item.name, name)
73
- seen_fields.add(item.name)
74
- else:
75
- msg = 'Section "%s" in Section "%s" must have a unique name.'
76
- assert item.name not in seen_sections, msg % (item.name, name)
77
- seen_sections.add(item.name)
78
-
79
- self.content = content
80
- self.name = name
81
- self.title = title
82
- self.description = description
83
-
84
- def get_links(self):
85
- return [item for item in self.content if isinstance(item, Link)]
86
-
87
- def get_sections(self):
88
- return [item for item in self.content if isinstance(item, Section)]
21
+ # Ensure all names within a document are unique.
22
+ for link in links:
23
+ assert (
24
+ link.name not in links_by_name
25
+ ), f'Link "{link.name}" in Document must have a unique name.'
26
+ links_by_name[link.name] = link
89
27
 
90
- def walk_links(self, previous_sections=()):
91
- link_info_list = []
92
- sections = previous_sections + (self,)
93
- for item in self.content:
94
- if isinstance(item, Link):
95
- name = ":".join([section.name for section in sections] + [item.name])
96
- link_info = LinkInfo(link=item, name=name, sections=sections)
97
- link_info_list.append(link_info)
98
- else:
99
- link_info_list.extend(item.walk_links(previous_sections=sections))
100
- return link_info_list
28
+ self.links = links_by_name
29
+ self.url = url
101
30
 
102
31
 
103
32
  class Link:
@@ -112,10 +41,9 @@ class Link:
112
41
  handler: typing.Callable = None,
113
42
  name: str = "",
114
43
  encoding: str = "",
115
- response: "Response" = None,
116
- title: str = "",
117
- description: str = "",
118
44
  fields: typing.Sequence["Field"] = None,
45
+ deprecated: bool = False,
46
+ paginated: typing.Optional[bool] = None,
119
47
  ):
120
48
  method = method.upper()
121
49
  fields = [] if (fields is None) else list(fields)
@@ -153,10 +81,9 @@ class Link:
153
81
  self.handler = handler
154
82
  self.name = name if name else handler.__name__
155
83
  self.encoding = encoding
156
- self.response = response
157
- self.title = title
158
- self.description = description
159
84
  self.fields = fields
85
+ self.deprecated = deprecated
86
+ self.paginated = paginated
160
87
 
161
88
  def get_path_fields(self):
162
89
  return [field for field in self.fields if field.location == "path"]
@@ -182,11 +109,10 @@ class Field:
182
109
  self,
183
110
  name: str,
184
111
  location: str,
185
- title: str = "",
186
- description: str = "",
187
112
  required: bool = None,
188
113
  schema: typing.Any = None,
189
114
  example: typing.Any = None,
115
+ deprecated: bool = False,
190
116
  ):
191
117
  assert location in ("path", "query", "body", "cookie", "header", "formData")
192
118
  if required is None:
@@ -195,18 +121,8 @@ class Field:
195
121
  assert required, "May not set 'required=False' on path fields."
196
122
 
197
123
  self.name = name
198
- self.title = title
199
- self.description = description
200
124
  self.location = location
201
125
  self.required = required
202
126
  self.schema = schema
203
127
  self.example = example
204
-
205
-
206
- class Response:
207
- def __init__(
208
- self, encoding: str, status_code: int = 200, schema: typing.Any = None
209
- ):
210
- self.encoding = encoding
211
- self.status_code = status_code
212
- self.schema = schema
128
+ self.deprecated = deprecated