PyPI - arkindex-client - Versions diffs - 1.1.0__py3-none-any.whl → 1.1.2__py3-none-any.whl - Mend

arkindex-client 1.1.0__py3-none-any.whl → 1.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11) hide show

arkindex/client/client.py +179 -68
arkindex/document.py +21 -105
arkindex/schema/openapi.py +44 -292
{arkindex_client-1.1.0.dist-info → arkindex_client-1.1.2.dist-info}/METADATA +3 -2
{arkindex_client-1.1.0.dist-info → arkindex_client-1.1.2.dist-info}/RECORD +8 -11
arkindex/client/base.py +0 -98
arkindex/client/transports.py +0 -132
arkindex/schema/jsonschema.py +0 -66
{arkindex_client-1.1.0.dist-info → arkindex_client-1.1.2.dist-info}/LICENSE +0 -0
{arkindex_client-1.1.0.dist-info → arkindex_client-1.1.2.dist-info}/WHEEL +0 -0
{arkindex_client-1.1.0.dist-info → arkindex_client-1.1.2.dist-info}/top_level.txt +0 -0

arkindex/client/client.py CHANGED Viewed

@@ -5,10 +5,12 @@ Arkindex API Client
 import logging
 import os
 import warnings
+from importlib.metadata import version
 from time import sleep
-from urllib.parse import urljoin, urlsplit, urlunsplit
+from urllib.parse import quote, urljoin, urlparse, urlsplit
 import requests
+import typesystem
 import yaml
 from tenacity import (
     before_sleep_log,
@@ -19,12 +21,15 @@ from tenacity import (
 )
 from arkindex.auth import TokenSessionAuthentication
-from arkindex.client.base import BaseClient
-from arkindex.exceptions import ErrorResponse, SchemaError
+from arkindex.client import decoders
+from arkindex.exceptions import ClientError, ErrorMessage, ErrorResponse, SchemaError
 from arkindex.pagination import ResponsePaginator
+from arkindex.schema.validator import validate
 logger = logging.getLogger(__name__)
+REQUEST_TIMEOUT = (30, 60)
 try:
     from yaml import CSafeLoader as SafeLoader
@@ -82,22 +87,7 @@ def options_from_env():
     return options
-def _find_operation(schema, operation_id):
-    for path_object in schema["paths"].values():
-        for operation in path_object.values():
-            if operation["operationId"] == operation_id:
-                return operation
-    raise KeyError("Operation '{}' not found".format(operation_id))
-def _find_param(operation, param_name):
-    for parameter in operation.get("parameters", []):
-        if parameter["name"] == param_name:
-            return parameter
-    raise KeyError("Parameter '{}' not found".format(param_name))
-class ArkindexClient(BaseClient):
+class ArkindexClient:
     """
     An Arkindex API client.
     """
@@ -111,7 +101,6 @@ class ArkindexClient(BaseClient):
         csrf_cookie=None,
         sleep=0,
         verify=True,
-        **kwargs,
     ):
         r"""
         :param token: An API token to use. If omitted, access is restricted to public endpoints.
@@ -125,12 +114,27 @@ class ArkindexClient(BaseClient):
         :type csrf_cookie: str or None
         :param float sleep: Number of seconds to wait before sending each API request,
            as a simple means of throttling.
-        :param \**kwargs: Keyword arguments to send to ``arkindex.client.base.BaseClient``.
+        :param bool verify: Whether to verify the SSL certificate on each request. Enabled by default.
         """
+        self.decoders = [
+            decoders.JSONDecoder(),
+            decoders.TextDecoder(),
+            decoders.DownloadDecoder(),
+        ]
+        self.session = requests.Session()
+        self.session.verify = verify
+        client_version = version("arkindex-client")
+        self.session.headers.update(
+            {
+                "accept": ", ".join([decoder.media_type for decoder in self.decoders]),
+                "user-agent": f"arkindex-client/{client_version}",
+            }
+        )
         if not schema_url:
             schema_url = urljoin(base_url, SCHEMA_ENDPOINT)
-        self.verify = verify
         try:
             split = urlsplit(schema_url)
             if split.scheme == "file" or not (split.scheme or split.netloc):
@@ -138,7 +142,7 @@ class ArkindexClient(BaseClient):
                 with open(schema_url) as f:
                     schema = yaml.load(f, Loader=SafeLoader)
             else:
-                resp = requests.get(schema_url, verify=self.verify)
+                resp = self.session.get(schema_url)
                 resp.raise_for_status()
                 schema = yaml.load(resp.content, Loader=SafeLoader)
         except Exception as e:
@@ -146,34 +150,7 @@ class ArkindexClient(BaseClient):
                 f"Could not retrieve a proper OpenAPI schema from {schema_url}"
             ) from e
-        super().__init__(schema, **kwargs)
-        # An OpenAPI schema is considered valid even when there are no endpoints, making the client completely useless.
-        if not len(self.document.walk_links()):
-            raise SchemaError(
-                f"The OpenAPI schema from {base_url} has no defined endpoints"
-            )
-        # Post-processing of the parsed schema
-        for link_info in self.document.walk_links():
-            # Look for deprecated links
-            # https://github.com/encode/apistar/issues/664
-            operation = _find_operation(schema, link_info.link.name)
-            link_info.link.deprecated = operation.get("deprecated", False)
-            for item in link_info.link.get_query_fields():
-                parameter = _find_param(operation, item.name)
-                item.deprecated = parameter.get("deprecated", False)
-            # Detect paginated links
-            if "x-paginated" in operation:
-                link_info.link._paginated = operation["x-paginated"]
-            # Remove domains from each endpoint; allows to properly handle our base URL
-            # https://github.com/encode/apistar/issues/657
-            original_url = urlsplit(link_info.link.url)
-            # Removes the scheme and netloc
-            new_url = ("", "", *original_url[2:])
-            link_info.link.url = urlunsplit(new_url)
+        self.document = validate(schema)
         # Try to autodetect the CSRF cookie:
         # - Try to find a matching server for this base URL and look for the x-csrf-cookie extension
@@ -228,7 +205,7 @@ class ArkindexClient(BaseClient):
         """
         if not csrf_cookie:
             csrf_cookie = "arkindex.csrf"
-        self.transport.session.auth = TokenSessionAuthentication(
+        self.session.auth = TokenSessionAuthentication(
             token,
             csrf_cookie_name=csrf_cookie,
             scheme=auth_scheme,
@@ -242,7 +219,15 @@ class ArkindexClient(BaseClient):
             self.document.url = base_url
         # Add the Referer header to allow Django CSRF to function
-        self.transport.headers.setdefault("Referer", self.document.url)
+        self.session.headers.setdefault("Referer", self.document.url)
+    def lookup_operation(self, operation_id: str):
+        if operation_id in self.document.links:
+            return self.document.links[operation_id]
+        text = 'Operation ID "%s" not found in schema.' % operation_id
+        message = ErrorMessage(text=text, code="invalid-operation")
+        raise ClientError(messages=[message])
     def paginate(self, operation_id, *args, **kwargs):
         """
@@ -251,9 +236,10 @@ class ArkindexClient(BaseClient):
         :return: An iterator for a paginated endpoint.
         :rtype: Union[arkindex.pagination.ResponsePaginator, dict, list]
         """
         link = self.lookup_operation(operation_id)
         # If there was no x-paginated, trust the caller and assume the endpoint is paginated
-        if getattr(link, "_paginated", True):
+        if link.paginated is not False:
             return ResponsePaginator(self, operation_id, *args, **kwargs)
         return self.request(operation_id, *args, **kwargs)
@@ -264,15 +250,77 @@ class ArkindexClient(BaseClient):
         """
         resp = self.request("Login", body={"email": email, "password": password})
         if "auth_token" in resp:
-            self.transport.session.auth.scheme = "Token"
-            self.transport.session.auth.token = resp["auth_token"]
+            self.session.auth.scheme = "Token"
+            self.session.auth.token = resp["auth_token"]
         return resp
-    def single_request(self, operation_id, *args, **kwargs):
+    def get_query_params(self, link, params):
+        return {
+            field.name: params[field.name]
+            for field in link.get_query_fields()
+            if field.name in params
+        }
+    def get_url(self, link, params):
+        url = urljoin(self.document.url, link.url)
+        scheme = urlparse(url).scheme.lower()
+        if not scheme:
+            text = "URL missing scheme '%s'." % url
+            message = ErrorMessage(text=text, code="invalid-url")
+            raise ClientError(messages=[message])
+        if scheme not in ("http", "https"):
+            text = "Unsupported URL scheme '%s'." % scheme
+            message = ErrorMessage(text=text, code="invalid-url")
+            raise ClientError(messages=[message])
+        for field in link.get_path_fields():
+            value = str(params[field.name])
+            if "{%s}" % field.name in url:
+                url = url.replace("{%s}" % field.name, quote(value, safe=""))
+            elif "{+%s}" % field.name in url:
+                url = url.replace("{+%s}" % field.name, quote(value, safe="/"))
+        return url
+    def get_content(self, link, params):
+        body_field = link.get_body_field()
+        if body_field and body_field.name in params:
+            assert (
+                link.encoding == "application/json"
+            ), "Only JSON request bodies are supported"
+            return params[body_field.name]
+    def get_decoder(self, content_type=None):
+        """
+        Given the value of a 'Content-Type' header, return the appropriate
+        decoder for handling the response content.
+        """
+        if content_type is None:
+            return self.decoders[0]
+        content_type = content_type.split(";")[0].strip().lower()
+        main_type = content_type.split("/")[0] + "/*"
+        wildcard_type = "*/*"
+        for codec in self.decoders:
+            if codec.media_type in (content_type, main_type, wildcard_type):
+                return codec
+        text = (
+            "Unsupported encoding '%s' in response Content-Type header." % content_type
+        )
+        message = ErrorMessage(text=text, code="cannot-decode-response")
+        raise ClientError(messages=[message])
+    def single_request(self, operation_id, **parameters):
         """
         Perform an API request.
-        :param args: Arguments passed to the BaseClient.
-        :param kwargs: Keyword arguments passed to the BaseClient.
+        :param str operation_id: Name of the API endpoint.
+        :param path_parameters: Path parameters for this endpoint.
         """
         link = self.lookup_operation(operation_id)
         if link.deprecated:
@@ -282,8 +330,23 @@ class ArkindexClient(BaseClient):
                 stacklevel=2,
             )
-        query_params = self.get_query_params(link, kwargs)
+        validator = typesystem.Object(
+            properties={field.name: typesystem.Any() for field in link.fields},
+            required=[field.name for field in link.fields if field.required],
+            additional_properties=False,
+        )
+        try:
+            validator.validate(parameters)
+        except typesystem.ValidationError as exc:
+            raise ClientError(messages=exc.messages()) from None
+        method = link.method
+        url = self.get_url(link, parameters)
+        content = self.get_content(link, parameters)
+        query_params = self.get_query_params(link, parameters)
         fields = link.get_query_fields()
         for field in fields:
             if field.deprecated and field.name in query_params:
                 warnings.warn(
@@ -291,12 +354,41 @@ class ArkindexClient(BaseClient):
                     DeprecationWarning,
                     stacklevel=2,
                 )
         if self.sleep_duration:
             logger.debug(
                 "Delaying request by {:f} seconds...".format(self.sleep_duration)
             )
             sleep(self.sleep_duration)
-        return super().request(operation_id, *args, **kwargs)
+        return self._send_request(
+            method, url, query_params=query_params, content=content
+        )
+    def _send_request(self, method, url, query_params=None, content=None):
+        options = {
+            "params": query_params,
+            "timeout": REQUEST_TIMEOUT,
+        }
+        if content is not None:
+            options["json"] = content
+        response = self.session.request(method, url, **options)
+        # Given an HTTP response, return the decoded data.
+        result = None
+        if response.content:
+            content_type = response.headers.get("content-type")
+            decoder = self.get_decoder(content_type)
+            result = decoder.decode(response)
+        if 400 <= response.status_code <= 599:
+            title = "%d %s" % (response.status_code, response.reason)
+            raise ErrorResponse(
+                title=title, status_code=response.status_code, content=result
+            )
+        return result
     @retry(
         retry=retry_if_exception(_is_500_error),
@@ -305,15 +397,34 @@ class ArkindexClient(BaseClient):
         stop=stop_after_attempt(5),
         before_sleep=before_sleep_log(logger, logging.INFO),
     )
-    def request(self, operation_id, *args, **kwargs):
+    def request(self, operation_id, **parameters):
         """
-        Proxy all Arkindex API requests with a retry mechanism in case of 50X errors.
-        The same API call will be retried 5 times, with an exponential sleep time
-        going through 3, 4, 8 and 16 seconds of wait between call.
+        Perform an API request with an automatic retry mechanism in case of 50X errors.
+        A failing API call will be retried 5 times, with an exponential sleep time going
+        through 3, 4, 8 and 16 seconds of wait between call.
         If the 5th call still gives a 50x, the exception is re-raised and the caller should catch it.
         Log messages are displayed before sleeping (when at least one exception occurred).
-        :param args: Arguments passed to the BaseClient.
-        :param kwargs: Keyword arguments passed to the BaseClient.
+        :param str operation_id: Name of the API endpoint.
+        :param parameters: Body, Path or Query parameters passed as kwargs.
+            Body parameters must be passed using the `body` keyword argument, others can be set directly.
+        Example usage for POST and unpaginated GET requests:
+        >>> request(
+        ...     "CreateMetaDataBulk",
+        ...     id="8f8f196f-49bc-444e-9cfe-c705c3cd01ae",
+        ...     body={
+        ...         "worker_run_id": "50e1f2d4-2087-41ed-a862-d17576bae480",
+        ...         "metadata_list": [
+        ...             …
+        ...         ],
+        ...     },
+        ... )
+        >>> request(
+        ...     "ListElements",
+        ...     corpus="7358ab03-cc36-4160-86ce-98f70e993a0f",
+        ...     top_level=True,
+        ... )
         """
-        return self.single_request(operation_id, *args, **kwargs)
+        return self.single_request(operation_id, **parameters)

arkindex/document.py CHANGED Viewed

@@ -1,103 +1,32 @@
 # -*- coding: utf-8 -*-
-import collections
 import re
 import typing
-LinkInfo = collections.namedtuple("LinkInfo", ["link", "name", "sections"])
+from arkindex.exceptions import SchemaError
 class Document:
     def __init__(
         self,
-        content: typing.Sequence[typing.Union["Section", "Link"]] = None,
+        links: typing.Sequence["Link"],
         url: str = "",
-        title: str = "",
-        description: str = "",
-        version: str = "",
     ):
-        content = [] if (content is None) else list(content)
-        # Ensure all names within a document are unique.
-        seen_fields = set()
-        seen_sections = set()
-        for item in content:
-            if isinstance(item, Link):
-                msg = 'Link "%s" in Document must have a unique name.'
-                assert item.name not in seen_fields, msg % item.name
-                seen_fields.add(item.name)
-            else:
-                msg = 'Section "%s" in Document must have a unique name.'
-                assert item.name not in seen_sections, msg % item.name
-                seen_sections.add(item.name)
-        self.content = content
-        self.url = url
-        self.title = title
-        self.description = description
-        self.version = version
-    def get_links(self):
-        return [item for item in self.content if isinstance(item, Link)]
-    def get_sections(self):
-        return [item for item in self.content if isinstance(item, Section)]
-    def walk_links(self):
-        link_info_list = []
-        for item in self.content:
-            if isinstance(item, Link):
-                link_info = LinkInfo(link=item, name=item.name, sections=())
-                link_info_list.append(link_info)
-            else:
-                link_info_list.extend(item.walk_links())
-        return link_info_list
+        if not len(links):
+            raise SchemaError(
+                "An OpenAPI document must contain at least one valid operation."
+            )
+        links_by_name = {}
-class Section:
-    def __init__(
-        self,
-        name: str,
-        content: typing.Sequence[typing.Union["Section", "Link"]] = None,
-        title: str = "",
-        description: str = "",
-    ):
-        content = [] if (content is None) else list(content)
-        # Ensure all names within a section are unique.
-        seen_fields = set()
-        seen_sections = set()
-        for item in content:
-            if isinstance(item, Link):
-                msg = 'Link "%s" in Section "%s" must have a unique name.'
-                assert item.name not in seen_fields, msg % (item.name, name)
-                seen_fields.add(item.name)
-            else:
-                msg = 'Section "%s" in Section "%s" must have a unique name.'
-                assert item.name not in seen_sections, msg % (item.name, name)
-                seen_sections.add(item.name)
-        self.content = content
-        self.name = name
-        self.title = title
-        self.description = description
-    def get_links(self):
-        return [item for item in self.content if isinstance(item, Link)]
-    def get_sections(self):
-        return [item for item in self.content if isinstance(item, Section)]
+        # Ensure all names within a document are unique.
+        for link in links:
+            assert (
+                link.name not in links_by_name
+            ), f'Link "{link.name}" in Document must have a unique name.'
+            links_by_name[link.name] = link
-    def walk_links(self, previous_sections=()):
-        link_info_list = []
-        sections = previous_sections + (self,)
-        for item in self.content:
-            if isinstance(item, Link):
-                name = ":".join([section.name for section in sections] + [item.name])
-                link_info = LinkInfo(link=item, name=name, sections=sections)
-                link_info_list.append(link_info)
-            else:
-                link_info_list.extend(item.walk_links(previous_sections=sections))
-        return link_info_list
+        self.links = links_by_name
+        self.url = url
 class Link:
@@ -112,10 +41,9 @@ class Link:
         handler: typing.Callable = None,
         name: str = "",
         encoding: str = "",
-        response: "Response" = None,
-        title: str = "",
-        description: str = "",
         fields: typing.Sequence["Field"] = None,
+        deprecated: bool = False,
+        paginated: typing.Optional[bool] = None,
     ):
         method = method.upper()
         fields = [] if (fields is None) else list(fields)
@@ -153,10 +81,9 @@ class Link:
         self.handler = handler
         self.name = name if name else handler.__name__
         self.encoding = encoding
-        self.response = response
-        self.title = title
-        self.description = description
         self.fields = fields
+        self.deprecated = deprecated
+        self.paginated = paginated
     def get_path_fields(self):
         return [field for field in self.fields if field.location == "path"]
@@ -182,11 +109,10 @@ class Field:
         self,
         name: str,
         location: str,
-        title: str = "",
-        description: str = "",
         required: bool = None,
         schema: typing.Any = None,
         example: typing.Any = None,
+        deprecated: bool = False,
     ):
         assert location in ("path", "query", "body", "cookie", "header", "formData")
         if required is None:
@@ -195,18 +121,8 @@ class Field:
             assert required, "May not set 'required=False' on path fields."
         self.name = name
-        self.title = title
-        self.description = description
         self.location = location
         self.required = required
         self.schema = schema
         self.example = example
-class Response:
-    def __init__(
-        self, encoding: str, status_code: int = 200, schema: typing.Any = None
-    ):
-        self.encoding = encoding
-        self.status_code = status_code
-        self.schema = schema
+        self.deprecated = deprecated

arkindex-client 1.1.0__py3-none-any.whl → 1.1.2__py3-none-any.whl

arkindex-client 1.1.0__py3-none-any.whl → 1.1.2__py3-none-any.whl