arkindex-client 1.0.15__py3-none-any.whl → 1.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- apistar/__init__.py +9 -0
- apistar/exceptions.py +13 -0
- arkindex/auth.py +29 -14
- arkindex/client/__init__.py +4 -0
- arkindex/client/base.py +98 -0
- arkindex/{client.py → client/client.py} +60 -17
- arkindex/client/decoders.py +252 -0
- arkindex/client/transports.py +132 -0
- arkindex/compat.py +24 -0
- arkindex/document.py +212 -0
- arkindex/exceptions.py +73 -0
- arkindex/mock.py +3 -3
- arkindex/pagination.py +10 -9
- arkindex/schema/__init__.py +0 -0
- arkindex/schema/jsonschema.py +66 -0
- arkindex/schema/openapi.py +523 -0
- arkindex/schema/validator.py +54 -0
- arkindex_client-1.1.0.dist-info/LICENSE +661 -0
- arkindex_client-1.1.0.dist-info/METADATA +27 -0
- arkindex_client-1.1.0.dist-info/RECORD +23 -0
- {arkindex_client-1.0.15.dist-info → arkindex_client-1.1.0.dist-info}/WHEEL +1 -1
- {arkindex_client-1.0.15.dist-info → arkindex_client-1.1.0.dist-info}/top_level.txt +1 -0
- arkindex/transports.py +0 -14
- arkindex_client-1.0.15.dist-info/METADATA +0 -224
- arkindex_client-1.0.15.dist-info/RECORD +0 -11
apistar/__init__.py
ADDED
apistar/exceptions.py
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
# Backward-compatibility shim: keeps `from apistar.exceptions import ErrorResponse`
# working by re-exporting the class that is defined in `arkindex.exceptions`.
import warnings

from arkindex.exceptions import ErrorResponse

# Only the re-exported exception is part of this module's public API.
__all__ = ["ErrorResponse"]

# Emitted when this module is executed (i.e. on import), to steer callers
# towards the new import path. `stacklevel=2` asks for the warning to be
# attributed to the frame above this module rather than to this line.
warnings.warn(
    "The Arkindex API client no longer depends on APIStar. "
    "Please update your `apistar.exceptions` imports to use the `arkindex.exceptions` module.",
    FutureWarning,
    stacklevel=2,
)
|
arkindex/auth.py
CHANGED
|
@@ -1,11 +1,10 @@
|
|
|
1
1
|
# -*- coding: utf-8 -*-
|
|
2
|
-
from
|
|
2
|
+
from requests.auth import AuthBase
|
|
3
3
|
|
|
4
4
|
|
|
5
|
-
class TokenSessionAuthentication(
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
"""
|
|
5
|
+
class TokenSessionAuthentication(AuthBase):
|
|
6
|
+
|
|
7
|
+
safe_methods = ("GET", "HEAD", "OPTIONS", "TRACE")
|
|
9
8
|
|
|
10
9
|
def __init__(
|
|
11
10
|
self,
|
|
@@ -20,14 +19,30 @@ class TokenSessionAuthentication(TokenAuthentication):
|
|
|
20
19
|
:param str csrf_cookie_name: Name of the CSRF token cookie.
|
|
21
20
|
:param str csrf_header_name: Name of the CSRF request header.
|
|
22
21
|
"""
|
|
23
|
-
self.
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
22
|
+
self.token = token
|
|
23
|
+
self.scheme = scheme
|
|
24
|
+
self.csrf_cookie_name = csrf_cookie_name
|
|
25
|
+
self.csrf_header_name = csrf_header_name
|
|
26
|
+
self.csrf_token = None
|
|
27
|
+
|
|
28
|
+
def store_csrf_token(self, response, **kwargs):
    """
    Response hook remembering the CSRF token sent back by the server.

    When the response carries a cookie named ``self.csrf_cookie_name``, its
    value is stored in ``self.csrf_token``; otherwise the previously stored
    token is left untouched.

    :param response: Response object exposing a ``cookies`` mapping.
    :param \**kwargs: Extra keyword arguments passed to the hook; ignored.
    """
    cookies = response.cookies
    if self.csrf_cookie_name not in cookies:
        return
    self.csrf_token = cookies[self.csrf_cookie_name]
|
|
28
31
|
|
|
29
32
|
def __call__(self, request):
    """
    Decorate an outgoing request with CSRF and API token credentials.

    :param request: The request being prepared; it must expose ``method``,
       ``headers`` and ``register_hook``.
    :return: The same request object, updated in place.
    """
    # The CSRF header is only sent on unsafe methods, and only once a token
    # has been stored from a previous response.
    send_csrf_header = (
        self.csrf_token
        and self.csrf_header_name is not None
        and request.method not in self.safe_methods
    )
    if send_csrf_header:
        request.headers[self.csrf_header_name] = self.csrf_token

    # Watch the response for a CSRF cookie to reuse on later requests.
    if self.csrf_cookie_name is not None:
        request.register_hook("response", self.store_csrf_token)

    # API token authentication.
    if self.token is not None:
        request.headers["Authorization"] = f"{self.scheme} {self.token}"

    return request
|
arkindex/client/base.py
ADDED
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
from urllib.parse import quote, urljoin, urlparse
|
|
3
|
+
|
|
4
|
+
import typesystem
|
|
5
|
+
|
|
6
|
+
from arkindex import exceptions
|
|
7
|
+
from arkindex.client.transports import HTTPTransport
|
|
8
|
+
from arkindex.schema.validator import validate
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class BaseClient:
    """
    Schema-driven API client.

    The schema is validated into a document on construction; each request is
    then resolved from an operation ID to a link of that document and sent
    through an HTTP transport.
    """

    def __init__(
        self,
        schema,
        encoding=None,
        auth=None,
        decoders=None,
        headers=None,
        session=None,
        allow_cookies=True,
        verify=True,
    ):
        """
        :param schema: API schema handed to the schema validator.
        :param encoding: Schema encoding forwarded to the validator.
        :param auth: Forwarded to the HTTP transport.
        :param decoders: Forwarded to the HTTP transport.
        :param headers: Forwarded to the HTTP transport.
        :param session: Forwarded to the HTTP transport.
        :param allow_cookies: Forwarded to the HTTP transport.
        :param verify: Forwarded to the HTTP transport.
        """
        transport_options = {
            "auth": auth,
            "decoders": decoders,
            "headers": headers,
            "session": session,
            "allow_cookies": allow_cookies,
            "verify": verify,
        }
        self.document = validate(schema, encoding=encoding)
        self.transport = HTTPTransport(**transport_options)

    @staticmethod
    def _client_error(text, code):
        """Build a ``ClientError`` holding a single error message."""
        return exceptions.ClientError(
            messages=[exceptions.ErrorMessage(text=text, code=code)]
        )

    def lookup_operation(self, operation_id: str):
        """
        Return the first link of the document whose name is ``operation_id``.

        :raises arkindex.exceptions.ClientError: When no link has this name.
        """
        for entry in self.document.walk_links():
            if entry.link.name != operation_id:
                continue
            return entry.link
        raise self._client_error(
            'Operation ID "%s" not found in schema.' % operation_id,
            "invalid-operation",
        )

    def get_url(self, link, params):
        """
        Build the absolute URL of a link, filling in its path parameters.

        ``{name}`` placeholders are fully percent-encoded, while ``{+name}``
        placeholders keep their slashes.

        :raises arkindex.exceptions.ClientError: When the URL has no scheme,
           or a scheme that the transport does not support.
        """
        url = urljoin(self.document.url, link.url)
        scheme = urlparse(url).scheme.lower()

        if not scheme:
            raise self._client_error(f"URL missing scheme '{url}'.", "invalid-url")
        if scheme not in self.transport.schemes:
            raise self._client_error(
                f"Unsupported URL scheme '{scheme}'.", "invalid-url"
            )

        for field in link.get_path_fields():
            raw_value = str(params[field.name])
            strict_marker = "{%s}" % field.name
            reserved_marker = "{+%s}" % field.name
            if strict_marker in url:
                url = url.replace(strict_marker, quote(raw_value, safe=""))
            elif reserved_marker in url:
                url = url.replace(reserved_marker, quote(raw_value, safe="/"))

        return url

    def get_query_params(self, link, params):
        """Return the subset of ``params`` matching the query fields of the link."""
        selected = {}
        for field in link.get_query_fields():
            if field.name in params:
                selected[field.name] = params[field.name]
        return selected

    def get_content_and_encoding(self, link, params):
        """
        Return a ``(content, encoding)`` tuple for the request body.

        Both items are ``None`` when the link has no body field, or when no
        value was given for it.
        """
        body_field = link.get_body_field()
        if not (body_field and body_field.name in params):
            return (None, None)
        return (params[body_field.name], link.encoding)

    def request(self, operation_id: str, **params):
        """
        Validate the parameters of an operation, then send the request.

        :param operation_id: Name of the operation to call.
        :param \**params: Parameters of the operation; unknown names are
           rejected and required ones must be present.
        :raises arkindex.exceptions.ClientError: When the operation is unknown
           or the parameters do not validate.
        :return: Whatever the transport returns for this request.
        """
        link = self.lookup_operation(operation_id)

        # Only the presence of parameters is checked here: any value is accepted.
        allowed = {field.name: typesystem.Any() for field in link.fields}
        mandatory = [field.name for field in link.fields if field.required]
        validator = typesystem.Object(
            properties=allowed,
            required=mandatory,
            additional_properties=False,
        )
        try:
            validator.validate(params)
        except typesystem.ValidationError as exc:
            raise exceptions.ClientError(messages=exc.messages()) from None

        method = link.method
        url = self.get_url(link, params)
        query_params = self.get_query_params(link, params)
        content, encoding = self.get_content_and_encoding(link, params)

        return self.transport.send(
            method, url, query_params=query_params, content=content, encoding=encoding
        )
|
|
@@ -8,14 +8,31 @@ import warnings
|
|
|
8
8
|
from time import sleep
|
|
9
9
|
from urllib.parse import urljoin, urlsplit, urlunsplit
|
|
10
10
|
|
|
11
|
-
import apistar
|
|
12
11
|
import requests
|
|
13
12
|
import yaml
|
|
13
|
+
from tenacity import (
|
|
14
|
+
before_sleep_log,
|
|
15
|
+
retry,
|
|
16
|
+
retry_if_exception,
|
|
17
|
+
stop_after_attempt,
|
|
18
|
+
wait_exponential,
|
|
19
|
+
)
|
|
14
20
|
|
|
15
21
|
from arkindex.auth import TokenSessionAuthentication
|
|
16
|
-
from arkindex.
|
|
22
|
+
from arkindex.client.base import BaseClient
|
|
23
|
+
from arkindex.exceptions import ErrorResponse, SchemaError
|
|
17
24
|
from arkindex.pagination import ResponsePaginator
|
|
18
|
-
|
|
25
|
+
|
|
26
|
+
logger = logging.getLogger(__name__)
|
|
27
|
+
|
|
28
|
+
try:
|
|
29
|
+
from yaml import CSafeLoader as SafeLoader
|
|
30
|
+
|
|
31
|
+
logger.debug("Using LibYAML-based parser")
|
|
32
|
+
except ImportError:
|
|
33
|
+
from yaml import SafeLoader
|
|
34
|
+
|
|
35
|
+
logger.debug("Using default PyYAML parser")
|
|
19
36
|
|
|
20
37
|
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
|
|
21
38
|
|
|
@@ -24,7 +41,16 @@ DEFAULT_BASE_URL = "https://arkindex.teklia.com/"
|
|
|
24
41
|
# Endpoint accessed by the client on instantiation to retrieve the OpenAPI schema
|
|
25
42
|
SCHEMA_ENDPOINT = "/api/v1/openapi/?format=json"
|
|
26
43
|
|
|
27
|
-
|
|
44
|
+
|
|
45
|
+
def _is_500_error(exc: Exception) -> bool:
    """
    Tell whether an exception is an Arkindex API error response with a 5xx status code.

    Any exception that is not an ``ErrorResponse`` is never considered a 5xx error.
    This is used as a retry condition for API calls.
    """
    return isinstance(exc, ErrorResponse) and 500 <= exc.status_code < 600
|
|
28
54
|
|
|
29
55
|
|
|
30
56
|
def options_from_env():
|
|
@@ -71,7 +97,7 @@ def _find_param(operation, param_name):
|
|
|
71
97
|
raise KeyError("Parameter '{}' not found".format(param_name))
|
|
72
98
|
|
|
73
99
|
|
|
74
|
-
class ArkindexClient(
|
|
100
|
+
class ArkindexClient(BaseClient):
|
|
75
101
|
"""
|
|
76
102
|
An Arkindex API client.
|
|
77
103
|
"""
|
|
@@ -99,7 +125,7 @@ class ArkindexClient(apistar.Client):
|
|
|
99
125
|
:type csrf_cookie: str or None
|
|
100
126
|
:param float sleep: Number of seconds to wait before sending each API request,
|
|
101
127
|
as a simple means of throttling.
|
|
102
|
-
:param \**kwargs: Keyword arguments to send to ``
|
|
128
|
+
:param \**kwargs: Keyword arguments to send to ``arkindex.client.base.BaseClient``.
|
|
103
129
|
"""
|
|
104
130
|
if not schema_url:
|
|
105
131
|
schema_url = urljoin(base_url, SCHEMA_ENDPOINT)
|
|
@@ -110,11 +136,11 @@ class ArkindexClient(apistar.Client):
|
|
|
110
136
|
if split.scheme == "file" or not (split.scheme or split.netloc):
|
|
111
137
|
# This is a local path
|
|
112
138
|
with open(schema_url) as f:
|
|
113
|
-
schema = yaml.
|
|
139
|
+
schema = yaml.load(f, Loader=SafeLoader)
|
|
114
140
|
else:
|
|
115
141
|
resp = requests.get(schema_url, verify=self.verify)
|
|
116
142
|
resp.raise_for_status()
|
|
117
|
-
schema = yaml.
|
|
143
|
+
schema = yaml.load(resp.content, Loader=SafeLoader)
|
|
118
144
|
except Exception as e:
|
|
119
145
|
raise SchemaError(
|
|
120
146
|
f"Could not retrieve a proper OpenAPI schema from {schema_url}"
|
|
@@ -122,7 +148,7 @@ class ArkindexClient(apistar.Client):
|
|
|
122
148
|
|
|
123
149
|
super().__init__(schema, **kwargs)
|
|
124
150
|
|
|
125
|
-
#
|
|
151
|
+
# An OpenAPI schema is considered valid even when there are no endpoints, making the client completely useless.
|
|
126
152
|
if not len(self.document.walk_links()):
|
|
127
153
|
raise SchemaError(
|
|
128
154
|
f"The OpenAPI schema from {base_url} has no defined endpoints"
|
|
@@ -142,7 +168,7 @@ class ArkindexClient(apistar.Client):
|
|
|
142
168
|
if "x-paginated" in operation:
|
|
143
169
|
link_info.link._paginated = operation["x-paginated"]
|
|
144
170
|
|
|
145
|
-
# Remove domains from each endpoint; allows
|
|
171
|
+
# Remove domains from each endpoint; allows to properly handle our base URL
|
|
146
172
|
# https://github.com/encode/apistar/issues/657
|
|
147
173
|
original_url = urlsplit(link_info.link.url)
|
|
148
174
|
# Removes the scheme and netloc
|
|
@@ -174,9 +200,6 @@ class ArkindexClient(apistar.Client):
|
|
|
174
200
|
self.document.url if hasattr(self, "document") else "",
|
|
175
201
|
)
|
|
176
202
|
|
|
177
|
-
def init_transport(self, *args, **kwargs):
|
|
178
|
-
return ArkindexHTTPTransport(self.verify, *args, **kwargs)
|
|
179
|
-
|
|
180
203
|
def configure(
|
|
181
204
|
self,
|
|
182
205
|
token=None,
|
|
@@ -223,7 +246,7 @@ class ArkindexClient(apistar.Client):
|
|
|
223
246
|
|
|
224
247
|
def paginate(self, operation_id, *args, **kwargs):
|
|
225
248
|
"""
|
|
226
|
-
Perform a usual request
|
|
249
|
+
Perform a usual API request, but handle paginated endpoints.
|
|
227
250
|
|
|
228
251
|
:return: An iterator for a paginated endpoint.
|
|
229
252
|
:rtype: Union[arkindex.pagination.ResponsePaginator, dict, list]
|
|
@@ -245,11 +268,11 @@ class ArkindexClient(apistar.Client):
|
|
|
245
268
|
self.transport.session.auth.token = resp["auth_token"]
|
|
246
269
|
return resp
|
|
247
270
|
|
|
248
|
-
def
|
|
271
|
+
def single_request(self, operation_id, *args, **kwargs):
|
|
249
272
|
"""
|
|
250
273
|
Perform an API request.
|
|
251
|
-
:param args: Arguments passed to the
|
|
252
|
-
:param kwargs: Keyword arguments passed to the
|
|
274
|
+
:param args: Arguments passed to the BaseClient.
|
|
275
|
+
:param kwargs: Keyword arguments passed to the BaseClient.
|
|
253
276
|
"""
|
|
254
277
|
link = self.lookup_operation(operation_id)
|
|
255
278
|
if link.deprecated:
|
|
@@ -274,3 +297,23 @@ class ArkindexClient(apistar.Client):
|
|
|
274
297
|
)
|
|
275
298
|
sleep(self.sleep_duration)
|
|
276
299
|
return super().request(operation_id, *args, **kwargs)
|
|
300
|
+
|
|
301
|
+
@retry(
    retry=retry_if_exception(_is_500_error),
    wait=wait_exponential(multiplier=2, min=3),
    reraise=True,
    stop=stop_after_attempt(5),
    before_sleep=before_sleep_log(logger, logging.INFO),
)
def request(self, operation_id, *args, **kwargs):
    """
    Proxy all Arkindex API requests with a retry mechanism in case of 50X errors.
    The same API call is attempted at most 5 times (the initial call plus up to
    4 retries), with an exponential sleep time going through 3, 4, 8 and 16
    seconds of wait between calls.
    If the 5th call still gives a 50x, the exception is re-raised and the caller should catch it.
    Exceptions that are not 50x API errors are not retried.
    Log messages are displayed before sleeping (when at least one exception occurred).

    :param operation_id: Name of the API operation to call.
    :param args: Arguments passed to the BaseClient.
    :param kwargs: Keyword arguments passed to the BaseClient.
    """
    # Each attempt goes through single_request; the decorator handles the retries.
    return self.single_request(operation_id, *args, **kwargs)
|
|
@@ -0,0 +1,252 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
import cgi
|
|
3
|
+
import json
|
|
4
|
+
import os
|
|
5
|
+
import posixpath
|
|
6
|
+
import shutil
|
|
7
|
+
import tempfile
|
|
8
|
+
from urllib.parse import urlparse
|
|
9
|
+
|
|
10
|
+
from arkindex.compat import DownloadedFile
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class BaseDecoder:
    """
    Base class of response decoders.

    Subclasses set ``media_type`` to the media type they handle and
    implement ``decode``.
    """

    # Media type handled by the decoder; unset on the base class.
    media_type = None

    def decode(self, bytestring, **options):
        """Decode a response; must be implemented by subclasses."""
        raise NotImplementedError
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class JSONDecoder(BaseDecoder):
    """Decoder for ``application/json`` responses."""

    media_type = "application/json"

    def decode(self, response):
        """
        Parse the response body, decoded as UTF-8, and return the raw JSON data.
        """
        return json.loads(response.content.decode("utf-8"))
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class TextDecoder(BaseDecoder):
    """Decoder for ``text/*`` responses."""

    media_type = "text/*"

    def decode(self, response):
        """Return the ``text`` attribute of the response, unchanged."""
        body = response.text
        return body
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
class DownloadDecoder(BaseDecoder):
    """
    A codec to handle raw file downloads, such as images and other media.
    """

    media_type = "*/*"

    def __init__(self, download_dir=None):
        """
        `download_dir` - If `None` then downloaded files will be temporary files
        that are deleted on close. If set to a value, then downloaded files
        will be saved to this directory, and will not be automatically deleted.
        """
        self._delete_on_close = download_dir is None
        self.download_dir = download_dir

    def decode(self, response):
        """
        Stream the response body to disk and return it as a ``DownloadedFile``.

        The body is first written to a temporary ``.download`` file, then moved
        to its final name, which is derived from the ``Content-Disposition``
        header, or else from the URL and the ``Content-Type`` header.
        If anything fails before the file reaches its final location, the
        temporary file is removed and the exception is re-raised.

        :param response: Response exposing ``url``, ``headers`` and ``iter_content``.
        :return: A ``DownloadedFile`` wrapping the file opened in binary read
           mode, with ``basename`` set to the output file name.
        """
        base_url = response.url
        content_type = response.headers.get("content-type")
        content_disposition = response.headers.get("content-disposition")

        # Write the download to a temporary .download file.
        fd, temp_path = tempfile.mkstemp(suffix=".download")
        try:
            with os.fdopen(fd, "wb") as file_handle:
                for chunk in response.iter_content(chunk_size=4096):
                    file_handle.write(chunk)

            # Determine the output filename.
            output_filename = _get_filename(base_url, content_type, content_disposition)

            # Determine the output directory.
            output_dir = self.download_dir
            if output_dir is None:
                output_dir = os.path.dirname(temp_path)

            # Determine the full output path.
            output_path = os.path.join(output_dir, output_filename)

            # Move the temporary download file to the final location.
            if output_path != temp_path:
                output_path = _unique_output_path(output_path)
                shutil.move(temp_path, output_path)
        except BaseException:
            # mkstemp() leaves the deletion of the file to its caller: without
            # this, every failed download would leave a partial file behind.
            try:
                os.remove(temp_path)
            except OSError:
                # Best effort only: the file may already be gone, and the
                # original error is the one worth reporting.
                pass
            raise

        # Open the file and return the file object.
        output_file = open(output_path, "rb")
        downloaded = DownloadedFile(
            output_file, output_path, delete=self._delete_on_close
        )
        downloaded.basename = output_filename
        return downloaded
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def _guess_extension(content_type):
    """
    Return the preferred file extension for a media type, or an empty string.

    Python's `mimetypes.guess_extension` is no use because it simply returns
    the first of an unordered set. We use the same set of media types here,
    but take a reasonable preference on what extension to map to.

    The value may be a full `Content-Type` header: parameters such as
    `; charset=utf-8` are ignored and the match is case-insensitive.

    :param content_type: Media type or `Content-Type` header value; may be
       `None` or empty.
    :return: The extension, including its leading dot, or `""` when the
       media type is unknown or missing.
    """
    if not content_type:
        return ""

    # Keep only the `type/subtype` part of the header value.
    media_type = content_type.split(";", 1)[0].strip().lower()

    return {
        "application/javascript": ".js",
        "application/msword": ".doc",
        "application/octet-stream": ".bin",
        "application/oda": ".oda",
        "application/pdf": ".pdf",
        "application/pkcs7-mime": ".p7c",
        "application/postscript": ".ps",
        "application/vnd.apple.mpegurl": ".m3u",
        "application/vnd.ms-excel": ".xls",
        "application/vnd.ms-powerpoint": ".ppt",
        "application/x-bcpio": ".bcpio",
        "application/x-cpio": ".cpio",
        "application/x-csh": ".csh",
        "application/x-dvi": ".dvi",
        "application/x-gtar": ".gtar",
        "application/x-hdf": ".hdf",
        "application/x-latex": ".latex",
        "application/x-mif": ".mif",
        "application/x-netcdf": ".nc",
        "application/x-pkcs12": ".p12",
        "application/x-pn-realaudio": ".ram",
        "application/x-python-code": ".pyc",
        "application/x-sh": ".sh",
        "application/x-shar": ".shar",
        "application/x-shockwave-flash": ".swf",
        "application/x-sv4cpio": ".sv4cpio",
        "application/x-sv4crc": ".sv4crc",
        "application/x-tar": ".tar",
        "application/x-tcl": ".tcl",
        "application/x-tex": ".tex",
        "application/x-texinfo": ".texinfo",
        "application/x-troff": ".tr",
        "application/x-troff-man": ".man",
        "application/x-troff-me": ".me",
        "application/x-troff-ms": ".ms",
        "application/x-ustar": ".ustar",
        "application/x-wais-source": ".src",
        "application/xml": ".xml",
        "application/zip": ".zip",
        "audio/basic": ".au",
        "audio/mpeg": ".mp3",
        "audio/x-aiff": ".aif",
        "audio/x-pn-realaudio": ".ra",
        "audio/x-wav": ".wav",
        "image/gif": ".gif",
        "image/ief": ".ief",
        "image/jpeg": ".jpe",
        "image/png": ".png",
        "image/svg+xml": ".svg",
        "image/tiff": ".tiff",
        "image/vnd.microsoft.icon": ".ico",
        "image/x-cmu-raster": ".ras",
        "image/x-ms-bmp": ".bmp",
        "image/x-portable-anymap": ".pnm",
        "image/x-portable-bitmap": ".pbm",
        "image/x-portable-graymap": ".pgm",
        "image/x-portable-pixmap": ".ppm",
        "image/x-rgb": ".rgb",
        "image/x-xbitmap": ".xbm",
        "image/x-xpixmap": ".xpm",
        "image/x-xwindowdump": ".xwd",
        "message/rfc822": ".eml",
        "text/css": ".css",
        "text/csv": ".csv",
        "text/html": ".html",
        "text/plain": ".txt",
        "text/richtext": ".rtx",
        "text/tab-separated-values": ".tsv",
        "text/x-python": ".py",
        "text/x-setext": ".etx",
        "text/x-sgml": ".sgml",
        "text/x-vcard": ".vcf",
        "text/xml": ".xml",
        "video/mp4": ".mp4",
        "video/mpeg": ".mpeg",
        "video/quicktime": ".mov",
        "video/webm": ".webm",
        "video/x-msvideo": ".avi",
        "video/x-sgi-movie": ".movie",
    }.get(media_type, "")
|
|
177
|
+
|
|
178
|
+
|
|
179
|
+
def _unique_output_path(path):
    """
    Return the first variant of `path` that does not exist yet.

    The path itself is returned when it is free. Otherwise a counter is
    inserted before the extension and incremented until a free name is found.

    For example, '/a/b/c.txt' yields '/a/b/c.txt', then '/a/b/c (1).txt',
    then '/a/b/c (2).txt'...
    """
    stem, extension = os.path.splitext(path)
    candidate = path
    attempt = 0
    while os.path.exists(candidate):
        attempt += 1
        candidate = f"{stem} ({attempt}){extension}"
    return candidate
|
|
194
|
+
|
|
195
|
+
|
|
196
|
+
def _safe_filename(filename):
    """
    Sanitize output filenames, to remove any potentially unsafe characters.

    Only the final path component is kept, restricted to alphanumeric
    characters, spaces, dots, underscores and dashes. Surrounding whitespace
    and dots are then stripped, so the result may be an empty string.
    """
    filename = os.path.basename(filename)

    keepcharacters = (" ", ".", "_", "-")
    filename = (
        "".join(char for char in filename if char.isalnum() or char in keepcharacters)
        .strip()
        .strip(".")
    )

    return filename


def _get_filename_from_content_disposition(content_disposition):
    """
    Determine an output filename based on the `Content-Disposition` header.

    :param content_disposition: Raw value of the `Content-Disposition` header.
    :return: The sanitized filename, or `None` when the header holds no
       filename or when nothing is left of it once sanitized.
    """
    # `cgi.parse_header` was deprecated by PEP 594 and the `cgi` module is gone
    # from Python 3.13; the email package is the documented replacement.
    # Imported locally so that this function does not rely on the module imports.
    from email.message import Message

    header = Message()
    header["content-disposition"] = content_disposition
    filename = header.get_filename()
    if filename is None:
        return None

    # A name made only of unsafe characters (such as "../") sanitizes to an
    # empty string: report it as missing so that the caller falls back to
    # another source rather than targeting the output directory itself.
    return _safe_filename(filename) or None
|
|
223
|
+
|
|
224
|
+
|
|
225
|
+
def _get_filename_from_url(url, content_type=None):
    """
    Determine an output filename based on the download URL.

    The last component of the URL path is percent-decoded, then sanitized.
    When it has no extension, one is guessed from the content type. When the
    path yields no usable name at all, "download" is used instead.

    :param url: URL the file was downloaded from.
    :param content_type: Optional media type used to guess an extension.
    :return: The output filename; never empty.
    """
    # Imported locally so that this function does not rely on the module imports.
    from urllib.parse import unquote

    parsed = urlparse(url)
    final_path_component = posixpath.basename(parsed.path.rstrip("/"))
    # Decode percent-escapes before sanitizing: otherwise "my%20file.txt"
    # would lose its "%" and come out as "my20file.txt".
    filename = _safe_filename(unquote(final_path_component))
    suffix = _guess_extension(content_type or "")

    if not filename:
        return "download" + suffix
    if "." in filename:
        return filename
    return filename + suffix
|
|
240
|
+
|
|
241
|
+
|
|
242
|
+
def _get_filename(base_url, content_type=None, content_disposition=None):
    """
    Determine an output filename to use for the download.

    A filename from the `Content-Disposition` header takes precedence;
    otherwise the name is derived from the URL and the content type.
    """
    if content_disposition:
        from_header = _get_filename_from_content_disposition(content_disposition)
        if from_header is not None:
            return from_header

    return _get_filename_from_url(base_url, content_type)
|