python-documentcloud 4.2.0__py2.py3-none-any.whl → 4.4.0__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- documentcloud/client.py +21 -140
- documentcloud/documents.py +6 -12
- documentcloud/exceptions.py +8 -35
- {python_documentcloud-4.2.0.dist-info → python_documentcloud-4.4.0.dist-info}/METADATA +24 -14
- {python_documentcloud-4.2.0.dist-info → python_documentcloud-4.4.0.dist-info}/RECORD +8 -8
- {python_documentcloud-4.2.0.dist-info → python_documentcloud-4.4.0.dist-info}/WHEEL +1 -1
- {python_documentcloud-4.2.0.dist-info → python_documentcloud-4.4.0.dist-info}/LICENSE +0 -0
- {python_documentcloud-4.2.0.dist-info → python_documentcloud-4.4.0.dist-info}/top_level.txt +0 -0
documentcloud/client.py
CHANGED
|
@@ -1,55 +1,49 @@
|
|
|
1
|
-
|
|
2
|
-
The public interface for the DocumentCloud API
|
|
3
|
-
"""
|
|
4
|
-
|
|
1
|
+
# Import SquareletClient from python-squarelet
|
|
5
2
|
# Standard Library
|
|
6
3
|
import logging
|
|
7
|
-
from functools import partial
|
|
8
|
-
from urllib.parse import parse_qs, urlparse
|
|
9
4
|
|
|
10
5
|
# Third Party
|
|
11
|
-
import
|
|
12
|
-
import requests
|
|
6
|
+
from squarelet import SquareletClient
|
|
13
7
|
|
|
14
8
|
# Local
|
|
15
|
-
|
|
9
|
+
# Local Imports
|
|
16
10
|
from .documents import DocumentClient
|
|
17
|
-
from .exceptions import APIError, CredentialsFailedError, DoesNotExistError
|
|
18
11
|
from .organizations import OrganizationClient
|
|
19
12
|
from .projects import ProjectClient
|
|
20
|
-
from .toolbox import requests_retry_session
|
|
21
13
|
from .users import UserClient
|
|
22
14
|
|
|
23
15
|
logger = logging.getLogger("documentcloud")
|
|
24
16
|
|
|
25
17
|
|
|
26
|
-
class DocumentCloud(
|
|
18
|
+
class DocumentCloud(SquareletClient):
|
|
27
19
|
"""
|
|
28
|
-
The public interface for the DocumentCloud API
|
|
20
|
+
The public interface for the DocumentCloud API, now integrated with SquareletClient
|
|
29
21
|
"""
|
|
30
22
|
|
|
31
23
|
def __init__(
|
|
32
24
|
self,
|
|
33
25
|
username=None,
|
|
34
26
|
password=None,
|
|
35
|
-
base_uri=
|
|
36
|
-
auth_uri=
|
|
37
|
-
timeout=
|
|
27
|
+
base_uri="https://api.www.documentcloud.org/api/",
|
|
28
|
+
auth_uri="https://accounts.muckrock.com/api/",
|
|
29
|
+
timeout=20,
|
|
38
30
|
loglevel=None,
|
|
39
31
|
rate_limit=True,
|
|
40
32
|
rate_limit_sleep=True,
|
|
41
33
|
):
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
34
|
+
# Initialize SquareletClient for authentication and request handling
|
|
35
|
+
super().__init__(
|
|
36
|
+
base_uri=base_uri,
|
|
37
|
+
username=username,
|
|
38
|
+
password=password,
|
|
39
|
+
auth_uri=auth_uri,
|
|
40
|
+
timeout=timeout,
|
|
41
|
+
rate_limit=rate_limit,
|
|
42
|
+
rate_limit_sleep=rate_limit_sleep,
|
|
43
|
+
)
|
|
51
44
|
|
|
52
|
-
|
|
45
|
+
# Set up logging
|
|
46
|
+
if loglevel:
|
|
53
47
|
logging.basicConfig(
|
|
54
48
|
level=loglevel,
|
|
55
49
|
format="%(asctime)s %(levelname)-8s %(name)-25s %(message)s",
|
|
@@ -57,121 +51,8 @@ class DocumentCloud(object):
|
|
|
57
51
|
else:
|
|
58
52
|
logger.addHandler(logging.NullHandler())
|
|
59
53
|
|
|
54
|
+
# Initialize the sub-clients using SquareletClient
|
|
60
55
|
self.documents = DocumentClient(self)
|
|
61
56
|
self.projects = ProjectClient(self)
|
|
62
57
|
self.users = UserClient(self)
|
|
63
58
|
self.organizations = OrganizationClient(self)
|
|
64
|
-
|
|
65
|
-
if rate_limit:
|
|
66
|
-
self._request = ratelimit.limits(calls=RATE_LIMIT, period=RATE_PERIOD)(
|
|
67
|
-
self._request
|
|
68
|
-
)
|
|
69
|
-
if rate_limit_sleep:
|
|
70
|
-
self._request = ratelimit.sleep_and_retry(self._request)
|
|
71
|
-
|
|
72
|
-
def _set_tokens(self):
|
|
73
|
-
"""Set the refresh and access tokens"""
|
|
74
|
-
if self.refresh_token:
|
|
75
|
-
access_token, self.refresh_token = self._refresh_tokens(self.refresh_token)
|
|
76
|
-
elif self.username and self.password:
|
|
77
|
-
access_token, self.refresh_token = self._get_tokens(
|
|
78
|
-
self.username, self.password
|
|
79
|
-
)
|
|
80
|
-
else:
|
|
81
|
-
access_token = None
|
|
82
|
-
|
|
83
|
-
if access_token:
|
|
84
|
-
self.session.headers.update({"Authorization": f"Bearer {access_token}"})
|
|
85
|
-
|
|
86
|
-
def _get_tokens(self, username, password):
|
|
87
|
-
"""Get an access and refresh token in exchange for the username and password"""
|
|
88
|
-
response = requests_retry_session().post(
|
|
89
|
-
f"{self.auth_uri}token/",
|
|
90
|
-
json={"username": username, "password": password},
|
|
91
|
-
timeout=self.timeout,
|
|
92
|
-
)
|
|
93
|
-
|
|
94
|
-
if response.status_code == requests.codes.UNAUTHORIZED:
|
|
95
|
-
raise CredentialsFailedError("The username and password are incorrect")
|
|
96
|
-
|
|
97
|
-
self.raise_for_status(response)
|
|
98
|
-
|
|
99
|
-
json = response.json()
|
|
100
|
-
return (json["access"], json["refresh"])
|
|
101
|
-
|
|
102
|
-
def _refresh_tokens(self, refresh_token):
|
|
103
|
-
"""Refresh the access and refresh tokens"""
|
|
104
|
-
response = requests_retry_session().post(
|
|
105
|
-
f"{self.auth_uri}refresh/",
|
|
106
|
-
json={"refresh": refresh_token},
|
|
107
|
-
timeout=self.timeout,
|
|
108
|
-
)
|
|
109
|
-
|
|
110
|
-
if response.status_code == requests.codes.UNAUTHORIZED:
|
|
111
|
-
# refresh token is expired
|
|
112
|
-
return self._get_tokens(self.username, self.password)
|
|
113
|
-
|
|
114
|
-
self.raise_for_status(response)
|
|
115
|
-
|
|
116
|
-
json = response.json()
|
|
117
|
-
return (json["access"], json["refresh"])
|
|
118
|
-
|
|
119
|
-
@property
|
|
120
|
-
def user_id(self):
|
|
121
|
-
if self._user_id is None:
|
|
122
|
-
user = self.users.get("me")
|
|
123
|
-
self._user_id = user.id
|
|
124
|
-
return self._user_id
|
|
125
|
-
|
|
126
|
-
def _request(self, method, url, raise_error=True, **kwargs):
|
|
127
|
-
"""Generic method to make API requests"""
|
|
128
|
-
# pylint: disable=method-hidden
|
|
129
|
-
logger.info("request: %s - %s - %s", method, url, kwargs)
|
|
130
|
-
set_tokens = kwargs.pop("set_tokens", True)
|
|
131
|
-
full_url = kwargs.pop("full_url", False)
|
|
132
|
-
|
|
133
|
-
if not full_url:
|
|
134
|
-
url = f"{self.base_uri}{url}"
|
|
135
|
-
|
|
136
|
-
# set the API to version 2.0
|
|
137
|
-
parsed_url = urlparse(url)
|
|
138
|
-
if "version" not in parse_qs(parsed_url.query):
|
|
139
|
-
# check to avoid double setting version
|
|
140
|
-
kwargs.setdefault("params", {}).update({"version": "2.0"})
|
|
141
|
-
|
|
142
|
-
response = requests_retry_session(session=self.session).request(
|
|
143
|
-
method, url, timeout=self.timeout, **kwargs
|
|
144
|
-
)
|
|
145
|
-
logger.debug("response: %s - %s", response.status_code, response.content)
|
|
146
|
-
if (
|
|
147
|
-
response.status_code in [requests.codes.FORBIDDEN, requests.codes.TOO_MANY]
|
|
148
|
-
and set_tokens
|
|
149
|
-
):
|
|
150
|
-
self._set_tokens()
|
|
151
|
-
# track set_tokens to not enter an infinite loop
|
|
152
|
-
kwargs["set_tokens"] = False
|
|
153
|
-
return self._request(method, url, full_url=True, **kwargs)
|
|
154
|
-
|
|
155
|
-
if raise_error:
|
|
156
|
-
self.raise_for_status(response)
|
|
157
|
-
|
|
158
|
-
return response
|
|
159
|
-
|
|
160
|
-
def __getattr__(self, attr):
|
|
161
|
-
"""Generate methods for each HTTP request type"""
|
|
162
|
-
methods = ["get", "options", "head", "post", "put", "patch", "delete"]
|
|
163
|
-
if attr in methods:
|
|
164
|
-
return partial(self._request, attr)
|
|
165
|
-
raise AttributeError(
|
|
166
|
-
f"'{self.__class__.__name__}' object has no attribute '{attr}'"
|
|
167
|
-
)
|
|
168
|
-
|
|
169
|
-
def raise_for_status(self, response):
|
|
170
|
-
"""Raise for status with a custom error class"""
|
|
171
|
-
try:
|
|
172
|
-
response.raise_for_status()
|
|
173
|
-
except requests.exceptions.RequestException as exc:
|
|
174
|
-
if exc.response.status_code == 404:
|
|
175
|
-
raise DoesNotExistError(response=exc.response) from exc
|
|
176
|
-
else:
|
|
177
|
-
raise APIError(response=exc.response) from exc
|
documentcloud/documents.py
CHANGED
|
@@ -404,9 +404,7 @@ class DocumentClient(BaseAPIClient):
|
|
|
404
404
|
path_list = self._collect_files(path, extensions)
|
|
405
405
|
|
|
406
406
|
logger.info(
|
|
407
|
-
"Upload directory on %s: Found %d files to upload",
|
|
408
|
-
path,
|
|
409
|
-
len(path_list)
|
|
407
|
+
"Upload directory on %s: Found %d files to upload", path, len(path_list)
|
|
410
408
|
)
|
|
411
409
|
|
|
412
410
|
# Upload all the files using the bulk API to reduce the number
|
|
@@ -444,7 +442,7 @@ class DocumentClient(BaseAPIClient):
|
|
|
444
442
|
logger.info(
|
|
445
443
|
"Error creating the following documents: %s\n%s",
|
|
446
444
|
exc,
|
|
447
|
-
"\n".join(file_paths)
|
|
445
|
+
"\n".join(file_paths),
|
|
448
446
|
)
|
|
449
447
|
continue
|
|
450
448
|
else:
|
|
@@ -465,7 +463,7 @@ class DocumentClient(BaseAPIClient):
|
|
|
465
463
|
logger.info(
|
|
466
464
|
"Error uploading the following document: %s %s",
|
|
467
465
|
exc,
|
|
468
|
-
file_path
|
|
466
|
+
file_path,
|
|
469
467
|
)
|
|
470
468
|
continue
|
|
471
469
|
else:
|
|
@@ -481,7 +479,7 @@ class DocumentClient(BaseAPIClient):
|
|
|
481
479
|
logger.info(
|
|
482
480
|
"Error creating the following documents: %s\n%s",
|
|
483
481
|
exc,
|
|
484
|
-
"\n".join(file_paths)
|
|
482
|
+
"\n".join(file_paths),
|
|
485
483
|
)
|
|
486
484
|
continue
|
|
487
485
|
else:
|
|
@@ -504,11 +502,7 @@ class DocumentClient(BaseAPIClient):
|
|
|
504
502
|
# Grouper will put None's on the end of the last group
|
|
505
503
|
url_group = [url for url in url_group if url is not None]
|
|
506
504
|
|
|
507
|
-
logger.info(
|
|
508
|
-
"Uploading group %d: %s",
|
|
509
|
-
i + 1,
|
|
510
|
-
"\n".join(url_group)
|
|
511
|
-
)
|
|
505
|
+
logger.info("Uploading group %d: %s", i + 1, "\n".join(url_group))
|
|
512
506
|
|
|
513
507
|
# Create the documents
|
|
514
508
|
logger.info("Creating the documents...")
|
|
@@ -531,7 +525,7 @@ class DocumentClient(BaseAPIClient):
|
|
|
531
525
|
logger.info(
|
|
532
526
|
"Error creating the following documents: %s\n%s",
|
|
533
527
|
str(exc),
|
|
534
|
-
"\n".join(url_group)
|
|
528
|
+
"\n".join(url_group),
|
|
535
529
|
)
|
|
536
530
|
continue
|
|
537
531
|
else:
|
documentcloud/exceptions.py
CHANGED
|
@@ -2,38 +2,11 @@
|
|
|
2
2
|
Custom exceptions for python-documentcloud
|
|
3
3
|
"""
|
|
4
4
|
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
self.status_code = self.response.status_code
|
|
14
|
-
if not args:
|
|
15
|
-
args = [f"{self.status_code} - {self.error}"]
|
|
16
|
-
else:
|
|
17
|
-
self.error = None
|
|
18
|
-
self.status_code = None
|
|
19
|
-
super().__init__(*args, **kwargs)
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
class DuplicateObjectError(DocumentCloudError):
|
|
23
|
-
"""Raised when an object is added to a unique list more than once"""
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
class CredentialsFailedError(DocumentCloudError):
|
|
27
|
-
"""Raised if unable to obtain an access token due to bad login credentials"""
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
class APIError(DocumentCloudError):
|
|
31
|
-
"""Any other error calling the API"""
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
class DoesNotExistError(APIError):
|
|
35
|
-
"""Raised when the user asks the API for something it cannot find"""
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
class MultipleObjectsReturnedError(APIError):
|
|
39
|
-
"""Raised when the API returns multiple objects when it expected one"""
|
|
5
|
+
# pylint: disable=unused-import
|
|
6
|
+
# Import exceptions from python-squarelet
|
|
7
|
+
from squarelet.exceptions import SquareletError as DocumentCloudError
|
|
8
|
+
from squarelet.exceptions import DuplicateObjectError
|
|
9
|
+
from squarelet.exceptions import CredentialsFailedError
|
|
10
|
+
from squarelet.exceptions import APIError
|
|
11
|
+
from squarelet.exceptions import DoesNotExistError
|
|
12
|
+
from squarelet.exceptions import MultipleObjectsReturnedError
|
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.2
|
|
2
2
|
Name: python-documentcloud
|
|
3
|
-
Version: 4.
|
|
3
|
+
Version: 4.4.0
|
|
4
4
|
Summary: A simple Python wrapper for the DocumentCloud API
|
|
5
5
|
Home-page: https://github.com/muckrock/python-documentcloud
|
|
6
6
|
Author: Mitchell Kotler
|
|
@@ -10,7 +10,6 @@ Classifier: Development Status :: 5 - Production/Stable
|
|
|
10
10
|
Classifier: Intended Audience :: Developers
|
|
11
11
|
Classifier: Operating System :: OS Independent
|
|
12
12
|
Classifier: License :: OSI Approved :: MIT License
|
|
13
|
-
Classifier: Programming Language :: Python :: 3.7
|
|
14
13
|
Classifier: Programming Language :: Python :: 3.8
|
|
15
14
|
Classifier: Programming Language :: Python :: 3.9
|
|
16
15
|
Classifier: Programming Language :: Python :: 3.10
|
|
@@ -20,25 +19,36 @@ Classifier: Topic :: Internet :: WWW/HTTP
|
|
|
20
19
|
Description-Content-Type: text/markdown
|
|
21
20
|
License-File: LICENSE
|
|
22
21
|
Requires-Dist: future
|
|
23
|
-
Requires-Dist: listcrunch
|
|
22
|
+
Requires-Dist: listcrunch>=1.0.1
|
|
24
23
|
Requires-Dist: python-dateutil
|
|
25
24
|
Requires-Dist: ratelimit
|
|
26
25
|
Requires-Dist: requests
|
|
27
26
|
Requires-Dist: urllib3
|
|
28
27
|
Requires-Dist: pyyaml
|
|
29
28
|
Requires-Dist: fastjsonschema
|
|
29
|
+
Requires-Dist: python-squarelet
|
|
30
30
|
Provides-Extra: dev
|
|
31
|
-
Requires-Dist: black
|
|
32
|
-
Requires-Dist: coverage
|
|
33
|
-
Requires-Dist: isort
|
|
34
|
-
Requires-Dist: pylint
|
|
35
|
-
Requires-Dist: sphinx
|
|
36
|
-
Requires-Dist: twine
|
|
31
|
+
Requires-Dist: black; extra == "dev"
|
|
32
|
+
Requires-Dist: coverage; extra == "dev"
|
|
33
|
+
Requires-Dist: isort; extra == "dev"
|
|
34
|
+
Requires-Dist: pylint; extra == "dev"
|
|
35
|
+
Requires-Dist: sphinx; extra == "dev"
|
|
36
|
+
Requires-Dist: twine; extra == "dev"
|
|
37
37
|
Provides-Extra: test
|
|
38
|
-
Requires-Dist: pytest
|
|
39
|
-
Requires-Dist: pytest-mock
|
|
40
|
-
Requires-Dist: pytest-recording
|
|
41
|
-
Requires-Dist: vcrpy
|
|
38
|
+
Requires-Dist: pytest; extra == "test"
|
|
39
|
+
Requires-Dist: pytest-mock; extra == "test"
|
|
40
|
+
Requires-Dist: pytest-recording; extra == "test"
|
|
41
|
+
Requires-Dist: vcrpy; extra == "test"
|
|
42
|
+
Dynamic: author
|
|
43
|
+
Dynamic: author-email
|
|
44
|
+
Dynamic: classifier
|
|
45
|
+
Dynamic: description
|
|
46
|
+
Dynamic: description-content-type
|
|
47
|
+
Dynamic: home-page
|
|
48
|
+
Dynamic: license
|
|
49
|
+
Dynamic: provides-extra
|
|
50
|
+
Dynamic: requires-dist
|
|
51
|
+
Dynamic: summary
|
|
42
52
|
|
|
43
53
|
<pre><code> ____ _ ____ _ _
|
|
44
54
|
| _ \ ___ ___ _ _ _ __ ___ ___ _ __ | |_ / ___| | ___ _ _ __| |
|
|
@@ -2,17 +2,17 @@ documentcloud/__init__.py,sha256=XAwOR6JYL-flQV_uC616AMA2rYiXTkeogNolqE6LzN4,220
|
|
|
2
2
|
documentcloud/addon.py,sha256=3FxQjm26jknjLdd-GuztiZO4Z7NcgXq4WqunE9oh2es,11754
|
|
3
3
|
documentcloud/annotations.py,sha256=wVe3wYzyTRvc_hJ3r0m6iyDf6WIFlaGcCnyah_r53pg,2538
|
|
4
4
|
documentcloud/base.py,sha256=pNF45aleYpQ9fj75CiL3c4Ssv6MO1EmdzZ6wBLPKHDg,6545
|
|
5
|
-
documentcloud/client.py,sha256=
|
|
5
|
+
documentcloud/client.py,sha256=WXHNE1BT-LE2E55XlOvPuWl_g5N0zUIdXvB7Qj_fMNc,1658
|
|
6
6
|
documentcloud/constants.py,sha256=h6NStSkxPrjQ2gzaIlqftCF7tthkRimddOE8SsmlHag,1828
|
|
7
|
-
documentcloud/documents.py,sha256=
|
|
8
|
-
documentcloud/exceptions.py,sha256=
|
|
7
|
+
documentcloud/documents.py,sha256=CNEtdiDQ_Alcb3ruMfzlKn5QqWdn_PWicUWJlyxEaC0,19481
|
|
8
|
+
documentcloud/exceptions.py,sha256=AwIJpcylq6sF6oaenrZE6nr2EBuj23nxTOf3z_RwtuE,461
|
|
9
9
|
documentcloud/organizations.py,sha256=_Ot6MWzoa5JdU3jqedU-0Fec_K8WrgxqdlIp4oIijes,392
|
|
10
10
|
documentcloud/projects.py,sha256=KuOiw65a-8fdgbjo7BqjbEbWguds8inkhFJZJd578bs,5328
|
|
11
11
|
documentcloud/sections.py,sha256=cMf973KMvp6fAPSMXCD67L32Pz1_Tfh81oV2q2UQ9Uk,924
|
|
12
12
|
documentcloud/toolbox.py,sha256=zFZTyOn40YZjBpqa1H3qjpR4C3Wu1X2g72AvH_ljlic,1835
|
|
13
13
|
documentcloud/users.py,sha256=yydOXoEsfJlYqryZpXQ4G3aeRc5y_QCHqXd0dfF1aIc,354
|
|
14
|
-
python_documentcloud-4.
|
|
15
|
-
python_documentcloud-4.
|
|
16
|
-
python_documentcloud-4.
|
|
17
|
-
python_documentcloud-4.
|
|
18
|
-
python_documentcloud-4.
|
|
14
|
+
python_documentcloud-4.4.0.dist-info/LICENSE,sha256=Z1IBhHCzIeGR9F2iHtcLt2I2qoUhJ2pK139CAIAuFgo,1151
|
|
15
|
+
python_documentcloud-4.4.0.dist-info/METADATA,sha256=j7wqTddQt4tj-iOttcd_hB8buI1HiEZ8AaxLwVjWn20,2858
|
|
16
|
+
python_documentcloud-4.4.0.dist-info/WHEEL,sha256=9Hm2OB-j1QcCUq9Jguht7ayGIIZBRTdOXD1qg9cCgPM,109
|
|
17
|
+
python_documentcloud-4.4.0.dist-info/top_level.txt,sha256=rzNW2vA9GqU5ipNQYSP1XJQ54ippjKXVIo9oMlM0Tm4,14
|
|
18
|
+
python_documentcloud-4.4.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|