python-documentcloud 4.5.0__tar.gz → 4.7.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {python_documentcloud-4.5.0/python_documentcloud.egg-info → python_documentcloud-4.7.0}/PKG-INFO +3 -1
- {python_documentcloud-4.5.0 → python_documentcloud-4.7.0}/documentcloud/addon.py +20 -0
- python_documentcloud-4.7.0/documentcloud/client.py +100 -0
- {python_documentcloud-4.5.0 → python_documentcloud-4.7.0}/documentcloud/documents.py +33 -8
- python_documentcloud-4.7.0/documentcloud/exceptions.py +15 -0
- {python_documentcloud-4.5.0 → python_documentcloud-4.7.0/python_documentcloud.egg-info}/PKG-INFO +3 -1
- {python_documentcloud-4.5.0 → python_documentcloud-4.7.0}/python_documentcloud.egg-info/SOURCES.txt +1 -0
- {python_documentcloud-4.5.0 → python_documentcloud-4.7.0}/python_documentcloud.egg-info/requires.txt +2 -0
- {python_documentcloud-4.5.0 → python_documentcloud-4.7.0}/setup.py +3 -1
- python_documentcloud-4.7.0/tests/test_addon.py +141 -0
- {python_documentcloud-4.5.0 → python_documentcloud-4.7.0}/tests/test_client.py +75 -0
- python_documentcloud-4.5.0/documentcloud/client.py +0 -58
- python_documentcloud-4.5.0/documentcloud/exceptions.py +0 -12
- {python_documentcloud-4.5.0 → python_documentcloud-4.7.0}/LICENSE +0 -0
- {python_documentcloud-4.5.0 → python_documentcloud-4.7.0}/README.md +0 -0
- {python_documentcloud-4.5.0 → python_documentcloud-4.7.0}/documentcloud/__init__.py +0 -0
- {python_documentcloud-4.5.0 → python_documentcloud-4.7.0}/documentcloud/annotations.py +0 -0
- {python_documentcloud-4.5.0 → python_documentcloud-4.7.0}/documentcloud/base.py +0 -0
- {python_documentcloud-4.5.0 → python_documentcloud-4.7.0}/documentcloud/constants.py +0 -0
- {python_documentcloud-4.5.0 → python_documentcloud-4.7.0}/documentcloud/organizations.py +0 -0
- {python_documentcloud-4.5.0 → python_documentcloud-4.7.0}/documentcloud/projects.py +0 -0
- {python_documentcloud-4.5.0 → python_documentcloud-4.7.0}/documentcloud/sections.py +0 -0
- {python_documentcloud-4.5.0 → python_documentcloud-4.7.0}/documentcloud/toolbox.py +0 -0
- {python_documentcloud-4.5.0 → python_documentcloud-4.7.0}/documentcloud/users.py +0 -0
- {python_documentcloud-4.5.0 → python_documentcloud-4.7.0}/python_documentcloud.egg-info/dependency_links.txt +0 -0
- {python_documentcloud-4.5.0 → python_documentcloud-4.7.0}/python_documentcloud.egg-info/top_level.txt +0 -0
- {python_documentcloud-4.5.0 → python_documentcloud-4.7.0}/setup.cfg +0 -0
- {python_documentcloud-4.5.0 → python_documentcloud-4.7.0}/tests/test_annotations.py +0 -0
- {python_documentcloud-4.5.0 → python_documentcloud-4.7.0}/tests/test_base.py +0 -0
- {python_documentcloud-4.5.0 → python_documentcloud-4.7.0}/tests/test_documents.py +0 -0
- {python_documentcloud-4.5.0 → python_documentcloud-4.7.0}/tests/test_organizations.py +0 -0
- {python_documentcloud-4.5.0 → python_documentcloud-4.7.0}/tests/test_projects.py +0 -0
- {python_documentcloud-4.5.0 → python_documentcloud-4.7.0}/tests/test_sections.py +0 -0
- {python_documentcloud-4.5.0 → python_documentcloud-4.7.0}/tests/test_toolbox.py +0 -0
- {python_documentcloud-4.5.0 → python_documentcloud-4.7.0}/tests/test_users.py +0 -0
{python_documentcloud-4.5.0/python_documentcloud.egg-info → python_documentcloud-4.7.0}/PKG-INFO
RENAMED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: python-documentcloud
|
|
3
|
-
Version: 4.5.0
|
|
3
|
+
Version: 4.7.0
|
|
4
4
|
Summary: A simple Python wrapper for the DocumentCloud API
|
|
5
5
|
Home-page: https://github.com/muckrock/python-documentcloud
|
|
6
6
|
Author: Mitchell Kotler
|
|
@@ -27,6 +27,7 @@ Requires-Dist: urllib3
|
|
|
27
27
|
Requires-Dist: pyyaml
|
|
28
28
|
Requires-Dist: fastjsonschema
|
|
29
29
|
Requires-Dist: python-squarelet
|
|
30
|
+
Requires-Dist: token-bucket
|
|
30
31
|
Provides-Extra: dev
|
|
31
32
|
Requires-Dist: black; extra == "dev"
|
|
32
33
|
Requires-Dist: coverage; extra == "dev"
|
|
@@ -37,6 +38,7 @@ Requires-Dist: twine; extra == "dev"
|
|
|
37
38
|
Provides-Extra: test
|
|
38
39
|
Requires-Dist: pytest; extra == "test"
|
|
39
40
|
Requires-Dist: pytest-mock; extra == "test"
|
|
41
|
+
Requires-Dist: pytest-xdist; extra == "test"
|
|
40
42
|
Requires-Dist: pytest-recording; extra == "test"
|
|
41
43
|
Requires-Dist: vcrpy; extra == "test"
|
|
42
44
|
Dynamic: author
|
|
@@ -182,6 +182,26 @@ class AddOn(BaseAddOn):
|
|
|
182
182
|
f"addon_runs/{self.id}/", json={"file_name": file_name}
|
|
183
183
|
)
|
|
184
184
|
|
|
185
|
+
def load_run_data(self):
|
|
186
|
+
"Load persistent data from this run"
|
|
187
|
+
if not self.id:
|
|
188
|
+
return {}
|
|
189
|
+
|
|
190
|
+
response = self.client.get(f"addon_runs/{self.id}/")
|
|
191
|
+
response.raise_for_status()
|
|
192
|
+
return response.json().get("data", {})
|
|
193
|
+
|
|
194
|
+
def store_run_data(self, data):
|
|
195
|
+
"Store persistent data for this run"
|
|
196
|
+
if not self.id:
|
|
197
|
+
print("Run ID not set. Try again later or check if something went wrong.")
|
|
198
|
+
return None
|
|
199
|
+
|
|
200
|
+
if not isinstance(data, dict):
|
|
201
|
+
raise TypeError("Invalid data")
|
|
202
|
+
|
|
203
|
+
return self.client.patch(f"addon_runs/{self.id}/", json={"data": data})
|
|
204
|
+
|
|
185
205
|
def load_event_data(self):
|
|
186
206
|
"""Load persistent data for this event"""
|
|
187
207
|
if not self.event_id:
|
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
# Standard Library
|
|
2
|
+
import logging
|
|
3
|
+
import time
|
|
4
|
+
|
|
5
|
+
# Third Party
|
|
6
|
+
import token_bucket
|
|
7
|
+
from squarelet import SquareletClient
|
|
8
|
+
|
|
9
|
+
# Local
|
|
10
|
+
from .documents import DocumentClient
|
|
11
|
+
from .organizations import OrganizationClient
|
|
12
|
+
from .projects import ProjectClient
|
|
13
|
+
from .users import UserClient
|
|
14
|
+
|
|
15
|
+
logger = logging.getLogger("documentcloud")
|
|
16
|
+
|
|
17
|
+
# Per-endpoint rate limits applied on top of the global squarelet limit.
|
|
18
|
+
# Format: (method, url_pattern, rate_per_second, capacity)
|
|
19
|
+
#
|
|
20
|
+
# Endpoint Rate Burst Notes
|
|
21
|
+
# -------- ---- ----- -----
|
|
22
|
+
# GET documents/search 15/min 50
|
|
23
|
+
# POST documents/ 12/min 100 25 docs/bulk call = up to 300 docs/min
|
|
24
|
+
# PUT documents/ 12/min 100 25 docs/bulk call = up to 300 docs/min
|
|
25
|
+
# GET files/ 15/min 100 PDFs, full text, and other private assets
|
|
26
|
+
ENDPOINT_RATE_LIMITS = [
|
|
27
|
+
("GET", "documents/search", 15 / 60, 50),
|
|
28
|
+
("POST", "documents/", 12 / 60, 100),
|
|
29
|
+
("PUT", "documents/", 12 / 60, 100),
|
|
30
|
+
("GET", "files/", 15 / 60, 100),
|
|
31
|
+
]
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class DocumentCloud(SquareletClient):
|
|
35
|
+
"""
|
|
36
|
+
The public interface for the DocumentCloud API, now integrated with SquareletClient
|
|
37
|
+
"""
|
|
38
|
+
|
|
39
|
+
def __init__(
|
|
40
|
+
self,
|
|
41
|
+
username=None,
|
|
42
|
+
password=None,
|
|
43
|
+
base_uri="https://api.www.documentcloud.org/api/",
|
|
44
|
+
auth_uri="https://accounts.muckrock.com/api/",
|
|
45
|
+
timeout=20,
|
|
46
|
+
loglevel=None,
|
|
47
|
+
rate_limit=True,
|
|
48
|
+
rate_limit_sleep=True,
|
|
49
|
+
):
|
|
50
|
+
# Initialize SquareletClient for authentication and request handling
|
|
51
|
+
super().__init__(
|
|
52
|
+
base_uri=base_uri,
|
|
53
|
+
username=username,
|
|
54
|
+
password=password,
|
|
55
|
+
auth_uri=auth_uri,
|
|
56
|
+
timeout=timeout,
|
|
57
|
+
rate_limit=rate_limit,
|
|
58
|
+
rate_limit_sleep=rate_limit_sleep,
|
|
59
|
+
)
|
|
60
|
+
|
|
61
|
+
# Set up logging
|
|
62
|
+
if loglevel:
|
|
63
|
+
logging.basicConfig(
|
|
64
|
+
level=loglevel,
|
|
65
|
+
format="%(asctime)s %(levelname)-8s %(name)-25s %(message)s",
|
|
66
|
+
)
|
|
67
|
+
else:
|
|
68
|
+
logger.addHandler(logging.NullHandler())
|
|
69
|
+
|
|
70
|
+
# Build per-endpoint token bucket rate limiters
|
|
71
|
+
storage = token_bucket.MemoryStorage()
|
|
72
|
+
self._endpoint_limiters = [
|
|
73
|
+
(
|
|
74
|
+
pattern_method,
|
|
75
|
+
pattern,
|
|
76
|
+
token_bucket.Limiter(rate=rate, capacity=capacity, storage=storage),
|
|
77
|
+
f"{pattern_method}:{pattern}",
|
|
78
|
+
)
|
|
79
|
+
for pattern_method, pattern, rate, capacity in ENDPOINT_RATE_LIMITS
|
|
80
|
+
]
|
|
81
|
+
|
|
82
|
+
# Initialize the sub-clients using SquareletClient
|
|
83
|
+
self.documents = DocumentClient(self)
|
|
84
|
+
self.projects = ProjectClient(self)
|
|
85
|
+
self.users = UserClient(self)
|
|
86
|
+
self.organizations = OrganizationClient(self)
|
|
87
|
+
|
|
88
|
+
def request(self, method, url, raise_error=True, **kwargs):
|
|
89
|
+
for pattern_method, pattern, limiter, bucket_key in self._endpoint_limiters:
|
|
90
|
+
if pattern_method.upper() == method.upper() and pattern in url:
|
|
91
|
+
if not limiter.consume(bucket_key):
|
|
92
|
+
logger.warning(
|
|
93
|
+
"Rate limit reached for %s %s, throttling...",
|
|
94
|
+
method.upper(),
|
|
95
|
+
pattern,
|
|
96
|
+
)
|
|
97
|
+
while not limiter.consume(bucket_key):
|
|
98
|
+
time.sleep(0.1)
|
|
99
|
+
return super().request(method, url, raise_error=raise_error, **kwargs)
|
|
100
|
+
return super().request(method, url, raise_error=raise_error, **kwargs)
|
|
@@ -7,10 +7,13 @@ import datetime
|
|
|
7
7
|
import logging
|
|
8
8
|
import os
|
|
9
9
|
import re
|
|
10
|
+
import time
|
|
10
11
|
import warnings
|
|
11
12
|
from functools import partial
|
|
13
|
+
from urllib.parse import urlparse
|
|
12
14
|
|
|
13
15
|
# Third Party
|
|
16
|
+
import token_bucket
|
|
14
17
|
from requests.exceptions import RequestException
|
|
15
18
|
|
|
16
19
|
# Local
|
|
@@ -23,15 +26,12 @@ from .sections import SectionClient
|
|
|
23
26
|
from .toolbox import grouper, is_url, merge_dicts, requests_retry_session
|
|
24
27
|
from .users import User
|
|
25
28
|
|
|
26
|
-
try:
|
|
27
|
-
from urllib.parse import urlparse
|
|
28
|
-
except ImportError:
|
|
29
|
-
from urlparse import urlparse
|
|
30
|
-
|
|
31
29
|
logger = logging.getLogger("documentcloud")
|
|
32
30
|
|
|
33
31
|
IMAGE_SIZES = ["thumbnail", "small", "normal", "large", "xlarge"]
|
|
34
32
|
|
|
33
|
+
DEFAULT_USER_AGENT = "python-documentcloud"
|
|
34
|
+
|
|
35
35
|
|
|
36
36
|
class Document(BaseAPIObject):
|
|
37
37
|
"""A single DocumentCloud document"""
|
|
@@ -168,12 +168,17 @@ class Document(BaseAPIObject):
|
|
|
168
168
|
|
|
169
169
|
if base_netloc == url_netloc:
|
|
170
170
|
# if the url host is the same as the base api host,
|
|
171
|
-
#
|
|
171
|
+
# send the request with the client in order to include
|
|
172
172
|
# authentication credentials
|
|
173
173
|
response = self._client.get(url, full_url=True)
|
|
174
174
|
else:
|
|
175
|
-
response =
|
|
176
|
-
url,
|
|
175
|
+
response = self._client.documents.asset_get(
|
|
176
|
+
url,
|
|
177
|
+
headers={
|
|
178
|
+
"User-Agent": self._client.session.headers.get(
|
|
179
|
+
"User-Agent", DEFAULT_USER_AGENT
|
|
180
|
+
)
|
|
181
|
+
},
|
|
177
182
|
)
|
|
178
183
|
if fmt == "text":
|
|
179
184
|
return response.content.decode("utf8")
|
|
@@ -250,6 +255,26 @@ class DocumentClient(BaseAPIClient):
|
|
|
250
255
|
api_path = "documents"
|
|
251
256
|
resource = Document
|
|
252
257
|
|
|
258
|
+
def __init__(self, client):
|
|
259
|
+
super().__init__(client)
|
|
260
|
+
# Rate limit for public document asset fetches (S3-hosted).
|
|
261
|
+
# Private document assets go through the API client and are limited there.
|
|
262
|
+
# Token bucket: burst of 100, sustained at 15/min (0.25/sec).
|
|
263
|
+
storage = token_bucket.MemoryStorage()
|
|
264
|
+
self._asset_limiter = token_bucket.Limiter(
|
|
265
|
+
rate=15 / 60,
|
|
266
|
+
capacity=100,
|
|
267
|
+
storage=storage,
|
|
268
|
+
)
|
|
269
|
+
self._asset_session = requests_retry_session()
|
|
270
|
+
|
|
271
|
+
def asset_get(self, url, **kwargs):
|
|
272
|
+
if not self._asset_limiter.consume("asset"):
|
|
273
|
+
logger.warning("Rate limit reached for asset fetch, throttling...")
|
|
274
|
+
while not self._asset_limiter.consume("asset"):
|
|
275
|
+
time.sleep(0.1)
|
|
276
|
+
return self._asset_session.get(url, **kwargs)
|
|
277
|
+
|
|
253
278
|
def search(self, query, **params):
|
|
254
279
|
"""Return documents matching a search query"""
|
|
255
280
|
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Custom exceptions for python-documentcloud
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
# Third Party
|
|
6
|
+
# pylint: disable=unused-import
|
|
7
|
+
# Import exceptions from python-squarelet
|
|
8
|
+
from squarelet.exceptions import (
|
|
9
|
+
APIError,
|
|
10
|
+
CredentialsFailedError,
|
|
11
|
+
DoesNotExistError,
|
|
12
|
+
DuplicateObjectError,
|
|
13
|
+
MultipleObjectsReturnedError,
|
|
14
|
+
SquareletError as DocumentCloudError,
|
|
15
|
+
)
|
{python_documentcloud-4.5.0 → python_documentcloud-4.7.0/python_documentcloud.egg-info}/PKG-INFO
RENAMED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: python-documentcloud
|
|
3
|
-
Version: 4.5.0
|
|
3
|
+
Version: 4.7.0
|
|
4
4
|
Summary: A simple Python wrapper for the DocumentCloud API
|
|
5
5
|
Home-page: https://github.com/muckrock/python-documentcloud
|
|
6
6
|
Author: Mitchell Kotler
|
|
@@ -27,6 +27,7 @@ Requires-Dist: urllib3
|
|
|
27
27
|
Requires-Dist: pyyaml
|
|
28
28
|
Requires-Dist: fastjsonschema
|
|
29
29
|
Requires-Dist: python-squarelet
|
|
30
|
+
Requires-Dist: token-bucket
|
|
30
31
|
Provides-Extra: dev
|
|
31
32
|
Requires-Dist: black; extra == "dev"
|
|
32
33
|
Requires-Dist: coverage; extra == "dev"
|
|
@@ -37,6 +38,7 @@ Requires-Dist: twine; extra == "dev"
|
|
|
37
38
|
Provides-Extra: test
|
|
38
39
|
Requires-Dist: pytest; extra == "test"
|
|
39
40
|
Requires-Dist: pytest-mock; extra == "test"
|
|
41
|
+
Requires-Dist: pytest-xdist; extra == "test"
|
|
40
42
|
Requires-Dist: pytest-recording; extra == "test"
|
|
41
43
|
Requires-Dist: vcrpy; extra == "test"
|
|
42
44
|
Dynamic: author
|
{python_documentcloud-4.5.0 → python_documentcloud-4.7.0}/python_documentcloud.egg-info/SOURCES.txt
RENAMED
|
@@ -20,6 +20,7 @@ python_documentcloud.egg-info/SOURCES.txt
|
|
|
20
20
|
python_documentcloud.egg-info/dependency_links.txt
|
|
21
21
|
python_documentcloud.egg-info/requires.txt
|
|
22
22
|
python_documentcloud.egg-info/top_level.txt
|
|
23
|
+
tests/test_addon.py
|
|
23
24
|
tests/test_annotations.py
|
|
24
25
|
tests/test_base.py
|
|
25
26
|
tests/test_client.py
|
|
@@ -7,7 +7,7 @@ with open("README.md", "r") as fh:
|
|
|
7
7
|
|
|
8
8
|
setup(
|
|
9
9
|
name="python-documentcloud",
|
|
10
|
-
version="4.5.0",
|
|
10
|
+
version="4.7.0",
|
|
11
11
|
description="A simple Python wrapper for the DocumentCloud API",
|
|
12
12
|
author="Mitchell Kotler",
|
|
13
13
|
author_email="mitch@muckrock.com",
|
|
@@ -27,6 +27,7 @@ setup(
|
|
|
27
27
|
"pyyaml",
|
|
28
28
|
"fastjsonschema",
|
|
29
29
|
"python-squarelet",
|
|
30
|
+
"token-bucket",
|
|
30
31
|
),
|
|
31
32
|
extras_require={
|
|
32
33
|
"dev": [
|
|
@@ -40,6 +41,7 @@ setup(
|
|
|
40
41
|
"test": [
|
|
41
42
|
"pytest",
|
|
42
43
|
"pytest-mock",
|
|
44
|
+
"pytest-xdist",
|
|
43
45
|
"pytest-recording",
|
|
44
46
|
"vcrpy",
|
|
45
47
|
],
|
|
@@ -0,0 +1,141 @@
|
|
|
1
|
+
# Standard Library
|
|
2
|
+
from unittest.mock import MagicMock
|
|
3
|
+
|
|
4
|
+
# Third Party
|
|
5
|
+
import pytest
|
|
6
|
+
|
|
7
|
+
# DocumentCloud
|
|
8
|
+
from documentcloud.addon import AddOn
|
|
9
|
+
|
|
10
|
+
# pylint: disable=redefined-outer-name
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
@pytest.fixture
|
|
14
|
+
def addon():
|
|
15
|
+
"""An AddOn instance built without invoking argparse or constructing a real client.
|
|
16
|
+
|
|
17
|
+
Tests can override `.id`, `.event_id`, `.client`, etc. as needed.
|
|
18
|
+
"""
|
|
19
|
+
instance = AddOn.__new__(AddOn)
|
|
20
|
+
instance.id = "run-123"
|
|
21
|
+
instance.addon_id = "addon-1"
|
|
22
|
+
instance.event_id = None
|
|
23
|
+
instance.documents = None
|
|
24
|
+
instance.query = None
|
|
25
|
+
instance.user_id = None
|
|
26
|
+
instance.org_id = None
|
|
27
|
+
instance.data = {}
|
|
28
|
+
instance.title = "Test AddOn"
|
|
29
|
+
instance.client = MagicMock()
|
|
30
|
+
return instance
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
class TestLoadRunData:
|
|
34
|
+
def test_returns_data_when_run_id_set(self, addon):
|
|
35
|
+
addon.client.get.return_value.json.return_value = {"data": {"foo": "bar"}}
|
|
36
|
+
|
|
37
|
+
result = addon.load_run_data()
|
|
38
|
+
|
|
39
|
+
addon.client.get.assert_called_once_with("addon_runs/run-123/")
|
|
40
|
+
assert result == {"foo": "bar"}
|
|
41
|
+
|
|
42
|
+
def test_returns_empty_dict_when_no_run_id(self, addon):
|
|
43
|
+
addon.id = None
|
|
44
|
+
|
|
45
|
+
assert addon.load_run_data() == {}
|
|
46
|
+
addon.client.get.assert_not_called()
|
|
47
|
+
|
|
48
|
+
def test_returns_empty_dict_when_data_missing_from_response(self, addon):
|
|
49
|
+
addon.client.get.return_value.json.return_value = {}
|
|
50
|
+
|
|
51
|
+
assert addon.load_run_data() == {}
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
class TestStoreRunData:
|
|
55
|
+
def test_patches_run_with_data(self, addon):
|
|
56
|
+
addon.store_run_data({"foo": "bar"})
|
|
57
|
+
|
|
58
|
+
addon.client.patch.assert_called_once_with(
|
|
59
|
+
"addon_runs/run-123/", json={"data": {"foo": "bar"}}
|
|
60
|
+
)
|
|
61
|
+
|
|
62
|
+
def test_no_op_when_no_run_id(self, addon, capsys):
|
|
63
|
+
addon.id = None
|
|
64
|
+
|
|
65
|
+
result = addon.store_run_data({"foo": "bar"})
|
|
66
|
+
|
|
67
|
+
assert result is None
|
|
68
|
+
addon.client.patch.assert_not_called()
|
|
69
|
+
assert "Run ID not set" in capsys.readouterr().out
|
|
70
|
+
|
|
71
|
+
def test_rejects_non_dict_data(self, addon):
|
|
72
|
+
with pytest.raises(TypeError):
|
|
73
|
+
addon.store_run_data("not a dict")
|
|
74
|
+
|
|
75
|
+
addon.client.patch.assert_not_called()
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
class TestLoadEventData:
|
|
79
|
+
def test_returns_scratch_when_event_id_set(self, addon):
|
|
80
|
+
addon.event_id = "evt-9"
|
|
81
|
+
addon.client.get.return_value.json.return_value = {"scratch": {"x": 1}}
|
|
82
|
+
|
|
83
|
+
result = addon.load_event_data()
|
|
84
|
+
|
|
85
|
+
addon.client.get.assert_called_once_with("addon_events/evt-9/")
|
|
86
|
+
assert result == {"x": 1}
|
|
87
|
+
|
|
88
|
+
def test_returns_none_when_no_event_id(self, addon):
|
|
89
|
+
assert addon.load_event_data() is None
|
|
90
|
+
addon.client.get.assert_not_called()
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
class TestStoreEventData:
|
|
94
|
+
def test_patches_event_with_scratch(self, addon):
|
|
95
|
+
addon.event_id = "evt-9"
|
|
96
|
+
|
|
97
|
+
addon.store_event_data({"x": 1})
|
|
98
|
+
|
|
99
|
+
addon.client.patch.assert_called_once_with(
|
|
100
|
+
"addon_events/evt-9/", json={"scratch": {"x": 1}}
|
|
101
|
+
)
|
|
102
|
+
|
|
103
|
+
def test_no_op_when_no_event_id(self, addon):
|
|
104
|
+
assert addon.store_event_data({"x": 1}) is None
|
|
105
|
+
addon.client.patch.assert_not_called()
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
@pytest.fixture
|
|
109
|
+
def real_addon(client, addon_run):
|
|
110
|
+
"""An AddOn wired to the real `client` fixture and a freshly created run."""
|
|
111
|
+
instance = AddOn.__new__(AddOn)
|
|
112
|
+
instance.id = addon_run
|
|
113
|
+
instance.addon_id = None
|
|
114
|
+
instance.event_id = None
|
|
115
|
+
instance.documents = None
|
|
116
|
+
instance.query = None
|
|
117
|
+
instance.user_id = None
|
|
118
|
+
instance.org_id = None
|
|
119
|
+
instance.data = {}
|
|
120
|
+
instance.title = "Test AddOn"
|
|
121
|
+
instance.client = client
|
|
122
|
+
return instance
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
class TestRunDataVCR:
|
|
126
|
+
"""VCR-recorded round-trip tests against the dev DC.
|
|
127
|
+
|
|
128
|
+
Recording: set DC_TEST_ADDON_RUN_ID to an existing AddOnRun UUID on your
|
|
129
|
+
local dev DC, then run `make test-dev` (or `pytest --record-mode=new_episodes`).
|
|
130
|
+
"""
|
|
131
|
+
|
|
132
|
+
def test_load_run_data_returns_dict(self, real_addon):
|
|
133
|
+
result = real_addon.load_run_data()
|
|
134
|
+
assert isinstance(result, dict)
|
|
135
|
+
|
|
136
|
+
def test_store_then_load_run_data_round_trip(self, real_addon):
|
|
137
|
+
payload = {"foo": "bar", "n": 42}
|
|
138
|
+
real_addon.store_run_data(payload)
|
|
139
|
+
loaded = real_addon.load_run_data()
|
|
140
|
+
assert loaded.get("foo") == "bar"
|
|
141
|
+
assert loaded.get("n") == 42
|
|
@@ -9,6 +9,7 @@ import pytest
|
|
|
9
9
|
import ratelimit
|
|
10
10
|
|
|
11
11
|
# DocumentCloud
|
|
12
|
+
from documentcloud import DocumentCloud
|
|
12
13
|
from documentcloud.constants import RATE_LIMIT
|
|
13
14
|
from documentcloud.exceptions import APIError, CredentialsFailedError
|
|
14
15
|
|
|
@@ -111,3 +112,77 @@ def test_expired_refresh_token(short_client, record_mode):
|
|
|
111
112
|
assert short_client.users.get("me")
|
|
112
113
|
# check the refresh token was updated
|
|
113
114
|
assert old_refresh_token != short_client.refresh_token
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
def test_endpoint_rate_limit_burst_exhaustion():
|
|
118
|
+
"""Token bucket should block after burst capacity is exhausted"""
|
|
119
|
+
client = DocumentCloud()
|
|
120
|
+
# Exhaust the search burst (capacity=50)
|
|
121
|
+
_pattern_method, _pattern, limiter, bucket_key = client._endpoint_limiters[0]
|
|
122
|
+
for _ in range(50):
|
|
123
|
+
limiter.consume(bucket_key)
|
|
124
|
+
assert not limiter.consume(bucket_key)
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
def test_endpoint_rate_limit_method_specificity():
|
|
128
|
+
"""GET and POST to documents/ should use different limiters"""
|
|
129
|
+
client = DocumentCloud()
|
|
130
|
+
limiters = {(pm, p): lim for pm, p, lim, _ in client._endpoint_limiters}
|
|
131
|
+
assert limiters[("GET", "files/")] is not limiters[("POST", "documents/")]
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
def test_endpoint_rate_limit_pattern_ordering():
|
|
135
|
+
"""documents/search should match before documents/"""
|
|
136
|
+
client = DocumentCloud()
|
|
137
|
+
url = "documents/search/"
|
|
138
|
+
matched = next(
|
|
139
|
+
p for pm, p, _, _ in client._endpoint_limiters if pm == "GET" and p in url
|
|
140
|
+
)
|
|
141
|
+
assert matched == "documents/search"
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
def test_asset_rate_limit_burst_exhaustion():
|
|
145
|
+
"""Asset token bucket should block after burst capacity is exhausted"""
|
|
146
|
+
client = DocumentCloud()
|
|
147
|
+
limiter = client.documents._asset_limiter
|
|
148
|
+
for _ in range(100):
|
|
149
|
+
limiter.consume("asset")
|
|
150
|
+
assert not limiter.consume("asset")
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
def test_asset_rate_limit_refills():
|
|
154
|
+
"""Asset token bucket should refill over time"""
|
|
155
|
+
client = DocumentCloud()
|
|
156
|
+
limiter = client.documents._asset_limiter
|
|
157
|
+
for _ in range(100):
|
|
158
|
+
limiter.consume("asset")
|
|
159
|
+
assert not limiter.consume("asset")
|
|
160
|
+
time.sleep(5)
|
|
161
|
+
assert limiter.consume("asset")
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
def test_endpoint_rate_limit_buckets_are_independent():
|
|
165
|
+
"""Exhausting one endpoint's bucket should not affect another"""
|
|
166
|
+
client = DocumentCloud()
|
|
167
|
+
limiters = {(pm, p): (lim, bk) for pm, p, lim, bk in client._endpoint_limiters}
|
|
168
|
+
search_limiter, search_key = limiters[("GET", "documents/search")]
|
|
169
|
+
files_limiter, files_key = limiters[("GET", "files/")]
|
|
170
|
+
|
|
171
|
+
# Exhaust search bucket
|
|
172
|
+
for _ in range(50):
|
|
173
|
+
search_limiter.consume(search_key)
|
|
174
|
+
assert not search_limiter.consume(search_key)
|
|
175
|
+
|
|
176
|
+
# Files bucket should still have tokens
|
|
177
|
+
assert files_limiter.consume(files_key)
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
def test_endpoint_rate_limit_no_match_for_unrecognized_url():
|
|
181
|
+
"""Unrecognized URLs should not match any endpoint limiter"""
|
|
182
|
+
client = DocumentCloud()
|
|
183
|
+
url = "users/me/"
|
|
184
|
+
matched = next(
|
|
185
|
+
(p for pm, p, _, _ in client._endpoint_limiters if p in url),
|
|
186
|
+
None,
|
|
187
|
+
)
|
|
188
|
+
assert matched is None
|
|
@@ -1,58 +0,0 @@
|
|
|
1
|
-
# Import SquareletClient from python-squarelet
|
|
2
|
-
# Standard Library
|
|
3
|
-
import logging
|
|
4
|
-
|
|
5
|
-
# Third Party
|
|
6
|
-
from squarelet import SquareletClient
|
|
7
|
-
|
|
8
|
-
# Local
|
|
9
|
-
# Local Imports
|
|
10
|
-
from .documents import DocumentClient
|
|
11
|
-
from .organizations import OrganizationClient
|
|
12
|
-
from .projects import ProjectClient
|
|
13
|
-
from .users import UserClient
|
|
14
|
-
|
|
15
|
-
logger = logging.getLogger("documentcloud")
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
class DocumentCloud(SquareletClient):
|
|
19
|
-
"""
|
|
20
|
-
The public interface for the DocumentCloud API, now integrated with SquareletClient
|
|
21
|
-
"""
|
|
22
|
-
|
|
23
|
-
def __init__(
|
|
24
|
-
self,
|
|
25
|
-
username=None,
|
|
26
|
-
password=None,
|
|
27
|
-
base_uri="https://api.www.documentcloud.org/api/",
|
|
28
|
-
auth_uri="https://accounts.muckrock.com/api/",
|
|
29
|
-
timeout=20,
|
|
30
|
-
loglevel=None,
|
|
31
|
-
rate_limit=True,
|
|
32
|
-
rate_limit_sleep=True,
|
|
33
|
-
):
|
|
34
|
-
# Initialize SquareletClient for authentication and request handling
|
|
35
|
-
super().__init__(
|
|
36
|
-
base_uri=base_uri,
|
|
37
|
-
username=username,
|
|
38
|
-
password=password,
|
|
39
|
-
auth_uri=auth_uri,
|
|
40
|
-
timeout=timeout,
|
|
41
|
-
rate_limit=rate_limit,
|
|
42
|
-
rate_limit_sleep=rate_limit_sleep,
|
|
43
|
-
)
|
|
44
|
-
|
|
45
|
-
# Set up logging
|
|
46
|
-
if loglevel:
|
|
47
|
-
logging.basicConfig(
|
|
48
|
-
level=loglevel,
|
|
49
|
-
format="%(asctime)s %(levelname)-8s %(name)-25s %(message)s",
|
|
50
|
-
)
|
|
51
|
-
else:
|
|
52
|
-
logger.addHandler(logging.NullHandler())
|
|
53
|
-
|
|
54
|
-
# Initialize the sub-clients using SquareletClient
|
|
55
|
-
self.documents = DocumentClient(self)
|
|
56
|
-
self.projects = ProjectClient(self)
|
|
57
|
-
self.users = UserClient(self)
|
|
58
|
-
self.organizations = OrganizationClient(self)
|
|
@@ -1,12 +0,0 @@
|
|
|
1
|
-
"""
|
|
2
|
-
Custom exceptions for python-documentcloud
|
|
3
|
-
"""
|
|
4
|
-
|
|
5
|
-
# pylint: disable=unused-import
|
|
6
|
-
# Import exceptions from python-squarelet
|
|
7
|
-
from squarelet.exceptions import SquareletError as DocumentCloudError
|
|
8
|
-
from squarelet.exceptions import DuplicateObjectError
|
|
9
|
-
from squarelet.exceptions import CredentialsFailedError
|
|
10
|
-
from squarelet.exceptions import APIError
|
|
11
|
-
from squarelet.exceptions import DoesNotExistError
|
|
12
|
-
from squarelet.exceptions import MultipleObjectsReturnedError
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|