python-documentcloud 4.4.1__tar.gz → 4.6.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34) hide show
  1. {python_documentcloud-4.4.1 → python_documentcloud-4.6.0}/PKG-INFO +1 -1
  2. {python_documentcloud-4.4.1 → python_documentcloud-4.6.0}/documentcloud/addon.py +20 -0
  3. {python_documentcloud-4.4.1 → python_documentcloud-4.6.0}/documentcloud/client.py +4 -8
  4. {python_documentcloud-4.4.1 → python_documentcloud-4.6.0}/documentcloud/documents.py +112 -145
  5. python_documentcloud-4.6.0/documentcloud/exceptions.py +15 -0
  6. {python_documentcloud-4.4.1 → python_documentcloud-4.6.0}/python_documentcloud.egg-info/PKG-INFO +1 -1
  7. {python_documentcloud-4.4.1 → python_documentcloud-4.6.0}/python_documentcloud.egg-info/SOURCES.txt +1 -0
  8. {python_documentcloud-4.4.1 → python_documentcloud-4.6.0}/setup.py +1 -1
  9. python_documentcloud-4.6.0/tests/test_addon.py +141 -0
  10. {python_documentcloud-4.4.1 → python_documentcloud-4.6.0}/tests/test_documents.py +1 -4
  11. python_documentcloud-4.4.1/documentcloud/exceptions.py +0 -12
  12. {python_documentcloud-4.4.1 → python_documentcloud-4.6.0}/LICENSE +0 -0
  13. {python_documentcloud-4.4.1 → python_documentcloud-4.6.0}/README.md +0 -0
  14. {python_documentcloud-4.4.1 → python_documentcloud-4.6.0}/documentcloud/__init__.py +0 -0
  15. {python_documentcloud-4.4.1 → python_documentcloud-4.6.0}/documentcloud/annotations.py +0 -0
  16. {python_documentcloud-4.4.1 → python_documentcloud-4.6.0}/documentcloud/base.py +0 -0
  17. {python_documentcloud-4.4.1 → python_documentcloud-4.6.0}/documentcloud/constants.py +0 -0
  18. {python_documentcloud-4.4.1 → python_documentcloud-4.6.0}/documentcloud/organizations.py +0 -0
  19. {python_documentcloud-4.4.1 → python_documentcloud-4.6.0}/documentcloud/projects.py +0 -0
  20. {python_documentcloud-4.4.1 → python_documentcloud-4.6.0}/documentcloud/sections.py +0 -0
  21. {python_documentcloud-4.4.1 → python_documentcloud-4.6.0}/documentcloud/toolbox.py +0 -0
  22. {python_documentcloud-4.4.1 → python_documentcloud-4.6.0}/documentcloud/users.py +0 -0
  23. {python_documentcloud-4.4.1 → python_documentcloud-4.6.0}/python_documentcloud.egg-info/dependency_links.txt +0 -0
  24. {python_documentcloud-4.4.1 → python_documentcloud-4.6.0}/python_documentcloud.egg-info/requires.txt +0 -0
  25. {python_documentcloud-4.4.1 → python_documentcloud-4.6.0}/python_documentcloud.egg-info/top_level.txt +0 -0
  26. {python_documentcloud-4.4.1 → python_documentcloud-4.6.0}/setup.cfg +0 -0
  27. {python_documentcloud-4.4.1 → python_documentcloud-4.6.0}/tests/test_annotations.py +0 -0
  28. {python_documentcloud-4.4.1 → python_documentcloud-4.6.0}/tests/test_base.py +0 -0
  29. {python_documentcloud-4.4.1 → python_documentcloud-4.6.0}/tests/test_client.py +0 -0
  30. {python_documentcloud-4.4.1 → python_documentcloud-4.6.0}/tests/test_organizations.py +0 -0
  31. {python_documentcloud-4.4.1 → python_documentcloud-4.6.0}/tests/test_projects.py +0 -0
  32. {python_documentcloud-4.4.1 → python_documentcloud-4.6.0}/tests/test_sections.py +0 -0
  33. {python_documentcloud-4.4.1 → python_documentcloud-4.6.0}/tests/test_toolbox.py +0 -0
  34. {python_documentcloud-4.4.1 → python_documentcloud-4.6.0}/tests/test_users.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: python-documentcloud
3
- Version: 4.4.1
3
+ Version: 4.6.0
4
4
  Summary: A simple Python wrapper for the DocumentCloud API
5
5
  Home-page: https://github.com/muckrock/python-documentcloud
6
6
  Author: Mitchell Kotler
@@ -182,6 +182,26 @@ class AddOn(BaseAddOn):
182
182
  f"addon_runs/{self.id}/", json={"file_name": file_name}
183
183
  )
184
184
 
185
def load_run_data(self):
    """Load persistent data from this run.

    Fetches ``addon_runs/<id>/`` through ``self.client`` and returns the
    ``data`` field of the JSON response.

    Returns:
        dict: the stored run data, or an empty dict when no run ID is set
        or the response has no usable ``data`` value.

    Raises:
        Whatever ``response.raise_for_status()`` raises for an error response.
    """
    if not self.id:
        return {}

    response = self.client.get(f"addon_runs/{self.id}/")
    response.raise_for_status()
    # ``.get("data", {})`` only covers a missing key; an explicit null would
    # leak ``None`` to callers that expect a dict, so fall back explicitly.
    return response.json().get("data") or {}
193
+
194
def store_run_data(self, data):
    """Store persistent data for this run.

    Sends ``{"data": data}`` as a PATCH to ``addon_runs/<id>/``.

    Args:
        data: the dict to store on the run.

    Returns:
        The result of ``self.client.patch(...)``, or ``None`` when no run ID
        is set (a notice is printed and nothing is sent).

    Raises:
        TypeError: if ``data`` is not a dict (only checked once a run ID is
        known, matching the original order of checks).
    """
    if not self.id:
        print("Run ID not set. Try again later or check if something went wrong.")
        return None

    if not isinstance(data, dict):
        # Name the offending type so the caller can see what went wrong.
        raise TypeError(f"Run data must be a dict, got {type(data).__name__}")

    return self.client.patch(f"addon_runs/{self.id}/", json={"data": data})
204
+
185
205
  def load_event_data(self):
186
206
  """Load persistent data for this event"""
187
207
  if not self.event_id:
@@ -14,11 +14,12 @@ from .users import UserClient
14
14
 
15
15
  logger = logging.getLogger("documentcloud")
16
16
 
17
+
17
18
  class DocumentCloud(SquareletClient):
18
19
  """
19
20
  The public interface for the DocumentCloud API, now integrated with SquareletClient
20
21
  """
21
- # pylint:disable=too-many-positional-arguments
22
+
22
23
  def __init__(
23
24
  self,
24
25
  username=None,
@@ -30,7 +31,7 @@ class DocumentCloud(SquareletClient):
30
31
  rate_limit=True,
31
32
  rate_limit_sleep=True,
32
33
  ):
33
- # Initialize SquareletClient for authentication and request handling
34
+ # Initialize SquareletClient for authentication and request handling
34
35
  super().__init__(
35
36
  base_uri=base_uri,
36
37
  username=username,
@@ -38,7 +39,7 @@ class DocumentCloud(SquareletClient):
38
39
  auth_uri=auth_uri,
39
40
  timeout=timeout,
40
41
  rate_limit=rate_limit,
41
- rate_limit_sleep=rate_limit_sleep
42
+ rate_limit_sleep=rate_limit_sleep,
42
43
  )
43
44
 
44
45
  # Set up logging
@@ -55,8 +56,3 @@ class DocumentCloud(SquareletClient):
55
56
  self.projects = ProjectClient(self)
56
57
  self.users = UserClient(self)
57
58
  self.organizations = OrganizationClient(self)
58
-
59
- """def _request(self, method, url, raise_error=True, **kwargs):
60
- Delegates request to the SquareletClient's _request method
61
- return self.squarelet_client.request(method, url, raise_error, **kwargs)
62
- """
@@ -9,6 +9,7 @@ import os
9
9
  import re
10
10
  import warnings
11
11
  from functools import partial
12
+ from urllib.parse import urlparse
12
13
 
13
14
  # Third Party
14
15
  from requests.exceptions import RequestException
@@ -23,11 +24,6 @@ from .sections import SectionClient
23
24
  from .toolbox import grouper, is_url, merge_dicts, requests_retry_session
24
25
  from .users import User
25
26
 
26
- try:
27
- from urllib.parse import urlparse
28
- except ImportError:
29
- from urlparse import urlparse
30
-
31
27
  logger = logging.getLogger("documentcloud")
32
28
 
33
29
  IMAGE_SIZES = ["thumbnail", "small", "normal", "large", "xlarge"]
@@ -74,8 +70,11 @@ class Document(BaseAPIObject):
74
70
  def __getattr__(self, attr):
75
71
  """Generate methods for fetching resources"""
76
72
  p_image = re.compile(
77
- r"^get_(?P<size>thumbnail|small|normal|large|xlarge)_image_url(?P<list>_list)?$"
73
+ r"^get_"
74
+ r"(?P<size>thumbnail|small|normal|large|xlarge)_image_url"
75
+ r"(?P<list>_list)?$"
78
76
  )
77
+
79
78
  get = attr.startswith("get_")
80
79
  url = attr.endswith("_url")
81
80
  text = attr.endswith("_text")
@@ -230,9 +229,15 @@ class Document(BaseAPIObject):
230
229
 
231
230
  return all_results
232
231
 
233
- def process(self):
234
- """Reprocess the document"""
235
- self._client.post(f"{self.api_path}/{self.id}/process/")
232
def process(self, **kwargs):
    """Process the document, used on upload and for reprocessing.

    Only the ``force_ocr`` and ``ocr_engine`` keyword arguments are forwarded
    to the process endpoint, and only when the caller supplied them; any
    other keyword argument is ignored.
    """
    forwarded_keys = ("force_ocr", "ocr_engine")
    options = {key: kwargs[key] for key in forwarded_keys if key in kwargs}

    endpoint = f"{self.api_path}/{self.id}/process/"
    self._client.post(endpoint, json=options)
236
241
 
237
242
 
238
243
  class DocumentClient(BaseAPIClient):
@@ -310,6 +315,7 @@ class DocumentClient(BaseAPIClient):
310
315
  "title",
311
316
  "data",
312
317
  "force_ocr",
318
+ "ocr_engine",
313
319
  "projects",
314
320
  "delayed_index",
315
321
  "revision_control",
@@ -333,21 +339,55 @@ class DocumentClient(BaseAPIClient):
333
339
 
334
340
  return params
335
341
 
342
+ def _extract_ocr_options(self, kwargs):
343
+ """
344
+ Extract and validate OCR options from kwargs.
345
+
346
+ Returns:
347
+ force_ocr (bool)
348
+ ocr_engine (str)
349
+ """
350
+ force_ocr = kwargs.pop("force_ocr", False)
351
+ ocr_engine = kwargs.pop("ocr_engine", "tess4")
352
+
353
+ if not isinstance(force_ocr, bool):
354
+ raise ValueError("force_ocr must be a boolean")
355
+
356
+ if ocr_engine and ocr_engine not in ("tess4", "textract"):
357
+ raise ValueError(
358
+ "ocr_engine must be either 'tess4' for tesseract or 'textract'"
359
+ )
360
+
361
+ return force_ocr, ocr_engine
362
+
336
363
  def _get_title(self, name):
337
364
  """Get the default title for a document from its path"""
338
365
  return name.split(os.sep)[-1].rsplit(".", 1)[0]
339
366
 
340
367
  def _upload_url(self, file_url, **kwargs):
341
368
  """Upload a document from a publicly accessible URL"""
369
+ # extract process-related args
370
+ force_ocr, ocr_engine = self._extract_ocr_options(kwargs)
371
+
372
+ # create the document
342
373
  params = self._format_upload_parameters(file_url, **kwargs)
343
374
  params["file_url"] = file_url
375
+ if force_ocr:
376
+ params["force_ocr"] = force_ocr
377
+ params["ocr_engine"] = ocr_engine
344
378
  response = self.client.post("documents/", json=params)
345
- return Document(self.client, response.json())
379
+ create_json = response.json()
380
+
381
+ # wrap in Document object
382
+ doc = Document(self.client, create_json)
383
+
384
+ return doc
346
385
 
347
386
  def _upload_file(self, file_, **kwargs):
348
387
  """Upload a document directly"""
349
388
  # create the document
350
- force_ocr = kwargs.pop("force_ocr", False)
389
+ force_ocr, ocr_engine = self._extract_ocr_options(kwargs)
390
+
351
391
  params = self._format_upload_parameters(file_.name, **kwargs)
352
392
  response = self.client.post("documents/", json=params)
353
393
 
@@ -357,12 +397,12 @@ class DocumentClient(BaseAPIClient):
357
397
  response = requests_retry_session().put(presigned_url, data=file_.read())
358
398
 
359
399
  # begin processing the document
360
- doc_id = create_json["id"]
361
- response = self.client.post(
362
- f"documents/{doc_id}/process/", json={"force_ocr": force_ocr}
363
- )
400
+ doc = Document(self.client, create_json)
401
+
402
+ # begin processing
403
+ doc.process(force_ocr=force_ocr, ocr_engine=ocr_engine)
364
404
 
365
- return Document(self.client, create_json)
405
+ return doc
366
406
 
367
407
  def _collect_files(self, path, extensions):
368
408
  """Find the paths to files with specified extensions under a directory"""
@@ -379,171 +419,98 @@ class DocumentClient(BaseAPIClient):
379
419
 
380
420
  def upload_directory(self, path, handle_errors=False, extensions=".pdf", **kwargs):
381
421
  """Upload files with specified extensions in a directory"""
382
- # pylint: disable=too-many-locals, too-many-branches
383
-
384
- # Do not set the same title for all documents
422
+ # pylint:disable=too-many-locals
385
423
  kwargs.pop("title", None)
386
424
 
387
- # If extensions are specified as None, it will check for all supported
388
- # filetypes.
389
425
  if extensions is None:
390
426
  extensions = SUPPORTED_EXTENSIONS
391
-
392
- # Convert single extension to a list if provided
393
427
  if extensions and not isinstance(extensions, list):
394
428
  extensions = [extensions]
395
-
396
- # Checks to see if the extensions are supported, raises an error if not.
397
429
  invalid_extensions = set(extensions) - set(SUPPORTED_EXTENSIONS)
398
430
  if invalid_extensions:
399
431
  raise ValueError(
400
432
  f"Invalid extensions provided: {', '.join(invalid_extensions)}"
401
433
  )
402
434
 
403
- # Loop through the path and get all the files with matching extensions
404
435
  path_list = self._collect_files(path, extensions)
405
-
406
436
  logger.info(
407
- "Upload directory on %s: Found %d files to upload",
408
- path,
409
- len(path_list)
437
+ "Upload directory on %s: Found %d files to upload", path, len(path_list)
410
438
  )
411
439
 
412
- # Upload all the files using the bulk API to reduce the number
413
- # of API calls and improve performance
414
440
  obj_list = []
441
+ force_ocr, ocr_engine = self._extract_ocr_options(kwargs)
415
442
  params = self._format_upload_parameters("", **kwargs)
443
+
416
444
  for i, file_paths in enumerate(grouper(path_list, BULK_LIMIT)):
417
- # Grouper will put None's on the end of the last group
418
445
  file_paths = [p for p in file_paths if p is not None]
419
-
420
446
  logger.info("Uploading group %d:\n%s", i + 1, "\n".join(file_paths))
421
447
 
422
- # Create the documents
423
- logger.info("Creating the documents...")
424
- try:
425
- response = self.client.post(
426
- "documents/",
427
- json=[
428
- merge_dicts(
429
- params,
430
- {
431
- "title": self._get_title(p),
432
- "original_extension": os.path.splitext(
433
- os.path.basename(p)
434
- )[1]
435
- .lower()
436
- .lstrip("."),
437
- },
438
- )
439
- for p in file_paths
440
- ],
441
- )
442
- except (APIError, RequestException) as exc:
443
- if handle_errors:
444
- logger.info(
445
- "Error creating the following documents: %s\n%s",
446
- exc,
447
- "\n".join(file_paths)
448
- )
449
- continue
450
- else:
451
- raise
448
+ create_json = self._create_documents(file_paths, params, handle_errors)
449
+ sorted_create_json = sorted(create_json, key=lambda j: j["title"])
450
+ sorted_file_paths = sorted(file_paths, key=self._get_title)
451
+ obj_list.extend(sorted_create_json)
452
+ presigned_urls = [j["presigned_url"] for j in sorted_create_json]
452
453
 
453
- # Upload the files directly to storage
454
- create_json = response.json()
455
- obj_list.extend(create_json)
456
- presigned_urls = [j["presigned_url"] for j in create_json]
457
- for url, file_path in zip(presigned_urls, file_paths):
458
- logger.info("Uploading %s to S3...", file_path)
459
- try:
460
- with open(file_path, "rb") as file:
461
- response = requests_retry_session().put(url, data=file.read())
462
- self.client.raise_for_status(response)
463
- except (APIError, RequestException) as exc:
464
- if handle_errors:
465
- logger.info(
466
- "Error uploading the following document: %s %s",
467
- exc,
468
- file_path
469
- )
470
- continue
471
- else:
472
- raise
473
-
474
- # Begin processing the documents
475
- logger.info("Processing the documents...")
476
- doc_ids = [j["id"] for j in create_json]
477
- try:
478
- response = self.client.post("documents/process/", json={"ids": doc_ids})
479
- except (APIError, RequestException) as exc:
480
- if handle_errors:
481
- logger.info(
482
- "Error creating the following documents: %s\n%s",
483
- exc,
484
- "\n".join(file_paths)
485
- )
486
- continue
487
- else:
488
- raise
454
+ self._upload_files_to_s3(sorted_file_paths, presigned_urls, handle_errors)
455
+ self._process_documents(create_json, force_ocr, ocr_engine, handle_errors)
489
456
 
490
457
  logger.info("Upload directory complete")
491
-
492
- # Pass back the list of documents
493
458
  return [Document(self.client, d) for d in obj_list]
494
459
 
495
- def upload_urls(self, url_list, handle_errors=False, **kwargs):
496
- """Upload documents from a list of URLs"""
497
-
498
- # Do not set the same title for all documents
499
- kwargs.pop("title", None)
500
-
501
- obj_list = []
502
- params = self._format_upload_parameters("", **kwargs)
503
- for i, url_group in enumerate(grouper(url_list, BULK_LIMIT)):
504
- # Grouper will put None's on the end of the last group
505
- url_group = [url for url in url_group if url is not None]
506
-
507
- logger.info(
508
- "Uploading group %d: %s",
509
- i + 1,
510
- "\n".join(url_group)
460
+ def _create_documents(self, file_paths, params, handle_errors):
461
+ body = [
462
+ merge_dicts(
463
+ params,
464
+ {
465
+ "title": self._get_title(p),
466
+ "original_extension": os.path.splitext(os.path.basename(p))[1]
467
+ .lower()
468
+ .lstrip("."),
469
+ },
511
470
  )
512
-
513
- # Create the documents
514
- logger.info("Creating the documents...")
515
- try:
516
- response = self.client.post(
517
- "documents/",
518
- json=[
519
- merge_dicts(
520
- params,
521
- {
522
- "title": self._get_title(url),
523
- "file_url": url,
524
- },
525
- )
526
- for url in url_group
527
- ],
471
+ for p in sorted(file_paths)
472
+ ]
473
+ try:
474
+ response = self.client.post("documents/", json=body)
475
+ except (APIError, RequestException) as exc:
476
+ if handle_errors:
477
+ logger.info(
478
+ "Error creating the following documents: %s\n%s",
479
+ exc,
480
+ "\n".join(file_paths),
528
481
  )
482
+ return []
483
+ else:
484
+ raise
485
+ return response.json()
486
+
487
+ def _upload_files_to_s3(self, file_paths, presigned_urls, handle_errors):
488
+ for url, file_path in zip(presigned_urls, file_paths):
489
+ logger.info("Uploading %s to S3...", file_path)
490
+ try:
491
+ with open(file_path, "rb") as f:
492
+ response = requests_retry_session().put(url, data=f.read())
493
+ self.client.raise_for_status(response)
529
494
  except (APIError, RequestException) as exc:
530
495
  if handle_errors:
531
496
  logger.info(
532
- "Error creating the following documents: %s\n%s",
533
- str(exc),
534
- "\n".join(url_group)
497
+ "Error uploading the following document: %s %s", exc, file_path
535
498
  )
536
- continue
537
499
  else:
538
500
  raise
539
501
 
540
- create_json = response.json()
541
- obj_list.extend(create_json)
542
-
543
- logger.info("Upload URLs complete")
544
-
545
- # Pass back the list of documents
546
- return [Document(self.client, d) for d in obj_list]
502
+ def _process_documents(self, create_json, force_ocr, ocr_engine, handle_errors):
503
+ payload = [
504
+ {"id": j["id"], "force_ocr": force_ocr, "ocr_engine": ocr_engine}
505
+ for j in create_json
506
+ ]
507
+ try:
508
+ self.client.post("documents/process/", json=payload)
509
+ except (APIError, RequestException) as exc:
510
+ if handle_errors:
511
+ logger.info("Error processing documents: %s", exc)
512
+ else:
513
+ raise
547
514
 
548
515
 
549
516
  class Mention:
@@ -0,0 +1,15 @@
1
+ """
2
+ Custom exceptions for python-documentcloud
3
+ """
4
+
5
+ # Third Party
6
+ # pylint: disable=unused-import
7
+ # Import exceptions from python-squarelet
8
+ from squarelet.exceptions import (
9
+ APIError,
10
+ CredentialsFailedError,
11
+ DoesNotExistError,
12
+ DuplicateObjectError,
13
+ MultipleObjectsReturnedError,
14
+ SquareletError as DocumentCloudError,
15
+ )
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: python-documentcloud
3
- Version: 4.4.1
3
+ Version: 4.6.0
4
4
  Summary: A simple Python wrapper for the DocumentCloud API
5
5
  Home-page: https://github.com/muckrock/python-documentcloud
6
6
  Author: Mitchell Kotler
@@ -20,6 +20,7 @@ python_documentcloud.egg-info/SOURCES.txt
20
20
  python_documentcloud.egg-info/dependency_links.txt
21
21
  python_documentcloud.egg-info/requires.txt
22
22
  python_documentcloud.egg-info/top_level.txt
23
+ tests/test_addon.py
23
24
  tests/test_annotations.py
24
25
  tests/test_base.py
25
26
  tests/test_client.py
@@ -7,7 +7,7 @@ with open("README.md", "r") as fh:
7
7
 
8
8
  setup(
9
9
  name="python-documentcloud",
10
- version="4.4.1",
10
+ version="4.6.0",
11
11
  description="A simple Python wrapper for the DocumentCloud API",
12
12
  author="Mitchell Kotler",
13
13
  author_email="mitch@muckrock.com",
@@ -0,0 +1,141 @@
1
+ # Standard Library
2
+ from unittest.mock import MagicMock
3
+
4
+ # Third Party
5
+ import pytest
6
+
7
+ # DocumentCloud
8
+ from documentcloud.addon import AddOn
9
+
10
+ # pylint: disable=redefined-outer-name
11
+
12
+
13
+ @pytest.fixture
14
+ def addon():
15
+ """An AddOn instance built without invoking argparse or constructing a real client.
16
+
17
+ Tests can override `.id`, `.event_id`, `.client`, etc. as needed.
18
+ """
19
+ instance = AddOn.__new__(AddOn)
20
+ instance.id = "run-123"
21
+ instance.addon_id = "addon-1"
22
+ instance.event_id = None
23
+ instance.documents = None
24
+ instance.query = None
25
+ instance.user_id = None
26
+ instance.org_id = None
27
+ instance.data = {}
28
+ instance.title = "Test AddOn"
29
+ instance.client = MagicMock()
30
+ return instance
31
+
32
+
33
+ class TestLoadRunData:
34
+ def test_returns_data_when_run_id_set(self, addon):
35
+ addon.client.get.return_value.json.return_value = {"data": {"foo": "bar"}}
36
+
37
+ result = addon.load_run_data()
38
+
39
+ addon.client.get.assert_called_once_with("addon_runs/run-123/")
40
+ assert result == {"foo": "bar"}
41
+
42
+ def test_returns_empty_dict_when_no_run_id(self, addon):
43
+ addon.id = None
44
+
45
+ assert addon.load_run_data() == {}
46
+ addon.client.get.assert_not_called()
47
+
48
+ def test_returns_empty_dict_when_data_missing_from_response(self, addon):
49
+ addon.client.get.return_value.json.return_value = {}
50
+
51
+ assert addon.load_run_data() == {}
52
+
53
+
54
+ class TestStoreRunData:
55
+ def test_patches_run_with_data(self, addon):
56
+ addon.store_run_data({"foo": "bar"})
57
+
58
+ addon.client.patch.assert_called_once_with(
59
+ "addon_runs/run-123/", json={"data": {"foo": "bar"}}
60
+ )
61
+
62
+ def test_no_op_when_no_run_id(self, addon, capsys):
63
+ addon.id = None
64
+
65
+ result = addon.store_run_data({"foo": "bar"})
66
+
67
+ assert result is None
68
+ addon.client.patch.assert_not_called()
69
+ assert "Run ID not set" in capsys.readouterr().out
70
+
71
+ def test_rejects_non_dict_data(self, addon):
72
+ with pytest.raises(TypeError):
73
+ addon.store_run_data("not a dict")
74
+
75
+ addon.client.patch.assert_not_called()
76
+
77
+
78
+ class TestLoadEventData:
79
+ def test_returns_scratch_when_event_id_set(self, addon):
80
+ addon.event_id = "evt-9"
81
+ addon.client.get.return_value.json.return_value = {"scratch": {"x": 1}}
82
+
83
+ result = addon.load_event_data()
84
+
85
+ addon.client.get.assert_called_once_with("addon_events/evt-9/")
86
+ assert result == {"x": 1}
87
+
88
+ def test_returns_none_when_no_event_id(self, addon):
89
+ assert addon.load_event_data() is None
90
+ addon.client.get.assert_not_called()
91
+
92
+
93
+ class TestStoreEventData:
94
+ def test_patches_event_with_scratch(self, addon):
95
+ addon.event_id = "evt-9"
96
+
97
+ addon.store_event_data({"x": 1})
98
+
99
+ addon.client.patch.assert_called_once_with(
100
+ "addon_events/evt-9/", json={"scratch": {"x": 1}}
101
+ )
102
+
103
+ def test_no_op_when_no_event_id(self, addon):
104
+ assert addon.store_event_data({"x": 1}) is None
105
+ addon.client.patch.assert_not_called()
106
+
107
+
108
+ @pytest.fixture
109
+ def real_addon(client, addon_run):
110
+ """An AddOn wired to the real `client` fixture and a freshly created run."""
111
+ instance = AddOn.__new__(AddOn)
112
+ instance.id = addon_run
113
+ instance.addon_id = None
114
+ instance.event_id = None
115
+ instance.documents = None
116
+ instance.query = None
117
+ instance.user_id = None
118
+ instance.org_id = None
119
+ instance.data = {}
120
+ instance.title = "Test AddOn"
121
+ instance.client = client
122
+ return instance
123
+
124
+
125
+ class TestRunDataVCR:
126
+ """VCR-recorded round-trip tests against the dev DC.
127
+
128
+ Recording: set DC_TEST_ADDON_RUN_ID to an existing AddOnRun UUID on your
129
+ local dev DC, then run `make test-dev` (or `pytest --record-mode=new_episodes`).
130
+ """
131
+
132
+ def test_load_run_data_returns_dict(self, real_addon):
133
+ result = real_addon.load_run_data()
134
+ assert isinstance(result, dict)
135
+
136
+ def test_store_then_load_run_data_round_trip(self, real_addon):
137
+ payload = {"foo": "bar", "n": 42}
138
+ real_addon.store_run_data(payload)
139
+ loaded = real_addon.load_run_data()
140
+ assert loaded.get("foo") == "bar"
141
+ assert loaded.get("n") == 42
@@ -158,9 +158,7 @@ class TestDocument:
158
158
 
159
159
  class TestDocumentClient:
160
160
  def test_search(self, client, document):
161
- documents = client.documents.search(
162
- f"document:{document.id} simple"
163
- )
161
+ documents = client.documents.search(f"document:{document.id} simple")
164
162
  assert documents
165
163
 
166
164
  def test_list(self, client):
@@ -182,7 +180,6 @@ class TestDocumentClient:
182
180
  document = document_factory(pdf)
183
181
  assert document.status == "success"
184
182
 
185
-
186
183
  def test_upload_file_path(self, document_factory):
187
184
  document = document_factory("tests/test.pdf")
188
185
  assert document.status == "success"
@@ -1,12 +0,0 @@
1
- """
2
- Custom exceptions for python-documentcloud
3
- """
4
-
5
- # pylint: disable=unused-import
6
- # Import exceptions from python-squarelet
7
- from squarelet.exceptions import SquareletError as DocumentCloudError
8
- from squarelet.exceptions import DuplicateObjectError
9
- from squarelet.exceptions import CredentialsFailedError
10
- from squarelet.exceptions import APIError
11
- from squarelet.exceptions import DoesNotExistError
12
- from squarelet.exceptions import MultipleObjectsReturnedError