arkindex-base-worker 0.5.0a1__py3-none-any.whl → 0.5.0a3__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between those package versions exactly as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: arkindex-base-worker
3
- Version: 0.5.0a1
3
+ Version: 0.5.0a3
4
4
  Summary: Base Worker to easily build Arkindex ML workflows
5
5
  Author-email: Teklia <contact@teklia.com>
6
6
  Maintainer-email: Teklia <contact@teklia.com>
@@ -46,7 +46,7 @@ Requires-Dist: peewee~=3.17
46
46
  Requires-Dist: Pillow==11.0.0
47
47
  Requires-Dist: python-gnupg==0.5.3
48
48
  Requires-Dist: shapely==2.0.6
49
- Requires-Dist: teklia-toolbox==0.1.7
49
+ Requires-Dist: teklia-toolbox==0.1.8
50
50
  Requires-Dist: zstandard==0.23.0
51
51
  Provides-Extra: docs
52
52
  Requires-Dist: black==24.10.0; extra == "docs"
@@ -1,21 +1,21 @@
1
- arkindex_worker/__init__.py,sha256=OlgCtTC9MaWeejviY0a3iQpALcRQGMVArFVVYwTF6I8,162
2
- arkindex_worker/cache.py,sha256=qTblc_zKdYC47Wip6_O9Jf5qBkQW2ozQQrg-nsx1WuY,11221
3
- arkindex_worker/image.py,sha256=D4CdTZKbzFULdRNy-flsilAdfNPP2WSV01dkxQnfGeA,20770
1
+ arkindex_worker/__init__.py,sha256=Sdt5KXn8EgURb2MurYVrUWaHbH3iFA1XLRo0Lc5AJ44,250
2
+ arkindex_worker/cache.py,sha256=NpCsYFnqBmyBrACqeV7c3P6j6YrTtyi-HgtewwxUpxc,11221
3
+ arkindex_worker/image.py,sha256=-oBhLqzTRsxJoXkzDYFw1Ic4JxQtpmNtzGxe1zOHotw,20980
4
4
  arkindex_worker/models.py,sha256=bPQzGZNs5a6z6DEcygsa8T33VOqPlMUbwKzHqlKzwbw,9923
5
5
  arkindex_worker/utils.py,sha256=MbbJT8oh8DMHHR-vidFeXdUH0TSXGWm7ZDGWzrRXoEY,9933
6
- arkindex_worker/worker/__init__.py,sha256=0_YHeOe31KR_8ynbnYMIMwnSQTVbKkkeLGmnlTMhFx0,16234
7
- arkindex_worker/worker/base.py,sha256=7Pmw-UQSxV-xkW8NO5cXsxJ8W8szzyppMaNjq_az81A,19844
8
- arkindex_worker/worker/classification.py,sha256=zECSNzGCZFzoPoDVZN4kuGYRNLzMQLBaRt3q1jnBSaA,10952
9
- arkindex_worker/worker/corpus.py,sha256=0TQFOwZ6Te-CZi6lgkZY1wzyJ5wO9LAmcVQtqHvZpPk,2291
10
- arkindex_worker/worker/dataset.py,sha256=LwzKwNFX4FqfLxh29LSvJydPwRw3VHaB1wjuFhUshsE,5267
11
- arkindex_worker/worker/element.py,sha256=Qvvq9kJnAHNATHW7zi96eIY1x-0MsR-T5rrSJg6e9Y4,45309
12
- arkindex_worker/worker/entity.py,sha256=DG8oVAdy-r18fliTjnzGI1j6l7SOFmyIBmE6JlE6A8g,14799
13
- arkindex_worker/worker/image.py,sha256=t_Az6IGnj0EZyvcA4XxfPikOUjn_pztgsyxTkFZhaXU,621
14
- arkindex_worker/worker/metadata.py,sha256=VRajtd2kaBvar9GercX4knvR6l1WFYjoCdJWU9ccKgk,7291
6
+ arkindex_worker/worker/__init__.py,sha256=3adK1-BDv6uCKUmY0cqaz7LuEJChDHyNmRVPCA1y8lA,16238
7
+ arkindex_worker/worker/base.py,sha256=MbZW9WmSjwh0yKeMckyjm6WxHI9xpfFhWsx5AOzQ0aY,19844
8
+ arkindex_worker/worker/classification.py,sha256=gu_xAkGgvoebbG1xCZ4P7DqYe6cHwTGiHUc9nG0__8A,10996
9
+ arkindex_worker/worker/corpus.py,sha256=MeIMod7jkWyX0frtD0a37rhumnMV3p9ZOC1xwAoXrAA,2291
10
+ arkindex_worker/worker/dataset.py,sha256=tVaPx43vaH-KTtx4w5V06e26ha8XPfiJTRzBXlu928Y,5273
11
+ arkindex_worker/worker/element.py,sha256=0zwODtutkX4AIeSKe0wV9cmNeIZ5cRwTqnuFrVQmKOw,45403
12
+ arkindex_worker/worker/entity.py,sha256=5OaP3HBrA-jbP-3xF-_TpaYh5WxMlQAUUaGLmhynpEE,14833
13
+ arkindex_worker/worker/image.py,sha256=L6Ikuf0Z0RxJk7JarY5PggJGrYSHLaPK0vn0dy0CIaQ,623
14
+ arkindex_worker/worker/metadata.py,sha256=mb9hVU-nRw3drCN-0AvtZ0nPY-4tD-ye9_mVy6icbk4,7309
15
15
  arkindex_worker/worker/process.py,sha256=9TEHpMcBax1wc6PrWMMrdXe2uNfqyVj7n_dAYZRBGnY,1854
16
- arkindex_worker/worker/task.py,sha256=r1j7_qbdNu2Z8H8HbGzO3P3qdx-2N1pBbUPFDca0rqg,1519
17
- arkindex_worker/worker/training.py,sha256=H8FmCdzGcDW-WMMwcgvmZPlN5tPHwGo0BXn12qmzj8g,10875
18
- arkindex_worker/worker/transcription.py,sha256=52RY9kYsiR1sz9FxOigyo12Ker3VDbQ4U42gK9DpR3g,21146
16
+ arkindex_worker/worker/task.py,sha256=nYfMSFm_d-4t8y4PO4HjFBnLsZf7IsDjkS7-A2Pgnac,1525
17
+ arkindex_worker/worker/training.py,sha256=tyQOHcwv--_wdYz6CgLEe1YM7kwwwKN30LvGTsnWd78,10923
18
+ arkindex_worker/worker/transcription.py,sha256=sw718R119tsLNY8inPMVeIilvFJo94fMbMtYgH0zTM8,21250
19
19
  arkindex_worker/worker/version.py,sha256=JIT7OI3Mo7RPkNrjOB9hfqrsG-FYygz_zi4l8PbkuAo,1960
20
20
  hooks/pre_gen_project.py,sha256=xQJERv3vv9VzIqcBHI281eeWLWREXUF4mMw7PvJHHXM,269
21
21
  tests/__init__.py,sha256=DG--S6IpGl399rzSAjDdHL76CkOIeZIjajCcyUSDhOQ,241
@@ -43,7 +43,7 @@ tests/test_elements_worker/test_image.py,sha256=BljMNKgec_9a5bzNzFpYZIvSbuvwsWDf
43
43
  tests/test_elements_worker/test_metadata.py,sha256=Xfggy-vxw5DZ3hFKx3sB7OYb2d1tu1RiNK8fvKJIaBs,22294
44
44
  tests/test_elements_worker/test_process.py,sha256=y4RoVhPfyHzR795fw7-_FXElBcKo3fy4Ew_HI-kxJic,3088
45
45
  tests/test_elements_worker/test_task.py,sha256=wTUWqN9UhfKmJn3IcFY75EW4I1ulRhisflmY1kmP47s,5574
46
- tests/test_elements_worker/test_training.py,sha256=3W2LzpqxekvRiX42m_PvWcVel7ynQJmzO8gKcLmCMQI,8717
46
+ tests/test_elements_worker/test_training.py,sha256=qgK7BLucddRzc8ePbQtY75x17QvGDEq5XCwgyyvmAJE,8717
47
47
  tests/test_elements_worker/test_transcription_create.py,sha256=yznO9B_BVsOR0Z_VY5ZL8gJp0ZPCz_4sPUs5dXtixAg,29281
48
48
  tests/test_elements_worker/test_transcription_create_with_elements.py,sha256=tmcyglgssEqMnt1Mdy_u6X1m2wgLWTo_HdWst3GrK2k,33056
49
49
  tests/test_elements_worker/test_transcription_list.py,sha256=ikz7HYPCoQWTdTRCd382SB-y-T2BbigPLlIcx5Eow-I,15324
@@ -54,8 +54,8 @@ worker-demo/tests/conftest.py,sha256=XzNMNeg6pmABUAH8jN6eZTlZSFGLYjS3-DTXjiRN6Yc
54
54
  worker-demo/tests/test_worker.py,sha256=3DLd4NRK4bfyatG5P_PK4k9P9tJHx9XQq5_ryFEEFVg,304
55
55
  worker-demo/worker_demo/__init__.py,sha256=2BPomV8ZMNf3YXJgloatKeHQCE6QOkwmsHGkO6MkQuM,125
56
56
  worker-demo/worker_demo/worker.py,sha256=Rt-DjWa5iBP08k58NDZMfeyPuFbtNcbX6nc5jFX7GNo,440
57
- arkindex_base_worker-0.5.0a1.dist-info/LICENSE,sha256=NVshRi1efwVezMfW7xXYLrdDr2Li1AfwfGOd5WuH1kQ,1063
58
- arkindex_base_worker-0.5.0a1.dist-info/METADATA,sha256=eP4wgAkBFUHBWvNVcASdUXsxxz_0AMtQTgjJPuBlCCQ,3336
59
- arkindex_base_worker-0.5.0a1.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
60
- arkindex_base_worker-0.5.0a1.dist-info/top_level.txt,sha256=58NuslgxQC2vT4DiqZEgO4JqJRrYa2yeNI9QvkbfGQU,40
61
- arkindex_base_worker-0.5.0a1.dist-info/RECORD,,
57
+ arkindex_base_worker-0.5.0a3.dist-info/LICENSE,sha256=NVshRi1efwVezMfW7xXYLrdDr2Li1AfwfGOd5WuH1kQ,1063
58
+ arkindex_base_worker-0.5.0a3.dist-info/METADATA,sha256=n7vTSWi9CRDbbL49E5IDV9T2J_hZmv0LaxoObBBvCeg,3336
59
+ arkindex_base_worker-0.5.0a3.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
60
+ arkindex_base_worker-0.5.0a3.dist-info/top_level.txt,sha256=58NuslgxQC2vT4DiqZEgO4JqJRrYa2yeNI9QvkbfGQU,40
61
+ arkindex_base_worker-0.5.0a3.dist-info/RECORD,,
@@ -1,3 +1,4 @@
1
+ import importlib.metadata
1
2
  import logging
2
3
 
3
4
  logging.basicConfig(
@@ -5,3 +6,5 @@ logging.basicConfig(
5
6
  format="%(asctime)s %(levelname)s/%(name)s: %(message)s",
6
7
  )
7
8
  logger = logging.getLogger(__name__)
9
+
10
+ VERSION = importlib.metadata.version("arkindex-base-worker")
arkindex_worker/cache.py CHANGED
@@ -327,9 +327,9 @@ def check_version(cache_path: str | Path):
327
327
  except OperationalError:
328
328
  version = None
329
329
 
330
- assert (
331
- version == SQL_VERSION
332
- ), f"The SQLite database {cache_path} does not have the correct cache version, it should be {SQL_VERSION}"
330
+ assert version == SQL_VERSION, (
331
+ f"The SQLite database {cache_path} does not have the correct cache version, it should be {SQL_VERSION}"
332
+ )
333
333
 
334
334
 
335
335
  def merge_parents_cache(paths: list, current_database: Path):
arkindex_worker/image.py CHANGED
@@ -27,7 +27,7 @@ from tenacity import (
27
27
  wait_exponential,
28
28
  )
29
29
 
30
- from arkindex_worker import logger
30
+ from arkindex_worker import VERSION, logger
31
31
  from arkindex_worker.utils import pluralize
32
32
  from teklia_toolbox.requests import should_verify_cert
33
33
 
@@ -41,6 +41,8 @@ DOWNLOAD_TIMEOUT = (30, 60)
41
41
 
42
42
  BoundingBox = namedtuple("BoundingBox", ["x", "y", "width", "height"])
43
43
 
44
+ # Specific User-Agent to bypass potential server limitations
45
+ IIIF_USER_AGENT = f"Teklia/Workers {VERSION}"
44
46
  # To parse IIIF Urls
45
47
  IIIF_URL = re.compile(r"\w+:\/{2}.+\/.+\/.+\/.+\/(?P<size>.+)\/!?\d+\/\w+\.\w+")
46
48
  # Full size of the region
@@ -326,7 +328,7 @@ def polygon_bounding_box(polygon: list[list[int | float]]) -> BoundingBox:
326
328
  def _retry_log(retry_state, *args, **kwargs):
327
329
  logger.warning(
328
330
  f"Request to {retry_state.args[0]} failed ({repr(retry_state.outcome.exception())}), "
329
- f'retrying in {retry_state.idle_for} {pluralize("second", retry_state.idle_for)}'
331
+ f"retrying in {retry_state.idle_for} {pluralize('second', retry_state.idle_for)}"
330
332
  )
331
333
 
332
334
 
@@ -339,7 +341,12 @@ def _retry_log(retry_state, *args, **kwargs):
339
341
  )
340
342
  def _retried_request(url, *args, method=requests.get, **kwargs):
341
343
  resp = method(
342
- url, *args, timeout=DOWNLOAD_TIMEOUT, verify=should_verify_cert(url), **kwargs
344
+ url,
345
+ *args,
346
+ headers={"User-Agent": IIIF_USER_AGENT},
347
+ timeout=DOWNLOAD_TIMEOUT,
348
+ verify=should_verify_cert(url),
349
+ **kwargs,
343
350
  )
344
351
  resp.raise_for_status()
345
352
  return resp
@@ -359,9 +366,9 @@ def download_tiles(url: str) -> Image:
359
366
 
360
367
  image_width, image_height = info.get("width"), info.get("height")
361
368
  assert image_width and image_height, "Missing image dimensions in info.json"
362
- assert info.get(
363
- "tiles"
364
- ), "Image cannot be retrieved at full size and tiles are not supported"
369
+ assert info.get("tiles"), (
370
+ "Image cannot be retrieved at full size and tiles are not supported"
371
+ )
365
372
 
366
373
  # Take the biggest available tile size
367
374
  tile = sorted(info["tiles"], key=lambda tile: tile.get("width", 0), reverse=True)[0]
@@ -435,15 +442,15 @@ def trim_polygon(
435
442
  is entirely outside of the image's bounds.
436
443
  """
437
444
 
438
- assert isinstance(
439
- polygon, list | tuple
440
- ), "Input polygon must be a valid list or tuple of points."
441
- assert all(
442
- isinstance(point, list | tuple) for point in polygon
443
- ), "Polygon points must be tuples or lists."
444
- assert all(
445
- len(point) == 2 for point in polygon
446
- ), "Polygon points must be tuples or lists of 2 elements."
445
+ assert isinstance(polygon, list | tuple), (
446
+ "Input polygon must be a valid list or tuple of points."
447
+ )
448
+ assert all(isinstance(point, list | tuple) for point in polygon), (
449
+ "Polygon points must be tuples or lists."
450
+ )
451
+ assert all(len(point) == 2 for point in polygon), (
452
+ "Polygon points must be tuples or lists of 2 elements."
453
+ )
447
454
  assert all(
448
455
  isinstance(point[0], int) and isinstance(point[1], int) for point in polygon
449
456
  ), "Polygon point coordinates must be integers."
@@ -494,12 +501,12 @@ def revert_orientation(
494
501
  from arkindex_worker.cache import CachedElement
495
502
  from arkindex_worker.models import Element
496
503
 
497
- assert element and isinstance(
498
- element, Element | CachedElement
499
- ), "element shouldn't be null and should be an Element or CachedElement"
500
- assert polygon and isinstance(
501
- polygon, list
502
- ), "polygon shouldn't be null and should be a list"
504
+ assert element and isinstance(element, Element | CachedElement), (
505
+ "element shouldn't be null and should be an Element or CachedElement"
506
+ )
507
+ assert polygon and isinstance(polygon, list), (
508
+ "polygon shouldn't be null and should be a list"
509
+ )
503
510
  assert isinstance(reverse, bool), "Reverse should be a bool"
504
511
  # Rotating with Pillow can cause it to move the image around, as the image cannot have negative coordinates
505
512
  # and must be a rectangle. This means the origin point of any coordinates from an image is invalid, and the
@@ -507,9 +514,9 @@ def revert_orientation(
507
514
  # To properly undo the mirroring and rotation implicitly applied by open_image, we first need to find the center
508
515
  # of the rotated bounding box.
509
516
  if isinstance(element, Element):
510
- assert (
511
- element.zone and element.zone.polygon
512
- ), "element should have a zone and a polygon"
517
+ assert element.zone and element.zone.polygon, (
518
+ "element should have a zone and a polygon"
519
+ )
513
520
  parent_ring = LinearRing(element.zone.polygon)
514
521
  elif isinstance(element, CachedElement):
515
522
  assert element.polygon, "cached element should have a polygon"
@@ -82,9 +82,9 @@ class ElementsWorker(
82
82
  :return: An iterable of [CachedElement][arkindex_worker.cache.CachedElement] when cache support is enabled,
83
83
  or a list of strings representing element IDs otherwise.
84
84
  """
85
- assert not (
86
- self.args.elements_list and self.args.element
87
- ), "elements-list and element CLI args shouldn't be both set"
85
+ assert not (self.args.elements_list and self.args.element), (
86
+ "elements-list and element CLI args shouldn't be both set"
87
+ )
88
88
 
89
89
  def invalid_element_id(value: str) -> bool:
90
90
  """
@@ -125,9 +125,9 @@ class ElementsWorker(
125
125
  return {item["id"] for item in self.list_process_elements()}
126
126
 
127
127
  invalid_element_ids = list(filter(invalid_element_id, out))
128
- assert (
129
- not invalid_element_ids
130
- ), f"These element IDs are invalid: {', '.join(invalid_element_ids)}"
128
+ assert not invalid_element_ids, (
129
+ f"These element IDs are invalid: {', '.join(invalid_element_ids)}"
130
+ )
131
131
 
132
132
  return out
133
133
 
@@ -144,9 +144,9 @@ class ElementsWorker(
144
144
  # Worker activities are also disabled when running an ElementsWorker in a Dataset process
145
145
  # and when running export processes.
146
146
  return False
147
- assert (
148
- self.process_information
149
- ), "Worker must be configured to access its process activity state"
147
+ assert self.process_information, (
148
+ "Worker must be configured to access its process activity state"
149
+ )
150
150
  return self.process_information.get("activity_state") == "ready"
151
151
 
152
152
  def run(self):
@@ -221,7 +221,7 @@ class ElementsWorker(
221
221
  with contextlib.suppress(Exception):
222
222
  self.update_activity(element.id, ActivityState.Error)
223
223
 
224
- message = f'Ran on {count} {pluralize("element", count)}: {count - failed} completed, {failed} failed'
224
+ message = f"Ran on {count} {pluralize('element', count)}: {count - failed} completed, {failed} failed"
225
225
  if failed:
226
226
  logger.error(message)
227
227
  if failed >= count: # Everything failed!
@@ -256,9 +256,9 @@ class ElementsWorker(
256
256
  )
257
257
  return True
258
258
 
259
- assert element_id and isinstance(
260
- element_id, uuid.UUID | str
261
- ), "element_id shouldn't be null and should be an UUID or str"
259
+ assert element_id and isinstance(element_id, uuid.UUID | str), (
260
+ "element_id shouldn't be null and should be an UUID or str"
261
+ )
262
262
  assert isinstance(state, ActivityState), "state should be an ActivityState"
263
263
 
264
264
  try:
@@ -382,9 +382,9 @@ class DatasetWorker(DatasetMixin, BaseWorker, TaskMixin):
382
382
  failed = 0
383
383
  for i, dataset_set in enumerate(dataset_sets, start=1):
384
384
  try:
385
- assert (
386
- dataset_set.dataset.state == DatasetState.Complete.value
387
- ), "When processing a set, its dataset state should be Complete."
385
+ assert dataset_set.dataset.state == DatasetState.Complete.value, (
386
+ "When processing a set, its dataset state should be Complete."
387
+ )
388
388
 
389
389
  logger.info(f"Retrieving data for {dataset_set} ({i}/{count})")
390
390
  self.download_dataset_artifact(dataset_set.dataset)
@@ -405,7 +405,7 @@ class DatasetWorker(DatasetMixin, BaseWorker, TaskMixin):
405
405
  # Cleanup the latest downloaded dataset artifact
406
406
  self.cleanup_downloaded_artifact()
407
407
 
408
- message = f'Ran on {count} {pluralize("set", count)}: {count - failed} completed, {failed} failed'
408
+ message = f"Ran on {count} {pluralize('set', count)}: {count - failed} completed, {failed} failed"
409
409
  if failed:
410
410
  logger.error(message)
411
411
  if failed >= count: # Everything failed!
@@ -305,9 +305,9 @@ class BaseWorker:
305
305
 
306
306
  if self.use_cache:
307
307
  if self.args.database is not None:
308
- assert (
309
- self.args.database.is_file()
310
- ), f"Database in {self.args.database} does not exist"
308
+ assert self.args.database.is_file(), (
309
+ f"Database in {self.args.database} does not exist"
310
+ )
311
311
  self.cache_path = self.args.database
312
312
  else:
313
313
  cache_dir = self.task_data_dir / self.task_id
@@ -378,9 +378,9 @@ class BaseWorker:
378
378
  gpg = gnupg.GPG()
379
379
  with path.open("rb") as gpg_file:
380
380
  decrypted = gpg.decrypt_file(gpg_file)
381
- assert (
382
- decrypted.ok
383
- ), f"GPG error: {decrypted.status} - {decrypted.stderr}"
381
+ assert decrypted.ok, (
382
+ f"GPG error: {decrypted.status} - {decrypted.stderr}"
383
+ )
384
384
  secret = decrypted.data.decode("utf-8")
385
385
  logging.info(f"Loaded local secret {name}")
386
386
  except Exception as e:
@@ -27,7 +27,7 @@ class ClassificationMixin:
27
27
  )
28
28
  self.classes = {ml_class["name"]: ml_class["id"] for ml_class in corpus_classes}
29
29
  logger.info(
30
- f'Loaded {len(self.classes)} ML {pluralize("class", len(self.classes))} in corpus ({self.corpus_id})'
30
+ f"Loaded {len(self.classes)} ML {pluralize('class', len(self.classes))} in corpus ({self.corpus_id})"
31
31
  )
32
32
 
33
33
  def get_ml_class_id(self, ml_class: str) -> str:
@@ -60,9 +60,9 @@ class ClassificationMixin:
60
60
  f"Reloading corpus classes to see if {ml_class} already exists"
61
61
  )
62
62
  self.load_corpus_classes()
63
- assert (
64
- ml_class in self.classes
65
- ), "Missing class {ml_class} even after reloading"
63
+ assert ml_class in self.classes, (
64
+ "Missing class {ml_class} even after reloading"
65
+ )
66
66
  ml_class_id = self.classes[ml_class]
67
67
 
68
68
  return ml_class_id
@@ -86,9 +86,9 @@ class ClassificationMixin:
86
86
  ),
87
87
  None,
88
88
  )
89
- assert (
90
- ml_class_name is not None
91
- ), f"Missing class with id ({ml_class_id}) in corpus ({self.corpus_id})"
89
+ assert ml_class_name is not None, (
90
+ f"Missing class with id ({ml_class_id}) in corpus ({self.corpus_id})"
91
+ )
92
92
  return ml_class_name
93
93
 
94
94
  def create_classification(
@@ -107,18 +107,18 @@ class ClassificationMixin:
107
107
  :param high_confidence: Whether or not the classification is of high confidence.
108
108
  :returns: The created classification, as returned by the ``CreateClassification`` API endpoint.
109
109
  """
110
- assert element and isinstance(
111
- element, Element | CachedElement
112
- ), "element shouldn't be null and should be an Element or CachedElement"
113
- assert ml_class and isinstance(
114
- ml_class, str
115
- ), "ml_class shouldn't be null and should be of type str"
116
- assert (
117
- isinstance(confidence, float) and 0 <= confidence <= 1
118
- ), "confidence shouldn't be null and should be a float in [0..1] range"
119
- assert isinstance(
120
- high_confidence, bool
121
- ), "high_confidence shouldn't be null and should be of type bool"
110
+ assert element and isinstance(element, Element | CachedElement), (
111
+ "element shouldn't be null and should be an Element or CachedElement"
112
+ )
113
+ assert ml_class and isinstance(ml_class, str), (
114
+ "ml_class shouldn't be null and should be of type str"
115
+ )
116
+ assert isinstance(confidence, float) and 0 <= confidence <= 1, (
117
+ "confidence shouldn't be null and should be a float in [0..1] range"
118
+ )
119
+ assert isinstance(high_confidence, bool), (
120
+ "high_confidence shouldn't be null and should be of type bool"
121
+ )
122
122
  if self.is_read_only:
123
123
  logger.warning(
124
124
  "Cannot create classification as this worker is in read-only mode"
@@ -198,31 +198,33 @@ class ClassificationMixin:
198
198
  :returns: List of created classifications, as returned in the ``classifications`` field by
199
199
  the ``CreateClassifications`` API endpoint.
200
200
  """
201
- assert element and isinstance(
202
- element, Element | CachedElement
203
- ), "element shouldn't be null and should be an Element or CachedElement"
204
- assert classifications and isinstance(
205
- classifications, list
206
- ), "classifications shouldn't be null and should be of type list"
201
+ assert element and isinstance(element, Element | CachedElement), (
202
+ "element shouldn't be null and should be an Element or CachedElement"
203
+ )
204
+ assert classifications and isinstance(classifications, list), (
205
+ "classifications shouldn't be null and should be of type list"
206
+ )
207
207
 
208
208
  for index, classification in enumerate(classifications):
209
209
  ml_class = classification.get("ml_class")
210
- assert (
211
- ml_class and isinstance(ml_class, str)
212
- ), f"Classification at index {index} in classifications: ml_class shouldn't be null and should be of type str"
210
+ assert ml_class and isinstance(ml_class, str), (
211
+ f"Classification at index {index} in classifications: ml_class shouldn't be null and should be of type str"
212
+ )
213
213
 
214
214
  confidence = classification.get("confidence")
215
215
  assert (
216
216
  confidence is not None
217
217
  and isinstance(confidence, float)
218
218
  and 0 <= confidence <= 1
219
- ), f"Classification at index {index} in classifications: confidence shouldn't be null and should be a float in [0..1] range"
219
+ ), (
220
+ f"Classification at index {index} in classifications: confidence shouldn't be null and should be a float in [0..1] range"
221
+ )
220
222
 
221
223
  high_confidence = classification.get("high_confidence")
222
224
  if high_confidence is not None:
223
- assert isinstance(
224
- high_confidence, bool
225
- ), f"Classification at index {index} in classifications: high_confidence should be of type bool"
225
+ assert isinstance(high_confidence, bool), (
226
+ f"Classification at index {index} in classifications: high_confidence should be of type bool"
227
+ )
226
228
 
227
229
  if self.is_read_only:
228
230
  logger.warning(
@@ -76,9 +76,9 @@ class CorpusMixin:
76
76
  key=itemgetter("updated"),
77
77
  reverse=True,
78
78
  )
79
- assert (
80
- len(exports) > 0
81
- ), f'No available exports found for the corpus ({self.corpus_id}) with state "{CorpusExportState.Done.value.capitalize()}".'
79
+ assert len(exports) > 0, (
80
+ f'No available exports found for the corpus ({self.corpus_id}) with state "{CorpusExportState.Done.value.capitalize()}".'
81
+ )
82
82
 
83
83
  # Download latest export
84
84
  export_id: str = exports[0]["id"]
@@ -113,9 +113,9 @@ class DatasetMixin:
113
113
  :param dataset_set: Set to find elements in.
114
114
  :returns: An iterator of Element built from the ``ListDatasetElements`` API endpoint.
115
115
  """
116
- assert dataset_set and isinstance(
117
- dataset_set, Set
118
- ), "dataset_set shouldn't be null and should be a Set"
116
+ assert dataset_set and isinstance(dataset_set, Set), (
117
+ "dataset_set shouldn't be null and should be a Set"
118
+ )
119
119
 
120
120
  results = self.api_client.paginate(
121
121
  "ListDatasetElements", id=dataset_set.dataset.id, set=dataset_set.name
@@ -152,12 +152,12 @@ class DatasetMixin:
152
152
  :param state: State of the dataset.
153
153
  :returns: The updated ``Dataset`` object from the ``PartialUpdateDataset`` API endpoint.
154
154
  """
155
- assert dataset and isinstance(
156
- dataset, Dataset
157
- ), "dataset shouldn't be null and should be a Dataset"
158
- assert state and isinstance(
159
- state, DatasetState
160
- ), "state shouldn't be null and should be a str from DatasetState"
155
+ assert dataset and isinstance(dataset, Dataset), (
156
+ "dataset shouldn't be null and should be a Dataset"
157
+ )
158
+ assert state and isinstance(state, DatasetState), (
159
+ "state shouldn't be null and should be a str from DatasetState"
160
+ )
161
161
 
162
162
  if self.is_read_only:
163
163
  logger.warning("Cannot update dataset as this worker is in read-only mode")