arkindex-base-worker 0.3.6rc1__tar.gz → 0.3.6rc2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52)
  1. arkindex-base-worker-0.3.6rc2/PKG-INFO +39 -0
  2. arkindex-base-worker-0.3.6rc2/arkindex_base_worker.egg-info/PKG-INFO +39 -0
  3. {arkindex-base-worker-0.3.6rc1 → arkindex-base-worker-0.3.6rc2}/arkindex_base_worker.egg-info/SOURCES.txt +0 -2
  4. arkindex-base-worker-0.3.6rc2/arkindex_base_worker.egg-info/requires.txt +9 -0
  5. {arkindex-base-worker-0.3.6rc1 → arkindex-base-worker-0.3.6rc2}/arkindex_worker/__init__.py +0 -1
  6. {arkindex-base-worker-0.3.6rc1 → arkindex-base-worker-0.3.6rc2}/arkindex_worker/cache.py +19 -25
  7. {arkindex-base-worker-0.3.6rc1 → arkindex-base-worker-0.3.6rc2}/arkindex_worker/image.py +16 -17
  8. {arkindex-base-worker-0.3.6rc1 → arkindex-base-worker-0.3.6rc2}/arkindex_worker/models.py +17 -21
  9. {arkindex-base-worker-0.3.6rc1 → arkindex-base-worker-0.3.6rc2}/arkindex_worker/utils.py +16 -17
  10. {arkindex-base-worker-0.3.6rc1 → arkindex-base-worker-0.3.6rc2}/arkindex_worker/worker/__init__.py +14 -23
  11. {arkindex-base-worker-0.3.6rc1 → arkindex-base-worker-0.3.6rc2}/arkindex_worker/worker/base.py +12 -7
  12. {arkindex-base-worker-0.3.6rc1 → arkindex-base-worker-0.3.6rc2}/arkindex_worker/worker/classification.py +13 -15
  13. {arkindex-base-worker-0.3.6rc1 → arkindex-base-worker-0.3.6rc2}/arkindex_worker/worker/dataset.py +3 -4
  14. {arkindex-base-worker-0.3.6rc1 → arkindex-base-worker-0.3.6rc2}/arkindex_worker/worker/element.py +80 -75
  15. {arkindex-base-worker-0.3.6rc1 → arkindex-base-worker-0.3.6rc2}/arkindex_worker/worker/entity.py +27 -29
  16. {arkindex-base-worker-0.3.6rc1 → arkindex-base-worker-0.3.6rc2}/arkindex_worker/worker/metadata.py +19 -25
  17. {arkindex-base-worker-0.3.6rc1 → arkindex-base-worker-0.3.6rc2}/arkindex_worker/worker/task.py +2 -3
  18. {arkindex-base-worker-0.3.6rc1 → arkindex-base-worker-0.3.6rc2}/arkindex_worker/worker/training.py +21 -22
  19. {arkindex-base-worker-0.3.6rc1 → arkindex-base-worker-0.3.6rc2}/arkindex_worker/worker/transcription.py +37 -34
  20. {arkindex-base-worker-0.3.6rc1 → arkindex-base-worker-0.3.6rc2}/arkindex_worker/worker/version.py +1 -2
  21. arkindex-base-worker-0.3.6rc2/pyproject.toml +83 -0
  22. {arkindex-base-worker-0.3.6rc1 → arkindex-base-worker-0.3.6rc2}/setup.py +2 -12
  23. {arkindex-base-worker-0.3.6rc1 → arkindex-base-worker-0.3.6rc2}/tests/conftest.py +55 -75
  24. {arkindex-base-worker-0.3.6rc1 → arkindex-base-worker-0.3.6rc2}/tests/test_base_worker.py +37 -31
  25. {arkindex-base-worker-0.3.6rc1 → arkindex-base-worker-0.3.6rc2}/tests/test_cache.py +14 -7
  26. {arkindex-base-worker-0.3.6rc1 → arkindex-base-worker-0.3.6rc2}/tests/test_dataset_worker.py +4 -4
  27. {arkindex-base-worker-0.3.6rc1 → arkindex-base-worker-0.3.6rc2}/tests/test_element.py +0 -1
  28. {arkindex-base-worker-0.3.6rc1 → arkindex-base-worker-0.3.6rc2}/tests/test_elements_worker/__init__.py +0 -1
  29. {arkindex-base-worker-0.3.6rc1 → arkindex-base-worker-0.3.6rc2}/tests/test_elements_worker/test_classifications.py +0 -1
  30. {arkindex-base-worker-0.3.6rc1 → arkindex-base-worker-0.3.6rc2}/tests/test_elements_worker/test_cli.py +22 -17
  31. {arkindex-base-worker-0.3.6rc1 → arkindex-base-worker-0.3.6rc2}/tests/test_elements_worker/test_dataset.py +9 -10
  32. {arkindex-base-worker-0.3.6rc1 → arkindex-base-worker-0.3.6rc2}/tests/test_elements_worker/test_elements.py +58 -63
  33. {arkindex-base-worker-0.3.6rc1 → arkindex-base-worker-0.3.6rc2}/tests/test_elements_worker/test_entities.py +10 -20
  34. {arkindex-base-worker-0.3.6rc1 → arkindex-base-worker-0.3.6rc2}/tests/test_elements_worker/test_metadata.py +72 -96
  35. {arkindex-base-worker-0.3.6rc1 → arkindex-base-worker-0.3.6rc2}/tests/test_elements_worker/test_task.py +9 -10
  36. {arkindex-base-worker-0.3.6rc1 → arkindex-base-worker-0.3.6rc2}/tests/test_elements_worker/test_training.py +20 -13
  37. {arkindex-base-worker-0.3.6rc1 → arkindex-base-worker-0.3.6rc2}/tests/test_elements_worker/test_transcriptions.py +6 -10
  38. {arkindex-base-worker-0.3.6rc1 → arkindex-base-worker-0.3.6rc2}/tests/test_elements_worker/test_worker.py +16 -14
  39. {arkindex-base-worker-0.3.6rc1 → arkindex-base-worker-0.3.6rc2}/tests/test_image.py +21 -20
  40. {arkindex-base-worker-0.3.6rc1 → arkindex-base-worker-0.3.6rc2}/tests/test_merge.py +5 -6
  41. {arkindex-base-worker-0.3.6rc1 → arkindex-base-worker-0.3.6rc2}/tests/test_utils.py +0 -1
  42. arkindex-base-worker-0.3.6rc1/PKG-INFO +0 -26
  43. arkindex-base-worker-0.3.6rc1/arkindex_base_worker.egg-info/PKG-INFO +0 -26
  44. arkindex-base-worker-0.3.6rc1/arkindex_base_worker.egg-info/requires.txt +0 -19
  45. arkindex-base-worker-0.3.6rc1/arkindex_worker/git.py +0 -392
  46. arkindex-base-worker-0.3.6rc1/pyproject.toml +0 -24
  47. arkindex-base-worker-0.3.6rc1/tests/test_git.py +0 -480
  48. {arkindex-base-worker-0.3.6rc1 → arkindex-base-worker-0.3.6rc2}/README.md +0 -0
  49. {arkindex-base-worker-0.3.6rc1 → arkindex-base-worker-0.3.6rc2}/arkindex_base_worker.egg-info/dependency_links.txt +0 -0
  50. {arkindex-base-worker-0.3.6rc1 → arkindex-base-worker-0.3.6rc2}/arkindex_base_worker.egg-info/top_level.txt +0 -0
  51. {arkindex-base-worker-0.3.6rc1 → arkindex-base-worker-0.3.6rc2}/setup.cfg +0 -0
  52. {arkindex-base-worker-0.3.6rc1 → arkindex-base-worker-0.3.6rc2}/tests/__init__.py +0 -0
@@ -0,0 +1,39 @@
1
+ Metadata-Version: 2.1
2
+ Name: arkindex-base-worker
3
+ Version: 0.3.6rc2
4
+ Summary: Base Worker to easily build Arkindex ML workflows
5
+ Author-email: Teklia <contact@teklia.com>
6
+ Maintainer-email: Teklia <contact@teklia.com>
7
+ Project-URL: Homepage, https://workers.arkindex.org
8
+ Project-URL: Documentation, https://workers.arkindex.org
9
+ Project-URL: Repository, https://gitlab.teklia.com/workers/base-worker
10
+ Project-URL: Bug Tracker, https://gitlab.teklia.com/workers/base-worker/issues
11
+ Project-URL: Authors, https://teklia.com
12
+ Keywords: python
13
+ Classifier: Development Status :: 5 - Production/Stable
14
+ Classifier: License :: OSI Approved :: MIT License
15
+ Classifier: Programming Language :: Python :: 3 :: Only
16
+ Classifier: Programming Language :: Python :: 3.10
17
+ Classifier: Programming Language :: Python :: 3.11
18
+ Classifier: Topic :: Text Processing :: Linguistic
19
+ Requires-Python: >=3.10
20
+ Description-Content-Type: text/markdown
21
+ Provides-Extra: docs
22
+ Requires-Dist: black==23.11.0; extra == "docs"
23
+ Requires-Dist: doc8==1.1.1; extra == "docs"
24
+ Requires-Dist: mkdocs==1.5.3; extra == "docs"
25
+ Requires-Dist: mkdocs-material==9.4.8; extra == "docs"
26
+ Requires-Dist: mkdocstrings==0.23.0; extra == "docs"
27
+ Requires-Dist: mkdocstrings-python==1.7.3; extra == "docs"
28
+ Requires-Dist: recommonmark==0.7.1; extra == "docs"
29
+
30
+ # Arkindex base Worker
31
+
32
+ An easy to use Python 3 high level API client, to build ML tasks.
33
+
34
+ ## Create a new worker using our template
35
+
36
+ ```
37
+ pip install --user cookiecutter
38
+ cookiecutter git@gitlab.teklia.com:workers/base-worker.git
39
+ ```
@@ -0,0 +1,39 @@
1
+ Metadata-Version: 2.1
2
+ Name: arkindex-base-worker
3
+ Version: 0.3.6rc2
4
+ Summary: Base Worker to easily build Arkindex ML workflows
5
+ Author-email: Teklia <contact@teklia.com>
6
+ Maintainer-email: Teklia <contact@teklia.com>
7
+ Project-URL: Homepage, https://workers.arkindex.org
8
+ Project-URL: Documentation, https://workers.arkindex.org
9
+ Project-URL: Repository, https://gitlab.teklia.com/workers/base-worker
10
+ Project-URL: Bug Tracker, https://gitlab.teklia.com/workers/base-worker/issues
11
+ Project-URL: Authors, https://teklia.com
12
+ Keywords: python
13
+ Classifier: Development Status :: 5 - Production/Stable
14
+ Classifier: License :: OSI Approved :: MIT License
15
+ Classifier: Programming Language :: Python :: 3 :: Only
16
+ Classifier: Programming Language :: Python :: 3.10
17
+ Classifier: Programming Language :: Python :: 3.11
18
+ Classifier: Topic :: Text Processing :: Linguistic
19
+ Requires-Python: >=3.10
20
+ Description-Content-Type: text/markdown
21
+ Provides-Extra: docs
22
+ Requires-Dist: black==23.11.0; extra == "docs"
23
+ Requires-Dist: doc8==1.1.1; extra == "docs"
24
+ Requires-Dist: mkdocs==1.5.3; extra == "docs"
25
+ Requires-Dist: mkdocs-material==9.4.8; extra == "docs"
26
+ Requires-Dist: mkdocstrings==0.23.0; extra == "docs"
27
+ Requires-Dist: mkdocstrings-python==1.7.3; extra == "docs"
28
+ Requires-Dist: recommonmark==0.7.1; extra == "docs"
29
+
30
+ # Arkindex base Worker
31
+
32
+ An easy to use Python 3 high level API client, to build ML tasks.
33
+
34
+ ## Create a new worker using our template
35
+
36
+ ```
37
+ pip install --user cookiecutter
38
+ cookiecutter git@gitlab.teklia.com:workers/base-worker.git
39
+ ```
@@ -9,7 +9,6 @@ arkindex_base_worker.egg-info/requires.txt
9
9
  arkindex_base_worker.egg-info/top_level.txt
10
10
  arkindex_worker/__init__.py
11
11
  arkindex_worker/cache.py
12
- arkindex_worker/git.py
13
12
  arkindex_worker/image.py
14
13
  arkindex_worker/models.py
15
14
  arkindex_worker/utils.py
@@ -30,7 +29,6 @@ tests/test_base_worker.py
30
29
  tests/test_cache.py
31
30
  tests/test_dataset_worker.py
32
31
  tests/test_element.py
33
- tests/test_git.py
34
32
  tests/test_image.py
35
33
  tests/test_merge.py
36
34
  tests/test_utils.py
@@ -0,0 +1,9 @@
1
+
2
+ [docs]
3
+ black==23.11.0
4
+ doc8==1.1.1
5
+ mkdocs==1.5.3
6
+ mkdocs-material==9.4.8
7
+ mkdocstrings==0.23.0
8
+ mkdocstrings-python==1.7.3
9
+ recommonmark==0.7.1
@@ -1,4 +1,3 @@
1
- # -*- coding: utf-8 -*-
2
1
  import logging
3
2
 
4
3
  logging.basicConfig(
@@ -1,4 +1,3 @@
1
- # -*- coding: utf-8 -*-
2
1
  """
3
2
  Database mappings and helper methods for the experimental worker caching feature.
4
3
 
@@ -10,7 +9,6 @@ reducing network usage.
10
9
  import json
11
10
  import sqlite3
12
11
  from pathlib import Path
13
- from typing import Optional, Union
14
12
 
15
13
  from peewee import (
16
14
  SQL,
@@ -106,8 +104,8 @@ class CachedElement(Model):
106
104
  def open_image(
107
105
  self,
108
106
  *args,
109
- max_width: Optional[int] = None,
110
- max_height: Optional[int] = None,
107
+ max_width: int | None = None,
108
+ max_height: int | None = None,
111
109
  **kwargs,
112
110
  ) -> Image:
113
111
  """
@@ -145,17 +143,15 @@ class CachedElement(Model):
145
143
  if max_width is None and max_height is None:
146
144
  resize = "full"
147
145
  else:
148
- # Do not resize for polygons that do not exactly match the images
149
- # as the resize is made directly by the IIIF server using the box parameter
150
146
  if (
147
+ # Do not resize for polygons that do not exactly match the images
148
+ # as the resize is made directly by the IIIF server using the box parameter
151
149
  bounding_box.width != self.image.width
152
150
  or bounding_box.height != self.image.height
153
- ):
154
- resize = "full"
155
-
156
- # Do not resize when the image is below the maximum size
157
- elif (max_width is None or self.image.width <= max_width) and (
158
- max_height is None or self.image.height <= max_height
151
+ ) or (
152
+ # Do not resize when the image is below the maximum size
153
+ (max_width is None or self.image.width <= max_width)
154
+ and (max_height is None or self.image.height <= max_height)
159
155
  ):
160
156
  resize = "full"
161
157
  else:
@@ -319,22 +315,21 @@ def create_version_table():
319
315
  Version.create(version=SQL_VERSION)
320
316
 
321
317
 
322
- def check_version(cache_path: Union[str, Path]):
318
+ def check_version(cache_path: str | Path):
323
319
  """
324
320
  Check the validity of the SQLite version
325
321
 
326
322
  :param cache_path: Path towards a local SQLite database
327
323
  """
328
- with SqliteDatabase(cache_path) as provided_db:
329
- with provided_db.bind_ctx([Version]):
330
- try:
331
- version = Version.get().version
332
- except OperationalError:
333
- version = None
324
+ with SqliteDatabase(cache_path) as provided_db, provided_db.bind_ctx([Version]):
325
+ try:
326
+ version = Version.get().version
327
+ except OperationalError:
328
+ version = None
334
329
 
335
- assert (
336
- version == SQL_VERSION
337
- ), f"The SQLite database {cache_path} does not have the correct cache version, it should be {SQL_VERSION}"
330
+ assert (
331
+ version == SQL_VERSION
332
+ ), f"The SQLite database {cache_path} does not have the correct cache version, it should be {SQL_VERSION}"
338
333
 
339
334
 
340
335
  def merge_parents_cache(paths: list, current_database: Path):
@@ -358,9 +353,8 @@ def merge_parents_cache(paths: list, current_database: Path):
358
353
  # Check that the parent cache uses a compatible version
359
354
  check_version(path)
360
355
 
361
- with SqliteDatabase(path) as source:
362
- with source.bind_ctx(MODELS):
363
- source.create_tables(MODELS)
356
+ with SqliteDatabase(path) as source, source.bind_ctx(MODELS):
357
+ source.create_tables(MODELS)
364
358
 
365
359
  logger.info(f"Merging parent db {path} into {current_database}")
366
360
  statements = [
@@ -1,4 +1,3 @@
1
- # -*- coding: utf-8 -*-
2
1
  """
3
2
  Helper methods to download and open IIIF images, and manage polygons.
4
3
  """
@@ -7,7 +6,7 @@ from collections import namedtuple
7
6
  from io import BytesIO
8
7
  from math import ceil
9
8
  from pathlib import Path
10
- from typing import TYPE_CHECKING, List, Optional, Union
9
+ from typing import TYPE_CHECKING
11
10
 
12
11
  import requests
13
12
  from PIL import Image
@@ -42,9 +41,9 @@ IIIF_MAX = "max"
42
41
 
43
42
  def open_image(
44
43
  path: str,
45
- mode: Optional[str] = "RGB",
46
- rotation_angle: Optional[int] = 0,
47
- mirrored: Optional[bool] = False,
44
+ mode: str | None = "RGB",
45
+ rotation_angle: int | None = 0,
46
+ mirrored: bool | None = False,
48
47
  ) -> Image:
49
48
  """
50
49
  Open an image from a path or a URL.
@@ -71,7 +70,7 @@ def open_image(
71
70
  else:
72
71
  try:
73
72
  image = Image.open(path)
74
- except (IOError, ValueError):
73
+ except (OSError, ValueError):
75
74
  image = download_image(path)
76
75
 
77
76
  if image.mode != mode:
@@ -141,14 +140,14 @@ def download_image(url: str) -> Image:
141
140
  return image
142
141
 
143
142
 
144
- def polygon_bounding_box(polygon: List[List[Union[int, float]]]) -> BoundingBox:
143
+ def polygon_bounding_box(polygon: list[list[int | float]]) -> BoundingBox:
145
144
  """
146
145
  Compute the rectangle bounding box of a polygon.
147
146
 
148
147
  :param polygon: Polygon to get the bounding box of.
149
148
  :returns: Bounding box of this polygon.
150
149
  """
151
- x_coords, y_coords = zip(*polygon)
150
+ x_coords, y_coords = zip(*polygon, strict=True)
152
151
  x, y = min(x_coords), min(y_coords)
153
152
  width, height = max(x_coords) - x, max(y_coords) - y
154
153
  return BoundingBox(x, y, width, height)
@@ -248,8 +247,8 @@ def download_tiles(url: str) -> Image:
248
247
 
249
248
 
250
249
  def trim_polygon(
251
- polygon: List[List[int]], image_width: int, image_height: int
252
- ) -> List[List[int]]:
250
+ polygon: list[list[int]], image_width: int, image_height: int
251
+ ) -> list[list[int]]:
253
252
  """
254
253
  Trim a polygon to an image's boundaries, with non-negative coordinates.
255
254
 
@@ -265,10 +264,10 @@ def trim_polygon(
265
264
  """
266
265
 
267
266
  assert isinstance(
268
- polygon, (list, tuple)
267
+ polygon, list | tuple
269
268
  ), "Input polygon must be a valid list or tuple of points."
270
269
  assert all(
271
- isinstance(point, (list, tuple)) for point in polygon
270
+ isinstance(point, list | tuple) for point in polygon
272
271
  ), "Polygon points must be tuples or lists."
273
272
  assert all(
274
273
  len(point) == 2 for point in polygon
@@ -301,10 +300,10 @@ def trim_polygon(
301
300
 
302
301
 
303
302
  def revert_orientation(
304
- element: Union["Element", "CachedElement"],
305
- polygon: List[List[Union[int, float]]],
306
- reverse: Optional[bool] = False,
307
- ) -> List[List[int]]:
303
+ element: "Element | CachedElement",
304
+ polygon: list[list[int | float]],
305
+ reverse: bool = False,
306
+ ) -> list[list[int]]:
308
307
  """
309
308
  Update the coordinates of the polygon of a child element based on the orientation of
310
309
  its parent.
@@ -324,7 +323,7 @@ def revert_orientation(
324
323
  from arkindex_worker.models import Element
325
324
 
326
325
  assert element and isinstance(
327
- element, (Element, CachedElement)
326
+ element, Element | CachedElement
328
327
  ), "element shouldn't be null and should be an Element or CachedElement"
329
328
  assert polygon and isinstance(
330
329
  polygon, list
@@ -1,11 +1,10 @@
1
- # -*- coding: utf-8 -*-
2
1
  """
3
2
  Wrappers around API results to provide more convenient attribute access and IIIF helpers.
4
3
  """
5
4
 
6
5
  import tempfile
6
+ from collections.abc import Generator
7
7
  from contextlib import contextmanager
8
- from typing import Generator, List, Optional
9
8
 
10
9
  from PIL import Image
11
10
  from requests import HTTPError
@@ -34,10 +33,10 @@ class MagicDict(dict):
34
33
  def __getattr__(self, name):
35
34
  try:
36
35
  return self[name]
37
- except KeyError:
36
+ except KeyError as e:
38
37
  raise AttributeError(
39
- "{} object has no attribute '{}'".format(self.__class__.__name__, name)
40
- )
38
+ f"{self.__class__.__name__} object has no attribute '{name}'"
39
+ ) from e
41
40
 
42
41
  def __setattr__(self, name, value):
43
42
  return super().__setitem__(name, value)
@@ -74,7 +73,7 @@ class Element(MagicDict):
74
73
  parts[-3] = size
75
74
  return "/".join(parts)
76
75
 
77
- def image_url(self, size: str = "full") -> Optional[str]:
76
+ def image_url(self, size: str = "full") -> str | None:
78
77
  """
79
78
  Build an URL to access the image.
80
79
  When possible, will return the S3 URL for images, so an ML worker can bypass IIIF servers.
@@ -89,10 +88,10 @@ class Element(MagicDict):
89
88
  url = self.zone.image.url
90
89
  if not url.endswith("/"):
91
90
  url += "/"
92
- return "{}full/{}/0/default.jpg".format(url, size)
91
+ return f"{url}full/{size}/0/default.jpg"
93
92
 
94
93
  @property
95
- def polygon(self) -> List[float]:
94
+ def polygon(self) -> list[float]:
96
95
  """
97
96
  Access an Element's polygon.
98
97
  This is a shortcut to an Element's polygon, normally accessed via
@@ -101,7 +100,7 @@ class Element(MagicDict):
101
100
  the [CachedElement][arkindex_worker.cache.CachedElement].polygon field.
102
101
  """
103
102
  if not self.get("zone"):
104
- raise ValueError("Element {} has no zone".format(self.id))
103
+ raise ValueError(f"Element {self.id} has no zone")
105
104
  return self.zone.polygon
106
105
 
107
106
  @property
@@ -122,11 +121,11 @@ class Element(MagicDict):
122
121
  def open_image(
123
122
  self,
124
123
  *args,
125
- max_width: Optional[int] = None,
126
- max_height: Optional[int] = None,
127
- use_full_image: Optional[bool] = False,
124
+ max_width: int | None = None,
125
+ max_height: int | None = None,
126
+ use_full_image: bool | None = False,
128
127
  **kwargs,
129
- ) -> Image:
128
+ ) -> Image.Image:
130
129
  """
131
130
  Open this element's image using Pillow, rotating and mirroring it according
132
131
  to the ``rotation_angle`` and ``mirrored`` attributes.
@@ -173,7 +172,7 @@ class Element(MagicDict):
173
172
  )
174
173
 
175
174
  if not self.get("zone"):
176
- raise ValueError("Element {} has no zone".format(self.id))
175
+ raise ValueError(f"Element {self.id} has no zone")
177
176
 
178
177
  if self.requires_tiles:
179
178
  if max_width is None and max_height is None:
@@ -194,10 +193,7 @@ class Element(MagicDict):
194
193
  else:
195
194
  resize = f"{max_width or ''},{max_height or ''}"
196
195
 
197
- if use_full_image:
198
- url = self.image_url(resize)
199
- else:
200
- url = self.resize_zone_url(resize)
196
+ url = self.image_url(resize) if use_full_image else self.resize_zone_url(resize)
201
197
 
202
198
  try:
203
199
  return open_image(
@@ -215,13 +211,13 @@ class Element(MagicDict):
215
211
  # This element uses an S3 URL: the URL may have expired.
216
212
  # Call the API to get a fresh URL and try again
217
213
  # TODO: this should be done by the worker
218
- raise NotImplementedError
214
+ raise NotImplementedError from e
219
215
  return open_image(self.image_url(resize), *args, **kwargs)
220
216
  raise
221
217
 
222
218
  @contextmanager
223
219
  def open_image_tempfile(
224
- self, format: Optional[str] = "jpeg", *args, **kwargs
220
+ self, format: str | None = "jpeg", *args, **kwargs
225
221
  ) -> Generator[tempfile.NamedTemporaryFile, None, None]:
226
222
  """
227
223
  Get the element's image as a temporary file stored on the disk.
@@ -249,7 +245,7 @@ class Element(MagicDict):
249
245
  type_name = self.type["display_name"]
250
246
  else:
251
247
  type_name = str(self.type)
252
- return "{} {} ({})".format(type_name, self.name, self.id)
248
+ return f"{type_name} {self.name} ({self.id})"
253
249
 
254
250
 
255
251
  class ArkindexModel(MagicDict):
@@ -1,11 +1,9 @@
1
- # -*- coding: utf-8 -*-
2
1
  import hashlib
3
2
  import logging
4
3
  import os
5
4
  import tarfile
6
5
  import tempfile
7
6
  from pathlib import Path
8
- from typing import Optional, Tuple, Union
9
7
 
10
8
  import zstandard
11
9
  import zstandard as zstd
@@ -16,7 +14,7 @@ CHUNK_SIZE = 1024
16
14
  """Chunk Size used for ZSTD compression"""
17
15
 
18
16
 
19
- def decompress_zst_archive(compressed_archive: Path) -> Tuple[int, Path]:
17
+ def decompress_zst_archive(compressed_archive: Path) -> tuple[int, Path]:
20
18
  """
21
19
  Decompress a ZST-compressed tar archive in data dir. The tar archive is not extracted.
22
20
  This returns the path to the archive and the file descriptor.
@@ -29,18 +27,19 @@ def decompress_zst_archive(compressed_archive: Path) -> Tuple[int, Path]:
29
27
  """
30
28
  dctx = zstandard.ZstdDecompressor()
31
29
  archive_fd, archive_path = tempfile.mkstemp(suffix=".tar")
30
+ archive_path = Path(archive_path)
32
31
 
33
32
  logger.debug(f"Uncompressing file to {archive_path}")
34
33
  try:
35
- with open(compressed_archive, "rb") as compressed, open(
36
- archive_path, "wb"
34
+ with compressed_archive.open("rb") as compressed, archive_path.open(
35
+ "wb"
37
36
  ) as decompressed:
38
37
  dctx.copy_stream(compressed, decompressed)
39
38
  logger.debug(f"Successfully uncompressed archive {compressed_archive}")
40
39
  except zstandard.ZstdError as e:
41
- raise Exception(f"Couldn't uncompressed archive: {e}")
40
+ raise Exception(f"Couldn't uncompressed archive: {e}") from e
42
41
 
43
- return archive_fd, Path(archive_path)
42
+ return archive_fd, archive_path
44
43
 
45
44
 
46
45
  def extract_tar_archive(archive_path: Path, destination: Path):
@@ -54,12 +53,12 @@ def extract_tar_archive(archive_path: Path, destination: Path):
54
53
  with tarfile.open(archive_path) as tar_archive:
55
54
  tar_archive.extractall(destination)
56
55
  except tarfile.ReadError as e:
57
- raise Exception(f"Couldn't handle the decompressed Tar archive: {e}")
56
+ raise Exception(f"Couldn't handle the decompressed Tar archive: {e}") from e
58
57
 
59
58
 
60
59
  def extract_tar_zst_archive(
61
60
  compressed_archive: Path, destination: Path
62
- ) -> Tuple[int, Path]:
61
+ ) -> tuple[int, Path]:
63
62
  """
64
63
  Extract a ZST-compressed tar archive's content to a specific destination
65
64
 
@@ -89,8 +88,8 @@ def close_delete_file(file_descriptor: int, file_path: Path):
89
88
 
90
89
 
91
90
  def zstd_compress(
92
- source: Path, destination: Optional[Path] = None
93
- ) -> Tuple[Union[int, None], Path, str]:
91
+ source: Path, destination: Path | None = None
92
+ ) -> tuple[int | None, Path, str]:
94
93
  """Compress a file using the Zstandard compression algorithm.
95
94
 
96
95
  :param source: Path to the file to compress.
@@ -117,13 +116,13 @@ def zstd_compress(
117
116
  archive_file.write(compressed_chunk)
118
117
  logger.debug(f"Successfully compressed {source}")
119
118
  except zstandard.ZstdError as e:
120
- raise Exception(f"Couldn't compress archive: {e}")
119
+ raise Exception(f"Couldn't compress archive: {e}") from e
121
120
  return file_d, destination, archive_hasher.hexdigest()
122
121
 
123
122
 
124
123
  def create_tar_archive(
125
- path: Path, destination: Optional[Path] = None
126
- ) -> Tuple[Union[int, None], Path, str]:
124
+ path: Path, destination: Path | None = None
125
+ ) -> tuple[int | None, Path, str]:
127
126
  """Create a tar archive using the content at specified location.
128
127
 
129
128
  :param path: Path to the file to archive
@@ -153,7 +152,7 @@ def create_tar_archive(
153
152
  files.append(p)
154
153
  logger.debug(f"Successfully created Tar archive from files @ {path}")
155
154
  except tarfile.TarError as e:
156
- raise Exception(f"Couldn't create Tar archive: {e}")
155
+ raise Exception(f"Couldn't create Tar archive: {e}") from e
157
156
 
158
157
  # Sort by path
159
158
  files.sort()
@@ -168,8 +167,8 @@ def create_tar_archive(
168
167
 
169
168
 
170
169
  def create_tar_zst_archive(
171
- source: Path, destination: Optional[Path] = None
172
- ) -> Tuple[Union[int, None], Path, str, str]:
170
+ source: Path, destination: Path | None = None
171
+ ) -> tuple[int | None, Path, str, str]:
173
172
  """Helper to create a TAR+ZST archive from a source folder.
174
173
 
175
174
  :param source: Path to the folder whose content should be archived.
@@ -1,17 +1,16 @@
1
- # -*- coding: utf-8 -*-
2
1
  """
3
2
  Base classes to implement Arkindex workers.
4
3
  """
5
-
4
+ import contextlib
6
5
  import json
7
6
  import os
8
7
  import sys
9
8
  import uuid
9
+ from collections.abc import Iterable, Iterator
10
10
  from enum import Enum
11
11
  from itertools import groupby
12
12
  from operator import itemgetter
13
13
  from pathlib import Path
14
- from typing import Iterable, Iterator, List, Tuple, Union
15
14
 
16
15
  from apistar.exceptions import ErrorResponse
17
16
 
@@ -102,7 +101,7 @@ class ElementsWorker(
102
101
 
103
102
  self._worker_version_cache = {}
104
103
 
105
- def list_elements(self) -> Union[Iterable[CachedElement], List[str]]:
104
+ def list_elements(self) -> Iterable[CachedElement] | list[str]:
106
105
  """
107
106
  List the elements to be processed, either from the CLI arguments or
108
107
  the cache database when enabled.
@@ -227,21 +226,17 @@ class ElementsWorker(
227
226
  )
228
227
  if element:
229
228
  # Try to update the activity to error state regardless of the response
230
- try:
229
+ with contextlib.suppress(Exception):
231
230
  self.update_activity(element.id, ActivityState.Error)
232
- except Exception:
233
- pass
234
231
 
235
232
  if failed:
236
233
  logger.error(
237
- "Ran on {} elements: {} completed, {} failed".format(
238
- count, count - failed, failed
239
- )
234
+ f"Ran on {count} elements: {count - failed} completed, {failed} failed"
240
235
  )
241
236
  if failed >= count: # Everything failed!
242
237
  sys.exit(1)
243
238
 
244
- def process_element(self, element: Union[Element, CachedElement]):
239
+ def process_element(self, element: Element | CachedElement):
245
240
  """
246
241
  Override this method to implement your worker and process a single Arkindex element at once.
247
242
 
@@ -251,7 +246,7 @@ class ElementsWorker(
251
246
  """
252
247
 
253
248
  def update_activity(
254
- self, element_id: Union[str, uuid.UUID], state: ActivityState
249
+ self, element_id: str | uuid.UUID, state: ActivityState
255
250
  ) -> bool:
256
251
  """
257
252
  Update the WorkerActivity for this element and worker.
@@ -269,7 +264,7 @@ class ElementsWorker(
269
264
  return True
270
265
 
271
266
  assert element_id and isinstance(
272
- element_id, (uuid.UUID, str)
267
+ element_id, uuid.UUID | str
273
268
  ), "element_id shouldn't be null and should be an UUID or str"
274
269
  assert isinstance(state, ActivityState), "state should be an ActivityState"
275
270
 
@@ -382,7 +377,7 @@ class DatasetWorker(BaseWorker, DatasetMixin, TaskMixin):
382
377
 
383
378
  def list_dataset_elements_per_split(
384
379
  self, dataset: Dataset
385
- ) -> Iterator[Tuple[str, List[Element]]]:
380
+ ) -> Iterator[tuple[str, list[Element]]]:
386
381
  """
387
382
  List the elements in the dataset, grouped by split, using the
388
383
  [list_dataset_elements][arkindex_worker.worker.dataset.DatasetMixin.list_dataset_elements] method.
@@ -392,8 +387,8 @@ class DatasetWorker(BaseWorker, DatasetMixin, TaskMixin):
392
387
  """
393
388
 
394
389
  def format_split(
395
- split: Tuple[str, Iterator[Tuple[str, Element]]]
396
- ) -> Tuple[str, List[Element]]:
390
+ split: tuple[str, Iterator[tuple[str, Element]]]
391
+ ) -> tuple[str, list[Element]]:
397
392
  return (split[0], list(map(itemgetter(1), list(split[1]))))
398
393
 
399
394
  return map(
@@ -435,7 +430,7 @@ class DatasetWorker(BaseWorker, DatasetMixin, TaskMixin):
435
430
  """
436
431
  self.configure()
437
432
 
438
- datasets: List[Dataset] | List[str] = list(self.list_datasets())
433
+ datasets: list[Dataset] | list[str] = list(self.list_datasets())
439
434
  if not datasets:
440
435
  logger.warning("No datasets to process, stopping.")
441
436
  sys.exit(1)
@@ -499,16 +494,12 @@ class DatasetWorker(BaseWorker, DatasetMixin, TaskMixin):
499
494
  )
500
495
  if dataset and self.generator:
501
496
  # Try to update the state to Error regardless of the response
502
- try:
497
+ with contextlib.suppress(Exception):
503
498
  self.update_dataset_state(dataset, DatasetState.Error)
504
- except Exception:
505
- pass
506
499
 
507
500
  if failed:
508
501
  logger.error(
509
- "Ran on {} datasets: {} completed, {} failed".format(
510
- count, count - failed, failed
511
- )
502
+ f"Ran on {count} datasets: {count - failed} completed, {failed} failed"
512
503
  )
513
504
  if failed >= count: # Everything failed!
514
505
  sys.exit(1)