arkindex-base-worker 0.4.0rc6__py3-none-any.whl → 0.5.0a2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.2
2
2
  Name: arkindex-base-worker
3
- Version: 0.4.0rc6
3
+ Version: 0.5.0a2
4
4
  Summary: Base Worker to easily build Arkindex ML workflows
5
5
  Author-email: Teklia <contact@teklia.com>
6
6
  Maintainer-email: Teklia <contact@teklia.com>
@@ -46,7 +46,7 @@ Requires-Dist: peewee~=3.17
46
46
  Requires-Dist: Pillow==11.0.0
47
47
  Requires-Dist: python-gnupg==0.5.3
48
48
  Requires-Dist: shapely==2.0.6
49
- Requires-Dist: teklia-toolbox==0.1.7b1
49
+ Requires-Dist: teklia-toolbox==0.1.7
50
50
  Requires-Dist: zstandard==0.23.0
51
51
  Provides-Extra: docs
52
52
  Requires-Dist: black==24.10.0; extra == "docs"
@@ -1,32 +1,32 @@
1
- arkindex_worker/__init__.py,sha256=OlgCtTC9MaWeejviY0a3iQpALcRQGMVArFVVYwTF6I8,162
2
- arkindex_worker/cache.py,sha256=qTblc_zKdYC47Wip6_O9Jf5qBkQW2ozQQrg-nsx1WuY,11221
3
- arkindex_worker/image.py,sha256=oEgVCrSHiGh3D5-UXfM6PvT17TttSxC0115irpvB3Dw,18581
1
+ arkindex_worker/__init__.py,sha256=Sdt5KXn8EgURb2MurYVrUWaHbH3iFA1XLRo0Lc5AJ44,250
2
+ arkindex_worker/cache.py,sha256=NpCsYFnqBmyBrACqeV7c3P6j6YrTtyi-HgtewwxUpxc,11221
3
+ arkindex_worker/image.py,sha256=-oBhLqzTRsxJoXkzDYFw1Ic4JxQtpmNtzGxe1zOHotw,20980
4
4
  arkindex_worker/models.py,sha256=bPQzGZNs5a6z6DEcygsa8T33VOqPlMUbwKzHqlKzwbw,9923
5
- arkindex_worker/utils.py,sha256=q1EeLdC6ebYIH-C0LOAqw2cNpjCjVoP-Vbr-39mF4w0,9884
6
- arkindex_worker/worker/__init__.py,sha256=0_YHeOe31KR_8ynbnYMIMwnSQTVbKkkeLGmnlTMhFx0,16234
7
- arkindex_worker/worker/base.py,sha256=7Pmw-UQSxV-xkW8NO5cXsxJ8W8szzyppMaNjq_az81A,19844
8
- arkindex_worker/worker/classification.py,sha256=zECSNzGCZFzoPoDVZN4kuGYRNLzMQLBaRt3q1jnBSaA,10952
9
- arkindex_worker/worker/corpus.py,sha256=0TQFOwZ6Te-CZi6lgkZY1wzyJ5wO9LAmcVQtqHvZpPk,2291
10
- arkindex_worker/worker/dataset.py,sha256=LwzKwNFX4FqfLxh29LSvJydPwRw3VHaB1wjuFhUshsE,5267
11
- arkindex_worker/worker/element.py,sha256=Qvvq9kJnAHNATHW7zi96eIY1x-0MsR-T5rrSJg6e9Y4,45309
12
- arkindex_worker/worker/entity.py,sha256=s5wjX6_JfTyk4qfMoV0OWfOXUx6T-9WpOiEpaoaCEFM,14808
13
- arkindex_worker/worker/image.py,sha256=t_Az6IGnj0EZyvcA4XxfPikOUjn_pztgsyxTkFZhaXU,621
14
- arkindex_worker/worker/metadata.py,sha256=VRajtd2kaBvar9GercX4knvR6l1WFYjoCdJWU9ccKgk,7291
5
+ arkindex_worker/utils.py,sha256=MbbJT8oh8DMHHR-vidFeXdUH0TSXGWm7ZDGWzrRXoEY,9933
6
+ arkindex_worker/worker/__init__.py,sha256=3adK1-BDv6uCKUmY0cqaz7LuEJChDHyNmRVPCA1y8lA,16238
7
+ arkindex_worker/worker/base.py,sha256=MbZW9WmSjwh0yKeMckyjm6WxHI9xpfFhWsx5AOzQ0aY,19844
8
+ arkindex_worker/worker/classification.py,sha256=gu_xAkGgvoebbG1xCZ4P7DqYe6cHwTGiHUc9nG0__8A,10996
9
+ arkindex_worker/worker/corpus.py,sha256=MeIMod7jkWyX0frtD0a37rhumnMV3p9ZOC1xwAoXrAA,2291
10
+ arkindex_worker/worker/dataset.py,sha256=tVaPx43vaH-KTtx4w5V06e26ha8XPfiJTRzBXlu928Y,5273
11
+ arkindex_worker/worker/element.py,sha256=0zwODtutkX4AIeSKe0wV9cmNeIZ5cRwTqnuFrVQmKOw,45403
12
+ arkindex_worker/worker/entity.py,sha256=5OaP3HBrA-jbP-3xF-_TpaYh5WxMlQAUUaGLmhynpEE,14833
13
+ arkindex_worker/worker/image.py,sha256=L6Ikuf0Z0RxJk7JarY5PggJGrYSHLaPK0vn0dy0CIaQ,623
14
+ arkindex_worker/worker/metadata.py,sha256=mb9hVU-nRw3drCN-0AvtZ0nPY-4tD-ye9_mVy6icbk4,7309
15
15
  arkindex_worker/worker/process.py,sha256=9TEHpMcBax1wc6PrWMMrdXe2uNfqyVj7n_dAYZRBGnY,1854
16
- arkindex_worker/worker/task.py,sha256=r1j7_qbdNu2Z8H8HbGzO3P3qdx-2N1pBbUPFDca0rqg,1519
17
- arkindex_worker/worker/training.py,sha256=H8FmCdzGcDW-WMMwcgvmZPlN5tPHwGo0BXn12qmzj8g,10875
18
- arkindex_worker/worker/transcription.py,sha256=52RY9kYsiR1sz9FxOigyo12Ker3VDbQ4U42gK9DpR3g,21146
16
+ arkindex_worker/worker/task.py,sha256=nYfMSFm_d-4t8y4PO4HjFBnLsZf7IsDjkS7-A2Pgnac,1525
17
+ arkindex_worker/worker/training.py,sha256=tyQOHcwv--_wdYz6CgLEe1YM7kwwwKN30LvGTsnWd78,10923
18
+ arkindex_worker/worker/transcription.py,sha256=sw718R119tsLNY8inPMVeIilvFJo94fMbMtYgH0zTM8,21250
19
19
  arkindex_worker/worker/version.py,sha256=JIT7OI3Mo7RPkNrjOB9hfqrsG-FYygz_zi4l8PbkuAo,1960
20
20
  hooks/pre_gen_project.py,sha256=xQJERv3vv9VzIqcBHI281eeWLWREXUF4mMw7PvJHHXM,269
21
21
  tests/__init__.py,sha256=DG--S6IpGl399rzSAjDdHL76CkOIeZIjajCcyUSDhOQ,241
22
- tests/conftest.py,sha256=2ocZ2x-mZQrNe9zvWwhWk2_4ExdaBHIB74SvtDlExRE,21580
22
+ tests/conftest.py,sha256=Z9amrKmVtFltzTUUm07fGDrT4m540biaTpjedmplyzc,21536
23
23
  tests/test_base_worker.py,sha256=2EIYcd_3f9O0zB5WiGIQV0Cn9wndLvnEnSfcAE1qWWU,30607
24
24
  tests/test_cache.py,sha256=ii0gyr0DrG7ChEs7pmT8hMdSguAOAcCze4bRMiFQxuk,10640
25
- tests/test_dataset_worker.py,sha256=gApYz0LArHr1cNn079_fa_BQABF6RVQYuM1Tc4m3NsQ,22089
25
+ tests/test_dataset_worker.py,sha256=z8ydliUlwW2j-irgLAotJMacgJXkVvF5TgsWLyCn1Jo,22087
26
26
  tests/test_element.py,sha256=2G9M15TLxQRmvrWM9Kw2ucnElh4kSv_oF_5FYwwAxTY,13181
27
- tests/test_image.py,sha256=03E24JVa7TZJfuwQyfVEBe3RAq3R993IMl1AHXRr7zY,25497
27
+ tests/test_image.py,sha256=mYyRfDXGLLzcQQtmaM7GR3jt7ScxsLLog16pUVHrH3M,27824
28
28
  tests/test_merge.py,sha256=TuOeUS0UCz66DPOQFFhc4NQBxIjZL9f5czi4XnvGrr4,8270
29
- tests/test_utils.py,sha256=_WJUPnt-pM_TQ0er4yjPZy-u_LePrHq1lxwk_teky7M,2544
29
+ tests/test_utils.py,sha256=nYL1s2ViZoLoMiNpLGDaWwxf8dJ1D8aT522AO-PVaEQ,3607
30
30
  tests/test_elements_worker/__init__.py,sha256=Fh4nkbbyJSMv_VtjQxnWrOqTnxXaaWI8S9WU0VrzCHs,179
31
31
  tests/test_elements_worker/test_classification.py,sha256=nya7veSPR_O9G41Enodp2-o6AifMBcaSTWJP2vXSSJ4,30133
32
32
  tests/test_elements_worker/test_cli.py,sha256=a23i1pUDbXi23MUtbWwGEcLLrmc_YlrbDgOG3h66wLM,2620
@@ -43,19 +43,19 @@ tests/test_elements_worker/test_image.py,sha256=BljMNKgec_9a5bzNzFpYZIvSbuvwsWDf
43
43
  tests/test_elements_worker/test_metadata.py,sha256=Xfggy-vxw5DZ3hFKx3sB7OYb2d1tu1RiNK8fvKJIaBs,22294
44
44
  tests/test_elements_worker/test_process.py,sha256=y4RoVhPfyHzR795fw7-_FXElBcKo3fy4Ew_HI-kxJic,3088
45
45
  tests/test_elements_worker/test_task.py,sha256=wTUWqN9UhfKmJn3IcFY75EW4I1ulRhisflmY1kmP47s,5574
46
- tests/test_elements_worker/test_training.py,sha256=Qxi9EzGr_uKcn2Fh5aE6jNrq1K8QKLiOiSew4upASPs,8721
46
+ tests/test_elements_worker/test_training.py,sha256=qgK7BLucddRzc8ePbQtY75x17QvGDEq5XCwgyyvmAJE,8717
47
47
  tests/test_elements_worker/test_transcription_create.py,sha256=yznO9B_BVsOR0Z_VY5ZL8gJp0ZPCz_4sPUs5dXtixAg,29281
48
48
  tests/test_elements_worker/test_transcription_create_with_elements.py,sha256=tmcyglgssEqMnt1Mdy_u6X1m2wgLWTo_HdWst3GrK2k,33056
49
49
  tests/test_elements_worker/test_transcription_list.py,sha256=ikz7HYPCoQWTdTRCd382SB-y-T2BbigPLlIcx5Eow-I,15324
50
50
  tests/test_elements_worker/test_version.py,sha256=xqCgcgukTFJzkMgYfQG-8mTbu0o2fdYjWC07FktThfw,2125
51
- tests/test_elements_worker/test_worker.py,sha256=pLUgjyrrXrzVD6T-kdH1ppk5Yn_iDuI8JdFGweTEMXE,25156
51
+ tests/test_elements_worker/test_worker.py,sha256=HDw_UQdiMUzlBd4-jRvC-B3pNrZmmpps4sfZ9a87JVY,25378
52
52
  worker-demo/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
53
53
  worker-demo/tests/conftest.py,sha256=XzNMNeg6pmABUAH8jN6eZTlZSFGLYjS3-DTXjiRN6Yc,1002
54
54
  worker-demo/tests/test_worker.py,sha256=3DLd4NRK4bfyatG5P_PK4k9P9tJHx9XQq5_ryFEEFVg,304
55
55
  worker-demo/worker_demo/__init__.py,sha256=2BPomV8ZMNf3YXJgloatKeHQCE6QOkwmsHGkO6MkQuM,125
56
56
  worker-demo/worker_demo/worker.py,sha256=Rt-DjWa5iBP08k58NDZMfeyPuFbtNcbX6nc5jFX7GNo,440
57
- arkindex_base_worker-0.4.0rc6.dist-info/LICENSE,sha256=NVshRi1efwVezMfW7xXYLrdDr2Li1AfwfGOd5WuH1kQ,1063
58
- arkindex_base_worker-0.4.0rc6.dist-info/METADATA,sha256=gJd_0X7A26nuBe2EsIPHwap1XV2KnJBq2QwjBBB3Wi0,3339
59
- arkindex_base_worker-0.4.0rc6.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
60
- arkindex_base_worker-0.4.0rc6.dist-info/top_level.txt,sha256=58NuslgxQC2vT4DiqZEgO4JqJRrYa2yeNI9QvkbfGQU,40
61
- arkindex_base_worker-0.4.0rc6.dist-info/RECORD,,
57
+ arkindex_base_worker-0.5.0a2.dist-info/LICENSE,sha256=NVshRi1efwVezMfW7xXYLrdDr2Li1AfwfGOd5WuH1kQ,1063
58
+ arkindex_base_worker-0.5.0a2.dist-info/METADATA,sha256=MAvsl683tKCx2pqnw1JmQXydhj3JSfcC1Kb3n37R4Kw,3336
59
+ arkindex_base_worker-0.5.0a2.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
60
+ arkindex_base_worker-0.5.0a2.dist-info/top_level.txt,sha256=58NuslgxQC2vT4DiqZEgO4JqJRrYa2yeNI9QvkbfGQU,40
61
+ arkindex_base_worker-0.5.0a2.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (75.6.0)
2
+ Generator: setuptools (75.8.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -1,3 +1,4 @@
1
+ import importlib.metadata
1
2
  import logging
2
3
 
3
4
  logging.basicConfig(
@@ -5,3 +6,5 @@ logging.basicConfig(
5
6
  format="%(asctime)s %(levelname)s/%(name)s: %(message)s",
6
7
  )
7
8
  logger = logging.getLogger(__name__)
9
+
10
+ VERSION = importlib.metadata.version("arkindex-base-worker")
arkindex_worker/cache.py CHANGED
@@ -327,9 +327,9 @@ def check_version(cache_path: str | Path):
327
327
  except OperationalError:
328
328
  version = None
329
329
 
330
- assert (
331
- version == SQL_VERSION
332
- ), f"The SQLite database {cache_path} does not have the correct cache version, it should be {SQL_VERSION}"
330
+ assert version == SQL_VERSION, (
331
+ f"The SQLite database {cache_path} does not have the correct cache version, it should be {SQL_VERSION}"
332
+ )
333
333
 
334
334
 
335
335
  def merge_parents_cache(paths: list, current_database: Path):
arkindex_worker/image.py CHANGED
@@ -2,6 +2,7 @@
2
2
  Helper methods to download and open IIIF images, and manage polygons.
3
3
  """
4
4
 
5
+ import base64
5
6
  import functools
6
7
  import os
7
8
  import re
@@ -14,6 +15,7 @@ from pathlib import Path
14
15
  from typing import TYPE_CHECKING
15
16
 
16
17
  import humanize
18
+ import numpy as np
17
19
  import requests
18
20
  from PIL import Image
19
21
  from shapely.affinity import rotate, scale, translate
@@ -25,7 +27,7 @@ from tenacity import (
25
27
  wait_exponential,
26
28
  )
27
29
 
28
- from arkindex_worker import logger
30
+ from arkindex_worker import VERSION, logger
29
31
  from arkindex_worker.utils import pluralize
30
32
  from teklia_toolbox.requests import should_verify_cert
31
33
 
@@ -39,14 +41,16 @@ DOWNLOAD_TIMEOUT = (30, 60)
39
41
 
40
42
  BoundingBox = namedtuple("BoundingBox", ["x", "y", "width", "height"])
41
43
 
44
+ # Specific User-Agent to bypass potential server limitations
45
+ IIIF_USER_AGENT = f"Teklia/Workers {VERSION}"
42
46
  # To parse IIIF Urls
43
47
  IIIF_URL = re.compile(r"\w+:\/{2}.+\/.+\/.+\/.+\/(?P<size>.+)\/!?\d+\/\w+\.\w+")
44
48
  # Full size of the region
45
49
  IIIF_FULL = "full"
46
50
  # Maximum size available
47
51
  IIIF_MAX = "max"
48
- # Ratio to resize image
49
- IMAGE_RATIO = [1, 0.9, 0.85, 0.80, 0.75, 0.70, 0.60, 0.50, 0.40, 0.30]
52
+ # Ratios to resize images: 1.0, 0.95, [...], 0.1, 0.05
53
+ IMAGE_RATIOS = np.arange(1, 0, -0.05).round(2).tolist()
50
54
 
51
55
 
52
56
  def update_pillow_image_size_limit(func):
@@ -206,44 +210,81 @@ def upload_image(image: Image, url: str) -> requests.Response:
206
210
  def resized_images(
207
211
  *args,
208
212
  element: "Element",
209
- max_pixels: int | None = None,
213
+ max_pixels_short: int | None = None,
214
+ max_pixels_long: int | None = None,
210
215
  max_bytes: int | None = None,
216
+ use_base64: bool = False,
211
217
  **kwargs,
212
- ) -> Iterator[Generator[tempfile.NamedTemporaryFile, None, None]]:
218
+ ) -> Iterator[Generator[tempfile._TemporaryFileWrapper | str]]:
213
219
  """
214
- Build resized images according to the pixel and byte limits.
220
+ Build resized images according to pixel and byte limits.
215
221
 
216
222
  :param *args: Positional arguments passed to [arkindex_worker.models.Element.open_image_tempfile][].
217
223
  :param element: Element whose image needs to be resized.
218
- :param max_pixels: Maximum pixel size of the resized images.
224
+ :param max_pixels_short: Maximum pixel size of the resized images' short side.
225
+ :param max_pixels_long: Maximum pixel size of the resized images' long side.
219
226
  :param max_bytes: Maximum byte size of the resized images.
227
+ :param use_base64: Whether or not to encode resized images in base64 before calculating their size.
220
228
  :param **kwargs: Keyword arguments passed to [arkindex_worker.models.Element.open_image_tempfile][].
221
- :returns: An iterator of the temporary file of the resized image.
229
+ :returns: An iterator of temporary files for resized images OR an iterator of base64-encoded strings if `use_base64` is set.
222
230
  """
223
231
  _, _, element_width, element_height = polygon_bounding_box(element.polygon)
232
+ logger.info(
233
+ f"This element's image dimensions are ({element_width} x {element_height})."
234
+ )
235
+
236
+ portrait_format = element_width <= element_height
237
+ max_pixels_width, max_pixels_height = (
238
+ (max_pixels_short, max_pixels_long)
239
+ if portrait_format
240
+ else (max_pixels_long, max_pixels_short)
241
+ )
242
+
243
+ # The image dimension is already within the pixel limitation, no need to resize the image
244
+ if max_pixels_width and max_pixels_width >= element_width:
245
+ max_pixels_width = None
246
+ if max_pixels_height and max_pixels_height >= element_height:
247
+ max_pixels_height = None
224
248
 
225
- logger.info(f"This element's image sizes are ({element_width} x {element_height}).")
226
- if max_pixels and max(element_width, element_height) > max_pixels:
249
+ if (max_pixels_width and element_width > max_pixels_width) or (
250
+ max_pixels_height and element_height > max_pixels_height
251
+ ):
227
252
  logger.warning(
228
- f"Maximum image input size supported is ({max_pixels} x {max_pixels})."
253
+ f"Maximum image dimensions supported are ({max_pixels_width or element_width} x {max_pixels_height or element_height})."
229
254
  )
230
255
  logger.warning("The image will be resized.")
231
256
 
232
- element_pixel, param = (
233
- (element_width, "max_width")
234
- if element_width > element_height
235
- else (element_height, "max_height")
236
- )
257
+ # No limitations provided, we keep the image initial dimensions
258
+ if max_pixels_width is None and max_pixels_height is None:
259
+ open_image_param, max_value = (
260
+ ("max_height", element_height)
261
+ if portrait_format
262
+ else ("max_width", element_width)
263
+ )
264
+ # A limitation is only given for the height, we resize it
265
+ elif max_pixels_width is None:
266
+ open_image_param, max_value = ("max_height", max_pixels_height)
267
+ # A limitation is only given for the width, we resize it
268
+ elif max_pixels_height is None:
269
+ open_image_param, max_value = ("max_width", max_pixels_width)
270
+ # Limitations are provided for both sides:
271
+ # - we resize only the one with the biggest scale factor
272
+ # - the remaining one will automatically fall within the other limitation
273
+ else:
274
+ width_rescaling_factor = element_width / max_pixels_width
275
+ height_rescaling_factor = element_height / max_pixels_height
276
+ open_image_param, max_value = (
277
+ ("max_height", max_pixels_height)
278
+ if height_rescaling_factor > width_rescaling_factor
279
+ else ("max_width", max_pixels_width)
280
+ )
237
281
 
238
- for resized_pixel in sorted(
239
- set(
240
- min(round(ratio * element_pixel), max_pixels or element_pixel)
241
- for ratio in IMAGE_RATIO
242
- ),
243
- reverse=True,
244
- ):
282
+ resized_pixels = set(
283
+ min(round(ratio * max_value), max_value) for ratio in IMAGE_RATIOS
284
+ )
285
+ for resized_pixel in sorted(resized_pixels, reverse=True):
245
286
  with element.open_image_tempfile(
246
- *args, **{**kwargs, param: resized_pixel}
287
+ *args, **{**kwargs, open_image_param: resized_pixel}
247
288
  ) as image:
248
289
  pillow_image = Image.open(image)
249
290
  if (
@@ -254,8 +295,12 @@ def resized_images(
254
295
  f"The image was resized to ({pillow_image.width} x {pillow_image.height})."
255
296
  )
256
297
 
257
- # The image is still too large
258
298
  image_size = Path(image.name).stat().st_size
299
+ if use_base64:
300
+ image = base64.b64encode(Path(image.name).read_bytes()).decode("utf-8")
301
+ image_size = len(image)
302
+
303
+ # The image is still too heavy
259
304
  if max_bytes and image_size > max_bytes:
260
305
  logger.warning(f"The image size is {humanize.naturalsize(image_size)}.")
261
306
  logger.warning(
@@ -283,7 +328,7 @@ def polygon_bounding_box(polygon: list[list[int | float]]) -> BoundingBox:
283
328
  def _retry_log(retry_state, *args, **kwargs):
284
329
  logger.warning(
285
330
  f"Request to {retry_state.args[0]} failed ({repr(retry_state.outcome.exception())}), "
286
- f'retrying in {retry_state.idle_for} {pluralize("second", retry_state.idle_for)}'
331
+ f"retrying in {retry_state.idle_for} {pluralize('second', retry_state.idle_for)}"
287
332
  )
288
333
 
289
334
 
@@ -296,7 +341,12 @@ def _retry_log(retry_state, *args, **kwargs):
296
341
  )
297
342
  def _retried_request(url, *args, method=requests.get, **kwargs):
298
343
  resp = method(
299
- url, *args, timeout=DOWNLOAD_TIMEOUT, verify=should_verify_cert(url), **kwargs
344
+ url,
345
+ *args,
346
+ headers={"User-Agent": IIIF_USER_AGENT},
347
+ timeout=DOWNLOAD_TIMEOUT,
348
+ verify=should_verify_cert(url),
349
+ **kwargs,
300
350
  )
301
351
  resp.raise_for_status()
302
352
  return resp
@@ -316,9 +366,9 @@ def download_tiles(url: str) -> Image:
316
366
 
317
367
  image_width, image_height = info.get("width"), info.get("height")
318
368
  assert image_width and image_height, "Missing image dimensions in info.json"
319
- assert info.get(
320
- "tiles"
321
- ), "Image cannot be retrieved at full size and tiles are not supported"
369
+ assert info.get("tiles"), (
370
+ "Image cannot be retrieved at full size and tiles are not supported"
371
+ )
322
372
 
323
373
  # Take the biggest available tile size
324
374
  tile = sorted(info["tiles"], key=lambda tile: tile.get("width", 0), reverse=True)[0]
@@ -392,15 +442,15 @@ def trim_polygon(
392
442
  is entirely outside of the image's bounds.
393
443
  """
394
444
 
395
- assert isinstance(
396
- polygon, list | tuple
397
- ), "Input polygon must be a valid list or tuple of points."
398
- assert all(
399
- isinstance(point, list | tuple) for point in polygon
400
- ), "Polygon points must be tuples or lists."
401
- assert all(
402
- len(point) == 2 for point in polygon
403
- ), "Polygon points must be tuples or lists of 2 elements."
445
+ assert isinstance(polygon, list | tuple), (
446
+ "Input polygon must be a valid list or tuple of points."
447
+ )
448
+ assert all(isinstance(point, list | tuple) for point in polygon), (
449
+ "Polygon points must be tuples or lists."
450
+ )
451
+ assert all(len(point) == 2 for point in polygon), (
452
+ "Polygon points must be tuples or lists of 2 elements."
453
+ )
404
454
  assert all(
405
455
  isinstance(point[0], int) and isinstance(point[1], int) for point in polygon
406
456
  ), "Polygon point coordinates must be integers."
@@ -451,12 +501,12 @@ def revert_orientation(
451
501
  from arkindex_worker.cache import CachedElement
452
502
  from arkindex_worker.models import Element
453
503
 
454
- assert element and isinstance(
455
- element, Element | CachedElement
456
- ), "element shouldn't be null and should be an Element or CachedElement"
457
- assert polygon and isinstance(
458
- polygon, list
459
- ), "polygon shouldn't be null and should be a list"
504
+ assert element and isinstance(element, Element | CachedElement), (
505
+ "element shouldn't be null and should be an Element or CachedElement"
506
+ )
507
+ assert polygon and isinstance(polygon, list), (
508
+ "polygon shouldn't be null and should be a list"
509
+ )
460
510
  assert isinstance(reverse, bool), "Reverse should be a bool"
461
511
  # Rotating with Pillow can cause it to move the image around, as the image cannot have negative coordinates
462
512
  # and must be a rectangle. This means the origin point of any coordinates from an image is invalid, and the
@@ -464,9 +514,9 @@ def revert_orientation(
464
514
  # To properly undo the mirroring and rotation implicitly applied by open_image, we first need to find the center
465
515
  # of the rotated bounding box.
466
516
  if isinstance(element, Element):
467
- assert (
468
- element.zone and element.zone.polygon
469
- ), "element should have a zone and a polygon"
517
+ assert element.zone and element.zone.polygon, (
518
+ "element should have a zone and a polygon"
519
+ )
470
520
  parent_ring = LinearRing(element.zone.polygon)
471
521
  elif isinstance(element, CachedElement):
472
522
  assert element.polygon, "cached element should have a polygon"
arkindex_worker/utils.py CHANGED
@@ -243,11 +243,12 @@ def batch_publication(func: Callable) -> Callable:
243
243
  bound_func.apply_defaults()
244
244
  batch_size = bound_func.arguments.get("batch_size")
245
245
  assert (
246
- batch_size and isinstance(batch_size, int) and batch_size > 0
246
+ batch_size is not None and isinstance(batch_size, int) and batch_size > 0
247
247
  ), "batch_size shouldn't be null and should be a strictly positive integer"
248
248
 
249
249
  return func(self, *args, **kwargs)
250
250
 
251
+ wrapper.__name__ = func.__name__
251
252
  return wrapper
252
253
 
253
254
 
@@ -82,9 +82,9 @@ class ElementsWorker(
82
82
  :return: An iterable of [CachedElement][arkindex_worker.cache.CachedElement] when cache support is enabled,
83
83
  or a list of strings representing element IDs otherwise.
84
84
  """
85
- assert not (
86
- self.args.elements_list and self.args.element
87
- ), "elements-list and element CLI args shouldn't be both set"
85
+ assert not (self.args.elements_list and self.args.element), (
86
+ "elements-list and element CLI args shouldn't be both set"
87
+ )
88
88
 
89
89
  def invalid_element_id(value: str) -> bool:
90
90
  """
@@ -125,9 +125,9 @@ class ElementsWorker(
125
125
  return {item["id"] for item in self.list_process_elements()}
126
126
 
127
127
  invalid_element_ids = list(filter(invalid_element_id, out))
128
- assert (
129
- not invalid_element_ids
130
- ), f"These element IDs are invalid: {', '.join(invalid_element_ids)}"
128
+ assert not invalid_element_ids, (
129
+ f"These element IDs are invalid: {', '.join(invalid_element_ids)}"
130
+ )
131
131
 
132
132
  return out
133
133
 
@@ -144,9 +144,9 @@ class ElementsWorker(
144
144
  # Worker activities are also disabled when running an ElementsWorker in a Dataset process
145
145
  # and when running export processes.
146
146
  return False
147
- assert (
148
- self.process_information
149
- ), "Worker must be configured to access its process activity state"
147
+ assert self.process_information, (
148
+ "Worker must be configured to access its process activity state"
149
+ )
150
150
  return self.process_information.get("activity_state") == "ready"
151
151
 
152
152
  def run(self):
@@ -221,7 +221,7 @@ class ElementsWorker(
221
221
  with contextlib.suppress(Exception):
222
222
  self.update_activity(element.id, ActivityState.Error)
223
223
 
224
- message = f'Ran on {count} {pluralize("element", count)}: {count - failed} completed, {failed} failed'
224
+ message = f"Ran on {count} {pluralize('element', count)}: {count - failed} completed, {failed} failed"
225
225
  if failed:
226
226
  logger.error(message)
227
227
  if failed >= count: # Everything failed!
@@ -256,9 +256,9 @@ class ElementsWorker(
256
256
  )
257
257
  return True
258
258
 
259
- assert element_id and isinstance(
260
- element_id, uuid.UUID | str
261
- ), "element_id shouldn't be null and should be an UUID or str"
259
+ assert element_id and isinstance(element_id, uuid.UUID | str), (
260
+ "element_id shouldn't be null and should be an UUID or str"
261
+ )
262
262
  assert isinstance(state, ActivityState), "state should be an ActivityState"
263
263
 
264
264
  try:
@@ -382,9 +382,9 @@ class DatasetWorker(DatasetMixin, BaseWorker, TaskMixin):
382
382
  failed = 0
383
383
  for i, dataset_set in enumerate(dataset_sets, start=1):
384
384
  try:
385
- assert (
386
- dataset_set.dataset.state == DatasetState.Complete.value
387
- ), "When processing a set, its dataset state should be Complete."
385
+ assert dataset_set.dataset.state == DatasetState.Complete.value, (
386
+ "When processing a set, its dataset state should be Complete."
387
+ )
388
388
 
389
389
  logger.info(f"Retrieving data for {dataset_set} ({i}/{count})")
390
390
  self.download_dataset_artifact(dataset_set.dataset)
@@ -405,7 +405,7 @@ class DatasetWorker(DatasetMixin, BaseWorker, TaskMixin):
405
405
  # Cleanup the latest downloaded dataset artifact
406
406
  self.cleanup_downloaded_artifact()
407
407
 
408
- message = f'Ran on {count} {pluralize("set", count)}: {count - failed} completed, {failed} failed'
408
+ message = f"Ran on {count} {pluralize('set', count)}: {count - failed} completed, {failed} failed"
409
409
  if failed:
410
410
  logger.error(message)
411
411
  if failed >= count: # Everything failed!
@@ -305,9 +305,9 @@ class BaseWorker:
305
305
 
306
306
  if self.use_cache:
307
307
  if self.args.database is not None:
308
- assert (
309
- self.args.database.is_file()
310
- ), f"Database in {self.args.database} does not exist"
308
+ assert self.args.database.is_file(), (
309
+ f"Database in {self.args.database} does not exist"
310
+ )
311
311
  self.cache_path = self.args.database
312
312
  else:
313
313
  cache_dir = self.task_data_dir / self.task_id
@@ -378,9 +378,9 @@ class BaseWorker:
378
378
  gpg = gnupg.GPG()
379
379
  with path.open("rb") as gpg_file:
380
380
  decrypted = gpg.decrypt_file(gpg_file)
381
- assert (
382
- decrypted.ok
383
- ), f"GPG error: {decrypted.status} - {decrypted.stderr}"
381
+ assert decrypted.ok, (
382
+ f"GPG error: {decrypted.status} - {decrypted.stderr}"
383
+ )
384
384
  secret = decrypted.data.decode("utf-8")
385
385
  logging.info(f"Loaded local secret {name}")
386
386
  except Exception as e:
@@ -27,7 +27,7 @@ class ClassificationMixin:
27
27
  )
28
28
  self.classes = {ml_class["name"]: ml_class["id"] for ml_class in corpus_classes}
29
29
  logger.info(
30
- f'Loaded {len(self.classes)} ML {pluralize("class", len(self.classes))} in corpus ({self.corpus_id})'
30
+ f"Loaded {len(self.classes)} ML {pluralize('class', len(self.classes))} in corpus ({self.corpus_id})"
31
31
  )
32
32
 
33
33
  def get_ml_class_id(self, ml_class: str) -> str:
@@ -60,9 +60,9 @@ class ClassificationMixin:
60
60
  f"Reloading corpus classes to see if {ml_class} already exists"
61
61
  )
62
62
  self.load_corpus_classes()
63
- assert (
64
- ml_class in self.classes
65
- ), "Missing class {ml_class} even after reloading"
63
+ assert ml_class in self.classes, (
64
+ "Missing class {ml_class} even after reloading"
65
+ )
66
66
  ml_class_id = self.classes[ml_class]
67
67
 
68
68
  return ml_class_id
@@ -86,9 +86,9 @@ class ClassificationMixin:
86
86
  ),
87
87
  None,
88
88
  )
89
- assert (
90
- ml_class_name is not None
91
- ), f"Missing class with id ({ml_class_id}) in corpus ({self.corpus_id})"
89
+ assert ml_class_name is not None, (
90
+ f"Missing class with id ({ml_class_id}) in corpus ({self.corpus_id})"
91
+ )
92
92
  return ml_class_name
93
93
 
94
94
  def create_classification(
@@ -107,18 +107,18 @@ class ClassificationMixin:
107
107
  :param high_confidence: Whether or not the classification is of high confidence.
108
108
  :returns: The created classification, as returned by the ``CreateClassification`` API endpoint.
109
109
  """
110
- assert element and isinstance(
111
- element, Element | CachedElement
112
- ), "element shouldn't be null and should be an Element or CachedElement"
113
- assert ml_class and isinstance(
114
- ml_class, str
115
- ), "ml_class shouldn't be null and should be of type str"
116
- assert (
117
- isinstance(confidence, float) and 0 <= confidence <= 1
118
- ), "confidence shouldn't be null and should be a float in [0..1] range"
119
- assert isinstance(
120
- high_confidence, bool
121
- ), "high_confidence shouldn't be null and should be of type bool"
110
+ assert element and isinstance(element, Element | CachedElement), (
111
+ "element shouldn't be null and should be an Element or CachedElement"
112
+ )
113
+ assert ml_class and isinstance(ml_class, str), (
114
+ "ml_class shouldn't be null and should be of type str"
115
+ )
116
+ assert isinstance(confidence, float) and 0 <= confidence <= 1, (
117
+ "confidence shouldn't be null and should be a float in [0..1] range"
118
+ )
119
+ assert isinstance(high_confidence, bool), (
120
+ "high_confidence shouldn't be null and should be of type bool"
121
+ )
122
122
  if self.is_read_only:
123
123
  logger.warning(
124
124
  "Cannot create classification as this worker is in read-only mode"
@@ -198,31 +198,33 @@ class ClassificationMixin:
198
198
  :returns: List of created classifications, as returned in the ``classifications`` field by
199
199
  the ``CreateClassifications`` API endpoint.
200
200
  """
201
- assert element and isinstance(
202
- element, Element | CachedElement
203
- ), "element shouldn't be null and should be an Element or CachedElement"
204
- assert classifications and isinstance(
205
- classifications, list
206
- ), "classifications shouldn't be null and should be of type list"
201
+ assert element and isinstance(element, Element | CachedElement), (
202
+ "element shouldn't be null and should be an Element or CachedElement"
203
+ )
204
+ assert classifications and isinstance(classifications, list), (
205
+ "classifications shouldn't be null and should be of type list"
206
+ )
207
207
 
208
208
  for index, classification in enumerate(classifications):
209
209
  ml_class = classification.get("ml_class")
210
- assert (
211
- ml_class and isinstance(ml_class, str)
212
- ), f"Classification at index {index} in classifications: ml_class shouldn't be null and should be of type str"
210
+ assert ml_class and isinstance(ml_class, str), (
211
+ f"Classification at index {index} in classifications: ml_class shouldn't be null and should be of type str"
212
+ )
213
213
 
214
214
  confidence = classification.get("confidence")
215
215
  assert (
216
216
  confidence is not None
217
217
  and isinstance(confidence, float)
218
218
  and 0 <= confidence <= 1
219
- ), f"Classification at index {index} in classifications: confidence shouldn't be null and should be a float in [0..1] range"
219
+ ), (
220
+ f"Classification at index {index} in classifications: confidence shouldn't be null and should be a float in [0..1] range"
221
+ )
220
222
 
221
223
  high_confidence = classification.get("high_confidence")
222
224
  if high_confidence is not None:
223
- assert isinstance(
224
- high_confidence, bool
225
- ), f"Classification at index {index} in classifications: high_confidence should be of type bool"
225
+ assert isinstance(high_confidence, bool), (
226
+ f"Classification at index {index} in classifications: high_confidence should be of type bool"
227
+ )
226
228
 
227
229
  if self.is_read_only:
228
230
  logger.warning(
@@ -76,9 +76,9 @@ class CorpusMixin:
76
76
  key=itemgetter("updated"),
77
77
  reverse=True,
78
78
  )
79
- assert (
80
- len(exports) > 0
81
- ), f'No available exports found for the corpus ({self.corpus_id}) with state "{CorpusExportState.Done.value.capitalize()}".'
79
+ assert len(exports) > 0, (
80
+ f'No available exports found for the corpus ({self.corpus_id}) with state "{CorpusExportState.Done.value.capitalize()}".'
81
+ )
82
82
 
83
83
  # Download latest export
84
84
  export_id: str = exports[0]["id"]