arkindex-base-worker 0.3.5rc6__tar.gz → 0.3.6rc2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52)
  1. arkindex-base-worker-0.3.6rc2/PKG-INFO +39 -0
  2. arkindex-base-worker-0.3.6rc2/arkindex_base_worker.egg-info/PKG-INFO +39 -0
  3. {arkindex-base-worker-0.3.5rc6 → arkindex-base-worker-0.3.6rc2}/arkindex_base_worker.egg-info/SOURCES.txt +0 -2
  4. arkindex-base-worker-0.3.6rc2/arkindex_base_worker.egg-info/requires.txt +9 -0
  5. {arkindex-base-worker-0.3.5rc6 → arkindex-base-worker-0.3.6rc2}/arkindex_worker/__init__.py +0 -1
  6. {arkindex-base-worker-0.3.5rc6 → arkindex-base-worker-0.3.6rc2}/arkindex_worker/cache.py +19 -25
  7. {arkindex-base-worker-0.3.5rc6 → arkindex-base-worker-0.3.6rc2}/arkindex_worker/image.py +16 -17
  8. {arkindex-base-worker-0.3.5rc6 → arkindex-base-worker-0.3.6rc2}/arkindex_worker/models.py +24 -21
  9. {arkindex-base-worker-0.3.5rc6 → arkindex-base-worker-0.3.6rc2}/arkindex_worker/utils.py +18 -19
  10. {arkindex-base-worker-0.3.5rc6 → arkindex-base-worker-0.3.6rc2}/arkindex_worker/worker/__init__.py +17 -27
  11. {arkindex-base-worker-0.3.5rc6 → arkindex-base-worker-0.3.6rc2}/arkindex_worker/worker/base.py +12 -7
  12. {arkindex-base-worker-0.3.5rc6 → arkindex-base-worker-0.3.6rc2}/arkindex_worker/worker/classification.py +13 -15
  13. {arkindex-base-worker-0.3.5rc6 → arkindex-base-worker-0.3.6rc2}/arkindex_worker/worker/dataset.py +3 -4
  14. {arkindex-base-worker-0.3.5rc6 → arkindex-base-worker-0.3.6rc2}/arkindex_worker/worker/element.py +80 -76
  15. {arkindex-base-worker-0.3.5rc6 → arkindex-base-worker-0.3.6rc2}/arkindex_worker/worker/entity.py +28 -30
  16. {arkindex-base-worker-0.3.5rc6 → arkindex-base-worker-0.3.6rc2}/arkindex_worker/worker/metadata.py +21 -27
  17. {arkindex-base-worker-0.3.5rc6 → arkindex-base-worker-0.3.6rc2}/arkindex_worker/worker/task.py +2 -3
  18. {arkindex-base-worker-0.3.5rc6 → arkindex-base-worker-0.3.6rc2}/arkindex_worker/worker/training.py +25 -26
  19. {arkindex-base-worker-0.3.5rc6 → arkindex-base-worker-0.3.6rc2}/arkindex_worker/worker/transcription.py +37 -34
  20. {arkindex-base-worker-0.3.5rc6 → arkindex-base-worker-0.3.6rc2}/arkindex_worker/worker/version.py +1 -2
  21. arkindex-base-worker-0.3.6rc2/pyproject.toml +83 -0
  22. {arkindex-base-worker-0.3.5rc6 → arkindex-base-worker-0.3.6rc2}/setup.py +2 -12
  23. {arkindex-base-worker-0.3.5rc6 → arkindex-base-worker-0.3.6rc2}/tests/conftest.py +56 -76
  24. {arkindex-base-worker-0.3.5rc6 → arkindex-base-worker-0.3.6rc2}/tests/test_base_worker.py +38 -32
  25. {arkindex-base-worker-0.3.5rc6 → arkindex-base-worker-0.3.6rc2}/tests/test_cache.py +14 -7
  26. {arkindex-base-worker-0.3.5rc6 → arkindex-base-worker-0.3.6rc2}/tests/test_dataset_worker.py +25 -22
  27. {arkindex-base-worker-0.3.5rc6 → arkindex-base-worker-0.3.6rc2}/tests/test_element.py +0 -1
  28. {arkindex-base-worker-0.3.5rc6 → arkindex-base-worker-0.3.6rc2}/tests/test_elements_worker/__init__.py +0 -1
  29. {arkindex-base-worker-0.3.5rc6 → arkindex-base-worker-0.3.6rc2}/tests/test_elements_worker/test_classifications.py +0 -1
  30. {arkindex-base-worker-0.3.5rc6 → arkindex-base-worker-0.3.6rc2}/tests/test_elements_worker/test_cli.py +22 -17
  31. {arkindex-base-worker-0.3.5rc6 → arkindex-base-worker-0.3.6rc2}/tests/test_elements_worker/test_dataset.py +9 -10
  32. {arkindex-base-worker-0.3.5rc6 → arkindex-base-worker-0.3.6rc2}/tests/test_elements_worker/test_elements.py +58 -63
  33. {arkindex-base-worker-0.3.5rc6 → arkindex-base-worker-0.3.6rc2}/tests/test_elements_worker/test_entities.py +10 -20
  34. {arkindex-base-worker-0.3.5rc6 → arkindex-base-worker-0.3.6rc2}/tests/test_elements_worker/test_metadata.py +72 -96
  35. {arkindex-base-worker-0.3.5rc6 → arkindex-base-worker-0.3.6rc2}/tests/test_elements_worker/test_task.py +22 -20
  36. {arkindex-base-worker-0.3.5rc6 → arkindex-base-worker-0.3.6rc2}/tests/test_elements_worker/test_training.py +20 -13
  37. {arkindex-base-worker-0.3.5rc6 → arkindex-base-worker-0.3.6rc2}/tests/test_elements_worker/test_transcriptions.py +6 -10
  38. {arkindex-base-worker-0.3.5rc6 → arkindex-base-worker-0.3.6rc2}/tests/test_elements_worker/test_worker.py +16 -14
  39. {arkindex-base-worker-0.3.5rc6 → arkindex-base-worker-0.3.6rc2}/tests/test_image.py +21 -20
  40. {arkindex-base-worker-0.3.5rc6 → arkindex-base-worker-0.3.6rc2}/tests/test_merge.py +5 -6
  41. {arkindex-base-worker-0.3.5rc6 → arkindex-base-worker-0.3.6rc2}/tests/test_utils.py +0 -1
  42. arkindex-base-worker-0.3.5rc6/PKG-INFO +0 -26
  43. arkindex-base-worker-0.3.5rc6/arkindex_base_worker.egg-info/PKG-INFO +0 -26
  44. arkindex-base-worker-0.3.5rc6/arkindex_base_worker.egg-info/requires.txt +0 -19
  45. arkindex-base-worker-0.3.5rc6/arkindex_worker/git.py +0 -392
  46. arkindex-base-worker-0.3.5rc6/pyproject.toml +0 -24
  47. arkindex-base-worker-0.3.5rc6/tests/test_git.py +0 -480
  48. {arkindex-base-worker-0.3.5rc6 → arkindex-base-worker-0.3.6rc2}/README.md +0 -0
  49. {arkindex-base-worker-0.3.5rc6 → arkindex-base-worker-0.3.6rc2}/arkindex_base_worker.egg-info/dependency_links.txt +0 -0
  50. {arkindex-base-worker-0.3.5rc6 → arkindex-base-worker-0.3.6rc2}/arkindex_base_worker.egg-info/top_level.txt +0 -0
  51. {arkindex-base-worker-0.3.5rc6 → arkindex-base-worker-0.3.6rc2}/setup.cfg +0 -0
  52. {arkindex-base-worker-0.3.5rc6 → arkindex-base-worker-0.3.6rc2}/tests/__init__.py +0 -0
@@ -0,0 +1,39 @@
+ Metadata-Version: 2.1
+ Name: arkindex-base-worker
+ Version: 0.3.6rc2
+ Summary: Base Worker to easily build Arkindex ML workflows
+ Author-email: Teklia <contact@teklia.com>
+ Maintainer-email: Teklia <contact@teklia.com>
+ Project-URL: Homepage, https://workers.arkindex.org
+ Project-URL: Documentation, https://workers.arkindex.org
+ Project-URL: Repository, https://gitlab.teklia.com/workers/base-worker
+ Project-URL: Bug Tracker, https://gitlab.teklia.com/workers/base-worker/issues
+ Project-URL: Authors, https://teklia.com
+ Keywords: python
+ Classifier: Development Status :: 5 - Production/Stable
+ Classifier: License :: OSI Approved :: MIT License
+ Classifier: Programming Language :: Python :: 3 :: Only
+ Classifier: Programming Language :: Python :: 3.10
+ Classifier: Programming Language :: Python :: 3.11
+ Classifier: Topic :: Text Processing :: Linguistic
+ Requires-Python: >=3.10
+ Description-Content-Type: text/markdown
+ Provides-Extra: docs
+ Requires-Dist: black==23.11.0; extra == "docs"
+ Requires-Dist: doc8==1.1.1; extra == "docs"
+ Requires-Dist: mkdocs==1.5.3; extra == "docs"
+ Requires-Dist: mkdocs-material==9.4.8; extra == "docs"
+ Requires-Dist: mkdocstrings==0.23.0; extra == "docs"
+ Requires-Dist: mkdocstrings-python==1.7.3; extra == "docs"
+ Requires-Dist: recommonmark==0.7.1; extra == "docs"
+
+ # Arkindex base Worker
+
+ An easy to use Python 3 high level API client, to build ML tasks.
+
+ ## Create a new worker using our template
+
+ ```
+ pip install --user cookiecutter
+ cookiecutter git@gitlab.teklia.com:workers/base-worker.git
+ ```
@@ -0,0 +1,39 @@
+ Metadata-Version: 2.1
+ Name: arkindex-base-worker
+ Version: 0.3.6rc2
+ Summary: Base Worker to easily build Arkindex ML workflows
+ Author-email: Teklia <contact@teklia.com>
+ Maintainer-email: Teklia <contact@teklia.com>
+ Project-URL: Homepage, https://workers.arkindex.org
+ Project-URL: Documentation, https://workers.arkindex.org
+ Project-URL: Repository, https://gitlab.teklia.com/workers/base-worker
+ Project-URL: Bug Tracker, https://gitlab.teklia.com/workers/base-worker/issues
+ Project-URL: Authors, https://teklia.com
+ Keywords: python
+ Classifier: Development Status :: 5 - Production/Stable
+ Classifier: License :: OSI Approved :: MIT License
+ Classifier: Programming Language :: Python :: 3 :: Only
+ Classifier: Programming Language :: Python :: 3.10
+ Classifier: Programming Language :: Python :: 3.11
+ Classifier: Topic :: Text Processing :: Linguistic
+ Requires-Python: >=3.10
+ Description-Content-Type: text/markdown
+ Provides-Extra: docs
+ Requires-Dist: black==23.11.0; extra == "docs"
+ Requires-Dist: doc8==1.1.1; extra == "docs"
+ Requires-Dist: mkdocs==1.5.3; extra == "docs"
+ Requires-Dist: mkdocs-material==9.4.8; extra == "docs"
+ Requires-Dist: mkdocstrings==0.23.0; extra == "docs"
+ Requires-Dist: mkdocstrings-python==1.7.3; extra == "docs"
+ Requires-Dist: recommonmark==0.7.1; extra == "docs"
+
+ # Arkindex base Worker
+
+ An easy to use Python 3 high level API client, to build ML tasks.
+
+ ## Create a new worker using our template
+
+ ```
+ pip install --user cookiecutter
+ cookiecutter git@gitlab.teklia.com:workers/base-worker.git
+ ```
@@ -9,7 +9,6 @@ arkindex_base_worker.egg-info/requires.txt
  arkindex_base_worker.egg-info/top_level.txt
  arkindex_worker/__init__.py
  arkindex_worker/cache.py
- arkindex_worker/git.py
  arkindex_worker/image.py
  arkindex_worker/models.py
  arkindex_worker/utils.py
@@ -30,7 +29,6 @@ tests/test_base_worker.py
  tests/test_cache.py
  tests/test_dataset_worker.py
  tests/test_element.py
- tests/test_git.py
  tests/test_image.py
  tests/test_merge.py
  tests/test_utils.py
@@ -0,0 +1,9 @@
+
+ [docs]
+ black==23.11.0
+ doc8==1.1.1
+ mkdocs==1.5.3
+ mkdocs-material==9.4.8
+ mkdocstrings==0.23.0
+ mkdocstrings-python==1.7.3
+ recommonmark==0.7.1
@@ -1,4 +1,3 @@
- # -*- coding: utf-8 -*-
  import logging

  logging.basicConfig(
@@ -1,4 +1,3 @@
- # -*- coding: utf-8 -*-
  """
  Database mappings and helper methods for the experimental worker caching feature.

@@ -10,7 +9,6 @@ reducing network usage.
  import json
  import sqlite3
  from pathlib import Path
- from typing import Optional, Union

  from peewee import (
  SQL,
@@ -106,8 +104,8 @@ class CachedElement(Model):
  def open_image(
  self,
  *args,
- max_width: Optional[int] = None,
- max_height: Optional[int] = None,
+ max_width: int | None = None,
+ max_height: int | None = None,
  **kwargs,
  ) -> Image:
  """
@@ -145,17 +143,15 @@ class CachedElement(Model):
  if max_width is None and max_height is None:
  resize = "full"
  else:
- # Do not resize for polygons that do not exactly match the images
- # as the resize is made directly by the IIIF server using the box parameter
  if (
+ # Do not resize for polygons that do not exactly match the images
+ # as the resize is made directly by the IIIF server using the box parameter
  bounding_box.width != self.image.width
  or bounding_box.height != self.image.height
- ):
- resize = "full"
-
- # Do not resize when the image is below the maximum size
- elif (max_width is None or self.image.width <= max_width) and (
- max_height is None or self.image.height <= max_height
+ ) or (
+ # Do not resize when the image is below the maximum size
+ (max_width is None or self.image.width <= max_width)
+ and (max_height is None or self.image.height <= max_height)
  ):
  resize = "full"
  else:
@@ -319,22 +315,21 @@ def create_version_table():
  Version.create(version=SQL_VERSION)


- def check_version(cache_path: Union[str, Path]):
+ def check_version(cache_path: str | Path):
  """
  Check the validity of the SQLite version

  :param cache_path: Path towards a local SQLite database
  """
- with SqliteDatabase(cache_path) as provided_db:
- with provided_db.bind_ctx([Version]):
- try:
- version = Version.get().version
- except OperationalError:
- version = None
+ with SqliteDatabase(cache_path) as provided_db, provided_db.bind_ctx([Version]):
+ try:
+ version = Version.get().version
+ except OperationalError:
+ version = None

- assert (
- version == SQL_VERSION
- ), f"The SQLite database {cache_path} does not have the correct cache version, it should be {SQL_VERSION}"
+ assert (
+ version == SQL_VERSION
+ ), f"The SQLite database {cache_path} does not have the correct cache version, it should be {SQL_VERSION}"


  def merge_parents_cache(paths: list, current_database: Path):
@@ -358,9 +353,8 @@ def merge_parents_cache(paths: list, current_database: Path):
  # Check that the parent cache uses a compatible version
  check_version(path)

- with SqliteDatabase(path) as source:
- with source.bind_ctx(MODELS):
- source.create_tables(MODELS)
+ with SqliteDatabase(path) as source, source.bind_ctx(MODELS):
+ source.create_tables(MODELS)

  logger.info(f"Merging parent db {path} into {current_database}")
  statements = [
@@ -1,4 +1,3 @@
- # -*- coding: utf-8 -*-
  """
  Helper methods to download and open IIIF images, and manage polygons.
  """
@@ -7,7 +6,7 @@ from collections import namedtuple
  from io import BytesIO
  from math import ceil
  from pathlib import Path
- from typing import TYPE_CHECKING, List, Optional, Union
+ from typing import TYPE_CHECKING

  import requests
  from PIL import Image
@@ -42,9 +41,9 @@ IIIF_MAX = "max"

  def open_image(
  path: str,
- mode: Optional[str] = "RGB",
- rotation_angle: Optional[int] = 0,
- mirrored: Optional[bool] = False,
+ mode: str | None = "RGB",
+ rotation_angle: int | None = 0,
+ mirrored: bool | None = False,
  ) -> Image:
  """
  Open an image from a path or a URL.
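The hunk above only modernises the annotations on `open_image`; the parameters themselves (`path`, `mode`, `rotation_angle`, `mirrored`) keep their previous meaning. A minimal usage sketch, assuming `arkindex_worker.image` is importable and using an invented file name:

```
from arkindex_worker.image import open_image

# Accepts either a local path or a IIIF URL; the image is converted
# to the requested mode (default "RGB") if it differs.
image = open_image(
    "page_0001.jpg",    # hypothetical local file; a URL also works
    mode="L",           # request a greyscale copy
    rotation_angle=90,  # rotate the loaded image
    mirrored=False,
)
print(image.size)
```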
@@ -71,7 +70,7 @@ def open_image(
  else:
  try:
  image = Image.open(path)
- except (IOError, ValueError):
+ except (OSError, ValueError):
  image = download_image(path)

  if image.mode != mode:
@@ -141,14 +140,14 @@ def download_image(url: str) -> Image:
  return image


- def polygon_bounding_box(polygon: List[List[Union[int, float]]]) -> BoundingBox:
+ def polygon_bounding_box(polygon: list[list[int | float]]) -> BoundingBox:
  """
  Compute the rectangle bounding box of a polygon.

  :param polygon: Polygon to get the bounding box of.
  :returns: Bounding box of this polygon.
  """
- x_coords, y_coords = zip(*polygon)
+ x_coords, y_coords = zip(*polygon, strict=True)
  x, y = min(x_coords), min(y_coords)
  width, height = max(x_coords) - x, max(y_coords) - y
  return BoundingBox(x, y, width, height)
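Because the full body of `polygon_bounding_box` appears in the hunk above, its behaviour is easy to illustrate; the `strict=True` added to `zip` also means points of inconsistent length now raise `ValueError` instead of being silently truncated to the shortest one. A small sketch with made-up coordinates (the import path follows the package layout in the file list):

```
from arkindex_worker.image import polygon_bounding_box

# The bounding box is the smallest axis-aligned rectangle that
# contains every [x, y] point of the polygon.
polygon = [[10, 20], [110, 20], [110, 70], [10, 70]]
box = polygon_bounding_box(polygon)
print(box)  # BoundingBox(x=10, y=20, width=100, height=50)
```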
@@ -248,8 +247,8 @@ def download_tiles(url: str) -> Image:


  def trim_polygon(
- polygon: List[List[int]], image_width: int, image_height: int
- ) -> List[List[int]]:
+ polygon: list[list[int]], image_width: int, image_height: int
+ ) -> list[list[int]]:
  """
  Trim a polygon to an image's boundaries, with non-negative coordinates.

@@ -265,10 +264,10 @@ def trim_polygon(
  """

  assert isinstance(
- polygon, (list, tuple)
+ polygon, list | tuple
  ), "Input polygon must be a valid list or tuple of points."
  assert all(
- isinstance(point, (list, tuple)) for point in polygon
+ isinstance(point, list | tuple) for point in polygon
  ), "Polygon points must be tuples or lists."
  assert all(
  len(point) == 2 for point in polygon
@@ -301,10 +300,10 @@ def trim_polygon(


  def revert_orientation(
- element: Union["Element", "CachedElement"],
- polygon: List[List[Union[int, float]]],
- reverse: Optional[bool] = False,
- ) -> List[List[int]]:
+ element: "Element | CachedElement",
+ polygon: list[list[int | float]],
+ reverse: bool = False,
+ ) -> list[list[int]]:
  """
  Update the coordinates of the polygon of a child element based on the orientation of
  its parent.
@@ -324,7 +323,7 @@ def revert_orientation(
  from arkindex_worker.models import Element

  assert element and isinstance(
- element, (Element, CachedElement)
+ element, Element | CachedElement
  ), "element shouldn't be null and should be an Element or CachedElement"
  assert polygon and isinstance(
  polygon, list
@@ -1,11 +1,10 @@
- # -*- coding: utf-8 -*-
  """
  Wrappers around API results to provide more convenient attribute access and IIIF helpers.
  """

  import tempfile
+ from collections.abc import Generator
  from contextlib import contextmanager
- from typing import Generator, List, Optional

  from PIL import Image
  from requests import HTTPError
@@ -34,10 +33,10 @@ class MagicDict(dict):
  def __getattr__(self, name):
  try:
  return self[name]
- except KeyError:
+ except KeyError as e:
  raise AttributeError(
- "{} object has no attribute '{}'".format(self.__class__.__name__, name)
- )
+ f"{self.__class__.__name__} object has no attribute '{name}'"
+ ) from e

  def __setattr__(self, name, value):
  return super().__setitem__(name, value)
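For readers unfamiliar with `MagicDict`: it exposes dictionary keys as attributes, and the `raise ... from e` added above keeps the original `KeyError` attached to the resulting `AttributeError`. A rough sketch of the behaviour, assuming the class can be built from a plain dict like its `dict` base:

```
from arkindex_worker.models import MagicDict

element = MagicDict({"name": "page_1"})
print(element.name)  # "page_1", resolved through __getattr__

try:
    element.missing
except AttributeError as err:
    # The swallowed KeyError is now chained as the cause
    print(type(err.__cause__))  # <class 'KeyError'>
```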
@@ -74,7 +73,7 @@ class Element(MagicDict):
  parts[-3] = size
  return "/".join(parts)

- def image_url(self, size: str = "full") -> Optional[str]:
+ def image_url(self, size: str = "full") -> str | None:
  """
  Build an URL to access the image.
  When possible, will return the S3 URL for images, so an ML worker can bypass IIIF servers.
@@ -89,10 +88,10 @@ class Element(MagicDict):
  url = self.zone.image.url
  if not url.endswith("/"):
  url += "/"
- return "{}full/{}/0/default.jpg".format(url, size)
+ return f"{url}full/{size}/0/default.jpg"

  @property
- def polygon(self) -> List[float]:
+ def polygon(self) -> list[float]:
  """
  Access an Element's polygon.
  This is a shortcut to an Element's polygon, normally accessed via
@@ -101,7 +100,7 @@ class Element(MagicDict):
  the [CachedElement][arkindex_worker.cache.CachedElement].polygon field.
  """
  if not self.get("zone"):
- raise ValueError("Element {} has no zone".format(self.id))
+ raise ValueError(f"Element {self.id} has no zone")
  return self.zone.polygon

  @property
@@ -122,11 +121,11 @@ class Element(MagicDict):
  def open_image(
  self,
  *args,
- max_width: Optional[int] = None,
- max_height: Optional[int] = None,
- use_full_image: Optional[bool] = False,
+ max_width: int | None = None,
+ max_height: int | None = None,
+ use_full_image: bool | None = False,
  **kwargs,
- ) -> Image:
+ ) -> Image.Image:
  """
  Open this element's image using Pillow, rotating and mirroring it according
  to the ``rotation_angle`` and ``mirrored`` attributes.
@@ -173,7 +172,7 @@ class Element(MagicDict):
  )

  if not self.get("zone"):
- raise ValueError("Element {} has no zone".format(self.id))
+ raise ValueError(f"Element {self.id} has no zone")

  if self.requires_tiles:
  if max_width is None and max_height is None:
@@ -194,10 +193,7 @@
  else:
  resize = f"{max_width or ''},{max_height or ''}"

- if use_full_image:
- url = self.image_url(resize)
- else:
- url = self.resize_zone_url(resize)
+ url = self.image_url(resize) if use_full_image else self.resize_zone_url(resize)

  try:
  return open_image(
@@ -215,13 +211,13 @@
  # This element uses an S3 URL: the URL may have expired.
  # Call the API to get a fresh URL and try again
  # TODO: this should be done by the worker
- raise NotImplementedError
+ raise NotImplementedError from e
  return open_image(self.image_url(resize), *args, **kwargs)
  raise

  @contextmanager
  def open_image_tempfile(
- self, format: Optional[str] = "jpeg", *args, **kwargs
+ self, format: str | None = "jpeg", *args, **kwargs
  ) -> Generator[tempfile.NamedTemporaryFile, None, None]:
  """
  Get the element's image as a temporary file stored on the disk.
@@ -249,7 +245,7 @@ class Element(MagicDict):
  type_name = self.type["display_name"]
  else:
  type_name = str(self.type)
- return "{} {} ({})".format(type_name, self.name, self.id)
+ return f"{type_name} {self.name} ({self.id})"


  class ArkindexModel(MagicDict):
@@ -268,6 +264,13 @@ class Dataset(ArkindexModel):
  Describes an Arkindex dataset.
  """

+ @property
+ def filepath(self) -> str:
+ """
+ Generic filepath to the Dataset compressed archive.
+ """
+ return f"{self.id}.tar.zst"
+

  class Artifact(ArkindexModel):
  """
@@ -1,11 +1,9 @@
- # -*- coding: utf-8 -*-
  import hashlib
  import logging
  import os
  import tarfile
  import tempfile
  from pathlib import Path
- from typing import Optional, Tuple, Union

  import zstandard
  import zstandard as zstd
@@ -16,7 +14,7 @@ CHUNK_SIZE = 1024
  """Chunk Size used for ZSTD compression"""


- def decompress_zst_archive(compressed_archive: Path) -> Tuple[int, Path]:
+ def decompress_zst_archive(compressed_archive: Path) -> tuple[int, Path]:
  """
  Decompress a ZST-compressed tar archive in data dir. The tar archive is not extracted.
  This returns the path to the archive and the file descriptor.
@@ -29,18 +27,19 @@ def decompress_zst_archive(compressed_archive: Path) -> Tuple[int, Path]:
  """
  dctx = zstandard.ZstdDecompressor()
  archive_fd, archive_path = tempfile.mkstemp(suffix=".tar")
+ archive_path = Path(archive_path)

  logger.debug(f"Uncompressing file to {archive_path}")
  try:
- with open(compressed_archive, "rb") as compressed, open(
- archive_path, "wb"
+ with compressed_archive.open("rb") as compressed, archive_path.open(
+ "wb"
  ) as decompressed:
  dctx.copy_stream(compressed, decompressed)
  logger.debug(f"Successfully uncompressed archive {compressed_archive}")
  except zstandard.ZstdError as e:
- raise Exception(f"Couldn't uncompressed archive: {e}")
+ raise Exception(f"Couldn't uncompressed archive: {e}") from e

- return archive_fd, Path(archive_path)
+ return archive_fd, archive_path


  def extract_tar_archive(archive_path: Path, destination: Path):
@@ -54,12 +53,12 @@ def extract_tar_archive(archive_path: Path, destination: Path):
  with tarfile.open(archive_path) as tar_archive:
  tar_archive.extractall(destination)
  except tarfile.ReadError as e:
- raise Exception(f"Couldn't handle the decompressed Tar archive: {e}")
+ raise Exception(f"Couldn't handle the decompressed Tar archive: {e}") from e


  def extract_tar_zst_archive(
  compressed_archive: Path, destination: Path
- ) -> Tuple[int, Path]:
+ ) -> tuple[int, Path]:
  """
  Extract a ZST-compressed tar archive's content to a specific destination

@@ -89,8 +88,8 @@ def close_delete_file(file_descriptor: int, file_path: Path):


  def zstd_compress(
- source: Path, destination: Optional[Path] = None
- ) -> Tuple[Union[int, None], Path, str]:
+ source: Path, destination: Path | None = None
+ ) -> tuple[int | None, Path, str]:
  """Compress a file using the Zstandard compression algorithm.

  :param source: Path to the file to compress.
@@ -117,13 +116,13 @@ def zstd_compress(
  archive_file.write(compressed_chunk)
  logger.debug(f"Successfully compressed {source}")
  except zstandard.ZstdError as e:
- raise Exception(f"Couldn't compress archive: {e}")
+ raise Exception(f"Couldn't compress archive: {e}") from e
  return file_d, destination, archive_hasher.hexdigest()


  def create_tar_archive(
- path: Path, destination: Optional[Path] = None
- ) -> Tuple[Union[int, None], Path, str]:
+ path: Path, destination: Path | None = None
+ ) -> tuple[int | None, Path, str]:
  """Create a tar archive using the content at specified location.

  :param path: Path to the file to archive
@@ -153,7 +152,7 @@ def create_tar_archive(
  files.append(p)
  logger.debug(f"Successfully created Tar archive from files @ {path}")
  except tarfile.TarError as e:
- raise Exception(f"Couldn't create Tar archive: {e}")
+ raise Exception(f"Couldn't create Tar archive: {e}") from e

  # Sort by path
  files.sort()
@@ -168,8 +167,8 @@


  def create_tar_zst_archive(
- source: Path, destination: Optional[Path] = None
- ) -> Tuple[Union[int, None], Path, str, str]:
+ source: Path, destination: Path | None = None
+ ) -> tuple[int | None, Path, str, str]:
  """Helper to create a TAR+ZST archive from a source folder.

  :param source: Path to the folder whose content should be archived.
@@ -179,8 +178,8 @@ def create_tar_zst_archive(
  # Create tar archive
  tar_fd, tar_archive, tar_hash = create_tar_archive(source)

- zstd_fd, zstd_archive, zstd_hash = zstd_compress(tar_archive, destination)
+ zst_fd, zst_archive, zst_hash = zstd_compress(tar_archive, destination)

  close_delete_file(tar_fd, tar_archive)

- return zstd_fd, zstd_archive, zstd_hash, tar_hash
+ return zst_fd, zst_archive, zst_hash, tar_hash
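Taken together, the utils.py hunks describe a small archive round trip: `create_tar_zst_archive` builds a `.tar.zst` plus checksums, `extract_tar_zst_archive` unpacks one, and `close_delete_file` releases a leftover temporary file. A rough usage sketch based only on the signatures visible above; the folder names and the exact cleanup contract are assumptions:

```
from pathlib import Path

from arkindex_worker.utils import (
    close_delete_file,
    create_tar_zst_archive,
    extract_tar_zst_archive,
)

source = Path("model_output")           # hypothetical folder to archive
archive = Path("model_output.tar.zst")  # hypothetical destination

# Compress the folder: returns a file descriptor (when a temporary file
# was created), the archive path and the zst / tar checksums.
zst_fd, zst_archive, zst_hash, tar_hash = create_tar_zst_archive(source, archive)

# ... upload zst_archive somewhere and store the hashes ...

# Unpack it elsewhere, then close and delete the intermediate tar file.
tar_fd, tar_path = extract_tar_zst_archive(zst_archive, Path("restored"))
close_delete_file(tar_fd, tar_path)
```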