arkindex-base-worker 0.3.5rc6__py3-none-any.whl → 0.3.6rc2__py3-none-any.whl

This diff shows the contents of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. arkindex_base_worker-0.3.6rc2.dist-info/METADATA +39 -0
  2. arkindex_base_worker-0.3.6rc2.dist-info/RECORD +40 -0
  3. arkindex_worker/__init__.py +0 -1
  4. arkindex_worker/cache.py +19 -25
  5. arkindex_worker/image.py +16 -17
  6. arkindex_worker/models.py +24 -21
  7. arkindex_worker/utils.py +18 -19
  8. arkindex_worker/worker/__init__.py +17 -27
  9. arkindex_worker/worker/base.py +12 -7
  10. arkindex_worker/worker/classification.py +13 -15
  11. arkindex_worker/worker/dataset.py +3 -4
  12. arkindex_worker/worker/element.py +80 -76
  13. arkindex_worker/worker/entity.py +28 -30
  14. arkindex_worker/worker/metadata.py +21 -27
  15. arkindex_worker/worker/task.py +2 -3
  16. arkindex_worker/worker/training.py +25 -26
  17. arkindex_worker/worker/transcription.py +37 -34
  18. arkindex_worker/worker/version.py +1 -2
  19. tests/conftest.py +56 -76
  20. tests/test_base_worker.py +38 -32
  21. tests/test_cache.py +14 -7
  22. tests/test_dataset_worker.py +25 -22
  23. tests/test_element.py +0 -1
  24. tests/test_elements_worker/__init__.py +0 -1
  25. tests/test_elements_worker/test_classifications.py +0 -1
  26. tests/test_elements_worker/test_cli.py +22 -17
  27. tests/test_elements_worker/test_dataset.py +9 -10
  28. tests/test_elements_worker/test_elements.py +58 -63
  29. tests/test_elements_worker/test_entities.py +10 -20
  30. tests/test_elements_worker/test_metadata.py +72 -96
  31. tests/test_elements_worker/test_task.py +22 -20
  32. tests/test_elements_worker/test_training.py +20 -13
  33. tests/test_elements_worker/test_transcriptions.py +6 -10
  34. tests/test_elements_worker/test_worker.py +16 -14
  35. tests/test_image.py +21 -20
  36. tests/test_merge.py +5 -6
  37. tests/test_utils.py +0 -1
  38. arkindex_base_worker-0.3.5rc6.dist-info/METADATA +0 -27
  39. arkindex_base_worker-0.3.5rc6.dist-info/RECORD +0 -42
  40. arkindex_worker/git.py +0 -392
  41. tests/test_git.py +0 -480
  42. {arkindex_base_worker-0.3.5rc6.dist-info → arkindex_base_worker-0.3.6rc2.dist-info}/WHEEL +0 -0
  43. {arkindex_base_worker-0.3.5rc6.dist-info → arkindex_base_worker-0.3.6rc2.dist-info}/top_level.txt +0 -0
@@ -1,10 +1,8 @@
1
- # -*- coding: utf-8 -*-
2
1
  """
3
2
  ElementsWorker methods for metadata.
4
3
  """
5
4
 
6
5
  from enum import Enum
7
- from typing import Dict, List, Optional, Union
8
6
 
9
7
  from arkindex_worker import logger
10
8
  from arkindex_worker.cache import CachedElement
@@ -57,14 +55,14 @@ class MetaType(Enum):
57
55
  """
58
56
 
59
57
 
60
- class MetaDataMixin(object):
58
+ class MetaDataMixin:
61
59
  def create_metadata(
62
60
  self,
63
- element: Union[Element, CachedElement],
61
+ element: Element | CachedElement,
64
62
  type: MetaType,
65
63
  name: str,
66
64
  value: str,
67
- entity: Optional[str] = None,
65
+ entity: str | None = None,
68
66
  ) -> str:
69
67
  """
70
68
  Create a metadata on the given element through API.
@@ -77,7 +75,7 @@ class MetaDataMixin(object):
77
75
  :returns: UUID of the created metadata.
78
76
  """
79
77
  assert element and isinstance(
80
- element, (Element, CachedElement)
78
+ element, Element | CachedElement
81
79
  ), "element shouldn't be null and should be of type Element or CachedElement"
82
80
  assert type and isinstance(
83
81
  type, MetaType
@@ -110,26 +108,22 @@ class MetaDataMixin(object):
110
108
 
111
109
  def create_metadatas(
112
110
  self,
113
- element: Union[Element, CachedElement],
114
- metadatas: List[
115
- Dict[
116
- str, Union[MetaType, str, Union[str, Union[int, float]], Optional[str]]
117
- ]
118
- ],
119
- ) -> List[Dict[str, str]]:
111
+ element: Element | CachedElement,
112
+ metadatas: list[dict[str, MetaType | str | int | float | None]],
113
+ ) -> list[dict[str, str]]:
120
114
  """
121
- Create multiple metadatas on an existing element.
115
+ Create multiple metadata on an existing element.
122
116
  This method does not support cache.
123
117
 
124
- :param element Element: The element to create multiple metadata on.
125
- :param metadata_list List(Dict): The list of dict whose keys are the following:
126
- - type : MetaType
127
- - name : str
128
- - value : Union[str, Union[int, float]]
129
- - entity_id : Union[str, None]
118
+ :param element: The element to create multiple metadata on.
119
+ :param metadatas: The list of dict whose keys are the following:
120
+ - type: MetaType
121
+ - name: str
122
+ - value: str | int | float
123
+ - entity_id: str | None
130
124
  """
131
125
  assert element and isinstance(
132
- element, (Element, CachedElement)
126
+ element, Element | CachedElement
133
127
  ), "element shouldn't be null and should be of type Element or CachedElement"
134
128
 
135
129
  assert metadatas and isinstance(
@@ -152,7 +146,7 @@ class MetaDataMixin(object):
152
146
  ), "name shouldn't be null and should be of type str"
153
147
 
154
148
  assert metadata.get("value") is not None and isinstance(
155
- metadata.get("value"), (str, float, int)
149
+ metadata.get("value"), str | float | int
156
150
  ), "value shouldn't be null and should be of type (str or float or int)"
157
151
 
158
152
  assert metadata.get("entity_id") is None or isinstance(
@@ -172,7 +166,7 @@ class MetaDataMixin(object):
172
166
  logger.warning("Cannot create metadata as this worker is in read-only mode")
173
167
  return
174
168
 
175
- created_metadatas = self.request(
169
+ created_metadata_list = self.request(
176
170
  "CreateMetaDataBulk",
177
171
  id=element.id,
178
172
  body={
@@ -181,11 +175,11 @@ class MetaDataMixin(object):
181
175
  },
182
176
  )["metadata_list"]
183
177
 
184
- return created_metadatas
178
+ return created_metadata_list
185
179
 
186
180
  def list_element_metadata(
187
- self, element: Union[Element, CachedElement]
188
- ) -> List[Dict[str, str]]:
181
+ self, element: Element | CachedElement
182
+ ) -> list[dict[str, str]]:
189
183
  """
190
184
  List all metadata linked to an element.
191
185
  This method does not support cache.
@@ -193,7 +187,7 @@ class MetaDataMixin(object):
193
187
  :param element: The element to list metadata on.
194
188
  """
195
189
  assert element and isinstance(
196
- element, (Element, CachedElement)
190
+ element, Element | CachedElement
197
191
  ), "element shouldn't be null and should be of type Element or CachedElement"
198
192
 
199
193
  return self.api_client.paginate("ListElementMetaData", id=element.id)
@@ -1,17 +1,16 @@
1
- # -*- coding: utf-8 -*-
2
1
  """
3
2
  BaseWorker methods for tasks.
4
3
  """
5
4
 
6
5
  import uuid
7
- from typing import Iterator
6
+ from collections.abc import Iterator
8
7
 
9
8
  from apistar.compat import DownloadedFile
10
9
 
11
10
  from arkindex_worker.models import Artifact
12
11
 
13
12
 
14
- class TaskMixin(object):
13
+ class TaskMixin:
15
14
  def list_artifacts(self, task_id: uuid.UUID) -> Iterator[Artifact]:
16
15
  """
17
16
  List artifacts associated to a task.
@@ -1,4 +1,3 @@
1
- # -*- coding: utf-8 -*-
2
1
  """
3
2
  BaseWorker methods for training.
4
3
  """
@@ -6,7 +5,7 @@ BaseWorker methods for training.
6
5
  import functools
7
6
  from contextlib import contextmanager
8
7
  from pathlib import Path
9
- from typing import NewType, Optional, Tuple, Union
8
+ from typing import NewType
10
9
  from uuid import UUID
11
10
 
12
11
  import requests
@@ -26,7 +25,7 @@ FileSize = NewType("FileSize", int)
26
25
 
27
26
 
28
27
  @contextmanager
29
- def create_archive(path: DirPath) -> Tuple[Path, Hash, FileSize, Hash]:
28
+ def create_archive(path: DirPath) -> tuple[Path, Hash, FileSize, Hash]:
30
29
  """
31
30
  Create a tar archive from the files at the given location then compress it to a zst archive.
32
31
 
@@ -37,15 +36,15 @@ def create_archive(path: DirPath) -> Tuple[Path, Hash, FileSize, Hash]:
37
36
  """
38
37
  assert path.is_dir(), "create_archive needs a directory"
39
38
 
40
- zstd_descriptor, zstd_archive, archive_hash, content_hash = create_tar_zst_archive(
39
+ zst_descriptor, zst_archive, archive_hash, content_hash = create_tar_zst_archive(
41
40
  path
42
41
  )
43
42
 
44
43
  # Get content hash, archive size and hash
45
- yield zstd_archive, content_hash, zstd_archive.stat().st_size, archive_hash
44
+ yield zst_archive, content_hash, zst_archive.stat().st_size, archive_hash
46
45
 
47
- # Remove the zstd archive
48
- close_delete_file(zstd_descriptor, zstd_archive)
46
+ # Remove the zst archive
47
+ close_delete_file(zst_descriptor, zst_archive)
49
48
 
50
49
 
51
50
  def build_clean_payload(**kwargs):
@@ -72,7 +71,7 @@ def skip_if_read_only(func):
72
71
  return wrapper
73
72
 
74
73
 
75
- class TrainingMixin(object):
74
+ class TrainingMixin:
76
75
  """
77
76
  A mixin helper to create a new model version easily.
78
77
  You may use `publish_model_version` to publish a ready model version directly, or
@@ -87,10 +86,10 @@ class TrainingMixin(object):
87
86
  self,
88
87
  model_path: DirPath,
89
88
  model_id: str,
90
- tag: Optional[str] = None,
91
- description: Optional[str] = None,
92
- configuration: Optional[dict] = {},
93
- parent: Optional[Union[str, UUID]] = None,
89
+ tag: str | None = None,
90
+ description: str | None = None,
91
+ configuration: dict | None = None,
92
+ parent: str | UUID | None = None,
94
93
  ):
95
94
  """
96
95
  Publish a unique version of a model in Arkindex, identified by its hash.
@@ -105,6 +104,7 @@ class TrainingMixin(object):
105
104
  :param parent: ID of the parent model version
106
105
  """
107
106
 
107
+ configuration = configuration or {}
108
108
  if not self.model_version:
109
109
  self.create_model_version(
110
110
  model_id=model_id,
@@ -161,10 +161,10 @@ class TrainingMixin(object):
161
161
  def create_model_version(
162
162
  self,
163
163
  model_id: str,
164
- tag: Optional[str] = None,
165
- description: Optional[str] = None,
166
- configuration: Optional[dict] = {},
167
- parent: Optional[Union[str, UUID]] = None,
164
+ tag: str | None = None,
165
+ description: str | None = None,
166
+ configuration: dict | None = None,
167
+ parent: str | UUID | None = None,
168
168
  ):
169
169
  """
170
170
  Create a new version of the specified model with its base attributes.
@@ -176,6 +176,8 @@ class TrainingMixin(object):
176
176
  :param parent: ID of the parent model version
177
177
  """
178
178
  assert not self.model_version, "A model version has already been created."
179
+
180
+ configuration = configuration or {}
179
181
  self.model_version = self.request(
180
182
  "CreateModelVersion",
181
183
  id=model_id,
@@ -186,6 +188,7 @@ class TrainingMixin(object):
186
188
  parent=parent,
187
189
  ),
188
190
  )
191
+
189
192
  logger.info(
190
193
  f"Model version ({self.model_version['id']}) was successfully created"
191
194
  )
@@ -193,10 +196,10 @@ class TrainingMixin(object):
193
196
  @skip_if_read_only
194
197
  def update_model_version(
195
198
  self,
196
- tag: Optional[str] = None,
197
- description: Optional[str] = None,
198
- configuration: Optional[dict] = None,
199
- parent: Optional[Union[str, UUID]] = None,
199
+ tag: str | None = None,
200
+ description: str | None = None,
201
+ configuration: dict | None = None,
202
+ parent: str | UUID | None = None,
200
203
  ):
201
204
  """
202
205
  Update the current model version with the given attributes.
@@ -235,9 +238,7 @@ class TrainingMixin(object):
235
238
  ), "The model is already marked as available."
236
239
 
237
240
  s3_put_url = self.model_version.get("s3_put_url")
238
- assert (
239
- s3_put_url
240
- ), "S3 PUT URL is not set, please ensure you have the right to validate a model version."
241
+ assert s3_put_url, "S3 PUT URL is not set, please ensure you have the right to validate a model version."
241
242
 
242
243
  logger.info("Uploading to s3...")
243
244
  # Upload the archive on s3
@@ -263,9 +264,7 @@ class TrainingMixin(object):
263
264
  :param size: The size of the uploaded archive
264
265
  :param archive_hash: MD5 hash of the uploaded archive
265
266
  """
266
- assert (
267
- self.model_version
268
- ), "You must create the model version and upload its archive before validating it."
267
+ assert self.model_version, "You must create the model version and upload its archive before validating it."
269
268
  try:
270
269
  self.model_version = self.request(
271
270
  "ValidateModelVersion",
@@ -1,10 +1,9 @@
1
- # -*- coding: utf-8 -*-
2
1
  """
3
2
  ElementsWorker methods for transcriptions.
4
3
  """
5
4
 
5
+ from collections.abc import Iterable
6
6
  from enum import Enum
7
- from typing import Dict, Iterable, List, Optional, Union
8
7
 
9
8
  from peewee import IntegrityError
10
9
 
@@ -40,14 +39,14 @@ class TextOrientation(Enum):
40
39
  """
41
40
 
42
41
 
43
- class TranscriptionMixin(object):
42
+ class TranscriptionMixin:
44
43
  def create_transcription(
45
44
  self,
46
- element: Union[Element, CachedElement],
45
+ element: Element | CachedElement,
47
46
  text: str,
48
47
  confidence: float,
49
48
  orientation: TextOrientation = TextOrientation.HorizontalLeftToRight,
50
- ) -> Optional[Dict[str, Union[str, float]]]:
49
+ ) -> dict[str, str | float] | None:
51
50
  """
52
51
  Create a transcription on the given element through the API.
53
52
 
@@ -59,7 +58,7 @@ class TranscriptionMixin(object):
59
58
  or None if the worker is in read-only mode.
60
59
  """
61
60
  assert element and isinstance(
62
- element, (Element, CachedElement)
61
+ element, Element | CachedElement
63
62
  ), "element shouldn't be null and should be an Element or CachedElement"
64
63
  assert text and isinstance(
65
64
  text, str
@@ -111,8 +110,8 @@ class TranscriptionMixin(object):
111
110
 
112
111
  def create_transcriptions(
113
112
  self,
114
- transcriptions: List[Dict[str, Union[str, float, Optional[TextOrientation]]]],
115
- ) -> List[Dict[str, Union[str, float]]]:
113
+ transcriptions: list[dict[str, str | float | TextOrientation | None]],
114
+ ) -> list[dict[str, str | float]]:
116
115
  """
117
116
  Create multiple transcriptions at once on existing elements through the API,
118
117
  and creates [CachedTranscription][arkindex_worker.cache.CachedTranscription] instances if cache support is enabled.
@@ -140,13 +139,13 @@ class TranscriptionMixin(object):
140
139
 
141
140
  for index, transcription in enumerate(transcriptions_payload):
142
141
  element_id = transcription.get("element_id")
143
- assert element_id and isinstance(
144
- element_id, str
142
+ assert (
143
+ element_id and isinstance(element_id, str)
145
144
  ), f"Transcription at index {index} in transcriptions: element_id shouldn't be null and should be of type str"
146
145
 
147
146
  text = transcription.get("text")
148
- assert text and isinstance(
149
- text, str
147
+ assert (
148
+ text and isinstance(text, str)
150
149
  ), f"Transcription at index {index} in transcriptions: text shouldn't be null and should be of type str"
151
150
 
152
151
  confidence = transcription.get("confidence")
@@ -159,8 +158,8 @@ class TranscriptionMixin(object):
159
158
  orientation = transcription.get(
160
159
  "orientation", TextOrientation.HorizontalLeftToRight
161
160
  )
162
- assert orientation and isinstance(
163
- orientation, TextOrientation
161
+ assert (
162
+ orientation and isinstance(orientation, TextOrientation)
164
163
  ), f"Transcription at index {index} in transcriptions: orientation shouldn't be null and should be of type TextOrientation"
165
164
  if orientation:
166
165
  transcription["orientation"] = orientation.value
@@ -203,10 +202,10 @@ class TranscriptionMixin(object):
203
202
 
204
203
  def create_element_transcriptions(
205
204
  self,
206
- element: Union[Element, CachedElement],
205
+ element: Element | CachedElement,
207
206
  sub_element_type: str,
208
- transcriptions: List[Dict[str, Union[str, float]]],
209
- ) -> Dict[str, Union[str, bool]]:
207
+ transcriptions: list[dict[str, str | float]],
208
+ ) -> dict[str, str | bool]:
210
209
  """
211
210
  Create multiple elements and transcriptions at once on a single parent element through the API.
212
211
 
@@ -228,7 +227,7 @@ class TranscriptionMixin(object):
228
227
  :returns: A list of dicts as returned by the ``CreateElementTranscriptions`` API endpoint.
229
228
  """
230
229
  assert element and isinstance(
231
- element, (Element, CachedElement)
230
+ element, Element | CachedElement
232
231
  ), "element shouldn't be null and should be an Element or CachedElement"
233
232
  assert sub_element_type and isinstance(
234
233
  sub_element_type, str
@@ -242,8 +241,8 @@ class TranscriptionMixin(object):
242
241
 
243
242
  for index, transcription in enumerate(transcriptions_payload):
244
243
  text = transcription.get("text")
245
- assert text and isinstance(
246
- text, str
244
+ assert (
245
+ text and isinstance(text, str)
247
246
  ), f"Transcription at index {index} in transcriptions: text shouldn't be null and should be of type str"
248
247
 
249
248
  confidence = transcription.get("confidence")
@@ -256,15 +255,15 @@ class TranscriptionMixin(object):
256
255
  orientation = transcription.get(
257
256
  "orientation", TextOrientation.HorizontalLeftToRight
258
257
  )
259
- assert orientation and isinstance(
260
- orientation, TextOrientation
258
+ assert (
259
+ orientation and isinstance(orientation, TextOrientation)
261
260
  ), f"Transcription at index {index} in transcriptions: orientation shouldn't be null and should be of type TextOrientation"
262
261
  if orientation:
263
262
  transcription["orientation"] = orientation.value
264
263
 
265
264
  polygon = transcription.get("polygon")
266
- assert polygon and isinstance(
267
- polygon, list
265
+ assert (
266
+ polygon and isinstance(polygon, list)
268
267
  ), f"Transcription at index {index} in transcriptions: polygon shouldn't be null and should be of type list"
269
268
  assert (
270
269
  len(polygon) >= 3
@@ -273,12 +272,16 @@ class TranscriptionMixin(object):
273
272
  isinstance(point, list) and len(point) == 2 for point in polygon
274
273
  ), f"Transcription at index {index} in transcriptions: polygon points should be lists of two items"
275
274
  assert all(
276
- isinstance(coord, (int, float)) for point in polygon for coord in point
275
+ isinstance(coord, int | float) for point in polygon for coord in point
277
276
  ), f"Transcription at index {index} in transcriptions: polygon points should be lists of two numbers"
278
277
 
279
278
  element_confidence = transcription.get("element_confidence")
280
- assert element_confidence is None or (
281
- isinstance(element_confidence, float) and 0 <= element_confidence <= 1
279
+ assert (
280
+ element_confidence is None
281
+ or (
282
+ isinstance(element_confidence, float)
283
+ and 0 <= element_confidence <= 1
284
+ )
282
285
  ), f"Transcription at index {index} in transcriptions: element_confidence should be either null or a float in [0..1] range"
283
286
 
284
287
  if self.is_read_only:
@@ -359,11 +362,11 @@ class TranscriptionMixin(object):
359
362
 
360
363
  def list_transcriptions(
361
364
  self,
362
- element: Union[Element, CachedElement],
363
- element_type: Optional[str] = None,
364
- recursive: Optional[bool] = None,
365
- worker_version: Optional[Union[str, bool]] = None,
366
- ) -> Union[Iterable[dict], Iterable[CachedTranscription]]:
365
+ element: Element | CachedElement,
366
+ element_type: str | None = None,
367
+ recursive: bool | None = None,
368
+ worker_version: str | bool | None = None,
369
+ ) -> Iterable[dict] | Iterable[CachedTranscription]:
367
370
  """
368
371
  List transcriptions on an element.
369
372
 
@@ -375,7 +378,7 @@ class TranscriptionMixin(object):
375
378
  or an iterable of CachedTranscription when cache support is enabled.
376
379
  """
377
380
  assert element and isinstance(
378
- element, (Element, CachedElement)
381
+ element, Element | CachedElement
379
382
  ), "element shouldn't be null and should be an Element or CachedElement"
380
383
  query_params = {}
381
384
  if element_type:
@@ -386,7 +389,7 @@ class TranscriptionMixin(object):
386
389
  query_params["recursive"] = recursive
387
390
  if worker_version is not None:
388
391
  assert isinstance(
389
- worker_version, (str, bool)
392
+ worker_version, str | bool
390
393
  ), "worker_version should be of type str or bool"
391
394
  if isinstance(worker_version, bool):
392
395
  assert (
@@ -1,10 +1,9 @@
1
- # -*- coding: utf-8 -*-
2
1
  """
3
2
  ElementsWorker methods for worker versions.
4
3
  """
5
4
 
6
5
 
7
- class WorkerVersionMixin(object):
6
+ class WorkerVersionMixin:
8
7
  def get_worker_version(self, worker_version_id: str) -> dict:
9
8
  """
10
9
  Retrieve a worker version, using the [ElementsWorker][arkindex_worker.worker.ElementsWorker]'s internal cache when possible.