arkindex-base-worker 0.5.1b4__py3-none-any.whl → 0.5.1.post1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {arkindex_base_worker-0.5.1b4.dist-info → arkindex_base_worker-0.5.1.post1.dist-info}/METADATA +5 -6
- {arkindex_base_worker-0.5.1b4.dist-info → arkindex_base_worker-0.5.1.post1.dist-info}/RECORD +17 -17
- arkindex_worker/cache.py +6 -1
- arkindex_worker/image.py +5 -1
- arkindex_worker/models.py +5 -0
- arkindex_worker/utils.py +27 -0
- arkindex_worker/worker/base.py +21 -12
- arkindex_worker/worker/metadata.py +3 -3
- tests/conftest.py +0 -18
- tests/test_base_worker.py +33 -123
- tests/test_cache.py +1 -1
- tests/test_element.py +52 -12
- tests/test_image.py +19 -3
- tests/test_utils.py +42 -0
- {arkindex_base_worker-0.5.1b4.dist-info → arkindex_base_worker-0.5.1.post1.dist-info}/WHEEL +0 -0
- {arkindex_base_worker-0.5.1b4.dist-info → arkindex_base_worker-0.5.1.post1.dist-info}/licenses/LICENSE +0 -0
- {arkindex_base_worker-0.5.1b4.dist-info → arkindex_base_worker-0.5.1.post1.dist-info}/top_level.txt +0 -0
{arkindex_base_worker-0.5.1b4.dist-info → arkindex_base_worker-0.5.1.post1.dist-info}/METADATA
RENAMED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: arkindex-base-worker
|
|
3
|
-
Version: 0.5.1b4
|
|
3
|
+
Version: 0.5.1.post1
|
|
4
4
|
Summary: Base Worker to easily build Arkindex ML workflows
|
|
5
5
|
Author-email: Teklia <contact@teklia.com>
|
|
6
6
|
Maintainer-email: Teklia <contact@teklia.com>
|
|
@@ -41,16 +41,15 @@ Classifier: Programming Language :: Python :: 3.12
|
|
|
41
41
|
Requires-Python: >=3.10
|
|
42
42
|
Description-Content-Type: text/markdown
|
|
43
43
|
License-File: LICENSE
|
|
44
|
-
Requires-Dist: humanize==4.
|
|
44
|
+
Requires-Dist: humanize==4.14.0
|
|
45
45
|
Requires-Dist: peewee~=3.17
|
|
46
46
|
Requires-Dist: Pillow==11.3.0
|
|
47
|
-
Requires-Dist: python-gnupg==0.5.
|
|
47
|
+
Requires-Dist: python-gnupg==0.5.5
|
|
48
48
|
Requires-Dist: shapely==2.0.6
|
|
49
49
|
Requires-Dist: teklia-toolbox==0.1.11
|
|
50
|
-
Requires-Dist: zstandard==0.
|
|
50
|
+
Requires-Dist: zstandard==0.25.0
|
|
51
51
|
Provides-Extra: tests
|
|
52
|
-
Requires-Dist: pytest==
|
|
53
|
-
Requires-Dist: pytest-mock==3.14.0; extra == "tests"
|
|
52
|
+
Requires-Dist: pytest-mock==3.15.1; extra == "tests"
|
|
54
53
|
Requires-Dist: pytest-responses==0.5.1; extra == "tests"
|
|
55
54
|
Dynamic: license-file
|
|
56
55
|
|
{arkindex_base_worker-0.5.1b4.dist-info → arkindex_base_worker-0.5.1.post1.dist-info}/RECORD
RENAMED
|
@@ -1,18 +1,18 @@
|
|
|
1
|
-
arkindex_base_worker-0.5.
|
|
1
|
+
arkindex_base_worker-0.5.1.post1.dist-info/licenses/LICENSE,sha256=NVshRi1efwVezMfW7xXYLrdDr2Li1AfwfGOd5WuH1kQ,1063
|
|
2
2
|
arkindex_worker/__init__.py,sha256=Sdt5KXn8EgURb2MurYVrUWaHbH3iFA1XLRo0Lc5AJ44,250
|
|
3
|
-
arkindex_worker/cache.py,sha256=
|
|
4
|
-
arkindex_worker/image.py,sha256=
|
|
5
|
-
arkindex_worker/models.py,sha256=
|
|
6
|
-
arkindex_worker/utils.py,sha256=
|
|
3
|
+
arkindex_worker/cache.py,sha256=XpEXMSnbhYCvrJquwA9XXqZo-ajMLpaCxKG5wH3Gp6Y,10959
|
|
4
|
+
arkindex_worker/image.py,sha256=sGE8to5iykXv25bpkftOEWzlh5NzBZSKy4lSRoHYHPU,20929
|
|
5
|
+
arkindex_worker/models.py,sha256=7GnKqpWPOSxyR_eKlDNVBe_r3TcE4ofK-1GzaonJEdM,10132
|
|
6
|
+
arkindex_worker/utils.py,sha256=Eqg5pGAuOmuwMT3EhKTQDMek7wHC1KzZL7XXqYVVfHY,10977
|
|
7
7
|
arkindex_worker/worker/__init__.py,sha256=SzD0s1_m6gMV02EUF-NeciqZdVPA4dpXI84tSj-g494,17869
|
|
8
|
-
arkindex_worker/worker/base.py,sha256
|
|
8
|
+
arkindex_worker/worker/base.py,sha256=-R_aLMJHbR6X1uM-U0zExsF_KLy5Wl3WJ_YMGO9We0I,22153
|
|
9
9
|
arkindex_worker/worker/classification.py,sha256=qvykymkgd4nGywHCxL8obo4egstoGsmWNS4Ztc1qNWQ,11024
|
|
10
10
|
arkindex_worker/worker/corpus.py,sha256=MeIMod7jkWyX0frtD0a37rhumnMV3p9ZOC1xwAoXrAA,2291
|
|
11
11
|
arkindex_worker/worker/dataset.py,sha256=tVaPx43vaH-KTtx4w5V06e26ha8XPfiJTRzBXlu928Y,5273
|
|
12
12
|
arkindex_worker/worker/element.py,sha256=sLfnf09AfJ5tSCKQ7cAkl7WsGhjsfq14swsT30MDnYk,47385
|
|
13
13
|
arkindex_worker/worker/entity.py,sha256=Aj6EOfzHEm7qQV-Egm0YKLZgCrLS_3ggOKTY81M2JbI,12323
|
|
14
14
|
arkindex_worker/worker/image.py,sha256=L6Ikuf0Z0RxJk7JarY5PggJGrYSHLaPK0vn0dy0CIaQ,623
|
|
15
|
-
arkindex_worker/worker/metadata.py,sha256=
|
|
15
|
+
arkindex_worker/worker/metadata.py,sha256=keZdOdUthSH2hAw9iet5pN7rzWihTUYjZHRGTEjaltw,6843
|
|
16
16
|
arkindex_worker/worker/process.py,sha256=9TEHpMcBax1wc6PrWMMrdXe2uNfqyVj7n_dAYZRBGnY,1854
|
|
17
17
|
arkindex_worker/worker/task.py,sha256=nYfMSFm_d-4t8y4PO4HjFBnLsZf7IsDjkS7-A2Pgnac,1525
|
|
18
18
|
arkindex_worker/worker/training.py,sha256=tyQOHcwv--_wdYz6CgLEe1YM7kwwwKN30LvGTsnWd78,10923
|
|
@@ -21,15 +21,15 @@ examples/standalone/python/worker.py,sha256=Zr4s4pHvgexEjlkixLFYZp1UuwMLeoTxjyNG
|
|
|
21
21
|
examples/tooled/python/worker.py,sha256=kIYlHLsO5UpwX4XtERRq4tf2qTsvqKK30C-w8t0yyhA,1821
|
|
22
22
|
hooks/pre_gen_project.py,sha256=xQJERv3vv9VzIqcBHI281eeWLWREXUF4mMw7PvJHHXM,269
|
|
23
23
|
tests/__init__.py,sha256=DG--S6IpGl399rzSAjDdHL76CkOIeZIjajCcyUSDhOQ,241
|
|
24
|
-
tests/conftest.py,sha256=
|
|
25
|
-
tests/test_base_worker.py,sha256=
|
|
26
|
-
tests/test_cache.py,sha256=
|
|
24
|
+
tests/conftest.py,sha256=Tp7YFK17NATwF2yAcBwi0QFNyKSXtLS0VhZ-zZngsQI,24343
|
|
25
|
+
tests/test_base_worker.py,sha256=lwS4X3atS2ktEKd1XdogmN3mbzq-tO206-k_0EDITlw,29302
|
|
26
|
+
tests/test_cache.py,sha256=_wztzh94EwVrb8UvpFqgl2aa2_FLaCcJKaqunCYR5Dw,10435
|
|
27
27
|
tests/test_dataset_worker.py,sha256=iDJM2C4PfQNH0r4_QqSWoPt8BcM0geUUdODtWY0Z9PA,22412
|
|
28
|
-
tests/test_element.py,sha256=
|
|
29
|
-
tests/test_image.py,sha256=
|
|
28
|
+
tests/test_element.py,sha256=hlj5VSF4plwC7uz9R4LGOOXZJQcHZiYCIDZT5V6EIB8,14334
|
|
29
|
+
tests/test_image.py,sha256=yAM5mMfpQcIurT1KLHmu0AhSX2Qm3YvCu7afyZ3XUdU,28314
|
|
30
30
|
tests/test_merge.py,sha256=REpZ13jkq_qm_4L5URQgFy5lxvPZtXxQEiWfYLMdmF0,7956
|
|
31
31
|
tests/test_modern_config.py,sha256=Bm-a4LYQXgLZWQX7AmVyfJW0LNoLy1wj2d2GjzDkcBk,2683
|
|
32
|
-
tests/test_utils.py,sha256=
|
|
32
|
+
tests/test_utils.py,sha256=tgzNqyJMpddpeFWEjgsew_yDzmqnCA9HDaA5IpevAcM,5353
|
|
33
33
|
tests/test_elements_worker/__init__.py,sha256=2t3NciCIOun_N-Wv63FWGsTm5W9N3mbwAWVuFORlMg8,308
|
|
34
34
|
tests/test_elements_worker/test_classification.py,sha256=nya7veSPR_O9G41Enodp2-o6AifMBcaSTWJP2vXSSJ4,30133
|
|
35
35
|
tests/test_elements_worker/test_cli.py,sha256=a23i1pUDbXi23MUtbWwGEcLLrmc_YlrbDgOG3h66wLM,2620
|
|
@@ -55,7 +55,7 @@ worker-demo/tests/conftest.py,sha256=XzNMNeg6pmABUAH8jN6eZTlZSFGLYjS3-DTXjiRN6Yc
|
|
|
55
55
|
worker-demo/tests/test_worker.py,sha256=3DLd4NRK4bfyatG5P_PK4k9P9tJHx9XQq5_ryFEEFVg,304
|
|
56
56
|
worker-demo/worker_demo/__init__.py,sha256=2BPomV8ZMNf3YXJgloatKeHQCE6QOkwmsHGkO6MkQuM,125
|
|
57
57
|
worker-demo/worker_demo/worker.py,sha256=Rt-DjWa5iBP08k58NDZMfeyPuFbtNcbX6nc5jFX7GNo,440
|
|
58
|
-
arkindex_base_worker-0.5.
|
|
59
|
-
arkindex_base_worker-0.5.
|
|
60
|
-
arkindex_base_worker-0.5.
|
|
61
|
-
arkindex_base_worker-0.5.
|
|
58
|
+
arkindex_base_worker-0.5.1.post1.dist-info/METADATA,sha256=guiF6br4V1kfGBTRwX1ezI4b1oRG7RYdMGPnXq2jvIU,3094
|
|
59
|
+
arkindex_base_worker-0.5.1.post1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
60
|
+
arkindex_base_worker-0.5.1.post1.dist-info/top_level.txt,sha256=-vNjP2VfROx0j83mdi9aIqRZ88eoJjxeWz-R_gPgyXU,49
|
|
61
|
+
arkindex_base_worker-0.5.1.post1.dist-info/RECORD,,
|
arkindex_worker/cache.py
CHANGED
|
@@ -73,6 +73,7 @@ class CachedImage(Model):
|
|
|
73
73
|
width = IntegerField()
|
|
74
74
|
height = IntegerField()
|
|
75
75
|
url = TextField()
|
|
76
|
+
version = IntegerField(default=2)
|
|
76
77
|
|
|
77
78
|
class Meta:
|
|
78
79
|
database = db
|
|
@@ -157,6 +158,10 @@ class CachedElement(Model):
|
|
|
157
158
|
else:
|
|
158
159
|
resize = f"{max_width or ''},{max_height or ''}"
|
|
159
160
|
|
|
161
|
+
# Use `max` instead of `full` for IIIF 3, since `full` was deprecated in 2.1 then removed in 3.0
|
|
162
|
+
if self.image.version == 3 and resize == "full":
|
|
163
|
+
resize = "max"
|
|
164
|
+
|
|
160
165
|
url = self.image.url
|
|
161
166
|
if not url.endswith("/"):
|
|
162
167
|
url += "/"
|
|
@@ -259,7 +264,7 @@ MODELS = [
|
|
|
259
264
|
CachedDataset,
|
|
260
265
|
CachedDatasetElement,
|
|
261
266
|
]
|
|
262
|
-
SQL_VERSION =
|
|
267
|
+
SQL_VERSION = 5
|
|
263
268
|
|
|
264
269
|
|
|
265
270
|
def init_cache_db(path: Path):
|
arkindex_worker/image.py
CHANGED
|
@@ -366,6 +366,10 @@ def download_tiles(url: str) -> Image:
|
|
|
366
366
|
logger.debug("Downloading image information")
|
|
367
367
|
info = _retried_request(url + "info.json").json()
|
|
368
368
|
|
|
369
|
+
# Use `max` instead of `full` for IIIF 3, since `full` was deprecated in 2.1 then removed in 3.0
|
|
370
|
+
# With IIIF 3, the image's ID will be at `id`, while IIIF 2 will use `@id`
|
|
371
|
+
resize = "max" if "id" in info else "full"
|
|
372
|
+
|
|
369
373
|
image_width, image_height = info.get("width"), info.get("height")
|
|
370
374
|
assert image_width and image_height, "Missing image dimensions in info.json"
|
|
371
375
|
assert info.get("tiles"), (
|
|
@@ -391,7 +395,7 @@ def download_tiles(url: str) -> Image:
|
|
|
391
395
|
|
|
392
396
|
logger.debug(f"Downloading tile {tile_x},{tile_y}")
|
|
393
397
|
resp = _retried_request(
|
|
394
|
-
f"{url}{region_x},{region_y},{region_width},{region_height}/full/0/default.jpg"
|
|
398
|
+
f"{url}{region_x},{region_y},{region_width},{region_height}/{resize}/0/default.jpg"
|
|
395
399
|
)
|
|
396
400
|
|
|
397
401
|
tile_img = Image.open(BytesIO(resp.content))
|
arkindex_worker/models.py
CHANGED
|
@@ -87,6 +87,11 @@ class Element(MagicDict):
|
|
|
87
87
|
url = self.zone.image.get("s3_url")
|
|
88
88
|
if url:
|
|
89
89
|
return url
|
|
90
|
+
|
|
91
|
+
# Use `max` instead of `full` for IIIF 3, since `full` was deprecated in 2.1 then removed in 3.0
|
|
92
|
+
if self.zone.image.server.get("version", 2) == 3 and size == "full":
|
|
93
|
+
size = "max"
|
|
94
|
+
|
|
90
95
|
url = self.zone.image.url
|
|
91
96
|
if not url.endswith("/"):
|
|
92
97
|
url += "/"
|
arkindex_worker/utils.py
CHANGED
|
@@ -4,6 +4,7 @@ import logging
|
|
|
4
4
|
import os
|
|
5
5
|
import tarfile
|
|
6
6
|
import tempfile
|
|
7
|
+
import zipfile
|
|
7
8
|
from collections.abc import Callable, Generator
|
|
8
9
|
from itertools import islice
|
|
9
10
|
from pathlib import Path
|
|
@@ -225,6 +226,32 @@ def create_tar_zst_archive(
|
|
|
225
226
|
return zst_fd, zst_archive, zst_hash, tar_hash
|
|
226
227
|
|
|
227
228
|
|
|
229
|
+
def create_zip_archive(source: Path, destination: Path | None = None) -> Path:
|
|
230
|
+
"""Helper to create a ZIP archive from a source folder.
|
|
231
|
+
|
|
232
|
+
:param source: Path to the folder whose content should be archived.
|
|
233
|
+
:param destination: Path to the created archive, defaults to None. If unspecified, a temporary file will be created.
|
|
234
|
+
:return: The file descriptor of the created tempfile (if one was created), path to the archive.
|
|
235
|
+
"""
|
|
236
|
+
# Parse destination and create a tmpfile if none was specified
|
|
237
|
+
file_d, destination = (
|
|
238
|
+
tempfile.mkstemp(prefix="teklia-", suffix=".zip")
|
|
239
|
+
if destination is None
|
|
240
|
+
else (None, destination)
|
|
241
|
+
)
|
|
242
|
+
destination = Path(destination)
|
|
243
|
+
logger.debug(f"Compressing file to {destination}")
|
|
244
|
+
|
|
245
|
+
with zipfile.ZipFile(
|
|
246
|
+
destination, mode="w", compression=zipfile.ZIP_DEFLATED
|
|
247
|
+
) as archive:
|
|
248
|
+
for p in source.rglob("*"):
|
|
249
|
+
relpath = p.relative_to(source)
|
|
250
|
+
archive.write(p, arcname=relpath)
|
|
251
|
+
|
|
252
|
+
return archive, destination
|
|
253
|
+
|
|
254
|
+
|
|
228
255
|
DEFAULT_BATCH_SIZE = 50
|
|
229
256
|
"""Batch size used for bulk publication to Arkindex"""
|
|
230
257
|
|
arkindex_worker/worker/base.py
CHANGED
|
@@ -265,6 +265,11 @@ class BaseWorker:
|
|
|
265
265
|
if not item["secret"]:
|
|
266
266
|
return (item["key"], item["value"])
|
|
267
267
|
|
|
268
|
+
# The secret may not be picked by the user
|
|
269
|
+
if item["value"] is None:
|
|
270
|
+
logger.info(f"Optional secret `{item['key']}` is not set")
|
|
271
|
+
return (item["key"], None)
|
|
272
|
+
|
|
268
273
|
# Load secret, only available in Arkindex EE
|
|
269
274
|
try:
|
|
270
275
|
secret = self.load_secret(Path(item["value"]))
|
|
@@ -276,6 +281,19 @@ class BaseWorker:
|
|
|
276
281
|
|
|
277
282
|
return (item["key"], secret)
|
|
278
283
|
|
|
284
|
+
# Load model version configuration when available
|
|
285
|
+
# Workers will use model version ID and details to download the model
|
|
286
|
+
model_version = worker_run.get("model_version")
|
|
287
|
+
if model_version:
|
|
288
|
+
logger.info("Loaded model version configuration from WorkerRun")
|
|
289
|
+
self.model_configuration.update(model_version["configuration"])
|
|
290
|
+
|
|
291
|
+
# Set model_version ID as worker attribute
|
|
292
|
+
self.model_version_id = model_version["id"]
|
|
293
|
+
|
|
294
|
+
# Set model details as worker attribute
|
|
295
|
+
self.model_details = model_version["model"]
|
|
296
|
+
|
|
279
297
|
# Load worker run information
|
|
280
298
|
try:
|
|
281
299
|
config = self.api_client.request(
|
|
@@ -295,6 +313,9 @@ class BaseWorker:
|
|
|
295
313
|
}
|
|
296
314
|
logger.info("Using modern configuration")
|
|
297
315
|
|
|
316
|
+
# Reset the model configuration to make sure workers rely on the single new source
|
|
317
|
+
self.model_configuration = {}
|
|
318
|
+
|
|
298
319
|
return # Stop here once we have modern configuration
|
|
299
320
|
|
|
300
321
|
except ErrorResponse as e:
|
|
@@ -303,18 +324,6 @@ class BaseWorker:
|
|
|
303
324
|
logger.info("Modern configuration is not available")
|
|
304
325
|
|
|
305
326
|
# Use old-style configuration with local merge
|
|
306
|
-
# Load model version configuration when available
|
|
307
|
-
model_version = worker_run.get("model_version")
|
|
308
|
-
if model_version:
|
|
309
|
-
logger.info("Loaded model version configuration from WorkerRun")
|
|
310
|
-
self.model_configuration.update(model_version["configuration"])
|
|
311
|
-
|
|
312
|
-
# Set model_version ID as worker attribute
|
|
313
|
-
self.model_version_id = model_version["id"]
|
|
314
|
-
|
|
315
|
-
# Set model details as worker attribute
|
|
316
|
-
self.model_details = model_version["model"]
|
|
317
|
-
|
|
318
327
|
# Retrieve initial configuration from API
|
|
319
328
|
self.config = worker_version["configuration"].get("configuration", {})
|
|
320
329
|
if "user_configuration" in worker_version["configuration"]:
|
|
arkindex_worker/worker/metadata.py
CHANGED
|
@@ -20,10 +20,10 @@ class MetaType(Enum):
|
|
|
20
20
|
A regular string with no special interpretation.
|
|
21
21
|
"""
|
|
22
22
|
|
|
23
|
-
|
|
23
|
+
Markdown = "markdown"
|
|
24
24
|
"""
|
|
25
|
-
A metadata with a string value that should be interpreted as
|
|
26
|
-
|
|
25
|
+
A metadata with a string value that should be interpreted as Markdown content.
|
|
26
|
+
HTML is allowed, but the allowed HTML tags are restricted for security reasons.
|
|
27
27
|
"""
|
|
28
28
|
|
|
29
29
|
Date = "date"
|
tests/conftest.py
CHANGED
|
@@ -103,12 +103,6 @@ def _mock_worker_run_api(responses):
|
|
|
103
103
|
payload = {
|
|
104
104
|
"id": "56785678-5678-5678-5678-567856785678",
|
|
105
105
|
"parents": [],
|
|
106
|
-
"worker": {
|
|
107
|
-
"id": "deadbeef-1234-5678-1234-worker",
|
|
108
|
-
"name": "Fake worker",
|
|
109
|
-
"slug": "fake_worker",
|
|
110
|
-
"type": "classifier",
|
|
111
|
-
},
|
|
112
106
|
"worker_version": {
|
|
113
107
|
"id": "12341234-1234-1234-1234-123412341234",
|
|
114
108
|
"configuration": {
|
|
@@ -180,12 +174,6 @@ def _mock_worker_run_no_revision_api(responses):
|
|
|
180
174
|
payload = {
|
|
181
175
|
"id": "56785678-5678-5678-5678-567856785678",
|
|
182
176
|
"parents": [],
|
|
183
|
-
"worker": {
|
|
184
|
-
"id": "deadbeef-1234-5678-1234-worker",
|
|
185
|
-
"name": "Fake worker",
|
|
186
|
-
"slug": "fake_worker",
|
|
187
|
-
"type": "classifier",
|
|
188
|
-
},
|
|
189
177
|
"worker_version": {
|
|
190
178
|
"id": "12341234-1234-1234-1234-123412341234",
|
|
191
179
|
"configuration": {
|
|
@@ -355,12 +343,6 @@ def mock_elements_worker_consume_wa(monkeypatch, responses, mock_elements_worker
|
|
|
355
343
|
json={
|
|
356
344
|
"id": "56785678-5678-5678-5678-567856785678",
|
|
357
345
|
"parents": [],
|
|
358
|
-
"worker": {
|
|
359
|
-
"id": "deadbeef-1234-5678-1234-worker",
|
|
360
|
-
"name": "Fake worker",
|
|
361
|
-
"slug": "fake_worker",
|
|
362
|
-
"type": "classifier",
|
|
363
|
-
},
|
|
364
346
|
"worker_version": {
|
|
365
347
|
"id": "12341234-1234-1234-1234-123412341234",
|
|
366
348
|
"configuration": {
|
tests/test_base_worker.py
CHANGED
|
@@ -13,6 +13,29 @@ from arkindex_worker.worker import BaseWorker, ElementsWorker
|
|
|
13
13
|
from arkindex_worker.worker.base import ExtrasDirNotFoundError
|
|
14
14
|
from tests import CORPUS_ID, FIXTURES_DIR
|
|
15
15
|
|
|
16
|
+
SIMPLE_PAYLOAD = {
|
|
17
|
+
"id": "56785678-5678-5678-5678-567856785678",
|
|
18
|
+
"parents": [],
|
|
19
|
+
"worker_version": {
|
|
20
|
+
"id": "12341234-1234-1234-1234-123412341234",
|
|
21
|
+
"worker": {
|
|
22
|
+
"id": "deadbeef-1234-5678-1234-worker",
|
|
23
|
+
"name": "Fake worker",
|
|
24
|
+
"slug": "fake_worker",
|
|
25
|
+
"type": "classifier",
|
|
26
|
+
},
|
|
27
|
+
"revision": {"hash": "deadbeef1234"},
|
|
28
|
+
"configuration": {"configuration": {}},
|
|
29
|
+
},
|
|
30
|
+
"configuration": None,
|
|
31
|
+
"model_version": None,
|
|
32
|
+
"process": {
|
|
33
|
+
"id": "aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeffff",
|
|
34
|
+
"corpus": CORPUS_ID,
|
|
35
|
+
},
|
|
36
|
+
"summary": "Worker Fake worker @ 123412",
|
|
37
|
+
}
|
|
38
|
+
|
|
16
39
|
|
|
17
40
|
def test_init_default_local_share():
|
|
18
41
|
worker = BaseWorker()
|
|
@@ -149,38 +172,13 @@ def test_configure_worker_run(mocker, responses, caplog):
|
|
|
149
172
|
|
|
150
173
|
worker = BaseWorker()
|
|
151
174
|
mocker.patch.object(sys, "argv", ["worker"])
|
|
152
|
-
user_configuration = {
|
|
153
|
-
"id": "bbbbbbbbb-bbbb-bbbb-bbbb-bbbbbbbbbbbb",
|
|
154
|
-
"name": "BBB",
|
|
155
|
-
"configuration": {"a": "b"},
|
|
156
|
-
}
|
|
157
175
|
payload = {
|
|
158
|
-
|
|
159
|
-
"
|
|
160
|
-
|
|
161
|
-
"
|
|
162
|
-
"
|
|
163
|
-
"slug": "fake_worker",
|
|
164
|
-
"type": "classifier",
|
|
165
|
-
},
|
|
166
|
-
"worker_version": {
|
|
167
|
-
"id": "12341234-1234-1234-1234-123412341234",
|
|
168
|
-
"worker": {
|
|
169
|
-
"id": "deadbeef-1234-5678-1234-worker",
|
|
170
|
-
"name": "Fake worker",
|
|
171
|
-
"slug": "fake_worker",
|
|
172
|
-
"type": "classifier",
|
|
173
|
-
},
|
|
174
|
-
"revision": {"hash": "deadbeef1234"},
|
|
175
|
-
"configuration": {"configuration": {}},
|
|
176
|
-
},
|
|
177
|
-
"configuration": user_configuration,
|
|
178
|
-
"model_version": None,
|
|
179
|
-
"process": {
|
|
180
|
-
"id": "aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeffff",
|
|
181
|
-
"corpus": CORPUS_ID,
|
|
176
|
+
**SIMPLE_PAYLOAD,
|
|
177
|
+
"configuration": {
|
|
178
|
+
"id": "bbbbbbbbb-bbbb-bbbb-bbbb-bbbbbbbbbbbb",
|
|
179
|
+
"name": "BBB",
|
|
180
|
+
"configuration": {"a": "b"},
|
|
182
181
|
},
|
|
183
|
-
"summary": "Worker Fake worker @ 123412",
|
|
184
182
|
}
|
|
185
183
|
|
|
186
184
|
responses.add(
|
|
@@ -262,8 +260,7 @@ def test_configure_user_configuration_defaults(mocker, responses):
|
|
|
262
260
|
worker.args = worker.parser.parse_args()
|
|
263
261
|
|
|
264
262
|
payload = {
|
|
265
|
-
|
|
266
|
-
"parents": [],
|
|
263
|
+
**SIMPLE_PAYLOAD,
|
|
267
264
|
"worker_version": {
|
|
268
265
|
"id": "12341234-1234-1234-1234-123412341234",
|
|
269
266
|
"worker": {
|
|
@@ -293,12 +290,6 @@ def test_configure_user_configuration_defaults(mocker, responses):
|
|
|
293
290
|
"param_5": True,
|
|
294
291
|
},
|
|
295
292
|
},
|
|
296
|
-
"model_version": None,
|
|
297
|
-
"process": {
|
|
298
|
-
"id": "aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeffff",
|
|
299
|
-
"corpus": CORPUS_ID,
|
|
300
|
-
},
|
|
301
|
-
"summary": "Worker Fake worker @ 123412",
|
|
302
293
|
}
|
|
303
294
|
responses.add(
|
|
304
295
|
responses.GET,
|
|
@@ -340,30 +331,12 @@ def test_configure_user_config_debug(mocker, responses, debug):
|
|
|
340
331
|
mocker.patch.object(sys, "argv", ["worker"])
|
|
341
332
|
assert logger.level == logging.NOTSET
|
|
342
333
|
payload = {
|
|
343
|
-
|
|
344
|
-
"parents": [],
|
|
345
|
-
"worker_version": {
|
|
346
|
-
"id": "12341234-1234-1234-1234-123412341234",
|
|
347
|
-
"worker": {
|
|
348
|
-
"id": "deadbeef-1234-5678-1234-worker",
|
|
349
|
-
"name": "Fake worker",
|
|
350
|
-
"slug": "fake_worker",
|
|
351
|
-
"type": "classifier",
|
|
352
|
-
},
|
|
353
|
-
"revision": {"hash": "deadbeef1234"},
|
|
354
|
-
"configuration": {"configuration": {}},
|
|
355
|
-
},
|
|
356
|
-
"model_version": None,
|
|
334
|
+
**SIMPLE_PAYLOAD,
|
|
357
335
|
"configuration": {
|
|
358
336
|
"id": "af0daaf4-983e-4703-a7ed-a10f146d6684",
|
|
359
337
|
"name": "BBB",
|
|
360
338
|
"configuration": {"debug": debug},
|
|
361
339
|
},
|
|
362
|
-
"process": {
|
|
363
|
-
"id": "aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeffff",
|
|
364
|
-
"corpus": CORPUS_ID,
|
|
365
|
-
},
|
|
366
|
-
"summary": "Worker Fake worker @ 123412",
|
|
367
340
|
}
|
|
368
341
|
responses.add(
|
|
369
342
|
responses.GET,
|
|
@@ -393,32 +366,8 @@ def test_configure_worker_run_missing_conf(mocker, responses):
|
|
|
393
366
|
mocker.patch.object(sys, "argv", ["worker"])
|
|
394
367
|
|
|
395
368
|
payload = {
|
|
396
|
-
|
|
397
|
-
"parents": [],
|
|
398
|
-
"worker": {
|
|
399
|
-
"id": "deadbeef-1234-5678-1234-worker",
|
|
400
|
-
"name": "Fake worker",
|
|
401
|
-
"slug": "fake_worker",
|
|
402
|
-
"type": "classifier",
|
|
403
|
-
},
|
|
404
|
-
"worker_version": {
|
|
405
|
-
"id": "12341234-1234-1234-1234-123412341234",
|
|
406
|
-
"worker": {
|
|
407
|
-
"id": "deadbeef-1234-5678-1234-worker",
|
|
408
|
-
"name": "Fake worker",
|
|
409
|
-
"slug": "fake_worker",
|
|
410
|
-
"type": "classifier",
|
|
411
|
-
},
|
|
412
|
-
"revision": {"hash": "deadbeef1234"},
|
|
413
|
-
"configuration": {"configuration": {}},
|
|
414
|
-
},
|
|
415
|
-
"model_version": None,
|
|
369
|
+
**SIMPLE_PAYLOAD,
|
|
416
370
|
"configuration": {"id": "bbbbbbbbb-bbbb-bbbb-bbbb-bbbbbbbbbbbb", "name": "BBB"},
|
|
417
|
-
"process": {
|
|
418
|
-
"id": "aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeffff",
|
|
419
|
-
"corpus": CORPUS_ID,
|
|
420
|
-
},
|
|
421
|
-
"summary": "Worker Fake worker @ 123412",
|
|
422
371
|
}
|
|
423
372
|
responses.add(
|
|
424
373
|
responses.GET,
|
|
@@ -446,28 +395,7 @@ def test_configure_worker_run_no_worker_run_conf(mocker, responses):
|
|
|
446
395
|
worker = BaseWorker()
|
|
447
396
|
mocker.patch.object(sys, "argv", ["worker"])
|
|
448
397
|
|
|
449
|
-
payload =
|
|
450
|
-
"id": "56785678-5678-5678-5678-567856785678",
|
|
451
|
-
"parents": [],
|
|
452
|
-
"worker_version": {
|
|
453
|
-
"id": "12341234-1234-1234-1234-123412341234",
|
|
454
|
-
"worker": {
|
|
455
|
-
"id": "deadbeef-1234-5678-1234-worker",
|
|
456
|
-
"name": "Fake worker",
|
|
457
|
-
"slug": "fake_worker",
|
|
458
|
-
"type": "classifier",
|
|
459
|
-
},
|
|
460
|
-
"revision": {"hash": "deadbeef1234"},
|
|
461
|
-
"configuration": {},
|
|
462
|
-
},
|
|
463
|
-
"model_version": None,
|
|
464
|
-
"configuration": None,
|
|
465
|
-
"process": {
|
|
466
|
-
"id": "aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeffff",
|
|
467
|
-
"corpus": CORPUS_ID,
|
|
468
|
-
},
|
|
469
|
-
"summary": "Worker Fake worker @ 123412",
|
|
470
|
-
}
|
|
398
|
+
payload = SIMPLE_PAYLOAD
|
|
471
399
|
responses.add(
|
|
472
400
|
responses.GET,
|
|
473
401
|
"http://testserver/api/v1/process/workers/56785678-5678-5678-5678-567856785678/",
|
|
@@ -491,20 +419,7 @@ def test_configure_load_model_configuration(mocker, responses):
|
|
|
491
419
|
worker = BaseWorker()
|
|
492
420
|
mocker.patch.object(sys, "argv", ["worker"])
|
|
493
421
|
payload = {
|
|
494
|
-
|
|
495
|
-
"parents": [],
|
|
496
|
-
"worker_version": {
|
|
497
|
-
"id": "12341234-1234-1234-1234-123412341234",
|
|
498
|
-
"worker": {
|
|
499
|
-
"id": "deadbeef-1234-5678-1234-worker",
|
|
500
|
-
"name": "Fake worker",
|
|
501
|
-
"slug": "fake_worker",
|
|
502
|
-
"type": "classifier",
|
|
503
|
-
},
|
|
504
|
-
"revision": {"hash": "deadbeef1234"},
|
|
505
|
-
"configuration": {"configuration": {}},
|
|
506
|
-
},
|
|
507
|
-
"configuration": None,
|
|
422
|
+
**SIMPLE_PAYLOAD,
|
|
508
423
|
"model_version": {
|
|
509
424
|
"id": "12341234-1234-1234-1234-123412341234",
|
|
510
425
|
"model": {
|
|
@@ -517,11 +432,6 @@ def test_configure_load_model_configuration(mocker, responses):
|
|
|
517
432
|
"param3": None,
|
|
518
433
|
},
|
|
519
434
|
},
|
|
520
|
-
"process": {
|
|
521
|
-
"id": "aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeffff",
|
|
522
|
-
"corpus": CORPUS_ID,
|
|
523
|
-
},
|
|
524
|
-
"summary": "Worker Fake worker @ 123412",
|
|
525
435
|
}
|
|
526
436
|
|
|
527
437
|
responses.add(
|
tests/test_cache.py
CHANGED
|
@@ -60,7 +60,7 @@ def test_create_tables(tmp_path):
|
|
|
60
60
|
CREATE TABLE "dataset_elements" ("id" TEXT NOT NULL PRIMARY KEY, "element_id" TEXT NOT NULL, "dataset_id" TEXT NOT NULL, "set_name" VARCHAR(255) NOT NULL, FOREIGN KEY ("element_id") REFERENCES "elements" ("id"), FOREIGN KEY ("dataset_id") REFERENCES "datasets" ("id"))
|
|
61
61
|
CREATE TABLE "datasets" ("id" TEXT NOT NULL PRIMARY KEY, "name" VARCHAR(255) NOT NULL, "state" VARCHAR(255) NOT NULL DEFAULT 'open', "sets" TEXT NOT NULL)
|
|
62
62
|
CREATE TABLE "elements" ("id" TEXT NOT NULL PRIMARY KEY, "parent_id" TEXT, "type" VARCHAR(50) NOT NULL, "image_id" TEXT, "polygon" text, "rotation_angle" INTEGER NOT NULL, "mirrored" INTEGER NOT NULL, "initial" INTEGER NOT NULL, "worker_version_id" TEXT, "worker_run_id" TEXT, "confidence" REAL, FOREIGN KEY ("image_id") REFERENCES "images" ("id"))
|
|
63
|
-
CREATE TABLE "images" ("id" TEXT NOT NULL PRIMARY KEY, "width" INTEGER NOT NULL, "height" INTEGER NOT NULL, "url" TEXT NOT NULL)
|
|
63
|
+
CREATE TABLE "images" ("id" TEXT NOT NULL PRIMARY KEY, "width" INTEGER NOT NULL, "height" INTEGER NOT NULL, "url" TEXT NOT NULL, "version" INTEGER NOT NULL)
|
|
64
64
|
CREATE TABLE "transcription_entities" ("transcription_id" TEXT NOT NULL, "type" VARCHAR(50) NOT NULL, "offset" INTEGER NOT NULL CHECK (offset >= 0), "length" INTEGER NOT NULL CHECK (length > 0), "worker_run_id" TEXT, "confidence" REAL, PRIMARY KEY ("transcription_id", "type"), FOREIGN KEY ("transcription_id") REFERENCES "transcriptions" ("id"))
|
|
65
65
|
CREATE TABLE "transcriptions" ("id" TEXT NOT NULL PRIMARY KEY, "element_id" TEXT NOT NULL, "text" TEXT NOT NULL, "confidence" REAL, "orientation" VARCHAR(50) NOT NULL, "worker_version_id" TEXT, "worker_run_id" TEXT, FOREIGN KEY ("element_id") REFERENCES "elements" ("id"))"""
|
|
66
66
|
|
tests/test_element.py
CHANGED
|
@@ -5,26 +5,36 @@ from arkindex_worker.cache import CachedElement
|
|
|
5
5
|
from arkindex_worker.models import Element
|
|
6
6
|
|
|
7
7
|
|
|
8
|
-
|
|
8
|
+
@pytest.mark.parametrize(
    ("zone", "expected_url"),
    [
        (None, None),
        (
            {"image": {"url": "http://something/", "server": {"version": 2}}},
            "http://something/full/full/0/default.jpg",
        ),
        (
            {"image": {"url": "http://something", "server": {"version": 2}}},
            "http://something/full/full/0/default.jpg",
        ),
        (
            {"image": {"url": "http://something/", "server": {"version": 3}}},
            "http://something/full/max/0/default.jpg",
        ),
    ],
)
def test_image_url(zone, expected_url):
    """Check the URL returned by `Element.image_url` for each parametrized zone.

    The cases cover an element without a zone, IIIF 2 image URLs with and
    without a trailing slash, and an IIIF 3 server.
    """
    # Build the element from the parametrized zone so that every case is exercised
    url = Element({"zone": zone}).image_url()

    if expected_url is None:
        # Without a zone, only a falsy result is required
        assert not url
    else:
        assert url == expected_url
|
|
11
29
|
|
|
12
30
|
|
|
13
|
-
def test_image_url_iiif():
|
|
14
|
-
url = Element({"zone": {"image": {"url": "http://something/"}}}).image_url()
|
|
15
|
-
assert url == "http://something/full/full/0/default.jpg"
|
|
16
|
-
|
|
17
|
-
|
|
18
31
|
def test_image_url_iiif_resize():
|
|
19
|
-
url = Element(
|
|
32
|
+
url = Element(
|
|
33
|
+
{"zone": {"image": {"url": "http://something/", "server": {"version": 2}}}}
|
|
34
|
+
).image_url(500)
|
|
20
35
|
assert url == "http://something/full/500/0/default.jpg"
|
|
21
36
|
|
|
22
37
|
|
|
23
|
-
def test_image_url_iiif_append_slash():
|
|
24
|
-
url = Element({"zone": {"image": {"url": "http://something"}}}).image_url()
|
|
25
|
-
assert url == "http://something/full/full/0/default.jpg"
|
|
26
|
-
|
|
27
|
-
|
|
28
38
|
def test_image_url_s3():
|
|
29
39
|
url = Element(
|
|
30
40
|
{
|
|
@@ -418,6 +428,36 @@ def test_open_image_rotation_mirror(mocker):
|
|
|
418
428
|
)
|
|
419
429
|
|
|
420
430
|
|
|
431
|
+
def test_open_image_iiif_3(mocker):
    """Opening the full image of an element whose image server is version 3 requests the `max` size."""
    iiif_3_server = {"max_width": None, "max_height": None, "version": 3}
    payload = {
        "zone": {
            "image": {"url": "http://something", "server": iiif_3_server},
            "polygon": [[0, 0], [181, 0], [181, 240], [0, 240], [0, 0]],
        },
        "rotation_angle": 0,
        "mirrored": False,
    }
    # Replace the real image loader so that no HTTP request is made
    patched_open = mocker.patch(
        "arkindex_worker.image.open_image", return_value="an image!"
    )

    opened = Element(payload).open_image(use_full_image=True)

    assert opened == "an image!"
    assert patched_open.call_count == 1
    assert patched_open.call_args == mocker.call(
        "http://something/full/max/0/default.jpg",
        rotation_angle=0,
        mirrored=False,
    )
|
|
459
|
+
|
|
460
|
+
|
|
421
461
|
def test_setattr_setitem():
|
|
422
462
|
element = Element({"name": "something"})
|
|
423
463
|
element.type = "page"
|
tests/test_image.py
CHANGED
|
@@ -113,21 +113,37 @@ def test_update_pillow_image_size_limit(max_image_pixels, expected_image_pixels)
|
|
|
113
113
|
assert Image.MAX_IMAGE_PIXELS == MAX_IMAGE_PIXELS
|
|
114
114
|
|
|
115
115
|
|
|
116
|
-
|
|
116
|
+
@pytest.mark.parametrize(
|
|
117
|
+
("id_key", "resize"),
|
|
118
|
+
[
|
|
119
|
+
# IIIF version 2
|
|
120
|
+
("@id", "full"),
|
|
121
|
+
# IIIF version 3
|
|
122
|
+
("id", "max"),
|
|
123
|
+
],
|
|
124
|
+
)
|
|
125
|
+
def test_download_tiles(responses, id_key, resize):
|
|
117
126
|
expected = Image.open(FULL_IMAGE).convert("RGB")
|
|
118
127
|
tile_bytes = TILE.read_bytes()
|
|
119
128
|
|
|
120
129
|
responses.add(
|
|
121
130
|
responses.GET,
|
|
122
131
|
"http://nowhere/info.json",
|
|
123
|
-
json={
|
|
132
|
+
json={
|
|
133
|
+
id_key: "http://nowhere",
|
|
134
|
+
"width": 543,
|
|
135
|
+
"height": 720,
|
|
136
|
+
"tiles": [
|
|
137
|
+
{"width": 181, "height": 240},
|
|
138
|
+
],
|
|
139
|
+
},
|
|
124
140
|
)
|
|
125
141
|
|
|
126
142
|
for x in (0, 181, 362):
|
|
127
143
|
for y in (0, 240, 480):
|
|
128
144
|
responses.add(
|
|
129
145
|
responses.GET,
|
|
130
|
-
f"http://nowhere/{x},{y},181,240/full/0/default.jpg",
|
|
146
|
+
f"http://nowhere/{x},{y},181,240/{resize}/0/default.jpg",
|
|
131
147
|
body=tile_bytes,
|
|
132
148
|
)
|
|
133
149
|
|
tests/test_utils.py
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import logging
|
|
2
|
+
import zipfile
|
|
2
3
|
|
|
3
4
|
import pytest
|
|
4
5
|
|
|
@@ -7,6 +8,7 @@ from arkindex_worker.utils import (
|
|
|
7
8
|
DEFAULT_BATCH_SIZE,
|
|
8
9
|
batch_publication,
|
|
9
10
|
close_delete_file,
|
|
11
|
+
create_zip_archive,
|
|
10
12
|
extract_tar_zst_archive,
|
|
11
13
|
parse_source_id,
|
|
12
14
|
)
|
|
@@ -118,3 +120,43 @@ def test_batch_publication_decorator_alongside_unsupported_cache(caplog):
|
|
|
118
120
|
"This API helper `custom_publication_in_batches_without_cache` did not update the cache database",
|
|
119
121
|
),
|
|
120
122
|
]
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
def test_zip_archive():
    """Archive a fixture folder and check that the ZIP lists every folder and file it contains."""
    # Create zip archive from fixtures
    _, archive = create_zip_archive(FIXTURES_DIR / "extract_parent_archives/expected")

    try:
        # Check the files in the archive
        with zipfile.ZipFile(archive, mode="r") as f:
            assert sorted(f.namelist()) == [
                "test/",
                "test/images/",
                "test/images/f2649ce7-333e-44d2-ae73-387f18aad1f6.png",
                "test/labels/",
                "test/labels/f2649ce7-333e-44d2-ae73-387f18aad1f6.png",
                "test/labels_json/",
                "test/labels_json/f2649ce7-333e-44d2-ae73-387f18aad1f6.json",
                "train/",
                "train/images/",
                "train/images/98115546-df07-448c-a2f0-34aa24789b77.png",
                "train/images/ebeaa451-9287-4df7-9c40-07eb25cadb78.png",
                "train/labels/",
                "train/labels/98115546-df07-448c-a2f0-34aa24789b77.png",
                "train/labels/ebeaa451-9287-4df7-9c40-07eb25cadb78.png",
                "train/labels_json/",
                "train/labels_json/98115546-df07-448c-a2f0-34aa24789b77.json",
                "train/labels_json/ebeaa451-9287-4df7-9c40-07eb25cadb78.json",
                "val/",
                "val/images/",
                "val/images/2987176d-4338-40f2-90d9-6d2cb4fd4a00.png",
                "val/images/e3f91312-9201-45b7-9c32-e04a97ff1334.png",
                "val/labels/",
                "val/labels/2987176d-4338-40f2-90d9-6d2cb4fd4a00.png",
                "val/labels/e3f91312-9201-45b7-9c32-e04a97ff1334.png",
                "val/labels_json/",
                "val/labels_json/2987176d-4338-40f2-90d9-6d2cb4fd4a00.json",
                "val/labels_json/e3f91312-9201-45b7-9c32-e04a97ff1334.json",
            ]
    finally:
        # Cleanup, even when the assertion above fails, so no temporary archive is left behind
        archive.unlink()
|
|
File without changes
|
|
File without changes
|
{arkindex_base_worker-0.5.1b4.dist-info → arkindex_base_worker-0.5.1.post1.dist-info}/top_level.txt
RENAMED
|
File without changes
|