arkindex-base-worker 0.3.6rc3__tar.gz → 0.3.6rc4__tar.gz
This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
- {arkindex-base-worker-0.3.6rc3 → arkindex-base-worker-0.3.6rc4}/PKG-INFO +1 -1
- {arkindex-base-worker-0.3.6rc3 → arkindex-base-worker-0.3.6rc4}/arkindex_base_worker.egg-info/PKG-INFO +1 -1
- {arkindex-base-worker-0.3.6rc3 → arkindex-base-worker-0.3.6rc4}/arkindex_worker/worker/__init__.py +8 -2
- {arkindex-base-worker-0.3.6rc3 → arkindex-base-worker-0.3.6rc4}/arkindex_worker/worker/entity.py +9 -4
- {arkindex-base-worker-0.3.6rc3 → arkindex-base-worker-0.3.6rc4}/pyproject.toml +6 -3
- arkindex-base-worker-0.3.6rc4/setup.py +4 -0
- {arkindex-base-worker-0.3.6rc3 → arkindex-base-worker-0.3.6rc4}/tests/test_cache.py +3 -0
- {arkindex-base-worker-0.3.6rc3 → arkindex-base-worker-0.3.6rc4}/tests/test_elements_worker/test_entities.py +5 -4
- arkindex-base-worker-0.3.6rc3/setup.py +0 -9
- {arkindex-base-worker-0.3.6rc3 → arkindex-base-worker-0.3.6rc4}/README.md +0 -0
- {arkindex-base-worker-0.3.6rc3 → arkindex-base-worker-0.3.6rc4}/arkindex_base_worker.egg-info/SOURCES.txt +0 -0
- {arkindex-base-worker-0.3.6rc3 → arkindex-base-worker-0.3.6rc4}/arkindex_base_worker.egg-info/dependency_links.txt +0 -0
- {arkindex-base-worker-0.3.6rc3 → arkindex-base-worker-0.3.6rc4}/arkindex_base_worker.egg-info/requires.txt +0 -0
- {arkindex-base-worker-0.3.6rc3 → arkindex-base-worker-0.3.6rc4}/arkindex_base_worker.egg-info/top_level.txt +0 -0
- {arkindex-base-worker-0.3.6rc3 → arkindex-base-worker-0.3.6rc4}/arkindex_worker/__init__.py +0 -0
- {arkindex-base-worker-0.3.6rc3 → arkindex-base-worker-0.3.6rc4}/arkindex_worker/cache.py +0 -0
- {arkindex-base-worker-0.3.6rc3 → arkindex-base-worker-0.3.6rc4}/arkindex_worker/image.py +0 -0
- {arkindex-base-worker-0.3.6rc3 → arkindex-base-worker-0.3.6rc4}/arkindex_worker/models.py +0 -0
- {arkindex-base-worker-0.3.6rc3 → arkindex-base-worker-0.3.6rc4}/arkindex_worker/utils.py +0 -0
- {arkindex-base-worker-0.3.6rc3 → arkindex-base-worker-0.3.6rc4}/arkindex_worker/worker/base.py +0 -0
- {arkindex-base-worker-0.3.6rc3 → arkindex-base-worker-0.3.6rc4}/arkindex_worker/worker/classification.py +0 -0
- {arkindex-base-worker-0.3.6rc3 → arkindex-base-worker-0.3.6rc4}/arkindex_worker/worker/dataset.py +0 -0
- {arkindex-base-worker-0.3.6rc3 → arkindex-base-worker-0.3.6rc4}/arkindex_worker/worker/element.py +0 -0
- {arkindex-base-worker-0.3.6rc3 → arkindex-base-worker-0.3.6rc4}/arkindex_worker/worker/metadata.py +0 -0
- {arkindex-base-worker-0.3.6rc3 → arkindex-base-worker-0.3.6rc4}/arkindex_worker/worker/task.py +0 -0
- {arkindex-base-worker-0.3.6rc3 → arkindex-base-worker-0.3.6rc4}/arkindex_worker/worker/training.py +0 -0
- {arkindex-base-worker-0.3.6rc3 → arkindex-base-worker-0.3.6rc4}/arkindex_worker/worker/transcription.py +0 -0
- {arkindex-base-worker-0.3.6rc3 → arkindex-base-worker-0.3.6rc4}/arkindex_worker/worker/version.py +0 -0
- {arkindex-base-worker-0.3.6rc3 → arkindex-base-worker-0.3.6rc4}/docs-requirements.txt +0 -0
- {arkindex-base-worker-0.3.6rc3 → arkindex-base-worker-0.3.6rc4}/requirements.txt +0 -0
- {arkindex-base-worker-0.3.6rc3 → arkindex-base-worker-0.3.6rc4}/setup.cfg +0 -0
- {arkindex-base-worker-0.3.6rc3 → arkindex-base-worker-0.3.6rc4}/tests/__init__.py +0 -0
- {arkindex-base-worker-0.3.6rc3 → arkindex-base-worker-0.3.6rc4}/tests/conftest.py +0 -0
- {arkindex-base-worker-0.3.6rc3 → arkindex-base-worker-0.3.6rc4}/tests/test_base_worker.py +0 -0
- {arkindex-base-worker-0.3.6rc3 → arkindex-base-worker-0.3.6rc4}/tests/test_dataset_worker.py +0 -0
- {arkindex-base-worker-0.3.6rc3 → arkindex-base-worker-0.3.6rc4}/tests/test_element.py +0 -0
- {arkindex-base-worker-0.3.6rc3 → arkindex-base-worker-0.3.6rc4}/tests/test_elements_worker/__init__.py +0 -0
- {arkindex-base-worker-0.3.6rc3 → arkindex-base-worker-0.3.6rc4}/tests/test_elements_worker/test_classifications.py +0 -0
- {arkindex-base-worker-0.3.6rc3 → arkindex-base-worker-0.3.6rc4}/tests/test_elements_worker/test_cli.py +0 -0
- {arkindex-base-worker-0.3.6rc3 → arkindex-base-worker-0.3.6rc4}/tests/test_elements_worker/test_dataset.py +0 -0
- {arkindex-base-worker-0.3.6rc3 → arkindex-base-worker-0.3.6rc4}/tests/test_elements_worker/test_elements.py +0 -0
- {arkindex-base-worker-0.3.6rc3 → arkindex-base-worker-0.3.6rc4}/tests/test_elements_worker/test_metadata.py +0 -0
- {arkindex-base-worker-0.3.6rc3 → arkindex-base-worker-0.3.6rc4}/tests/test_elements_worker/test_task.py +0 -0
- {arkindex-base-worker-0.3.6rc3 → arkindex-base-worker-0.3.6rc4}/tests/test_elements_worker/test_training.py +0 -0
- {arkindex-base-worker-0.3.6rc3 → arkindex-base-worker-0.3.6rc4}/tests/test_elements_worker/test_transcriptions.py +0 -0
- {arkindex-base-worker-0.3.6rc3 → arkindex-base-worker-0.3.6rc4}/tests/test_elements_worker/test_worker.py +0 -0
- {arkindex-base-worker-0.3.6rc3 → arkindex-base-worker-0.3.6rc4}/tests/test_image.py +0 -0
- {arkindex-base-worker-0.3.6rc3 → arkindex-base-worker-0.3.6rc4}/tests/test_merge.py +0 -0
- {arkindex-base-worker-0.3.6rc3 → arkindex-base-worker-0.3.6rc4}/tests/test_utils.py +0 -0
{arkindex-base-worker-0.3.6rc3 → arkindex-base-worker-0.3.6rc4}/arkindex_worker/worker/__init__.py
RENAMED
|
@@ -387,7 +387,7 @@ class DatasetWorker(BaseWorker, DatasetMixin, TaskMixin):
|
|
|
387
387
|
"""
|
|
388
388
|
|
|
389
389
|
def format_split(
|
|
390
|
-
split: tuple[str, Iterator[tuple[str, Element]]]
|
|
390
|
+
split: tuple[str, Iterator[tuple[str, Element]]],
|
|
391
391
|
) -> tuple[str, list[Element]]:
|
|
392
392
|
return (split[0], list(map(itemgetter(1), list(split[1]))))
|
|
393
393
|
|
|
@@ -440,6 +440,8 @@ class DatasetWorker(BaseWorker, DatasetMixin, TaskMixin):
|
|
|
440
440
|
failed = 0
|
|
441
441
|
for i, item in enumerate(datasets, start=1):
|
|
442
442
|
dataset = None
|
|
443
|
+
dataset_artifact = None
|
|
444
|
+
|
|
443
445
|
try:
|
|
444
446
|
if not self.is_read_only:
|
|
445
447
|
# Just use the result of list_datasets as the dataset
|
|
@@ -465,7 +467,7 @@ class DatasetWorker(BaseWorker, DatasetMixin, TaskMixin):
|
|
|
465
467
|
self.update_dataset_state(dataset, DatasetState.Building)
|
|
466
468
|
else:
|
|
467
469
|
logger.info(f"Downloading data for {dataset} ({i}/{count})")
|
|
468
|
-
self.download_dataset_artifact(dataset)
|
|
470
|
+
dataset_artifact = self.download_dataset_artifact(dataset)
|
|
469
471
|
|
|
470
472
|
# Process the dataset
|
|
471
473
|
self.process_dataset(dataset)
|
|
@@ -496,6 +498,10 @@ class DatasetWorker(BaseWorker, DatasetMixin, TaskMixin):
|
|
|
496
498
|
# Try to update the state to Error regardless of the response
|
|
497
499
|
with contextlib.suppress(Exception):
|
|
498
500
|
self.update_dataset_state(dataset, DatasetState.Error)
|
|
501
|
+
finally:
|
|
502
|
+
# Cleanup the dataset artifact if it was downloaded, no matter what
|
|
503
|
+
if dataset_artifact:
|
|
504
|
+
dataset_artifact.unlink(missing_ok=True)
|
|
499
505
|
|
|
500
506
|
if failed:
|
|
501
507
|
logger.error(
|
{arkindex-base-worker-0.3.6rc3 → arkindex-base-worker-0.3.6rc4}/arkindex_worker/worker/entity.py
RENAMED
|
@@ -331,8 +331,7 @@ class EntityMixin:
|
|
|
331
331
|
parent: Element | None = None,
|
|
332
332
|
):
|
|
333
333
|
"""
|
|
334
|
-
List all entities in the worker's corpus
|
|
335
|
-
This method does not support cache
|
|
334
|
+
List all entities in the worker's corpus and store them in the ``self.entities`` cache.
|
|
336
335
|
:param name: Filter entities by part of their name (case-insensitive)
|
|
337
336
|
:param parent: Restrict entities to those linked to all transcriptions of an element and all its descendants. Note that links to metadata are ignored.
|
|
338
337
|
"""
|
|
@@ -346,8 +345,14 @@ class EntityMixin:
|
|
|
346
345
|
assert isinstance(parent, Element), "parent should be of type Element"
|
|
347
346
|
query_params["parent"] = parent.id
|
|
348
347
|
|
|
349
|
-
|
|
350
|
-
"
|
|
348
|
+
self.entities = {
|
|
349
|
+
entity["id"]: entity
|
|
350
|
+
for entity in self.api_client.paginate(
|
|
351
|
+
"ListCorpusEntities", id=self.corpus_id, **query_params
|
|
352
|
+
)
|
|
353
|
+
}
|
|
354
|
+
logger.info(
|
|
355
|
+
f"Loaded {len(self.entities)} entities in corpus ({self.corpus_id})"
|
|
351
356
|
)
|
|
352
357
|
|
|
353
358
|
def list_corpus_entity_types(
|
|
@@ -4,8 +4,9 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "arkindex-base-worker"
|
|
7
|
+
version = "0.3.6-rc4"
|
|
7
8
|
description = "Base Worker to easily build Arkindex ML workflows"
|
|
8
|
-
dynamic = ["
|
|
9
|
+
dynamic = ["dependencies", "optional-dependencies"]
|
|
9
10
|
authors = [
|
|
10
11
|
{ name = "Teklia", email = "contact@teklia.com" },
|
|
11
12
|
]
|
|
@@ -34,8 +35,8 @@ Repository = "https://gitlab.teklia.com/workers/base-worker"
|
|
|
34
35
|
Authors = "https://teklia.com"
|
|
35
36
|
|
|
36
37
|
[tool.setuptools.dynamic]
|
|
37
|
-
dependencies = {file = ["requirements.txt"]}
|
|
38
|
-
optional-dependencies = {docs = { file = ["docs-requirements.txt"] }}
|
|
38
|
+
dependencies = { file = ["requirements.txt"] }
|
|
39
|
+
optional-dependencies = { docs = { file = ["docs-requirements.txt"] } }
|
|
39
40
|
|
|
40
41
|
[tool.ruff]
|
|
41
42
|
exclude = [".git", "__pycache__"]
|
|
@@ -52,6 +53,8 @@ select = [
|
|
|
52
53
|
"I",
|
|
53
54
|
# Implicit Optional
|
|
54
55
|
"RUF013",
|
|
56
|
+
# Invalid pyproject.toml
|
|
57
|
+
"RUF200",
|
|
55
58
|
# pyupgrade
|
|
56
59
|
"UP",
|
|
57
60
|
# flake8-bugbear
|
|
@@ -53,6 +53,9 @@ def test_create_tables(tmp_path):
|
|
|
53
53
|
init_cache_db(db_path)
|
|
54
54
|
create_tables()
|
|
55
55
|
|
|
56
|
+
# WARNING: If you are updating this schema following a development you have made
|
|
57
|
+
# in base-worker, make sure to upgrade the arkindex_worker.cache.SQL_VERSION in
|
|
58
|
+
# the same merge request as your changes.
|
|
56
59
|
expected_schema = """CREATE TABLE "classifications" ("id" TEXT NOT NULL PRIMARY KEY, "element_id" TEXT NOT NULL, "class_name" TEXT NOT NULL, "confidence" REAL NOT NULL, "state" VARCHAR(10) NOT NULL, "worker_run_id" TEXT, FOREIGN KEY ("element_id") REFERENCES "elements" ("id"))
|
|
57
60
|
CREATE TABLE "dataset_elements" ("id" TEXT NOT NULL PRIMARY KEY, "element_id" TEXT NOT NULL, "dataset_id" TEXT NOT NULL, "set_name" VARCHAR(255) NOT NULL, FOREIGN KEY ("element_id") REFERENCES "elements" ("id"), FOREIGN KEY ("dataset_id") REFERENCES "datasets" ("id"))
|
|
58
61
|
CREATE TABLE "datasets" ("id" TEXT NOT NULL PRIMARY KEY, "name" VARCHAR(255) NOT NULL, "state" VARCHAR(255) NOT NULL DEFAULT 'open', "sets" TEXT NOT NULL)
|
|
@@ -741,12 +741,13 @@ def test_list_corpus_entities(responses, mock_elements_worker):
|
|
|
741
741
|
},
|
|
742
742
|
)
|
|
743
743
|
|
|
744
|
-
|
|
745
|
-
|
|
746
|
-
|
|
744
|
+
mock_elements_worker.list_corpus_entities()
|
|
745
|
+
|
|
746
|
+
assert mock_elements_worker.entities == {
|
|
747
|
+
"fake_entity_id": {
|
|
747
748
|
"id": "fake_entity_id",
|
|
748
749
|
}
|
|
749
|
-
|
|
750
|
+
}
|
|
750
751
|
|
|
751
752
|
assert len(responses.calls) == len(BASE_API_CALLS) + 1
|
|
752
753
|
assert [
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{arkindex-base-worker-0.3.6rc3 → arkindex-base-worker-0.3.6rc4}/arkindex_worker/worker/base.py
RENAMED
|
File without changes
|
|
File without changes
|
{arkindex-base-worker-0.3.6rc3 → arkindex-base-worker-0.3.6rc4}/arkindex_worker/worker/dataset.py
RENAMED
|
File without changes
|
{arkindex-base-worker-0.3.6rc3 → arkindex-base-worker-0.3.6rc4}/arkindex_worker/worker/element.py
RENAMED
|
File without changes
|
{arkindex-base-worker-0.3.6rc3 → arkindex-base-worker-0.3.6rc4}/arkindex_worker/worker/metadata.py
RENAMED
|
File without changes
|
{arkindex-base-worker-0.3.6rc3 → arkindex-base-worker-0.3.6rc4}/arkindex_worker/worker/task.py
RENAMED
|
File without changes
|
{arkindex-base-worker-0.3.6rc3 → arkindex-base-worker-0.3.6rc4}/arkindex_worker/worker/training.py
RENAMED
|
File without changes
|
|
File without changes
|
{arkindex-base-worker-0.3.6rc3 → arkindex-base-worker-0.3.6rc4}/arkindex_worker/worker/version.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{arkindex-base-worker-0.3.6rc3 → arkindex-base-worker-0.3.6rc4}/tests/test_dataset_worker.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|