arkindex-base-worker 0.4.0rc3__tar.gz → 0.4.0rc5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68)
  1. {arkindex_base_worker-0.4.0rc3 → arkindex_base_worker-0.4.0rc5}/PKG-INFO +3 -2
  2. {arkindex_base_worker-0.4.0rc3 → arkindex_base_worker-0.4.0rc5}/arkindex_base_worker.egg-info/PKG-INFO +3 -2
  3. {arkindex_base_worker-0.4.0rc3 → arkindex_base_worker-0.4.0rc5}/arkindex_base_worker.egg-info/SOURCES.txt +12 -4
  4. {arkindex_base_worker-0.4.0rc3 → arkindex_base_worker-0.4.0rc5}/arkindex_base_worker.egg-info/requires.txt +1 -1
  5. {arkindex_base_worker-0.4.0rc3 → arkindex_base_worker-0.4.0rc5}/arkindex_worker/cache.py +1 -1
  6. {arkindex_base_worker-0.4.0rc3 → arkindex_base_worker-0.4.0rc5}/arkindex_worker/worker/__init__.py +6 -2
  7. {arkindex_base_worker-0.4.0rc3 → arkindex_base_worker-0.4.0rc5}/arkindex_worker/worker/entity.py +8 -19
  8. {arkindex_base_worker-0.4.0rc3 → arkindex_base_worker-0.4.0rc5}/arkindex_worker/worker/process.py +5 -0
  9. {arkindex_base_worker-0.4.0rc3 → arkindex_base_worker-0.4.0rc5}/pyproject.toml +3 -2
  10. arkindex_base_worker-0.4.0rc3/tests/test_elements_worker/test_classifications.py → arkindex_base_worker-0.4.0rc5/tests/test_elements_worker/test_classification.py +86 -0
  11. arkindex_base_worker-0.4.0rc5/tests/test_elements_worker/test_element.py +427 -0
  12. arkindex_base_worker-0.4.0rc5/tests/test_elements_worker/test_element_create_multiple.py +715 -0
  13. arkindex_base_worker-0.4.0rc5/tests/test_elements_worker/test_element_create_single.py +528 -0
  14. arkindex_base_worker-0.4.0rc5/tests/test_elements_worker/test_element_list_children.py +969 -0
  15. arkindex_base_worker-0.4.0rc5/tests/test_elements_worker/test_element_list_parents.py +530 -0
  16. arkindex_base_worker-0.4.0rc3/tests/test_elements_worker/test_entities.py → arkindex_base_worker-0.4.0rc5/tests/test_elements_worker/test_entity_create.py +42 -245
  17. arkindex_base_worker-0.4.0rc5/tests/test_elements_worker/test_entity_list_and_check.py +160 -0
  18. arkindex_base_worker-0.4.0rc5/tests/test_elements_worker/test_transcription_create.py +873 -0
  19. arkindex_base_worker-0.4.0rc5/tests/test_elements_worker/test_transcription_create_with_elements.py +951 -0
  20. arkindex_base_worker-0.4.0rc5/tests/test_elements_worker/test_transcription_list.py +450 -0
  21. arkindex_base_worker-0.4.0rc5/tests/test_elements_worker/test_version.py +60 -0
  22. arkindex_base_worker-0.4.0rc5/tests/test_elements_worker/test_worker.py +751 -0
  23. {arkindex_base_worker-0.4.0rc3 → arkindex_base_worker-0.4.0rc5}/tests/test_image.py +181 -198
  24. arkindex_base_worker-0.4.0rc3/tests/test_elements_worker/test_elements.py +0 -3704
  25. arkindex_base_worker-0.4.0rc3/tests/test_elements_worker/test_transcriptions.py +0 -2252
  26. arkindex_base_worker-0.4.0rc3/tests/test_elements_worker/test_worker.py +0 -314
  27. {arkindex_base_worker-0.4.0rc3 → arkindex_base_worker-0.4.0rc5}/LICENSE +0 -0
  28. {arkindex_base_worker-0.4.0rc3 → arkindex_base_worker-0.4.0rc5}/README.md +0 -0
  29. {arkindex_base_worker-0.4.0rc3 → arkindex_base_worker-0.4.0rc5}/arkindex_base_worker.egg-info/dependency_links.txt +0 -0
  30. {arkindex_base_worker-0.4.0rc3 → arkindex_base_worker-0.4.0rc5}/arkindex_base_worker.egg-info/top_level.txt +0 -0
  31. {arkindex_base_worker-0.4.0rc3 → arkindex_base_worker-0.4.0rc5}/arkindex_worker/__init__.py +0 -0
  32. {arkindex_base_worker-0.4.0rc3 → arkindex_base_worker-0.4.0rc5}/arkindex_worker/image.py +0 -0
  33. {arkindex_base_worker-0.4.0rc3 → arkindex_base_worker-0.4.0rc5}/arkindex_worker/models.py +0 -0
  34. {arkindex_base_worker-0.4.0rc3 → arkindex_base_worker-0.4.0rc5}/arkindex_worker/utils.py +0 -0
  35. {arkindex_base_worker-0.4.0rc3 → arkindex_base_worker-0.4.0rc5}/arkindex_worker/worker/base.py +0 -0
  36. {arkindex_base_worker-0.4.0rc3 → arkindex_base_worker-0.4.0rc5}/arkindex_worker/worker/classification.py +0 -0
  37. {arkindex_base_worker-0.4.0rc3 → arkindex_base_worker-0.4.0rc5}/arkindex_worker/worker/corpus.py +0 -0
  38. {arkindex_base_worker-0.4.0rc3 → arkindex_base_worker-0.4.0rc5}/arkindex_worker/worker/dataset.py +0 -0
  39. {arkindex_base_worker-0.4.0rc3 → arkindex_base_worker-0.4.0rc5}/arkindex_worker/worker/element.py +0 -0
  40. {arkindex_base_worker-0.4.0rc3 → arkindex_base_worker-0.4.0rc5}/arkindex_worker/worker/image.py +0 -0
  41. {arkindex_base_worker-0.4.0rc3 → arkindex_base_worker-0.4.0rc5}/arkindex_worker/worker/metadata.py +0 -0
  42. {arkindex_base_worker-0.4.0rc3 → arkindex_base_worker-0.4.0rc5}/arkindex_worker/worker/task.py +0 -0
  43. {arkindex_base_worker-0.4.0rc3 → arkindex_base_worker-0.4.0rc5}/arkindex_worker/worker/training.py +0 -0
  44. {arkindex_base_worker-0.4.0rc3 → arkindex_base_worker-0.4.0rc5}/arkindex_worker/worker/transcription.py +0 -0
  45. {arkindex_base_worker-0.4.0rc3 → arkindex_base_worker-0.4.0rc5}/arkindex_worker/worker/version.py +0 -0
  46. {arkindex_base_worker-0.4.0rc3 → arkindex_base_worker-0.4.0rc5}/hooks/pre_gen_project.py +0 -0
  47. {arkindex_base_worker-0.4.0rc3 → arkindex_base_worker-0.4.0rc5}/setup.cfg +0 -0
  48. {arkindex_base_worker-0.4.0rc3 → arkindex_base_worker-0.4.0rc5}/tests/__init__.py +0 -0
  49. {arkindex_base_worker-0.4.0rc3 → arkindex_base_worker-0.4.0rc5}/tests/conftest.py +0 -0
  50. {arkindex_base_worker-0.4.0rc3 → arkindex_base_worker-0.4.0rc5}/tests/test_base_worker.py +0 -0
  51. {arkindex_base_worker-0.4.0rc3 → arkindex_base_worker-0.4.0rc5}/tests/test_cache.py +0 -0
  52. {arkindex_base_worker-0.4.0rc3 → arkindex_base_worker-0.4.0rc5}/tests/test_dataset_worker.py +0 -0
  53. {arkindex_base_worker-0.4.0rc3 → arkindex_base_worker-0.4.0rc5}/tests/test_element.py +0 -0
  54. {arkindex_base_worker-0.4.0rc3 → arkindex_base_worker-0.4.0rc5}/tests/test_elements_worker/__init__.py +0 -0
  55. {arkindex_base_worker-0.4.0rc3 → arkindex_base_worker-0.4.0rc5}/tests/test_elements_worker/test_cli.py +0 -0
  56. {arkindex_base_worker-0.4.0rc3 → arkindex_base_worker-0.4.0rc5}/tests/test_elements_worker/test_corpus.py +31 -31
  57. {arkindex_base_worker-0.4.0rc3 → arkindex_base_worker-0.4.0rc5}/tests/test_elements_worker/test_dataset.py +0 -0
  58. {arkindex_base_worker-0.4.0rc3 → arkindex_base_worker-0.4.0rc5}/tests/test_elements_worker/test_image.py +0 -0
  59. {arkindex_base_worker-0.4.0rc3 → arkindex_base_worker-0.4.0rc5}/tests/test_elements_worker/test_metadata.py +0 -0
  60. {arkindex_base_worker-0.4.0rc3 → arkindex_base_worker-0.4.0rc5}/tests/test_elements_worker/test_task.py +0 -0
  61. {arkindex_base_worker-0.4.0rc3 → arkindex_base_worker-0.4.0rc5}/tests/test_elements_worker/test_training.py +0 -0
  62. {arkindex_base_worker-0.4.0rc3 → arkindex_base_worker-0.4.0rc5}/tests/test_merge.py +0 -0
  63. {arkindex_base_worker-0.4.0rc3 → arkindex_base_worker-0.4.0rc5}/tests/test_utils.py +0 -0
  64. {arkindex_base_worker-0.4.0rc3 → arkindex_base_worker-0.4.0rc5}/worker-demo/tests/__init__.py +0 -0
  65. {arkindex_base_worker-0.4.0rc3 → arkindex_base_worker-0.4.0rc5}/worker-demo/tests/conftest.py +0 -0
  66. {arkindex_base_worker-0.4.0rc3 → arkindex_base_worker-0.4.0rc5}/worker-demo/tests/test_worker.py +0 -0
  67. {arkindex_base_worker-0.4.0rc3 → arkindex_base_worker-0.4.0rc5}/worker-demo/worker_demo/__init__.py +0 -0
  68. {arkindex_base_worker-0.4.0rc3 → arkindex_base_worker-0.4.0rc5}/worker-demo/worker_demo/worker.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: arkindex-base-worker
3
- Version: 0.4.0rc3
3
+ Version: 0.4.0rc5
4
4
  Summary: Base Worker to easily build Arkindex ML workflows
5
5
  Author-email: Teklia <contact@teklia.com>
6
6
  Maintainer-email: Teklia <contact@teklia.com>
@@ -37,6 +37,7 @@ Classifier: License :: OSI Approved :: MIT License
37
37
  Classifier: Programming Language :: Python :: 3 :: Only
38
38
  Classifier: Programming Language :: Python :: 3.10
39
39
  Classifier: Programming Language :: Python :: 3.11
40
+ Classifier: Programming Language :: Python :: 3.12
40
41
  Requires-Python: >=3.10
41
42
  Description-Content-Type: text/markdown
42
43
  License-File: LICENSE
@@ -44,7 +45,7 @@ Requires-Dist: humanize==4.10.0
44
45
  Requires-Dist: peewee~=3.17
45
46
  Requires-Dist: Pillow==10.4.0
46
47
  Requires-Dist: python-gnupg==0.5.2
47
- Requires-Dist: shapely==2.0.5
48
+ Requires-Dist: shapely==2.0.6
48
49
  Requires-Dist: teklia-toolbox==0.1.7b1
49
50
  Requires-Dist: zstandard==0.22.0
50
51
  Provides-Extra: docs
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: arkindex-base-worker
3
- Version: 0.4.0rc3
3
+ Version: 0.4.0rc5
4
4
  Summary: Base Worker to easily build Arkindex ML workflows
5
5
  Author-email: Teklia <contact@teklia.com>
6
6
  Maintainer-email: Teklia <contact@teklia.com>
@@ -37,6 +37,7 @@ Classifier: License :: OSI Approved :: MIT License
37
37
  Classifier: Programming Language :: Python :: 3 :: Only
38
38
  Classifier: Programming Language :: Python :: 3.10
39
39
  Classifier: Programming Language :: Python :: 3.11
40
+ Classifier: Programming Language :: Python :: 3.12
40
41
  Requires-Python: >=3.10
41
42
  Description-Content-Type: text/markdown
42
43
  License-File: LICENSE
@@ -44,7 +45,7 @@ Requires-Dist: humanize==4.10.0
44
45
  Requires-Dist: peewee~=3.17
45
46
  Requires-Dist: Pillow==10.4.0
46
47
  Requires-Dist: python-gnupg==0.5.2
47
- Requires-Dist: shapely==2.0.5
48
+ Requires-Dist: shapely==2.0.6
48
49
  Requires-Dist: teklia-toolbox==0.1.7b1
49
50
  Requires-Dist: zstandard==0.22.0
50
51
  Provides-Extra: docs
@@ -36,17 +36,25 @@ tests/test_image.py
36
36
  tests/test_merge.py
37
37
  tests/test_utils.py
38
38
  tests/test_elements_worker/__init__.py
39
- tests/test_elements_worker/test_classifications.py
39
+ tests/test_elements_worker/test_classification.py
40
40
  tests/test_elements_worker/test_cli.py
41
41
  tests/test_elements_worker/test_corpus.py
42
42
  tests/test_elements_worker/test_dataset.py
43
- tests/test_elements_worker/test_elements.py
44
- tests/test_elements_worker/test_entities.py
43
+ tests/test_elements_worker/test_element.py
44
+ tests/test_elements_worker/test_element_create_multiple.py
45
+ tests/test_elements_worker/test_element_create_single.py
46
+ tests/test_elements_worker/test_element_list_children.py
47
+ tests/test_elements_worker/test_element_list_parents.py
48
+ tests/test_elements_worker/test_entity_create.py
49
+ tests/test_elements_worker/test_entity_list_and_check.py
45
50
  tests/test_elements_worker/test_image.py
46
51
  tests/test_elements_worker/test_metadata.py
47
52
  tests/test_elements_worker/test_task.py
48
53
  tests/test_elements_worker/test_training.py
49
- tests/test_elements_worker/test_transcriptions.py
54
+ tests/test_elements_worker/test_transcription_create.py
55
+ tests/test_elements_worker/test_transcription_create_with_elements.py
56
+ tests/test_elements_worker/test_transcription_list.py
57
+ tests/test_elements_worker/test_version.py
50
58
  tests/test_elements_worker/test_worker.py
51
59
  worker-demo/tests/__init__.py
52
60
  worker-demo/tests/conftest.py
@@ -2,7 +2,7 @@ humanize==4.10.0
2
2
  peewee~=3.17
3
3
  Pillow==10.4.0
4
4
  python-gnupg==0.5.2
5
- shapely==2.0.5
5
+ shapely==2.0.6
6
6
  teklia-toolbox==0.1.7b1
7
7
  zstandard==0.22.0
8
8
 
@@ -380,7 +380,7 @@ def unsupported_cache(func):
380
380
  def wrapper(self, *args, **kwargs):
381
381
  results = func(self, *args, **kwargs)
382
382
 
383
- if not (self.is_read_only or self.use_cache):
383
+ if self.use_cache:
384
384
  logger.warning(
385
385
  f"This API helper `{func.__name__}` did not update the cache database"
386
386
  )
@@ -133,8 +133,12 @@ class ElementsWorker(
133
133
  Whether or not WorkerActivity support has been enabled on the DataImport
134
134
  used to run this worker.
135
135
  """
136
- if self.is_read_only or self.process_mode == ProcessMode.Dataset:
137
- # Worker activities are also disabled when running an ElementsWorker in a Dataset process.
136
+ if self.is_read_only or self.process_mode in [
137
+ ProcessMode.Dataset,
138
+ ProcessMode.Export,
139
+ ]:
140
+ # Worker activities are also disabled when running an ElementsWorker in a Dataset process
141
+ # and when running export processes.
138
142
  return False
139
143
  assert (
140
144
  self.process_information
@@ -16,9 +16,6 @@ from arkindex_worker.cache import (
16
16
  )
17
17
  from arkindex_worker.models import Element, Transcription
18
18
  from arkindex_worker.utils import (
19
- DEFAULT_BATCH_SIZE,
20
- batch_publication,
21
- make_batches,
22
19
  pluralize,
23
20
  )
24
21
 
@@ -219,12 +216,10 @@ class EntityMixin:
219
216
  return transcription_ent
220
217
 
221
218
  @unsupported_cache
222
- @batch_publication
223
219
  def create_transcription_entities(
224
220
  self,
225
221
  transcription: Transcription,
226
222
  entities: list[Entity],
227
- batch_size: int = DEFAULT_BATCH_SIZE,
228
223
  ) -> list[dict[str, str]]:
229
224
  """
230
225
  Create multiple entities attached to a transcription in a single API request.
@@ -247,8 +242,6 @@ class EntityMixin:
247
242
  confidence (float or None)
248
243
  Optional confidence score, between 0.0 and 1.0.
249
244
 
250
- :param batch_size: The size of each batch, which will be used to split the publication to avoid API errors.
251
-
252
245
  :return: List of dicts, with each dict having two keys, `transcription_entity_id` and `entity_id`, holding the UUID of each created object.
253
246
  """
254
247
  assert transcription and isinstance(
@@ -300,18 +293,14 @@ class EntityMixin:
300
293
  )
301
294
  return
302
295
 
303
- created_entities = [
304
- created_entity
305
- for batch in make_batches(entities, "entity", batch_size)
306
- for created_entity in self.api_client.request(
307
- "CreateTranscriptionEntities",
308
- id=transcription.id,
309
- body={
310
- "worker_run_id": self.worker_run_id,
311
- "entities": batch,
312
- },
313
- )["entities"]
314
- ]
296
+ created_entities = self.api_client.request(
297
+ "CreateTranscriptionEntities",
298
+ id=transcription.id,
299
+ body={
300
+ "worker_run_id": self.worker_run_id,
301
+ "entities": entities,
302
+ },
303
+ )["entities"]
315
304
 
316
305
  return created_entities
317
306
 
@@ -61,3 +61,8 @@ class ProcessMode(Enum):
61
61
  """
62
62
  Dataset processes.
63
63
  """
64
+
65
+ Export = "export"
66
+ """
67
+ Export processes.
68
+ """
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "arkindex-base-worker"
7
- version = "0.4.0rc3"
7
+ version = "0.4.0rc5"
8
8
  description = "Base Worker to easily build Arkindex ML workflows"
9
9
  license = { file = "LICENSE" }
10
10
  dependencies = [
@@ -12,7 +12,7 @@ dependencies = [
12
12
  "peewee~=3.17",
13
13
  "Pillow==10.4.0",
14
14
  "python-gnupg==0.5.2",
15
- "shapely==2.0.5",
15
+ "shapely==2.0.6",
16
16
  "teklia-toolbox==0.1.7b1",
17
17
  "zstandard==0.22.0",
18
18
  ]
@@ -32,6 +32,7 @@ classifiers = [
32
32
  "Programming Language :: Python :: 3 :: Only",
33
33
  "Programming Language :: Python :: 3.10",
34
34
  "Programming Language :: Python :: 3.11",
35
+ "Programming Language :: Python :: 3.12",
35
36
  ]
36
37
 
37
38
  [project.urls]
@@ -17,6 +17,92 @@ from . import BASE_API_CALLS
17
17
  DELETE_PARAMETER = "DELETE_PARAMETER"
18
18
 
19
19
 
20
+ def test_load_corpus_classes_api_error(responses, mock_elements_worker):
21
+ responses.add(
22
+ responses.GET,
23
+ f"http://testserver/api/v1/corpus/{CORPUS_ID}/classes/",
24
+ status=418,
25
+ )
26
+
27
+ assert not mock_elements_worker.classes
28
+ with pytest.raises(
29
+ Exception, match="Stopping pagination as data will be incomplete"
30
+ ):
31
+ mock_elements_worker.load_corpus_classes()
32
+
33
+ assert len(responses.calls) == len(BASE_API_CALLS) + 5
34
+ assert [
35
+ (call.request.method, call.request.url) for call in responses.calls
36
+ ] == BASE_API_CALLS + [
37
+ # We do 5 retries
38
+ (
39
+ "GET",
40
+ f"http://testserver/api/v1/corpus/{CORPUS_ID}/classes/",
41
+ ),
42
+ (
43
+ "GET",
44
+ f"http://testserver/api/v1/corpus/{CORPUS_ID}/classes/",
45
+ ),
46
+ (
47
+ "GET",
48
+ f"http://testserver/api/v1/corpus/{CORPUS_ID}/classes/",
49
+ ),
50
+ (
51
+ "GET",
52
+ f"http://testserver/api/v1/corpus/{CORPUS_ID}/classes/",
53
+ ),
54
+ (
55
+ "GET",
56
+ f"http://testserver/api/v1/corpus/{CORPUS_ID}/classes/",
57
+ ),
58
+ ]
59
+ assert not mock_elements_worker.classes
60
+
61
+
62
+ def test_load_corpus_classes(responses, mock_elements_worker):
63
+ responses.add(
64
+ responses.GET,
65
+ f"http://testserver/api/v1/corpus/{CORPUS_ID}/classes/",
66
+ status=200,
67
+ json={
68
+ "count": 3,
69
+ "next": None,
70
+ "results": [
71
+ {
72
+ "id": "0000",
73
+ "name": "good",
74
+ },
75
+ {
76
+ "id": "1111",
77
+ "name": "average",
78
+ },
79
+ {
80
+ "id": "2222",
81
+ "name": "bad",
82
+ },
83
+ ],
84
+ },
85
+ )
86
+
87
+ assert not mock_elements_worker.classes
88
+ mock_elements_worker.load_corpus_classes()
89
+
90
+ assert len(responses.calls) == len(BASE_API_CALLS) + 1
91
+ assert [
92
+ (call.request.method, call.request.url) for call in responses.calls
93
+ ] == BASE_API_CALLS + [
94
+ (
95
+ "GET",
96
+ f"http://testserver/api/v1/corpus/{CORPUS_ID}/classes/",
97
+ ),
98
+ ]
99
+ assert mock_elements_worker.classes == {
100
+ "good": "0000",
101
+ "average": "1111",
102
+ "bad": "2222",
103
+ }
104
+
105
+
20
106
  def test_get_ml_class_id_load_classes(responses, mock_elements_worker):
21
107
  responses.add(
22
108
  responses.GET,