arkindex-base-worker 0.4.0rc4__tar.gz → 0.4.0rc5__tar.gz

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
Files changed (65) hide show
  1. {arkindex_base_worker-0.4.0rc4 → arkindex_base_worker-0.4.0rc5}/PKG-INFO +1 -1
  2. {arkindex_base_worker-0.4.0rc4 → arkindex_base_worker-0.4.0rc5}/arkindex_base_worker.egg-info/PKG-INFO +1 -1
  3. {arkindex_base_worker-0.4.0rc4 → arkindex_base_worker-0.4.0rc5}/arkindex_worker/worker/__init__.py +6 -2
  4. {arkindex_base_worker-0.4.0rc4 → arkindex_base_worker-0.4.0rc5}/arkindex_worker/worker/entity.py +8 -19
  5. {arkindex_base_worker-0.4.0rc4 → arkindex_base_worker-0.4.0rc5}/arkindex_worker/worker/process.py +5 -0
  6. {arkindex_base_worker-0.4.0rc4 → arkindex_base_worker-0.4.0rc5}/pyproject.toml +1 -1
  7. {arkindex_base_worker-0.4.0rc4 → arkindex_base_worker-0.4.0rc5}/tests/test_elements_worker/test_entity_create.py +42 -92
  8. {arkindex_base_worker-0.4.0rc4 → arkindex_base_worker-0.4.0rc5}/LICENSE +0 -0
  9. {arkindex_base_worker-0.4.0rc4 → arkindex_base_worker-0.4.0rc5}/README.md +0 -0
  10. {arkindex_base_worker-0.4.0rc4 → arkindex_base_worker-0.4.0rc5}/arkindex_base_worker.egg-info/SOURCES.txt +0 -0
  11. {arkindex_base_worker-0.4.0rc4 → arkindex_base_worker-0.4.0rc5}/arkindex_base_worker.egg-info/dependency_links.txt +0 -0
  12. {arkindex_base_worker-0.4.0rc4 → arkindex_base_worker-0.4.0rc5}/arkindex_base_worker.egg-info/requires.txt +0 -0
  13. {arkindex_base_worker-0.4.0rc4 → arkindex_base_worker-0.4.0rc5}/arkindex_base_worker.egg-info/top_level.txt +0 -0
  14. {arkindex_base_worker-0.4.0rc4 → arkindex_base_worker-0.4.0rc5}/arkindex_worker/__init__.py +0 -0
  15. {arkindex_base_worker-0.4.0rc4 → arkindex_base_worker-0.4.0rc5}/arkindex_worker/cache.py +0 -0
  16. {arkindex_base_worker-0.4.0rc4 → arkindex_base_worker-0.4.0rc5}/arkindex_worker/image.py +0 -0
  17. {arkindex_base_worker-0.4.0rc4 → arkindex_base_worker-0.4.0rc5}/arkindex_worker/models.py +0 -0
  18. {arkindex_base_worker-0.4.0rc4 → arkindex_base_worker-0.4.0rc5}/arkindex_worker/utils.py +0 -0
  19. {arkindex_base_worker-0.4.0rc4 → arkindex_base_worker-0.4.0rc5}/arkindex_worker/worker/base.py +0 -0
  20. {arkindex_base_worker-0.4.0rc4 → arkindex_base_worker-0.4.0rc5}/arkindex_worker/worker/classification.py +0 -0
  21. {arkindex_base_worker-0.4.0rc4 → arkindex_base_worker-0.4.0rc5}/arkindex_worker/worker/corpus.py +0 -0
  22. {arkindex_base_worker-0.4.0rc4 → arkindex_base_worker-0.4.0rc5}/arkindex_worker/worker/dataset.py +0 -0
  23. {arkindex_base_worker-0.4.0rc4 → arkindex_base_worker-0.4.0rc5}/arkindex_worker/worker/element.py +0 -0
  24. {arkindex_base_worker-0.4.0rc4 → arkindex_base_worker-0.4.0rc5}/arkindex_worker/worker/image.py +0 -0
  25. {arkindex_base_worker-0.4.0rc4 → arkindex_base_worker-0.4.0rc5}/arkindex_worker/worker/metadata.py +0 -0
  26. {arkindex_base_worker-0.4.0rc4 → arkindex_base_worker-0.4.0rc5}/arkindex_worker/worker/task.py +0 -0
  27. {arkindex_base_worker-0.4.0rc4 → arkindex_base_worker-0.4.0rc5}/arkindex_worker/worker/training.py +0 -0
  28. {arkindex_base_worker-0.4.0rc4 → arkindex_base_worker-0.4.0rc5}/arkindex_worker/worker/transcription.py +0 -0
  29. {arkindex_base_worker-0.4.0rc4 → arkindex_base_worker-0.4.0rc5}/arkindex_worker/worker/version.py +0 -0
  30. {arkindex_base_worker-0.4.0rc4 → arkindex_base_worker-0.4.0rc5}/hooks/pre_gen_project.py +0 -0
  31. {arkindex_base_worker-0.4.0rc4 → arkindex_base_worker-0.4.0rc5}/setup.cfg +0 -0
  32. {arkindex_base_worker-0.4.0rc4 → arkindex_base_worker-0.4.0rc5}/tests/__init__.py +0 -0
  33. {arkindex_base_worker-0.4.0rc4 → arkindex_base_worker-0.4.0rc5}/tests/conftest.py +0 -0
  34. {arkindex_base_worker-0.4.0rc4 → arkindex_base_worker-0.4.0rc5}/tests/test_base_worker.py +0 -0
  35. {arkindex_base_worker-0.4.0rc4 → arkindex_base_worker-0.4.0rc5}/tests/test_cache.py +0 -0
  36. {arkindex_base_worker-0.4.0rc4 → arkindex_base_worker-0.4.0rc5}/tests/test_dataset_worker.py +0 -0
  37. {arkindex_base_worker-0.4.0rc4 → arkindex_base_worker-0.4.0rc5}/tests/test_element.py +0 -0
  38. {arkindex_base_worker-0.4.0rc4 → arkindex_base_worker-0.4.0rc5}/tests/test_elements_worker/__init__.py +0 -0
  39. {arkindex_base_worker-0.4.0rc4 → arkindex_base_worker-0.4.0rc5}/tests/test_elements_worker/test_classification.py +0 -0
  40. {arkindex_base_worker-0.4.0rc4 → arkindex_base_worker-0.4.0rc5}/tests/test_elements_worker/test_cli.py +0 -0
  41. {arkindex_base_worker-0.4.0rc4 → arkindex_base_worker-0.4.0rc5}/tests/test_elements_worker/test_corpus.py +0 -0
  42. {arkindex_base_worker-0.4.0rc4 → arkindex_base_worker-0.4.0rc5}/tests/test_elements_worker/test_dataset.py +0 -0
  43. {arkindex_base_worker-0.4.0rc4 → arkindex_base_worker-0.4.0rc5}/tests/test_elements_worker/test_element.py +0 -0
  44. {arkindex_base_worker-0.4.0rc4 → arkindex_base_worker-0.4.0rc5}/tests/test_elements_worker/test_element_create_multiple.py +0 -0
  45. {arkindex_base_worker-0.4.0rc4 → arkindex_base_worker-0.4.0rc5}/tests/test_elements_worker/test_element_create_single.py +0 -0
  46. {arkindex_base_worker-0.4.0rc4 → arkindex_base_worker-0.4.0rc5}/tests/test_elements_worker/test_element_list_children.py +0 -0
  47. {arkindex_base_worker-0.4.0rc4 → arkindex_base_worker-0.4.0rc5}/tests/test_elements_worker/test_element_list_parents.py +0 -0
  48. {arkindex_base_worker-0.4.0rc4 → arkindex_base_worker-0.4.0rc5}/tests/test_elements_worker/test_entity_list_and_check.py +0 -0
  49. {arkindex_base_worker-0.4.0rc4 → arkindex_base_worker-0.4.0rc5}/tests/test_elements_worker/test_image.py +0 -0
  50. {arkindex_base_worker-0.4.0rc4 → arkindex_base_worker-0.4.0rc5}/tests/test_elements_worker/test_metadata.py +0 -0
  51. {arkindex_base_worker-0.4.0rc4 → arkindex_base_worker-0.4.0rc5}/tests/test_elements_worker/test_task.py +0 -0
  52. {arkindex_base_worker-0.4.0rc4 → arkindex_base_worker-0.4.0rc5}/tests/test_elements_worker/test_training.py +0 -0
  53. {arkindex_base_worker-0.4.0rc4 → arkindex_base_worker-0.4.0rc5}/tests/test_elements_worker/test_transcription_create.py +0 -0
  54. {arkindex_base_worker-0.4.0rc4 → arkindex_base_worker-0.4.0rc5}/tests/test_elements_worker/test_transcription_create_with_elements.py +0 -0
  55. {arkindex_base_worker-0.4.0rc4 → arkindex_base_worker-0.4.0rc5}/tests/test_elements_worker/test_transcription_list.py +0 -0
  56. {arkindex_base_worker-0.4.0rc4 → arkindex_base_worker-0.4.0rc5}/tests/test_elements_worker/test_version.py +0 -0
  57. {arkindex_base_worker-0.4.0rc4 → arkindex_base_worker-0.4.0rc5}/tests/test_elements_worker/test_worker.py +0 -0
  58. {arkindex_base_worker-0.4.0rc4 → arkindex_base_worker-0.4.0rc5}/tests/test_image.py +0 -0
  59. {arkindex_base_worker-0.4.0rc4 → arkindex_base_worker-0.4.0rc5}/tests/test_merge.py +0 -0
  60. {arkindex_base_worker-0.4.0rc4 → arkindex_base_worker-0.4.0rc5}/tests/test_utils.py +0 -0
  61. {arkindex_base_worker-0.4.0rc4 → arkindex_base_worker-0.4.0rc5}/worker-demo/tests/__init__.py +0 -0
  62. {arkindex_base_worker-0.4.0rc4 → arkindex_base_worker-0.4.0rc5}/worker-demo/tests/conftest.py +0 -0
  63. {arkindex_base_worker-0.4.0rc4 → arkindex_base_worker-0.4.0rc5}/worker-demo/tests/test_worker.py +0 -0
  64. {arkindex_base_worker-0.4.0rc4 → arkindex_base_worker-0.4.0rc5}/worker-demo/worker_demo/__init__.py +0 -0
  65. {arkindex_base_worker-0.4.0rc4 → arkindex_base_worker-0.4.0rc5}/worker-demo/worker_demo/worker.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: arkindex-base-worker
3
- Version: 0.4.0rc4
3
+ Version: 0.4.0rc5
4
4
  Summary: Base Worker to easily build Arkindex ML workflows
5
5
  Author-email: Teklia <contact@teklia.com>
6
6
  Maintainer-email: Teklia <contact@teklia.com>
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: arkindex-base-worker
3
- Version: 0.4.0rc4
3
+ Version: 0.4.0rc5
4
4
  Summary: Base Worker to easily build Arkindex ML workflows
5
5
  Author-email: Teklia <contact@teklia.com>
6
6
  Maintainer-email: Teklia <contact@teklia.com>
@@ -133,8 +133,12 @@ class ElementsWorker(
133
133
  Whether or not WorkerActivity support has been enabled on the DataImport
134
134
  used to run this worker.
135
135
  """
136
- if self.is_read_only or self.process_mode == ProcessMode.Dataset:
137
- # Worker activities are also disabled when running an ElementsWorker in a Dataset process.
136
+ if self.is_read_only or self.process_mode in [
137
+ ProcessMode.Dataset,
138
+ ProcessMode.Export,
139
+ ]:
140
+ # Worker activities are also disabled when running an ElementsWorker in a Dataset process
141
+ # and when running export processes.
138
142
  return False
139
143
  assert (
140
144
  self.process_information
@@ -16,9 +16,6 @@ from arkindex_worker.cache import (
16
16
  )
17
17
  from arkindex_worker.models import Element, Transcription
18
18
  from arkindex_worker.utils import (
19
- DEFAULT_BATCH_SIZE,
20
- batch_publication,
21
- make_batches,
22
19
  pluralize,
23
20
  )
24
21
 
@@ -219,12 +216,10 @@ class EntityMixin:
219
216
  return transcription_ent
220
217
 
221
218
  @unsupported_cache
222
- @batch_publication
223
219
  def create_transcription_entities(
224
220
  self,
225
221
  transcription: Transcription,
226
222
  entities: list[Entity],
227
- batch_size: int = DEFAULT_BATCH_SIZE,
228
223
  ) -> list[dict[str, str]]:
229
224
  """
230
225
  Create multiple entities attached to a transcription in a single API request.
@@ -247,8 +242,6 @@ class EntityMixin:
247
242
  confidence (float or None)
248
243
  Optional confidence score, between 0.0 and 1.0.
249
244
 
250
- :param batch_size: The size of each batch, which will be used to split the publication to avoid API errors.
251
-
252
245
  :return: List of dicts, with each dict having two keys, `transcription_entity_id` and `entity_id`, holding the UUID of each created object.
253
246
  """
254
247
  assert transcription and isinstance(
@@ -300,18 +293,14 @@ class EntityMixin:
300
293
  )
301
294
  return
302
295
 
303
- created_entities = [
304
- created_entity
305
- for batch in make_batches(entities, "entity", batch_size)
306
- for created_entity in self.api_client.request(
307
- "CreateTranscriptionEntities",
308
- id=transcription.id,
309
- body={
310
- "worker_run_id": self.worker_run_id,
311
- "entities": batch,
312
- },
313
- )["entities"]
314
- ]
296
+ created_entities = self.api_client.request(
297
+ "CreateTranscriptionEntities",
298
+ id=transcription.id,
299
+ body={
300
+ "worker_run_id": self.worker_run_id,
301
+ "entities": entities,
302
+ },
303
+ )["entities"]
315
304
 
316
305
  return created_entities
317
306
 
@@ -61,3 +61,8 @@ class ProcessMode(Enum):
61
61
  """
62
62
  Dataset processes.
63
63
  """
64
+
65
+ Export = "export"
66
+ """
67
+ Export processes.
68
+ """
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "arkindex-base-worker"
7
- version = "0.4.0rc4"
7
+ version = "0.4.0rc5"
8
8
  description = "Base Worker to easily build Arkindex ML workflows"
9
9
  license = { file = "LICENSE" }
10
10
  dependencies = [
@@ -13,7 +13,6 @@ from arkindex_worker.cache import (
13
13
  CachedTranscriptionEntity,
14
14
  )
15
15
  from arkindex_worker.models import Transcription
16
- from arkindex_worker.utils import DEFAULT_BATCH_SIZE
17
16
  from arkindex_worker.worker.transcription import TextOrientation
18
17
  from tests import CORPUS_ID
19
18
 
@@ -836,89 +835,50 @@ def test_create_transcription_entities_wrong_entity(
836
835
  )
837
836
 
838
837
 
839
- @pytest.mark.parametrize("batch_size", [DEFAULT_BATCH_SIZE, 1])
840
- def test_create_transcription_entities(batch_size, responses, mock_elements_worker):
838
+ def test_create_transcription_entities(responses, mock_elements_worker):
841
839
  transcription = Transcription(id="transcription-id")
842
840
 
843
841
  # Call to Transcription entities creation in bulk
844
- if batch_size > 1:
845
- responses.add(
846
- responses.POST,
847
- "http://testserver/api/v1/transcription/transcription-id/entities/bulk/",
848
- status=201,
849
- match=[
850
- matchers.json_params_matcher(
851
- {
852
- "worker_run_id": "56785678-5678-5678-5678-567856785678",
853
- "entities": [
854
- {
855
- "name": "Teklia",
856
- "type_id": "22222222-2222-2222-2222-222222222222",
857
- "offset": 0,
858
- "length": 6,
859
- "confidence": 1.0,
860
- },
861
- {
862
- "name": "Team Rocket",
863
- "type_id": "22222222-2222-2222-2222-222222222222",
864
- "offset": 7,
865
- "length": 11,
866
- "confidence": 1.0,
867
- },
868
- ],
869
- }
870
- )
871
- ],
872
- json={
873
- "entities": [
874
- {
875
- "transcription_entity_id": "transc-entity-id",
876
- "entity_id": "entity-id1",
877
- },
878
- {
879
- "transcription_entity_id": "transc-entity-id",
880
- "entity_id": "entity-id2",
881
- },
882
- ]
883
- },
884
- )
885
- else:
886
- for idx, (name, offset, length) in enumerate(
887
- [
888
- ("Teklia", 0, 6),
889
- ("Team Rocket", 7, 11),
890
- ],
891
- start=1,
892
- ):
893
- responses.add(
894
- responses.POST,
895
- "http://testserver/api/v1/transcription/transcription-id/entities/bulk/",
896
- status=201,
897
- match=[
898
- matchers.json_params_matcher(
899
- {
900
- "worker_run_id": "56785678-5678-5678-5678-567856785678",
901
- "entities": [
902
- {
903
- "name": name,
904
- "type_id": "22222222-2222-2222-2222-222222222222",
905
- "offset": offset,
906
- "length": length,
907
- "confidence": 1.0,
908
- }
909
- ],
910
- }
911
- )
912
- ],
913
- json={
842
+ responses.add(
843
+ responses.POST,
844
+ "http://testserver/api/v1/transcription/transcription-id/entities/bulk/",
845
+ status=201,
846
+ match=[
847
+ matchers.json_params_matcher(
848
+ {
849
+ "worker_run_id": "56785678-5678-5678-5678-567856785678",
914
850
  "entities": [
915
851
  {
916
- "transcription_entity_id": "transc-entity-id",
917
- "entity_id": f"entity-id{idx}",
918
- }
919
- ]
920
- },
852
+ "name": "Teklia",
853
+ "type_id": "22222222-2222-2222-2222-222222222222",
854
+ "offset": 0,
855
+ "length": 6,
856
+ "confidence": 1.0,
857
+ },
858
+ {
859
+ "name": "Team Rocket",
860
+ "type_id": "22222222-2222-2222-2222-222222222222",
861
+ "offset": 7,
862
+ "length": 11,
863
+ "confidence": 1.0,
864
+ },
865
+ ],
866
+ }
921
867
  )
868
+ ],
869
+ json={
870
+ "entities": [
871
+ {
872
+ "transcription_entity_id": "transc-entity-id",
873
+ "entity_id": "entity-id1",
874
+ },
875
+ {
876
+ "transcription_entity_id": "transc-entity-id",
877
+ "entity_id": "entity-id2",
878
+ },
879
+ ]
880
+ },
881
+ )
922
882
 
923
883
  # Store entity type/slug correspondence on the worker
924
884
  mock_elements_worker.entity_types = {
@@ -942,26 +902,16 @@ def test_create_transcription_entities(batch_size, responses, mock_elements_work
942
902
  "confidence": 1.0,
943
903
  },
944
904
  ],
945
- batch_size=batch_size,
946
905
  )
947
906
 
948
907
  assert len(created_objects) == 2
949
908
 
950
- bulk_api_calls = [
909
+ assert len(responses.calls) == len(BASE_API_CALLS) + 1
910
+ assert [
911
+ (call.request.method, call.request.url) for call in responses.calls
912
+ ] == BASE_API_CALLS + [
951
913
  (
952
914
  "POST",
953
915
  "http://testserver/api/v1/transcription/transcription-id/entities/bulk/",
954
916
  )
955
917
  ]
956
- if batch_size != DEFAULT_BATCH_SIZE:
957
- bulk_api_calls.append(
958
- (
959
- "POST",
960
- "http://testserver/api/v1/transcription/transcription-id/entities/bulk/",
961
- )
962
- )
963
-
964
- assert len(responses.calls) == len(BASE_API_CALLS) + len(bulk_api_calls)
965
- assert [
966
- (call.request.method, call.request.url) for call in responses.calls
967
- ] == BASE_API_CALLS + bulk_api_calls