arkindex-base-worker 0.5.1b2__tar.gz → 0.5.1b4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66)
  1. {arkindex_base_worker-0.5.1b2 → arkindex_base_worker-0.5.1b4}/PKG-INFO +1 -1
  2. {arkindex_base_worker-0.5.1b2 → arkindex_base_worker-0.5.1b4}/arkindex_base_worker.egg-info/PKG-INFO +1 -1
  3. {arkindex_base_worker-0.5.1b2 → arkindex_base_worker-0.5.1b4}/arkindex_worker/worker/__init__.py +7 -20
  4. {arkindex_base_worker-0.5.1b2 → arkindex_base_worker-0.5.1b4}/arkindex_worker/worker/element.py +11 -0
  5. {arkindex_base_worker-0.5.1b2 → arkindex_base_worker-0.5.1b4}/pyproject.toml +1 -1
  6. {arkindex_base_worker-0.5.1b2 → arkindex_base_worker-0.5.1b4}/tests/conftest.py +46 -17
  7. {arkindex_base_worker-0.5.1b2 → arkindex_base_worker-0.5.1b4}/tests/test_elements_worker/test_worker.py +26 -14
  8. {arkindex_base_worker-0.5.1b2 → arkindex_base_worker-0.5.1b4}/LICENSE +0 -0
  9. {arkindex_base_worker-0.5.1b2 → arkindex_base_worker-0.5.1b4}/README.md +0 -0
  10. {arkindex_base_worker-0.5.1b2 → arkindex_base_worker-0.5.1b4}/arkindex_base_worker.egg-info/SOURCES.txt +0 -0
  11. {arkindex_base_worker-0.5.1b2 → arkindex_base_worker-0.5.1b4}/arkindex_base_worker.egg-info/dependency_links.txt +0 -0
  12. {arkindex_base_worker-0.5.1b2 → arkindex_base_worker-0.5.1b4}/arkindex_base_worker.egg-info/requires.txt +0 -0
  13. {arkindex_base_worker-0.5.1b2 → arkindex_base_worker-0.5.1b4}/arkindex_base_worker.egg-info/top_level.txt +0 -0
  14. {arkindex_base_worker-0.5.1b2 → arkindex_base_worker-0.5.1b4}/arkindex_worker/__init__.py +0 -0
  15. {arkindex_base_worker-0.5.1b2 → arkindex_base_worker-0.5.1b4}/arkindex_worker/cache.py +0 -0
  16. {arkindex_base_worker-0.5.1b2 → arkindex_base_worker-0.5.1b4}/arkindex_worker/image.py +0 -0
  17. {arkindex_base_worker-0.5.1b2 → arkindex_base_worker-0.5.1b4}/arkindex_worker/models.py +0 -0
  18. {arkindex_base_worker-0.5.1b2 → arkindex_base_worker-0.5.1b4}/arkindex_worker/utils.py +0 -0
  19. {arkindex_base_worker-0.5.1b2 → arkindex_base_worker-0.5.1b4}/arkindex_worker/worker/base.py +0 -0
  20. {arkindex_base_worker-0.5.1b2 → arkindex_base_worker-0.5.1b4}/arkindex_worker/worker/classification.py +0 -0
  21. {arkindex_base_worker-0.5.1b2 → arkindex_base_worker-0.5.1b4}/arkindex_worker/worker/corpus.py +0 -0
  22. {arkindex_base_worker-0.5.1b2 → arkindex_base_worker-0.5.1b4}/arkindex_worker/worker/dataset.py +0 -0
  23. {arkindex_base_worker-0.5.1b2 → arkindex_base_worker-0.5.1b4}/arkindex_worker/worker/entity.py +0 -0
  24. {arkindex_base_worker-0.5.1b2 → arkindex_base_worker-0.5.1b4}/arkindex_worker/worker/image.py +0 -0
  25. {arkindex_base_worker-0.5.1b2 → arkindex_base_worker-0.5.1b4}/arkindex_worker/worker/metadata.py +0 -0
  26. {arkindex_base_worker-0.5.1b2 → arkindex_base_worker-0.5.1b4}/arkindex_worker/worker/process.py +0 -0
  27. {arkindex_base_worker-0.5.1b2 → arkindex_base_worker-0.5.1b4}/arkindex_worker/worker/task.py +0 -0
  28. {arkindex_base_worker-0.5.1b2 → arkindex_base_worker-0.5.1b4}/arkindex_worker/worker/training.py +0 -0
  29. {arkindex_base_worker-0.5.1b2 → arkindex_base_worker-0.5.1b4}/arkindex_worker/worker/transcription.py +0 -0
  30. {arkindex_base_worker-0.5.1b2 → arkindex_base_worker-0.5.1b4}/examples/standalone/python/worker.py +0 -0
  31. {arkindex_base_worker-0.5.1b2 → arkindex_base_worker-0.5.1b4}/examples/tooled/python/worker.py +0 -0
  32. {arkindex_base_worker-0.5.1b2 → arkindex_base_worker-0.5.1b4}/hooks/pre_gen_project.py +0 -0
  33. {arkindex_base_worker-0.5.1b2 → arkindex_base_worker-0.5.1b4}/setup.cfg +0 -0
  34. {arkindex_base_worker-0.5.1b2 → arkindex_base_worker-0.5.1b4}/tests/__init__.py +0 -0
  35. {arkindex_base_worker-0.5.1b2 → arkindex_base_worker-0.5.1b4}/tests/test_base_worker.py +0 -0
  36. {arkindex_base_worker-0.5.1b2 → arkindex_base_worker-0.5.1b4}/tests/test_cache.py +0 -0
  37. {arkindex_base_worker-0.5.1b2 → arkindex_base_worker-0.5.1b4}/tests/test_dataset_worker.py +0 -0
  38. {arkindex_base_worker-0.5.1b2 → arkindex_base_worker-0.5.1b4}/tests/test_element.py +0 -0
  39. {arkindex_base_worker-0.5.1b2 → arkindex_base_worker-0.5.1b4}/tests/test_elements_worker/__init__.py +0 -0
  40. {arkindex_base_worker-0.5.1b2 → arkindex_base_worker-0.5.1b4}/tests/test_elements_worker/test_classification.py +0 -0
  41. {arkindex_base_worker-0.5.1b2 → arkindex_base_worker-0.5.1b4}/tests/test_elements_worker/test_cli.py +0 -0
  42. {arkindex_base_worker-0.5.1b2 → arkindex_base_worker-0.5.1b4}/tests/test_elements_worker/test_corpus.py +0 -0
  43. {arkindex_base_worker-0.5.1b2 → arkindex_base_worker-0.5.1b4}/tests/test_elements_worker/test_dataset.py +0 -0
  44. {arkindex_base_worker-0.5.1b2 → arkindex_base_worker-0.5.1b4}/tests/test_elements_worker/test_element.py +0 -0
  45. {arkindex_base_worker-0.5.1b2 → arkindex_base_worker-0.5.1b4}/tests/test_elements_worker/test_element_create_multiple.py +0 -0
  46. {arkindex_base_worker-0.5.1b2 → arkindex_base_worker-0.5.1b4}/tests/test_elements_worker/test_element_create_single.py +0 -0
  47. {arkindex_base_worker-0.5.1b2 → arkindex_base_worker-0.5.1b4}/tests/test_elements_worker/test_element_list_children.py +0 -0
  48. {arkindex_base_worker-0.5.1b2 → arkindex_base_worker-0.5.1b4}/tests/test_elements_worker/test_element_list_parents.py +0 -0
  49. {arkindex_base_worker-0.5.1b2 → arkindex_base_worker-0.5.1b4}/tests/test_elements_worker/test_entity.py +0 -0
  50. {arkindex_base_worker-0.5.1b2 → arkindex_base_worker-0.5.1b4}/tests/test_elements_worker/test_image.py +0 -0
  51. {arkindex_base_worker-0.5.1b2 → arkindex_base_worker-0.5.1b4}/tests/test_elements_worker/test_metadata.py +0 -0
  52. {arkindex_base_worker-0.5.1b2 → arkindex_base_worker-0.5.1b4}/tests/test_elements_worker/test_process.py +0 -0
  53. {arkindex_base_worker-0.5.1b2 → arkindex_base_worker-0.5.1b4}/tests/test_elements_worker/test_task.py +0 -0
  54. {arkindex_base_worker-0.5.1b2 → arkindex_base_worker-0.5.1b4}/tests/test_elements_worker/test_training.py +0 -0
  55. {arkindex_base_worker-0.5.1b2 → arkindex_base_worker-0.5.1b4}/tests/test_elements_worker/test_transcription_create.py +0 -0
  56. {arkindex_base_worker-0.5.1b2 → arkindex_base_worker-0.5.1b4}/tests/test_elements_worker/test_transcription_create_with_elements.py +0 -0
  57. {arkindex_base_worker-0.5.1b2 → arkindex_base_worker-0.5.1b4}/tests/test_elements_worker/test_transcription_list.py +0 -0
  58. {arkindex_base_worker-0.5.1b2 → arkindex_base_worker-0.5.1b4}/tests/test_image.py +0 -0
  59. {arkindex_base_worker-0.5.1b2 → arkindex_base_worker-0.5.1b4}/tests/test_merge.py +0 -0
  60. {arkindex_base_worker-0.5.1b2 → arkindex_base_worker-0.5.1b4}/tests/test_modern_config.py +0 -0
  61. {arkindex_base_worker-0.5.1b2 → arkindex_base_worker-0.5.1b4}/tests/test_utils.py +0 -0
  62. {arkindex_base_worker-0.5.1b2 → arkindex_base_worker-0.5.1b4}/worker-demo/tests/__init__.py +0 -0
  63. {arkindex_base_worker-0.5.1b2 → arkindex_base_worker-0.5.1b4}/worker-demo/tests/conftest.py +0 -0
  64. {arkindex_base_worker-0.5.1b2 → arkindex_base_worker-0.5.1b4}/worker-demo/tests/test_worker.py +0 -0
  65. {arkindex_base_worker-0.5.1b2 → arkindex_base_worker-0.5.1b4}/worker-demo/worker_demo/__init__.py +0 -0
  66. {arkindex_base_worker-0.5.1b2 → arkindex_base_worker-0.5.1b4}/worker-demo/worker_demo/worker.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: arkindex-base-worker
3
- Version: 0.5.1b2
3
+ Version: 0.5.1b4
4
4
  Summary: Base Worker to easily build Arkindex ML workflows
5
5
  Author-email: Teklia <contact@teklia.com>
6
6
  Maintainer-email: Teklia <contact@teklia.com>
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: arkindex-base-worker
3
- Version: 0.5.1b2
3
+ Version: 0.5.1b4
4
4
  Summary: Base Worker to easily build Arkindex ML workflows
5
5
  Author-email: Teklia <contact@teklia.com>
6
6
  Maintainer-email: Teklia <contact@teklia.com>
@@ -33,13 +33,10 @@ from arkindex_worker.worker.transcription import TranscriptionMixin
33
33
 
34
34
 
35
35
  class WorkerActivityIterator:
36
- def __init__(self, api_client, types):
36
+ def __init__(self, api_client):
37
37
  # Use same api client as main class
38
38
  self.api_client = api_client
39
39
 
40
- # Index element types by ID
41
- self.types = {t["id"]: t["slug"] for t in types}
42
-
43
40
  logger.info(
44
41
  "Using StartWorkerActivity instead of reading init_elements JSON file"
45
42
  )
@@ -53,7 +50,7 @@ class WorkerActivityIterator:
53
50
 
54
51
  def __next__(self):
55
52
  """
56
- Provide a new element from a worker activity upon each iteration
53
+ Provide a new element ID from a worker activity upon each iteration
57
54
  """
58
55
  try:
59
56
  data = self.api_client.request("StartWorkerActivity")
@@ -67,12 +64,7 @@ class WorkerActivityIterator:
67
64
  )
68
65
  raise e
69
66
 
70
- # Find the type slug using API provided type_id
71
- type_id = data["type_id"]
72
- if type_id not in self.types:
73
- raise Exception(f"Unknown type {type_id}")
74
-
75
- return Element(type=self.types[type_id], **data)
67
+ return data["id"]
76
68
 
77
69
 
78
70
  class ElementsWorker(
@@ -154,14 +146,9 @@ class ElementsWorker(
154
146
  elif self.process_mode == ProcessMode.Export:
155
147
  # For export mode processes, use list_process_elements and return element IDs
156
148
  return {item["id"] for item in self.list_process_elements()}
157
- elif self.args.consume_worker_activities:
158
- # We need to list corpus types as the StartWorkerActivity endpoint only provide type_id
159
- self.list_corpus_types()
160
-
149
+ elif self.consume_worker_activities:
161
150
  # Consume worker activitives one by one
162
- return WorkerActivityIterator(
163
- self.api_client, types=self.corpus_types.values()
164
- )
151
+ return WorkerActivityIterator(self.api_client)
165
152
 
166
153
  invalid_element_ids = list(filter(invalid_element_id, out))
167
154
  assert not invalid_element_ids, (
@@ -195,7 +182,7 @@ class ElementsWorker(
195
182
  - when running with init_elements, we have a known list
196
183
  - when running with StartWorkerActivity, we have a queue of unknown size
197
184
  """
198
- return self.args.consume_worker_activities
185
+ return self.consume_worker_activities
199
186
 
200
187
  def run(self):
201
188
  """
@@ -241,7 +228,7 @@ class ElementsWorker(
241
228
 
242
229
  # Process the element and report its progress if activities are enabled
243
230
  # We do not update the worker activity to "Started" state when consuming them
244
- if self.args.consume_worker_activities or self.update_activity(
231
+ if self.consume_worker_activities or self.update_activity(
245
232
  element.id, ActivityState.Started
246
233
  ):
247
234
  self.process_element(element)
@@ -55,6 +55,17 @@ class ElementMixin:
55
55
  )
56
56
  super().add_arguments()
57
57
 
58
+ @property
59
+ def consume_worker_activities(self) -> bool:
60
+ """
61
+ Helper to detect if the worker rely on an elements.json or consume directly worker activities
62
+ Uses the process information when available, fallback to CLI args
63
+ """
64
+ if self.process_information is not None:
65
+ return self.process_information.get("skip_elements_json") is True
66
+
67
+ return self.args.consume_worker_activities
68
+
58
69
  def list_corpus_types(self):
59
70
  """
60
71
  Loads available element types in corpus.
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "arkindex-base-worker"
7
- version = "0.5.1b2"
7
+ version = "0.5.1b4"
8
8
  description = "Base Worker to easily build Arkindex ML workflows"
9
9
  license = { file = "LICENSE" }
10
10
  dependencies = [
@@ -153,6 +153,7 @@ def _mock_worker_run_api(responses):
153
153
  "train_folder_id": None,
154
154
  "validation_folder_id": None,
155
155
  "test_folder_id": None,
156
+ "skip_elements_json": False,
156
157
  },
157
158
  "summary": "Worker Fake worker @ 123412",
158
159
  }
@@ -346,29 +347,57 @@ def mock_elements_worker_consume_wa(monkeypatch, responses, mock_elements_worker
346
347
  instead of reading a JSON file
347
348
  """
348
349
 
349
- # Enable consume worker activities mode from CLI args
350
- mock_elements_worker.args.consume_worker_activities = True
351
-
352
- # Worker requires element types from corpus details as they are not provided by StartWorkerActivity
353
- responses.add(
350
+ # Enable consume worker activities through the process configuration
351
+ responses.replace(
354
352
  responses.GET,
355
- "http://testserver/api/v1/corpus/11111111-1111-1111-1111-111111111111/",
353
+ "http://testserver/api/v1/process/workers/56785678-5678-5678-5678-567856785678/",
356
354
  status=200,
357
355
  json={
358
- "id": "11111111-1111-1111-1111-111111111111",
359
- "name": "Test corpus",
360
- "types": [
361
- {
362
- "id": "page-aaaa-aaaa-aaaa-aaaaaaaaaaaa",
363
- "slug": "page",
364
- "display_name": "Page",
365
- "folder": False,
366
- "color": "28b62c",
367
- }
368
- ],
356
+ "id": "56785678-5678-5678-5678-567856785678",
357
+ "parents": [],
358
+ "worker": {
359
+ "id": "deadbeef-1234-5678-1234-worker",
360
+ "name": "Fake worker",
361
+ "slug": "fake_worker",
362
+ "type": "classifier",
363
+ },
364
+ "worker_version": {
365
+ "id": "12341234-1234-1234-1234-123412341234",
366
+ "configuration": {
367
+ "docker": {"image": "python:3"},
368
+ "configuration": {"someKey": "someValue"},
369
+ "secrets": [],
370
+ },
371
+ "worker": {
372
+ "id": "deadbeef-1234-5678-1234-worker",
373
+ "name": "Fake worker",
374
+ "slug": "fake_worker",
375
+ "type": "classifier",
376
+ },
377
+ },
378
+ "configuration": None,
379
+ "model_version": None,
380
+ "process": {
381
+ "name": None,
382
+ "id": "aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeffff",
383
+ "state": "running",
384
+ "mode": "workers",
385
+ "corpus": CORPUS_ID,
386
+ "use_cache": False,
387
+ "activity_state": "ready",
388
+ "model_id": None,
389
+ "train_folder_id": None,
390
+ "validation_folder_id": None,
391
+ "test_folder_id": None,
392
+ "skip_elements_json": True,
393
+ },
394
+ "summary": "Worker Fake worker @ 123412",
369
395
  },
370
396
  )
371
397
 
398
+ # Call configure again to use updated process infos
399
+ mock_elements_worker.configure()
400
+
372
401
  return mock_elements_worker
373
402
 
374
403
 
@@ -698,7 +698,8 @@ def test_run_consuming_worker_activities(
698
698
  assert mock_elements_worker_consume_wa.is_read_only is False
699
699
 
700
700
  # Provide 2 worker activities to run and the corresponding update call
701
- for elt_id in ("page_1", "page_2"):
701
+ # and 2 element details response
702
+ for i, elt_id in enumerate(("page_1", "page_2"), 1):
702
703
  responses.add(
703
704
  responses.POST,
704
705
  "http://testserver/api/v1/process/start-activity/",
@@ -706,7 +707,7 @@ def test_run_consuming_worker_activities(
706
707
  json={
707
708
  "id": elt_id,
708
709
  "type_id": "page-aaaa-aaaa-aaaa-aaaaaaaaaaaa", # Element type provided by mock corpus
709
- "name": "Page XXX",
710
+ "name": f"Page n°{i}",
710
711
  },
711
712
  )
712
713
  responses.add(
@@ -714,6 +715,16 @@ def test_run_consuming_worker_activities(
714
715
  "http://testserver/api/v1/workers/versions/56785678-5678-5678-5678-567856785678/activity/",
715
716
  status=200,
716
717
  )
718
+ responses.add(
719
+ responses.GET,
720
+ f"http://testserver/api/v1/element/{elt_id}/",
721
+ status=200,
722
+ json={
723
+ "id": elt_id,
724
+ "type": "page",
725
+ "name": f"Page n°{i}",
726
+ },
727
+ )
717
728
 
718
729
  # Then a 404 to stop iterating
719
730
  responses.add(
@@ -725,18 +736,19 @@ def test_run_consuming_worker_activities(
725
736
  # Simply run the process
726
737
  mock_elements_worker_consume_wa.run()
727
738
 
728
- assert len(responses.calls) == len(BASE_API_CALLS) + 6
739
+ # We call twice configure in the conftest
740
+ assert len(responses.calls) == len(BASE_API_CALLS) * 2 + 7
729
741
  assert [
730
742
  (call.request.method, call.request.url) for call in responses.calls
731
- ] == BASE_API_CALLS + [
732
- (
733
- "GET",
734
- "http://testserver/api/v1/corpus/11111111-1111-1111-1111-111111111111/",
735
- ),
743
+ ] == BASE_API_CALLS * 2 + [
736
744
  (
737
745
  "POST",
738
746
  "http://testserver/api/v1/process/start-activity/",
739
747
  ),
748
+ (
749
+ "GET",
750
+ "http://testserver/api/v1/element/page_1/",
751
+ ),
740
752
  (
741
753
  "PUT",
742
754
  "http://testserver/api/v1/workers/versions/56785678-5678-5678-5678-567856785678/activity/",
@@ -745,6 +757,10 @@ def test_run_consuming_worker_activities(
745
757
  "POST",
746
758
  "http://testserver/api/v1/process/start-activity/",
747
759
  ),
760
+ (
761
+ "GET",
762
+ "http://testserver/api/v1/element/page_2/",
763
+ ),
748
764
  (
749
765
  "PUT",
750
766
  "http://testserver/api/v1/workers/versions/56785678-5678-5678-5678-567856785678/activity/",
@@ -756,21 +772,17 @@ def test_run_consuming_worker_activities(
756
772
  ]
757
773
 
758
774
  assert [(record.levelno, record.message) for record in caplog.records] == [
759
- (
760
- logging.INFO,
761
- "Loaded 1 element type in corpus (11111111-1111-1111-1111-111111111111).",
762
- ),
763
775
  (
764
776
  logging.INFO,
765
777
  "Using StartWorkerActivity instead of reading init_elements JSON file",
766
778
  ),
767
779
  (
768
780
  logging.INFO,
769
- "Processing page Page XXX (page_1) (n°1)",
781
+ "Processing page Page n°1 (page_1) (n°1)",
770
782
  ),
771
783
  (
772
784
  logging.INFO,
773
- "Processing page Page XXX (page_2) (n°2)",
785
+ "Processing page Page n°2 (page_2) (n°2)",
774
786
  ),
775
787
  (
776
788
  logging.INFO,