arkindex-base-worker 0.5.1b1__tar.gz → 0.5.1b4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66)
  1. {arkindex_base_worker-0.5.1b1 → arkindex_base_worker-0.5.1b4}/PKG-INFO +1 -1
  2. {arkindex_base_worker-0.5.1b1 → arkindex_base_worker-0.5.1b4}/arkindex_base_worker.egg-info/PKG-INFO +1 -1
  3. {arkindex_base_worker-0.5.1b1 → arkindex_base_worker-0.5.1b4}/arkindex_base_worker.egg-info/SOURCES.txt +1 -0
  4. {arkindex_base_worker-0.5.1b1 → arkindex_base_worker-0.5.1b4}/arkindex_worker/worker/__init__.py +7 -20
  5. {arkindex_base_worker-0.5.1b1 → arkindex_base_worker-0.5.1b4}/arkindex_worker/worker/base.py +44 -1
  6. {arkindex_base_worker-0.5.1b1 → arkindex_base_worker-0.5.1b4}/arkindex_worker/worker/element.py +11 -0
  7. {arkindex_base_worker-0.5.1b1 → arkindex_base_worker-0.5.1b4}/pyproject.toml +1 -1
  8. {arkindex_base_worker-0.5.1b1 → arkindex_base_worker-0.5.1b4}/tests/conftest.py +103 -17
  9. {arkindex_base_worker-0.5.1b1 → arkindex_base_worker-0.5.1b4}/tests/test_base_worker.py +66 -2
  10. {arkindex_base_worker-0.5.1b1 → arkindex_base_worker-0.5.1b4}/tests/test_dataset_worker.py +5 -0
  11. {arkindex_base_worker-0.5.1b1 → arkindex_base_worker-0.5.1b4}/tests/test_elements_worker/__init__.py +4 -0
  12. {arkindex_base_worker-0.5.1b1 → arkindex_base_worker-0.5.1b4}/tests/test_elements_worker/test_worker.py +26 -14
  13. arkindex_base_worker-0.5.1b4/tests/test_modern_config.py +81 -0
  14. {arkindex_base_worker-0.5.1b1 → arkindex_base_worker-0.5.1b4}/LICENSE +0 -0
  15. {arkindex_base_worker-0.5.1b1 → arkindex_base_worker-0.5.1b4}/README.md +0 -0
  16. {arkindex_base_worker-0.5.1b1 → arkindex_base_worker-0.5.1b4}/arkindex_base_worker.egg-info/dependency_links.txt +0 -0
  17. {arkindex_base_worker-0.5.1b1 → arkindex_base_worker-0.5.1b4}/arkindex_base_worker.egg-info/requires.txt +0 -0
  18. {arkindex_base_worker-0.5.1b1 → arkindex_base_worker-0.5.1b4}/arkindex_base_worker.egg-info/top_level.txt +0 -0
  19. {arkindex_base_worker-0.5.1b1 → arkindex_base_worker-0.5.1b4}/arkindex_worker/__init__.py +0 -0
  20. {arkindex_base_worker-0.5.1b1 → arkindex_base_worker-0.5.1b4}/arkindex_worker/cache.py +0 -0
  21. {arkindex_base_worker-0.5.1b1 → arkindex_base_worker-0.5.1b4}/arkindex_worker/image.py +0 -0
  22. {arkindex_base_worker-0.5.1b1 → arkindex_base_worker-0.5.1b4}/arkindex_worker/models.py +0 -0
  23. {arkindex_base_worker-0.5.1b1 → arkindex_base_worker-0.5.1b4}/arkindex_worker/utils.py +0 -0
  24. {arkindex_base_worker-0.5.1b1 → arkindex_base_worker-0.5.1b4}/arkindex_worker/worker/classification.py +0 -0
  25. {arkindex_base_worker-0.5.1b1 → arkindex_base_worker-0.5.1b4}/arkindex_worker/worker/corpus.py +0 -0
  26. {arkindex_base_worker-0.5.1b1 → arkindex_base_worker-0.5.1b4}/arkindex_worker/worker/dataset.py +0 -0
  27. {arkindex_base_worker-0.5.1b1 → arkindex_base_worker-0.5.1b4}/arkindex_worker/worker/entity.py +0 -0
  28. {arkindex_base_worker-0.5.1b1 → arkindex_base_worker-0.5.1b4}/arkindex_worker/worker/image.py +0 -0
  29. {arkindex_base_worker-0.5.1b1 → arkindex_base_worker-0.5.1b4}/arkindex_worker/worker/metadata.py +0 -0
  30. {arkindex_base_worker-0.5.1b1 → arkindex_base_worker-0.5.1b4}/arkindex_worker/worker/process.py +0 -0
  31. {arkindex_base_worker-0.5.1b1 → arkindex_base_worker-0.5.1b4}/arkindex_worker/worker/task.py +0 -0
  32. {arkindex_base_worker-0.5.1b1 → arkindex_base_worker-0.5.1b4}/arkindex_worker/worker/training.py +0 -0
  33. {arkindex_base_worker-0.5.1b1 → arkindex_base_worker-0.5.1b4}/arkindex_worker/worker/transcription.py +0 -0
  34. {arkindex_base_worker-0.5.1b1 → arkindex_base_worker-0.5.1b4}/examples/standalone/python/worker.py +0 -0
  35. {arkindex_base_worker-0.5.1b1 → arkindex_base_worker-0.5.1b4}/examples/tooled/python/worker.py +0 -0
  36. {arkindex_base_worker-0.5.1b1 → arkindex_base_worker-0.5.1b4}/hooks/pre_gen_project.py +0 -0
  37. {arkindex_base_worker-0.5.1b1 → arkindex_base_worker-0.5.1b4}/setup.cfg +0 -0
  38. {arkindex_base_worker-0.5.1b1 → arkindex_base_worker-0.5.1b4}/tests/__init__.py +0 -0
  39. {arkindex_base_worker-0.5.1b1 → arkindex_base_worker-0.5.1b4}/tests/test_cache.py +0 -0
  40. {arkindex_base_worker-0.5.1b1 → arkindex_base_worker-0.5.1b4}/tests/test_element.py +0 -0
  41. {arkindex_base_worker-0.5.1b1 → arkindex_base_worker-0.5.1b4}/tests/test_elements_worker/test_classification.py +0 -0
  42. {arkindex_base_worker-0.5.1b1 → arkindex_base_worker-0.5.1b4}/tests/test_elements_worker/test_cli.py +0 -0
  43. {arkindex_base_worker-0.5.1b1 → arkindex_base_worker-0.5.1b4}/tests/test_elements_worker/test_corpus.py +0 -0
  44. {arkindex_base_worker-0.5.1b1 → arkindex_base_worker-0.5.1b4}/tests/test_elements_worker/test_dataset.py +0 -0
  45. {arkindex_base_worker-0.5.1b1 → arkindex_base_worker-0.5.1b4}/tests/test_elements_worker/test_element.py +0 -0
  46. {arkindex_base_worker-0.5.1b1 → arkindex_base_worker-0.5.1b4}/tests/test_elements_worker/test_element_create_multiple.py +0 -0
  47. {arkindex_base_worker-0.5.1b1 → arkindex_base_worker-0.5.1b4}/tests/test_elements_worker/test_element_create_single.py +0 -0
  48. {arkindex_base_worker-0.5.1b1 → arkindex_base_worker-0.5.1b4}/tests/test_elements_worker/test_element_list_children.py +0 -0
  49. {arkindex_base_worker-0.5.1b1 → arkindex_base_worker-0.5.1b4}/tests/test_elements_worker/test_element_list_parents.py +0 -0
  50. {arkindex_base_worker-0.5.1b1 → arkindex_base_worker-0.5.1b4}/tests/test_elements_worker/test_entity.py +0 -0
  51. {arkindex_base_worker-0.5.1b1 → arkindex_base_worker-0.5.1b4}/tests/test_elements_worker/test_image.py +0 -0
  52. {arkindex_base_worker-0.5.1b1 → arkindex_base_worker-0.5.1b4}/tests/test_elements_worker/test_metadata.py +0 -0
  53. {arkindex_base_worker-0.5.1b1 → arkindex_base_worker-0.5.1b4}/tests/test_elements_worker/test_process.py +0 -0
  54. {arkindex_base_worker-0.5.1b1 → arkindex_base_worker-0.5.1b4}/tests/test_elements_worker/test_task.py +0 -0
  55. {arkindex_base_worker-0.5.1b1 → arkindex_base_worker-0.5.1b4}/tests/test_elements_worker/test_training.py +0 -0
  56. {arkindex_base_worker-0.5.1b1 → arkindex_base_worker-0.5.1b4}/tests/test_elements_worker/test_transcription_create.py +0 -0
  57. {arkindex_base_worker-0.5.1b1 → arkindex_base_worker-0.5.1b4}/tests/test_elements_worker/test_transcription_create_with_elements.py +0 -0
  58. {arkindex_base_worker-0.5.1b1 → arkindex_base_worker-0.5.1b4}/tests/test_elements_worker/test_transcription_list.py +0 -0
  59. {arkindex_base_worker-0.5.1b1 → arkindex_base_worker-0.5.1b4}/tests/test_image.py +0 -0
  60. {arkindex_base_worker-0.5.1b1 → arkindex_base_worker-0.5.1b4}/tests/test_merge.py +0 -0
  61. {arkindex_base_worker-0.5.1b1 → arkindex_base_worker-0.5.1b4}/tests/test_utils.py +0 -0
  62. {arkindex_base_worker-0.5.1b1 → arkindex_base_worker-0.5.1b4}/worker-demo/tests/__init__.py +0 -0
  63. {arkindex_base_worker-0.5.1b1 → arkindex_base_worker-0.5.1b4}/worker-demo/tests/conftest.py +0 -0
  64. {arkindex_base_worker-0.5.1b1 → arkindex_base_worker-0.5.1b4}/worker-demo/tests/test_worker.py +0 -0
  65. {arkindex_base_worker-0.5.1b1 → arkindex_base_worker-0.5.1b4}/worker-demo/worker_demo/__init__.py +0 -0
  66. {arkindex_base_worker-0.5.1b1 → arkindex_base_worker-0.5.1b4}/worker-demo/worker_demo/worker.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: arkindex-base-worker
3
- Version: 0.5.1b1
3
+ Version: 0.5.1b4
4
4
  Summary: Base Worker to easily build Arkindex ML workflows
5
5
  Author-email: Teklia <contact@teklia.com>
6
6
  Maintainer-email: Teklia <contact@teklia.com>
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: arkindex-base-worker
3
- Version: 0.5.1b1
3
+ Version: 0.5.1b4
4
4
  Summary: Base Worker to easily build Arkindex ML workflows
5
5
  Author-email: Teklia <contact@teklia.com>
6
6
  Maintainer-email: Teklia <contact@teklia.com>
@@ -35,6 +35,7 @@ tests/test_dataset_worker.py
35
35
  tests/test_element.py
36
36
  tests/test_image.py
37
37
  tests/test_merge.py
38
+ tests/test_modern_config.py
38
39
  tests/test_utils.py
39
40
  tests/test_elements_worker/__init__.py
40
41
  tests/test_elements_worker/test_classification.py
@@ -33,13 +33,10 @@ from arkindex_worker.worker.transcription import TranscriptionMixin
33
33
 
34
34
 
35
35
  class WorkerActivityIterator:
36
- def __init__(self, api_client, types):
36
+ def __init__(self, api_client):
37
37
  # Use same api client as main class
38
38
  self.api_client = api_client
39
39
 
40
- # Index element types by ID
41
- self.types = {t["id"]: t["slug"] for t in types}
42
-
43
40
  logger.info(
44
41
  "Using StartWorkerActivity instead of reading init_elements JSON file"
45
42
  )
@@ -53,7 +50,7 @@ class WorkerActivityIterator:
53
50
 
54
51
  def __next__(self):
55
52
  """
56
- Provide a new element from a worker activity upon each iteration
53
+ Provide a new element ID from a worker activity upon each iteration
57
54
  """
58
55
  try:
59
56
  data = self.api_client.request("StartWorkerActivity")
@@ -67,12 +64,7 @@ class WorkerActivityIterator:
67
64
  )
68
65
  raise e
69
66
 
70
- # Find the type slug using API provided type_id
71
- type_id = data["type_id"]
72
- if type_id not in self.types:
73
- raise Exception(f"Unknown type {type_id}")
74
-
75
- return Element(type=self.types[type_id], **data)
67
+ return data["id"]
76
68
 
77
69
 
78
70
  class ElementsWorker(
@@ -154,14 +146,9 @@ class ElementsWorker(
154
146
  elif self.process_mode == ProcessMode.Export:
155
147
  # For export mode processes, use list_process_elements and return element IDs
156
148
  return {item["id"] for item in self.list_process_elements()}
157
- elif self.args.consume_worker_activities:
158
- # We need to list corpus types as the StartWorkerActivity endpoint only provide type_id
159
- self.list_corpus_types()
160
-
149
+ elif self.consume_worker_activities:
161
150
  # Consume worker activities one by one
162
- return WorkerActivityIterator(
163
- self.api_client, types=self.corpus_types.values()
164
- )
151
+ return WorkerActivityIterator(self.api_client)
165
152
 
166
153
  invalid_element_ids = list(filter(invalid_element_id, out))
167
154
  assert not invalid_element_ids, (
@@ -195,7 +182,7 @@ class ElementsWorker(
195
182
  - when running with init_elements, we have a known list
196
183
  - when running with StartWorkerActivity, we have a queue of unknown size
197
184
  """
198
- return self.args.consume_worker_activities
185
+ return self.consume_worker_activities
199
186
 
200
187
  def run(self):
201
188
  """
@@ -241,7 +228,7 @@ class ElementsWorker(
241
228
 
242
229
  # Process the element and report its progress if activities are enabled
243
230
  # We do not update the worker activity to "Started" state when consuming them
244
- if self.args.consume_worker_activities or self.update_activity(
231
+ if self.consume_worker_activities or self.update_activity(
245
232
  element.id, ActivityState.Started
246
233
  ):
247
234
  self.process_element(element)
@@ -9,12 +9,13 @@ import os
9
9
  import shutil
10
10
  from pathlib import Path
11
11
  from tempfile import mkdtemp
12
+ from typing import Any
12
13
 
13
14
  import gnupg
14
15
  import yaml
15
16
 
16
17
  from arkindex import options_from_env
17
- from arkindex.exceptions import ErrorResponse
18
+ from arkindex.exceptions import ClientError, ErrorResponse
18
19
  from arkindex_worker import logger
19
20
  from arkindex_worker.cache import (
20
21
  check_version,
@@ -260,6 +261,48 @@ class BaseWorker:
260
261
 
261
262
  logger.info(f"Loaded {worker_run['summary']} from API")
262
263
 
264
+ def _process_config_item(item: dict) -> tuple[str, Any]:
265
+ if not item["secret"]:
266
+ return (item["key"], item["value"])
267
+
268
+ # Load secret, only available in Arkindex EE
269
+ try:
270
+ secret = self.load_secret(Path(item["value"]))
271
+ except ClientError as e:
272
+ logger.error(
273
+ f"Failed to retrieve the secret {item['value']}, probably an Arkindex Community Edition: {e}"
274
+ )
275
+ return (item["key"], None)
276
+
277
+ return (item["key"], secret)
278
+
279
+ # Load worker run information
280
+ try:
281
+ config = self.api_client.request(
282
+ "RetrieveWorkerRunConfiguration", id=self.worker_run_id
283
+ )
284
+
285
+ # Provide the same configuration through all previous attributes
286
+ self.config = self.user_configuration = dict(
287
+ map(_process_config_item, config["configuration"])
288
+ )
289
+
290
+ # Provide secret values through the previous attribute
291
+ self.secrets = {
292
+ item["key"]: self.config[item["key"]]
293
+ for item in config["configuration"]
294
+ if item["secret"]
295
+ }
296
+ logger.info("Using modern configuration")
297
+
298
+ return # Stop here once we have modern configuration
299
+
300
+ except ErrorResponse as e:
301
+ if e.status_code != 400:
302
+ raise
303
+ logger.info("Modern configuration is not available")
304
+
305
+ # Use old-style configuration with local merge
263
306
  # Load model version configuration when available
264
307
  model_version = worker_run.get("model_version")
265
308
  if model_version:
@@ -55,6 +55,17 @@ class ElementMixin:
55
55
  )
56
56
  super().add_arguments()
57
57
 
58
+ @property
59
+ def consume_worker_activities(self) -> bool:
60
+ """
61
+ Helper to detect whether the worker relies on an elements.json file or consumes worker activities directly
62
+ Uses the process information when available, falls back to CLI args
63
+ """
64
+ if self.process_information is not None:
65
+ return self.process_information.get("skip_elements_json") is True
66
+
67
+ return self.args.consume_worker_activities
68
+
58
69
  def list_corpus_types(self):
59
70
  """
60
71
  Loads available element types in corpus.
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "arkindex-base-worker"
7
- version = "0.5.1b1"
7
+ version = "0.5.1b4"
8
8
  description = "Base Worker to easily build Arkindex ML workflows"
9
9
  license = { file = "LICENSE" }
10
10
  dependencies = [
@@ -153,6 +153,7 @@ def _mock_worker_run_api(responses):
153
153
  "train_folder_id": None,
154
154
  "validation_folder_id": None,
155
155
  "test_folder_id": None,
156
+ "skip_elements_json": False,
156
157
  },
157
158
  "summary": "Worker Fake worker @ 123412",
158
159
  }
@@ -165,6 +166,13 @@ def _mock_worker_run_api(responses):
165
166
  content_type="application/json",
166
167
  )
167
168
 
169
+ # By default, stick to classic configuration
170
+ responses.add(
171
+ responses.GET,
172
+ "http://testserver/api/v1/workers/runs/56785678-5678-5678-5678-567856785678/configuration/",
173
+ status=400,
174
+ )
175
+
168
176
 
169
177
  @pytest.fixture
170
178
  def _mock_worker_run_no_revision_api(responses):
@@ -233,6 +241,56 @@ def _mock_worker_run_no_revision_api(responses):
233
241
  )
234
242
 
235
243
 
244
+ @pytest.fixture
245
+ def mock_base_worker_modern_conf(mocker, responses):
246
+ """
247
+ Provide a base worker to test modern configuration with (not provided in the fixture)
248
+ """
249
+ worker = BaseWorker()
250
+ mocker.patch.object(sys, "argv")
251
+ worker.args = worker.parser.parse_args()
252
+
253
+ payload = {
254
+ "id": "56785678-5678-5678-5678-567856785678",
255
+ "parents": [],
256
+ "worker_version": {
257
+ "id": "12341234-1234-1234-1234-123412341234",
258
+ "worker": {
259
+ "id": "deadbeef-1234-5678-1234-worker",
260
+ "name": "Fake worker",
261
+ "slug": "fake_worker",
262
+ "type": "classifier",
263
+ },
264
+ "revision": {"hash": "deadbeef1234"},
265
+ "configuration": {
266
+ "configuration": {"extra_key1": "not showing up"},
267
+ "user_configuration": {"extra_key2": "not showing up"},
268
+ },
269
+ },
270
+ "configuration": {
271
+ "id": "af0daaf4-983e-4703-a7ed-a10f146d6684",
272
+ "name": "my-userconfig",
273
+ "configuration": {
274
+ "extra_key3": "not showing up",
275
+ },
276
+ },
277
+ "model_version": None,
278
+ "process": {
279
+ "id": "aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeffff",
280
+ "corpus": CORPUS_ID,
281
+ },
282
+ "summary": "Worker Fake worker @ 123412",
283
+ }
284
+ responses.add(
285
+ responses.GET,
286
+ "http://testserver/api/v1/process/workers/56785678-5678-5678-5678-567856785678/",
287
+ status=200,
288
+ json=payload,
289
+ )
290
+
291
+ return worker
292
+
293
+
236
294
  @pytest.fixture
237
295
  def _mock_activity_calls(responses):
238
296
  """
@@ -289,29 +347,57 @@ def mock_elements_worker_consume_wa(monkeypatch, responses, mock_elements_worker
289
347
  instead of reading a JSON file
290
348
  """
291
349
 
292
- # Enable consume worker activities mode from CLI args
293
- mock_elements_worker.args.consume_worker_activities = True
294
-
295
- # Worker requires element types from corpus details as they are not provided by StartWorkerActivity
296
- responses.add(
350
+ # Enable consume worker activities through the process configuration
351
+ responses.replace(
297
352
  responses.GET,
298
- "http://testserver/api/v1/corpus/11111111-1111-1111-1111-111111111111/",
353
+ "http://testserver/api/v1/process/workers/56785678-5678-5678-5678-567856785678/",
299
354
  status=200,
300
355
  json={
301
- "id": "11111111-1111-1111-1111-111111111111",
302
- "name": "Test corpus",
303
- "types": [
304
- {
305
- "id": "page-aaaa-aaaa-aaaa-aaaaaaaaaaaa",
306
- "slug": "page",
307
- "display_name": "Page",
308
- "folder": False,
309
- "color": "28b62c",
310
- }
311
- ],
356
+ "id": "56785678-5678-5678-5678-567856785678",
357
+ "parents": [],
358
+ "worker": {
359
+ "id": "deadbeef-1234-5678-1234-worker",
360
+ "name": "Fake worker",
361
+ "slug": "fake_worker",
362
+ "type": "classifier",
363
+ },
364
+ "worker_version": {
365
+ "id": "12341234-1234-1234-1234-123412341234",
366
+ "configuration": {
367
+ "docker": {"image": "python:3"},
368
+ "configuration": {"someKey": "someValue"},
369
+ "secrets": [],
370
+ },
371
+ "worker": {
372
+ "id": "deadbeef-1234-5678-1234-worker",
373
+ "name": "Fake worker",
374
+ "slug": "fake_worker",
375
+ "type": "classifier",
376
+ },
377
+ },
378
+ "configuration": None,
379
+ "model_version": None,
380
+ "process": {
381
+ "name": None,
382
+ "id": "aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeffff",
383
+ "state": "running",
384
+ "mode": "workers",
385
+ "corpus": CORPUS_ID,
386
+ "use_cache": False,
387
+ "activity_state": "ready",
388
+ "model_id": None,
389
+ "train_folder_id": None,
390
+ "validation_folder_id": None,
391
+ "test_folder_id": None,
392
+ "skip_elements_json": True,
393
+ },
394
+ "summary": "Worker Fake worker @ 123412",
312
395
  },
313
396
  )
314
397
 
398
+ # Call configure again to use updated process infos
399
+ mock_elements_worker.configure()
400
+
315
401
  return mock_elements_worker
316
402
 
317
403
 
@@ -190,6 +190,14 @@ def test_configure_worker_run(mocker, responses, caplog):
190
190
  body=json.dumps(payload),
191
191
  content_type="application/json",
192
192
  )
193
+
194
+ # By default, stick to classic configuration
195
+ responses.add(
196
+ responses.GET,
197
+ "http://testserver/api/v1/workers/runs/56785678-5678-5678-5678-567856785678/configuration/",
198
+ status=400,
199
+ )
200
+
193
201
  worker.args = worker.parser.parse_args()
194
202
  assert worker.is_read_only is False
195
203
  assert worker.worker_run_id == "56785678-5678-5678-5678-567856785678"
@@ -205,6 +213,11 @@ def test_configure_worker_run(mocker, responses, caplog):
205
213
  logging.INFO,
206
214
  "Loaded Worker Fake worker @ 123412 from API",
207
215
  ),
216
+ (
217
+ "arkindex_worker",
218
+ logging.INFO,
219
+ "Modern configuration is not available",
220
+ ),
208
221
  ("arkindex_worker", logging.INFO, "Loaded user configuration from WorkerRun"),
209
222
  ("arkindex_worker", logging.INFO, "User configuration retrieved"),
210
223
  ]
@@ -213,9 +226,16 @@ def test_configure_worker_run(mocker, responses, caplog):
213
226
 
214
227
 
215
228
  @pytest.mark.usefixtures("_mock_worker_run_no_revision_api")
216
- def test_configure_worker_run_no_revision(mocker, caplog):
229
+ def test_configure_worker_run_no_revision(mocker, caplog, responses):
217
230
  worker = BaseWorker()
218
231
 
232
+ # By default, stick to classic configuration
233
+ responses.add(
234
+ responses.GET,
235
+ "http://testserver/api/v1/workers/runs/56785678-5678-5678-5678-567856785678/configuration/",
236
+ status=400,
237
+ )
238
+
219
239
  mocker.patch.object(sys, "argv", ["worker"])
220
240
  worker.args = worker.parser.parse_args()
221
241
  assert worker.is_read_only is False
@@ -227,7 +247,12 @@ def test_configure_worker_run_no_revision(mocker, caplog):
227
247
  worker.configure()
228
248
 
229
249
  assert caplog.record_tuples == [
230
- ("arkindex_worker", logging.INFO, "Loaded Worker Fake worker @ 1 from API")
250
+ ("arkindex_worker", logging.INFO, "Loaded Worker Fake worker @ 1 from API"),
251
+ (
252
+ "arkindex_worker",
253
+ logging.INFO,
254
+ "Modern configuration is not available",
255
+ ),
231
256
  ]
232
257
 
233
258
 
@@ -283,6 +308,13 @@ def test_configure_user_configuration_defaults(mocker, responses):
283
308
  content_type="application/json",
284
309
  )
285
310
 
311
+ # By default, stick to classic configuration
312
+ responses.add(
313
+ responses.GET,
314
+ "http://testserver/api/v1/workers/runs/56785678-5678-5678-5678-567856785678/configuration/",
315
+ status=400,
316
+ )
317
+
286
318
  worker.configure()
287
319
 
288
320
  assert worker.user_configuration == {
@@ -340,6 +372,13 @@ def test_configure_user_config_debug(mocker, responses, debug):
340
372
  body=json.dumps(payload),
341
373
  content_type="application/json",
342
374
  )
375
+
376
+ # By default, stick to classic configuration
377
+ responses.add(
378
+ responses.GET,
379
+ "http://testserver/api/v1/workers/runs/56785678-5678-5678-5678-567856785678/configuration/",
380
+ status=400,
381
+ )
343
382
  worker.args = worker.parser.parse_args()
344
383
  worker.configure()
345
384
 
@@ -388,6 +427,12 @@ def test_configure_worker_run_missing_conf(mocker, responses):
388
427
  body=json.dumps(payload),
389
428
  content_type="application/json",
390
429
  )
430
+ # By default, stick to classic configuration
431
+ responses.add(
432
+ responses.GET,
433
+ "http://testserver/api/v1/workers/runs/56785678-5678-5678-5678-567856785678/configuration/",
434
+ status=400,
435
+ )
391
436
  worker.args = worker.parser.parse_args()
392
437
  worker.configure()
393
438
 
@@ -430,6 +475,12 @@ def test_configure_worker_run_no_worker_run_conf(mocker, responses):
430
475
  body=json.dumps(payload),
431
476
  content_type="application/json",
432
477
  )
478
+ # By default, stick to classic configuration
479
+ responses.add(
480
+ responses.GET,
481
+ "http://testserver/api/v1/workers/runs/56785678-5678-5678-5678-567856785678/configuration/",
482
+ status=400,
483
+ )
433
484
  worker.args = worker.parser.parse_args()
434
485
  worker.configure()
435
486
 
@@ -480,6 +531,12 @@ def test_configure_load_model_configuration(mocker, responses):
480
531
  body=json.dumps(payload),
481
532
  content_type="application/json",
482
533
  )
534
+ # By default, stick to classic configuration
535
+ responses.add(
536
+ responses.GET,
537
+ "http://testserver/api/v1/workers/runs/56785678-5678-5678-5678-567856785678/configuration/",
538
+ status=400,
539
+ )
483
540
  worker.args = worker.parser.parse_args()
484
541
  assert worker.is_read_only is False
485
542
  assert worker.worker_run_id == "56785678-5678-5678-5678-567856785678"
@@ -947,6 +1004,13 @@ def test_worker_config_multiple_source(
947
1004
  content_type="application/json",
948
1005
  )
949
1006
 
1007
+ # By default, stick to classic configuration
1008
+ responses.add(
1009
+ responses.GET,
1010
+ "http://testserver/api/v1/workers/runs/56785678-5678-5678-5678-567856785678/configuration/",
1011
+ status=400,
1012
+ )
1013
+
950
1014
  # Create and configure a worker
951
1015
  monkeypatch.setattr(sys, "argv", ["worker"])
952
1016
  worker = BaseWorker()
@@ -430,6 +430,7 @@ def test_run_no_sets(mocker, caplog, mock_dataset_worker):
430
430
 
431
431
  assert [(level, message) for _, level, message in caplog.record_tuples] == [
432
432
  (logging.INFO, "Loaded Worker Fake worker @ 123412 from API"),
433
+ (logging.INFO, "Modern configuration is not available"),
433
434
  (logging.WARNING, "No sets to process, stopping."),
434
435
  ]
435
436
 
@@ -453,6 +454,7 @@ def test_run_initial_dataset_state_error(
453
454
 
454
455
  assert [(level, message) for _, level, message in caplog.record_tuples] == [
455
456
  (logging.INFO, "Loaded Worker Fake worker @ 123412 from API"),
457
+ (logging.INFO, "Modern configuration is not available"),
456
458
  (
457
459
  logging.WARNING,
458
460
  "Failed running worker on Set (train) from Dataset (dataset_id): AssertionError('When processing a set, its dataset state should be Complete.')",
@@ -497,6 +499,7 @@ def test_run_download_dataset_artifact_api_error(
497
499
 
498
500
  assert [(level, message) for _, level, message in caplog.record_tuples] == [
499
501
  (logging.INFO, "Loaded Worker Fake worker @ 123412 from API"),
502
+ (logging.INFO, "Modern configuration is not available"),
500
503
  (
501
504
  logging.INFO,
502
505
  "Retrieving data for Set (train) from Dataset (dataset_id) (1/1)",
@@ -550,6 +553,7 @@ def test_run_no_downloaded_dataset_artifact_error(
550
553
 
551
554
  assert [(level, message) for _, level, message in caplog.record_tuples] == [
552
555
  (logging.INFO, "Loaded Worker Fake worker @ 123412 from API"),
556
+ (logging.INFO, "Modern configuration is not available"),
553
557
  (
554
558
  logging.INFO,
555
559
  "Retrieving data for Set (train) from Dataset (dataset_id) (1/1)",
@@ -626,6 +630,7 @@ def test_run(
626
630
 
627
631
  assert [(level, message) for _, level, message in caplog.record_tuples] == [
628
632
  (logging.INFO, "Loaded Worker Fake worker @ 123412 from API"),
633
+ (logging.INFO, "Modern configuration is not available"),
629
634
  (
630
635
  logging.INFO,
631
636
  "Retrieving data for Set (train) from Dataset (dataset_id) (1/1)",
@@ -4,4 +4,8 @@ BASE_API_CALLS = [
4
4
  "GET",
5
5
  "http://testserver/api/v1/process/workers/56785678-5678-5678-5678-567856785678/",
6
6
  ),
7
+ (
8
+ "GET",
9
+ "http://testserver/api/v1/workers/runs/56785678-5678-5678-5678-567856785678/configuration/",
10
+ ),
7
11
  ]
@@ -698,7 +698,8 @@ def test_run_consuming_worker_activities(
698
698
  assert mock_elements_worker_consume_wa.is_read_only is False
699
699
 
700
700
  # Provide 2 worker activities to run and the corresponding update call
701
- for elt_id in ("page_1", "page_2"):
701
+ # and 2 element details responses
702
+ for i, elt_id in enumerate(("page_1", "page_2"), 1):
702
703
  responses.add(
703
704
  responses.POST,
704
705
  "http://testserver/api/v1/process/start-activity/",
@@ -706,7 +707,7 @@ def test_run_consuming_worker_activities(
706
707
  json={
707
708
  "id": elt_id,
708
709
  "type_id": "page-aaaa-aaaa-aaaa-aaaaaaaaaaaa", # Element type provided by mock corpus
709
- "name": "Page XXX",
710
+ "name": f"Page n°{i}",
710
711
  },
711
712
  )
712
713
  responses.add(
@@ -714,6 +715,16 @@ def test_run_consuming_worker_activities(
714
715
  "http://testserver/api/v1/workers/versions/56785678-5678-5678-5678-567856785678/activity/",
715
716
  status=200,
716
717
  )
718
+ responses.add(
719
+ responses.GET,
720
+ f"http://testserver/api/v1/element/{elt_id}/",
721
+ status=200,
722
+ json={
723
+ "id": elt_id,
724
+ "type": "page",
725
+ "name": f"Page n°{i}",
726
+ },
727
+ )
717
728
 
718
729
  # Then a 404 to stop iterating
719
730
  responses.add(
@@ -725,18 +736,19 @@ def test_run_consuming_worker_activities(
725
736
  # Simply run the process
726
737
  mock_elements_worker_consume_wa.run()
727
738
 
728
- assert len(responses.calls) == len(BASE_API_CALLS) + 6
739
+ # We call configure twice in the conftest
740
+ assert len(responses.calls) == len(BASE_API_CALLS) * 2 + 7
729
741
  assert [
730
742
  (call.request.method, call.request.url) for call in responses.calls
731
- ] == BASE_API_CALLS + [
732
- (
733
- "GET",
734
- "http://testserver/api/v1/corpus/11111111-1111-1111-1111-111111111111/",
735
- ),
743
+ ] == BASE_API_CALLS * 2 + [
736
744
  (
737
745
  "POST",
738
746
  "http://testserver/api/v1/process/start-activity/",
739
747
  ),
748
+ (
749
+ "GET",
750
+ "http://testserver/api/v1/element/page_1/",
751
+ ),
740
752
  (
741
753
  "PUT",
742
754
  "http://testserver/api/v1/workers/versions/56785678-5678-5678-5678-567856785678/activity/",
@@ -745,6 +757,10 @@ def test_run_consuming_worker_activities(
745
757
  "POST",
746
758
  "http://testserver/api/v1/process/start-activity/",
747
759
  ),
760
+ (
761
+ "GET",
762
+ "http://testserver/api/v1/element/page_2/",
763
+ ),
748
764
  (
749
765
  "PUT",
750
766
  "http://testserver/api/v1/workers/versions/56785678-5678-5678-5678-567856785678/activity/",
@@ -756,21 +772,17 @@ def test_run_consuming_worker_activities(
756
772
  ]
757
773
 
758
774
  assert [(record.levelno, record.message) for record in caplog.records] == [
759
- (
760
- logging.INFO,
761
- "Loaded 1 element type in corpus (11111111-1111-1111-1111-111111111111).",
762
- ),
763
775
  (
764
776
  logging.INFO,
765
777
  "Using StartWorkerActivity instead of reading init_elements JSON file",
766
778
  ),
767
779
  (
768
780
  logging.INFO,
769
- "Processing page Page XXX (page_1) (n°1)",
781
+ "Processing page Page n°1 (page_1) (n°1)",
770
782
  ),
771
783
  (
772
784
  logging.INFO,
773
- "Processing page Page XXX (page_2) (n°2)",
785
+ "Processing page Page n°2 (page_2) (n°2)",
774
786
  ),
775
787
  (
776
788
  logging.INFO,
@@ -0,0 +1,81 @@
1
+ def test_simple_configuration(mock_base_worker_modern_conf, responses):
2
+ # Provide the full configuration directly from the worker run
3
+ responses.add(
4
+ responses.GET,
5
+ "http://testserver/api/v1/workers/runs/56785678-5678-5678-5678-567856785678/configuration/",
6
+ status=200,
7
+ json={"configuration": [{"key": "some_key", "value": "test", "secret": False}]},
8
+ )
9
+
10
+ mock_base_worker_modern_conf.configure()
11
+
12
+ assert mock_base_worker_modern_conf.config == {"some_key": "test"}
13
+ assert (
14
+ mock_base_worker_modern_conf.user_configuration
15
+ == mock_base_worker_modern_conf.config
16
+ )
17
+ assert mock_base_worker_modern_conf.secrets == {}
18
+
19
+
20
+ def test_empty(mock_base_worker_modern_conf, responses):
21
+ # Provide the full configuration directly from the worker run
22
+ responses.add(
23
+ responses.GET,
24
+ "http://testserver/api/v1/workers/runs/56785678-5678-5678-5678-567856785678/configuration/",
25
+ status=200,
26
+ json={"configuration": []},
27
+ )
28
+
29
+ mock_base_worker_modern_conf.configure()
30
+
31
+ assert mock_base_worker_modern_conf.config == {}
32
+ assert (
33
+ mock_base_worker_modern_conf.user_configuration
34
+ == mock_base_worker_modern_conf.config
35
+ )
36
+ assert mock_base_worker_modern_conf.secrets == {}
37
+
38
+
39
+ def test_with_secrets(mock_base_worker_modern_conf, responses):
40
+ # Provide the full configuration directly from the worker run
41
+ responses.add(
42
+ responses.GET,
43
+ "http://testserver/api/v1/workers/runs/56785678-5678-5678-5678-567856785678/configuration/",
44
+ status=200,
45
+ json={
46
+ "configuration": [
47
+ {"key": "some_key", "value": "test", "secret": False},
48
+ {
49
+ "key": "a_secret",
50
+ "value": "471b9e64-29af-48dc-8bda-1a64a2da0c12",
51
+ "secret": True,
52
+ },
53
+ ]
54
+ },
55
+ )
56
+
57
+ # Provide a secret value
58
+ responses.add(
59
+ responses.GET,
60
+ "http://testserver/api/v1/secret/471b9e64-29af-48dc-8bda-1a64a2da0c12",
61
+ status=200,
62
+ json={
63
+ "id": "471b9e64-29af-48dc-8bda-1a64a2da0c12",
64
+ "name": "a_secret",
65
+ "content": "My super duper secret value",
66
+ },
67
+ )
68
+
69
+ mock_base_worker_modern_conf.configure()
70
+
71
+ assert mock_base_worker_modern_conf.config == {
72
+ "a_secret": "My super duper secret value",
73
+ "some_key": "test",
74
+ }
75
+ assert (
76
+ mock_base_worker_modern_conf.user_configuration
77
+ == mock_base_worker_modern_conf.config
78
+ )
79
+ assert mock_base_worker_modern_conf.secrets == {
80
+ "a_secret": "My super duper secret value"
81
+ }