arkindex-base-worker 0.5.1b2__py3-none-any.whl → 0.5.1b5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: arkindex-base-worker
3
- Version: 0.5.1b2
3
+ Version: 0.5.1b5
4
4
  Summary: Base Worker to easily build Arkindex ML workflows
5
5
  Author-email: Teklia <contact@teklia.com>
6
6
  Maintainer-email: Teklia <contact@teklia.com>
@@ -1,15 +1,15 @@
1
- arkindex_base_worker-0.5.1b2.dist-info/licenses/LICENSE,sha256=NVshRi1efwVezMfW7xXYLrdDr2Li1AfwfGOd5WuH1kQ,1063
1
+ arkindex_base_worker-0.5.1b5.dist-info/licenses/LICENSE,sha256=NVshRi1efwVezMfW7xXYLrdDr2Li1AfwfGOd5WuH1kQ,1063
2
2
  arkindex_worker/__init__.py,sha256=Sdt5KXn8EgURb2MurYVrUWaHbH3iFA1XLRo0Lc5AJ44,250
3
3
  arkindex_worker/cache.py,sha256=x1d1oVF297ItLoZnPkZQoEefa39ZigrwRoHC_6az94k,10731
4
4
  arkindex_worker/image.py,sha256=GvIpW7LNSalVw3Obt9nySDWnW7-NbC0__SWREEQqVCk,20696
5
5
  arkindex_worker/models.py,sha256=bPQzGZNs5a6z6DEcygsa8T33VOqPlMUbwKzHqlKzwbw,9923
6
6
  arkindex_worker/utils.py,sha256=MbbJT8oh8DMHHR-vidFeXdUH0TSXGWm7ZDGWzrRXoEY,9933
7
- arkindex_worker/worker/__init__.py,sha256=Ho4rzGDtpFwd13cl5tL45yAVZJuyGCOIOroLPGZsvmk,18399
8
- arkindex_worker/worker/base.py,sha256=fbRJ5vDON3DfSQfwxFqto85HY8Dw2_YgOmnm5cxbQ2g,21725
7
+ arkindex_worker/worker/__init__.py,sha256=SzD0s1_m6gMV02EUF-NeciqZdVPA4dpXI84tSj-g494,17869
8
+ arkindex_worker/worker/base.py,sha256=-ASOgLzemHOAz7EnPZ9hT6wON_9GJ7uadxx7oZLgUD4,22149
9
9
  arkindex_worker/worker/classification.py,sha256=qvykymkgd4nGywHCxL8obo4egstoGsmWNS4Ztc1qNWQ,11024
10
10
  arkindex_worker/worker/corpus.py,sha256=MeIMod7jkWyX0frtD0a37rhumnMV3p9ZOC1xwAoXrAA,2291
11
11
  arkindex_worker/worker/dataset.py,sha256=tVaPx43vaH-KTtx4w5V06e26ha8XPfiJTRzBXlu928Y,5273
12
- arkindex_worker/worker/element.py,sha256=YUYKPhWrt2J5bfCA58slhI9CVIBmgIdcgC6-hhR9Zwg,46942
12
+ arkindex_worker/worker/element.py,sha256=sLfnf09AfJ5tSCKQ7cAkl7WsGhjsfq14swsT30MDnYk,47385
13
13
  arkindex_worker/worker/entity.py,sha256=Aj6EOfzHEm7qQV-Egm0YKLZgCrLS_3ggOKTY81M2JbI,12323
14
14
  arkindex_worker/worker/image.py,sha256=L6Ikuf0Z0RxJk7JarY5PggJGrYSHLaPK0vn0dy0CIaQ,623
15
15
  arkindex_worker/worker/metadata.py,sha256=rBjU057xngwrf32vAo-2cpgYfmrdEj3lfDg_kv4-zr0,6810
@@ -21,8 +21,8 @@ examples/standalone/python/worker.py,sha256=Zr4s4pHvgexEjlkixLFYZp1UuwMLeoTxjyNG
21
21
  examples/tooled/python/worker.py,sha256=kIYlHLsO5UpwX4XtERRq4tf2qTsvqKK30C-w8t0yyhA,1821
22
22
  hooks/pre_gen_project.py,sha256=xQJERv3vv9VzIqcBHI281eeWLWREXUF4mMw7PvJHHXM,269
23
23
  tests/__init__.py,sha256=DG--S6IpGl399rzSAjDdHL76CkOIeZIjajCcyUSDhOQ,241
24
- tests/conftest.py,sha256=jRFykK7cQm2AsZduVp78oEvKtHyzm3wr95IVkF4XpyY,23825
25
- tests/test_base_worker.py,sha256=3YjhjxSWVjEWFYS8m8pYYoaVAhHFkJLNTs0QPQIkBDM,32651
24
+ tests/conftest.py,sha256=Tp7YFK17NATwF2yAcBwi0QFNyKSXtLS0VhZ-zZngsQI,24343
25
+ tests/test_base_worker.py,sha256=lwS4X3atS2ktEKd1XdogmN3mbzq-tO206-k_0EDITlw,29302
26
26
  tests/test_cache.py,sha256=nnEFfAAqtYHk2ymOwN0spXJd8nrRiwp3voj0tOmIbQ8,10407
27
27
  tests/test_dataset_worker.py,sha256=iDJM2C4PfQNH0r4_QqSWoPt8BcM0geUUdODtWY0Z9PA,22412
28
28
  tests/test_element.py,sha256=2G9M15TLxQRmvrWM9Kw2ucnElh4kSv_oF_5FYwwAxTY,13181
@@ -49,13 +49,13 @@ tests/test_elements_worker/test_training.py,sha256=qgK7BLucddRzc8ePbQtY75x17QvGD
49
49
  tests/test_elements_worker/test_transcription_create.py,sha256=yznO9B_BVsOR0Z_VY5ZL8gJp0ZPCz_4sPUs5dXtixAg,29281
50
50
  tests/test_elements_worker/test_transcription_create_with_elements.py,sha256=tmcyglgssEqMnt1Mdy_u6X1m2wgLWTo_HdWst3GrK2k,33056
51
51
  tests/test_elements_worker/test_transcription_list.py,sha256=ikz7HYPCoQWTdTRCd382SB-y-T2BbigPLlIcx5Eow-I,15324
52
- tests/test_elements_worker/test_worker.py,sha256=tFxLZhX83Z-g2pZyZ2vRzTh_yBhHjpBnZ9kdYxlOt4g,28272
52
+ tests/test_elements_worker/test_worker.py,sha256=ypAQS_DJj9qGlRJCs9g5qUXe7IgqaKXWDcxqwlhAqSg,28598
53
53
  worker-demo/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
54
54
  worker-demo/tests/conftest.py,sha256=XzNMNeg6pmABUAH8jN6eZTlZSFGLYjS3-DTXjiRN6Yc,1002
55
55
  worker-demo/tests/test_worker.py,sha256=3DLd4NRK4bfyatG5P_PK4k9P9tJHx9XQq5_ryFEEFVg,304
56
56
  worker-demo/worker_demo/__init__.py,sha256=2BPomV8ZMNf3YXJgloatKeHQCE6QOkwmsHGkO6MkQuM,125
57
57
  worker-demo/worker_demo/worker.py,sha256=Rt-DjWa5iBP08k58NDZMfeyPuFbtNcbX6nc5jFX7GNo,440
58
- arkindex_base_worker-0.5.1b2.dist-info/METADATA,sha256=oAv0lAu9iGzHWuW8YL_i7uf1dqppslQ7iRznUJYLiGw,3137
59
- arkindex_base_worker-0.5.1b2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
60
- arkindex_base_worker-0.5.1b2.dist-info/top_level.txt,sha256=-vNjP2VfROx0j83mdi9aIqRZ88eoJjxeWz-R_gPgyXU,49
61
- arkindex_base_worker-0.5.1b2.dist-info/RECORD,,
58
+ arkindex_base_worker-0.5.1b5.dist-info/METADATA,sha256=pVX35tc38qCuTCXwhXS2veoEJpTMJ3KWh3a1CUZQ3yg,3137
59
+ arkindex_base_worker-0.5.1b5.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
60
+ arkindex_base_worker-0.5.1b5.dist-info/top_level.txt,sha256=-vNjP2VfROx0j83mdi9aIqRZ88eoJjxeWz-R_gPgyXU,49
61
+ arkindex_base_worker-0.5.1b5.dist-info/RECORD,,
@@ -33,13 +33,10 @@ from arkindex_worker.worker.transcription import TranscriptionMixin
33
33
 
34
34
 
35
35
  class WorkerActivityIterator:
36
- def __init__(self, api_client, types):
36
+ def __init__(self, api_client):
37
37
  # Use same api client as main class
38
38
  self.api_client = api_client
39
39
 
40
- # Index element types by ID
41
- self.types = {t["id"]: t["slug"] for t in types}
42
-
43
40
  logger.info(
44
41
  "Using StartWorkerActivity instead of reading init_elements JSON file"
45
42
  )
@@ -53,7 +50,7 @@ class WorkerActivityIterator:
53
50
 
54
51
  def __next__(self):
55
52
  """
56
- Provide a new element from a worker activity upon each iteration
53
+ Provide a new element ID from a worker activity upon each iteration
57
54
  """
58
55
  try:
59
56
  data = self.api_client.request("StartWorkerActivity")
@@ -67,12 +64,7 @@ class WorkerActivityIterator:
67
64
  )
68
65
  raise e
69
66
 
70
- # Find the type slug using API provided type_id
71
- type_id = data["type_id"]
72
- if type_id not in self.types:
73
- raise Exception(f"Unknown type {type_id}")
74
-
75
- return Element(type=self.types[type_id], **data)
67
+ return data["id"]
76
68
 
77
69
 
78
70
  class ElementsWorker(
@@ -154,14 +146,9 @@ class ElementsWorker(
154
146
  elif self.process_mode == ProcessMode.Export:
155
147
  # For export mode processes, use list_process_elements and return element IDs
156
148
  return {item["id"] for item in self.list_process_elements()}
157
- elif self.args.consume_worker_activities:
158
- # We need to list corpus types as the StartWorkerActivity endpoint only provide type_id
159
- self.list_corpus_types()
160
-
149
+ elif self.consume_worker_activities:
161
150
  # Consume worker activitives one by one
162
- return WorkerActivityIterator(
163
- self.api_client, types=self.corpus_types.values()
164
- )
151
+ return WorkerActivityIterator(self.api_client)
165
152
 
166
153
  invalid_element_ids = list(filter(invalid_element_id, out))
167
154
  assert not invalid_element_ids, (
@@ -195,7 +182,7 @@ class ElementsWorker(
195
182
  - when running with init_elements, we have a known list
196
183
  - when running with StartWorkerActivity, we have a queue of unknown size
197
184
  """
198
- return self.args.consume_worker_activities
185
+ return self.consume_worker_activities
199
186
 
200
187
  def run(self):
201
188
  """
@@ -241,7 +228,7 @@ class ElementsWorker(
241
228
 
242
229
  # Process the element and report its progress if activities are enabled
243
230
  # We do not update the worker activity to "Started" state when consuming them
244
- if self.args.consume_worker_activities or self.update_activity(
231
+ if self.consume_worker_activities or self.update_activity(
245
232
  element.id, ActivityState.Started
246
233
  ):
247
234
  self.process_element(element)
@@ -265,6 +265,11 @@ class BaseWorker:
265
265
  if not item["secret"]:
266
266
  return (item["key"], item["value"])
267
267
 
268
+ # The secret may not be picked by the user
269
+ if item["value"] is None:
270
+ logger.info(f"Skipping optional secret {item['key']}")
271
+ return (item["key"], None)
272
+
268
273
  # Load secret, only available in Arkindex EE
269
274
  try:
270
275
  secret = self.load_secret(Path(item["value"]))
@@ -276,6 +281,19 @@ class BaseWorker:
276
281
 
277
282
  return (item["key"], secret)
278
283
 
284
+ # Load model version configuration when available
285
+ # Workers will use model version ID and details to download the model
286
+ model_version = worker_run.get("model_version")
287
+ if model_version:
288
+ logger.info("Loaded model version configuration from WorkerRun")
289
+ self.model_configuration.update(model_version["configuration"])
290
+
291
+ # Set model_version ID as worker attribute
292
+ self.model_version_id = model_version["id"]
293
+
294
+ # Set model details as worker attribute
295
+ self.model_details = model_version["model"]
296
+
279
297
  # Load worker run information
280
298
  try:
281
299
  config = self.api_client.request(
@@ -295,6 +313,9 @@ class BaseWorker:
295
313
  }
296
314
  logger.info("Using modern configuration")
297
315
 
316
+ # Reset the model configuration to make sure workers rely on the single new source
317
+ self.model_configuration = {}
318
+
298
319
  return # Stop here once we have modern configuration
299
320
 
300
321
  except ErrorResponse as e:
@@ -303,18 +324,6 @@ class BaseWorker:
303
324
  logger.info("Modern configuration is not available")
304
325
 
305
326
  # Use old-style configuration with local merge
306
- # Load model version configuration when available
307
- model_version = worker_run.get("model_version")
308
- if model_version:
309
- logger.info("Loaded model version configuration from WorkerRun")
310
- self.model_configuration.update(model_version["configuration"])
311
-
312
- # Set model_version ID as worker attribute
313
- self.model_version_id = model_version["id"]
314
-
315
- # Set model details as worker attribute
316
- self.model_details = model_version["model"]
317
-
318
327
  # Retrieve initial configuration from API
319
328
  self.config = worker_version["configuration"].get("configuration", {})
320
329
  if "user_configuration" in worker_version["configuration"]:
@@ -55,6 +55,17 @@ class ElementMixin:
55
55
  )
56
56
  super().add_arguments()
57
57
 
58
+ @property
59
+ def consume_worker_activities(self) -> bool:
60
+ """
61
+ Helper to detect if the worker rely on an elements.json or consume directly worker activities
62
+ Uses the process information when available, fallback to CLI args
63
+ """
64
+ if self.process_information is not None:
65
+ return self.process_information.get("skip_elements_json") is True
66
+
67
+ return self.args.consume_worker_activities
68
+
58
69
  def list_corpus_types(self):
59
70
  """
60
71
  Loads available element types in corpus.
tests/conftest.py CHANGED
@@ -103,12 +103,6 @@ def _mock_worker_run_api(responses):
103
103
  payload = {
104
104
  "id": "56785678-5678-5678-5678-567856785678",
105
105
  "parents": [],
106
- "worker": {
107
- "id": "deadbeef-1234-5678-1234-worker",
108
- "name": "Fake worker",
109
- "slug": "fake_worker",
110
- "type": "classifier",
111
- },
112
106
  "worker_version": {
113
107
  "id": "12341234-1234-1234-1234-123412341234",
114
108
  "configuration": {
@@ -153,6 +147,7 @@ def _mock_worker_run_api(responses):
153
147
  "train_folder_id": None,
154
148
  "validation_folder_id": None,
155
149
  "test_folder_id": None,
150
+ "skip_elements_json": False,
156
151
  },
157
152
  "summary": "Worker Fake worker @ 123412",
158
153
  }
@@ -179,12 +174,6 @@ def _mock_worker_run_no_revision_api(responses):
179
174
  payload = {
180
175
  "id": "56785678-5678-5678-5678-567856785678",
181
176
  "parents": [],
182
- "worker": {
183
- "id": "deadbeef-1234-5678-1234-worker",
184
- "name": "Fake worker",
185
- "slug": "fake_worker",
186
- "type": "classifier",
187
- },
188
177
  "worker_version": {
189
178
  "id": "12341234-1234-1234-1234-123412341234",
190
179
  "configuration": {
@@ -346,29 +335,51 @@ def mock_elements_worker_consume_wa(monkeypatch, responses, mock_elements_worker
346
335
  instead of reading a JSON file
347
336
  """
348
337
 
349
- # Enable consume worker activities mode from CLI args
350
- mock_elements_worker.args.consume_worker_activities = True
351
-
352
- # Worker requires element types from corpus details as they are not provided by StartWorkerActivity
353
- responses.add(
338
+ # Enable consume worker activities through the process configuration
339
+ responses.replace(
354
340
  responses.GET,
355
- "http://testserver/api/v1/corpus/11111111-1111-1111-1111-111111111111/",
341
+ "http://testserver/api/v1/process/workers/56785678-5678-5678-5678-567856785678/",
356
342
  status=200,
357
343
  json={
358
- "id": "11111111-1111-1111-1111-111111111111",
359
- "name": "Test corpus",
360
- "types": [
361
- {
362
- "id": "page-aaaa-aaaa-aaaa-aaaaaaaaaaaa",
363
- "slug": "page",
364
- "display_name": "Page",
365
- "folder": False,
366
- "color": "28b62c",
367
- }
368
- ],
344
+ "id": "56785678-5678-5678-5678-567856785678",
345
+ "parents": [],
346
+ "worker_version": {
347
+ "id": "12341234-1234-1234-1234-123412341234",
348
+ "configuration": {
349
+ "docker": {"image": "python:3"},
350
+ "configuration": {"someKey": "someValue"},
351
+ "secrets": [],
352
+ },
353
+ "worker": {
354
+ "id": "deadbeef-1234-5678-1234-worker",
355
+ "name": "Fake worker",
356
+ "slug": "fake_worker",
357
+ "type": "classifier",
358
+ },
359
+ },
360
+ "configuration": None,
361
+ "model_version": None,
362
+ "process": {
363
+ "name": None,
364
+ "id": "aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeffff",
365
+ "state": "running",
366
+ "mode": "workers",
367
+ "corpus": CORPUS_ID,
368
+ "use_cache": False,
369
+ "activity_state": "ready",
370
+ "model_id": None,
371
+ "train_folder_id": None,
372
+ "validation_folder_id": None,
373
+ "test_folder_id": None,
374
+ "skip_elements_json": True,
375
+ },
376
+ "summary": "Worker Fake worker @ 123412",
369
377
  },
370
378
  )
371
379
 
380
+ # Call configure again to use updated process infos
381
+ mock_elements_worker.configure()
382
+
372
383
  return mock_elements_worker
373
384
 
374
385
 
tests/test_base_worker.py CHANGED
@@ -13,6 +13,29 @@ from arkindex_worker.worker import BaseWorker, ElementsWorker
13
13
  from arkindex_worker.worker.base import ExtrasDirNotFoundError
14
14
  from tests import CORPUS_ID, FIXTURES_DIR
15
15
 
16
+ SIMPLE_PAYLOAD = {
17
+ "id": "56785678-5678-5678-5678-567856785678",
18
+ "parents": [],
19
+ "worker_version": {
20
+ "id": "12341234-1234-1234-1234-123412341234",
21
+ "worker": {
22
+ "id": "deadbeef-1234-5678-1234-worker",
23
+ "name": "Fake worker",
24
+ "slug": "fake_worker",
25
+ "type": "classifier",
26
+ },
27
+ "revision": {"hash": "deadbeef1234"},
28
+ "configuration": {"configuration": {}},
29
+ },
30
+ "configuration": None,
31
+ "model_version": None,
32
+ "process": {
33
+ "id": "aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeffff",
34
+ "corpus": CORPUS_ID,
35
+ },
36
+ "summary": "Worker Fake worker @ 123412",
37
+ }
38
+
16
39
 
17
40
  def test_init_default_local_share():
18
41
  worker = BaseWorker()
@@ -149,38 +172,13 @@ def test_configure_worker_run(mocker, responses, caplog):
149
172
 
150
173
  worker = BaseWorker()
151
174
  mocker.patch.object(sys, "argv", ["worker"])
152
- user_configuration = {
153
- "id": "bbbbbbbbb-bbbb-bbbb-bbbb-bbbbbbbbbbbb",
154
- "name": "BBB",
155
- "configuration": {"a": "b"},
156
- }
157
175
  payload = {
158
- "id": "56785678-5678-5678-5678-567856785678",
159
- "parents": [],
160
- "worker": {
161
- "id": "deadbeef-1234-5678-1234-worker",
162
- "name": "Fake worker",
163
- "slug": "fake_worker",
164
- "type": "classifier",
165
- },
166
- "worker_version": {
167
- "id": "12341234-1234-1234-1234-123412341234",
168
- "worker": {
169
- "id": "deadbeef-1234-5678-1234-worker",
170
- "name": "Fake worker",
171
- "slug": "fake_worker",
172
- "type": "classifier",
173
- },
174
- "revision": {"hash": "deadbeef1234"},
175
- "configuration": {"configuration": {}},
176
- },
177
- "configuration": user_configuration,
178
- "model_version": None,
179
- "process": {
180
- "id": "aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeffff",
181
- "corpus": CORPUS_ID,
176
+ **SIMPLE_PAYLOAD,
177
+ "configuration": {
178
+ "id": "bbbbbbbbb-bbbb-bbbb-bbbb-bbbbbbbbbbbb",
179
+ "name": "BBB",
180
+ "configuration": {"a": "b"},
182
181
  },
183
- "summary": "Worker Fake worker @ 123412",
184
182
  }
185
183
 
186
184
  responses.add(
@@ -262,8 +260,7 @@ def test_configure_user_configuration_defaults(mocker, responses):
262
260
  worker.args = worker.parser.parse_args()
263
261
 
264
262
  payload = {
265
- "id": "56785678-5678-5678-5678-567856785678",
266
- "parents": [],
263
+ **SIMPLE_PAYLOAD,
267
264
  "worker_version": {
268
265
  "id": "12341234-1234-1234-1234-123412341234",
269
266
  "worker": {
@@ -293,12 +290,6 @@ def test_configure_user_configuration_defaults(mocker, responses):
293
290
  "param_5": True,
294
291
  },
295
292
  },
296
- "model_version": None,
297
- "process": {
298
- "id": "aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeffff",
299
- "corpus": CORPUS_ID,
300
- },
301
- "summary": "Worker Fake worker @ 123412",
302
293
  }
303
294
  responses.add(
304
295
  responses.GET,
@@ -340,30 +331,12 @@ def test_configure_user_config_debug(mocker, responses, debug):
340
331
  mocker.patch.object(sys, "argv", ["worker"])
341
332
  assert logger.level == logging.NOTSET
342
333
  payload = {
343
- "id": "56785678-5678-5678-5678-567856785678",
344
- "parents": [],
345
- "worker_version": {
346
- "id": "12341234-1234-1234-1234-123412341234",
347
- "worker": {
348
- "id": "deadbeef-1234-5678-1234-worker",
349
- "name": "Fake worker",
350
- "slug": "fake_worker",
351
- "type": "classifier",
352
- },
353
- "revision": {"hash": "deadbeef1234"},
354
- "configuration": {"configuration": {}},
355
- },
356
- "model_version": None,
334
+ **SIMPLE_PAYLOAD,
357
335
  "configuration": {
358
336
  "id": "af0daaf4-983e-4703-a7ed-a10f146d6684",
359
337
  "name": "BBB",
360
338
  "configuration": {"debug": debug},
361
339
  },
362
- "process": {
363
- "id": "aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeffff",
364
- "corpus": CORPUS_ID,
365
- },
366
- "summary": "Worker Fake worker @ 123412",
367
340
  }
368
341
  responses.add(
369
342
  responses.GET,
@@ -393,32 +366,8 @@ def test_configure_worker_run_missing_conf(mocker, responses):
393
366
  mocker.patch.object(sys, "argv", ["worker"])
394
367
 
395
368
  payload = {
396
- "id": "56785678-5678-5678-5678-567856785678",
397
- "parents": [],
398
- "worker": {
399
- "id": "deadbeef-1234-5678-1234-worker",
400
- "name": "Fake worker",
401
- "slug": "fake_worker",
402
- "type": "classifier",
403
- },
404
- "worker_version": {
405
- "id": "12341234-1234-1234-1234-123412341234",
406
- "worker": {
407
- "id": "deadbeef-1234-5678-1234-worker",
408
- "name": "Fake worker",
409
- "slug": "fake_worker",
410
- "type": "classifier",
411
- },
412
- "revision": {"hash": "deadbeef1234"},
413
- "configuration": {"configuration": {}},
414
- },
415
- "model_version": None,
369
+ **SIMPLE_PAYLOAD,
416
370
  "configuration": {"id": "bbbbbbbbb-bbbb-bbbb-bbbb-bbbbbbbbbbbb", "name": "BBB"},
417
- "process": {
418
- "id": "aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeffff",
419
- "corpus": CORPUS_ID,
420
- },
421
- "summary": "Worker Fake worker @ 123412",
422
371
  }
423
372
  responses.add(
424
373
  responses.GET,
@@ -446,28 +395,7 @@ def test_configure_worker_run_no_worker_run_conf(mocker, responses):
446
395
  worker = BaseWorker()
447
396
  mocker.patch.object(sys, "argv", ["worker"])
448
397
 
449
- payload = {
450
- "id": "56785678-5678-5678-5678-567856785678",
451
- "parents": [],
452
- "worker_version": {
453
- "id": "12341234-1234-1234-1234-123412341234",
454
- "worker": {
455
- "id": "deadbeef-1234-5678-1234-worker",
456
- "name": "Fake worker",
457
- "slug": "fake_worker",
458
- "type": "classifier",
459
- },
460
- "revision": {"hash": "deadbeef1234"},
461
- "configuration": {},
462
- },
463
- "model_version": None,
464
- "configuration": None,
465
- "process": {
466
- "id": "aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeffff",
467
- "corpus": CORPUS_ID,
468
- },
469
- "summary": "Worker Fake worker @ 123412",
470
- }
398
+ payload = SIMPLE_PAYLOAD
471
399
  responses.add(
472
400
  responses.GET,
473
401
  "http://testserver/api/v1/process/workers/56785678-5678-5678-5678-567856785678/",
@@ -491,20 +419,7 @@ def test_configure_load_model_configuration(mocker, responses):
491
419
  worker = BaseWorker()
492
420
  mocker.patch.object(sys, "argv", ["worker"])
493
421
  payload = {
494
- "id": "56785678-5678-5678-5678-567856785678",
495
- "parents": [],
496
- "worker_version": {
497
- "id": "12341234-1234-1234-1234-123412341234",
498
- "worker": {
499
- "id": "deadbeef-1234-5678-1234-worker",
500
- "name": "Fake worker",
501
- "slug": "fake_worker",
502
- "type": "classifier",
503
- },
504
- "revision": {"hash": "deadbeef1234"},
505
- "configuration": {"configuration": {}},
506
- },
507
- "configuration": None,
422
+ **SIMPLE_PAYLOAD,
508
423
  "model_version": {
509
424
  "id": "12341234-1234-1234-1234-123412341234",
510
425
  "model": {
@@ -517,11 +432,6 @@ def test_configure_load_model_configuration(mocker, responses):
517
432
  "param3": None,
518
433
  },
519
434
  },
520
- "process": {
521
- "id": "aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeffff",
522
- "corpus": CORPUS_ID,
523
- },
524
- "summary": "Worker Fake worker @ 123412",
525
435
  }
526
436
 
527
437
  responses.add(
@@ -698,7 +698,8 @@ def test_run_consuming_worker_activities(
698
698
  assert mock_elements_worker_consume_wa.is_read_only is False
699
699
 
700
700
  # Provide 2 worker activities to run and the corresponding update call
701
- for elt_id in ("page_1", "page_2"):
701
+ # and 2 element details response
702
+ for i, elt_id in enumerate(("page_1", "page_2"), 1):
702
703
  responses.add(
703
704
  responses.POST,
704
705
  "http://testserver/api/v1/process/start-activity/",
@@ -706,7 +707,7 @@ def test_run_consuming_worker_activities(
706
707
  json={
707
708
  "id": elt_id,
708
709
  "type_id": "page-aaaa-aaaa-aaaa-aaaaaaaaaaaa", # Element type provided by mock corpus
709
- "name": "Page XXX",
710
+ "name": f"Page n°{i}",
710
711
  },
711
712
  )
712
713
  responses.add(
@@ -714,6 +715,16 @@ def test_run_consuming_worker_activities(
714
715
  "http://testserver/api/v1/workers/versions/56785678-5678-5678-5678-567856785678/activity/",
715
716
  status=200,
716
717
  )
718
+ responses.add(
719
+ responses.GET,
720
+ f"http://testserver/api/v1/element/{elt_id}/",
721
+ status=200,
722
+ json={
723
+ "id": elt_id,
724
+ "type": "page",
725
+ "name": f"Page n°{i}",
726
+ },
727
+ )
717
728
 
718
729
  # Then a 404 to stop iterating
719
730
  responses.add(
@@ -725,18 +736,19 @@ def test_run_consuming_worker_activities(
725
736
  # Simply run the process
726
737
  mock_elements_worker_consume_wa.run()
727
738
 
728
- assert len(responses.calls) == len(BASE_API_CALLS) + 6
739
+ # We call twice configure in the conftest
740
+ assert len(responses.calls) == len(BASE_API_CALLS) * 2 + 7
729
741
  assert [
730
742
  (call.request.method, call.request.url) for call in responses.calls
731
- ] == BASE_API_CALLS + [
732
- (
733
- "GET",
734
- "http://testserver/api/v1/corpus/11111111-1111-1111-1111-111111111111/",
735
- ),
743
+ ] == BASE_API_CALLS * 2 + [
736
744
  (
737
745
  "POST",
738
746
  "http://testserver/api/v1/process/start-activity/",
739
747
  ),
748
+ (
749
+ "GET",
750
+ "http://testserver/api/v1/element/page_1/",
751
+ ),
740
752
  (
741
753
  "PUT",
742
754
  "http://testserver/api/v1/workers/versions/56785678-5678-5678-5678-567856785678/activity/",
@@ -745,6 +757,10 @@ def test_run_consuming_worker_activities(
745
757
  "POST",
746
758
  "http://testserver/api/v1/process/start-activity/",
747
759
  ),
760
+ (
761
+ "GET",
762
+ "http://testserver/api/v1/element/page_2/",
763
+ ),
748
764
  (
749
765
  "PUT",
750
766
  "http://testserver/api/v1/workers/versions/56785678-5678-5678-5678-567856785678/activity/",
@@ -756,21 +772,17 @@ def test_run_consuming_worker_activities(
756
772
  ]
757
773
 
758
774
  assert [(record.levelno, record.message) for record in caplog.records] == [
759
- (
760
- logging.INFO,
761
- "Loaded 1 element type in corpus (11111111-1111-1111-1111-111111111111).",
762
- ),
763
775
  (
764
776
  logging.INFO,
765
777
  "Using StartWorkerActivity instead of reading init_elements JSON file",
766
778
  ),
767
779
  (
768
780
  logging.INFO,
769
- "Processing page Page XXX (page_1) (n°1)",
781
+ "Processing page Page n°1 (page_1) (n°1)",
770
782
  ),
771
783
  (
772
784
  logging.INFO,
773
- "Processing page Page XXX (page_2) (n°2)",
785
+ "Processing page Page n°2 (page_2) (n°2)",
774
786
  ),
775
787
  (
776
788
  logging.INFO,