arkindex-base-worker 0.5.1b2__py3-none-any.whl → 0.5.1b5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {arkindex_base_worker-0.5.1b2.dist-info → arkindex_base_worker-0.5.1b5.dist-info}/METADATA +1 -1
- {arkindex_base_worker-0.5.1b2.dist-info → arkindex_base_worker-0.5.1b5.dist-info}/RECORD +11 -11
- arkindex_worker/worker/__init__.py +7 -20
- arkindex_worker/worker/base.py +21 -12
- arkindex_worker/worker/element.py +11 -0
- tests/conftest.py +40 -29
- tests/test_base_worker.py +33 -123
- tests/test_elements_worker/test_worker.py +26 -14
- {arkindex_base_worker-0.5.1b2.dist-info → arkindex_base_worker-0.5.1b5.dist-info}/WHEEL +0 -0
- {arkindex_base_worker-0.5.1b2.dist-info → arkindex_base_worker-0.5.1b5.dist-info}/licenses/LICENSE +0 -0
- {arkindex_base_worker-0.5.1b2.dist-info → arkindex_base_worker-0.5.1b5.dist-info}/top_level.txt +0 -0
|
@@ -1,15 +1,15 @@
|
|
|
1
|
-
arkindex_base_worker-0.5.
|
|
1
|
+
arkindex_base_worker-0.5.1b5.dist-info/licenses/LICENSE,sha256=NVshRi1efwVezMfW7xXYLrdDr2Li1AfwfGOd5WuH1kQ,1063
|
|
2
2
|
arkindex_worker/__init__.py,sha256=Sdt5KXn8EgURb2MurYVrUWaHbH3iFA1XLRo0Lc5AJ44,250
|
|
3
3
|
arkindex_worker/cache.py,sha256=x1d1oVF297ItLoZnPkZQoEefa39ZigrwRoHC_6az94k,10731
|
|
4
4
|
arkindex_worker/image.py,sha256=GvIpW7LNSalVw3Obt9nySDWnW7-NbC0__SWREEQqVCk,20696
|
|
5
5
|
arkindex_worker/models.py,sha256=bPQzGZNs5a6z6DEcygsa8T33VOqPlMUbwKzHqlKzwbw,9923
|
|
6
6
|
arkindex_worker/utils.py,sha256=MbbJT8oh8DMHHR-vidFeXdUH0TSXGWm7ZDGWzrRXoEY,9933
|
|
7
|
-
arkindex_worker/worker/__init__.py,sha256=
|
|
8
|
-
arkindex_worker/worker/base.py,sha256
|
|
7
|
+
arkindex_worker/worker/__init__.py,sha256=SzD0s1_m6gMV02EUF-NeciqZdVPA4dpXI84tSj-g494,17869
|
|
8
|
+
arkindex_worker/worker/base.py,sha256=-ASOgLzemHOAz7EnPZ9hT6wON_9GJ7uadxx7oZLgUD4,22149
|
|
9
9
|
arkindex_worker/worker/classification.py,sha256=qvykymkgd4nGywHCxL8obo4egstoGsmWNS4Ztc1qNWQ,11024
|
|
10
10
|
arkindex_worker/worker/corpus.py,sha256=MeIMod7jkWyX0frtD0a37rhumnMV3p9ZOC1xwAoXrAA,2291
|
|
11
11
|
arkindex_worker/worker/dataset.py,sha256=tVaPx43vaH-KTtx4w5V06e26ha8XPfiJTRzBXlu928Y,5273
|
|
12
|
-
arkindex_worker/worker/element.py,sha256=
|
|
12
|
+
arkindex_worker/worker/element.py,sha256=sLfnf09AfJ5tSCKQ7cAkl7WsGhjsfq14swsT30MDnYk,47385
|
|
13
13
|
arkindex_worker/worker/entity.py,sha256=Aj6EOfzHEm7qQV-Egm0YKLZgCrLS_3ggOKTY81M2JbI,12323
|
|
14
14
|
arkindex_worker/worker/image.py,sha256=L6Ikuf0Z0RxJk7JarY5PggJGrYSHLaPK0vn0dy0CIaQ,623
|
|
15
15
|
arkindex_worker/worker/metadata.py,sha256=rBjU057xngwrf32vAo-2cpgYfmrdEj3lfDg_kv4-zr0,6810
|
|
@@ -21,8 +21,8 @@ examples/standalone/python/worker.py,sha256=Zr4s4pHvgexEjlkixLFYZp1UuwMLeoTxjyNG
|
|
|
21
21
|
examples/tooled/python/worker.py,sha256=kIYlHLsO5UpwX4XtERRq4tf2qTsvqKK30C-w8t0yyhA,1821
|
|
22
22
|
hooks/pre_gen_project.py,sha256=xQJERv3vv9VzIqcBHI281eeWLWREXUF4mMw7PvJHHXM,269
|
|
23
23
|
tests/__init__.py,sha256=DG--S6IpGl399rzSAjDdHL76CkOIeZIjajCcyUSDhOQ,241
|
|
24
|
-
tests/conftest.py,sha256=
|
|
25
|
-
tests/test_base_worker.py,sha256=
|
|
24
|
+
tests/conftest.py,sha256=Tp7YFK17NATwF2yAcBwi0QFNyKSXtLS0VhZ-zZngsQI,24343
|
|
25
|
+
tests/test_base_worker.py,sha256=lwS4X3atS2ktEKd1XdogmN3mbzq-tO206-k_0EDITlw,29302
|
|
26
26
|
tests/test_cache.py,sha256=nnEFfAAqtYHk2ymOwN0spXJd8nrRiwp3voj0tOmIbQ8,10407
|
|
27
27
|
tests/test_dataset_worker.py,sha256=iDJM2C4PfQNH0r4_QqSWoPt8BcM0geUUdODtWY0Z9PA,22412
|
|
28
28
|
tests/test_element.py,sha256=2G9M15TLxQRmvrWM9Kw2ucnElh4kSv_oF_5FYwwAxTY,13181
|
|
@@ -49,13 +49,13 @@ tests/test_elements_worker/test_training.py,sha256=qgK7BLucddRzc8ePbQtY75x17QvGD
|
|
|
49
49
|
tests/test_elements_worker/test_transcription_create.py,sha256=yznO9B_BVsOR0Z_VY5ZL8gJp0ZPCz_4sPUs5dXtixAg,29281
|
|
50
50
|
tests/test_elements_worker/test_transcription_create_with_elements.py,sha256=tmcyglgssEqMnt1Mdy_u6X1m2wgLWTo_HdWst3GrK2k,33056
|
|
51
51
|
tests/test_elements_worker/test_transcription_list.py,sha256=ikz7HYPCoQWTdTRCd382SB-y-T2BbigPLlIcx5Eow-I,15324
|
|
52
|
-
tests/test_elements_worker/test_worker.py,sha256=
|
|
52
|
+
tests/test_elements_worker/test_worker.py,sha256=ypAQS_DJj9qGlRJCs9g5qUXe7IgqaKXWDcxqwlhAqSg,28598
|
|
53
53
|
worker-demo/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
54
54
|
worker-demo/tests/conftest.py,sha256=XzNMNeg6pmABUAH8jN6eZTlZSFGLYjS3-DTXjiRN6Yc,1002
|
|
55
55
|
worker-demo/tests/test_worker.py,sha256=3DLd4NRK4bfyatG5P_PK4k9P9tJHx9XQq5_ryFEEFVg,304
|
|
56
56
|
worker-demo/worker_demo/__init__.py,sha256=2BPomV8ZMNf3YXJgloatKeHQCE6QOkwmsHGkO6MkQuM,125
|
|
57
57
|
worker-demo/worker_demo/worker.py,sha256=Rt-DjWa5iBP08k58NDZMfeyPuFbtNcbX6nc5jFX7GNo,440
|
|
58
|
-
arkindex_base_worker-0.5.
|
|
59
|
-
arkindex_base_worker-0.5.
|
|
60
|
-
arkindex_base_worker-0.5.
|
|
61
|
-
arkindex_base_worker-0.5.
|
|
58
|
+
arkindex_base_worker-0.5.1b5.dist-info/METADATA,sha256=pVX35tc38qCuTCXwhXS2veoEJpTMJ3KWh3a1CUZQ3yg,3137
|
|
59
|
+
arkindex_base_worker-0.5.1b5.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
60
|
+
arkindex_base_worker-0.5.1b5.dist-info/top_level.txt,sha256=-vNjP2VfROx0j83mdi9aIqRZ88eoJjxeWz-R_gPgyXU,49
|
|
61
|
+
arkindex_base_worker-0.5.1b5.dist-info/RECORD,,
|
|
@@ -33,13 +33,10 @@ from arkindex_worker.worker.transcription import TranscriptionMixin
|
|
|
33
33
|
|
|
34
34
|
|
|
35
35
|
class WorkerActivityIterator:
|
|
36
|
-
def __init__(self, api_client
|
|
36
|
+
def __init__(self, api_client):
|
|
37
37
|
# Use same api client as main class
|
|
38
38
|
self.api_client = api_client
|
|
39
39
|
|
|
40
|
-
# Index element types by ID
|
|
41
|
-
self.types = {t["id"]: t["slug"] for t in types}
|
|
42
|
-
|
|
43
40
|
logger.info(
|
|
44
41
|
"Using StartWorkerActivity instead of reading init_elements JSON file"
|
|
45
42
|
)
|
|
@@ -53,7 +50,7 @@ class WorkerActivityIterator:
|
|
|
53
50
|
|
|
54
51
|
def __next__(self):
|
|
55
52
|
"""
|
|
56
|
-
Provide a new element from a worker activity upon each iteration
|
|
53
|
+
Provide a new element ID from a worker activity upon each iteration
|
|
57
54
|
"""
|
|
58
55
|
try:
|
|
59
56
|
data = self.api_client.request("StartWorkerActivity")
|
|
@@ -67,12 +64,7 @@ class WorkerActivityIterator:
|
|
|
67
64
|
)
|
|
68
65
|
raise e
|
|
69
66
|
|
|
70
|
-
|
|
71
|
-
type_id = data["type_id"]
|
|
72
|
-
if type_id not in self.types:
|
|
73
|
-
raise Exception(f"Unknown type {type_id}")
|
|
74
|
-
|
|
75
|
-
return Element(type=self.types[type_id], **data)
|
|
67
|
+
return data["id"]
|
|
76
68
|
|
|
77
69
|
|
|
78
70
|
class ElementsWorker(
|
|
@@ -154,14 +146,9 @@ class ElementsWorker(
|
|
|
154
146
|
elif self.process_mode == ProcessMode.Export:
|
|
155
147
|
# For export mode processes, use list_process_elements and return element IDs
|
|
156
148
|
return {item["id"] for item in self.list_process_elements()}
|
|
157
|
-
elif self.
|
|
158
|
-
# We need to list corpus types as the StartWorkerActivity endpoint only provide type_id
|
|
159
|
-
self.list_corpus_types()
|
|
160
|
-
|
|
149
|
+
elif self.consume_worker_activities:
|
|
161
150
|
# Consume worker activities one by one
|
|
162
|
-
return WorkerActivityIterator(
|
|
163
|
-
self.api_client, types=self.corpus_types.values()
|
|
164
|
-
)
|
|
151
|
+
return WorkerActivityIterator(self.api_client)
|
|
165
152
|
|
|
166
153
|
invalid_element_ids = list(filter(invalid_element_id, out))
|
|
167
154
|
assert not invalid_element_ids, (
|
|
@@ -195,7 +182,7 @@ class ElementsWorker(
|
|
|
195
182
|
- when running with init_elements, we have a known list
|
|
196
183
|
- when running with StartWorkerActivity, we have a queue of unknown size
|
|
197
184
|
"""
|
|
198
|
-
return self.
|
|
185
|
+
return self.consume_worker_activities
|
|
199
186
|
|
|
200
187
|
def run(self):
|
|
201
188
|
"""
|
|
@@ -241,7 +228,7 @@ class ElementsWorker(
|
|
|
241
228
|
|
|
242
229
|
# Process the element and report its progress if activities are enabled
|
|
243
230
|
# We do not update the worker activity to "Started" state when consuming them
|
|
244
|
-
if self.
|
|
231
|
+
if self.consume_worker_activities or self.update_activity(
|
|
245
232
|
element.id, ActivityState.Started
|
|
246
233
|
):
|
|
247
234
|
self.process_element(element)
|
arkindex_worker/worker/base.py
CHANGED
|
@@ -265,6 +265,11 @@ class BaseWorker:
|
|
|
265
265
|
if not item["secret"]:
|
|
266
266
|
return (item["key"], item["value"])
|
|
267
267
|
|
|
268
|
+
# The secret may not be picked by the user
|
|
269
|
+
if item["value"] is None:
|
|
270
|
+
logger.info(f"Skipping optional secret {item['key']}")
|
|
271
|
+
return (item["key"], None)
|
|
272
|
+
|
|
268
273
|
# Load secret, only available in Arkindex EE
|
|
269
274
|
try:
|
|
270
275
|
secret = self.load_secret(Path(item["value"]))
|
|
@@ -276,6 +281,19 @@ class BaseWorker:
|
|
|
276
281
|
|
|
277
282
|
return (item["key"], secret)
|
|
278
283
|
|
|
284
|
+
# Load model version configuration when available
|
|
285
|
+
# Workers will use model version ID and details to download the model
|
|
286
|
+
model_version = worker_run.get("model_version")
|
|
287
|
+
if model_version:
|
|
288
|
+
logger.info("Loaded model version configuration from WorkerRun")
|
|
289
|
+
self.model_configuration.update(model_version["configuration"])
|
|
290
|
+
|
|
291
|
+
# Set model_version ID as worker attribute
|
|
292
|
+
self.model_version_id = model_version["id"]
|
|
293
|
+
|
|
294
|
+
# Set model details as worker attribute
|
|
295
|
+
self.model_details = model_version["model"]
|
|
296
|
+
|
|
279
297
|
# Load worker run information
|
|
280
298
|
try:
|
|
281
299
|
config = self.api_client.request(
|
|
@@ -295,6 +313,9 @@ class BaseWorker:
|
|
|
295
313
|
}
|
|
296
314
|
logger.info("Using modern configuration")
|
|
297
315
|
|
|
316
|
+
# Reset the model configuration to make sure workers rely on the single new source
|
|
317
|
+
self.model_configuration = {}
|
|
318
|
+
|
|
298
319
|
return # Stop here once we have modern configuration
|
|
299
320
|
|
|
300
321
|
except ErrorResponse as e:
|
|
@@ -303,18 +324,6 @@ class BaseWorker:
|
|
|
303
324
|
logger.info("Modern configuration is not available")
|
|
304
325
|
|
|
305
326
|
# Use old-style configuration with local merge
|
|
306
|
-
# Load model version configuration when available
|
|
307
|
-
model_version = worker_run.get("model_version")
|
|
308
|
-
if model_version:
|
|
309
|
-
logger.info("Loaded model version configuration from WorkerRun")
|
|
310
|
-
self.model_configuration.update(model_version["configuration"])
|
|
311
|
-
|
|
312
|
-
# Set model_version ID as worker attribute
|
|
313
|
-
self.model_version_id = model_version["id"]
|
|
314
|
-
|
|
315
|
-
# Set model details as worker attribute
|
|
316
|
-
self.model_details = model_version["model"]
|
|
317
|
-
|
|
318
327
|
# Retrieve initial configuration from API
|
|
319
328
|
self.config = worker_version["configuration"].get("configuration", {})
|
|
320
329
|
if "user_configuration" in worker_version["configuration"]:
|
|
@@ -55,6 +55,17 @@ class ElementMixin:
|
|
|
55
55
|
)
|
|
56
56
|
super().add_arguments()
|
|
57
57
|
|
|
58
|
+
@property
|
|
59
|
+
def consume_worker_activities(self) -> bool:
|
|
60
|
+
"""
|
|
61
|
+
Helper to detect whether the worker relies on an elements.json file or directly consumes worker activities
|
|
62
|
+
Uses the process information when available, falling back to CLI args otherwise
|
|
63
|
+
"""
|
|
64
|
+
if self.process_information is not None:
|
|
65
|
+
return self.process_information.get("skip_elements_json") is True
|
|
66
|
+
|
|
67
|
+
return self.args.consume_worker_activities
|
|
68
|
+
|
|
58
69
|
def list_corpus_types(self):
|
|
59
70
|
"""
|
|
60
71
|
Loads available element types in corpus.
|
tests/conftest.py
CHANGED
|
@@ -103,12 +103,6 @@ def _mock_worker_run_api(responses):
|
|
|
103
103
|
payload = {
|
|
104
104
|
"id": "56785678-5678-5678-5678-567856785678",
|
|
105
105
|
"parents": [],
|
|
106
|
-
"worker": {
|
|
107
|
-
"id": "deadbeef-1234-5678-1234-worker",
|
|
108
|
-
"name": "Fake worker",
|
|
109
|
-
"slug": "fake_worker",
|
|
110
|
-
"type": "classifier",
|
|
111
|
-
},
|
|
112
106
|
"worker_version": {
|
|
113
107
|
"id": "12341234-1234-1234-1234-123412341234",
|
|
114
108
|
"configuration": {
|
|
@@ -153,6 +147,7 @@ def _mock_worker_run_api(responses):
|
|
|
153
147
|
"train_folder_id": None,
|
|
154
148
|
"validation_folder_id": None,
|
|
155
149
|
"test_folder_id": None,
|
|
150
|
+
"skip_elements_json": False,
|
|
156
151
|
},
|
|
157
152
|
"summary": "Worker Fake worker @ 123412",
|
|
158
153
|
}
|
|
@@ -179,12 +174,6 @@ def _mock_worker_run_no_revision_api(responses):
|
|
|
179
174
|
payload = {
|
|
180
175
|
"id": "56785678-5678-5678-5678-567856785678",
|
|
181
176
|
"parents": [],
|
|
182
|
-
"worker": {
|
|
183
|
-
"id": "deadbeef-1234-5678-1234-worker",
|
|
184
|
-
"name": "Fake worker",
|
|
185
|
-
"slug": "fake_worker",
|
|
186
|
-
"type": "classifier",
|
|
187
|
-
},
|
|
188
177
|
"worker_version": {
|
|
189
178
|
"id": "12341234-1234-1234-1234-123412341234",
|
|
190
179
|
"configuration": {
|
|
@@ -346,29 +335,51 @@ def mock_elements_worker_consume_wa(monkeypatch, responses, mock_elements_worker
|
|
|
346
335
|
instead of reading a JSON file
|
|
347
336
|
"""
|
|
348
337
|
|
|
349
|
-
# Enable consume worker activities
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
# Worker requires element types from corpus details as they are not provided by StartWorkerActivity
|
|
353
|
-
responses.add(
|
|
338
|
+
# Enable consume worker activities through the process configuration
|
|
339
|
+
responses.replace(
|
|
354
340
|
responses.GET,
|
|
355
|
-
"http://testserver/api/v1/
|
|
341
|
+
"http://testserver/api/v1/process/workers/56785678-5678-5678-5678-567856785678/",
|
|
356
342
|
status=200,
|
|
357
343
|
json={
|
|
358
|
-
"id": "
|
|
359
|
-
"
|
|
360
|
-
"
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
"
|
|
364
|
-
"
|
|
365
|
-
"
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
344
|
+
"id": "56785678-5678-5678-5678-567856785678",
|
|
345
|
+
"parents": [],
|
|
346
|
+
"worker_version": {
|
|
347
|
+
"id": "12341234-1234-1234-1234-123412341234",
|
|
348
|
+
"configuration": {
|
|
349
|
+
"docker": {"image": "python:3"},
|
|
350
|
+
"configuration": {"someKey": "someValue"},
|
|
351
|
+
"secrets": [],
|
|
352
|
+
},
|
|
353
|
+
"worker": {
|
|
354
|
+
"id": "deadbeef-1234-5678-1234-worker",
|
|
355
|
+
"name": "Fake worker",
|
|
356
|
+
"slug": "fake_worker",
|
|
357
|
+
"type": "classifier",
|
|
358
|
+
},
|
|
359
|
+
},
|
|
360
|
+
"configuration": None,
|
|
361
|
+
"model_version": None,
|
|
362
|
+
"process": {
|
|
363
|
+
"name": None,
|
|
364
|
+
"id": "aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeffff",
|
|
365
|
+
"state": "running",
|
|
366
|
+
"mode": "workers",
|
|
367
|
+
"corpus": CORPUS_ID,
|
|
368
|
+
"use_cache": False,
|
|
369
|
+
"activity_state": "ready",
|
|
370
|
+
"model_id": None,
|
|
371
|
+
"train_folder_id": None,
|
|
372
|
+
"validation_folder_id": None,
|
|
373
|
+
"test_folder_id": None,
|
|
374
|
+
"skip_elements_json": True,
|
|
375
|
+
},
|
|
376
|
+
"summary": "Worker Fake worker @ 123412",
|
|
369
377
|
},
|
|
370
378
|
)
|
|
371
379
|
|
|
380
|
+
# Call configure again to use updated process infos
|
|
381
|
+
mock_elements_worker.configure()
|
|
382
|
+
|
|
372
383
|
return mock_elements_worker
|
|
373
384
|
|
|
374
385
|
|
tests/test_base_worker.py
CHANGED
|
@@ -13,6 +13,29 @@ from arkindex_worker.worker import BaseWorker, ElementsWorker
|
|
|
13
13
|
from arkindex_worker.worker.base import ExtrasDirNotFoundError
|
|
14
14
|
from tests import CORPUS_ID, FIXTURES_DIR
|
|
15
15
|
|
|
16
|
+
SIMPLE_PAYLOAD = {
|
|
17
|
+
"id": "56785678-5678-5678-5678-567856785678",
|
|
18
|
+
"parents": [],
|
|
19
|
+
"worker_version": {
|
|
20
|
+
"id": "12341234-1234-1234-1234-123412341234",
|
|
21
|
+
"worker": {
|
|
22
|
+
"id": "deadbeef-1234-5678-1234-worker",
|
|
23
|
+
"name": "Fake worker",
|
|
24
|
+
"slug": "fake_worker",
|
|
25
|
+
"type": "classifier",
|
|
26
|
+
},
|
|
27
|
+
"revision": {"hash": "deadbeef1234"},
|
|
28
|
+
"configuration": {"configuration": {}},
|
|
29
|
+
},
|
|
30
|
+
"configuration": None,
|
|
31
|
+
"model_version": None,
|
|
32
|
+
"process": {
|
|
33
|
+
"id": "aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeffff",
|
|
34
|
+
"corpus": CORPUS_ID,
|
|
35
|
+
},
|
|
36
|
+
"summary": "Worker Fake worker @ 123412",
|
|
37
|
+
}
|
|
38
|
+
|
|
16
39
|
|
|
17
40
|
def test_init_default_local_share():
|
|
18
41
|
worker = BaseWorker()
|
|
@@ -149,38 +172,13 @@ def test_configure_worker_run(mocker, responses, caplog):
|
|
|
149
172
|
|
|
150
173
|
worker = BaseWorker()
|
|
151
174
|
mocker.patch.object(sys, "argv", ["worker"])
|
|
152
|
-
user_configuration = {
|
|
153
|
-
"id": "bbbbbbbbb-bbbb-bbbb-bbbb-bbbbbbbbbbbb",
|
|
154
|
-
"name": "BBB",
|
|
155
|
-
"configuration": {"a": "b"},
|
|
156
|
-
}
|
|
157
175
|
payload = {
|
|
158
|
-
|
|
159
|
-
"
|
|
160
|
-
|
|
161
|
-
"
|
|
162
|
-
"
|
|
163
|
-
"slug": "fake_worker",
|
|
164
|
-
"type": "classifier",
|
|
165
|
-
},
|
|
166
|
-
"worker_version": {
|
|
167
|
-
"id": "12341234-1234-1234-1234-123412341234",
|
|
168
|
-
"worker": {
|
|
169
|
-
"id": "deadbeef-1234-5678-1234-worker",
|
|
170
|
-
"name": "Fake worker",
|
|
171
|
-
"slug": "fake_worker",
|
|
172
|
-
"type": "classifier",
|
|
173
|
-
},
|
|
174
|
-
"revision": {"hash": "deadbeef1234"},
|
|
175
|
-
"configuration": {"configuration": {}},
|
|
176
|
-
},
|
|
177
|
-
"configuration": user_configuration,
|
|
178
|
-
"model_version": None,
|
|
179
|
-
"process": {
|
|
180
|
-
"id": "aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeffff",
|
|
181
|
-
"corpus": CORPUS_ID,
|
|
176
|
+
**SIMPLE_PAYLOAD,
|
|
177
|
+
"configuration": {
|
|
178
|
+
"id": "bbbbbbbbb-bbbb-bbbb-bbbb-bbbbbbbbbbbb",
|
|
179
|
+
"name": "BBB",
|
|
180
|
+
"configuration": {"a": "b"},
|
|
182
181
|
},
|
|
183
|
-
"summary": "Worker Fake worker @ 123412",
|
|
184
182
|
}
|
|
185
183
|
|
|
186
184
|
responses.add(
|
|
@@ -262,8 +260,7 @@ def test_configure_user_configuration_defaults(mocker, responses):
|
|
|
262
260
|
worker.args = worker.parser.parse_args()
|
|
263
261
|
|
|
264
262
|
payload = {
|
|
265
|
-
|
|
266
|
-
"parents": [],
|
|
263
|
+
**SIMPLE_PAYLOAD,
|
|
267
264
|
"worker_version": {
|
|
268
265
|
"id": "12341234-1234-1234-1234-123412341234",
|
|
269
266
|
"worker": {
|
|
@@ -293,12 +290,6 @@ def test_configure_user_configuration_defaults(mocker, responses):
|
|
|
293
290
|
"param_5": True,
|
|
294
291
|
},
|
|
295
292
|
},
|
|
296
|
-
"model_version": None,
|
|
297
|
-
"process": {
|
|
298
|
-
"id": "aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeffff",
|
|
299
|
-
"corpus": CORPUS_ID,
|
|
300
|
-
},
|
|
301
|
-
"summary": "Worker Fake worker @ 123412",
|
|
302
293
|
}
|
|
303
294
|
responses.add(
|
|
304
295
|
responses.GET,
|
|
@@ -340,30 +331,12 @@ def test_configure_user_config_debug(mocker, responses, debug):
|
|
|
340
331
|
mocker.patch.object(sys, "argv", ["worker"])
|
|
341
332
|
assert logger.level == logging.NOTSET
|
|
342
333
|
payload = {
|
|
343
|
-
|
|
344
|
-
"parents": [],
|
|
345
|
-
"worker_version": {
|
|
346
|
-
"id": "12341234-1234-1234-1234-123412341234",
|
|
347
|
-
"worker": {
|
|
348
|
-
"id": "deadbeef-1234-5678-1234-worker",
|
|
349
|
-
"name": "Fake worker",
|
|
350
|
-
"slug": "fake_worker",
|
|
351
|
-
"type": "classifier",
|
|
352
|
-
},
|
|
353
|
-
"revision": {"hash": "deadbeef1234"},
|
|
354
|
-
"configuration": {"configuration": {}},
|
|
355
|
-
},
|
|
356
|
-
"model_version": None,
|
|
334
|
+
**SIMPLE_PAYLOAD,
|
|
357
335
|
"configuration": {
|
|
358
336
|
"id": "af0daaf4-983e-4703-a7ed-a10f146d6684",
|
|
359
337
|
"name": "BBB",
|
|
360
338
|
"configuration": {"debug": debug},
|
|
361
339
|
},
|
|
362
|
-
"process": {
|
|
363
|
-
"id": "aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeffff",
|
|
364
|
-
"corpus": CORPUS_ID,
|
|
365
|
-
},
|
|
366
|
-
"summary": "Worker Fake worker @ 123412",
|
|
367
340
|
}
|
|
368
341
|
responses.add(
|
|
369
342
|
responses.GET,
|
|
@@ -393,32 +366,8 @@ def test_configure_worker_run_missing_conf(mocker, responses):
|
|
|
393
366
|
mocker.patch.object(sys, "argv", ["worker"])
|
|
394
367
|
|
|
395
368
|
payload = {
|
|
396
|
-
|
|
397
|
-
"parents": [],
|
|
398
|
-
"worker": {
|
|
399
|
-
"id": "deadbeef-1234-5678-1234-worker",
|
|
400
|
-
"name": "Fake worker",
|
|
401
|
-
"slug": "fake_worker",
|
|
402
|
-
"type": "classifier",
|
|
403
|
-
},
|
|
404
|
-
"worker_version": {
|
|
405
|
-
"id": "12341234-1234-1234-1234-123412341234",
|
|
406
|
-
"worker": {
|
|
407
|
-
"id": "deadbeef-1234-5678-1234-worker",
|
|
408
|
-
"name": "Fake worker",
|
|
409
|
-
"slug": "fake_worker",
|
|
410
|
-
"type": "classifier",
|
|
411
|
-
},
|
|
412
|
-
"revision": {"hash": "deadbeef1234"},
|
|
413
|
-
"configuration": {"configuration": {}},
|
|
414
|
-
},
|
|
415
|
-
"model_version": None,
|
|
369
|
+
**SIMPLE_PAYLOAD,
|
|
416
370
|
"configuration": {"id": "bbbbbbbbb-bbbb-bbbb-bbbb-bbbbbbbbbbbb", "name": "BBB"},
|
|
417
|
-
"process": {
|
|
418
|
-
"id": "aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeffff",
|
|
419
|
-
"corpus": CORPUS_ID,
|
|
420
|
-
},
|
|
421
|
-
"summary": "Worker Fake worker @ 123412",
|
|
422
371
|
}
|
|
423
372
|
responses.add(
|
|
424
373
|
responses.GET,
|
|
@@ -446,28 +395,7 @@ def test_configure_worker_run_no_worker_run_conf(mocker, responses):
|
|
|
446
395
|
worker = BaseWorker()
|
|
447
396
|
mocker.patch.object(sys, "argv", ["worker"])
|
|
448
397
|
|
|
449
|
-
payload =
|
|
450
|
-
"id": "56785678-5678-5678-5678-567856785678",
|
|
451
|
-
"parents": [],
|
|
452
|
-
"worker_version": {
|
|
453
|
-
"id": "12341234-1234-1234-1234-123412341234",
|
|
454
|
-
"worker": {
|
|
455
|
-
"id": "deadbeef-1234-5678-1234-worker",
|
|
456
|
-
"name": "Fake worker",
|
|
457
|
-
"slug": "fake_worker",
|
|
458
|
-
"type": "classifier",
|
|
459
|
-
},
|
|
460
|
-
"revision": {"hash": "deadbeef1234"},
|
|
461
|
-
"configuration": {},
|
|
462
|
-
},
|
|
463
|
-
"model_version": None,
|
|
464
|
-
"configuration": None,
|
|
465
|
-
"process": {
|
|
466
|
-
"id": "aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeffff",
|
|
467
|
-
"corpus": CORPUS_ID,
|
|
468
|
-
},
|
|
469
|
-
"summary": "Worker Fake worker @ 123412",
|
|
470
|
-
}
|
|
398
|
+
payload = SIMPLE_PAYLOAD
|
|
471
399
|
responses.add(
|
|
472
400
|
responses.GET,
|
|
473
401
|
"http://testserver/api/v1/process/workers/56785678-5678-5678-5678-567856785678/",
|
|
@@ -491,20 +419,7 @@ def test_configure_load_model_configuration(mocker, responses):
|
|
|
491
419
|
worker = BaseWorker()
|
|
492
420
|
mocker.patch.object(sys, "argv", ["worker"])
|
|
493
421
|
payload = {
|
|
494
|
-
|
|
495
|
-
"parents": [],
|
|
496
|
-
"worker_version": {
|
|
497
|
-
"id": "12341234-1234-1234-1234-123412341234",
|
|
498
|
-
"worker": {
|
|
499
|
-
"id": "deadbeef-1234-5678-1234-worker",
|
|
500
|
-
"name": "Fake worker",
|
|
501
|
-
"slug": "fake_worker",
|
|
502
|
-
"type": "classifier",
|
|
503
|
-
},
|
|
504
|
-
"revision": {"hash": "deadbeef1234"},
|
|
505
|
-
"configuration": {"configuration": {}},
|
|
506
|
-
},
|
|
507
|
-
"configuration": None,
|
|
422
|
+
**SIMPLE_PAYLOAD,
|
|
508
423
|
"model_version": {
|
|
509
424
|
"id": "12341234-1234-1234-1234-123412341234",
|
|
510
425
|
"model": {
|
|
@@ -517,11 +432,6 @@ def test_configure_load_model_configuration(mocker, responses):
|
|
|
517
432
|
"param3": None,
|
|
518
433
|
},
|
|
519
434
|
},
|
|
520
|
-
"process": {
|
|
521
|
-
"id": "aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeffff",
|
|
522
|
-
"corpus": CORPUS_ID,
|
|
523
|
-
},
|
|
524
|
-
"summary": "Worker Fake worker @ 123412",
|
|
525
435
|
}
|
|
526
436
|
|
|
527
437
|
responses.add(
|
|
@@ -698,7 +698,8 @@ def test_run_consuming_worker_activities(
|
|
|
698
698
|
assert mock_elements_worker_consume_wa.is_read_only is False
|
|
699
699
|
|
|
700
700
|
# Provide 2 worker activities to run and the corresponding update call
|
|
701
|
-
|
|
701
|
+
# and 2 element details responses
|
|
702
|
+
for i, elt_id in enumerate(("page_1", "page_2"), 1):
|
|
702
703
|
responses.add(
|
|
703
704
|
responses.POST,
|
|
704
705
|
"http://testserver/api/v1/process/start-activity/",
|
|
@@ -706,7 +707,7 @@ def test_run_consuming_worker_activities(
|
|
|
706
707
|
json={
|
|
707
708
|
"id": elt_id,
|
|
708
709
|
"type_id": "page-aaaa-aaaa-aaaa-aaaaaaaaaaaa", # Element type provided by mock corpus
|
|
709
|
-
"name": "Page
|
|
710
|
+
"name": f"Page n°{i}",
|
|
710
711
|
},
|
|
711
712
|
)
|
|
712
713
|
responses.add(
|
|
@@ -714,6 +715,16 @@ def test_run_consuming_worker_activities(
|
|
|
714
715
|
"http://testserver/api/v1/workers/versions/56785678-5678-5678-5678-567856785678/activity/",
|
|
715
716
|
status=200,
|
|
716
717
|
)
|
|
718
|
+
responses.add(
|
|
719
|
+
responses.GET,
|
|
720
|
+
f"http://testserver/api/v1/element/{elt_id}/",
|
|
721
|
+
status=200,
|
|
722
|
+
json={
|
|
723
|
+
"id": elt_id,
|
|
724
|
+
"type": "page",
|
|
725
|
+
"name": f"Page n°{i}",
|
|
726
|
+
},
|
|
727
|
+
)
|
|
717
728
|
|
|
718
729
|
# Then a 404 to stop iterating
|
|
719
730
|
responses.add(
|
|
@@ -725,18 +736,19 @@ def test_run_consuming_worker_activities(
|
|
|
725
736
|
# Simply run the process
|
|
726
737
|
mock_elements_worker_consume_wa.run()
|
|
727
738
|
|
|
728
|
-
|
|
739
|
+
# We call configure twice in the conftest
|
|
740
|
+
assert len(responses.calls) == len(BASE_API_CALLS) * 2 + 7
|
|
729
741
|
assert [
|
|
730
742
|
(call.request.method, call.request.url) for call in responses.calls
|
|
731
|
-
] == BASE_API_CALLS + [
|
|
732
|
-
(
|
|
733
|
-
"GET",
|
|
734
|
-
"http://testserver/api/v1/corpus/11111111-1111-1111-1111-111111111111/",
|
|
735
|
-
),
|
|
743
|
+
] == BASE_API_CALLS * 2 + [
|
|
736
744
|
(
|
|
737
745
|
"POST",
|
|
738
746
|
"http://testserver/api/v1/process/start-activity/",
|
|
739
747
|
),
|
|
748
|
+
(
|
|
749
|
+
"GET",
|
|
750
|
+
"http://testserver/api/v1/element/page_1/",
|
|
751
|
+
),
|
|
740
752
|
(
|
|
741
753
|
"PUT",
|
|
742
754
|
"http://testserver/api/v1/workers/versions/56785678-5678-5678-5678-567856785678/activity/",
|
|
@@ -745,6 +757,10 @@ def test_run_consuming_worker_activities(
|
|
|
745
757
|
"POST",
|
|
746
758
|
"http://testserver/api/v1/process/start-activity/",
|
|
747
759
|
),
|
|
760
|
+
(
|
|
761
|
+
"GET",
|
|
762
|
+
"http://testserver/api/v1/element/page_2/",
|
|
763
|
+
),
|
|
748
764
|
(
|
|
749
765
|
"PUT",
|
|
750
766
|
"http://testserver/api/v1/workers/versions/56785678-5678-5678-5678-567856785678/activity/",
|
|
@@ -756,21 +772,17 @@ def test_run_consuming_worker_activities(
|
|
|
756
772
|
]
|
|
757
773
|
|
|
758
774
|
assert [(record.levelno, record.message) for record in caplog.records] == [
|
|
759
|
-
(
|
|
760
|
-
logging.INFO,
|
|
761
|
-
"Loaded 1 element type in corpus (11111111-1111-1111-1111-111111111111).",
|
|
762
|
-
),
|
|
763
775
|
(
|
|
764
776
|
logging.INFO,
|
|
765
777
|
"Using StartWorkerActivity instead of reading init_elements JSON file",
|
|
766
778
|
),
|
|
767
779
|
(
|
|
768
780
|
logging.INFO,
|
|
769
|
-
"Processing page Page
|
|
781
|
+
"Processing page Page n°1 (page_1) (n°1)",
|
|
770
782
|
),
|
|
771
783
|
(
|
|
772
784
|
logging.INFO,
|
|
773
|
-
"Processing page Page
|
|
785
|
+
"Processing page Page n°2 (page_2) (n°2)",
|
|
774
786
|
),
|
|
775
787
|
(
|
|
776
788
|
logging.INFO,
|
|
File without changes
|
{arkindex_base_worker-0.5.1b2.dist-info → arkindex_base_worker-0.5.1b5.dist-info}/licenses/LICENSE
RENAMED
|
File without changes
|
{arkindex_base_worker-0.5.1b2.dist-info → arkindex_base_worker-0.5.1b5.dist-info}/top_level.txt
RENAMED
|
File without changes
|