arkindex-base-worker 0.5.1b1__py3-none-any.whl → 0.5.1b4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {arkindex_base_worker-0.5.1b1.dist-info → arkindex_base_worker-0.5.1b4.dist-info}/METADATA +1 -1
- {arkindex_base_worker-0.5.1b1.dist-info → arkindex_base_worker-0.5.1b4.dist-info}/RECORD +14 -13
- arkindex_worker/worker/__init__.py +7 -20
- arkindex_worker/worker/base.py +44 -1
- arkindex_worker/worker/element.py +11 -0
- tests/conftest.py +103 -17
- tests/test_base_worker.py +66 -2
- tests/test_dataset_worker.py +5 -0
- tests/test_elements_worker/__init__.py +4 -0
- tests/test_elements_worker/test_worker.py +26 -14
- tests/test_modern_config.py +81 -0
- {arkindex_base_worker-0.5.1b1.dist-info → arkindex_base_worker-0.5.1b4.dist-info}/WHEEL +0 -0
- {arkindex_base_worker-0.5.1b1.dist-info → arkindex_base_worker-0.5.1b4.dist-info}/licenses/LICENSE +0 -0
- {arkindex_base_worker-0.5.1b1.dist-info → arkindex_base_worker-0.5.1b4.dist-info}/top_level.txt +0 -0
|
@@ -1,15 +1,15 @@
|
|
|
1
|
-
arkindex_base_worker-0.5.1b1.dist-info/licenses/LICENSE,sha256=NVshRi1efwVezMfW7xXYLrdDr2Li1AfwfGOd5WuH1kQ,1063
|
|
1
|
+
arkindex_base_worker-0.5.1b4.dist-info/licenses/LICENSE,sha256=NVshRi1efwVezMfW7xXYLrdDr2Li1AfwfGOd5WuH1kQ,1063
|
|
2
2
|
arkindex_worker/__init__.py,sha256=Sdt5KXn8EgURb2MurYVrUWaHbH3iFA1XLRo0Lc5AJ44,250
|
|
3
3
|
arkindex_worker/cache.py,sha256=x1d1oVF297ItLoZnPkZQoEefa39ZigrwRoHC_6az94k,10731
|
|
4
4
|
arkindex_worker/image.py,sha256=GvIpW7LNSalVw3Obt9nySDWnW7-NbC0__SWREEQqVCk,20696
|
|
5
5
|
arkindex_worker/models.py,sha256=bPQzGZNs5a6z6DEcygsa8T33VOqPlMUbwKzHqlKzwbw,9923
|
|
6
6
|
arkindex_worker/utils.py,sha256=MbbJT8oh8DMHHR-vidFeXdUH0TSXGWm7ZDGWzrRXoEY,9933
|
|
7
|
-
arkindex_worker/worker/__init__.py,sha256=
|
|
8
|
-
arkindex_worker/worker/base.py,sha256=
|
|
7
|
+
arkindex_worker/worker/__init__.py,sha256=SzD0s1_m6gMV02EUF-NeciqZdVPA4dpXI84tSj-g494,17869
|
|
8
|
+
arkindex_worker/worker/base.py,sha256=fbRJ5vDON3DfSQfwxFqto85HY8Dw2_YgOmnm5cxbQ2g,21725
|
|
9
9
|
arkindex_worker/worker/classification.py,sha256=qvykymkgd4nGywHCxL8obo4egstoGsmWNS4Ztc1qNWQ,11024
|
|
10
10
|
arkindex_worker/worker/corpus.py,sha256=MeIMod7jkWyX0frtD0a37rhumnMV3p9ZOC1xwAoXrAA,2291
|
|
11
11
|
arkindex_worker/worker/dataset.py,sha256=tVaPx43vaH-KTtx4w5V06e26ha8XPfiJTRzBXlu928Y,5273
|
|
12
|
-
arkindex_worker/worker/element.py,sha256=
|
|
12
|
+
arkindex_worker/worker/element.py,sha256=sLfnf09AfJ5tSCKQ7cAkl7WsGhjsfq14swsT30MDnYk,47385
|
|
13
13
|
arkindex_worker/worker/entity.py,sha256=Aj6EOfzHEm7qQV-Egm0YKLZgCrLS_3ggOKTY81M2JbI,12323
|
|
14
14
|
arkindex_worker/worker/image.py,sha256=L6Ikuf0Z0RxJk7JarY5PggJGrYSHLaPK0vn0dy0CIaQ,623
|
|
15
15
|
arkindex_worker/worker/metadata.py,sha256=rBjU057xngwrf32vAo-2cpgYfmrdEj3lfDg_kv4-zr0,6810
|
|
@@ -21,15 +21,16 @@ examples/standalone/python/worker.py,sha256=Zr4s4pHvgexEjlkixLFYZp1UuwMLeoTxjyNG
|
|
|
21
21
|
examples/tooled/python/worker.py,sha256=kIYlHLsO5UpwX4XtERRq4tf2qTsvqKK30C-w8t0yyhA,1821
|
|
22
22
|
hooks/pre_gen_project.py,sha256=xQJERv3vv9VzIqcBHI281eeWLWREXUF4mMw7PvJHHXM,269
|
|
23
23
|
tests/__init__.py,sha256=DG--S6IpGl399rzSAjDdHL76CkOIeZIjajCcyUSDhOQ,241
|
|
24
|
-
tests/conftest.py,sha256=
|
|
25
|
-
tests/test_base_worker.py,sha256=
|
|
24
|
+
tests/conftest.py,sha256=OaajGB0FS0Jh3rFhL8a3HdaN84XzIxNbRORvzI5W2ns,24928
|
|
25
|
+
tests/test_base_worker.py,sha256=3YjhjxSWVjEWFYS8m8pYYoaVAhHFkJLNTs0QPQIkBDM,32651
|
|
26
26
|
tests/test_cache.py,sha256=nnEFfAAqtYHk2ymOwN0spXJd8nrRiwp3voj0tOmIbQ8,10407
|
|
27
|
-
tests/test_dataset_worker.py,sha256=
|
|
27
|
+
tests/test_dataset_worker.py,sha256=iDJM2C4PfQNH0r4_QqSWoPt8BcM0geUUdODtWY0Z9PA,22412
|
|
28
28
|
tests/test_element.py,sha256=2G9M15TLxQRmvrWM9Kw2ucnElh4kSv_oF_5FYwwAxTY,13181
|
|
29
29
|
tests/test_image.py,sha256=NEIp5evr6QoTWgJ-_fze19IEFm_hG6YEcuW1kxnxS_I,28013
|
|
30
30
|
tests/test_merge.py,sha256=REpZ13jkq_qm_4L5URQgFy5lxvPZtXxQEiWfYLMdmF0,7956
|
|
31
|
+
tests/test_modern_config.py,sha256=Bm-a4LYQXgLZWQX7AmVyfJW0LNoLy1wj2d2GjzDkcBk,2683
|
|
31
32
|
tests/test_utils.py,sha256=nYL1s2ViZoLoMiNpLGDaWwxf8dJ1D8aT522AO-PVaEQ,3607
|
|
32
|
-
tests/test_elements_worker/__init__.py,sha256=
|
|
33
|
+
tests/test_elements_worker/__init__.py,sha256=2t3NciCIOun_N-Wv63FWGsTm5W9N3mbwAWVuFORlMg8,308
|
|
33
34
|
tests/test_elements_worker/test_classification.py,sha256=nya7veSPR_O9G41Enodp2-o6AifMBcaSTWJP2vXSSJ4,30133
|
|
34
35
|
tests/test_elements_worker/test_cli.py,sha256=a23i1pUDbXi23MUtbWwGEcLLrmc_YlrbDgOG3h66wLM,2620
|
|
35
36
|
tests/test_elements_worker/test_corpus.py,sha256=kscJyM8k1njYJJFGuvliVzn89lWh41mEyDCCawnp3W8,5483
|
|
@@ -48,13 +49,13 @@ tests/test_elements_worker/test_training.py,sha256=qgK7BLucddRzc8ePbQtY75x17QvGD
|
|
|
48
49
|
tests/test_elements_worker/test_transcription_create.py,sha256=yznO9B_BVsOR0Z_VY5ZL8gJp0ZPCz_4sPUs5dXtixAg,29281
|
|
49
50
|
tests/test_elements_worker/test_transcription_create_with_elements.py,sha256=tmcyglgssEqMnt1Mdy_u6X1m2wgLWTo_HdWst3GrK2k,33056
|
|
50
51
|
tests/test_elements_worker/test_transcription_list.py,sha256=ikz7HYPCoQWTdTRCd382SB-y-T2BbigPLlIcx5Eow-I,15324
|
|
51
|
-
tests/test_elements_worker/test_worker.py,sha256=
|
|
52
|
+
tests/test_elements_worker/test_worker.py,sha256=ypAQS_DJj9qGlRJCs9g5qUXe7IgqaKXWDcxqwlhAqSg,28598
|
|
52
53
|
worker-demo/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
53
54
|
worker-demo/tests/conftest.py,sha256=XzNMNeg6pmABUAH8jN6eZTlZSFGLYjS3-DTXjiRN6Yc,1002
|
|
54
55
|
worker-demo/tests/test_worker.py,sha256=3DLd4NRK4bfyatG5P_PK4k9P9tJHx9XQq5_ryFEEFVg,304
|
|
55
56
|
worker-demo/worker_demo/__init__.py,sha256=2BPomV8ZMNf3YXJgloatKeHQCE6QOkwmsHGkO6MkQuM,125
|
|
56
57
|
worker-demo/worker_demo/worker.py,sha256=Rt-DjWa5iBP08k58NDZMfeyPuFbtNcbX6nc5jFX7GNo,440
|
|
57
|
-
arkindex_base_worker-0.5.
|
|
58
|
-
arkindex_base_worker-0.5.1b1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
59
|
-
arkindex_base_worker-0.5.1b1.dist-info/top_level.txt,sha256=-vNjP2VfROx0j83mdi9aIqRZ88eoJjxeWz-R_gPgyXU,49
|
|
60
|
-
arkindex_base_worker-0.5.1b1.dist-info/RECORD,,
|
|
58
|
+
arkindex_base_worker-0.5.1b4.dist-info/METADATA,sha256=hLR30IODhVUqKVq_4qXv_AcNswb0afCA-2Nce13UPno,3137
|
|
59
|
+
arkindex_base_worker-0.5.1b4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
60
|
+
arkindex_base_worker-0.5.1b4.dist-info/top_level.txt,sha256=-vNjP2VfROx0j83mdi9aIqRZ88eoJjxeWz-R_gPgyXU,49
|
|
61
|
+
arkindex_base_worker-0.5.1b4.dist-info/RECORD,,
|
|
@@ -33,13 +33,10 @@ from arkindex_worker.worker.transcription import TranscriptionMixin
|
|
|
33
33
|
|
|
34
34
|
|
|
35
35
|
class WorkerActivityIterator:
|
|
36
|
-
def __init__(self, api_client, types):
|
|
36
|
+
def __init__(self, api_client):
|
|
37
37
|
# Use same api client as main class
|
|
38
38
|
self.api_client = api_client
|
|
39
39
|
|
|
40
|
-
# Index element types by ID
|
|
41
|
-
self.types = {t["id"]: t["slug"] for t in types}
|
|
42
|
-
|
|
43
40
|
logger.info(
|
|
44
41
|
"Using StartWorkerActivity instead of reading init_elements JSON file"
|
|
45
42
|
)
|
|
@@ -53,7 +50,7 @@ class WorkerActivityIterator:
|
|
|
53
50
|
|
|
54
51
|
def __next__(self):
|
|
55
52
|
"""
|
|
56
|
-
Provide a new element from a worker activity upon each iteration
|
|
53
|
+
Provide a new element ID from a worker activity upon each iteration
|
|
57
54
|
"""
|
|
58
55
|
try:
|
|
59
56
|
data = self.api_client.request("StartWorkerActivity")
|
|
@@ -67,12 +64,7 @@ class WorkerActivityIterator:
|
|
|
67
64
|
)
|
|
68
65
|
raise e
|
|
69
66
|
|
|
70
|
-
|
|
71
|
-
type_id = data["type_id"]
|
|
72
|
-
if type_id not in self.types:
|
|
73
|
-
raise Exception(f"Unknown type {type_id}")
|
|
74
|
-
|
|
75
|
-
return Element(type=self.types[type_id], **data)
|
|
67
|
+
return data["id"]
|
|
76
68
|
|
|
77
69
|
|
|
78
70
|
class ElementsWorker(
|
|
@@ -154,14 +146,9 @@ class ElementsWorker(
|
|
|
154
146
|
elif self.process_mode == ProcessMode.Export:
|
|
155
147
|
# For export mode processes, use list_process_elements and return element IDs
|
|
156
148
|
return {item["id"] for item in self.list_process_elements()}
|
|
157
|
-
elif self.args.consume_worker_activities:
|
|
158
|
-
# We need to list corpus types as the StartWorkerActivity endpoint only provide type_id
|
|
159
|
-
self.list_corpus_types()
|
|
160
|
-
|
|
149
|
+
elif self.consume_worker_activities:
|
|
161
150
|
# Consume worker activitives one by one
|
|
162
|
-
return WorkerActivityIterator(
|
|
163
|
-
self.api_client, types=self.corpus_types.values()
|
|
164
|
-
)
|
|
151
|
+
return WorkerActivityIterator(self.api_client)
|
|
165
152
|
|
|
166
153
|
invalid_element_ids = list(filter(invalid_element_id, out))
|
|
167
154
|
assert not invalid_element_ids, (
|
|
@@ -195,7 +182,7 @@ class ElementsWorker(
|
|
|
195
182
|
- when running with init_elements, we have a known list
|
|
196
183
|
- when running with StartWorkerActivity, we have a queue of unknown size
|
|
197
184
|
"""
|
|
198
|
-
return self.args.consume_worker_activities
|
|
185
|
+
return self.consume_worker_activities
|
|
199
186
|
|
|
200
187
|
def run(self):
|
|
201
188
|
"""
|
|
@@ -241,7 +228,7 @@ class ElementsWorker(
|
|
|
241
228
|
|
|
242
229
|
# Process the element and report its progress if activities are enabled
|
|
243
230
|
# We do not update the worker activity to "Started" state when consuming them
|
|
244
|
-
if self.args.consume_worker_activities or self.update_activity(
|
|
231
|
+
if self.consume_worker_activities or self.update_activity(
|
|
245
232
|
element.id, ActivityState.Started
|
|
246
233
|
):
|
|
247
234
|
self.process_element(element)
|
arkindex_worker/worker/base.py
CHANGED
|
@@ -9,12 +9,13 @@ import os
|
|
|
9
9
|
import shutil
|
|
10
10
|
from pathlib import Path
|
|
11
11
|
from tempfile import mkdtemp
|
|
12
|
+
from typing import Any
|
|
12
13
|
|
|
13
14
|
import gnupg
|
|
14
15
|
import yaml
|
|
15
16
|
|
|
16
17
|
from arkindex import options_from_env
|
|
17
|
-
from arkindex.exceptions import ErrorResponse
|
|
18
|
+
from arkindex.exceptions import ClientError, ErrorResponse
|
|
18
19
|
from arkindex_worker import logger
|
|
19
20
|
from arkindex_worker.cache import (
|
|
20
21
|
check_version,
|
|
@@ -260,6 +261,48 @@ class BaseWorker:
|
|
|
260
261
|
|
|
261
262
|
logger.info(f"Loaded {worker_run['summary']} from API")
|
|
262
263
|
|
|
264
|
+
def _process_config_item(item: dict) -> tuple[str, Any]:
|
|
265
|
+
if not item["secret"]:
|
|
266
|
+
return (item["key"], item["value"])
|
|
267
|
+
|
|
268
|
+
# Load secret, only available in Arkindex EE
|
|
269
|
+
try:
|
|
270
|
+
secret = self.load_secret(Path(item["value"]))
|
|
271
|
+
except ClientError as e:
|
|
272
|
+
logger.error(
|
|
273
|
+
f"Failed to retrieve the secret {item['value']}, probably an Arkindex Community Edition: {e}"
|
|
274
|
+
)
|
|
275
|
+
return (item["key"], None)
|
|
276
|
+
|
|
277
|
+
return (item["key"], secret)
|
|
278
|
+
|
|
279
|
+
# Load worker run information
|
|
280
|
+
try:
|
|
281
|
+
config = self.api_client.request(
|
|
282
|
+
"RetrieveWorkerRunConfiguration", id=self.worker_run_id
|
|
283
|
+
)
|
|
284
|
+
|
|
285
|
+
# Provide the same configuration through all previous attributes
|
|
286
|
+
self.config = self.user_configuration = dict(
|
|
287
|
+
map(_process_config_item, config["configuration"])
|
|
288
|
+
)
|
|
289
|
+
|
|
290
|
+
# Provide secret values through the previous attribute
|
|
291
|
+
self.secrets = {
|
|
292
|
+
item["key"]: self.config[item["key"]]
|
|
293
|
+
for item in config["configuration"]
|
|
294
|
+
if item["secret"]
|
|
295
|
+
}
|
|
296
|
+
logger.info("Using modern configuration")
|
|
297
|
+
|
|
298
|
+
return # Stop here once we have modern configuration
|
|
299
|
+
|
|
300
|
+
except ErrorResponse as e:
|
|
301
|
+
if e.status_code != 400:
|
|
302
|
+
raise
|
|
303
|
+
logger.info("Modern configuration is not available")
|
|
304
|
+
|
|
305
|
+
# Use old-style configuration with local merge
|
|
263
306
|
# Load model version configuration when available
|
|
264
307
|
model_version = worker_run.get("model_version")
|
|
265
308
|
if model_version:
|
|
@@ -55,6 +55,17 @@ class ElementMixin:
|
|
|
55
55
|
)
|
|
56
56
|
super().add_arguments()
|
|
57
57
|
|
|
58
|
+
@property
|
|
59
|
+
def consume_worker_activities(self) -> bool:
|
|
60
|
+
"""
|
|
61
|
+
Helper to detect if the worker rely on an elements.json or consume directly worker activities
|
|
62
|
+
Uses the process information when available, fallback to CLI args
|
|
63
|
+
"""
|
|
64
|
+
if self.process_information is not None:
|
|
65
|
+
return self.process_information.get("skip_elements_json") is True
|
|
66
|
+
|
|
67
|
+
return self.args.consume_worker_activities
|
|
68
|
+
|
|
58
69
|
def list_corpus_types(self):
|
|
59
70
|
"""
|
|
60
71
|
Loads available element types in corpus.
|
tests/conftest.py
CHANGED
|
@@ -153,6 +153,7 @@ def _mock_worker_run_api(responses):
|
|
|
153
153
|
"train_folder_id": None,
|
|
154
154
|
"validation_folder_id": None,
|
|
155
155
|
"test_folder_id": None,
|
|
156
|
+
"skip_elements_json": False,
|
|
156
157
|
},
|
|
157
158
|
"summary": "Worker Fake worker @ 123412",
|
|
158
159
|
}
|
|
@@ -165,6 +166,13 @@ def _mock_worker_run_api(responses):
|
|
|
165
166
|
content_type="application/json",
|
|
166
167
|
)
|
|
167
168
|
|
|
169
|
+
# By default, stick to classic configuration
|
|
170
|
+
responses.add(
|
|
171
|
+
responses.GET,
|
|
172
|
+
"http://testserver/api/v1/workers/runs/56785678-5678-5678-5678-567856785678/configuration/",
|
|
173
|
+
status=400,
|
|
174
|
+
)
|
|
175
|
+
|
|
168
176
|
|
|
169
177
|
@pytest.fixture
|
|
170
178
|
def _mock_worker_run_no_revision_api(responses):
|
|
@@ -233,6 +241,56 @@ def _mock_worker_run_no_revision_api(responses):
|
|
|
233
241
|
)
|
|
234
242
|
|
|
235
243
|
|
|
244
|
+
@pytest.fixture
|
|
245
|
+
def mock_base_worker_modern_conf(mocker, responses):
|
|
246
|
+
"""
|
|
247
|
+
Provide a base worker to test modern configuration with (not provided in the fixture)
|
|
248
|
+
"""
|
|
249
|
+
worker = BaseWorker()
|
|
250
|
+
mocker.patch.object(sys, "argv")
|
|
251
|
+
worker.args = worker.parser.parse_args()
|
|
252
|
+
|
|
253
|
+
payload = {
|
|
254
|
+
"id": "56785678-5678-5678-5678-567856785678",
|
|
255
|
+
"parents": [],
|
|
256
|
+
"worker_version": {
|
|
257
|
+
"id": "12341234-1234-1234-1234-123412341234",
|
|
258
|
+
"worker": {
|
|
259
|
+
"id": "deadbeef-1234-5678-1234-worker",
|
|
260
|
+
"name": "Fake worker",
|
|
261
|
+
"slug": "fake_worker",
|
|
262
|
+
"type": "classifier",
|
|
263
|
+
},
|
|
264
|
+
"revision": {"hash": "deadbeef1234"},
|
|
265
|
+
"configuration": {
|
|
266
|
+
"configuration": {"extra_key1": "not showing up"},
|
|
267
|
+
"user_configuration": {"extra_key2": "not showing up"},
|
|
268
|
+
},
|
|
269
|
+
},
|
|
270
|
+
"configuration": {
|
|
271
|
+
"id": "af0daaf4-983e-4703-a7ed-a10f146d6684",
|
|
272
|
+
"name": "my-userconfig",
|
|
273
|
+
"configuration": {
|
|
274
|
+
"extra_key3": "not showing up",
|
|
275
|
+
},
|
|
276
|
+
},
|
|
277
|
+
"model_version": None,
|
|
278
|
+
"process": {
|
|
279
|
+
"id": "aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeffff",
|
|
280
|
+
"corpus": CORPUS_ID,
|
|
281
|
+
},
|
|
282
|
+
"summary": "Worker Fake worker @ 123412",
|
|
283
|
+
}
|
|
284
|
+
responses.add(
|
|
285
|
+
responses.GET,
|
|
286
|
+
"http://testserver/api/v1/process/workers/56785678-5678-5678-5678-567856785678/",
|
|
287
|
+
status=200,
|
|
288
|
+
json=payload,
|
|
289
|
+
)
|
|
290
|
+
|
|
291
|
+
return worker
|
|
292
|
+
|
|
293
|
+
|
|
236
294
|
@pytest.fixture
|
|
237
295
|
def _mock_activity_calls(responses):
|
|
238
296
|
"""
|
|
@@ -289,29 +347,57 @@ def mock_elements_worker_consume_wa(monkeypatch, responses, mock_elements_worker
|
|
|
289
347
|
instead of reading a JSON file
|
|
290
348
|
"""
|
|
291
349
|
|
|
292
|
-
# Enable consume worker activities
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
# Worker requires element types from corpus details as they are not provided by StartWorkerActivity
|
|
296
|
-
responses.add(
|
|
350
|
+
# Enable consume worker activities through the process configuration
|
|
351
|
+
responses.replace(
|
|
297
352
|
responses.GET,
|
|
298
|
-
"http://testserver/api/v1/corpus/11111111-1111-1111-1111-111111111111/",
|
|
353
|
+
"http://testserver/api/v1/process/workers/56785678-5678-5678-5678-567856785678/",
|
|
299
354
|
status=200,
|
|
300
355
|
json={
|
|
301
|
-
"id": "
|
|
302
|
-
"
|
|
303
|
-
"
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
356
|
+
"id": "56785678-5678-5678-5678-567856785678",
|
|
357
|
+
"parents": [],
|
|
358
|
+
"worker": {
|
|
359
|
+
"id": "deadbeef-1234-5678-1234-worker",
|
|
360
|
+
"name": "Fake worker",
|
|
361
|
+
"slug": "fake_worker",
|
|
362
|
+
"type": "classifier",
|
|
363
|
+
},
|
|
364
|
+
"worker_version": {
|
|
365
|
+
"id": "12341234-1234-1234-1234-123412341234",
|
|
366
|
+
"configuration": {
|
|
367
|
+
"docker": {"image": "python:3"},
|
|
368
|
+
"configuration": {"someKey": "someValue"},
|
|
369
|
+
"secrets": [],
|
|
370
|
+
},
|
|
371
|
+
"worker": {
|
|
372
|
+
"id": "deadbeef-1234-5678-1234-worker",
|
|
373
|
+
"name": "Fake worker",
|
|
374
|
+
"slug": "fake_worker",
|
|
375
|
+
"type": "classifier",
|
|
376
|
+
},
|
|
377
|
+
},
|
|
378
|
+
"configuration": None,
|
|
379
|
+
"model_version": None,
|
|
380
|
+
"process": {
|
|
381
|
+
"name": None,
|
|
382
|
+
"id": "aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeffff",
|
|
383
|
+
"state": "running",
|
|
384
|
+
"mode": "workers",
|
|
385
|
+
"corpus": CORPUS_ID,
|
|
386
|
+
"use_cache": False,
|
|
387
|
+
"activity_state": "ready",
|
|
388
|
+
"model_id": None,
|
|
389
|
+
"train_folder_id": None,
|
|
390
|
+
"validation_folder_id": None,
|
|
391
|
+
"test_folder_id": None,
|
|
392
|
+
"skip_elements_json": True,
|
|
393
|
+
},
|
|
394
|
+
"summary": "Worker Fake worker @ 123412",
|
|
312
395
|
},
|
|
313
396
|
)
|
|
314
397
|
|
|
398
|
+
# Call configure again to use updated process infos
|
|
399
|
+
mock_elements_worker.configure()
|
|
400
|
+
|
|
315
401
|
return mock_elements_worker
|
|
316
402
|
|
|
317
403
|
|
tests/test_base_worker.py
CHANGED
|
@@ -190,6 +190,14 @@ def test_configure_worker_run(mocker, responses, caplog):
|
|
|
190
190
|
body=json.dumps(payload),
|
|
191
191
|
content_type="application/json",
|
|
192
192
|
)
|
|
193
|
+
|
|
194
|
+
# By default, stick to classic configuration
|
|
195
|
+
responses.add(
|
|
196
|
+
responses.GET,
|
|
197
|
+
"http://testserver/api/v1/workers/runs/56785678-5678-5678-5678-567856785678/configuration/",
|
|
198
|
+
status=400,
|
|
199
|
+
)
|
|
200
|
+
|
|
193
201
|
worker.args = worker.parser.parse_args()
|
|
194
202
|
assert worker.is_read_only is False
|
|
195
203
|
assert worker.worker_run_id == "56785678-5678-5678-5678-567856785678"
|
|
@@ -205,6 +213,11 @@ def test_configure_worker_run(mocker, responses, caplog):
|
|
|
205
213
|
logging.INFO,
|
|
206
214
|
"Loaded Worker Fake worker @ 123412 from API",
|
|
207
215
|
),
|
|
216
|
+
(
|
|
217
|
+
"arkindex_worker",
|
|
218
|
+
logging.INFO,
|
|
219
|
+
"Modern configuration is not available",
|
|
220
|
+
),
|
|
208
221
|
("arkindex_worker", logging.INFO, "Loaded user configuration from WorkerRun"),
|
|
209
222
|
("arkindex_worker", logging.INFO, "User configuration retrieved"),
|
|
210
223
|
]
|
|
@@ -213,9 +226,16 @@ def test_configure_worker_run(mocker, responses, caplog):
|
|
|
213
226
|
|
|
214
227
|
|
|
215
228
|
@pytest.mark.usefixtures("_mock_worker_run_no_revision_api")
|
|
216
|
-
def test_configure_worker_run_no_revision(mocker, caplog):
|
|
229
|
+
def test_configure_worker_run_no_revision(mocker, caplog, responses):
|
|
217
230
|
worker = BaseWorker()
|
|
218
231
|
|
|
232
|
+
# By default, stick to classic configuration
|
|
233
|
+
responses.add(
|
|
234
|
+
responses.GET,
|
|
235
|
+
"http://testserver/api/v1/workers/runs/56785678-5678-5678-5678-567856785678/configuration/",
|
|
236
|
+
status=400,
|
|
237
|
+
)
|
|
238
|
+
|
|
219
239
|
mocker.patch.object(sys, "argv", ["worker"])
|
|
220
240
|
worker.args = worker.parser.parse_args()
|
|
221
241
|
assert worker.is_read_only is False
|
|
@@ -227,7 +247,12 @@ def test_configure_worker_run_no_revision(mocker, caplog):
|
|
|
227
247
|
worker.configure()
|
|
228
248
|
|
|
229
249
|
assert caplog.record_tuples == [
|
|
230
|
-
("arkindex_worker", logging.INFO, "Loaded Worker Fake worker @ 1 from API")
|
|
250
|
+
("arkindex_worker", logging.INFO, "Loaded Worker Fake worker @ 1 from API"),
|
|
251
|
+
(
|
|
252
|
+
"arkindex_worker",
|
|
253
|
+
logging.INFO,
|
|
254
|
+
"Modern configuration is not available",
|
|
255
|
+
),
|
|
231
256
|
]
|
|
232
257
|
|
|
233
258
|
|
|
@@ -283,6 +308,13 @@ def test_configure_user_configuration_defaults(mocker, responses):
|
|
|
283
308
|
content_type="application/json",
|
|
284
309
|
)
|
|
285
310
|
|
|
311
|
+
# By default, stick to classic configuration
|
|
312
|
+
responses.add(
|
|
313
|
+
responses.GET,
|
|
314
|
+
"http://testserver/api/v1/workers/runs/56785678-5678-5678-5678-567856785678/configuration/",
|
|
315
|
+
status=400,
|
|
316
|
+
)
|
|
317
|
+
|
|
286
318
|
worker.configure()
|
|
287
319
|
|
|
288
320
|
assert worker.user_configuration == {
|
|
@@ -340,6 +372,13 @@ def test_configure_user_config_debug(mocker, responses, debug):
|
|
|
340
372
|
body=json.dumps(payload),
|
|
341
373
|
content_type="application/json",
|
|
342
374
|
)
|
|
375
|
+
|
|
376
|
+
# By default, stick to classic configuration
|
|
377
|
+
responses.add(
|
|
378
|
+
responses.GET,
|
|
379
|
+
"http://testserver/api/v1/workers/runs/56785678-5678-5678-5678-567856785678/configuration/",
|
|
380
|
+
status=400,
|
|
381
|
+
)
|
|
343
382
|
worker.args = worker.parser.parse_args()
|
|
344
383
|
worker.configure()
|
|
345
384
|
|
|
@@ -388,6 +427,12 @@ def test_configure_worker_run_missing_conf(mocker, responses):
|
|
|
388
427
|
body=json.dumps(payload),
|
|
389
428
|
content_type="application/json",
|
|
390
429
|
)
|
|
430
|
+
# By default, stick to classic configuration
|
|
431
|
+
responses.add(
|
|
432
|
+
responses.GET,
|
|
433
|
+
"http://testserver/api/v1/workers/runs/56785678-5678-5678-5678-567856785678/configuration/",
|
|
434
|
+
status=400,
|
|
435
|
+
)
|
|
391
436
|
worker.args = worker.parser.parse_args()
|
|
392
437
|
worker.configure()
|
|
393
438
|
|
|
@@ -430,6 +475,12 @@ def test_configure_worker_run_no_worker_run_conf(mocker, responses):
|
|
|
430
475
|
body=json.dumps(payload),
|
|
431
476
|
content_type="application/json",
|
|
432
477
|
)
|
|
478
|
+
# By default, stick to classic configuration
|
|
479
|
+
responses.add(
|
|
480
|
+
responses.GET,
|
|
481
|
+
"http://testserver/api/v1/workers/runs/56785678-5678-5678-5678-567856785678/configuration/",
|
|
482
|
+
status=400,
|
|
483
|
+
)
|
|
433
484
|
worker.args = worker.parser.parse_args()
|
|
434
485
|
worker.configure()
|
|
435
486
|
|
|
@@ -480,6 +531,12 @@ def test_configure_load_model_configuration(mocker, responses):
|
|
|
480
531
|
body=json.dumps(payload),
|
|
481
532
|
content_type="application/json",
|
|
482
533
|
)
|
|
534
|
+
# By default, stick to classic configuration
|
|
535
|
+
responses.add(
|
|
536
|
+
responses.GET,
|
|
537
|
+
"http://testserver/api/v1/workers/runs/56785678-5678-5678-5678-567856785678/configuration/",
|
|
538
|
+
status=400,
|
|
539
|
+
)
|
|
483
540
|
worker.args = worker.parser.parse_args()
|
|
484
541
|
assert worker.is_read_only is False
|
|
485
542
|
assert worker.worker_run_id == "56785678-5678-5678-5678-567856785678"
|
|
@@ -947,6 +1004,13 @@ def test_worker_config_multiple_source(
|
|
|
947
1004
|
content_type="application/json",
|
|
948
1005
|
)
|
|
949
1006
|
|
|
1007
|
+
# By default, stick to classic configuration
|
|
1008
|
+
responses.add(
|
|
1009
|
+
responses.GET,
|
|
1010
|
+
"http://testserver/api/v1/workers/runs/56785678-5678-5678-5678-567856785678/configuration/",
|
|
1011
|
+
status=400,
|
|
1012
|
+
)
|
|
1013
|
+
|
|
950
1014
|
# Create and configure a worker
|
|
951
1015
|
monkeypatch.setattr(sys, "argv", ["worker"])
|
|
952
1016
|
worker = BaseWorker()
|
tests/test_dataset_worker.py
CHANGED
|
@@ -430,6 +430,7 @@ def test_run_no_sets(mocker, caplog, mock_dataset_worker):
|
|
|
430
430
|
|
|
431
431
|
assert [(level, message) for _, level, message in caplog.record_tuples] == [
|
|
432
432
|
(logging.INFO, "Loaded Worker Fake worker @ 123412 from API"),
|
|
433
|
+
(logging.INFO, "Modern configuration is not available"),
|
|
433
434
|
(logging.WARNING, "No sets to process, stopping."),
|
|
434
435
|
]
|
|
435
436
|
|
|
@@ -453,6 +454,7 @@ def test_run_initial_dataset_state_error(
|
|
|
453
454
|
|
|
454
455
|
assert [(level, message) for _, level, message in caplog.record_tuples] == [
|
|
455
456
|
(logging.INFO, "Loaded Worker Fake worker @ 123412 from API"),
|
|
457
|
+
(logging.INFO, "Modern configuration is not available"),
|
|
456
458
|
(
|
|
457
459
|
logging.WARNING,
|
|
458
460
|
"Failed running worker on Set (train) from Dataset (dataset_id): AssertionError('When processing a set, its dataset state should be Complete.')",
|
|
@@ -497,6 +499,7 @@ def test_run_download_dataset_artifact_api_error(
|
|
|
497
499
|
|
|
498
500
|
assert [(level, message) for _, level, message in caplog.record_tuples] == [
|
|
499
501
|
(logging.INFO, "Loaded Worker Fake worker @ 123412 from API"),
|
|
502
|
+
(logging.INFO, "Modern configuration is not available"),
|
|
500
503
|
(
|
|
501
504
|
logging.INFO,
|
|
502
505
|
"Retrieving data for Set (train) from Dataset (dataset_id) (1/1)",
|
|
@@ -550,6 +553,7 @@ def test_run_no_downloaded_dataset_artifact_error(
|
|
|
550
553
|
|
|
551
554
|
assert [(level, message) for _, level, message in caplog.record_tuples] == [
|
|
552
555
|
(logging.INFO, "Loaded Worker Fake worker @ 123412 from API"),
|
|
556
|
+
(logging.INFO, "Modern configuration is not available"),
|
|
553
557
|
(
|
|
554
558
|
logging.INFO,
|
|
555
559
|
"Retrieving data for Set (train) from Dataset (dataset_id) (1/1)",
|
|
@@ -626,6 +630,7 @@ def test_run(
|
|
|
626
630
|
|
|
627
631
|
assert [(level, message) for _, level, message in caplog.record_tuples] == [
|
|
628
632
|
(logging.INFO, "Loaded Worker Fake worker @ 123412 from API"),
|
|
633
|
+
(logging.INFO, "Modern configuration is not available"),
|
|
629
634
|
(
|
|
630
635
|
logging.INFO,
|
|
631
636
|
"Retrieving data for Set (train) from Dataset (dataset_id) (1/1)",
|
|
@@ -698,7 +698,8 @@ def test_run_consuming_worker_activities(
|
|
|
698
698
|
assert mock_elements_worker_consume_wa.is_read_only is False
|
|
699
699
|
|
|
700
700
|
# Provide 2 worker activities to run and the corresponding update call
|
|
701
|
-
|
|
701
|
+
# and 2 element details response
|
|
702
|
+
for i, elt_id in enumerate(("page_1", "page_2"), 1):
|
|
702
703
|
responses.add(
|
|
703
704
|
responses.POST,
|
|
704
705
|
"http://testserver/api/v1/process/start-activity/",
|
|
@@ -706,7 +707,7 @@ def test_run_consuming_worker_activities(
|
|
|
706
707
|
json={
|
|
707
708
|
"id": elt_id,
|
|
708
709
|
"type_id": "page-aaaa-aaaa-aaaa-aaaaaaaaaaaa", # Element type provided by mock corpus
|
|
709
|
-
"name": "Page
|
|
710
|
+
"name": f"Page n°{i}",
|
|
710
711
|
},
|
|
711
712
|
)
|
|
712
713
|
responses.add(
|
|
@@ -714,6 +715,16 @@ def test_run_consuming_worker_activities(
|
|
|
714
715
|
"http://testserver/api/v1/workers/versions/56785678-5678-5678-5678-567856785678/activity/",
|
|
715
716
|
status=200,
|
|
716
717
|
)
|
|
718
|
+
responses.add(
|
|
719
|
+
responses.GET,
|
|
720
|
+
f"http://testserver/api/v1/element/{elt_id}/",
|
|
721
|
+
status=200,
|
|
722
|
+
json={
|
|
723
|
+
"id": elt_id,
|
|
724
|
+
"type": "page",
|
|
725
|
+
"name": f"Page n°{i}",
|
|
726
|
+
},
|
|
727
|
+
)
|
|
717
728
|
|
|
718
729
|
# Then a 404 to stop iterating
|
|
719
730
|
responses.add(
|
|
@@ -725,18 +736,19 @@ def test_run_consuming_worker_activities(
|
|
|
725
736
|
# Simply run the process
|
|
726
737
|
mock_elements_worker_consume_wa.run()
|
|
727
738
|
|
|
728
|
-
|
|
739
|
+
# We call twice configure in the conftest
|
|
740
|
+
assert len(responses.calls) == len(BASE_API_CALLS) * 2 + 7
|
|
729
741
|
assert [
|
|
730
742
|
(call.request.method, call.request.url) for call in responses.calls
|
|
731
|
-
] == BASE_API_CALLS + [
|
|
732
|
-
(
|
|
733
|
-
"GET",
|
|
734
|
-
"http://testserver/api/v1/corpus/11111111-1111-1111-1111-111111111111/",
|
|
735
|
-
),
|
|
743
|
+
] == BASE_API_CALLS * 2 + [
|
|
736
744
|
(
|
|
737
745
|
"POST",
|
|
738
746
|
"http://testserver/api/v1/process/start-activity/",
|
|
739
747
|
),
|
|
748
|
+
(
|
|
749
|
+
"GET",
|
|
750
|
+
"http://testserver/api/v1/element/page_1/",
|
|
751
|
+
),
|
|
740
752
|
(
|
|
741
753
|
"PUT",
|
|
742
754
|
"http://testserver/api/v1/workers/versions/56785678-5678-5678-5678-567856785678/activity/",
|
|
@@ -745,6 +757,10 @@ def test_run_consuming_worker_activities(
|
|
|
745
757
|
"POST",
|
|
746
758
|
"http://testserver/api/v1/process/start-activity/",
|
|
747
759
|
),
|
|
760
|
+
(
|
|
761
|
+
"GET",
|
|
762
|
+
"http://testserver/api/v1/element/page_2/",
|
|
763
|
+
),
|
|
748
764
|
(
|
|
749
765
|
"PUT",
|
|
750
766
|
"http://testserver/api/v1/workers/versions/56785678-5678-5678-5678-567856785678/activity/",
|
|
@@ -756,21 +772,17 @@ def test_run_consuming_worker_activities(
|
|
|
756
772
|
]
|
|
757
773
|
|
|
758
774
|
assert [(record.levelno, record.message) for record in caplog.records] == [
|
|
759
|
-
(
|
|
760
|
-
logging.INFO,
|
|
761
|
-
"Loaded 1 element type in corpus (11111111-1111-1111-1111-111111111111).",
|
|
762
|
-
),
|
|
763
775
|
(
|
|
764
776
|
logging.INFO,
|
|
765
777
|
"Using StartWorkerActivity instead of reading init_elements JSON file",
|
|
766
778
|
),
|
|
767
779
|
(
|
|
768
780
|
logging.INFO,
|
|
769
|
-
"Processing page Page
|
|
781
|
+
"Processing page Page n°1 (page_1) (n°1)",
|
|
770
782
|
),
|
|
771
783
|
(
|
|
772
784
|
logging.INFO,
|
|
773
|
-
"Processing page Page
|
|
785
|
+
"Processing page Page n°2 (page_2) (n°2)",
|
|
774
786
|
),
|
|
775
787
|
(
|
|
776
788
|
logging.INFO,
|
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
def test_simple_configuration(mock_base_worker_modern_conf, responses):
|
|
2
|
+
# Provide the full configuration directly from the worker run
|
|
3
|
+
responses.add(
|
|
4
|
+
responses.GET,
|
|
5
|
+
"http://testserver/api/v1/workers/runs/56785678-5678-5678-5678-567856785678/configuration/",
|
|
6
|
+
status=200,
|
|
7
|
+
json={"configuration": [{"key": "some_key", "value": "test", "secret": False}]},
|
|
8
|
+
)
|
|
9
|
+
|
|
10
|
+
mock_base_worker_modern_conf.configure()
|
|
11
|
+
|
|
12
|
+
assert mock_base_worker_modern_conf.config == {"some_key": "test"}
|
|
13
|
+
assert (
|
|
14
|
+
mock_base_worker_modern_conf.user_configuration
|
|
15
|
+
== mock_base_worker_modern_conf.config
|
|
16
|
+
)
|
|
17
|
+
assert mock_base_worker_modern_conf.secrets == {}
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def test_empty(mock_base_worker_modern_conf, responses):
|
|
21
|
+
# Provide the full configuration directly from the worker run
|
|
22
|
+
responses.add(
|
|
23
|
+
responses.GET,
|
|
24
|
+
"http://testserver/api/v1/workers/runs/56785678-5678-5678-5678-567856785678/configuration/",
|
|
25
|
+
status=200,
|
|
26
|
+
json={"configuration": []},
|
|
27
|
+
)
|
|
28
|
+
|
|
29
|
+
mock_base_worker_modern_conf.configure()
|
|
30
|
+
|
|
31
|
+
assert mock_base_worker_modern_conf.config == {}
|
|
32
|
+
assert (
|
|
33
|
+
mock_base_worker_modern_conf.user_configuration
|
|
34
|
+
== mock_base_worker_modern_conf.config
|
|
35
|
+
)
|
|
36
|
+
assert mock_base_worker_modern_conf.secrets == {}
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def test_with_secrets(mock_base_worker_modern_conf, responses):
|
|
40
|
+
# Provide the full configuration directly from the worker run
|
|
41
|
+
responses.add(
|
|
42
|
+
responses.GET,
|
|
43
|
+
"http://testserver/api/v1/workers/runs/56785678-5678-5678-5678-567856785678/configuration/",
|
|
44
|
+
status=200,
|
|
45
|
+
json={
|
|
46
|
+
"configuration": [
|
|
47
|
+
{"key": "some_key", "value": "test", "secret": False},
|
|
48
|
+
{
|
|
49
|
+
"key": "a_secret",
|
|
50
|
+
"value": "471b9e64-29af-48dc-8bda-1a64a2da0c12",
|
|
51
|
+
"secret": True,
|
|
52
|
+
},
|
|
53
|
+
]
|
|
54
|
+
},
|
|
55
|
+
)
|
|
56
|
+
|
|
57
|
+
# Provide a secret value
|
|
58
|
+
responses.add(
|
|
59
|
+
responses.GET,
|
|
60
|
+
"http://testserver/api/v1/secret/471b9e64-29af-48dc-8bda-1a64a2da0c12",
|
|
61
|
+
status=200,
|
|
62
|
+
json={
|
|
63
|
+
"id": "471b9e64-29af-48dc-8bda-1a64a2da0c12",
|
|
64
|
+
"name": "a_secret",
|
|
65
|
+
"content": "My super duper secret value",
|
|
66
|
+
},
|
|
67
|
+
)
|
|
68
|
+
|
|
69
|
+
mock_base_worker_modern_conf.configure()
|
|
70
|
+
|
|
71
|
+
assert mock_base_worker_modern_conf.config == {
|
|
72
|
+
"a_secret": "My super duper secret value",
|
|
73
|
+
"some_key": "test",
|
|
74
|
+
}
|
|
75
|
+
assert (
|
|
76
|
+
mock_base_worker_modern_conf.user_configuration
|
|
77
|
+
== mock_base_worker_modern_conf.config
|
|
78
|
+
)
|
|
79
|
+
assert mock_base_worker_modern_conf.secrets == {
|
|
80
|
+
"a_secret": "My super duper secret value"
|
|
81
|
+
}
|
|
File without changes
|
{arkindex_base_worker-0.5.1b1.dist-info → arkindex_base_worker-0.5.1b4.dist-info}/licenses/LICENSE
RENAMED
|
File without changes
|
{arkindex_base_worker-0.5.1b1.dist-info → arkindex_base_worker-0.5.1b4.dist-info}/top_level.txt
RENAMED
|
File without changes
|