arkindex-base-worker 0.3.7rc4__py3-none-any.whl → 0.5.0a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60) hide show
  1. {arkindex_base_worker-0.3.7rc4.dist-info → arkindex_base_worker-0.5.0a1.dist-info}/METADATA +18 -19
  2. arkindex_base_worker-0.5.0a1.dist-info/RECORD +61 -0
  3. {arkindex_base_worker-0.3.7rc4.dist-info → arkindex_base_worker-0.5.0a1.dist-info}/WHEEL +1 -1
  4. {arkindex_base_worker-0.3.7rc4.dist-info → arkindex_base_worker-0.5.0a1.dist-info}/top_level.txt +2 -0
  5. arkindex_worker/cache.py +1 -1
  6. arkindex_worker/image.py +167 -2
  7. arkindex_worker/models.py +18 -0
  8. arkindex_worker/utils.py +98 -4
  9. arkindex_worker/worker/__init__.py +117 -218
  10. arkindex_worker/worker/base.py +39 -46
  11. arkindex_worker/worker/classification.py +45 -29
  12. arkindex_worker/worker/corpus.py +86 -0
  13. arkindex_worker/worker/dataset.py +89 -26
  14. arkindex_worker/worker/element.py +352 -91
  15. arkindex_worker/worker/entity.py +13 -11
  16. arkindex_worker/worker/image.py +21 -0
  17. arkindex_worker/worker/metadata.py +26 -16
  18. arkindex_worker/worker/process.py +92 -0
  19. arkindex_worker/worker/task.py +5 -4
  20. arkindex_worker/worker/training.py +25 -10
  21. arkindex_worker/worker/transcription.py +89 -68
  22. arkindex_worker/worker/version.py +3 -1
  23. hooks/pre_gen_project.py +3 -0
  24. tests/__init__.py +8 -0
  25. tests/conftest.py +47 -58
  26. tests/test_base_worker.py +212 -12
  27. tests/test_dataset_worker.py +294 -437
  28. tests/test_elements_worker/{test_classifications.py → test_classification.py} +313 -200
  29. tests/test_elements_worker/test_cli.py +3 -11
  30. tests/test_elements_worker/test_corpus.py +168 -0
  31. tests/test_elements_worker/test_dataset.py +106 -157
  32. tests/test_elements_worker/test_element.py +427 -0
  33. tests/test_elements_worker/test_element_create_multiple.py +715 -0
  34. tests/test_elements_worker/test_element_create_single.py +528 -0
  35. tests/test_elements_worker/test_element_list_children.py +969 -0
  36. tests/test_elements_worker/test_element_list_parents.py +530 -0
  37. tests/test_elements_worker/{test_entities.py → test_entity_create.py} +37 -195
  38. tests/test_elements_worker/test_entity_list_and_check.py +160 -0
  39. tests/test_elements_worker/test_image.py +66 -0
  40. tests/test_elements_worker/test_metadata.py +252 -161
  41. tests/test_elements_worker/test_process.py +89 -0
  42. tests/test_elements_worker/test_task.py +8 -18
  43. tests/test_elements_worker/test_training.py +17 -8
  44. tests/test_elements_worker/test_transcription_create.py +873 -0
  45. tests/test_elements_worker/test_transcription_create_with_elements.py +951 -0
  46. tests/test_elements_worker/test_transcription_list.py +450 -0
  47. tests/test_elements_worker/test_version.py +60 -0
  48. tests/test_elements_worker/test_worker.py +578 -293
  49. tests/test_image.py +542 -209
  50. tests/test_merge.py +1 -2
  51. tests/test_utils.py +89 -4
  52. worker-demo/tests/__init__.py +0 -0
  53. worker-demo/tests/conftest.py +32 -0
  54. worker-demo/tests/test_worker.py +12 -0
  55. worker-demo/worker_demo/__init__.py +6 -0
  56. worker-demo/worker_demo/worker.py +19 -0
  57. arkindex_base_worker-0.3.7rc4.dist-info/RECORD +0 -41
  58. tests/test_elements_worker/test_elements.py +0 -2713
  59. tests/test_elements_worker/test_transcriptions.py +0 -2119
  60. {arkindex_base_worker-0.3.7rc4.dist-info → arkindex_base_worker-0.5.0a1.dist-info}/LICENSE +0 -0
@@ -1,89 +1,573 @@
1
1
  import json
2
+ import logging
2
3
  import sys
4
+ from argparse import Namespace
5
+ from uuid import UUID
3
6
 
4
7
  import pytest
5
- from apistar.exceptions import ErrorResponse
6
8
 
7
- from arkindex_worker.cache import CachedElement
9
+ from arkindex.exceptions import ErrorResponse
10
+ from arkindex_worker.cache import (
11
+ SQL_VERSION,
12
+ CachedElement,
13
+ create_version_table,
14
+ init_cache_db,
15
+ )
16
+ from arkindex_worker.models import Element
8
17
  from arkindex_worker.worker import ActivityState, ElementsWorker
18
+ from arkindex_worker.worker.dataset import DatasetState
19
+ from arkindex_worker.worker.process import ProcessMode
20
+ from tests import PROCESS_ID
9
21
 
10
22
  from . import BASE_API_CALLS
11
23
 
12
- TEST_VERSION_ID = "test_123"
13
- TEST_SLUG = "some_slug"
14
24
 
25
+ def test_database_arg(mocker, mock_elements_worker, tmp_path):
26
+ database_path = tmp_path / "my_database.sqlite"
27
+ init_cache_db(database_path)
28
+ create_version_table()
29
+
30
+ mocker.patch(
31
+ "arkindex_worker.worker.base.argparse.ArgumentParser.parse_args",
32
+ return_value=Namespace(
33
+ element=["volumeid", "pageid"],
34
+ verbose=False,
35
+ elements_list=None,
36
+ database=database_path,
37
+ dev=False,
38
+ set=[],
39
+ ),
40
+ )
41
+
42
+ worker = ElementsWorker(support_cache=True)
43
+ worker.configure()
44
+
45
+ assert worker.use_cache is True
46
+ assert worker.cache_path == database_path
47
+
48
+
49
+ def test_database_arg_cache_missing_version_table(
50
+ mocker, mock_elements_worker, tmp_path
51
+ ):
52
+ database_path = tmp_path / "my_database.sqlite"
53
+ database_path.touch()
54
+
55
+ mocker.patch(
56
+ "arkindex_worker.worker.base.argparse.ArgumentParser.parse_args",
57
+ return_value=Namespace(
58
+ element=["volumeid", "pageid"],
59
+ verbose=False,
60
+ elements_list=None,
61
+ database=database_path,
62
+ dev=False,
63
+ set=[],
64
+ ),
65
+ )
66
+
67
+ worker = ElementsWorker(support_cache=True)
68
+ with pytest.raises(
69
+ AssertionError,
70
+ match=f"The SQLite database {database_path} does not have the correct cache version, it should be {SQL_VERSION}",
71
+ ):
72
+ worker.configure()
15
73
 
16
- def test_get_worker_version(fake_dummy_worker):
17
- api_client = fake_dummy_worker.api_client
18
74
 
19
- response = {"worker": {"slug": TEST_SLUG}}
75
+ def test_readonly(responses, mock_elements_worker):
76
+ """Test readonly worker does not trigger any API calls"""
77
+
78
+ # Setup the worker as read-only
79
+ mock_elements_worker.worker_run_id = None
80
+ assert mock_elements_worker.is_read_only is True
20
81
 
21
- api_client.add_response("RetrieveWorkerVersion", response, id=TEST_VERSION_ID)
82
+ out = mock_elements_worker.update_activity("1234-deadbeef", ActivityState.Processed)
22
83
 
23
- with pytest.deprecated_call(match="WorkerVersion usage is deprecated."):
24
- res = fake_dummy_worker.get_worker_version(TEST_VERSION_ID)
84
+ # update_activity returns False in very specific cases
85
+ assert out is True
86
+ assert len(responses.calls) == len(BASE_API_CALLS)
87
+ assert [
88
+ (call.request.method, call.request.url) for call in responses.calls
89
+ ] == BASE_API_CALLS
25
90
 
26
- assert res == response
27
- assert fake_dummy_worker._worker_version_cache[TEST_VERSION_ID] == response
28
91
 
92
+ def test_get_elements_elements_list_arg_wrong_type(
93
+ monkeypatch, tmp_path, mock_elements_worker
94
+ ):
95
+ elements_path = tmp_path / "elements.json"
96
+ elements_path.write_text("{}")
29
97
 
30
- def test_get_worker_version__uses_cache(fake_dummy_worker):
31
- api_client = fake_dummy_worker.api_client
98
+ monkeypatch.setenv("TASK_ELEMENTS", str(elements_path))
99
+ worker = ElementsWorker()
100
+ worker.configure()
32
101
 
33
- response = {"worker": {"slug": TEST_SLUG}}
102
+ with pytest.raises(AssertionError, match="Elements list must be a list"):
103
+ worker.get_elements()
34
104
 
35
- api_client.add_response("RetrieveWorkerVersion", response, id=TEST_VERSION_ID)
36
105
 
37
- with pytest.deprecated_call(match="WorkerVersion usage is deprecated."):
38
- response_1 = fake_dummy_worker.get_worker_version(TEST_VERSION_ID)
106
+ def test_get_elements_elements_list_arg_empty_list(
107
+ monkeypatch, tmp_path, mock_elements_worker
108
+ ):
109
+ elements_path = tmp_path / "elements.json"
110
+ elements_path.write_text("[]")
39
111
 
40
- with pytest.deprecated_call(match="WorkerVersion usage is deprecated."):
41
- response_2 = fake_dummy_worker.get_worker_version(TEST_VERSION_ID)
112
+ monkeypatch.setenv("TASK_ELEMENTS", str(elements_path))
113
+ worker = ElementsWorker()
114
+ worker.configure()
42
115
 
43
- assert response_1 == response
44
- assert response_1 == response_2
116
+ with pytest.raises(AssertionError, match="No elements in elements list"):
117
+ worker.get_elements()
45
118
 
46
- # assert that only one call to the API
47
- assert len(api_client.history) == 1
48
- assert not api_client.responses
49
119
 
120
+ def test_get_elements_elements_list_arg_missing_id(
121
+ monkeypatch, tmp_path, mock_elements_worker
122
+ ):
123
+ elements_path = tmp_path / "elements.json"
124
+ elements_path.write_text(json.dumps([{"type": "volume"}]))
50
125
 
51
- def test_get_worker_version_slug(mocker, fake_dummy_worker):
52
- fake_dummy_worker.get_worker_version = mocker.MagicMock()
53
- fake_dummy_worker.get_worker_version.return_value = {
54
- "id": TEST_VERSION_ID,
55
- "worker": {"slug": "mock_slug"},
56
- }
126
+ monkeypatch.setenv("TASK_ELEMENTS", str(elements_path))
127
+ worker = ElementsWorker()
128
+ worker.configure()
57
129
 
58
- with pytest.deprecated_call(match="WorkerVersion usage is deprecated."):
59
- slug = fake_dummy_worker.get_worker_version_slug(TEST_VERSION_ID)
60
- assert slug == "mock_slug"
130
+ elt_list = worker.get_elements()
61
131
 
132
+ assert elt_list == []
62
133
 
63
- def test_get_worker_version_slug_none(fake_dummy_worker):
64
- # WARNING: pytest.deprecated_call must be placed BEFORE pytest.raises, otherwise `match` argument won't be checked
65
- with (
66
- pytest.deprecated_call(match="WorkerVersion usage is deprecated."),
67
- pytest.raises(ValueError, match="No worker version ID"),
134
+
135
+ def test_get_elements_elements_list_arg_not_uuid(
136
+ monkeypatch, tmp_path, mock_elements_worker
137
+ ):
138
+ elements_path = tmp_path / "elements.json"
139
+ elements_path.write_text(
140
+ json.dumps(
141
+ [
142
+ {"id": "volumeid", "type": "volume"},
143
+ {"id": "pageid", "type": "page"},
144
+ {"id": "actid", "type": "act"},
145
+ {"id": "surfaceid", "type": "surface"},
146
+ ]
147
+ )
148
+ )
149
+
150
+ monkeypatch.setenv("TASK_ELEMENTS", str(elements_path))
151
+ worker = ElementsWorker()
152
+ worker.configure()
153
+
154
+ with pytest.raises(
155
+ Exception,
156
+ match="These element IDs are invalid: volumeid, pageid, actid, surfaceid",
68
157
  ):
69
- fake_dummy_worker.get_worker_version_slug(None)
158
+ worker.get_elements()
159
+
160
+
161
+ def test_get_elements_elements_list_arg(monkeypatch, tmp_path, mock_elements_worker):
162
+ elements_path = tmp_path / "elements.json"
163
+ elements_path.write_text(
164
+ json.dumps(
165
+ [
166
+ {"id": "11111111-1111-1111-1111-111111111111", "type": "volume"},
167
+ {"id": "22222222-2222-2222-2222-222222222222", "type": "page"},
168
+ {"id": "33333333-3333-3333-3333-333333333333", "type": "act"},
169
+ ]
170
+ )
171
+ )
70
172
 
173
+ monkeypatch.setenv("TASK_ELEMENTS", str(elements_path))
174
+ worker = ElementsWorker()
175
+ worker.configure()
71
176
 
72
- def test_readonly(responses, mock_elements_worker):
73
- """Test readonly worker does not trigger any API calls"""
177
+ elt_list = worker.get_elements()
74
178
 
75
- # Setup the worker as read-only
76
- mock_elements_worker.worker_run_id = None
77
- assert mock_elements_worker.is_read_only is True
179
+ assert elt_list == [
180
+ "11111111-1111-1111-1111-111111111111",
181
+ "22222222-2222-2222-2222-222222222222",
182
+ "33333333-3333-3333-3333-333333333333",
183
+ ]
78
184
 
79
- out = mock_elements_worker.update_activity("1234-deadbeef", ActivityState.Processed)
80
185
 
81
- # update_activity returns False in very specific cases
82
- assert out is True
83
- assert len(responses.calls) == len(BASE_API_CALLS)
84
- assert [
85
- (call.request.method, call.request.url) for call in responses.calls
86
- ] == BASE_API_CALLS
186
+ def test_get_elements_element_arg_not_uuid(mocker, mock_elements_worker):
187
+ mocker.patch(
188
+ "arkindex_worker.worker.base.argparse.ArgumentParser.parse_args",
189
+ return_value=Namespace(
190
+ element=["volumeid", "pageid"],
191
+ config={},
192
+ verbose=False,
193
+ elements_list=None,
194
+ database=None,
195
+ dev=True,
196
+ set=[],
197
+ ),
198
+ )
199
+
200
+ worker = ElementsWorker()
201
+ worker.configure()
202
+
203
+ with pytest.raises(
204
+ Exception, match="These element IDs are invalid: volumeid, pageid"
205
+ ):
206
+ worker.get_elements()
207
+
208
+
209
+ def test_get_elements_element_arg(mocker, mock_elements_worker):
210
+ mocker.patch(
211
+ "arkindex_worker.worker.base.argparse.ArgumentParser.parse_args",
212
+ return_value=Namespace(
213
+ element=[
214
+ "11111111-1111-1111-1111-111111111111",
215
+ "22222222-2222-2222-2222-222222222222",
216
+ ],
217
+ config={},
218
+ verbose=False,
219
+ elements_list=None,
220
+ database=None,
221
+ dev=True,
222
+ set=[],
223
+ ),
224
+ )
225
+
226
+ worker = ElementsWorker()
227
+ worker.configure()
228
+
229
+ elt_list = worker.get_elements()
230
+
231
+ assert elt_list == [
232
+ "11111111-1111-1111-1111-111111111111",
233
+ "22222222-2222-2222-2222-222222222222",
234
+ ]
235
+
236
+
237
+ def test_get_elements_dataset_set_arg(responses, mocker, mock_elements_worker):
238
+ mocker.patch(
239
+ "arkindex_worker.worker.base.argparse.ArgumentParser.parse_args",
240
+ return_value=Namespace(
241
+ element=[],
242
+ config={},
243
+ verbose=False,
244
+ elements_list=None,
245
+ database=None,
246
+ dev=True,
247
+ set=[(UUID("11111111-1111-1111-1111-111111111111"), "train")],
248
+ ),
249
+ )
250
+
251
+ # Mock RetrieveDataset call
252
+ responses.add(
253
+ responses.GET,
254
+ "http://testserver/api/v1/datasets/11111111-1111-1111-1111-111111111111/",
255
+ status=200,
256
+ json={
257
+ "id": "11111111-1111-1111-1111-111111111111",
258
+ "name": "My dataset",
259
+ "description": "A dataset about cats.",
260
+ "sets": ["train", "dev", "test"],
261
+ "state": DatasetState.Complete.value,
262
+ },
263
+ content_type="application/json",
264
+ )
265
+
266
+ # Mock ListSetElements call
267
+ element = {
268
+ "id": "22222222-2222-2222-2222-222222222222",
269
+ "type": "page",
270
+ "name": "1",
271
+ "corpus": {
272
+ "id": "11111111-1111-1111-1111-111111111111",
273
+ },
274
+ "thumbnail_url": "http://example.com",
275
+ "zone": {
276
+ "id": "497f6eca-6276-4993-bfeb-53cbbbba6f08",
277
+ "polygon": [[0, 0], [0, 0], [0, 0]],
278
+ "image": {
279
+ "id": "497f6eca-6276-4993-bfeb-53cbbbba6f08",
280
+ "path": "string",
281
+ "width": 0,
282
+ "height": 0,
283
+ "url": "http://example.com",
284
+ "s3_url": "string",
285
+ "status": "checked",
286
+ "server": {
287
+ "display_name": "string",
288
+ "url": "http://example.com",
289
+ "max_width": 2147483647,
290
+ "max_height": 2147483647,
291
+ },
292
+ },
293
+ "url": "http://example.com",
294
+ },
295
+ "rotation_angle": 0,
296
+ "mirrored": False,
297
+ "created": "2019-08-24T14:15:22Z",
298
+ "classes": [
299
+ {
300
+ "id": "497f6eca-6276-4993-bfeb-53cbbbba6f08",
301
+ "ml_class": {
302
+ "id": "497f6eca-6276-4993-bfeb-53cbbbba6f08",
303
+ "name": "string",
304
+ },
305
+ "state": "pending",
306
+ "confidence": 0,
307
+ "high_confidence": True,
308
+ "worker_run": {
309
+ "id": "497f6eca-6276-4993-bfeb-53cbbbba6f08",
310
+ "summary": "string",
311
+ },
312
+ }
313
+ ],
314
+ "metadata": [
315
+ {
316
+ "id": "497f6eca-6276-4993-bfeb-53cbbbba6f08",
317
+ "type": "text",
318
+ "name": "string",
319
+ "value": "string",
320
+ "dates": [{"type": "exact", "year": 0, "month": 1, "day": 1}],
321
+ }
322
+ ],
323
+ "transcriptions": [
324
+ {
325
+ "id": "497f6eca-6276-4993-bfeb-53cbbbba6f08",
326
+ "text": "string",
327
+ "confidence": 0,
328
+ "orientation": "horizontal-lr",
329
+ "worker_run": {
330
+ "id": "497f6eca-6276-4993-bfeb-53cbbbba6f08",
331
+ "summary": "string",
332
+ },
333
+ }
334
+ ],
335
+ "has_children": True,
336
+ "worker_run": {
337
+ "id": "497f6eca-6276-4993-bfeb-53cbbbba6f08",
338
+ "summary": "string",
339
+ },
340
+ "confidence": 1,
341
+ }
342
+ responses.add(
343
+ responses.GET,
344
+ "http://testserver/api/v1/datasets/11111111-1111-1111-1111-111111111111/elements/?set=train&with_count=true",
345
+ status=200,
346
+ json={
347
+ "next": None,
348
+ "previous": None,
349
+ "results": [
350
+ {
351
+ "set": "train",
352
+ "element": element,
353
+ }
354
+ ],
355
+ "count": 1,
356
+ },
357
+ content_type="application/json",
358
+ )
359
+
360
+ worker = ElementsWorker()
361
+ worker.configure()
362
+
363
+ elt_list = worker.get_elements()
364
+
365
+ assert elt_list == [
366
+ Element(**element),
367
+ ]
368
+
369
+
370
+ def test_get_elements_dataset_set_api(responses, mocker, mock_elements_worker):
371
+ # Mock ListProcessSets call
372
+ responses.add(
373
+ responses.GET,
374
+ "http://testserver/api/v1/process/aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeffff/sets/",
375
+ status=200,
376
+ json={
377
+ "next": None,
378
+ "previous": None,
379
+ "results": [
380
+ {
381
+ "id": "33333333-3333-3333-3333-333333333333",
382
+ "dataset": {"id": "11111111-1111-1111-1111-111111111111"},
383
+ "set_name": "train",
384
+ }
385
+ ],
386
+ "count": 1,
387
+ },
388
+ content_type="application/json",
389
+ )
390
+
391
+ # Mock ListSetElements call
392
+ element = {
393
+ "id": "22222222-2222-2222-2222-222222222222",
394
+ "type": "page",
395
+ "name": "1",
396
+ "corpus": {
397
+ "id": "11111111-1111-1111-1111-111111111111",
398
+ },
399
+ "thumbnail_url": "http://example.com",
400
+ "zone": {
401
+ "id": "497f6eca-6276-4993-bfeb-53cbbbba6f08",
402
+ "polygon": [[0, 0], [0, 0], [0, 0]],
403
+ "image": {
404
+ "id": "497f6eca-6276-4993-bfeb-53cbbbba6f08",
405
+ "path": "string",
406
+ "width": 0,
407
+ "height": 0,
408
+ "url": "http://example.com",
409
+ "s3_url": "string",
410
+ "status": "checked",
411
+ "server": {
412
+ "display_name": "string",
413
+ "url": "http://example.com",
414
+ "max_width": 2147483647,
415
+ "max_height": 2147483647,
416
+ },
417
+ },
418
+ "url": "http://example.com",
419
+ },
420
+ "rotation_angle": 0,
421
+ "mirrored": False,
422
+ "created": "2019-08-24T14:15:22Z",
423
+ "classes": [
424
+ {
425
+ "id": "497f6eca-6276-4993-bfeb-53cbbbba6f08",
426
+ "ml_class": {
427
+ "id": "497f6eca-6276-4993-bfeb-53cbbbba6f08",
428
+ "name": "string",
429
+ },
430
+ "state": "pending",
431
+ "confidence": 0,
432
+ "high_confidence": True,
433
+ "worker_run": {
434
+ "id": "497f6eca-6276-4993-bfeb-53cbbbba6f08",
435
+ "summary": "string",
436
+ },
437
+ }
438
+ ],
439
+ "metadata": [
440
+ {
441
+ "id": "497f6eca-6276-4993-bfeb-53cbbbba6f08",
442
+ "type": "text",
443
+ "name": "string",
444
+ "value": "string",
445
+ "dates": [{"type": "exact", "year": 0, "month": 1, "day": 1}],
446
+ }
447
+ ],
448
+ "transcriptions": [
449
+ {
450
+ "id": "497f6eca-6276-4993-bfeb-53cbbbba6f08",
451
+ "text": "string",
452
+ "confidence": 0,
453
+ "orientation": "horizontal-lr",
454
+ "worker_run": {
455
+ "id": "497f6eca-6276-4993-bfeb-53cbbbba6f08",
456
+ "summary": "string",
457
+ },
458
+ }
459
+ ],
460
+ "has_children": True,
461
+ "worker_run": {
462
+ "id": "497f6eca-6276-4993-bfeb-53cbbbba6f08",
463
+ "summary": "string",
464
+ },
465
+ "confidence": 1,
466
+ }
467
+ responses.add(
468
+ responses.GET,
469
+ "http://testserver/api/v1/datasets/11111111-1111-1111-1111-111111111111/elements/?set=train&with_count=true",
470
+ status=200,
471
+ json={
472
+ "next": None,
473
+ "previous": None,
474
+ "results": [
475
+ {
476
+ "set": "train",
477
+ "element": element,
478
+ }
479
+ ],
480
+ "count": 1,
481
+ },
482
+ content_type="application/json",
483
+ )
484
+
485
+ # Update ProcessMode to Dataset
486
+ mock_elements_worker.process_information["mode"] = ProcessMode.Dataset
487
+
488
+ elt_list = mock_elements_worker.get_elements()
489
+
490
+ assert elt_list == [
491
+ Element(**element),
492
+ ]
493
+
494
+
495
+ def test_get_elements_both_args_error(mocker, mock_elements_worker, tmp_path):
496
+ elements_path = tmp_path / "elements.json"
497
+ elements_path.write_text(
498
+ json.dumps(
499
+ [
500
+ {"id": "volumeid", "type": "volume"},
501
+ {"id": "pageid", "type": "page"},
502
+ {"id": "actid", "type": "act"},
503
+ {"id": "surfaceid", "type": "surface"},
504
+ ]
505
+ )
506
+ )
507
+ mocker.patch(
508
+ "arkindex_worker.worker.base.argparse.ArgumentParser.parse_args",
509
+ return_value=Namespace(
510
+ element=["anotherid", "againanotherid"],
511
+ verbose=False,
512
+ elements_list=elements_path.open(),
513
+ database=None,
514
+ dev=False,
515
+ set=[],
516
+ ),
517
+ )
518
+
519
+ worker = ElementsWorker()
520
+ worker.configure()
521
+
522
+ with pytest.raises(
523
+ AssertionError, match="elements-list and element CLI args shouldn't be both set"
524
+ ):
525
+ worker.get_elements()
526
+
527
+
528
+ def test_get_elements_export_process(mock_elements_worker, responses):
529
+ responses.add(
530
+ responses.GET,
531
+ f"http://testserver/api/v1/process/{PROCESS_ID}/elements/?page_size=500&with_count=true&with_image=False",
532
+ status=200,
533
+ json={
534
+ "count": 2,
535
+ "next": None,
536
+ "results": [
537
+ {
538
+ "id": "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa",
539
+ "type_id": "baaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa",
540
+ "name": "element 1",
541
+ "confidence": 1,
542
+ "image_id": None,
543
+ "image_width": None,
544
+ "image_height": None,
545
+ "image_url": None,
546
+ "polygon": None,
547
+ "rotation_angle": 0,
548
+ "mirrored": False,
549
+ },
550
+ {
551
+ "id": "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaa0",
552
+ "type_id": "baaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa",
553
+ "name": "element 2",
554
+ "confidence": 1,
555
+ "image_id": None,
556
+ "image_width": None,
557
+ "image_height": None,
558
+ "image_url": None,
559
+ "polygon": None,
560
+ "rotation_angle": 0,
561
+ "mirrored": False,
562
+ },
563
+ ],
564
+ },
565
+ )
566
+ mock_elements_worker.process_information["mode"] = "export"
567
+ assert set(mock_elements_worker.get_elements()) == {
568
+ "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa",
569
+ "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaa0",
570
+ }
87
571
 
88
572
 
89
573
  @pytest.mark.usefixtures("_mock_worker_run_api")
@@ -114,43 +598,6 @@ def test_activities_dev_mode(mocker):
114
598
  assert worker.store_activity is False
115
599
 
116
600
 
117
- @pytest.mark.usefixtures("_mock_worker_run_api")
118
- def test_update_call(responses, mock_elements_worker):
119
- """Test an update call with feature enabled triggers an API call"""
120
- responses.add(
121
- responses.PUT,
122
- "http://testserver/api/v1/workers/versions/56785678-5678-5678-5678-567856785678/activity/",
123
- status=200,
124
- json={
125
- "element_id": "1234-deadbeef",
126
- "process_id": "aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeffff",
127
- "state": "processed",
128
- },
129
- )
130
-
131
- out = mock_elements_worker.update_activity("1234-deadbeef", ActivityState.Processed)
132
-
133
- # Check the response received by worker
134
- assert out is True
135
-
136
- assert len(responses.calls) == len(BASE_API_CALLS) + 1
137
- assert [
138
- (call.request.method, call.request.url) for call in responses.calls
139
- ] == BASE_API_CALLS + [
140
- (
141
- "PUT",
142
- "http://testserver/api/v1/workers/versions/56785678-5678-5678-5678-567856785678/activity/",
143
- ),
144
- ]
145
-
146
- # Check the request sent by worker
147
- assert json.loads(responses.calls[-1].request.body) == {
148
- "element_id": "1234-deadbeef",
149
- "process_id": "aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeffff",
150
- "state": "processed",
151
- }
152
-
153
-
154
601
  @pytest.mark.usefixtures("_mock_activity_calls")
155
602
  @pytest.mark.parametrize(
156
603
  ("process_exception", "final_state"),
@@ -239,7 +686,7 @@ def test_run_cache(monkeypatch, mocker, mock_elements_worker_with_cache):
239
686
 
240
687
 
241
688
  def test_start_activity_conflict(
242
- monkeypatch, responses, mocker, mock_elements_worker_with_list
689
+ monkeypatch, responses, mock_elements_worker_with_list, caplog
243
690
  ):
244
691
  # Disable second configure call from run()
245
692
  monkeypatch.setattr(mock_elements_worker_with_list, "configure", lambda: None)
@@ -254,9 +701,6 @@ def test_start_activity_conflict(
254
701
  content="Either this activity does not exists or this state is not allowed.",
255
702
  ),
256
703
  )
257
- from arkindex_worker.worker import logger
258
-
259
- logger.info = mocker.MagicMock()
260
704
 
261
705
  mock_elements_worker_with_list.run()
262
706
 
@@ -270,14 +714,15 @@ def test_start_activity_conflict(
270
714
  "http://testserver/api/v1/workers/versions/56785678-5678-5678-5678-567856785678/activity/",
271
715
  ),
272
716
  ]
273
- assert logger.info.call_args_list[:2] == [
274
- mocker.call("Processing page Test Page n°1 (1234-deadbeef) (1/1)"),
275
- mocker.call("Skipping element 1234-deadbeef as it was already processed"),
717
+ assert [(record.levelno, record.message) for record in caplog.records] == [
718
+ (logging.INFO, "Processing page Test Page n°1 (1234-deadbeef) (1/1)"),
719
+ (logging.INFO, "Skipping element 1234-deadbeef as it was already processed"),
720
+ (logging.INFO, "Ran on 1 element: 1 completed, 0 failed"),
276
721
  ]
277
722
 
278
723
 
279
724
  def test_start_activity_error(
280
- monkeypatch, responses, mocker, mock_elements_worker_with_list
725
+ monkeypatch, responses, mock_elements_worker_with_list, caplog
281
726
  ):
282
727
  # Disable second configure call from run()
283
728
  monkeypatch.setattr(mock_elements_worker_with_list, "configure", lambda: None)
@@ -286,11 +731,8 @@ def test_start_activity_error(
286
731
  responses.add(
287
732
  responses.PUT,
288
733
  "http://testserver/api/v1/workers/versions/56785678-5678-5678-5678-567856785678/activity/",
289
- body=Exception("A wild Petilil appears !"),
734
+ body=Exception("A wild Petilil appears!"),
290
735
  )
291
- from arkindex_worker.worker import logger
292
-
293
- logger.error = mocker.MagicMock()
294
736
 
295
737
  with pytest.raises(SystemExit):
296
738
  mock_elements_worker_with_list.run()
@@ -309,205 +751,48 @@ def test_start_activity_error(
309
751
  "http://testserver/api/v1/workers/versions/56785678-5678-5678-5678-567856785678/activity/",
310
752
  ),
311
753
  ]
312
- assert logger.error.call_args_list == [
313
- mocker.call("Ran on 1 element: 0 completed, 1 failed")
314
- ]
315
-
316
-
317
- @pytest.mark.parametrize(
318
- (
319
- "wk_version_config",
320
- "wk_version_user_config",
321
- "frontend_user_config",
322
- "model_config",
323
- "expected_config",
324
- ),
325
- [
326
- ({}, {}, {}, {}, {}),
327
- # Keep parameters from worker version configuration
328
- ({"parameter": 0}, {}, {}, {}, {"parameter": 0}),
329
- # Keep parameters from worker version configuration + user_config defaults
754
+ assert [(record.levelno, record.message) for record in caplog.records] == [
755
+ (logging.INFO, "Processing page Test Page n°1 (1234-deadbeef) (1/1)"),
330
756
  (
331
- {"parameter": 0},
332
- {
333
- "parameter2": {
334
- "type": "int",
335
- "title": "Lambda",
336
- "default": 0,
337
- "required": False,
338
- }
339
- },
340
- {},
341
- {},
342
- {"parameter": 0, "parameter2": 0},
757
+ logging.WARNING,
758
+ "Failed running worker on element 1234-deadbeef: Exception('A wild Petilil appears!')",
343
759
  ),
344
- # Keep parameters from worker version configuration + user_config no defaults
345
- (
346
- {"parameter": 0},
347
- {
348
- "parameter2": {
349
- "type": "int",
350
- "title": "Lambda",
351
- "required": False,
352
- }
353
- },
354
- {},
355
- {},
356
- {"parameter": 0},
357
- ),
358
- # Keep parameters from worker version configuration but user_config defaults overrides
359
- (
360
- {"parameter": 0},
361
- {
362
- "parameter": {
363
- "type": "int",
364
- "title": "Lambda",
365
- "default": 1,
366
- "required": False,
367
- }
368
- },
369
- {},
370
- {},
371
- {"parameter": 1},
372
- ),
373
- # Keep parameters from worker version configuration + frontend config
374
- (
375
- {"parameter": 0},
376
- {},
377
- {"parameter2": 0},
378
- {},
379
- {"parameter": 0, "parameter2": 0},
380
- ),
381
- # Keep parameters from worker version configuration + frontend config overrides
382
- ({"parameter": 0}, {}, {"parameter": 1}, {}, {"parameter": 1}),
383
- # Keep parameters from worker version configuration + model config
384
- (
385
- {"parameter": 0},
386
- {},
387
- {},
388
- {"parameter2": 0},
389
- {"parameter": 0, "parameter2": 0},
390
- ),
391
- # Keep parameters from worker version configuration + model config overrides
392
- ({"parameter": 0}, {}, {}, {"parameter": 1}, {"parameter": 1}),
393
- # Keep parameters from worker version configuration + user_config default + model config overrides
394
- (
395
- {"parameter": 0},
396
- {
397
- "parameter": {
398
- "type": "int",
399
- "title": "Lambda",
400
- "default": 1,
401
- "required": False,
402
- }
403
- },
404
- {},
405
- {"parameter": 2},
406
- {"parameter": 2},
407
- ),
408
- # Keep parameters from worker version configuration + model config + frontend config overrides
409
- ({"parameter": 0}, {}, {"parameter": 2}, {"parameter": 1}, {"parameter": 2}),
410
- # Keep parameters from worker version configuration + user_config default + model config + frontend config overrides all
411
- (
412
- {"parameter": 0},
413
- {
414
- "parameter": {
415
- "type": "int",
416
- "title": "Lambda",
417
- "default": 1,
418
- "required": False,
419
- }
420
- },
421
- {"parameter": 3},
422
- {"parameter": 2},
423
- {"parameter": 3},
424
- ),
425
- ],
426
- )
427
- def test_worker_config_multiple_source(
428
- monkeypatch,
429
- responses,
430
- wk_version_config,
431
- wk_version_user_config,
432
- frontend_user_config,
433
- model_config,
434
- expected_config,
435
- ):
436
- # Compute WorkerRun info
437
- payload = {
438
- "id": "56785678-5678-5678-5678-567856785678",
439
- "parents": [],
440
- "worker_version": {
441
- "id": "12341234-1234-1234-1234-123412341234",
442
- "configuration": {
443
- "docker": {"image": "python:3"},
444
- "configuration": wk_version_config,
445
- "secrets": [],
446
- "user_configuration": wk_version_user_config,
447
- },
448
- "revision": {
449
- "hash": "deadbeef1234",
450
- "name": "some git revision",
451
- },
452
- "docker_image": "python:3",
453
- "docker_image_name": "python:3",
454
- "state": "created",
455
- "worker": {
456
- "id": "deadbeef-1234-5678-1234-worker",
457
- "name": "Fake worker",
458
- "slug": "fake_worker",
459
- "type": "classifier",
460
- },
461
- },
462
- "configuration": {
463
- "id": "497f6eca-6276-4993-bfeb-53cbbbba6f08",
464
- "name": "Configuration entered by user",
465
- "configuration": frontend_user_config,
466
- },
467
- "model_version": {
468
- "id": "12341234-1234-1234-1234-123412341234",
469
- "name": "Model version 1337",
470
- "configuration": model_config,
471
- "model": {
472
- "id": "hahahaha-haha-haha-haha-hahahahahaha",
473
- "name": "My model",
474
- },
475
- },
476
- "process": {
477
- "name": None,
478
- "id": "aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeffff",
479
- "state": "running",
480
- "mode": "workers",
481
- "corpus": "11111111-1111-1111-1111-111111111111",
482
- "use_cache": False,
483
- "activity_state": "ready",
484
- "model_id": None,
485
- "train_folder_id": None,
486
- "validation_folder_id": None,
487
- "test_folder_id": None,
488
- },
489
- "summary": "Worker Fake worker @ 123412",
490
- }
760
+ (logging.ERROR, "Ran on 1 element: 0 completed, 1 failed"),
761
+ ]
762
+
491
763
 
764
+ @pytest.mark.usefixtures("_mock_worker_run_api")
765
+ def test_update_activity(responses, mock_elements_worker):
766
+ """Test an update call with feature enabled triggers an API call"""
492
767
  responses.add(
493
- responses.GET,
494
- "http://testserver/api/v1/process/workers/56785678-5678-5678-5678-567856785678/",
768
+ responses.PUT,
769
+ "http://testserver/api/v1/workers/versions/56785678-5678-5678-5678-567856785678/activity/",
495
770
  status=200,
496
- body=json.dumps(payload),
497
- content_type="application/json",
771
+ json={
772
+ "element_id": "1234-deadbeef",
773
+ "process_id": "aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeffff",
774
+ "state": "processed",
775
+ },
498
776
  )
499
777
 
500
- # Create and configure a worker
501
- monkeypatch.setattr(sys, "argv", ["worker"])
502
- worker = ElementsWorker()
503
- worker.configure()
778
+ out = mock_elements_worker.update_activity("1234-deadbeef", ActivityState.Processed)
504
779
 
505
- # Do what people do with a model configuration
506
- if worker.model_configuration:
507
- worker.config.update(worker.model_configuration)
780
+ # Check the response received by worker
781
+ assert out is True
508
782
 
509
- if worker.user_configuration:
510
- worker.config.update(worker.user_configuration)
783
+ assert len(responses.calls) == len(BASE_API_CALLS) + 1
784
+ assert [
785
+ (call.request.method, call.request.url) for call in responses.calls
786
+ ] == BASE_API_CALLS + [
787
+ (
788
+ "PUT",
789
+ "http://testserver/api/v1/workers/versions/56785678-5678-5678-5678-567856785678/activity/",
790
+ ),
791
+ ]
511
792
 
512
- # Check final config
513
- assert worker.config == expected_config
793
+ # Check the request sent by worker
794
+ assert json.loads(responses.calls[-1].request.body) == {
795
+ "element_id": "1234-deadbeef",
796
+ "process_id": "aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeffff",
797
+ "state": "processed",
798
+ }