arkindex-base-worker 0.4.0b3__py3-none-any.whl → 0.4.0rc2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -17,7 +17,9 @@ from arkindex_worker.cache import (
17
17
  from arkindex_worker.models import Element
18
18
  from arkindex_worker.utils import DEFAULT_BATCH_SIZE
19
19
  from arkindex_worker.worker import ElementsWorker
20
+ from arkindex_worker.worker.dataset import DatasetState
20
21
  from arkindex_worker.worker.element import MissingTypeError
22
+ from arkindex_worker.worker.process import ProcessMode
21
23
  from tests import CORPUS_ID
22
24
 
23
25
  from . import BASE_API_CALLS
@@ -109,7 +111,7 @@ def test_create_missing_types(responses, mock_elements_worker):
109
111
  )
110
112
 
111
113
 
112
- def test_list_elements_elements_list_arg_wrong_type(
114
+ def test_get_elements_elements_list_arg_wrong_type(
113
115
  monkeypatch, tmp_path, mock_elements_worker
114
116
  ):
115
117
  elements_path = tmp_path / "elements.json"
@@ -120,10 +122,10 @@ def test_list_elements_elements_list_arg_wrong_type(
120
122
  worker.configure()
121
123
 
122
124
  with pytest.raises(AssertionError, match="Elements list must be a list"):
123
- worker.list_elements()
125
+ worker.get_elements()
124
126
 
125
127
 
126
- def test_list_elements_elements_list_arg_empty_list(
128
+ def test_get_elements_elements_list_arg_empty_list(
127
129
  monkeypatch, tmp_path, mock_elements_worker
128
130
  ):
129
131
  elements_path = tmp_path / "elements.json"
@@ -134,10 +136,10 @@ def test_list_elements_elements_list_arg_empty_list(
134
136
  worker.configure()
135
137
 
136
138
  with pytest.raises(AssertionError, match="No elements in elements list"):
137
- worker.list_elements()
139
+ worker.get_elements()
138
140
 
139
141
 
140
- def test_list_elements_elements_list_arg_missing_id(
142
+ def test_get_elements_elements_list_arg_missing_id(
141
143
  monkeypatch, tmp_path, mock_elements_worker
142
144
  ):
143
145
  elements_path = tmp_path / "elements.json"
@@ -147,12 +149,12 @@ def test_list_elements_elements_list_arg_missing_id(
147
149
  worker = ElementsWorker()
148
150
  worker.configure()
149
151
 
150
- elt_list = worker.list_elements()
152
+ elt_list = worker.get_elements()
151
153
 
152
154
  assert elt_list == []
153
155
 
154
156
 
155
- def test_list_elements_elements_list_arg_not_uuid(
157
+ def test_get_elements_elements_list_arg_not_uuid(
156
158
  monkeypatch, tmp_path, mock_elements_worker
157
159
  ):
158
160
  elements_path = tmp_path / "elements.json"
@@ -175,10 +177,10 @@ def test_list_elements_elements_list_arg_not_uuid(
175
177
  Exception,
176
178
  match="These element IDs are invalid: volumeid, pageid, actid, surfaceid",
177
179
  ):
178
- worker.list_elements()
180
+ worker.get_elements()
179
181
 
180
182
 
181
- def test_list_elements_elements_list_arg(monkeypatch, tmp_path, mock_elements_worker):
183
+ def test_get_elements_elements_list_arg(monkeypatch, tmp_path, mock_elements_worker):
182
184
  elements_path = tmp_path / "elements.json"
183
185
  elements_path.write_text(
184
186
  json.dumps(
@@ -194,7 +196,7 @@ def test_list_elements_elements_list_arg(monkeypatch, tmp_path, mock_elements_wo
194
196
  worker = ElementsWorker()
195
197
  worker.configure()
196
198
 
197
- elt_list = worker.list_elements()
199
+ elt_list = worker.get_elements()
198
200
 
199
201
  assert elt_list == [
200
202
  "11111111-1111-1111-1111-111111111111",
@@ -203,15 +205,17 @@ def test_list_elements_elements_list_arg(monkeypatch, tmp_path, mock_elements_wo
203
205
  ]
204
206
 
205
207
 
206
- def test_list_elements_element_arg_not_uuid(mocker, mock_elements_worker):
208
+ def test_get_elements_element_arg_not_uuid(mocker, mock_elements_worker):
207
209
  mocker.patch(
208
210
  "arkindex_worker.worker.base.argparse.ArgumentParser.parse_args",
209
211
  return_value=Namespace(
210
212
  element=["volumeid", "pageid"],
213
+ config={},
211
214
  verbose=False,
212
215
  elements_list=None,
213
216
  database=None,
214
- dev=False,
217
+ dev=True,
218
+ set=[],
215
219
  ),
216
220
  )
217
221
 
@@ -221,10 +225,10 @@ def test_list_elements_element_arg_not_uuid(mocker, mock_elements_worker):
221
225
  with pytest.raises(
222
226
  Exception, match="These element IDs are invalid: volumeid, pageid"
223
227
  ):
224
- worker.list_elements()
228
+ worker.get_elements()
225
229
 
226
230
 
227
- def test_list_elements_element_arg(mocker, mock_elements_worker):
231
+ def test_get_elements_element_arg(mocker, mock_elements_worker):
228
232
  mocker.patch(
229
233
  "arkindex_worker.worker.base.argparse.ArgumentParser.parse_args",
230
234
  return_value=Namespace(
@@ -232,17 +236,19 @@ def test_list_elements_element_arg(mocker, mock_elements_worker):
232
236
  "11111111-1111-1111-1111-111111111111",
233
237
  "22222222-2222-2222-2222-222222222222",
234
238
  ],
239
+ config={},
235
240
  verbose=False,
236
241
  elements_list=None,
237
242
  database=None,
238
- dev=False,
243
+ dev=True,
244
+ set=[],
239
245
  ),
240
246
  )
241
247
 
242
248
  worker = ElementsWorker()
243
249
  worker.configure()
244
250
 
245
- elt_list = worker.list_elements()
251
+ elt_list = worker.get_elements()
246
252
 
247
253
  assert elt_list == [
248
254
  "11111111-1111-1111-1111-111111111111",
@@ -250,7 +256,265 @@ def test_list_elements_element_arg(mocker, mock_elements_worker):
250
256
  ]
251
257
 
252
258
 
253
- def test_list_elements_both_args_error(mocker, mock_elements_worker, tmp_path):
259
+ def test_get_elements_dataset_set_arg(responses, mocker, mock_elements_worker):
260
+ mocker.patch(
261
+ "arkindex_worker.worker.base.argparse.ArgumentParser.parse_args",
262
+ return_value=Namespace(
263
+ element=[],
264
+ config={},
265
+ verbose=False,
266
+ elements_list=None,
267
+ database=None,
268
+ dev=True,
269
+ set=[(UUID("11111111-1111-1111-1111-111111111111"), "train")],
270
+ ),
271
+ )
272
+
273
+ # Mock RetrieveDataset call
274
+ responses.add(
275
+ responses.GET,
276
+ "http://testserver/api/v1/datasets/11111111-1111-1111-1111-111111111111/",
277
+ status=200,
278
+ json={
279
+ "id": "11111111-1111-1111-1111-111111111111",
280
+ "name": "My dataset",
281
+ "description": "A dataset about cats.",
282
+ "sets": ["train", "dev", "test"],
283
+ "state": DatasetState.Complete.value,
284
+ },
285
+ content_type="application/json",
286
+ )
287
+
288
+ # Mock ListSetElements call
289
+ element = {
290
+ "id": "22222222-2222-2222-2222-222222222222",
291
+ "type": "page",
292
+ "name": "1",
293
+ "corpus": {
294
+ "id": "11111111-1111-1111-1111-111111111111",
295
+ },
296
+ "thumbnail_url": "http://example.com",
297
+ "zone": {
298
+ "id": "497f6eca-6276-4993-bfeb-53cbbbba6f08",
299
+ "polygon": [[0, 0], [0, 0], [0, 0]],
300
+ "image": {
301
+ "id": "497f6eca-6276-4993-bfeb-53cbbbba6f08",
302
+ "path": "string",
303
+ "width": 0,
304
+ "height": 0,
305
+ "url": "http://example.com",
306
+ "s3_url": "string",
307
+ "status": "checked",
308
+ "server": {
309
+ "display_name": "string",
310
+ "url": "http://example.com",
311
+ "max_width": 2147483647,
312
+ "max_height": 2147483647,
313
+ },
314
+ },
315
+ "url": "http://example.com",
316
+ },
317
+ "rotation_angle": 0,
318
+ "mirrored": False,
319
+ "created": "2019-08-24T14:15:22Z",
320
+ "classes": [
321
+ {
322
+ "id": "497f6eca-6276-4993-bfeb-53cbbbba6f08",
323
+ "ml_class": {
324
+ "id": "497f6eca-6276-4993-bfeb-53cbbbba6f08",
325
+ "name": "string",
326
+ },
327
+ "state": "pending",
328
+ "confidence": 0,
329
+ "high_confidence": True,
330
+ "worker_run": {
331
+ "id": "497f6eca-6276-4993-bfeb-53cbbbba6f08",
332
+ "summary": "string",
333
+ },
334
+ }
335
+ ],
336
+ "metadata": [
337
+ {
338
+ "id": "497f6eca-6276-4993-bfeb-53cbbbba6f08",
339
+ "type": "text",
340
+ "name": "string",
341
+ "value": "string",
342
+ "dates": [{"type": "exact", "year": 0, "month": 1, "day": 1}],
343
+ }
344
+ ],
345
+ "transcriptions": [
346
+ {
347
+ "id": "497f6eca-6276-4993-bfeb-53cbbbba6f08",
348
+ "text": "string",
349
+ "confidence": 0,
350
+ "orientation": "horizontal-lr",
351
+ "worker_run": {
352
+ "id": "497f6eca-6276-4993-bfeb-53cbbbba6f08",
353
+ "summary": "string",
354
+ },
355
+ }
356
+ ],
357
+ "has_children": True,
358
+ "worker_run": {
359
+ "id": "497f6eca-6276-4993-bfeb-53cbbbba6f08",
360
+ "summary": "string",
361
+ },
362
+ "confidence": 1,
363
+ }
364
+ responses.add(
365
+ responses.GET,
366
+ "http://testserver/api/v1/datasets/11111111-1111-1111-1111-111111111111/elements/?set=train&with_count=true",
367
+ status=200,
368
+ json={
369
+ "next": None,
370
+ "previous": None,
371
+ "results": [
372
+ {
373
+ "set": "train",
374
+ "element": element,
375
+ }
376
+ ],
377
+ "count": 1,
378
+ },
379
+ content_type="application/json",
380
+ )
381
+
382
+ worker = ElementsWorker()
383
+ worker.configure()
384
+
385
+ elt_list = worker.get_elements()
386
+
387
+ assert elt_list == [
388
+ Element(**element),
389
+ ]
390
+
391
+
392
+ def test_get_elements_dataset_set_api(responses, mocker, mock_elements_worker):
393
+ # Mock ListProcessSets call
394
+ responses.add(
395
+ responses.GET,
396
+ "http://testserver/api/v1/process/aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeffff/sets/",
397
+ status=200,
398
+ json={
399
+ "next": None,
400
+ "previous": None,
401
+ "results": [
402
+ {
403
+ "id": "33333333-3333-3333-3333-333333333333",
404
+ "dataset": {"id": "11111111-1111-1111-1111-111111111111"},
405
+ "set_name": "train",
406
+ }
407
+ ],
408
+ "count": 1,
409
+ },
410
+ content_type="application/json",
411
+ )
412
+
413
+ # Mock ListSetElements call
414
+ element = {
415
+ "id": "22222222-2222-2222-2222-222222222222",
416
+ "type": "page",
417
+ "name": "1",
418
+ "corpus": {
419
+ "id": "11111111-1111-1111-1111-111111111111",
420
+ },
421
+ "thumbnail_url": "http://example.com",
422
+ "zone": {
423
+ "id": "497f6eca-6276-4993-bfeb-53cbbbba6f08",
424
+ "polygon": [[0, 0], [0, 0], [0, 0]],
425
+ "image": {
426
+ "id": "497f6eca-6276-4993-bfeb-53cbbbba6f08",
427
+ "path": "string",
428
+ "width": 0,
429
+ "height": 0,
430
+ "url": "http://example.com",
431
+ "s3_url": "string",
432
+ "status": "checked",
433
+ "server": {
434
+ "display_name": "string",
435
+ "url": "http://example.com",
436
+ "max_width": 2147483647,
437
+ "max_height": 2147483647,
438
+ },
439
+ },
440
+ "url": "http://example.com",
441
+ },
442
+ "rotation_angle": 0,
443
+ "mirrored": False,
444
+ "created": "2019-08-24T14:15:22Z",
445
+ "classes": [
446
+ {
447
+ "id": "497f6eca-6276-4993-bfeb-53cbbbba6f08",
448
+ "ml_class": {
449
+ "id": "497f6eca-6276-4993-bfeb-53cbbbba6f08",
450
+ "name": "string",
451
+ },
452
+ "state": "pending",
453
+ "confidence": 0,
454
+ "high_confidence": True,
455
+ "worker_run": {
456
+ "id": "497f6eca-6276-4993-bfeb-53cbbbba6f08",
457
+ "summary": "string",
458
+ },
459
+ }
460
+ ],
461
+ "metadata": [
462
+ {
463
+ "id": "497f6eca-6276-4993-bfeb-53cbbbba6f08",
464
+ "type": "text",
465
+ "name": "string",
466
+ "value": "string",
467
+ "dates": [{"type": "exact", "year": 0, "month": 1, "day": 1}],
468
+ }
469
+ ],
470
+ "transcriptions": [
471
+ {
472
+ "id": "497f6eca-6276-4993-bfeb-53cbbbba6f08",
473
+ "text": "string",
474
+ "confidence": 0,
475
+ "orientation": "horizontal-lr",
476
+ "worker_run": {
477
+ "id": "497f6eca-6276-4993-bfeb-53cbbbba6f08",
478
+ "summary": "string",
479
+ },
480
+ }
481
+ ],
482
+ "has_children": True,
483
+ "worker_run": {
484
+ "id": "497f6eca-6276-4993-bfeb-53cbbbba6f08",
485
+ "summary": "string",
486
+ },
487
+ "confidence": 1,
488
+ }
489
+ responses.add(
490
+ responses.GET,
491
+ "http://testserver/api/v1/datasets/11111111-1111-1111-1111-111111111111/elements/?set=train&with_count=true",
492
+ status=200,
493
+ json={
494
+ "next": None,
495
+ "previous": None,
496
+ "results": [
497
+ {
498
+ "set": "train",
499
+ "element": element,
500
+ }
501
+ ],
502
+ "count": 1,
503
+ },
504
+ content_type="application/json",
505
+ )
506
+
507
+ # Update ProcessMode to Dataset
508
+ mock_elements_worker.process_information["mode"] = ProcessMode.Dataset
509
+
510
+ elt_list = mock_elements_worker.get_elements()
511
+
512
+ assert elt_list == [
513
+ Element(**element),
514
+ ]
515
+
516
+
517
+ def test_get_elements_both_args_error(mocker, mock_elements_worker, tmp_path):
254
518
  elements_path = tmp_path / "elements.json"
255
519
  elements_path.write_text(
256
520
  json.dumps(
@@ -270,6 +534,7 @@ def test_list_elements_both_args_error(mocker, mock_elements_worker, tmp_path):
270
534
  elements_list=elements_path.open(),
271
535
  database=None,
272
536
  dev=False,
537
+ set=[],
273
538
  ),
274
539
  )
275
540
 
@@ -279,7 +544,7 @@ def test_list_elements_both_args_error(mocker, mock_elements_worker, tmp_path):
279
544
  with pytest.raises(
280
545
  AssertionError, match="elements-list and element CLI args shouldn't be both set"
281
546
  ):
282
- worker.list_elements()
547
+ worker.get_elements()
283
548
 
284
549
 
285
550
  def test_database_arg(mocker, mock_elements_worker, tmp_path):
@@ -295,6 +560,7 @@ def test_database_arg(mocker, mock_elements_worker, tmp_path):
295
560
  elements_list=None,
296
561
  database=database_path,
297
562
  dev=False,
563
+ set=[],
298
564
  ),
299
565
  )
300
566
 
@@ -319,6 +585,7 @@ def test_database_arg_cache_missing_version_table(
319
585
  elements_list=None,
320
586
  database=database_path,
321
587
  dev=False,
588
+ set=[],
322
589
  ),
323
590
  )
324
591
 
@@ -1958,6 +2225,433 @@ def test_partial_update_element_confidence(
1958
2225
  assert cached_element.confidence == confidence
1959
2226
 
1960
2227
 
2228
+ def test_list_elements_wrong_folder(mock_elements_worker):
2229
+ with pytest.raises(AssertionError, match="folder should be of type bool"):
2230
+ mock_elements_worker.list_elements(folder="not bool")
2231
+
2232
+
2233
+ def test_list_elements_wrong_name(mock_elements_worker):
2234
+ with pytest.raises(AssertionError, match="name should be of type str"):
2235
+ mock_elements_worker.list_elements(name=1234)
2236
+
2237
+
2238
+ def test_list_elements_wrong_top_level(mock_elements_worker):
2239
+ with pytest.raises(AssertionError, match="top_level should be of type bool"):
2240
+ mock_elements_worker.list_elements(top_level="not bool")
2241
+
2242
+
2243
+ def test_list_elements_wrong_type(mock_elements_worker):
2244
+ with pytest.raises(AssertionError, match="type should be of type str"):
2245
+ mock_elements_worker.list_elements(type=1234)
2246
+
2247
+
2248
+ def test_list_elements_wrong_with_classes(mock_elements_worker):
2249
+ with pytest.raises(AssertionError, match="with_classes should be of type bool"):
2250
+ mock_elements_worker.list_elements(with_classes="not bool")
2251
+
2252
+
2253
+ def test_list_elements_wrong_with_corpus(mock_elements_worker):
2254
+ with pytest.raises(AssertionError, match="with_corpus should be of type bool"):
2255
+ mock_elements_worker.list_elements(with_corpus="not bool")
2256
+
2257
+
2258
+ def test_list_elements_wrong_with_has_children(mock_elements_worker):
2259
+ with pytest.raises(
2260
+ AssertionError, match="with_has_children should be of type bool"
2261
+ ):
2262
+ mock_elements_worker.list_elements(with_has_children="not bool")
2263
+
2264
+
2265
+ def test_list_elements_wrong_with_zone(mock_elements_worker):
2266
+ with pytest.raises(AssertionError, match="with_zone should be of type bool"):
2267
+ mock_elements_worker.list_elements(with_zone="not bool")
2268
+
2269
+
2270
+ def test_list_elements_wrong_with_metadata(mock_elements_worker):
2271
+ with pytest.raises(AssertionError, match="with_metadata should be of type bool"):
2272
+ mock_elements_worker.list_elements(with_metadata="not bool")
2273
+
2274
+
2275
+ @pytest.mark.parametrize(
2276
+ ("param", "value"),
2277
+ [
2278
+ ("worker_run", 1234),
2279
+ ("transcription_worker_run", 1234),
2280
+ ],
2281
+ )
2282
+ def test_list_elements_wrong_worker_run(mock_elements_worker, param, value):
2283
+ with pytest.raises(AssertionError, match=f"{param} should be of type str or bool"):
2284
+ mock_elements_worker.list_elements(**{param: value})
2285
+
2286
+
2287
+ @pytest.mark.parametrize(
2288
+ ("param", "alternative", "value"),
2289
+ [
2290
+ ("worker_version", "worker_run", 1234),
2291
+ ("transcription_worker_version", "transcription_worker_run", 1234),
2292
+ ],
2293
+ )
2294
+ def test_list_elements_wrong_worker_version(
2295
+ mock_elements_worker, param, alternative, value
2296
+ ):
2297
+ # WARNING: pytest.deprecated_call must be placed BEFORE pytest.raises, otherwise `match` argument won't be checked
2298
+ with (
2299
+ pytest.deprecated_call(
2300
+ match=f"`{param}` usage is deprecated. Consider using `{alternative}` instead."
2301
+ ),
2302
+ pytest.raises(AssertionError, match=f"{param} should be of type str or bool"),
2303
+ ):
2304
+ mock_elements_worker.list_elements(**{param: value})
2305
+
2306
+
2307
+ @pytest.mark.parametrize(
2308
+ "param",
2309
+ [
2310
+ "worker_run",
2311
+ "transcription_worker_run",
2312
+ ],
2313
+ )
2314
+ def test_list_elements_wrong_bool_worker_run(mock_elements_worker, param):
2315
+ with pytest.raises(
2316
+ AssertionError, match=f"if of type bool, {param} can only be set to False"
2317
+ ):
2318
+ mock_elements_worker.list_elements(**{param: True})
2319
+
2320
+
2321
+ @pytest.mark.parametrize(
2322
+ ("param", "alternative"),
2323
+ [
2324
+ ("worker_version", "worker_run"),
2325
+ ("transcription_worker_version", "transcription_worker_run"),
2326
+ ],
2327
+ )
2328
+ def test_list_elements_wrong_bool_worker_version(
2329
+ mock_elements_worker, param, alternative
2330
+ ):
2331
+ # WARNING: pytest.deprecated_call must be placed BEFORE pytest.raises, otherwise `match` argument won't be checked
2332
+ with (
2333
+ pytest.deprecated_call(
2334
+ match=f"`{param}` usage is deprecated. Consider using `{alternative}` instead."
2335
+ ),
2336
+ pytest.raises(
2337
+ AssertionError, match=f"if of type bool, {param} can only be set to False"
2338
+ ),
2339
+ ):
2340
+ mock_elements_worker.list_elements(**{param: True})
2341
+
2342
+
2343
+ def test_list_elements_api_error(responses, mock_elements_worker):
2344
+ responses.add(
2345
+ responses.GET,
2346
+ f"http://testserver/api/v1/corpus/{mock_elements_worker.corpus_id}/elements/",
2347
+ status=418,
2348
+ )
2349
+
2350
+ with pytest.raises(
2351
+ Exception, match="Stopping pagination as data will be incomplete"
2352
+ ):
2353
+ next(mock_elements_worker.list_elements())
2354
+
2355
+ assert len(responses.calls) == len(BASE_API_CALLS) + 5
2356
+ assert [
2357
+ (call.request.method, call.request.url) for call in responses.calls
2358
+ ] == BASE_API_CALLS + [
2359
+ # We do 5 retries
2360
+ (
2361
+ "GET",
2362
+ f"http://testserver/api/v1/corpus/{mock_elements_worker.corpus_id}/elements/",
2363
+ ),
2364
+ (
2365
+ "GET",
2366
+ f"http://testserver/api/v1/corpus/{mock_elements_worker.corpus_id}/elements/",
2367
+ ),
2368
+ (
2369
+ "GET",
2370
+ f"http://testserver/api/v1/corpus/{mock_elements_worker.corpus_id}/elements/",
2371
+ ),
2372
+ (
2373
+ "GET",
2374
+ f"http://testserver/api/v1/corpus/{mock_elements_worker.corpus_id}/elements/",
2375
+ ),
2376
+ (
2377
+ "GET",
2378
+ f"http://testserver/api/v1/corpus/{mock_elements_worker.corpus_id}/elements/",
2379
+ ),
2380
+ ]
2381
+
2382
+
2383
+ def test_list_elements(responses, mock_elements_worker):
2384
+ expected_children = [
2385
+ {
2386
+ "id": "0000",
2387
+ "type": "page",
2388
+ "name": "Test",
2389
+ "corpus": {},
2390
+ "thumbnail_url": None,
2391
+ "zone": {},
2392
+ "best_classes": None,
2393
+ "has_children": None,
2394
+ "worker_version_id": None,
2395
+ "worker_run_id": None,
2396
+ },
2397
+ {
2398
+ "id": "1111",
2399
+ "type": "page",
2400
+ "name": "Test 2",
2401
+ "corpus": {},
2402
+ "thumbnail_url": None,
2403
+ "zone": {},
2404
+ "best_classes": None,
2405
+ "has_children": None,
2406
+ "worker_version_id": None,
2407
+ "worker_run_id": None,
2408
+ },
2409
+ {
2410
+ "id": "2222",
2411
+ "type": "page",
2412
+ "name": "Test 3",
2413
+ "corpus": {},
2414
+ "thumbnail_url": None,
2415
+ "zone": {},
2416
+ "best_classes": None,
2417
+ "has_children": None,
2418
+ "worker_version_id": None,
2419
+ "worker_run_id": None,
2420
+ },
2421
+ ]
2422
+ responses.add(
2423
+ responses.GET,
2424
+ f"http://testserver/api/v1/corpus/{mock_elements_worker.corpus_id}/elements/",
2425
+ status=200,
2426
+ json={
2427
+ "count": 3,
2428
+ "next": None,
2429
+ "results": expected_children,
2430
+ },
2431
+ )
2432
+
2433
+ for idx, child in enumerate(mock_elements_worker.list_elements()):
2434
+ assert child == expected_children[idx]
2435
+
2436
+ assert len(responses.calls) == len(BASE_API_CALLS) + 1
2437
+ assert [
2438
+ (call.request.method, call.request.url) for call in responses.calls
2439
+ ] == BASE_API_CALLS + [
2440
+ (
2441
+ "GET",
2442
+ f"http://testserver/api/v1/corpus/{mock_elements_worker.corpus_id}/elements/",
2443
+ ),
2444
+ ]
2445
+
2446
+
2447
+ def test_list_elements_manual_worker_version(responses, mock_elements_worker):
2448
+ expected_children = [
2449
+ {
2450
+ "id": "0000",
2451
+ "type": "page",
2452
+ "name": "Test",
2453
+ "corpus": {},
2454
+ "thumbnail_url": None,
2455
+ "zone": {},
2456
+ "best_classes": None,
2457
+ "has_children": None,
2458
+ "worker_version_id": None,
2459
+ "worker_run_id": None,
2460
+ }
2461
+ ]
2462
+ responses.add(
2463
+ responses.GET,
2464
+ f"http://testserver/api/v1/corpus/{mock_elements_worker.corpus_id}/elements/?worker_version=False",
2465
+ status=200,
2466
+ json={
2467
+ "count": 1,
2468
+ "next": None,
2469
+ "results": expected_children,
2470
+ },
2471
+ )
2472
+
2473
+ with pytest.deprecated_call(
2474
+ match="`worker_version` usage is deprecated. Consider using `worker_run` instead."
2475
+ ):
2476
+ for idx, child in enumerate(
2477
+ mock_elements_worker.list_elements(worker_version=False)
2478
+ ):
2479
+ assert child == expected_children[idx]
2480
+
2481
+ assert len(responses.calls) == len(BASE_API_CALLS) + 1
2482
+ assert [
2483
+ (call.request.method, call.request.url) for call in responses.calls
2484
+ ] == BASE_API_CALLS + [
2485
+ (
2486
+ "GET",
2487
+ f"http://testserver/api/v1/corpus/{mock_elements_worker.corpus_id}/elements/?worker_version=False",
2488
+ ),
2489
+ ]
2490
+
2491
+
2492
+ def test_list_elements_manual_worker_run(responses, mock_elements_worker):
2493
+ expected_children = [
2494
+ {
2495
+ "id": "0000",
2496
+ "type": "page",
2497
+ "name": "Test",
2498
+ "corpus": {},
2499
+ "thumbnail_url": None,
2500
+ "zone": {},
2501
+ "best_classes": None,
2502
+ "has_children": None,
2503
+ "worker_version_id": None,
2504
+ "worker_run_id": None,
2505
+ }
2506
+ ]
2507
+ responses.add(
2508
+ responses.GET,
2509
+ f"http://testserver/api/v1/corpus/{mock_elements_worker.corpus_id}/elements/?worker_run=False",
2510
+ status=200,
2511
+ json={
2512
+ "count": 1,
2513
+ "next": None,
2514
+ "results": expected_children,
2515
+ },
2516
+ )
2517
+
2518
+ for idx, child in enumerate(mock_elements_worker.list_elements(worker_run=False)):
2519
+ assert child == expected_children[idx]
2520
+
2521
+ assert len(responses.calls) == len(BASE_API_CALLS) + 1
2522
+ assert [
2523
+ (call.request.method, call.request.url) for call in responses.calls
2524
+ ] == BASE_API_CALLS + [
2525
+ (
2526
+ "GET",
2527
+ f"http://testserver/api/v1/corpus/{mock_elements_worker.corpus_id}/elements/?worker_run=False",
2528
+ ),
2529
+ ]
2530
+
2531
+
2532
+ def test_list_elements_with_cache_unhandled_param(mock_elements_worker_with_cache):
2533
+ with pytest.raises(
2534
+ AssertionError,
2535
+ match="When using the local cache, you can only filter by 'type' and/or 'worker_version' and/or 'worker_run'",
2536
+ ):
2537
+ mock_elements_worker_with_cache.list_elements(with_corpus=True)
2538
+
2539
+
2540
+ @pytest.mark.usefixtures("_mock_cached_elements")
2541
+ @pytest.mark.parametrize(
2542
+ ("filters", "expected_ids"),
2543
+ [
2544
+ # Filter on element should give all elements inserted
2545
+ (
2546
+ {},
2547
+ (
2548
+ "99999999-9999-9999-9999-999999999999",
2549
+ "12341234-1234-1234-1234-123412341234",
2550
+ "11111111-1111-1111-1111-111111111111",
2551
+ "22222222-2222-2222-2222-222222222222",
2552
+ "33333333-3333-3333-3333-333333333333",
2553
+ ),
2554
+ ),
2555
+ # Filter on element and page should give the second element
2556
+ (
2557
+ {"type": "page"},
2558
+ ("22222222-2222-2222-2222-222222222222",),
2559
+ ),
2560
+ # Filter on element and worker run should give second
2561
+ (
2562
+ {
2563
+ "worker_run": "56785678-5678-5678-5678-567856785678",
2564
+ },
2565
+ (
2566
+ "12341234-1234-1234-1234-123412341234",
2567
+ "22222222-2222-2222-2222-222222222222",
2568
+ ),
2569
+ ),
2570
+ # Filter on element, manual worker run should give first and third
2571
+ (
2572
+ {"worker_run": False},
2573
+ (
2574
+ "99999999-9999-9999-9999-999999999999",
2575
+ "11111111-1111-1111-1111-111111111111",
2576
+ "33333333-3333-3333-3333-333333333333",
2577
+ ),
2578
+ ),
2579
+ ],
2580
+ )
2581
+ def test_list_elements_with_cache(
2582
+ responses, mock_elements_worker_with_cache, filters, expected_ids
2583
+ ):
2584
+ # Check we have 5 elements already present in database
2585
+ assert CachedElement.select().count() == 5
2586
+
2587
+ # Query database through cache
2588
+ elements = mock_elements_worker_with_cache.list_elements(**filters)
2589
+ assert elements.count() == len(expected_ids)
2590
+ for child, expected_id in zip(elements.order_by("id"), expected_ids, strict=True):
2591
+ assert child.id == UUID(expected_id)
2592
+
2593
+ # Check the worker never hits the API for elements
2594
+ assert len(responses.calls) == len(BASE_API_CALLS)
2595
+ assert [
2596
+ (call.request.method, call.request.url) for call in responses.calls
2597
+ ] == BASE_API_CALLS
2598
+
2599
+
2600
+ @pytest.mark.usefixtures("_mock_cached_elements")
2601
+ @pytest.mark.parametrize(
2602
+ ("filters", "expected_ids"),
2603
+ [
2604
+ # Filter on element and worker version
2605
+ (
2606
+ {
2607
+ "worker_version": "56785678-5678-5678-5678-567856785678",
2608
+ },
2609
+ (
2610
+ "12341234-1234-1234-1234-123412341234",
2611
+ "11111111-1111-1111-1111-111111111111",
2612
+ "22222222-2222-2222-2222-222222222222",
2613
+ ),
2614
+ ),
2615
+ # Filter on element, type double_page and worker version
2616
+ (
2617
+ {"type": "page", "worker_version": "56785678-5678-5678-5678-567856785678"},
2618
+ ("22222222-2222-2222-2222-222222222222",),
2619
+ ),
2620
+ # Filter on element, manual worker version
2621
+ (
2622
+ {"worker_version": False},
2623
+ (
2624
+ "99999999-9999-9999-9999-999999999999",
2625
+ "33333333-3333-3333-3333-333333333333",
2626
+ ),
2627
+ ),
2628
+ ],
2629
+ )
2630
+ def test_list_elements_with_cache_deprecation(
2631
+ responses,
2632
+ mock_elements_worker_with_cache,
2633
+ filters,
2634
+ expected_ids,
2635
+ ):
2636
+ # Check we have 5 elements already present in database
2637
+ assert CachedElement.select().count() == 5
2638
+
2639
+ with pytest.deprecated_call(
2640
+ match="`worker_version` usage is deprecated. Consider using `worker_run` instead."
2641
+ ):
2642
+ # Query database through cache
2643
+ elements = mock_elements_worker_with_cache.list_elements(**filters)
2644
+ assert elements.count() == len(expected_ids)
2645
+ for child, expected_id in zip(elements.order_by("id"), expected_ids, strict=True):
2646
+ assert child.id == UUID(expected_id)
2647
+
2648
+ # Check the worker never hits the API for elements
2649
+ assert len(responses.calls) == len(BASE_API_CALLS)
2650
+ assert [
2651
+ (call.request.method, call.request.url) for call in responses.calls
2652
+ ] == BASE_API_CALLS
2653
+
2654
+
1961
2655
  def test_list_element_children_wrong_element(mock_elements_worker):
1962
2656
  with pytest.raises(
1963
2657
  AssertionError,