arkindex-base-worker 0.5.0b3__py3-none-any.whl → 0.5.1__py3-none-any.whl

This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
tests/test_base_worker.py CHANGED
@@ -13,6 +13,29 @@ from arkindex_worker.worker import BaseWorker, ElementsWorker
13
13
  from arkindex_worker.worker.base import ExtrasDirNotFoundError
14
14
  from tests import CORPUS_ID, FIXTURES_DIR
15
15
 
16
+ SIMPLE_PAYLOAD = {
17
+ "id": "56785678-5678-5678-5678-567856785678",
18
+ "parents": [],
19
+ "worker_version": {
20
+ "id": "12341234-1234-1234-1234-123412341234",
21
+ "worker": {
22
+ "id": "deadbeef-1234-5678-1234-worker",
23
+ "name": "Fake worker",
24
+ "slug": "fake_worker",
25
+ "type": "classifier",
26
+ },
27
+ "revision": {"hash": "deadbeef1234"},
28
+ "configuration": {"configuration": {}},
29
+ },
30
+ "configuration": None,
31
+ "model_version": None,
32
+ "process": {
33
+ "id": "aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeffff",
34
+ "corpus": CORPUS_ID,
35
+ },
36
+ "summary": "Worker Fake worker @ 123412",
37
+ }
38
+
16
39
 
17
40
  def test_init_default_local_share():
18
41
  worker = BaseWorker()
@@ -149,38 +172,13 @@ def test_configure_worker_run(mocker, responses, caplog):
149
172
 
150
173
  worker = BaseWorker()
151
174
  mocker.patch.object(sys, "argv", ["worker"])
152
- user_configuration = {
153
- "id": "bbbbbbbbb-bbbb-bbbb-bbbb-bbbbbbbbbbbb",
154
- "name": "BBB",
155
- "configuration": {"a": "b"},
156
- }
157
175
  payload = {
158
- "id": "56785678-5678-5678-5678-567856785678",
159
- "parents": [],
160
- "worker": {
161
- "id": "deadbeef-1234-5678-1234-worker",
162
- "name": "Fake worker",
163
- "slug": "fake_worker",
164
- "type": "classifier",
165
- },
166
- "worker_version": {
167
- "id": "12341234-1234-1234-1234-123412341234",
168
- "worker": {
169
- "id": "deadbeef-1234-5678-1234-worker",
170
- "name": "Fake worker",
171
- "slug": "fake_worker",
172
- "type": "classifier",
173
- },
174
- "revision": {"hash": "deadbeef1234"},
175
- "configuration": {"configuration": {}},
176
- },
177
- "configuration": user_configuration,
178
- "model_version": None,
179
- "process": {
180
- "id": "aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeffff",
181
- "corpus": CORPUS_ID,
176
+ **SIMPLE_PAYLOAD,
177
+ "configuration": {
178
+ "id": "bbbbbbbbb-bbbb-bbbb-bbbb-bbbbbbbbbbbb",
179
+ "name": "BBB",
180
+ "configuration": {"a": "b"},
182
181
  },
183
- "summary": "Worker Fake worker @ 123412",
184
182
  }
185
183
 
186
184
  responses.add(
@@ -190,6 +188,14 @@ def test_configure_worker_run(mocker, responses, caplog):
190
188
  body=json.dumps(payload),
191
189
  content_type="application/json",
192
190
  )
191
+
192
+ # By default, stick to classic configuration
193
+ responses.add(
194
+ responses.GET,
195
+ "http://testserver/api/v1/workers/runs/56785678-5678-5678-5678-567856785678/configuration/",
196
+ status=400,
197
+ )
198
+
193
199
  worker.args = worker.parser.parse_args()
194
200
  assert worker.is_read_only is False
195
201
  assert worker.worker_run_id == "56785678-5678-5678-5678-567856785678"
@@ -205,6 +211,11 @@ def test_configure_worker_run(mocker, responses, caplog):
205
211
  logging.INFO,
206
212
  "Loaded Worker Fake worker @ 123412 from API",
207
213
  ),
214
+ (
215
+ "arkindex_worker",
216
+ logging.INFO,
217
+ "Modern configuration is not available",
218
+ ),
208
219
  ("arkindex_worker", logging.INFO, "Loaded user configuration from WorkerRun"),
209
220
  ("arkindex_worker", logging.INFO, "User configuration retrieved"),
210
221
  ]
@@ -213,9 +224,16 @@ def test_configure_worker_run(mocker, responses, caplog):
213
224
 
214
225
 
215
226
  @pytest.mark.usefixtures("_mock_worker_run_no_revision_api")
216
- def test_configure_worker_run_no_revision(mocker, caplog):
227
+ def test_configure_worker_run_no_revision(mocker, caplog, responses):
217
228
  worker = BaseWorker()
218
229
 
230
+ # By default, stick to classic configuration
231
+ responses.add(
232
+ responses.GET,
233
+ "http://testserver/api/v1/workers/runs/56785678-5678-5678-5678-567856785678/configuration/",
234
+ status=400,
235
+ )
236
+
219
237
  mocker.patch.object(sys, "argv", ["worker"])
220
238
  worker.args = worker.parser.parse_args()
221
239
  assert worker.is_read_only is False
@@ -227,7 +245,12 @@ def test_configure_worker_run_no_revision(mocker, caplog):
227
245
  worker.configure()
228
246
 
229
247
  assert caplog.record_tuples == [
230
- ("arkindex_worker", logging.INFO, "Loaded Worker Fake worker @ 1 from API")
248
+ ("arkindex_worker", logging.INFO, "Loaded Worker Fake worker @ 1 from API"),
249
+ (
250
+ "arkindex_worker",
251
+ logging.INFO,
252
+ "Modern configuration is not available",
253
+ ),
231
254
  ]
232
255
 
233
256
 
@@ -237,8 +260,7 @@ def test_configure_user_configuration_defaults(mocker, responses):
237
260
  worker.args = worker.parser.parse_args()
238
261
 
239
262
  payload = {
240
- "id": "56785678-5678-5678-5678-567856785678",
241
- "parents": [],
263
+ **SIMPLE_PAYLOAD,
242
264
  "worker_version": {
243
265
  "id": "12341234-1234-1234-1234-123412341234",
244
266
  "worker": {
@@ -268,12 +290,6 @@ def test_configure_user_configuration_defaults(mocker, responses):
268
290
  "param_5": True,
269
291
  },
270
292
  },
271
- "model_version": None,
272
- "process": {
273
- "id": "aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeffff",
274
- "corpus": CORPUS_ID,
275
- },
276
- "summary": "Worker Fake worker @ 123412",
277
293
  }
278
294
  responses.add(
279
295
  responses.GET,
@@ -283,6 +299,13 @@ def test_configure_user_configuration_defaults(mocker, responses):
283
299
  content_type="application/json",
284
300
  )
285
301
 
302
+ # By default, stick to classic configuration
303
+ responses.add(
304
+ responses.GET,
305
+ "http://testserver/api/v1/workers/runs/56785678-5678-5678-5678-567856785678/configuration/",
306
+ status=400,
307
+ )
308
+
286
309
  worker.configure()
287
310
 
288
311
  assert worker.user_configuration == {
@@ -308,30 +331,12 @@ def test_configure_user_config_debug(mocker, responses, debug):
308
331
  mocker.patch.object(sys, "argv", ["worker"])
309
332
  assert logger.level == logging.NOTSET
310
333
  payload = {
311
- "id": "56785678-5678-5678-5678-567856785678",
312
- "parents": [],
313
- "worker_version": {
314
- "id": "12341234-1234-1234-1234-123412341234",
315
- "worker": {
316
- "id": "deadbeef-1234-5678-1234-worker",
317
- "name": "Fake worker",
318
- "slug": "fake_worker",
319
- "type": "classifier",
320
- },
321
- "revision": {"hash": "deadbeef1234"},
322
- "configuration": {"configuration": {}},
323
- },
324
- "model_version": None,
334
+ **SIMPLE_PAYLOAD,
325
335
  "configuration": {
326
336
  "id": "af0daaf4-983e-4703-a7ed-a10f146d6684",
327
337
  "name": "BBB",
328
338
  "configuration": {"debug": debug},
329
339
  },
330
- "process": {
331
- "id": "aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeffff",
332
- "corpus": CORPUS_ID,
333
- },
334
- "summary": "Worker Fake worker @ 123412",
335
340
  }
336
341
  responses.add(
337
342
  responses.GET,
@@ -340,6 +345,13 @@ def test_configure_user_config_debug(mocker, responses, debug):
340
345
  body=json.dumps(payload),
341
346
  content_type="application/json",
342
347
  )
348
+
349
+ # By default, stick to classic configuration
350
+ responses.add(
351
+ responses.GET,
352
+ "http://testserver/api/v1/workers/runs/56785678-5678-5678-5678-567856785678/configuration/",
353
+ status=400,
354
+ )
343
355
  worker.args = worker.parser.parse_args()
344
356
  worker.configure()
345
357
 
@@ -354,32 +366,8 @@ def test_configure_worker_run_missing_conf(mocker, responses):
354
366
  mocker.patch.object(sys, "argv", ["worker"])
355
367
 
356
368
  payload = {
357
- "id": "56785678-5678-5678-5678-567856785678",
358
- "parents": [],
359
- "worker": {
360
- "id": "deadbeef-1234-5678-1234-worker",
361
- "name": "Fake worker",
362
- "slug": "fake_worker",
363
- "type": "classifier",
364
- },
365
- "worker_version": {
366
- "id": "12341234-1234-1234-1234-123412341234",
367
- "worker": {
368
- "id": "deadbeef-1234-5678-1234-worker",
369
- "name": "Fake worker",
370
- "slug": "fake_worker",
371
- "type": "classifier",
372
- },
373
- "revision": {"hash": "deadbeef1234"},
374
- "configuration": {"configuration": {}},
375
- },
376
- "model_version": None,
369
+ **SIMPLE_PAYLOAD,
377
370
  "configuration": {"id": "bbbbbbbbb-bbbb-bbbb-bbbb-bbbbbbbbbbbb", "name": "BBB"},
378
- "process": {
379
- "id": "aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeffff",
380
- "corpus": CORPUS_ID,
381
- },
382
- "summary": "Worker Fake worker @ 123412",
383
371
  }
384
372
  responses.add(
385
373
  responses.GET,
@@ -388,6 +376,12 @@ def test_configure_worker_run_missing_conf(mocker, responses):
388
376
  body=json.dumps(payload),
389
377
  content_type="application/json",
390
378
  )
379
+ # By default, stick to classic configuration
380
+ responses.add(
381
+ responses.GET,
382
+ "http://testserver/api/v1/workers/runs/56785678-5678-5678-5678-567856785678/configuration/",
383
+ status=400,
384
+ )
391
385
  worker.args = worker.parser.parse_args()
392
386
  worker.configure()
393
387
 
@@ -401,28 +395,7 @@ def test_configure_worker_run_no_worker_run_conf(mocker, responses):
401
395
  worker = BaseWorker()
402
396
  mocker.patch.object(sys, "argv", ["worker"])
403
397
 
404
- payload = {
405
- "id": "56785678-5678-5678-5678-567856785678",
406
- "parents": [],
407
- "worker_version": {
408
- "id": "12341234-1234-1234-1234-123412341234",
409
- "worker": {
410
- "id": "deadbeef-1234-5678-1234-worker",
411
- "name": "Fake worker",
412
- "slug": "fake_worker",
413
- "type": "classifier",
414
- },
415
- "revision": {"hash": "deadbeef1234"},
416
- "configuration": {},
417
- },
418
- "model_version": None,
419
- "configuration": None,
420
- "process": {
421
- "id": "aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeffff",
422
- "corpus": CORPUS_ID,
423
- },
424
- "summary": "Worker Fake worker @ 123412",
425
- }
398
+ payload = SIMPLE_PAYLOAD
426
399
  responses.add(
427
400
  responses.GET,
428
401
  "http://testserver/api/v1/process/workers/56785678-5678-5678-5678-567856785678/",
@@ -430,6 +403,12 @@ def test_configure_worker_run_no_worker_run_conf(mocker, responses):
430
403
  body=json.dumps(payload),
431
404
  content_type="application/json",
432
405
  )
406
+ # By default, stick to classic configuration
407
+ responses.add(
408
+ responses.GET,
409
+ "http://testserver/api/v1/workers/runs/56785678-5678-5678-5678-567856785678/configuration/",
410
+ status=400,
411
+ )
433
412
  worker.args = worker.parser.parse_args()
434
413
  worker.configure()
435
414
 
@@ -440,20 +419,7 @@ def test_configure_load_model_configuration(mocker, responses):
440
419
  worker = BaseWorker()
441
420
  mocker.patch.object(sys, "argv", ["worker"])
442
421
  payload = {
443
- "id": "56785678-5678-5678-5678-567856785678",
444
- "parents": [],
445
- "worker_version": {
446
- "id": "12341234-1234-1234-1234-123412341234",
447
- "worker": {
448
- "id": "deadbeef-1234-5678-1234-worker",
449
- "name": "Fake worker",
450
- "slug": "fake_worker",
451
- "type": "classifier",
452
- },
453
- "revision": {"hash": "deadbeef1234"},
454
- "configuration": {"configuration": {}},
455
- },
456
- "configuration": None,
422
+ **SIMPLE_PAYLOAD,
457
423
  "model_version": {
458
424
  "id": "12341234-1234-1234-1234-123412341234",
459
425
  "model": {
@@ -466,11 +432,6 @@ def test_configure_load_model_configuration(mocker, responses):
466
432
  "param3": None,
467
433
  },
468
434
  },
469
- "process": {
470
- "id": "aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeffff",
471
- "corpus": CORPUS_ID,
472
- },
473
- "summary": "Worker Fake worker @ 123412",
474
435
  }
475
436
 
476
437
  responses.add(
@@ -480,6 +441,12 @@ def test_configure_load_model_configuration(mocker, responses):
480
441
  body=json.dumps(payload),
481
442
  content_type="application/json",
482
443
  )
444
+ # By default, stick to classic configuration
445
+ responses.add(
446
+ responses.GET,
447
+ "http://testserver/api/v1/workers/runs/56785678-5678-5678-5678-567856785678/configuration/",
448
+ status=400,
449
+ )
483
450
  worker.args = worker.parser.parse_args()
484
451
  assert worker.is_read_only is False
485
452
  assert worker.worker_run_id == "56785678-5678-5678-5678-567856785678"
@@ -947,6 +914,13 @@ def test_worker_config_multiple_source(
947
914
  content_type="application/json",
948
915
  )
949
916
 
917
+ # By default, stick to classic configuration
918
+ responses.add(
919
+ responses.GET,
920
+ "http://testserver/api/v1/workers/runs/56785678-5678-5678-5678-567856785678/configuration/",
921
+ status=400,
922
+ )
923
+
950
924
  # Create and configure a worker
951
925
  monkeypatch.setattr(sys, "argv", ["worker"])
952
926
  worker = BaseWorker()
tests/test_cache.py CHANGED
@@ -60,9 +60,8 @@ def test_create_tables(tmp_path):
60
60
  CREATE TABLE "dataset_elements" ("id" TEXT NOT NULL PRIMARY KEY, "element_id" TEXT NOT NULL, "dataset_id" TEXT NOT NULL, "set_name" VARCHAR(255) NOT NULL, FOREIGN KEY ("element_id") REFERENCES "elements" ("id"), FOREIGN KEY ("dataset_id") REFERENCES "datasets" ("id"))
61
61
  CREATE TABLE "datasets" ("id" TEXT NOT NULL PRIMARY KEY, "name" VARCHAR(255) NOT NULL, "state" VARCHAR(255) NOT NULL DEFAULT 'open', "sets" TEXT NOT NULL)
62
62
  CREATE TABLE "elements" ("id" TEXT NOT NULL PRIMARY KEY, "parent_id" TEXT, "type" VARCHAR(50) NOT NULL, "image_id" TEXT, "polygon" text, "rotation_angle" INTEGER NOT NULL, "mirrored" INTEGER NOT NULL, "initial" INTEGER NOT NULL, "worker_version_id" TEXT, "worker_run_id" TEXT, "confidence" REAL, FOREIGN KEY ("image_id") REFERENCES "images" ("id"))
63
- CREATE TABLE "entities" ("id" TEXT NOT NULL PRIMARY KEY, "type" VARCHAR(50) NOT NULL, "name" TEXT NOT NULL, "validated" INTEGER NOT NULL, "metas" text, "worker_run_id" TEXT)
64
- CREATE TABLE "images" ("id" TEXT NOT NULL PRIMARY KEY, "width" INTEGER NOT NULL, "height" INTEGER NOT NULL, "url" TEXT NOT NULL)
65
- CREATE TABLE "transcription_entities" ("transcription_id" TEXT NOT NULL, "entity_id" TEXT NOT NULL, "offset" INTEGER NOT NULL CHECK (offset >= 0), "length" INTEGER NOT NULL CHECK (length > 0), "worker_run_id" TEXT, "confidence" REAL, PRIMARY KEY ("transcription_id", "entity_id"), FOREIGN KEY ("transcription_id") REFERENCES "transcriptions" ("id"), FOREIGN KEY ("entity_id") REFERENCES "entities" ("id"))
63
+ CREATE TABLE "images" ("id" TEXT NOT NULL PRIMARY KEY, "width" INTEGER NOT NULL, "height" INTEGER NOT NULL, "url" TEXT NOT NULL, "version" INTEGER NOT NULL)
64
+ CREATE TABLE "transcription_entities" ("transcription_id" TEXT NOT NULL, "type" VARCHAR(50) NOT NULL, "offset" INTEGER NOT NULL CHECK (offset >= 0), "length" INTEGER NOT NULL CHECK (length > 0), "worker_run_id" TEXT, "confidence" REAL, PRIMARY KEY ("transcription_id", "type"), FOREIGN KEY ("transcription_id") REFERENCES "transcriptions" ("id"))
66
65
  CREATE TABLE "transcriptions" ("id" TEXT NOT NULL PRIMARY KEY, "element_id" TEXT NOT NULL, "text" TEXT NOT NULL, "confidence" REAL, "orientation" VARCHAR(50) NOT NULL, "worker_version_id" TEXT, "worker_run_id" TEXT, FOREIGN KEY ("element_id") REFERENCES "elements" ("id"))"""
67
66
 
68
67
  actual_schema = "\n".join(
@@ -430,6 +430,7 @@ def test_run_no_sets(mocker, caplog, mock_dataset_worker):
430
430
 
431
431
  assert [(level, message) for _, level, message in caplog.record_tuples] == [
432
432
  (logging.INFO, "Loaded Worker Fake worker @ 123412 from API"),
433
+ (logging.INFO, "Modern configuration is not available"),
433
434
  (logging.WARNING, "No sets to process, stopping."),
434
435
  ]
435
436
 
@@ -453,6 +454,7 @@ def test_run_initial_dataset_state_error(
453
454
 
454
455
  assert [(level, message) for _, level, message in caplog.record_tuples] == [
455
456
  (logging.INFO, "Loaded Worker Fake worker @ 123412 from API"),
457
+ (logging.INFO, "Modern configuration is not available"),
456
458
  (
457
459
  logging.WARNING,
458
460
  "Failed running worker on Set (train) from Dataset (dataset_id): AssertionError('When processing a set, its dataset state should be Complete.')",
@@ -497,6 +499,7 @@ def test_run_download_dataset_artifact_api_error(
497
499
 
498
500
  assert [(level, message) for _, level, message in caplog.record_tuples] == [
499
501
  (logging.INFO, "Loaded Worker Fake worker @ 123412 from API"),
502
+ (logging.INFO, "Modern configuration is not available"),
500
503
  (
501
504
  logging.INFO,
502
505
  "Retrieving data for Set (train) from Dataset (dataset_id) (1/1)",
@@ -550,6 +553,7 @@ def test_run_no_downloaded_dataset_artifact_error(
550
553
 
551
554
  assert [(level, message) for _, level, message in caplog.record_tuples] == [
552
555
  (logging.INFO, "Loaded Worker Fake worker @ 123412 from API"),
556
+ (logging.INFO, "Modern configuration is not available"),
553
557
  (
554
558
  logging.INFO,
555
559
  "Retrieving data for Set (train) from Dataset (dataset_id) (1/1)",
@@ -626,6 +630,7 @@ def test_run(
626
630
 
627
631
  assert [(level, message) for _, level, message in caplog.record_tuples] == [
628
632
  (logging.INFO, "Loaded Worker Fake worker @ 123412 from API"),
633
+ (logging.INFO, "Modern configuration is not available"),
629
634
  (
630
635
  logging.INFO,
631
636
  "Retrieving data for Set (train) from Dataset (dataset_id) (1/1)",
tests/test_element.py CHANGED
@@ -5,26 +5,36 @@ from arkindex_worker.cache import CachedElement
5
5
  from arkindex_worker.models import Element
6
6
 
7
7
 
8
- def test_no_image_url():
8
+ @pytest.mark.parametrize(
9
+ ("zone", "expected_url"),
10
+ [
11
+ (None, None),
12
+ (
13
+ {"image": {"url": "http://something/", "server": {"version": 2}}},
14
+ "http://something/full/full/0/default.jpg",
15
+ ),
16
+ (
17
+ {"image": {"url": "http://something", "server": {"version": 2}}},
18
+ "http://something/full/full/0/default.jpg",
19
+ ),
20
+ (
21
+ {"image": {"url": "http://something/", "server": {"version": 3}}},
22
+ "http://something/full/max/0/default.jpg",
23
+ ),
24
+ ],
25
+ )
26
+ def test_image_url(zone, expected_url):
9
27
  url = Element({"zone": None}).image_url()
10
28
  assert not url
11
29
 
12
30
 
13
- def test_image_url_iiif():
14
- url = Element({"zone": {"image": {"url": "http://something/"}}}).image_url()
15
- assert url == "http://something/full/full/0/default.jpg"
16
-
17
-
18
31
  def test_image_url_iiif_resize():
19
- url = Element({"zone": {"image": {"url": "http://something/"}}}).image_url(500)
32
+ url = Element(
33
+ {"zone": {"image": {"url": "http://something/", "server": {"version": 2}}}}
34
+ ).image_url(500)
20
35
  assert url == "http://something/full/500/0/default.jpg"
21
36
 
22
37
 
23
- def test_image_url_iiif_append_slash():
24
- url = Element({"zone": {"image": {"url": "http://something"}}}).image_url()
25
- assert url == "http://something/full/full/0/default.jpg"
26
-
27
-
28
38
  def test_image_url_s3():
29
39
  url = Element(
30
40
  {
@@ -418,6 +428,36 @@ def test_open_image_rotation_mirror(mocker):
418
428
  )
419
429
 
420
430
 
431
+ def test_open_image_iiif_3(mocker):
432
+ open_mock = mocker.patch(
433
+ "arkindex_worker.image.open_image", return_value="an image!"
434
+ )
435
+ elt = Element(
436
+ {
437
+ "zone": {
438
+ "image": {
439
+ "url": "http://something",
440
+ "server": {
441
+ "max_width": None,
442
+ "max_height": None,
443
+ "version": 3,
444
+ },
445
+ },
446
+ "polygon": [[0, 0], [181, 0], [181, 240], [0, 240], [0, 0]],
447
+ },
448
+ "rotation_angle": 0,
449
+ "mirrored": False,
450
+ },
451
+ )
452
+ assert elt.open_image(use_full_image=True) == "an image!"
453
+ assert open_mock.call_count == 1
454
+ assert open_mock.call_args == mocker.call(
455
+ "http://something/full/max/0/default.jpg",
456
+ rotation_angle=0,
457
+ mirrored=False,
458
+ )
459
+
460
+
421
461
  def test_setattr_setitem():
422
462
  element = Element({"name": "something"})
423
463
  element.type = "page"
@@ -4,4 +4,8 @@ BASE_API_CALLS = [
4
4
  "GET",
5
5
  "http://testserver/api/v1/process/workers/56785678-5678-5678-5678-567856785678/",
6
6
  ),
7
+ (
8
+ "GET",
9
+ "http://testserver/api/v1/workers/runs/56785678-5678-5678-5678-567856785678/configuration/",
10
+ ),
7
11
  ]