arkindex-base-worker 0.5.0b2__py3-none-any.whl → 0.5.0rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -119,19 +119,6 @@ def test_create_metadata_wrong_value(mock_elements_worker):
119
119
  )
120
120
 
121
121
 
122
- def test_create_metadata_wrong_entity(mock_elements_worker):
123
- elt = Element({"id": "12341234-1234-1234-1234-123412341234"})
124
-
125
- with pytest.raises(AssertionError, match="entity should be of type str"):
126
- mock_elements_worker.create_metadata(
127
- element=elt,
128
- type=MetaType.Location,
129
- name="Teklia",
130
- value="La Turbine, Grenoble 38000",
131
- entity=1234,
132
- )
133
-
134
-
135
122
  def test_create_metadata_api_error(responses, mock_elements_worker):
136
123
  elt = Element({"id": "12341234-1234-1234-1234-123412341234"})
137
124
  responses.add(
@@ -188,7 +175,6 @@ def test_create_metadata(responses, mock_elements_worker):
188
175
  "type": "location",
189
176
  "name": "Teklia",
190
177
  "value": "La Turbine, Grenoble 38000",
191
- "entity_id": None,
192
178
  "worker_run_id": "56785678-5678-5678-5678-567856785678",
193
179
  }
194
180
  assert metadata_id == "12345678-1234-1234-1234-123456789123"
@@ -223,7 +209,6 @@ def test_create_metadata_cached_element(responses, mock_elements_worker_with_cac
223
209
  "type": "location",
224
210
  "name": "Teklia",
225
211
  "value": "La Turbine, Grenoble 38000",
226
- "entity_id": None,
227
212
  "worker_run_id": "56785678-5678-5678-5678-567856785678",
228
213
  }
229
214
  assert metadata_id == "12345678-1234-1234-1234-123456789123"
@@ -239,7 +224,6 @@ def test_create_metadata_bulk(batch_size, responses, mock_elements_worker):
239
224
  "type": MetaType.Text,
240
225
  "name": "Year",
241
226
  "value": "2024",
242
- "entity_id": "entity_id",
243
227
  },
244
228
  ]
245
229
  if batch_size > 1:
@@ -256,7 +240,6 @@ def test_create_metadata_bulk(batch_size, responses, mock_elements_worker):
256
240
  "name": metadata_list[0]["name"],
257
241
  "value": metadata_list[0]["value"],
258
242
  "dates": [],
259
- "entity_id": None,
260
243
  },
261
244
  {
262
245
  "id": "fake_metadata_id2",
@@ -264,7 +247,6 @@ def test_create_metadata_bulk(batch_size, responses, mock_elements_worker):
264
247
  "name": metadata_list[1]["name"],
265
248
  "value": metadata_list[1]["value"],
266
249
  "dates": [],
267
- "entity_id": metadata_list[1]["entity_id"],
268
250
  },
269
251
  ],
270
252
  },
@@ -284,7 +266,6 @@ def test_create_metadata_bulk(batch_size, responses, mock_elements_worker):
284
266
  "name": meta["name"],
285
267
  "value": meta["value"],
286
268
  "dates": [],
287
- "entity_id": meta.get("entity_id"),
288
269
  }
289
270
  ],
290
271
  },
@@ -316,7 +297,6 @@ def test_create_metadata_bulk(batch_size, responses, mock_elements_worker):
316
297
  first_meta = {
317
298
  **metadata_list[0],
318
299
  "type": metadata_list[0]["type"].value,
319
- "entity_id": None,
320
300
  }
321
301
  second_meta = {**metadata_list[1], "type": metadata_list[1]["type"].value}
322
302
  empty_payload = {
@@ -346,7 +326,6 @@ def test_create_metadata_bulk(batch_size, responses, mock_elements_worker):
346
326
  "name": metadata_list[0]["name"],
347
327
  "value": metadata_list[0]["value"],
348
328
  "dates": [],
349
- "entity_id": None,
350
329
  },
351
330
  {
352
331
  "id": "fake_metadata_id2",
@@ -354,7 +333,6 @@ def test_create_metadata_bulk(batch_size, responses, mock_elements_worker):
354
333
  "name": metadata_list[1]["name"],
355
334
  "value": metadata_list[1]["value"],
356
335
  "dates": [],
357
- "entity_id": metadata_list[1]["entity_id"],
358
336
  },
359
337
  ]
360
338
 
@@ -373,7 +351,6 @@ def test_create_metadata_bulk_cached_element(
373
351
  "type": MetaType.Text,
374
352
  "name": "Year",
375
353
  "value": "2024",
376
- "entity_id": "entity_id",
377
354
  },
378
355
  ]
379
356
  if batch_size > 1:
@@ -390,7 +367,6 @@ def test_create_metadata_bulk_cached_element(
390
367
  "name": metadata_list[0]["name"],
391
368
  "value": metadata_list[0]["value"],
392
369
  "dates": [],
393
- "entity_id": None,
394
370
  },
395
371
  {
396
372
  "id": "fake_metadata_id2",
@@ -398,7 +374,6 @@ def test_create_metadata_bulk_cached_element(
398
374
  "name": metadata_list[1]["name"],
399
375
  "value": metadata_list[1]["value"],
400
376
  "dates": [],
401
- "entity_id": metadata_list[1]["entity_id"],
402
377
  },
403
378
  ],
404
379
  },
@@ -418,7 +393,6 @@ def test_create_metadata_bulk_cached_element(
418
393
  "name": meta["name"],
419
394
  "value": meta["value"],
420
395
  "dates": [],
421
- "entity_id": meta.get("entity_id"),
422
396
  }
423
397
  ],
424
398
  },
@@ -450,7 +424,6 @@ def test_create_metadata_bulk_cached_element(
450
424
  first_meta = {
451
425
  **metadata_list[0],
452
426
  "type": metadata_list[0]["type"].value,
453
- "entity_id": None,
454
427
  }
455
428
  second_meta = {**metadata_list[1], "type": metadata_list[1]["type"].value}
456
429
  empty_payload = {
@@ -480,7 +453,6 @@ def test_create_metadata_bulk_cached_element(
480
453
  "name": metadata_list[0]["name"],
481
454
  "value": metadata_list[0]["value"],
482
455
  "dates": [],
483
- "entity_id": None,
484
456
  },
485
457
  {
486
458
  "id": "fake_metadata_id2",
@@ -488,7 +460,6 @@ def test_create_metadata_bulk_cached_element(
488
460
  "name": metadata_list[1]["name"],
489
461
  "value": metadata_list[1]["value"],
490
462
  "dates": [],
491
- "entity_id": metadata_list[1]["entity_id"],
492
463
  },
493
464
  ]
494
465
 
@@ -552,23 +523,6 @@ def test_create_metadata_bulk_wrong_value(mock_elements_worker, wrong_value):
552
523
  )
553
524
 
554
525
 
555
- @pytest.mark.parametrize("wrong_entity", [[1, 2, 3, 4], 1234, 12.5])
556
- def test_create_metadata_bulk_wrong_entity(mock_elements_worker, wrong_entity):
557
- element = Element({"id": "fake_element_id"})
558
- wrong_metadata_list = [
559
- {
560
- "type": MetaType.Text,
561
- "name": "fake_name",
562
- "value": "fake_value",
563
- "entity_id": wrong_entity,
564
- }
565
- ]
566
- with pytest.raises(AssertionError, match="entity_id should be None or a str"):
567
- mock_elements_worker.create_metadata_bulk(
568
- element=element, metadata_list=wrong_metadata_list
569
- )
570
-
571
-
572
526
  def test_create_metadata_bulk_api_error(responses, mock_elements_worker):
573
527
  element = Element({"id": "12341234-1234-1234-1234-123412341234"})
574
528
  metadata_list = [
@@ -576,7 +530,6 @@ def test_create_metadata_bulk_api_error(responses, mock_elements_worker):
576
530
  "type": MetaType.Text,
577
531
  "name": "fake_name",
578
532
  "value": "fake_value",
579
- "entity_id": "fake_entity_id",
580
533
  }
581
534
  ]
582
535
  responses.add(
tests/test_merge.py CHANGED
@@ -7,7 +7,6 @@ from arkindex_worker.cache import (
7
7
  SQL_VERSION,
8
8
  CachedClassification,
9
9
  CachedElement,
10
- CachedEntity,
11
10
  CachedImage,
12
11
  CachedTranscription,
13
12
  CachedTranscriptionEntity,
@@ -85,7 +84,6 @@ def test_merge_databases(
85
84
  assert CachedElement.select().count() == 0
86
85
  assert CachedTranscription.select().count() == 0
87
86
  assert CachedClassification.select().count() == 0
88
- assert CachedEntity.select().count() == 0
89
87
  assert CachedTranscriptionEntity.select().count() == 0
90
88
 
91
89
  # Retrieve parents databases paths
@@ -103,7 +101,6 @@ def test_merge_databases(
103
101
  assert CachedElement.select().count() == len(expected_elements)
104
102
  assert CachedTranscription.select().count() == len(expected_transcriptions)
105
103
  assert CachedClassification.select().count() == 0
106
- assert CachedEntity.select().count() == 0
107
104
  assert CachedTranscriptionEntity.select().count() == 0
108
105
  assert [
109
106
  e.id for e in CachedElement.select().order_by("id")
@@ -124,7 +121,6 @@ def test_merge_chunk(mock_databases, tmp_path):
124
121
  assert CachedElement.select().count() == 0
125
122
  assert CachedTranscription.select().count() == 0
126
123
  assert CachedClassification.select().count() == 0
127
- assert CachedEntity.select().count() == 0
128
124
  assert CachedTranscriptionEntity.select().count() == 0
129
125
 
130
126
  # Check filenames
@@ -144,7 +140,6 @@ def test_merge_chunk(mock_databases, tmp_path):
144
140
  assert CachedElement.select().count() == 3
145
141
  assert CachedTranscription.select().count() == 0
146
142
  assert CachedClassification.select().count() == 0
147
- assert CachedEntity.select().count() == 0
148
143
  assert CachedTranscriptionEntity.select().count() == 0
149
144
  assert [e.id for e in CachedElement.select().order_by("id")] == [
150
145
  UUID("42424242-4242-4242-4242-424242424242"),
@@ -171,7 +166,6 @@ def test_merge_from_worker(
171
166
  assert CachedElement.select().count() == 0
172
167
  assert CachedTranscription.select().count() == 0
173
168
  assert CachedClassification.select().count() == 0
174
- assert CachedEntity.select().count() == 0
175
169
  assert CachedTranscriptionEntity.select().count() == 0
176
170
 
177
171
  # Configure worker with a specific data directory
@@ -191,7 +185,6 @@ def test_merge_from_worker(
191
185
  assert CachedElement.select().count() == 3
192
186
  assert CachedTranscription.select().count() == 1
193
187
  assert CachedClassification.select().count() == 0
194
- assert CachedEntity.select().count() == 0
195
188
  assert CachedTranscriptionEntity.select().count() == 0
196
189
  assert [e.id for e in CachedElement.select().order_by("id")] == [
197
190
  UUID("12341234-1234-1234-1234-123412341234"),
@@ -1,293 +0,0 @@
1
- import pytest
2
- from responses import matchers
3
-
4
- from arkindex.exceptions import ErrorResponse
5
- from arkindex_worker.models import Transcription
6
- from arkindex_worker.worker.entity import MissingEntityType
7
- from tests import CORPUS_ID
8
-
9
- from . import BASE_API_CALLS
10
-
11
-
12
- def test_create_entity_type_wrong_name(mock_elements_worker):
13
- with pytest.raises(
14
- AssertionError, match="name shouldn't be null and should be of type str"
15
- ):
16
- mock_elements_worker.create_entity_type(name=None)
17
-
18
- with pytest.raises(
19
- AssertionError, match="name shouldn't be null and should be of type str"
20
- ):
21
- mock_elements_worker.create_entity_type(name=1234)
22
-
23
-
24
- def test_create_entity_type_api_error(responses, mock_elements_worker):
25
- responses.add(
26
- responses.POST,
27
- "http://testserver/api/v1/entity/types/",
28
- status=418,
29
- )
30
-
31
- with pytest.raises(ErrorResponse):
32
- mock_elements_worker.create_entity_type(name="firstname")
33
-
34
- assert len(responses.calls) == len(BASE_API_CALLS) + 1
35
- assert [
36
- (call.request.method, call.request.url) for call in responses.calls
37
- ] == BASE_API_CALLS + [("POST", "http://testserver/api/v1/entity/types/")]
38
-
39
-
40
- def test_create_entity_type_already_exists(responses, mock_elements_worker):
41
- assert mock_elements_worker.entity_types == {}
42
-
43
- responses.add(
44
- responses.POST,
45
- "http://testserver/api/v1/entity/types/",
46
- status=400,
47
- match=[
48
- matchers.json_params_matcher({"name": "firstname", "corpus": CORPUS_ID})
49
- ],
50
- )
51
- responses.add(
52
- responses.GET,
53
- f"http://testserver/api/v1/corpus/{CORPUS_ID}/entity-types/",
54
- status=200,
55
- json={
56
- "count": 1,
57
- "next": None,
58
- "results": [
59
- {"id": "lastname-id", "name": "lastname", "color": "ffd1b3"},
60
- {"id": "firstname-id", "name": "firstname", "color": "ffd1b3"},
61
- ],
62
- },
63
- )
64
-
65
- mock_elements_worker.create_entity_type(name="firstname")
66
-
67
- assert len(responses.calls) == len(BASE_API_CALLS) + 2
68
- assert [
69
- (call.request.method, call.request.url) for call in responses.calls
70
- ] == BASE_API_CALLS + [
71
- ("POST", "http://testserver/api/v1/entity/types/"),
72
- ("GET", f"http://testserver/api/v1/corpus/{CORPUS_ID}/entity-types/"),
73
- ]
74
-
75
- # Make sure the entity_types attribute has been updated
76
- assert mock_elements_worker.entity_types == {
77
- "lastname": "lastname-id",
78
- "firstname": "firstname-id",
79
- }
80
-
81
-
82
- def test_create_entity_type(responses, mock_elements_worker):
83
- assert mock_elements_worker.entity_types == {}
84
-
85
- responses.add(
86
- responses.POST,
87
- "http://testserver/api/v1/entity/types/",
88
- status=200,
89
- match=[
90
- matchers.json_params_matcher({"name": "firstname", "corpus": CORPUS_ID})
91
- ],
92
- json={
93
- "id": "firstname-id",
94
- "name": "firstname",
95
- "corpus": CORPUS_ID,
96
- "color": "ffd1b3",
97
- },
98
- )
99
-
100
- mock_elements_worker.create_entity_type(name="firstname")
101
-
102
- assert len(responses.calls) == len(BASE_API_CALLS) + 1
103
- assert [
104
- (call.request.method, call.request.url) for call in responses.calls
105
- ] == BASE_API_CALLS + [
106
- ("POST", "http://testserver/api/v1/entity/types/"),
107
- ]
108
-
109
- # Make sure the entity_types attribute has been updated
110
- assert mock_elements_worker.entity_types == {"firstname": "firstname-id"}
111
-
112
-
113
- def test_check_required_entity_types_wrong_entity_types(mock_elements_worker):
114
- with pytest.raises(
115
- AssertionError,
116
- match="entity_types shouldn't be null and should be of type list",
117
- ):
118
- mock_elements_worker.check_required_entity_types(entity_types=None)
119
-
120
- with pytest.raises(
121
- AssertionError,
122
- match="entity_types shouldn't be null and should be of type list",
123
- ):
124
- mock_elements_worker.check_required_entity_types(entity_types=1234)
125
-
126
- with pytest.raises(
127
- AssertionError,
128
- match="Entity type at index 1 in entity_types: Should be of type str",
129
- ):
130
- mock_elements_worker.check_required_entity_types(
131
- entity_types=["firstname", 1234]
132
- )
133
-
134
-
135
- def test_check_required_entity_types_wrong_create_missing(mock_elements_worker):
136
- with pytest.raises(
137
- AssertionError,
138
- match="create_missing shouldn't be null and should be of type bool",
139
- ):
140
- mock_elements_worker.check_required_entity_types(
141
- entity_types=["firstname"], create_missing=None
142
- )
143
-
144
- with pytest.raises(
145
- AssertionError,
146
- match="create_missing shouldn't be null and should be of type bool",
147
- ):
148
- mock_elements_worker.check_required_entity_types(
149
- entity_types=["firstname"], create_missing=1234
150
- )
151
-
152
-
153
- def test_check_required_entity_types_do_not_create_missing(
154
- responses, mock_elements_worker
155
- ):
156
- # Set one entity type
157
- mock_elements_worker.entity_types = {"lastname": "lastname-id"}
158
-
159
- with pytest.raises(
160
- MissingEntityType, match="Entity type `firstname` was not in the corpus."
161
- ):
162
- mock_elements_worker.check_required_entity_types(
163
- entity_types=["lastname", "firstname"], create_missing=False
164
- )
165
-
166
- assert len(responses.calls) == len(BASE_API_CALLS)
167
- assert [
168
- (call.request.method, call.request.url) for call in responses.calls
169
- ] == BASE_API_CALLS
170
-
171
-
172
- def test_check_required_entity_types(responses, mock_elements_worker):
173
- # Set one entity type
174
- mock_elements_worker.entity_types = {"lastname": "lastname-id"}
175
-
176
- # Call to create a new entity type
177
- responses.add(
178
- responses.POST,
179
- "http://testserver/api/v1/entity/types/",
180
- status=200,
181
- match=[
182
- matchers.json_params_matcher({"name": "firstname", "corpus": CORPUS_ID})
183
- ],
184
- json={
185
- "id": "firstname-id",
186
- "name": "firstname",
187
- "corpus": CORPUS_ID,
188
- "color": "ffd1b3",
189
- },
190
- )
191
-
192
- mock_elements_worker.check_required_entity_types(
193
- entity_types=["lastname", "firstname"], create_missing=True
194
- )
195
-
196
- assert len(responses.calls) == len(BASE_API_CALLS) + 1
197
- assert [
198
- (call.request.method, call.request.url) for call in responses.calls
199
- ] == BASE_API_CALLS + [
200
- (
201
- "POST",
202
- "http://testserver/api/v1/entity/types/",
203
- ),
204
- ]
205
-
206
- # Make sure the entity_types attribute has been updated
207
- assert mock_elements_worker.entity_types == {
208
- "lastname": "lastname-id",
209
- "firstname": "firstname-id",
210
- }
211
-
212
-
213
- def test_list_transcription_entities_deprecation(fake_dummy_worker):
214
- transcription = Transcription({"id": "fake_transcription_id"})
215
- worker_version = "worker_version_id"
216
- fake_dummy_worker.api_client.add_response(
217
- "ListTranscriptionEntities",
218
- id=transcription.id,
219
- worker_version=worker_version,
220
- response={"id": "entity_id"},
221
- )
222
- with pytest.deprecated_call(
223
- match="`worker_version` usage is deprecated. Consider using `worker_run` instead."
224
- ):
225
- assert fake_dummy_worker.list_transcription_entities(
226
- transcription, worker_version=worker_version
227
- ) == {"id": "entity_id"}
228
-
229
- assert len(fake_dummy_worker.api_client.history) == 1
230
- assert len(fake_dummy_worker.api_client.responses) == 0
231
-
232
-
233
- def test_list_transcription_entities(fake_dummy_worker):
234
- transcription = Transcription({"id": "fake_transcription_id"})
235
- worker_run = "worker_run_id"
236
- fake_dummy_worker.api_client.add_response(
237
- "ListTranscriptionEntities",
238
- id=transcription.id,
239
- worker_run=worker_run,
240
- response={"id": "entity_id"},
241
- )
242
- assert fake_dummy_worker.list_transcription_entities(
243
- transcription, worker_run=worker_run
244
- ) == {"id": "entity_id"}
245
-
246
- assert len(fake_dummy_worker.api_client.history) == 1
247
- assert len(fake_dummy_worker.api_client.responses) == 0
248
-
249
-
250
- def test_list_corpus_entities(responses, mock_elements_worker):
251
- responses.add(
252
- responses.GET,
253
- f"http://testserver/api/v1/corpus/{CORPUS_ID}/entities/",
254
- json={
255
- "count": 1,
256
- "next": None,
257
- "results": [
258
- {
259
- "id": "fake_entity_id",
260
- }
261
- ],
262
- },
263
- )
264
-
265
- mock_elements_worker.list_corpus_entities()
266
-
267
- assert mock_elements_worker.entities == {
268
- "fake_entity_id": {
269
- "id": "fake_entity_id",
270
- }
271
- }
272
-
273
- assert len(responses.calls) == len(BASE_API_CALLS) + 1
274
- assert [
275
- (call.request.method, call.request.url) for call in responses.calls
276
- ] == BASE_API_CALLS + [
277
- (
278
- "GET",
279
- f"http://testserver/api/v1/corpus/{CORPUS_ID}/entities/",
280
- ),
281
- ]
282
-
283
-
284
- @pytest.mark.parametrize("wrong_name", [1234, 12.5])
285
- def test_list_corpus_entities_wrong_name(mock_elements_worker, wrong_name):
286
- with pytest.raises(AssertionError, match="name should be of type str"):
287
- mock_elements_worker.list_corpus_entities(name=wrong_name)
288
-
289
-
290
- @pytest.mark.parametrize("wrong_parent", [{"id": "element_id"}, 12.5, "blabla"])
291
- def test_list_corpus_entities_wrong_parent(mock_elements_worker, wrong_parent):
292
- with pytest.raises(AssertionError, match="parent should be of type Element"):
293
- mock_elements_worker.list_corpus_entities(parent=wrong_parent)