arkindex-base-worker 0.5.0b3__py3-none-any.whl → 0.5.0rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {arkindex_base_worker-0.5.0b3.dist-info → arkindex_base_worker-0.5.0rc1.dist-info}/METADATA +1 -1
- {arkindex_base_worker-0.5.0b3.dist-info → arkindex_base_worker-0.5.0rc1.dist-info}/RECORD +12 -13
- arkindex_worker/cache.py +3 -22
- arkindex_worker/worker/entity.py +17 -126
- arkindex_worker/worker/metadata.py +0 -11
- tests/test_cache.py +1 -2
- tests/test_elements_worker/{test_entity_create.py → test_entity.py} +220 -227
- tests/test_elements_worker/test_metadata.py +0 -47
- tests/test_merge.py +0 -7
- tests/test_elements_worker/test_entity_list_and_check.py +0 -293
- {arkindex_base_worker-0.5.0b3.dist-info → arkindex_base_worker-0.5.0rc1.dist-info}/WHEEL +0 -0
- {arkindex_base_worker-0.5.0b3.dist-info → arkindex_base_worker-0.5.0rc1.dist-info}/licenses/LICENSE +0 -0
- {arkindex_base_worker-0.5.0b3.dist-info → arkindex_base_worker-0.5.0rc1.dist-info}/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
arkindex_base_worker-0.5.0b3.dist-info/licenses/LICENSE,sha256=NVshRi1efwVezMfW7xXYLrdDr2Li1AfwfGOd5WuH1kQ,1063
|
|
1
|
+
arkindex_base_worker-0.5.0rc1.dist-info/licenses/LICENSE,sha256=NVshRi1efwVezMfW7xXYLrdDr2Li1AfwfGOd5WuH1kQ,1063
|
|
2
2
|
arkindex_worker/__init__.py,sha256=Sdt5KXn8EgURb2MurYVrUWaHbH3iFA1XLRo0Lc5AJ44,250
|
|
3
|
-
arkindex_worker/cache.py,sha256=
|
|
3
|
+
arkindex_worker/cache.py,sha256=x1d1oVF297ItLoZnPkZQoEefa39ZigrwRoHC_6az94k,10731
|
|
4
4
|
arkindex_worker/image.py,sha256=GvIpW7LNSalVw3Obt9nySDWnW7-NbC0__SWREEQqVCk,20696
|
|
5
5
|
arkindex_worker/models.py,sha256=bPQzGZNs5a6z6DEcygsa8T33VOqPlMUbwKzHqlKzwbw,9923
|
|
6
6
|
arkindex_worker/utils.py,sha256=MbbJT8oh8DMHHR-vidFeXdUH0TSXGWm7ZDGWzrRXoEY,9933
|
|
@@ -10,9 +10,9 @@ arkindex_worker/worker/classification.py,sha256=qvykymkgd4nGywHCxL8obo4egstoGsmW
|
|
|
10
10
|
arkindex_worker/worker/corpus.py,sha256=MeIMod7jkWyX0frtD0a37rhumnMV3p9ZOC1xwAoXrAA,2291
|
|
11
11
|
arkindex_worker/worker/dataset.py,sha256=tVaPx43vaH-KTtx4w5V06e26ha8XPfiJTRzBXlu928Y,5273
|
|
12
12
|
arkindex_worker/worker/element.py,sha256=982Dnk73v8wykCh3gweVi3q-bnvaY1LwkDFoNIoJ3KY,46579
|
|
13
|
-
arkindex_worker/worker/entity.py,sha256=
|
|
13
|
+
arkindex_worker/worker/entity.py,sha256=Aj6EOfzHEm7qQV-Egm0YKLZgCrLS_3ggOKTY81M2JbI,12323
|
|
14
14
|
arkindex_worker/worker/image.py,sha256=L6Ikuf0Z0RxJk7JarY5PggJGrYSHLaPK0vn0dy0CIaQ,623
|
|
15
|
-
arkindex_worker/worker/metadata.py,sha256=
|
|
15
|
+
arkindex_worker/worker/metadata.py,sha256=rBjU057xngwrf32vAo-2cpgYfmrdEj3lfDg_kv4-zr0,6810
|
|
16
16
|
arkindex_worker/worker/process.py,sha256=9TEHpMcBax1wc6PrWMMrdXe2uNfqyVj7n_dAYZRBGnY,1854
|
|
17
17
|
arkindex_worker/worker/task.py,sha256=nYfMSFm_d-4t8y4PO4HjFBnLsZf7IsDjkS7-A2Pgnac,1525
|
|
18
18
|
arkindex_worker/worker/training.py,sha256=tyQOHcwv--_wdYz6CgLEe1YM7kwwwKN30LvGTsnWd78,10923
|
|
@@ -23,11 +23,11 @@ hooks/pre_gen_project.py,sha256=xQJERv3vv9VzIqcBHI281eeWLWREXUF4mMw7PvJHHXM,269
|
|
|
23
23
|
tests/__init__.py,sha256=DG--S6IpGl399rzSAjDdHL76CkOIeZIjajCcyUSDhOQ,241
|
|
24
24
|
tests/conftest.py,sha256=kR9zYRHri2BPvzQbbhnvylHba2xvw0w8v1qaLwdGkK0,20993
|
|
25
25
|
tests/test_base_worker.py,sha256=dA00oxauTSCwnFX3ZFBl-RI71HN6GmK48FBBW_oYN-k,30627
|
|
26
|
-
tests/test_cache.py,sha256=
|
|
26
|
+
tests/test_cache.py,sha256=nnEFfAAqtYHk2ymOwN0spXJd8nrRiwp3voj0tOmIbQ8,10407
|
|
27
27
|
tests/test_dataset_worker.py,sha256=z8ydliUlwW2j-irgLAotJMacgJXkVvF5TgsWLyCn1Jo,22087
|
|
28
28
|
tests/test_element.py,sha256=2G9M15TLxQRmvrWM9Kw2ucnElh4kSv_oF_5FYwwAxTY,13181
|
|
29
29
|
tests/test_image.py,sha256=NEIp5evr6QoTWgJ-_fze19IEFm_hG6YEcuW1kxnxS_I,28013
|
|
30
|
-
tests/test_merge.py,sha256=
|
|
30
|
+
tests/test_merge.py,sha256=REpZ13jkq_qm_4L5URQgFy5lxvPZtXxQEiWfYLMdmF0,7956
|
|
31
31
|
tests/test_utils.py,sha256=nYL1s2ViZoLoMiNpLGDaWwxf8dJ1D8aT522AO-PVaEQ,3607
|
|
32
32
|
tests/test_elements_worker/__init__.py,sha256=Fh4nkbbyJSMv_VtjQxnWrOqTnxXaaWI8S9WU0VrzCHs,179
|
|
33
33
|
tests/test_elements_worker/test_classification.py,sha256=nya7veSPR_O9G41Enodp2-o6AifMBcaSTWJP2vXSSJ4,30133
|
|
@@ -39,10 +39,9 @@ tests/test_elements_worker/test_element_create_multiple.py,sha256=arYFGmxc0517ZU
|
|
|
39
39
|
tests/test_elements_worker/test_element_create_single.py,sha256=Fa9zm12J2rQ3VrUe3yIlHAc7Vty_eQYb_YGnNPQB3IE,16697
|
|
40
40
|
tests/test_elements_worker/test_element_list_children.py,sha256=2zH4h663w3EduqpzQr-7bf9zIDzO1x2WxdUYYHsIHkI,31358
|
|
41
41
|
tests/test_elements_worker/test_element_list_parents.py,sha256=TXeGW-a3W-7GmB2QrhJH9mMnvxuybeAwQ4tL3iIxwXo,16734
|
|
42
|
-
tests/test_elements_worker/
|
|
43
|
-
tests/test_elements_worker/test_entity_list_and_check.py,sha256=zAfwa49D8lHZdB7dqQu14R0P0SQu40qNalW7RjOPYic,9456
|
|
42
|
+
tests/test_elements_worker/test_entity.py,sha256=SNAZEsVVLnqlliOmjkgv_cZhw0bAuJUY70_z57PpEE0,29624
|
|
44
43
|
tests/test_elements_worker/test_image.py,sha256=BljMNKgec_9a5bzNzFpYZIvSbuvwsWDfdqLHVJaTa7M,2079
|
|
45
|
-
tests/test_elements_worker/test_metadata.py,sha256=
|
|
44
|
+
tests/test_elements_worker/test_metadata.py,sha256=qtTDtlp3VnBkfck7PAguK2dEgTLlr1i1EVnmNTeNf3A,20515
|
|
46
45
|
tests/test_elements_worker/test_process.py,sha256=y4RoVhPfyHzR795fw7-_FXElBcKo3fy4Ew_HI-kxJic,3088
|
|
47
46
|
tests/test_elements_worker/test_task.py,sha256=wTUWqN9UhfKmJn3IcFY75EW4I1ulRhisflmY1kmP47s,5574
|
|
48
47
|
tests/test_elements_worker/test_training.py,sha256=qgK7BLucddRzc8ePbQtY75x17QvGDEq5XCwgyyvmAJE,8717
|
|
@@ -55,7 +54,7 @@ worker-demo/tests/conftest.py,sha256=XzNMNeg6pmABUAH8jN6eZTlZSFGLYjS3-DTXjiRN6Yc
|
|
|
55
54
|
worker-demo/tests/test_worker.py,sha256=3DLd4NRK4bfyatG5P_PK4k9P9tJHx9XQq5_ryFEEFVg,304
|
|
56
55
|
worker-demo/worker_demo/__init__.py,sha256=2BPomV8ZMNf3YXJgloatKeHQCE6QOkwmsHGkO6MkQuM,125
|
|
57
56
|
worker-demo/worker_demo/worker.py,sha256=Rt-DjWa5iBP08k58NDZMfeyPuFbtNcbX6nc5jFX7GNo,440
|
|
58
|
-
arkindex_base_worker-0.5.
|
|
59
|
-
arkindex_base_worker-0.5.0b3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
60
|
-
arkindex_base_worker-0.5.0b3.dist-info/top_level.txt,sha256=-vNjP2VfROx0j83mdi9aIqRZ88eoJjxeWz-R_gPgyXU,49
|
|
61
|
-
arkindex_base_worker-0.5.0b3.dist-info/RECORD,,
|
|
57
|
+
arkindex_base_worker-0.5.0rc1.dist-info/METADATA,sha256=dwzUo0A4B3Rn1oA9i43q2qwSyeF3g2bPrNtyIPv2Mwk,3137
|
|
58
|
+
arkindex_base_worker-0.5.0rc1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
59
|
+
arkindex_base_worker-0.5.0rc1.dist-info/top_level.txt,sha256=-vNjP2VfROx0j83mdi9aIqRZ88eoJjxeWz-R_gPgyXU,49
|
|
60
|
+
arkindex_base_worker-0.5.0rc1.dist-info/RECORD,,
|
arkindex_worker/cache.py
CHANGED
|
@@ -206,23 +206,6 @@ class CachedClassification(Model):
|
|
|
206
206
|
table_name = "classifications"
|
|
207
207
|
|
|
208
208
|
|
|
209
|
-
class CachedEntity(Model):
|
|
210
|
-
"""
|
|
211
|
-
Cache entity table
|
|
212
|
-
"""
|
|
213
|
-
|
|
214
|
-
id = UUIDField(primary_key=True)
|
|
215
|
-
type = CharField(max_length=50)
|
|
216
|
-
name = TextField()
|
|
217
|
-
validated = BooleanField(default=False)
|
|
218
|
-
metas = JSONField(null=True)
|
|
219
|
-
worker_run_id = UUIDField(null=True)
|
|
220
|
-
|
|
221
|
-
class Meta:
|
|
222
|
-
database = db
|
|
223
|
-
table_name = "entities"
|
|
224
|
-
|
|
225
|
-
|
|
226
209
|
class CachedTranscriptionEntity(Model):
|
|
227
210
|
"""
|
|
228
211
|
Cache transcription entity table
|
|
@@ -231,14 +214,14 @@ class CachedTranscriptionEntity(Model):
|
|
|
231
214
|
transcription = ForeignKeyField(
|
|
232
215
|
CachedTranscription, backref="transcription_entities"
|
|
233
216
|
)
|
|
234
|
-
|
|
217
|
+
type = CharField(max_length=50)
|
|
235
218
|
offset = IntegerField(constraints=[Check("offset >= 0")])
|
|
236
219
|
length = IntegerField(constraints=[Check("length > 0")])
|
|
237
220
|
worker_run_id = UUIDField(null=True)
|
|
238
221
|
confidence = FloatField(null=True)
|
|
239
222
|
|
|
240
223
|
class Meta:
|
|
241
|
-
primary_key = CompositeKey("transcription", "
|
|
224
|
+
primary_key = CompositeKey("transcription", "type")
|
|
242
225
|
database = db
|
|
243
226
|
table_name = "transcription_entities"
|
|
244
227
|
|
|
@@ -272,12 +255,11 @@ MODELS = [
|
|
|
272
255
|
CachedElement,
|
|
273
256
|
CachedTranscription,
|
|
274
257
|
CachedClassification,
|
|
275
|
-
CachedEntity,
|
|
276
258
|
CachedTranscriptionEntity,
|
|
277
259
|
CachedDataset,
|
|
278
260
|
CachedDatasetElement,
|
|
279
261
|
]
|
|
280
|
-
SQL_VERSION =
|
|
262
|
+
SQL_VERSION = 4
|
|
281
263
|
|
|
282
264
|
|
|
283
265
|
def init_cache_db(path: Path):
|
|
@@ -365,7 +347,6 @@ def merge_parents_cache(paths: list, current_database: Path):
|
|
|
365
347
|
f"REPLACE INTO elements SELECT * FROM source_{idx}.elements;",
|
|
366
348
|
f"REPLACE INTO transcriptions SELECT * FROM source_{idx}.transcriptions;",
|
|
367
349
|
f"REPLACE INTO classifications SELECT * FROM source_{idx}.classifications;",
|
|
368
|
-
f"REPLACE INTO entities SELECT * FROM source_{idx}.entities;",
|
|
369
350
|
f"REPLACE INTO transcription_entities SELECT * FROM source_{idx}.transcription_entities;",
|
|
370
351
|
f"REPLACE INTO datasets SELECT * FROM source_{idx}.datasets;",
|
|
371
352
|
f"REPLACE INTO dataset_elements SELECT * FROM source_{idx}.dataset_elements;",
|
arkindex_worker/worker/entity.py
CHANGED
|
@@ -11,16 +11,14 @@ from peewee import IntegrityError
|
|
|
11
11
|
from arkindex.exceptions import ErrorResponse
|
|
12
12
|
from arkindex_worker import logger
|
|
13
13
|
from arkindex_worker.cache import (
|
|
14
|
-
CachedEntity,
|
|
15
14
|
CachedTranscriptionEntity,
|
|
16
15
|
unsupported_cache,
|
|
17
16
|
)
|
|
18
|
-
from arkindex_worker.models import Element, Transcription
|
|
17
|
+
from arkindex_worker.models import Transcription
|
|
19
18
|
from arkindex_worker.utils import pluralize
|
|
20
19
|
|
|
21
20
|
|
|
22
21
|
class Entity(TypedDict):
|
|
23
|
-
name: str
|
|
24
22
|
type_id: str
|
|
25
23
|
length: int
|
|
26
24
|
offset: int
|
|
@@ -126,88 +124,20 @@ class EntityMixin:
|
|
|
126
124
|
# Create the type if non-existent
|
|
127
125
|
self.create_entity_type(entity_type)
|
|
128
126
|
|
|
129
|
-
def create_entity(
|
|
130
|
-
self,
|
|
131
|
-
name: str,
|
|
132
|
-
type: str,
|
|
133
|
-
metas=None,
|
|
134
|
-
validated=None,
|
|
135
|
-
):
|
|
136
|
-
"""
|
|
137
|
-
Create an entity on the given corpus.
|
|
138
|
-
If cache support is enabled, a [CachedEntity][arkindex_worker.cache.CachedEntity] will also be created.
|
|
139
|
-
|
|
140
|
-
:param name: Name of the entity.
|
|
141
|
-
:param type: Type of the entity.
|
|
142
|
-
"""
|
|
143
|
-
assert name and isinstance(name, str), (
|
|
144
|
-
"name shouldn't be null and should be of type str"
|
|
145
|
-
)
|
|
146
|
-
assert type and isinstance(type, str), (
|
|
147
|
-
"type shouldn't be null and should be of type str"
|
|
148
|
-
)
|
|
149
|
-
metas = metas or {}
|
|
150
|
-
if metas:
|
|
151
|
-
assert isinstance(metas, dict), "metas should be of type dict"
|
|
152
|
-
if validated is not None:
|
|
153
|
-
assert isinstance(validated, bool), "validated should be of type bool"
|
|
154
|
-
if self.is_read_only:
|
|
155
|
-
logger.warning("Cannot create entity as this worker is in read-only mode")
|
|
156
|
-
return
|
|
157
|
-
|
|
158
|
-
# Retrieve entity_type ID
|
|
159
|
-
if not self.entity_types:
|
|
160
|
-
# Load entity_types of corpus
|
|
161
|
-
self.list_corpus_entity_types()
|
|
162
|
-
|
|
163
|
-
entity_type_id = self.entity_types.get(type)
|
|
164
|
-
assert entity_type_id, f"Entity type `{type}` not found in the corpus."
|
|
165
|
-
|
|
166
|
-
entity = self.api_client.request(
|
|
167
|
-
"CreateEntity",
|
|
168
|
-
body={
|
|
169
|
-
"name": name,
|
|
170
|
-
"type_id": entity_type_id,
|
|
171
|
-
"metas": metas,
|
|
172
|
-
"validated": validated,
|
|
173
|
-
"corpus": self.corpus_id,
|
|
174
|
-
"worker_run_id": self.worker_run_id,
|
|
175
|
-
},
|
|
176
|
-
)
|
|
177
|
-
|
|
178
|
-
if self.use_cache:
|
|
179
|
-
# Store entity in local cache
|
|
180
|
-
try:
|
|
181
|
-
to_insert = [
|
|
182
|
-
{
|
|
183
|
-
"id": entity["id"],
|
|
184
|
-
"type": type,
|
|
185
|
-
"name": name,
|
|
186
|
-
"validated": validated if validated is not None else False,
|
|
187
|
-
"metas": metas,
|
|
188
|
-
"worker_run_id": self.worker_run_id,
|
|
189
|
-
}
|
|
190
|
-
]
|
|
191
|
-
CachedEntity.insert_many(to_insert).execute()
|
|
192
|
-
except IntegrityError as e:
|
|
193
|
-
logger.warning(f"Couldn't save created entity in local cache: {e}")
|
|
194
|
-
|
|
195
|
-
return entity["id"]
|
|
196
|
-
|
|
197
127
|
def create_transcription_entity(
|
|
198
128
|
self,
|
|
199
129
|
transcription: Transcription,
|
|
200
|
-
|
|
130
|
+
type_id: str,
|
|
201
131
|
offset: int,
|
|
202
132
|
length: int,
|
|
203
133
|
confidence: float | None = None,
|
|
204
134
|
) -> dict[str, str | int] | None:
|
|
205
135
|
"""
|
|
206
|
-
Create
|
|
136
|
+
Create an entity on an existing transcription.
|
|
207
137
|
If cache support is enabled, a `CachedTranscriptionEntity` will also be created.
|
|
208
138
|
|
|
209
139
|
:param transcription: Transcription to create the entity on.
|
|
210
|
-
:param
|
|
140
|
+
:param type_id: UUID of the entity type.
|
|
211
141
|
:param offset: Starting position of the entity in the transcription's text,
|
|
212
142
|
as a 0-based index.
|
|
213
143
|
:param length: Length of the entity in the transcription's text.
|
|
@@ -218,8 +148,8 @@ class EntityMixin:
|
|
|
218
148
|
assert transcription and isinstance(transcription, Transcription), (
|
|
219
149
|
"transcription shouldn't be null and should be a Transcription"
|
|
220
150
|
)
|
|
221
|
-
assert
|
|
222
|
-
"
|
|
151
|
+
assert type_id and isinstance(type_id, str), (
|
|
152
|
+
"type_id shouldn't be null and should be of type str"
|
|
223
153
|
)
|
|
224
154
|
assert offset is not None and isinstance(offset, int) and offset >= 0, (
|
|
225
155
|
"offset shouldn't be null and should be a positive integer"
|
|
@@ -237,7 +167,7 @@ class EntityMixin:
|
|
|
237
167
|
return
|
|
238
168
|
|
|
239
169
|
body = {
|
|
240
|
-
"
|
|
170
|
+
"type_id": type_id,
|
|
241
171
|
"length": length,
|
|
242
172
|
"offset": offset,
|
|
243
173
|
"worker_run_id": self.worker_run_id,
|
|
@@ -245,7 +175,7 @@ class EntityMixin:
|
|
|
245
175
|
if confidence is not None:
|
|
246
176
|
body["confidence"] = confidence
|
|
247
177
|
|
|
248
|
-
|
|
178
|
+
tr_entity = self.api_client.request(
|
|
249
179
|
"CreateTranscriptionEntity",
|
|
250
180
|
id=transcription.id,
|
|
251
181
|
body=body,
|
|
@@ -256,7 +186,7 @@ class EntityMixin:
|
|
|
256
186
|
try:
|
|
257
187
|
CachedTranscriptionEntity.create(
|
|
258
188
|
transcription=transcription.id,
|
|
259
|
-
|
|
189
|
+
type=tr_entity["type"]["name"],
|
|
260
190
|
offset=offset,
|
|
261
191
|
length=length,
|
|
262
192
|
worker_run_id=self.worker_run_id,
|
|
@@ -267,7 +197,7 @@ class EntityMixin:
|
|
|
267
197
|
f"Couldn't save created transcription entity in local cache: {e}"
|
|
268
198
|
)
|
|
269
199
|
|
|
270
|
-
return
|
|
200
|
+
return tr_entity
|
|
271
201
|
|
|
272
202
|
@unsupported_cache
|
|
273
203
|
def create_transcription_entities(
|
|
@@ -276,14 +206,11 @@ class EntityMixin:
|
|
|
276
206
|
entities: list[Entity],
|
|
277
207
|
) -> list[dict[str, str]]:
|
|
278
208
|
"""
|
|
279
|
-
Create multiple entities
|
|
209
|
+
Create multiple entities on a transcription in a single API request.
|
|
280
210
|
|
|
281
211
|
:param transcription: Transcription to create the entity on.
|
|
282
212
|
:param entities: List of dicts, one per element. Each dict can have the following keys:
|
|
283
213
|
|
|
284
|
-
name (str)
|
|
285
|
-
Required. Name of the entity.
|
|
286
|
-
|
|
287
214
|
type_id (str)
|
|
288
215
|
Required. ID of the EntityType of the entity.
|
|
289
216
|
|
|
@@ -296,7 +223,7 @@ class EntityMixin:
|
|
|
296
223
|
confidence (float or None)
|
|
297
224
|
Optional confidence score, between 0.0 and 1.0.
|
|
298
225
|
|
|
299
|
-
:return: List of
|
|
226
|
+
:return: List of strings, holding the UUID of each created object.
|
|
300
227
|
"""
|
|
301
228
|
assert transcription and isinstance(transcription, Transcription), (
|
|
302
229
|
"transcription shouldn't be null and should be of type Transcription"
|
|
@@ -311,11 +238,6 @@ class EntityMixin:
|
|
|
311
238
|
f"Entity at index {index} in entities: Should be of type dict"
|
|
312
239
|
)
|
|
313
240
|
|
|
314
|
-
name = entity.get("name")
|
|
315
|
-
assert name and isinstance(name, str), (
|
|
316
|
-
f"Entity at index {index} in entities: name shouldn't be null and should be of type str"
|
|
317
|
-
)
|
|
318
|
-
|
|
319
241
|
type_id = entity.get("type_id")
|
|
320
242
|
assert type_id and isinstance(type_id, str), (
|
|
321
243
|
f"Entity at index {index} in entities: type_id shouldn't be null and should be of type str"
|
|
@@ -339,7 +261,7 @@ class EntityMixin:
|
|
|
339
261
|
)
|
|
340
262
|
|
|
341
263
|
assert len(entities) == len(
|
|
342
|
-
set(map(itemgetter("offset", "length", "
|
|
264
|
+
set(map(itemgetter("offset", "length", "type_id"), entities))
|
|
343
265
|
), "entities should be unique"
|
|
344
266
|
|
|
345
267
|
if self.is_read_only:
|
|
@@ -348,16 +270,16 @@ class EntityMixin:
|
|
|
348
270
|
)
|
|
349
271
|
return
|
|
350
272
|
|
|
351
|
-
|
|
273
|
+
created_tr_entities = self.api_client.request(
|
|
352
274
|
"CreateTranscriptionEntities",
|
|
353
275
|
id=transcription.id,
|
|
354
276
|
body={
|
|
355
277
|
"worker_run_id": self.worker_run_id,
|
|
356
|
-
"
|
|
278
|
+
"transcription_entities": entities,
|
|
357
279
|
},
|
|
358
|
-
)["
|
|
280
|
+
)["transcription_entities"]
|
|
359
281
|
|
|
360
|
-
return
|
|
282
|
+
return created_tr_entities
|
|
361
283
|
|
|
362
284
|
def list_transcription_entities(
|
|
363
285
|
self,
|
|
@@ -412,34 +334,3 @@ class EntityMixin:
|
|
|
412
334
|
return self.api_client.paginate(
|
|
413
335
|
"ListTranscriptionEntities", id=transcription.id, **query_params
|
|
414
336
|
)
|
|
415
|
-
|
|
416
|
-
def list_corpus_entities(
|
|
417
|
-
self,
|
|
418
|
-
name: str | None = None,
|
|
419
|
-
parent: Element | None = None,
|
|
420
|
-
):
|
|
421
|
-
"""
|
|
422
|
-
List all entities in the worker's corpus and store them in the ``self.entities`` cache.
|
|
423
|
-
:param name: Filter entities by part of their name (case-insensitive)
|
|
424
|
-
:param parent: Restrict entities to those linked to all transcriptions of an element and all its descendants. Note that links to metadata are ignored.
|
|
425
|
-
"""
|
|
426
|
-
query_params = {}
|
|
427
|
-
|
|
428
|
-
if name is not None:
|
|
429
|
-
assert name and isinstance(name, str), "name should be of type str"
|
|
430
|
-
query_params["name"] = name
|
|
431
|
-
|
|
432
|
-
if parent is not None:
|
|
433
|
-
assert isinstance(parent, Element), "parent should be of type Element"
|
|
434
|
-
query_params["parent"] = parent.id
|
|
435
|
-
|
|
436
|
-
self.entities = {
|
|
437
|
-
entity["id"]: entity
|
|
438
|
-
for entity in self.api_client.paginate(
|
|
439
|
-
"ListCorpusEntities", id=self.corpus_id, **query_params
|
|
440
|
-
)
|
|
441
|
-
}
|
|
442
|
-
count = len(self.entities)
|
|
443
|
-
logger.info(
|
|
444
|
-
f"Loaded {count} {pluralize('entity', count)} in corpus ({self.corpus_id})"
|
|
445
|
-
)
|
|
@@ -64,7 +64,6 @@ class MetaDataMixin:
|
|
|
64
64
|
type: MetaType,
|
|
65
65
|
name: str,
|
|
66
66
|
value: str,
|
|
67
|
-
entity: str | None = None,
|
|
68
67
|
) -> str:
|
|
69
68
|
"""
|
|
70
69
|
Create a metadata on the given element through API.
|
|
@@ -73,7 +72,6 @@ class MetaDataMixin:
|
|
|
73
72
|
:param type: Type of the metadata.
|
|
74
73
|
:param name: Name of the metadata.
|
|
75
74
|
:param value: Value of the metadata.
|
|
76
|
-
:param entity: UUID of an entity this metadata is related to.
|
|
77
75
|
:returns: UUID of the created metadata.
|
|
78
76
|
"""
|
|
79
77
|
assert element and isinstance(element, Element | CachedElement), (
|
|
@@ -88,8 +86,6 @@ class MetaDataMixin:
|
|
|
88
86
|
assert value and isinstance(value, str), (
|
|
89
87
|
"value shouldn't be null and should be of type str"
|
|
90
88
|
)
|
|
91
|
-
if entity:
|
|
92
|
-
assert isinstance(entity, str), "entity should be of type str"
|
|
93
89
|
if self.is_read_only:
|
|
94
90
|
logger.warning("Cannot create metadata as this worker is in read-only mode")
|
|
95
91
|
return
|
|
@@ -101,7 +97,6 @@ class MetaDataMixin:
|
|
|
101
97
|
"type": type.value,
|
|
102
98
|
"name": name,
|
|
103
99
|
"value": value,
|
|
104
|
-
"entity_id": entity,
|
|
105
100
|
"worker_run_id": self.worker_run_id,
|
|
106
101
|
},
|
|
107
102
|
)
|
|
@@ -125,7 +120,6 @@ class MetaDataMixin:
|
|
|
125
120
|
- type: MetaType
|
|
126
121
|
- name: str
|
|
127
122
|
- value: str | int | float
|
|
128
|
-
- entity_id: str | None
|
|
129
123
|
:param batch_size: The size of each batch, which will be used to split the publication to avoid API errors.
|
|
130
124
|
|
|
131
125
|
:returns: A list of dicts as returned in the ``metadata_list`` field by the ``CreateMetaDataBulk`` API endpoint.
|
|
@@ -157,16 +151,11 @@ class MetaDataMixin:
|
|
|
157
151
|
metadata.get("value"), str | float | int
|
|
158
152
|
), "value shouldn't be null and should be of type (str or float or int)"
|
|
159
153
|
|
|
160
|
-
assert metadata.get("entity_id") is None or isinstance(
|
|
161
|
-
metadata.get("entity_id"), str
|
|
162
|
-
), "entity_id should be None or a str"
|
|
163
|
-
|
|
164
154
|
metas.append(
|
|
165
155
|
{
|
|
166
156
|
"type": metadata.get("type").value,
|
|
167
157
|
"name": metadata.get("name"),
|
|
168
158
|
"value": metadata.get("value"),
|
|
169
|
-
"entity_id": metadata.get("entity_id"),
|
|
170
159
|
}
|
|
171
160
|
)
|
|
172
161
|
|
tests/test_cache.py
CHANGED
|
@@ -60,9 +60,8 @@ def test_create_tables(tmp_path):
|
|
|
60
60
|
CREATE TABLE "dataset_elements" ("id" TEXT NOT NULL PRIMARY KEY, "element_id" TEXT NOT NULL, "dataset_id" TEXT NOT NULL, "set_name" VARCHAR(255) NOT NULL, FOREIGN KEY ("element_id") REFERENCES "elements" ("id"), FOREIGN KEY ("dataset_id") REFERENCES "datasets" ("id"))
|
|
61
61
|
CREATE TABLE "datasets" ("id" TEXT NOT NULL PRIMARY KEY, "name" VARCHAR(255) NOT NULL, "state" VARCHAR(255) NOT NULL DEFAULT 'open', "sets" TEXT NOT NULL)
|
|
62
62
|
CREATE TABLE "elements" ("id" TEXT NOT NULL PRIMARY KEY, "parent_id" TEXT, "type" VARCHAR(50) NOT NULL, "image_id" TEXT, "polygon" text, "rotation_angle" INTEGER NOT NULL, "mirrored" INTEGER NOT NULL, "initial" INTEGER NOT NULL, "worker_version_id" TEXT, "worker_run_id" TEXT, "confidence" REAL, FOREIGN KEY ("image_id") REFERENCES "images" ("id"))
|
|
63
|
-
CREATE TABLE "entities" ("id" TEXT NOT NULL PRIMARY KEY, "type" VARCHAR(50) NOT NULL, "name" TEXT NOT NULL, "validated" INTEGER NOT NULL, "metas" text, "worker_run_id" TEXT)
|
|
64
63
|
CREATE TABLE "images" ("id" TEXT NOT NULL PRIMARY KEY, "width" INTEGER NOT NULL, "height" INTEGER NOT NULL, "url" TEXT NOT NULL)
|
|
65
|
-
CREATE TABLE "transcription_entities" ("transcription_id" TEXT NOT NULL, "
|
|
64
|
+
CREATE TABLE "transcription_entities" ("transcription_id" TEXT NOT NULL, "type" VARCHAR(50) NOT NULL, "offset" INTEGER NOT NULL CHECK (offset >= 0), "length" INTEGER NOT NULL CHECK (length > 0), "worker_run_id" TEXT, "confidence" REAL, PRIMARY KEY ("transcription_id", "type"), FOREIGN KEY ("transcription_id") REFERENCES "transcriptions" ("id"))
|
|
66
65
|
CREATE TABLE "transcriptions" ("id" TEXT NOT NULL PRIMARY KEY, "element_id" TEXT NOT NULL, "text" TEXT NOT NULL, "confidence" REAL, "orientation" VARCHAR(50) NOT NULL, "worker_version_id" TEXT, "worker_run_id" TEXT, FOREIGN KEY ("element_id") REFERENCES "elements" ("id"))"""
|
|
67
66
|
|
|
68
67
|
actual_schema = "\n".join(
|