arkindex-base-worker 0.5.0a1__py3-none-any.whl → 0.5.0a3__py3-none-any.whl
This diff compares two publicly released versions of the package as published to a supported public registry. It is provided for informational purposes only and reflects the packages exactly as they appear in that registry.
- {arkindex_base_worker-0.5.0a1.dist-info → arkindex_base_worker-0.5.0a3.dist-info}/METADATA +2 -2
- {arkindex_base_worker-0.5.0a1.dist-info → arkindex_base_worker-0.5.0a3.dist-info}/RECORD +21 -21
- arkindex_worker/__init__.py +3 -0
- arkindex_worker/cache.py +3 -3
- arkindex_worker/image.py +31 -24
- arkindex_worker/worker/__init__.py +17 -17
- arkindex_worker/worker/base.py +6 -6
- arkindex_worker/worker/classification.py +34 -32
- arkindex_worker/worker/corpus.py +3 -3
- arkindex_worker/worker/dataset.py +9 -9
- arkindex_worker/worker/element.py +193 -189
- arkindex_worker/worker/entity.py +61 -60
- arkindex_worker/worker/image.py +3 -3
- arkindex_worker/worker/metadata.py +27 -27
- arkindex_worker/worker/task.py +9 -9
- arkindex_worker/worker/training.py +15 -11
- arkindex_worker/worker/transcription.py +77 -71
- tests/test_elements_worker/test_training.py +6 -6
- {arkindex_base_worker-0.5.0a1.dist-info → arkindex_base_worker-0.5.0a3.dist-info}/LICENSE +0 -0
- {arkindex_base_worker-0.5.0a1.dist-info → arkindex_base_worker-0.5.0a3.dist-info}/WHEEL +0 -0
- {arkindex_base_worker-0.5.0a1.dist-info → arkindex_base_worker-0.5.0a3.dist-info}/top_level.txt +0 -0
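Almost every hunk reproduced below applies the same mechanical change: `assert` statements are rewrapped so the condition stays on one line and the failure message moves into parentheses on its own lines. This looks like the wrapping style applied by recent Python auto-formatters (an inference from the diff, not something the package states); the runtime behaviour of the assertions is unchanged. A minimal side-by-side sketch of the two spellings, using a placeholder value:

```python
# Both spellings are equivalent at runtime; only the line wrapping differs.
# The `name` value is a placeholder for illustration.
name = "page"

# Style used by 0.5.0a1: the condition is split, the message trails the closing parenthesis.
assert name and isinstance(
    name, str
), "name shouldn't be null and should be of type str"

# Style used by 0.5.0a3: the condition stays on one line, the message is parenthesized.
assert name and isinstance(name, str), (
    "name shouldn't be null and should be of type str"
)
```

The only non-formatting edits visible in the hunks reproduced below are the two `pluralize(...)` log messages in `arkindex_worker/worker/entity.py`.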
arkindex_worker/worker/entity.py
CHANGED
@@ -85,12 +85,12 @@ class EntityMixin:
         :param name: Name of the entity.
         :param type: Type of the entity.
         """
-        assert name and isinstance(
-            name, str
-        ), "name shouldn't be null and should be of type str"
-        assert type and isinstance(
-            type, str
-        ), "type shouldn't be null and should be of type str"
+        assert name and isinstance(name, str), (
+            "name shouldn't be null and should be of type str"
+        )
+        assert type and isinstance(type, str), (
+            "type shouldn't be null and should be of type str"
+        )
         metas = metas or {}
         if metas:
             assert isinstance(metas, dict), "metas should be of type dict"
@@ -160,18 +160,18 @@ class EntityMixin:
         :returns: A dict as returned by the ``CreateTranscriptionEntity`` API endpoint,
             or None if the worker is in read-only mode.
         """
-        assert transcription and isinstance(
-            transcription, Transcription
-        ), "transcription shouldn't be null and should be a Transcription"
-        assert entity and isinstance(
-            entity, str
-        ), "entity shouldn't be null and should be of type str"
-        assert (
-            offset is not None and isinstance(offset, int) and offset >= 0
-        ), "offset shouldn't be null and should be a positive integer"
-        assert (
-            length is not None and isinstance(length, int) and length > 0
-        ), "length shouldn't be null and should be a strictly positive integer"
+        assert transcription and isinstance(transcription, Transcription), (
+            "transcription shouldn't be null and should be a Transcription"
+        )
+        assert entity and isinstance(entity, str), (
+            "entity shouldn't be null and should be of type str"
+        )
+        assert offset is not None and isinstance(offset, int) and offset >= 0, (
+            "offset shouldn't be null and should be a positive integer"
+        )
+        assert length is not None and isinstance(length, int) and length > 0, (
+            "length shouldn't be null and should be a strictly positive integer"
+        )
         assert (
             confidence is None or isinstance(confidence, float) and 0 <= confidence <= 1
         ), "confidence should be null or a float in [0..1] range"
@@ -242,44 +242,45 @@ class EntityMixin:

         :return: List of dicts, with each dict having a two keys, `transcription_entity_id` and `entity_id`, holding the UUID of each created object.
         """
-        assert transcription and isinstance(
-            transcription, Transcription
-        ), "transcription shouldn't be null and should be of type Transcription"
+        assert transcription and isinstance(transcription, Transcription), (
+            "transcription shouldn't be null and should be of type Transcription"
+        )

-        assert entities and isinstance(
-            entities, list
-        ), "entities shouldn't be null and should be of type list"
+        assert entities and isinstance(entities, list), (
+            "entities shouldn't be null and should be of type list"
+        )

         for index, entity in enumerate(entities):
-            assert isinstance(
-                entity, dict
-            ), f"Entity at index {index} in entities: Should be of type dict"
+            assert isinstance(entity, dict), (
+                f"Entity at index {index} in entities: Should be of type dict"
+            )

             name = entity.get("name")
-            assert (
-                name and isinstance(name, str)
-            ), f"Entity at index {index} in entities: name shouldn't be null and should be of type str"
+            assert name and isinstance(name, str), (
+                f"Entity at index {index} in entities: name shouldn't be null and should be of type str"
+            )

             type_id = entity.get("type_id")
-            assert (
-                type_id and isinstance(type_id, str)
-            ), f"Entity at index {index} in entities: type_id shouldn't be null and should be of type str"
+            assert type_id and isinstance(type_id, str), (
+                f"Entity at index {index} in entities: type_id shouldn't be null and should be of type str"
+            )

             offset = entity.get("offset")
-            assert (
-                offset is not None and isinstance(offset, int) and offset >= 0
-            ), f"Entity at index {index} in entities: offset shouldn't be null and should be a positive integer"
+            assert offset is not None and isinstance(offset, int) and offset >= 0, (
+                f"Entity at index {index} in entities: offset shouldn't be null and should be a positive integer"
+            )

             length = entity.get("length")
-            assert (
-                length is not None and isinstance(length, int) and length > 0
-            ), f"Entity at index {index} in entities: length shouldn't be null and should be a strictly positive integer"
+            assert length is not None and isinstance(length, int) and length > 0, (
+                f"Entity at index {index} in entities: length shouldn't be null and should be a strictly positive integer"
+            )

             confidence = entity.get("confidence")
-            assert (
-                confidence is None
-                or (isinstance(confidence, float) and 0 <= confidence <= 1)
-            ), f"Entity at index {index} in entities: confidence should be None or a float in [0..1] range"
+            assert confidence is None or (
+                isinstance(confidence, float) and 0 <= confidence <= 1
+            ), (
+                f"Entity at index {index} in entities: confidence should be None or a float in [0..1] range"
+            )

         assert len(entities) == len(
             set(map(itemgetter("offset", "length", "name", "type_id"), entities))
@@ -323,9 +324,9 @@ class EntityMixin:
         :param worker_run: Restrict to entities created by a worker run with this UUID. Set to False to look for manually created entities.
         """
         query_params = {}
-        assert transcription and isinstance(
-            transcription, Transcription
-        ), "transcription shouldn't be null and should be a Transcription"
+        assert transcription and isinstance(transcription, Transcription), (
+            "transcription shouldn't be null and should be a Transcription"
+        )

         if worker_version is not None:
             warn(
@@ -333,23 +334,23 @@ class EntityMixin:
                 DeprecationWarning,
                 stacklevel=1,
             )
-            assert isinstance(
-                worker_version, str | bool
-            ), "worker_version should be of type str or bool"
+            assert isinstance(worker_version, str | bool), (
+                "worker_version should be of type str or bool"
+            )

             if isinstance(worker_version, bool):
-                assert (
-                    worker_version is False
-                ), "if of type bool, worker_version can only be set to False"
+                assert worker_version is False, (
+                    "if of type bool, worker_version can only be set to False"
+                )
             query_params["worker_version"] = worker_version
         if worker_run is not None:
-            assert isinstance(
-                worker_run, str | bool
-            ), "worker_run should be of type str or bool"
+            assert isinstance(worker_run, str | bool), (
+                "worker_run should be of type str or bool"
+            )
             if isinstance(worker_run, bool):
-                assert (
-                    worker_run is False
-                ), "if of type bool, worker_run can only be set to False"
+                assert worker_run is False, (
+                    "if of type bool, worker_run can only be set to False"
+                )
             query_params["worker_run"] = worker_run

         return self.api_client.paginate(
@@ -384,7 +385,7 @@ class EntityMixin:
         }
         count = len(self.entities)
         logger.info(
-            f"Loaded {count} entities in corpus ({self.corpus_id})"
+            f"Loaded {count} {pluralize('entity', count)} in corpus ({self.corpus_id})"
         )

     def list_corpus_entity_types(self):
@@ -399,5 +400,5 @@ class EntityMixin:
         }
         count = len(self.entity_types)
         logger.info(
-            f"Loaded {count} entity types in corpus ({self.corpus_id})."
+            f"Loaded {count} entity {pluralize('type', count)} in corpus ({self.corpus_id})."
         )
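The `CreateTranscriptionEntity` hunk above constrains `offset` to a non-negative integer, `length` to a strictly positive integer, and `confidence` to either `None` or a float in the `[0..1]` range. A standalone sketch of those checks (the `validate_entity_span` helper is hypothetical, not part of the package):

```python
def validate_entity_span(offset: int, length: int, confidence: float | None = None) -> None:
    # Restates the argument checks visible in the hunk above.
    assert offset is not None and isinstance(offset, int) and offset >= 0, (
        "offset shouldn't be null and should be a positive integer"
    )
    assert length is not None and isinstance(length, int) and length > 0, (
        "length shouldn't be null and should be a strictly positive integer"
    )
    assert (
        confidence is None or isinstance(confidence, float) and 0 <= confidence <= 1
    ), "confidence should be null or a float in [0..1] range"


validate_entity_span(offset=0, length=12)                    # passes
validate_entity_span(offset=0, length=12, confidence=0.87)   # passes
```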
arkindex_worker/worker/image.py
CHANGED
@@ -14,8 +14,8 @@ class ImageMixin:
         :param url: URL of the image.
         :returns: The created image.
         """
-        assert url and isinstance(
-            url, str
-        ), "url shouldn't be null and should be of type str"
+        assert url and isinstance(url, str), (
+            "url shouldn't be null and should be of type str"
+        )

         return Image(self.api_client.request("CreateIIIFURL", body={"url": url}))
arkindex_worker/worker/metadata.py
CHANGED
@@ -76,18 +76,18 @@ class MetaDataMixin:
         :param entity: UUID of an entity this metadata is related to.
         :returns: UUID of the created metadata.
         """
-        assert element and isinstance(
-            element, Element | CachedElement
-        ), "element shouldn't be null and should be of type Element or CachedElement"
-        assert type and isinstance(
-            type, MetaType
-        ), "type shouldn't be null and should be of type MetaType"
-        assert name and isinstance(
-            name, str
-        ), "name shouldn't be null and should be of type str"
-        assert value and isinstance(
-            value, str
-        ), "value shouldn't be null and should be of type str"
+        assert element and isinstance(element, Element | CachedElement), (
+            "element shouldn't be null and should be of type Element or CachedElement"
+        )
+        assert type and isinstance(type, MetaType), (
+            "type shouldn't be null and should be of type MetaType"
+        )
+        assert name and isinstance(name, str), (
+            "name shouldn't be null and should be of type str"
+        )
+        assert value and isinstance(value, str), (
+            "value shouldn't be null and should be of type str"
+        )
         if entity:
             assert isinstance(entity, str), "entity should be of type str"
         if self.is_read_only:
@@ -130,28 +130,28 @@ class MetaDataMixin:

         :returns: A list of dicts as returned in the ``metadata_list`` field by the ``CreateMetaDataBulk`` API endpoint.
         """
-        assert element and isinstance(
-            element, Element | CachedElement
-        ), "element shouldn't be null and should be of type Element or CachedElement"
+        assert element and isinstance(element, Element | CachedElement), (
+            "element shouldn't be null and should be of type Element or CachedElement"
+        )

-        assert metadata_list and isinstance(
-            metadata_list, list
-        ), "metadata_list shouldn't be null and should be of type list of dict"
+        assert metadata_list and isinstance(metadata_list, list), (
+            "metadata_list shouldn't be null and should be of type list of dict"
+        )

         # Make a copy to avoid modifying the metadata_list argument
         metas = []
         for index, metadata in enumerate(metadata_list):
-            assert isinstance(
-                metadata, dict
-            ), f"Element at index {index} in metadata_list: Should be of type dict"
+            assert isinstance(metadata, dict), (
+                f"Element at index {index} in metadata_list: Should be of type dict"
+            )

             assert metadata.get("type") and isinstance(
                 metadata.get("type"), MetaType
             ), "type shouldn't be null and should be of type MetaType"

-            assert metadata.get("name") and isinstance(
-                metadata.get("name"), str
-            ), "name shouldn't be null and should be of type str"
+            assert metadata.get("name") and isinstance(metadata.get("name"), str), (
+                "name shouldn't be null and should be of type str"
+            )

             assert metadata.get("value") is not None and isinstance(
                 metadata.get("value"), str | float | int
@@ -199,9 +199,9 @@ class MetaDataMixin:
         :param element: The element to list metadata on.
         :param load_parents: Also include all metadata from the element's parents in the response.
         """
-        assert element and isinstance(
-            element, Element | CachedElement
-        ), "element shouldn't be null and should be of type Element or CachedElement"
+        assert element and isinstance(element, Element | CachedElement), (
+            "element shouldn't be null and should be of type Element or CachedElement"
+        )

         query_params = {}
         if load_parents is not None:
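The `CreateMetaDataBulk` hunk above validates each entry of `metadata_list` as a dict carrying a `MetaType` member, a string `name`, and a `str`/`float`/`int` value. A self-contained sketch of that loop, where `check_metadata_list` and the reduced `MetaType` enum are stand-ins for illustration rather than the package's own definitions:

```python
from enum import Enum


class MetaType(Enum):
    # Stand-in enum for the sketch; the real MetaType lives in arkindex_worker.
    Text = "text"
    Numeric = "numeric"


def check_metadata_list(metadata_list: list) -> None:
    """Hypothetical helper restating the per-item checks from the hunk above."""
    assert metadata_list and isinstance(metadata_list, list), (
        "metadata_list shouldn't be null and should be of type list of dict"
    )
    for index, metadata in enumerate(metadata_list):
        assert isinstance(metadata, dict), (
            f"Element at index {index} in metadata_list: Should be of type dict"
        )
        assert metadata.get("type") and isinstance(
            metadata.get("type"), MetaType
        ), "type shouldn't be null and should be of type MetaType"
        assert metadata.get("name") and isinstance(metadata.get("name"), str), (
            "name shouldn't be null and should be of type str"
        )
        # Message text here is illustrative; the original message is not shown in the hunk.
        assert metadata.get("value") is not None and isinstance(
            metadata.get("value"), str | float | int
        ), "value shouldn't be null and should be a str, float or int"


check_metadata_list([{"type": MetaType.Text, "name": "folio", "value": "12r"}])
```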
arkindex_worker/worker/task.py
CHANGED
@@ -17,9 +17,9 @@ class TaskMixin:
         :param task_id: Task ID to find artifacts from.
         :returns: An iterator of ``Artifact`` objects built from the ``ListArtifacts`` API endpoint.
         """
-        assert task_id and isinstance(
-            task_id, uuid.UUID
-        ), "task_id shouldn't be null and should be an UUID"
+        assert task_id and isinstance(task_id, uuid.UUID), (
+            "task_id shouldn't be null and should be an UUID"
+        )

         results = self.api_client.request("ListArtifacts", id=task_id)

@@ -35,12 +35,12 @@ class TaskMixin:
         :param artifact: Artifact to download content from.
         :returns: A temporary file containing the ``Artifact`` downloaded from the ``DownloadArtifact`` API endpoint.
         """
-        assert task_id and isinstance(
-            task_id, uuid.UUID
-        ), "task_id shouldn't be null and should be an UUID"
-        assert artifact and isinstance(
-            artifact, Artifact
-        ), "artifact shouldn't be null and should be an Artifact"
+        assert task_id and isinstance(task_id, uuid.UUID), (
+            "task_id shouldn't be null and should be an UUID"
+        )
+        assert artifact and isinstance(artifact, Artifact), (
+            "artifact shouldn't be null and should be an Artifact"
+        )

         return self.api_client.request(
             "DownloadArtifact", id=task_id, path=artifact.path
arkindex_worker/worker/training.py
CHANGED
@@ -122,9 +122,9 @@ class TrainingMixin:
             )

         elif tag or description or configuration or parent:
-            assert (
-                self.model_version.get("model_id") == model_id
-            ), "Given `model_id` does not match the current model version"
+            assert self.model_version.get("model_id") == model_id, (
+                "Given `model_id` does not match the current model version"
+            )
             # If any attribute field has been defined, PATCH the current model version
             self.update_model_version(
                 tag=tag,
@@ -237,15 +237,17 @@ class TrainingMixin:
        Upload the archive of the model's files to an Amazon s3 compatible storage
        """

-        assert (
-            self.model_version
-        ), "You must create the model version before uploading an archive."
-        assert (
-            self.model_version["state"] != "Available"
-        ), "The model is already marked as available."
+        assert self.model_version, (
+            "You must create the model version before uploading an archive."
+        )
+        assert self.model_version["state"] != "Available", (
+            "The model is already marked as available."
+        )

        s3_put_url = self.model_version.get("s3_put_url")
-        assert s3_put_url, "S3 PUT URL is not set, please ensure you have the right to validate a model version."
+        assert s3_put_url, (
+            "S3 PUT URL is not set, please ensure you have the right to validate a model version."
+        )

        logger.info("Uploading to s3...")
        # Upload the archive on s3
@@ -271,7 +273,9 @@ class TrainingMixin:
        :param size: The size of the uploaded archive
        :param archive_hash: MD5 hash of the uploaded archive
        """
-        assert self.model_version, "You must create the model version and upload its archive before validating it."
+        assert self.model_version, (
+            "You must create the model version and upload its archive before validating it."
+        )
        try:
            self.model_version = self.api_client.request(
                "PartialUpdateModelVersion",
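The `TrainingMixin` hunks above gate the archive upload on three preconditions: a model version exists, it is not already in the `Available` state, and its `s3_put_url` is set. A hypothetical standalone restatement of those checks (not the library's API, and `model_version` here is a plain dict mimicking the state the worker keeps):

```python
def check_upload_preconditions(model_version: dict | None) -> str:
    """Restates the upload preconditions from the TrainingMixin hunks above."""
    assert model_version, (
        "You must create the model version before uploading an archive."
    )
    assert model_version["state"] != "Available", (
        "The model is already marked as available."
    )
    s3_put_url = model_version.get("s3_put_url")
    assert s3_put_url, (
        "S3 PUT URL is not set, please ensure you have the right to validate a model version."
    )
    return s3_put_url


# Example payload; the field values are placeholders.
url = check_upload_preconditions(
    {"state": "Created", "s3_put_url": "https://example.com/put"}
)
```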
arkindex_worker/worker/transcription.py
CHANGED
@@ -59,18 +59,18 @@ class TranscriptionMixin:
         :returns: A dict as returned by the ``CreateTranscription`` API endpoint,
             or None if the worker is in read-only mode.
         """
-        assert element and isinstance(
-            element, Element | CachedElement
-        ), "element shouldn't be null and should be an Element or CachedElement"
-        assert text and isinstance(
-            text, str
-        ), "text shouldn't be null and should be of type str"
-        assert orientation and isinstance(
-            orientation, TextOrientation
-        ), "orientation shouldn't be null and should be of type TextOrientation"
-        assert (
-            isinstance(confidence, float) and 0 <= confidence <= 1
-        ), "confidence shouldn't be null and should be a float in [0..1] range"
+        assert element and isinstance(element, Element | CachedElement), (
+            "element shouldn't be null and should be an Element or CachedElement"
+        )
+        assert text and isinstance(text, str), (
+            "text shouldn't be null and should be of type str"
+        )
+        assert orientation and isinstance(orientation, TextOrientation), (
+            "orientation shouldn't be null and should be of type TextOrientation"
+        )
+        assert isinstance(confidence, float) and 0 <= confidence <= 1, (
+            "confidence shouldn't be null and should be a float in [0..1] range"
+        )

         if self.is_read_only:
             logger.warning(
@@ -136,37 +136,39 @@ class TranscriptionMixin:
         :returns: A list of dicts as returned in the ``transcriptions`` field by the ``CreateTranscriptions`` API endpoint.
         """

-        assert transcriptions and isinstance(
-            transcriptions, list
-        ), "transcriptions shouldn't be null and should be of type list"
+        assert transcriptions and isinstance(transcriptions, list), (
+            "transcriptions shouldn't be null and should be of type list"
+        )

         # Create shallow copies of every transcription to avoid mutating the original payload
         transcriptions_payload = list(map(dict, transcriptions))

         for index, transcription in enumerate(transcriptions_payload):
             element_id = transcription.get("element_id")
-            assert (
-                element_id and isinstance(element_id, str)
-            ), f"Transcription at index {index} in transcriptions: element_id shouldn't be null and should be of type str"
+            assert element_id and isinstance(element_id, str), (
+                f"Transcription at index {index} in transcriptions: element_id shouldn't be null and should be of type str"
+            )

             text = transcription.get("text")
-            assert (
-                text and isinstance(text, str)
-            ), f"Transcription at index {index} in transcriptions: text shouldn't be null and should be of type str"
+            assert text and isinstance(text, str), (
+                f"Transcription at index {index} in transcriptions: text shouldn't be null and should be of type str"
+            )

             confidence = transcription.get("confidence")
             assert (
                 confidence is not None
                 and isinstance(confidence, float)
                 and 0 <= confidence <= 1
-            ), f"Transcription at index {index} in transcriptions: confidence shouldn't be null and should be a float in [0..1] range"
+            ), (
+                f"Transcription at index {index} in transcriptions: confidence shouldn't be null and should be a float in [0..1] range"
+            )

             orientation = transcription.get(
                 "orientation", TextOrientation.HorizontalLeftToRight
             )
-            assert (
-                orientation and isinstance(orientation, TextOrientation)
-            ), f"Transcription at index {index} in transcriptions: orientation shouldn't be null and should be of type TextOrientation"
+            assert orientation and isinstance(orientation, TextOrientation), (
+                f"Transcription at index {index} in transcriptions: orientation shouldn't be null and should be of type TextOrientation"
+            )
             if orientation:
                 transcription["orientation"] = orientation.value

@@ -242,63 +244,67 @@ class TranscriptionMixin:

         :returns: A list of dicts as returned by the ``CreateElementTranscriptions`` API endpoint.
         """
-        assert element and isinstance(
-            element, Element | CachedElement
-        ), "element shouldn't be null and should be an Element or CachedElement"
-        assert sub_element_type and isinstance(
-            sub_element_type, str
-        ), "sub_element_type shouldn't be null and should be of type str"
-        assert transcriptions and isinstance(
-            transcriptions, list
-        ), "transcriptions shouldn't be null and should be of type list"
+        assert element and isinstance(element, Element | CachedElement), (
+            "element shouldn't be null and should be an Element or CachedElement"
+        )
+        assert sub_element_type and isinstance(sub_element_type, str), (
+            "sub_element_type shouldn't be null and should be of type str"
+        )
+        assert transcriptions and isinstance(transcriptions, list), (
+            "transcriptions shouldn't be null and should be of type list"
+        )

         # Create shallow copies of every transcription to avoid mutating the original payload
         transcriptions_payload = list(map(dict, transcriptions))

         for index, transcription in enumerate(transcriptions_payload):
             text = transcription.get("text")
-            assert (
-                text and isinstance(text, str)
-            ), f"Transcription at index {index} in transcriptions: text shouldn't be null and should be of type str"
+            assert text and isinstance(text, str), (
+                f"Transcription at index {index} in transcriptions: text shouldn't be null and should be of type str"
+            )

             confidence = transcription.get("confidence")
             assert (
                 confidence is not None
                 and isinstance(confidence, float)
                 and 0 <= confidence <= 1
-            ), f"Transcription at index {index} in transcriptions: confidence shouldn't be null and should be a float in [0..1] range"
+            ), (
+                f"Transcription at index {index} in transcriptions: confidence shouldn't be null and should be a float in [0..1] range"
+            )

             orientation = transcription.get(
                 "orientation", TextOrientation.HorizontalLeftToRight
             )
-            assert (
-                orientation and isinstance(orientation, TextOrientation)
-            ), f"Transcription at index {index} in transcriptions: orientation shouldn't be null and should be of type TextOrientation"
+            assert orientation and isinstance(orientation, TextOrientation), (
+                f"Transcription at index {index} in transcriptions: orientation shouldn't be null and should be of type TextOrientation"
+            )
             if orientation:
                 transcription["orientation"] = orientation.value

             polygon = transcription.get("polygon")
-            assert (
-                polygon and isinstance(polygon, list)
-            ), f"Transcription at index {index} in transcriptions: polygon shouldn't be null and should be of type list"
-            assert (
-                len(polygon) >= 3
-            ), f"Transcription at index {index} in transcriptions: polygon should have at least three points"
+            assert polygon and isinstance(polygon, list), (
+                f"Transcription at index {index} in transcriptions: polygon shouldn't be null and should be of type list"
+            )
+            assert len(polygon) >= 3, (
+                f"Transcription at index {index} in transcriptions: polygon should have at least three points"
+            )
             assert all(
                 isinstance(point, list) and len(point) == 2 for point in polygon
-            ), f"Transcription at index {index} in transcriptions: polygon points should be lists of two items"
+            ), (
+                f"Transcription at index {index} in transcriptions: polygon points should be lists of two items"
+            )
             assert all(
                 isinstance(coord, int | float) for point in polygon for coord in point
-            ), f"Transcription at index {index} in transcriptions: polygon points should be lists of two numbers"
+            ), (
+                f"Transcription at index {index} in transcriptions: polygon points should be lists of two numbers"
+            )

             element_confidence = transcription.get("element_confidence")
-            assert (
-                element_confidence is None
-                or (
-                    isinstance(element_confidence, float)
-                    and 0 <= element_confidence <= 1
-                )
-            ), f"Transcription at index {index} in transcriptions: element_confidence should be either null or a float in [0..1] range"
+            assert element_confidence is None or (
+                isinstance(element_confidence, float) and 0 <= element_confidence <= 1
+            ), (
+                f"Transcription at index {index} in transcriptions: element_confidence should be either null or a float in [0..1] range"
+            )

         if self.is_read_only:
             logger.warning(
@@ -407,9 +413,9 @@ class TranscriptionMixin:
         :returns: An iterable of dicts representing each transcription,
             or an iterable of CachedTranscription when cache support is enabled.
         """
-        assert element and isinstance(
-            element, Element | CachedElement
-        ), "element shouldn't be null and should be an Element or CachedElement"
+        assert element and isinstance(element, Element | CachedElement), (
+            "element shouldn't be null and should be an Element or CachedElement"
+        )
         query_params = {}
         if element_type:
             assert isinstance(element_type, str), "element_type should be of type str"
@@ -423,22 +429,22 @@ class TranscriptionMixin:
                 DeprecationWarning,
                 stacklevel=1,
             )
-            assert isinstance(
-                worker_version, str | bool
-            ), "worker_version should be of type str or bool"
+            assert isinstance(worker_version, str | bool), (
+                "worker_version should be of type str or bool"
+            )
             if isinstance(worker_version, bool):
-                assert (
-                    worker_version is False
-                ), "if of type bool, worker_version can only be set to False"
+                assert worker_version is False, (
+                    "if of type bool, worker_version can only be set to False"
+                )
             query_params["worker_version"] = worker_version
         if worker_run is not None:
-            assert isinstance(
-                worker_run, str | bool
-            ), "worker_run should be of type str or bool"
+            assert isinstance(worker_run, str | bool), (
+                "worker_run should be of type str or bool"
+            )
             if isinstance(worker_run, bool):
-                assert (
-                    worker_run is False
-                ), "if of type bool, worker_run can only be set to False"
+                assert worker_run is False, (
+                    "if of type bool, worker_run can only be set to False"
+                )
             query_params["worker_run"] = worker_run

         if not self.use_cache:
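The `CreateElementTranscriptions` hunk above checks every polygon the same way: a list of at least three points, each point a two-item list of numbers. A standalone sketch of those checks (`check_polygon` is a hypothetical helper, not an arkindex_base_worker API):

```python
def check_polygon(polygon: list, index: int = 0) -> None:
    """Restates the polygon checks from the hunk above for a single transcription."""
    assert polygon and isinstance(polygon, list), (
        f"Transcription at index {index} in transcriptions: polygon shouldn't be null and should be of type list"
    )
    assert len(polygon) >= 3, (
        f"Transcription at index {index} in transcriptions: polygon should have at least three points"
    )
    assert all(
        isinstance(point, list) and len(point) == 2 for point in polygon
    ), (
        f"Transcription at index {index} in transcriptions: polygon points should be lists of two items"
    )
    assert all(
        isinstance(coord, int | float) for point in polygon for coord in point
    ), (
        f"Transcription at index {index} in transcriptions: polygon points should be lists of two numbers"
    )


check_polygon([[0, 0], [100, 0], [100, 50], [0, 50]])  # a valid rectangle passes
```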
tests/test_elements_worker/test_training.py
CHANGED
@@ -51,9 +51,9 @@ def test_create_archive(model_file_dir):
         archive_hash,
     ):
         assert zst_archive_path.exists(), "The archive was not created"
-        assert (
-            hash == "c5aedde18a768757351068b840c8c8f9"
-        ), "Hash was not properly computed"
+        assert hash == "c5aedde18a768757351068b840c8c8f9", (
+            "Hash was not properly computed"
+        )
         assert 300 < size < 700

         assert not zst_archive_path.exists(), "Auto removal failed"
@@ -69,9 +69,9 @@ def test_create_archive_with_subfolder(model_file_dir_with_subfolder):
         archive_hash,
     ):
         assert zst_archive_path.exists(), "The archive was not created"
-        assert (
-            hash == "3e453881404689e6e125144d2db3e605"
-        ), "Hash was not properly computed"
+        assert hash == "3e453881404689e6e125144d2db3e605", (
+            "Hash was not properly computed"
+        )
         assert 300 < size < 1500

         assert not zst_archive_path.exists(), "Auto removal failed"
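Both tests compare the MD5 hex digest of the generated `.zst` archive against a fixed value. The `create_archive` helper they rely on is not part of this diff; the sketch below only shows a generic way to compute such a digest with the standard library, not the test suite's own implementation:

```python
import hashlib
from pathlib import Path


def md5_hexdigest(path: Path, chunk_size: int = 8192) -> str:
    """Compute the MD5 hex digest of a file, reading it in chunks."""
    digest = hashlib.md5()
    with path.open("rb") as handle:
        for chunk in iter(lambda: handle.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()

# Usage (illustrative path): md5_hexdigest(Path("model_files.tar.zst"))
```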
{arkindex_base_worker-0.5.0a1.dist-info → arkindex_base_worker-0.5.0a3.dist-info}/LICENSE
RENAMED
File without changes
{arkindex_base_worker-0.5.0a1.dist-info → arkindex_base_worker-0.5.0a3.dist-info}/WHEEL
RENAMED
File without changes
{arkindex_base_worker-0.5.0a1.dist-info → arkindex_base_worker-0.5.0a3.dist-info}/top_level.txt
RENAMED
File without changes