arkindex-base-worker 0.3.7rc5__py3-none-any.whl → 0.5.0a1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {arkindex_base_worker-0.3.7rc5.dist-info → arkindex_base_worker-0.5.0a1.dist-info}/METADATA +18 -19
- arkindex_base_worker-0.5.0a1.dist-info/RECORD +61 -0
- {arkindex_base_worker-0.3.7rc5.dist-info → arkindex_base_worker-0.5.0a1.dist-info}/WHEEL +1 -1
- {arkindex_base_worker-0.3.7rc5.dist-info → arkindex_base_worker-0.5.0a1.dist-info}/top_level.txt +2 -0
- arkindex_worker/cache.py +1 -1
- arkindex_worker/image.py +167 -2
- arkindex_worker/models.py +18 -0
- arkindex_worker/utils.py +98 -4
- arkindex_worker/worker/__init__.py +117 -218
- arkindex_worker/worker/base.py +39 -46
- arkindex_worker/worker/classification.py +34 -18
- arkindex_worker/worker/corpus.py +86 -0
- arkindex_worker/worker/dataset.py +89 -26
- arkindex_worker/worker/element.py +352 -91
- arkindex_worker/worker/entity.py +13 -11
- arkindex_worker/worker/image.py +21 -0
- arkindex_worker/worker/metadata.py +26 -16
- arkindex_worker/worker/process.py +92 -0
- arkindex_worker/worker/task.py +5 -4
- arkindex_worker/worker/training.py +25 -10
- arkindex_worker/worker/transcription.py +89 -68
- arkindex_worker/worker/version.py +3 -1
- hooks/pre_gen_project.py +3 -0
- tests/__init__.py +8 -0
- tests/conftest.py +47 -58
- tests/test_base_worker.py +212 -12
- tests/test_dataset_worker.py +294 -437
- tests/test_elements_worker/{test_classifications.py → test_classification.py} +216 -100
- tests/test_elements_worker/test_cli.py +3 -11
- tests/test_elements_worker/test_corpus.py +168 -0
- tests/test_elements_worker/test_dataset.py +106 -157
- tests/test_elements_worker/test_element.py +427 -0
- tests/test_elements_worker/test_element_create_multiple.py +715 -0
- tests/test_elements_worker/test_element_create_single.py +528 -0
- tests/test_elements_worker/test_element_list_children.py +969 -0
- tests/test_elements_worker/test_element_list_parents.py +530 -0
- tests/test_elements_worker/{test_entities.py → test_entity_create.py} +37 -195
- tests/test_elements_worker/test_entity_list_and_check.py +160 -0
- tests/test_elements_worker/test_image.py +66 -0
- tests/test_elements_worker/test_metadata.py +252 -161
- tests/test_elements_worker/test_process.py +89 -0
- tests/test_elements_worker/test_task.py +8 -18
- tests/test_elements_worker/test_training.py +17 -8
- tests/test_elements_worker/test_transcription_create.py +873 -0
- tests/test_elements_worker/test_transcription_create_with_elements.py +951 -0
- tests/test_elements_worker/test_transcription_list.py +450 -0
- tests/test_elements_worker/test_version.py +60 -0
- tests/test_elements_worker/test_worker.py +578 -293
- tests/test_image.py +542 -209
- tests/test_merge.py +1 -2
- tests/test_utils.py +89 -4
- worker-demo/tests/__init__.py +0 -0
- worker-demo/tests/conftest.py +32 -0
- worker-demo/tests/test_worker.py +12 -0
- worker-demo/worker_demo/__init__.py +6 -0
- worker-demo/worker_demo/worker.py +19 -0
- arkindex_base_worker-0.3.7rc5.dist-info/RECORD +0 -41
- tests/test_elements_worker/test_elements.py +0 -2713
- tests/test_elements_worker/test_transcriptions.py +0 -2119
- {arkindex_base_worker-0.3.7rc5.dist-info → arkindex_base_worker-0.5.0a1.dist-info}/LICENSE +0 -0
arkindex_worker/worker/metadata.py
CHANGED

@@ -7,6 +7,7 @@ from enum import Enum
 from arkindex_worker import logger
 from arkindex_worker.cache import CachedElement, unsupported_cache
 from arkindex_worker.models import Element
+from arkindex_worker.utils import DEFAULT_BATCH_SIZE, batch_publication, make_batches
 
 
 class MetaType(Enum):
@@ -93,7 +94,7 @@ class MetaDataMixin:
             logger.warning("Cannot create metadata as this worker is in read-only mode")
             return
 
-        metadata = self.request(
+        metadata = self.api_client.request(
             "CreateMetaData",
             id=element.id,
             body={
@@ -108,33 +109,38 @@
         return metadata["id"]
 
     @unsupported_cache
-    def create_metadata_bulk(
+    @batch_publication
+    def create_metadata_bulk(
         self,
         element: Element | CachedElement,
-        metadata_list: …
+        metadata_list: list[dict[str, MetaType | str | int | float | None]],
+        batch_size: int = DEFAULT_BATCH_SIZE,
     ) -> list[dict[str, str]]:
         """
         Create multiple metadata on an existing element.
         This method does not support cache.
 
         :param element: The element to create multiple metadata on.
-        :param …
+        :param metadata_list: The list of dict whose keys are the following:
             - type: MetaType
             - name: str
             - value: str | int | float
             - entity_id: str | None
+        :param batch_size: The size of each batch, which will be used to split the publication to avoid API errors.
+
+        :returns: A list of dicts as returned in the ``metadata_list`` field by the ``CreateMetaDataBulk`` API endpoint.
         """
         assert element and isinstance(
             element, Element | CachedElement
         ), "element shouldn't be null and should be of type Element or CachedElement"
 
-        assert …
-            …
-        ), "…
+        assert metadata_list and isinstance(
+            metadata_list, list
+        ), "metadata_list shouldn't be null and should be of type list of dict"
 
         # Make a copy to avoid modifying the metadata_list argument
         metas = []
-        for index, metadata in enumerate(…
+        for index, metadata in enumerate(metadata_list):
             assert isinstance(
                 metadata, dict
             ), f"Element at index {index} in metadata_list: Should be of type dict"
@@ -168,14 +174,18 @@
             logger.warning("Cannot create metadata as this worker is in read-only mode")
             return
 
-        created_metadata_list = self.request(
-            "CreateMetaDataBulk",
-            id=element.id,
-            body={
-                "worker_run_id": self.worker_run_id,
-                "metadata_list": metas,
-            },
-        )["metadata_list"]
+        created_metadata_list = [
+            created_metadata
+            for batch in make_batches(metas, "metadata", batch_size)
+            for created_metadata in self.api_client.request(
+                "CreateMetaDataBulk",
+                id=element.id,
+                body={
+                    "worker_run_id": self.worker_run_id,
+                    "metadata_list": batch,
+                },
+            )["metadata_list"]
+        ]
 
         return created_metadata_list
 
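The `DEFAULT_BATCH_SIZE`, `batch_publication` and `make_batches` helpers imported above come from `arkindex_worker/utils.py` (+98 -4 in this release) and their implementation is not part of this diff. A minimal sketch of the chunking behaviour the call sites rely on, with an assumed default size and a signature inferred from `make_batches(metas, "metadata", batch_size)`:

```python
from collections.abc import Iterator

DEFAULT_BATCH_SIZE = 50  # illustrative value; the real default lives in arkindex_worker.utils


def make_batches(objects: list, singular_name: str, batch_size: int) -> Iterator[list]:
    # Split a payload into batch_size-sized chunks so that each publication
    # call stays under the API's limits.
    assert batch_size > 0, f"Invalid {singular_name} batch size: {batch_size}"
    for start in range(0, len(objects), batch_size):
        yield objects[start : start + batch_size]


# 120 metadata published with this default become three API calls: 50 + 50 + 20.
metas = [{"name": f"page_{i}", "value": i} for i in range(120)]
assert [len(b) for b in make_batches(metas, "metadata", DEFAULT_BATCH_SIZE)] == [50, 50, 20]
```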
arkindex_worker/worker/process.py
ADDED

@@ -0,0 +1,92 @@
+from collections.abc import Iterator
+from enum import Enum
+
+from arkindex_worker.cache import unsupported_cache
+
+# Increases the number of elements returned per page by the API
+PROCESS_ELEMENTS_PAGE_SIZE = 500
+
+
+class ActivityState(Enum):
+    """
+    Processing state of an element.
+    """
+
+    Queued = "queued"
+    """
+    The element has not yet been processed by a worker.
+    """
+
+    Started = "started"
+    """
+    The element is being processed by a worker.
+    """
+
+    Processed = "processed"
+    """
+    The element has been successfully processed by a worker.
+    """
+
+    Error = "error"
+    """
+    An error occurred while processing this element.
+    """
+
+
+class ProcessMode(Enum):
+    """
+    Mode of the process of the worker.
+    """
+
+    Files = "files"
+    """
+    Processes of files (images, PDFs, IIIF, ...) imports.
+    """
+
+    Workers = "workers"
+    """
+    Processes of worker executions.
+    """
+
+    Template = "template"
+    """
+    Process templates.
+    """
+
+    S3 = "s3"
+    """
+    Processes of imports from an S3-compatible storage.
+    """
+
+    Local = "local"
+    """
+    Local processes.
+    """
+
+    Dataset = "dataset"
+    """
+    Dataset processes.
+    """
+
+    Export = "export"
+    """
+    Export processes.
+    """
+
+
+class ProcessMixin:
+    @unsupported_cache
+    def list_process_elements(self, with_image: bool = False) -> Iterator[dict]:
+        """
+        List the elements of a process.
+
+        :param with_image: whether or not to include zone and image information in the elements response.
+        :returns: the process' elements.
+        """
+        return self.api_client.paginate(
+            "ListProcessElements",
+            id=self.process_information["id"],
+            with_image=with_image,
+            allow_missing_data=True,
+            page_size=PROCESS_ELEMENTS_PAGE_SIZE,
+        )
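A sketch of how a worker might consume the new mixin, assuming `ProcessMixin` is wired into `ElementsWorker` by the reworked `arkindex_worker/worker/__init__.py` (changed in this release but not shown here); `count_pages` is a hypothetical method:

```python
from arkindex_worker.worker import ElementsWorker


class DemoWorker(ElementsWorker):
    def count_pages(self) -> int:
        # list_process_elements paginates ListProcessElements 500 items at a
        # time, so a single loop walks the whole process.
        return sum(
            1
            for element in self.list_process_elements()
            if element.get("type") == "page"
        )
```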
arkindex_worker/worker/task.py
CHANGED

@@ -5,8 +5,7 @@ BaseWorker methods for tasks.
 import uuid
 from collections.abc import Iterator
 
-from apistar.compat import DownloadedFile
-
+from arkindex.compat import DownloadedFile
 from arkindex_worker.models import Artifact
 
 
@@ -22,7 +21,7 @@ class TaskMixin:
             task_id, uuid.UUID
         ), "task_id shouldn't be null and should be an UUID"
 
-        results = self.request("ListArtifacts", id=task_id)
+        results = self.api_client.request("ListArtifacts", id=task_id)
 
         return map(Artifact, results)
 
@@ -43,4 +42,6 @@
             artifact, Artifact
         ), "artifact shouldn't be null and should be an Artifact"
 
-        return self.request("DownloadArtifact", id=task_id, path=artifact.path)
+        return self.api_client.request(
+            "DownloadArtifact", id=task_id, path=artifact.path
+        )
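For context, a hypothetical helper chaining the two `TaskMixin` methods touched above; `worker` stands for any `BaseWorker` instance, and treating the returned `DownloadedFile` as a named temporary file is an assumption:

```python
import uuid


def download_all_artifacts(worker, task_id: uuid.UUID) -> list[str]:
    # list_artifacts yields Artifact models from ListArtifacts;
    # download_artifact streams each one through DownloadArtifact.
    return [
        worker.download_artifact(task_id, artifact).name
        for artifact in worker.list_artifacts(task_id)
    ]
```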
arkindex_worker/worker/training.py
CHANGED

@@ -9,8 +9,8 @@ from typing import NewType
 from uuid import UUID
 
 import requests
-from apistar.exceptions import ErrorResponse
 
+from arkindex.exceptions import ErrorResponse
 from arkindex_worker import logger
 from arkindex_worker.utils import close_delete_file, create_tar_zst_archive
 
@@ -83,6 +83,9 @@ class TrainingMixin:
 
     @property
     def is_finetuning(self) -> bool:
+        """
+        Whether or not this worker is fine-tuning an existing model version.
+        """
         return bool(self.model_version_id)
 
     @skip_if_read_only
@@ -182,7 +185,7 @@
         assert not self.model_version, "A model version has already been created."
 
         configuration = configuration or {}
-        self.model_version = self.request(
+        self.model_version = self.api_client.request(
             "CreateModelVersion",
             id=model_id,
             body=build_clean_payload(
@@ -214,7 +217,7 @@
         :param parent: ID of the parent model version
         """
         assert self.model_version, "No model version has been created yet."
-        self.model_version = self.request(
+        self.model_version = self.api_client.request(
             "UpdateModelVersion",
             id=self.model_version["id"],
             body=build_clean_payload(
@@ -270,32 +273,44 @@
         """
         assert self.model_version, "You must create the model version and upload its archive before validating it."
         try:
-            self.model_version = self.request(
-                "…",
+            self.model_version = self.api_client.request(
+                "PartialUpdateModelVersion",
                 id=self.model_version["id"],
                 body={
+                    "state": "available",
                     "size": size,
                     "hash": hash,
                     "archive_hash": archive_hash,
                 },
             )
         except ErrorResponse as e:
-            if …:
+            model_version = e.content
+            if not model_version or "id" not in model_version:
                 raise e
+
             logger.warning(
                 f"An available model version exists with hash {hash}, using it instead of the pending version."
             )
             pending_version_id = self.model_version["id"]
-            self.model_version = getattr(e, "content", None)
-            assert self.model_version is not None, "An unexpected error occurred."
-
             logger.warning("Removing the pending model version.")
             try:
-                self.request("DestroyModelVersion", id=pending_version_id)
+                self.api_client.request("DestroyModelVersion", id=pending_version_id)
             except ErrorResponse as e:
                 msg = getattr(e, "content", str(e))
                 logger.error(
                     f"An error occurred removing the pending version {pending_version_id}: {msg}."
                 )
 
+            logger.info("Retrieving the existing model version.")
+            existing_version_id = model_version["id"].pop()
+            try:
+                self.model_version = self.api_client.request(
+                    "RetrieveModelVersion", id=existing_version_id
+                )
+            except ErrorResponse as e:
+                logger.error(
+                    f"An error occurred retrieving the existing version {existing_version_id}: {e.status_code} - {e.content}."
+                )
+                raise
+
         logger.info(f"Model version {self.model_version['id']} is now available.")
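The validation fallback above is easier to follow end to end. A hedged sketch of the publication flow; only the request bodies are confirmed by this diff, so the method signatures below are assumptions:

```python
def publish_model(worker, model_id: str, size: int, hash: str, archive_hash: str) -> dict:
    # CreateModelVersion registers a pending version on the model.
    worker.create_model_version(model_id)
    # ... upload the archive for that version here ...
    # PartialUpdateModelVersion then flips the version to "available". On a
    # hash conflict (the ErrorResponse branch above), the pending version is
    # destroyed and the existing one is fetched through RetrieveModelVersion,
    # instead of trusting the error payload as the old code did.
    worker.validate_model_version(size=size, hash=hash, archive_hash=archive_hash)
    return worker.model_version
```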
arkindex_worker/worker/transcription.py
CHANGED

@@ -11,6 +11,7 @@ from peewee import IntegrityError
 from arkindex_worker import logger
 from arkindex_worker.cache import CachedElement, CachedTranscription
 from arkindex_worker.models import Element
+from arkindex_worker.utils import DEFAULT_BATCH_SIZE, batch_publication, make_batches
 
 
 class TextOrientation(Enum):
@@ -77,7 +78,7 @@ class TranscriptionMixin:
             )
             return
 
-        created = self.request(
+        created = self.api_client.request(
             "CreateTranscription",
             id=element.id,
             body={
@@ -109,9 +110,11 @@
 
         return created
 
+    @batch_publication
     def create_transcriptions(
         self,
         transcriptions: list[dict[str, str | float | TextOrientation | None]],
+        batch_size: int = DEFAULT_BATCH_SIZE,
     ) -> list[dict[str, str | float]]:
         """
         Create multiple transcriptions at once on existing elements through the API,
@@ -128,6 +131,8 @@
             orientation (TextOrientation)
                 Optional. Orientation of the transcription's text.
 
+        :param batch_size: The size of each batch, which will be used to split the publication to avoid API errors.
+
         :returns: A list of dicts as returned in the ``transcriptions`` field by the ``CreateTranscriptions`` API endpoint.
         """
 
@@ -171,13 +176,19 @@
             )
             return
 
-        created_trs = self.request(
-            "CreateTranscriptions",
-            body={
-                "worker_run_id": self.worker_run_id,
-                "transcriptions": transcriptions_payload,
-            },
-        )["transcriptions"]
+        created_trs = [
+            created_tr
+            for batch in make_batches(
+                transcriptions_payload, "transcription", batch_size
+            )
+            for created_tr in self.api_client.request(
+                "CreateTranscriptions",
+                body={
+                    "worker_run_id": self.worker_run_id,
+                    "transcriptions": batch,
+                },
+            )["transcriptions"]
+        ]
 
         if self.use_cache:
             # Store transcriptions in local cache
@@ -201,11 +212,13 @@
 
         return created_trs
 
+    @batch_publication
     def create_element_transcriptions(
         self,
         element: Element | CachedElement,
         sub_element_type: str,
         transcriptions: list[dict[str, str | float]],
+        batch_size: int = DEFAULT_BATCH_SIZE,
     ) -> dict[str, str | bool]:
         """
         Create multiple elements and transcriptions at once on a single parent element through the API.
@@ -225,6 +238,8 @@
             element_confidence (float)
                 Optional. Confidence score of the element between 0 and 1.
 
+        :param batch_size: The size of each batch, which will be used to split the publication to avoid API errors.
+
         :returns: A list of dicts as returned by the ``CreateElementTranscriptions`` API endpoint.
         """
         assert element and isinstance(
@@ -291,16 +306,22 @@
             )
             return
 
-        annotations = self.request(
-            "CreateElementTranscriptions",
-            id=element.id,
-            body={
-                "element_type": sub_element_type,
-                "worker_run_id": self.worker_run_id,
-                "transcriptions": transcriptions_payload,
-                "return_elements": True,
-            },
-        )
+        annotations = [
+            annotation
+            for batch in make_batches(
+                transcriptions_payload, "transcription", batch_size
+            )
+            for annotation in self.api_client.request(
+                "CreateElementTranscriptions",
+                id=element.id,
+                body={
+                    "element_type": sub_element_type,
+                    "worker_run_id": self.worker_run_id,
+                    "transcriptions": batch,
+                    "return_elements": True,
+                },
+            )
+        ]
 
         for annotation in annotations:
             if annotation["created"]:
@@ -420,60 +441,60 @@
         ), "if of type bool, worker_run can only be set to False"
         query_params["worker_run"] = worker_run
 
-        if self.use_cache:
-            if not recursive:
-                # In this case we don't have to return anything, it's easier to use an
-                # impossible condition (False) rather than filtering by type for nothing
-                if element_type and element_type != element.type:
-                    return CachedTranscription.select().where(False)
-                transcriptions = CachedTranscription.select().where(
-                    CachedTranscription.element_id == element.id
-                )
-            else:
-                base_case = (
-                    CachedElement.select()
-                    .where(CachedElement.id == element.id)
-                    .cte("base", recursive=True)
-                )
-                recursive = CachedElement.select().join(
-                    base_case, on=(CachedElement.parent_id == base_case.c.id)
-                )
-                cte = base_case.union_all(recursive)
-                transcriptions = (
-                    CachedTranscription.select()
-                    .join(cte, on=(CachedTranscription.element_id == cte.c.id))
-                    .with_cte(cte)
-                )
-
-            if element_type:
-                transcriptions = transcriptions.where(cte.c.type == element_type)
-
-            if worker_version is not None:
-                # If worker_version=False, filter by manual worker_version e.g. None
-                worker_version_id = worker_version or None
-                if worker_version_id:
-                    transcriptions = transcriptions.where(
-                        CachedTranscription.worker_version_id == worker_version_id
-                    )
-                else:
-                    transcriptions = transcriptions.where(
-                        CachedTranscription.worker_version_id.is_null()
-                    )
-
-            if worker_run is not None:
-                # If worker_run=False, filter by manual worker_run e.g. None
-                worker_run_id = worker_run or None
-                if worker_run_id:
-                    transcriptions = transcriptions.where(
-                        CachedTranscription.worker_run_id == worker_run_id
-                    )
-                else:
-                    transcriptions = transcriptions.where(
-                        CachedTranscription.worker_run_id.is_null()
-                    )
-        else:
-            transcriptions = self.api_client.paginate(
-                "ListTranscriptions", id=element.id, **query_params
-            )
+        if not self.use_cache:
+            return self.api_client.paginate(
+                "ListTranscriptions", id=element.id, **query_params
+            )
+
+        if not recursive:
+            # In this case we don't have to return anything, it's easier to use an
+            # impossible condition (False) rather than filtering by type for nothing
+            if element_type and element_type != element.type:
+                return CachedTranscription.select().where(False)
+            transcriptions = CachedTranscription.select().where(
+                CachedTranscription.element_id == element.id
+            )
+        else:
+            base_case = (
+                CachedElement.select()
+                .where(CachedElement.id == element.id)
+                .cte("base", recursive=True)
+            )
+            recursive = CachedElement.select().join(
+                base_case, on=(CachedElement.parent_id == base_case.c.id)
+            )
+            cte = base_case.union_all(recursive)
+            transcriptions = (
+                CachedTranscription.select()
+                .join(cte, on=(CachedTranscription.element_id == cte.c.id))
+                .with_cte(cte)
+            )
+
+        if element_type:
+            transcriptions = transcriptions.where(cte.c.type == element_type)
+
+        if worker_version is not None:
+            # If worker_version=False, filter by manual worker_version e.g. None
+            worker_version_id = worker_version or None
+            if worker_version_id:
+                transcriptions = transcriptions.where(
+                    CachedTranscription.worker_version_id == worker_version_id
+                )
+            else:
+                transcriptions = transcriptions.where(
+                    CachedTranscription.worker_version_id.is_null()
+                )
+
+        if worker_run is not None:
+            # If worker_run=False, filter by manual worker_run e.g. None
+            worker_run_id = worker_run or None
+            if worker_run_id:
+                transcriptions = transcriptions.where(
+                    CachedTranscription.worker_run_id == worker_run_id
+                )
+            else:
+                transcriptions = transcriptions.where(
+                    CachedTranscription.worker_run_id.is_null()
+                )
 
         return transcriptions
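A hypothetical call showing the effect of the new `batch_size` parameter on `create_transcriptions`; the payload keys follow the docstring fields of this method, and the element UUID is a placeholder:

```python
def publish_lines(worker, element_id: str) -> list[dict]:
    transcriptions = [
        {"element_id": element_id, "text": f"line {i}", "confidence": 0.9}
        for i in range(250)
    ]
    # make_batches splits the payload, so this issues three CreateTranscriptions
    # requests (100 + 100 + 50 items) instead of one oversized call.
    return worker.create_transcriptions(transcriptions, batch_size=100)
```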
arkindex_worker/worker/version.py
CHANGED

@@ -34,7 +34,9 @@ class WorkerVersionMixin:
         if worker_version_id in self._worker_version_cache:
             return self._worker_version_cache[worker_version_id]
 
-        worker_version = self.request("RetrieveWorkerVersion", id=worker_version_id)
+        worker_version = self.api_client.request(
+            "RetrieveWorkerVersion", id=worker_version_id
+        )
         self._worker_version_cache[worker_version_id] = worker_version
 
         return worker_version
hooks/pre_gen_project.py
ADDED
tests/__init__.py
CHANGED