arkindex-base-worker 0.4.0rc5__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41) hide show
  1. {arkindex_base_worker-0.4.0rc5.dist-info → arkindex_base_worker-0.5.0.dist-info}/METADATA +10 -13
  2. arkindex_base_worker-0.5.0.dist-info/RECORD +60 -0
  3. {arkindex_base_worker-0.4.0rc5.dist-info → arkindex_base_worker-0.5.0.dist-info}/WHEEL +1 -1
  4. {arkindex_base_worker-0.4.0rc5.dist-info → arkindex_base_worker-0.5.0.dist-info}/top_level.txt +1 -0
  5. arkindex_worker/__init__.py +3 -0
  6. arkindex_worker/cache.py +6 -25
  7. arkindex_worker/image.py +105 -66
  8. arkindex_worker/utils.py +2 -1
  9. arkindex_worker/worker/__init__.py +22 -32
  10. arkindex_worker/worker/base.py +16 -9
  11. arkindex_worker/worker/classification.py +36 -34
  12. arkindex_worker/worker/corpus.py +3 -3
  13. arkindex_worker/worker/dataset.py +9 -9
  14. arkindex_worker/worker/element.py +261 -231
  15. arkindex_worker/worker/entity.py +137 -206
  16. arkindex_worker/worker/image.py +3 -3
  17. arkindex_worker/worker/metadata.py +27 -38
  18. arkindex_worker/worker/process.py +24 -0
  19. arkindex_worker/worker/task.py +9 -9
  20. arkindex_worker/worker/training.py +15 -11
  21. arkindex_worker/worker/transcription.py +77 -71
  22. examples/standalone/python/worker.py +171 -0
  23. examples/tooled/python/worker.py +50 -0
  24. tests/conftest.py +22 -36
  25. tests/test_base_worker.py +1 -1
  26. tests/test_cache.py +1 -2
  27. tests/test_dataset_worker.py +1 -1
  28. tests/test_elements_worker/test_element.py +200 -26
  29. tests/test_elements_worker/{test_entity_create.py → test_entity.py} +220 -227
  30. tests/test_elements_worker/test_metadata.py +0 -47
  31. tests/test_elements_worker/test_process.py +89 -0
  32. tests/test_elements_worker/test_training.py +8 -8
  33. tests/test_elements_worker/test_worker.py +61 -14
  34. tests/test_image.py +244 -126
  35. tests/test_merge.py +0 -7
  36. tests/test_utils.py +37 -0
  37. arkindex_base_worker-0.4.0rc5.dist-info/RECORD +0 -60
  38. arkindex_worker/worker/version.py +0 -58
  39. tests/test_elements_worker/test_entity_list_and_check.py +0 -160
  40. tests/test_elements_worker/test_version.py +0 -60
  41. {arkindex_base_worker-0.4.0rc5.dist-info → arkindex_base_worker-0.5.0.dist-info/licenses}/LICENSE +0 -0
@@ -8,20 +8,17 @@ from warnings import warn
8
8
 
9
9
  from peewee import IntegrityError
10
10
 
11
+ from arkindex.exceptions import ErrorResponse
11
12
  from arkindex_worker import logger
12
13
  from arkindex_worker.cache import (
13
- CachedEntity,
14
14
  CachedTranscriptionEntity,
15
15
  unsupported_cache,
16
16
  )
17
- from arkindex_worker.models import Element, Transcription
18
- from arkindex_worker.utils import (
19
- pluralize,
20
- )
17
+ from arkindex_worker.models import Transcription
18
+ from arkindex_worker.utils import pluralize
21
19
 
22
20
 
23
21
  class Entity(TypedDict):
24
- name: str
25
22
  type_id: str
26
23
  length: int
27
24
  offset: int
@@ -36,24 +33,85 @@ class MissingEntityType(Exception):
36
33
 
37
34
 
38
35
  class EntityMixin:
36
+ def list_corpus_entity_types(self):
37
+ """
38
+ Loads available entity types in corpus.
39
+ """
40
+ self.entity_types = {
41
+ entity_type["name"]: entity_type["id"]
42
+ for entity_type in self.api_client.paginate(
43
+ "ListCorpusEntityTypes", id=self.corpus_id
44
+ )
45
+ }
46
+ count = len(self.entity_types)
47
+ logger.info(
48
+ f"Loaded {count} entity {pluralize('type', count)} in corpus ({self.corpus_id})."
49
+ )
50
+
39
51
  @unsupported_cache
52
+ def create_entity_type(self, name: str) -> None:
53
+ """
54
+ Create an entity type on the given corpus.
55
+
56
+ :param name: Name of the entity type.
57
+ """
58
+ assert name and isinstance(name, str), (
59
+ "name shouldn't be null and should be of type str"
60
+ )
61
+
62
+ try:
63
+ entity_type = self.api_client.request(
64
+ "CreateEntityType",
65
+ body={
66
+ "name": name,
67
+ "corpus": self.corpus_id,
68
+ },
69
+ )
70
+ self.entity_types[name] = entity_type["id"]
71
+ logger.info(f"Created a new entity type with name `{name}`.")
72
+ except ErrorResponse as e:
73
+ # Only reload for 400 errors
74
+ if e.status_code != 400:
75
+ raise
76
+
77
+ # Reload and make sure we have the element type now
78
+ logger.warning(
79
+ f"Unable to create the entity type `{name}`. Refreshing corpus entity types cache."
80
+ )
81
+ self.list_corpus_entity_types()
82
+ assert name in self.entity_types, (
83
+ f"Missing entity type `{name}` even after refreshing."
84
+ )
85
+
40
86
  def check_required_entity_types(
41
87
  self, entity_types: list[str], create_missing: bool = True
42
- ):
43
- """Checks that every entity type needed is available in the corpus.
88
+ ) -> None:
89
+ """
90
+ Check that every entity type needed is available in the corpus.
44
91
  Missing ones may be created automatically if needed.
45
92
 
46
93
  :param entity_types: Entity type names to search.
47
94
  :param create_missing: Whether the missing types should be created. Defaults to True.
48
- :raises MissingEntityType: When an entity type is missing and cannot create.
95
+ :raises MissingEntityType: When an entity type is missing and cannot be created.
49
96
  """
50
- # Retrieve entity_type ID
97
+ assert entity_types and isinstance(entity_types, list), (
98
+ "entity_types shouldn't be null and should be of type list"
99
+ )
100
+
101
+ for index, entity_type in enumerate(entity_types):
102
+ assert isinstance(entity_type, str), (
103
+ f"Entity type at index {index} in entity_types: Should be of type str"
104
+ )
105
+
106
+ assert create_missing is not None and isinstance(create_missing, bool), (
107
+ "create_missing shouldn't be null and should be of type bool"
108
+ )
109
+
51
110
  if not self.entity_types:
52
- # Load entity_types of corpus
53
111
  self.list_corpus_entity_types()
54
112
 
55
113
  for entity_type in entity_types:
56
- # Do nothing if type already exists
114
+ # Do nothing if the type already exists
57
115
  if entity_type in self.entity_types:
58
116
  continue
59
117
 
@@ -63,98 +121,23 @@ class EntityMixin:
63
121
  f"Entity type `{entity_type}` was not in the corpus."
64
122
  )
65
123
 
66
- # Create type if non-existent
67
- self.entity_types[entity_type] = self.api_client.request(
68
- "CreateEntityType",
69
- body={
70
- "name": entity_type,
71
- "corpus": self.corpus_id,
72
- },
73
- )["id"]
74
- logger.info(f"Created a new entity type with name `{entity_type}`.")
75
-
76
- def create_entity(
77
- self,
78
- name: str,
79
- type: str,
80
- metas=None,
81
- validated=None,
82
- ):
83
- """
84
- Create an entity on the given corpus.
85
- If cache support is enabled, a [CachedEntity][arkindex_worker.cache.CachedEntity] will also be created.
86
-
87
- :param name: Name of the entity.
88
- :param type: Type of the entity.
89
- """
90
- assert name and isinstance(
91
- name, str
92
- ), "name shouldn't be null and should be of type str"
93
- assert type and isinstance(
94
- type, str
95
- ), "type shouldn't be null and should be of type str"
96
- metas = metas or {}
97
- if metas:
98
- assert isinstance(metas, dict), "metas should be of type dict"
99
- if validated is not None:
100
- assert isinstance(validated, bool), "validated should be of type bool"
101
- if self.is_read_only:
102
- logger.warning("Cannot create entity as this worker is in read-only mode")
103
- return
104
-
105
- # Retrieve entity_type ID
106
- if not self.entity_types:
107
- # Load entity_types of corpus
108
- self.list_corpus_entity_types()
109
-
110
- entity_type_id = self.entity_types.get(type)
111
- assert entity_type_id, f"Entity type `{type}` not found in the corpus."
112
-
113
- entity = self.api_client.request(
114
- "CreateEntity",
115
- body={
116
- "name": name,
117
- "type_id": entity_type_id,
118
- "metas": metas,
119
- "validated": validated,
120
- "corpus": self.corpus_id,
121
- "worker_run_id": self.worker_run_id,
122
- },
123
- )
124
-
125
- if self.use_cache:
126
- # Store entity in local cache
127
- try:
128
- to_insert = [
129
- {
130
- "id": entity["id"],
131
- "type": type,
132
- "name": name,
133
- "validated": validated if validated is not None else False,
134
- "metas": metas,
135
- "worker_run_id": self.worker_run_id,
136
- }
137
- ]
138
- CachedEntity.insert_many(to_insert).execute()
139
- except IntegrityError as e:
140
- logger.warning(f"Couldn't save created entity in local cache: {e}")
141
-
142
- return entity["id"]
124
+ # Create the type if non-existent
125
+ self.create_entity_type(entity_type)
143
126
 
144
127
  def create_transcription_entity(
145
128
  self,
146
129
  transcription: Transcription,
147
- entity: str,
130
+ type_id: str,
148
131
  offset: int,
149
132
  length: int,
150
133
  confidence: float | None = None,
151
134
  ) -> dict[str, str | int] | None:
152
135
  """
153
- Create a link between an existing entity and an existing transcription.
136
+ Create an entity on an existing transcription.
154
137
  If cache support is enabled, a `CachedTranscriptionEntity` will also be created.
155
138
 
156
139
  :param transcription: Transcription to create the entity on.
157
- :param entity: UUID of the existing entity.
140
+ :param type_id: UUID of the entity type.
158
141
  :param offset: Starting position of the entity in the transcription's text,
159
142
  as a 0-based index.
160
143
  :param length: Length of the entity in the transcription's text.
@@ -162,18 +145,18 @@ class EntityMixin:
162
145
  :returns: A dict as returned by the ``CreateTranscriptionEntity`` API endpoint,
163
146
  or None if the worker is in read-only mode.
164
147
  """
165
- assert transcription and isinstance(
166
- transcription, Transcription
167
- ), "transcription shouldn't be null and should be a Transcription"
168
- assert entity and isinstance(
169
- entity, str
170
- ), "entity shouldn't be null and should be of type str"
171
- assert (
172
- offset is not None and isinstance(offset, int) and offset >= 0
173
- ), "offset shouldn't be null and should be a positive integer"
174
- assert (
175
- length is not None and isinstance(length, int) and length > 0
176
- ), "length shouldn't be null and should be a strictly positive integer"
148
+ assert transcription and isinstance(transcription, Transcription), (
149
+ "transcription shouldn't be null and should be a Transcription"
150
+ )
151
+ assert type_id and isinstance(type_id, str), (
152
+ "type_id shouldn't be null and should be of type str"
153
+ )
154
+ assert offset is not None and isinstance(offset, int) and offset >= 0, (
155
+ "offset shouldn't be null and should be a positive integer"
156
+ )
157
+ assert length is not None and isinstance(length, int) and length > 0, (
158
+ "length shouldn't be null and should be a strictly positive integer"
159
+ )
177
160
  assert (
178
161
  confidence is None or isinstance(confidence, float) and 0 <= confidence <= 1
179
162
  ), "confidence should be null or a float in [0..1] range"
@@ -184,7 +167,7 @@ class EntityMixin:
184
167
  return
185
168
 
186
169
  body = {
187
- "entity": entity,
170
+ "type_id": type_id,
188
171
  "length": length,
189
172
  "offset": offset,
190
173
  "worker_run_id": self.worker_run_id,
@@ -192,7 +175,7 @@ class EntityMixin:
192
175
  if confidence is not None:
193
176
  body["confidence"] = confidence
194
177
 
195
- transcription_ent = self.api_client.request(
178
+ tr_entity = self.api_client.request(
196
179
  "CreateTranscriptionEntity",
197
180
  id=transcription.id,
198
181
  body=body,
@@ -203,7 +186,7 @@ class EntityMixin:
203
186
  try:
204
187
  CachedTranscriptionEntity.create(
205
188
  transcription=transcription.id,
206
- entity=entity,
189
+ type=tr_entity["type"]["name"],
207
190
  offset=offset,
208
191
  length=length,
209
192
  worker_run_id=self.worker_run_id,
@@ -213,7 +196,8 @@ class EntityMixin:
213
196
  logger.warning(
214
197
  f"Couldn't save created transcription entity in local cache: {e}"
215
198
  )
216
- return transcription_ent
199
+
200
+ return tr_entity
217
201
 
218
202
  @unsupported_cache
219
203
  def create_transcription_entities(
@@ -222,14 +206,11 @@ class EntityMixin:
222
206
  entities: list[Entity],
223
207
  ) -> list[dict[str, str]]:
224
208
  """
225
- Create multiple entities attached to a transcription in a single API request.
209
+ Create multiple entities on a transcription in a single API request.
226
210
 
227
211
  :param transcription: Transcription to create the entity on.
228
212
  :param entities: List of dicts, one per element. Each dict can have the following keys:
229
213
 
230
- name (str)
231
- Required. Name of the entity.
232
-
233
214
  type_id (str)
234
215
  Required. ID of the EntityType of the entity.
235
216
 
@@ -242,49 +223,45 @@ class EntityMixin:
242
223
  confidence (float or None)
243
224
  Optional confidence score, between 0.0 and 1.0.
244
225
 
245
- :return: List of dicts, with each dict having a two keys, `transcription_entity_id` and `entity_id`, holding the UUID of each created object.
226
+ :return: List of strings, holding the UUID of each created object.
246
227
  """
247
- assert transcription and isinstance(
248
- transcription, Transcription
249
- ), "transcription shouldn't be null and should be of type Transcription"
228
+ assert transcription and isinstance(transcription, Transcription), (
229
+ "transcription shouldn't be null and should be of type Transcription"
230
+ )
250
231
 
251
- assert entities and isinstance(
252
- entities, list
253
- ), "entities shouldn't be null and should be of type list"
232
+ assert entities and isinstance(entities, list), (
233
+ "entities shouldn't be null and should be of type list"
234
+ )
254
235
 
255
236
  for index, entity in enumerate(entities):
256
- assert isinstance(
257
- entity, dict
258
- ), f"Entity at index {index} in entities: Should be of type dict"
259
-
260
- name = entity.get("name")
261
- assert (
262
- name and isinstance(name, str)
263
- ), f"Entity at index {index} in entities: name shouldn't be null and should be of type str"
237
+ assert isinstance(entity, dict), (
238
+ f"Entity at index {index} in entities: Should be of type dict"
239
+ )
264
240
 
265
241
  type_id = entity.get("type_id")
266
- assert (
267
- type_id and isinstance(type_id, str)
268
- ), f"Entity at index {index} in entities: type_id shouldn't be null and should be of type str"
242
+ assert type_id and isinstance(type_id, str), (
243
+ f"Entity at index {index} in entities: type_id shouldn't be null and should be of type str"
244
+ )
269
245
 
270
246
  offset = entity.get("offset")
271
- assert (
272
- offset is not None and isinstance(offset, int) and offset >= 0
273
- ), f"Entity at index {index} in entities: offset shouldn't be null and should be a positive integer"
247
+ assert offset is not None and isinstance(offset, int) and offset >= 0, (
248
+ f"Entity at index {index} in entities: offset shouldn't be null and should be a positive integer"
249
+ )
274
250
 
275
251
  length = entity.get("length")
276
- assert (
277
- length is not None and isinstance(length, int) and length > 0
278
- ), f"Entity at index {index} in entities: length shouldn't be null and should be a strictly positive integer"
252
+ assert length is not None and isinstance(length, int) and length > 0, (
253
+ f"Entity at index {index} in entities: length shouldn't be null and should be a strictly positive integer"
254
+ )
279
255
 
280
256
  confidence = entity.get("confidence")
281
- assert (
282
- confidence is None
283
- or (isinstance(confidence, float) and 0 <= confidence <= 1)
284
- ), f"Entity at index {index} in entities: confidence should be None or a float in [0..1] range"
257
+ assert confidence is None or (
258
+ isinstance(confidence, float) and 0 <= confidence <= 1
259
+ ), (
260
+ f"Entity at index {index} in entities: confidence should be None or a float in [0..1] range"
261
+ )
285
262
 
286
263
  assert len(entities) == len(
287
- set(map(itemgetter("offset", "length", "name", "type_id"), entities))
264
+ set(map(itemgetter("offset", "length", "type_id"), entities))
288
265
  ), "entities should be unique"
289
266
 
290
267
  if self.is_read_only:
@@ -293,16 +270,16 @@ class EntityMixin:
293
270
  )
294
271
  return
295
272
 
296
- created_entities = self.api_client.request(
273
+ created_tr_entities = self.api_client.request(
297
274
  "CreateTranscriptionEntities",
298
275
  id=transcription.id,
299
276
  body={
300
277
  "worker_run_id": self.worker_run_id,
301
- "entities": entities,
278
+ "transcription_entities": entities,
302
279
  },
303
- )["entities"]
280
+ )["transcription_entities"]
304
281
 
305
- return created_entities
282
+ return created_tr_entities
306
283
 
307
284
  def list_transcription_entities(
308
285
  self,
@@ -325,9 +302,9 @@ class EntityMixin:
325
302
  :param worker_run: Restrict to entities created by a worker run with this UUID. Set to False to look for manually created entities.
326
303
  """
327
304
  query_params = {}
328
- assert transcription and isinstance(
329
- transcription, Transcription
330
- ), "transcription shouldn't be null and should be a Transcription"
305
+ assert transcription and isinstance(transcription, Transcription), (
306
+ "transcription shouldn't be null and should be a Transcription"
307
+ )
331
308
 
332
309
  if worker_version is not None:
333
310
  warn(
@@ -335,71 +312,25 @@ class EntityMixin:
335
312
  DeprecationWarning,
336
313
  stacklevel=1,
337
314
  )
338
- assert isinstance(
339
- worker_version, str | bool
340
- ), "worker_version should be of type str or bool"
315
+ assert isinstance(worker_version, str | bool), (
316
+ "worker_version should be of type str or bool"
317
+ )
341
318
 
342
319
  if isinstance(worker_version, bool):
343
- assert (
344
- worker_version is False
345
- ), "if of type bool, worker_version can only be set to False"
320
+ assert worker_version is False, (
321
+ "if of type bool, worker_version can only be set to False"
322
+ )
346
323
  query_params["worker_version"] = worker_version
347
324
  if worker_run is not None:
348
- assert isinstance(
349
- worker_run, str | bool
350
- ), "worker_run should be of type str or bool"
325
+ assert isinstance(worker_run, str | bool), (
326
+ "worker_run should be of type str or bool"
327
+ )
351
328
  if isinstance(worker_run, bool):
352
- assert (
353
- worker_run is False
354
- ), "if of type bool, worker_run can only be set to False"
329
+ assert worker_run is False, (
330
+ "if of type bool, worker_run can only be set to False"
331
+ )
355
332
  query_params["worker_run"] = worker_run
356
333
 
357
334
  return self.api_client.paginate(
358
335
  "ListTranscriptionEntities", id=transcription.id, **query_params
359
336
  )
360
-
361
- def list_corpus_entities(
362
- self,
363
- name: str | None = None,
364
- parent: Element | None = None,
365
- ):
366
- """
367
- List all entities in the worker's corpus and store them in the ``self.entities`` cache.
368
- :param name: Filter entities by part of their name (case-insensitive)
369
- :param parent: Restrict entities to those linked to all transcriptions of an element and all its descendants. Note that links to metadata are ignored.
370
- """
371
- query_params = {}
372
-
373
- if name is not None:
374
- assert name and isinstance(name, str), "name should be of type str"
375
- query_params["name"] = name
376
-
377
- if parent is not None:
378
- assert isinstance(parent, Element), "parent should be of type Element"
379
- query_params["parent"] = parent.id
380
-
381
- self.entities = {
382
- entity["id"]: entity
383
- for entity in self.api_client.paginate(
384
- "ListCorpusEntities", id=self.corpus_id, **query_params
385
- )
386
- }
387
- count = len(self.entities)
388
- logger.info(
389
- f'Loaded {count} {pluralize("entity", count)} in corpus ({self.corpus_id})'
390
- )
391
-
392
- def list_corpus_entity_types(self):
393
- """
394
- Loads available entity types in corpus.
395
- """
396
- self.entity_types = {
397
- entity_type["name"]: entity_type["id"]
398
- for entity_type in self.api_client.paginate(
399
- "ListCorpusEntityTypes", id=self.corpus_id
400
- )
401
- }
402
- count = len(self.entity_types)
403
- logger.info(
404
- f'Loaded {count} entity {pluralize("type", count)} in corpus ({self.corpus_id}).'
405
- )
@@ -14,8 +14,8 @@ class ImageMixin:
14
14
  :param url: URL of the image.
15
15
  :returns: The created image.
16
16
  """
17
- assert url and isinstance(
18
- url, str
19
- ), "url shouldn't be null and should be of type str"
17
+ assert url and isinstance(url, str), (
18
+ "url shouldn't be null and should be of type str"
19
+ )
20
20
 
21
21
  return Image(self.api_client.request("CreateIIIFURL", body={"url": url}))
@@ -64,7 +64,6 @@ class MetaDataMixin:
64
64
  type: MetaType,
65
65
  name: str,
66
66
  value: str,
67
- entity: str | None = None,
68
67
  ) -> str:
69
68
  """
70
69
  Create a metadata on the given element through API.
@@ -73,23 +72,20 @@ class MetaDataMixin:
73
72
  :param type: Type of the metadata.
74
73
  :param name: Name of the metadata.
75
74
  :param value: Value of the metadata.
76
- :param entity: UUID of an entity this metadata is related to.
77
75
  :returns: UUID of the created metadata.
78
76
  """
79
- assert element and isinstance(
80
- element, Element | CachedElement
81
- ), "element shouldn't be null and should be of type Element or CachedElement"
82
- assert type and isinstance(
83
- type, MetaType
84
- ), "type shouldn't be null and should be of type MetaType"
85
- assert name and isinstance(
86
- name, str
87
- ), "name shouldn't be null and should be of type str"
88
- assert value and isinstance(
89
- value, str
90
- ), "value shouldn't be null and should be of type str"
91
- if entity:
92
- assert isinstance(entity, str), "entity should be of type str"
77
+ assert element and isinstance(element, Element | CachedElement), (
78
+ "element shouldn't be null and should be of type Element or CachedElement"
79
+ )
80
+ assert type and isinstance(type, MetaType), (
81
+ "type shouldn't be null and should be of type MetaType"
82
+ )
83
+ assert name and isinstance(name, str), (
84
+ "name shouldn't be null and should be of type str"
85
+ )
86
+ assert value and isinstance(value, str), (
87
+ "value shouldn't be null and should be of type str"
88
+ )
93
89
  if self.is_read_only:
94
90
  logger.warning("Cannot create metadata as this worker is in read-only mode")
95
91
  return
@@ -101,7 +97,6 @@ class MetaDataMixin:
101
97
  "type": type.value,
102
98
  "name": name,
103
99
  "value": value,
104
- "entity_id": entity,
105
100
  "worker_run_id": self.worker_run_id,
106
101
  },
107
102
  )
@@ -125,48 +120,42 @@ class MetaDataMixin:
125
120
  - type: MetaType
126
121
  - name: str
127
122
  - value: str | int | float
128
- - entity_id: str | None
129
123
  :param batch_size: The size of each batch, which will be used to split the publication to avoid API errors.
130
124
 
131
125
  :returns: A list of dicts as returned in the ``metadata_list`` field by the ``CreateMetaDataBulk`` API endpoint.
132
126
  """
133
- assert element and isinstance(
134
- element, Element | CachedElement
135
- ), "element shouldn't be null and should be of type Element or CachedElement"
127
+ assert element and isinstance(element, Element | CachedElement), (
128
+ "element shouldn't be null and should be of type Element or CachedElement"
129
+ )
136
130
 
137
- assert metadata_list and isinstance(
138
- metadata_list, list
139
- ), "metadata_list shouldn't be null and should be of type list of dict"
131
+ assert metadata_list and isinstance(metadata_list, list), (
132
+ "metadata_list shouldn't be null and should be of type list of dict"
133
+ )
140
134
 
141
135
  # Make a copy to avoid modifying the metadata_list argument
142
136
  metas = []
143
137
  for index, metadata in enumerate(metadata_list):
144
- assert isinstance(
145
- metadata, dict
146
- ), f"Element at index {index} in metadata_list: Should be of type dict"
138
+ assert isinstance(metadata, dict), (
139
+ f"Element at index {index} in metadata_list: Should be of type dict"
140
+ )
147
141
 
148
142
  assert metadata.get("type") and isinstance(
149
143
  metadata.get("type"), MetaType
150
144
  ), "type shouldn't be null and should be of type MetaType"
151
145
 
152
- assert metadata.get("name") and isinstance(
153
- metadata.get("name"), str
154
- ), "name shouldn't be null and should be of type str"
146
+ assert metadata.get("name") and isinstance(metadata.get("name"), str), (
147
+ "name shouldn't be null and should be of type str"
148
+ )
155
149
 
156
150
  assert metadata.get("value") is not None and isinstance(
157
151
  metadata.get("value"), str | float | int
158
152
  ), "value shouldn't be null and should be of type (str or float or int)"
159
153
 
160
- assert metadata.get("entity_id") is None or isinstance(
161
- metadata.get("entity_id"), str
162
- ), "entity_id should be None or a str"
163
-
164
154
  metas.append(
165
155
  {
166
156
  "type": metadata.get("type").value,
167
157
  "name": metadata.get("name"),
168
158
  "value": metadata.get("value"),
169
- "entity_id": metadata.get("entity_id"),
170
159
  }
171
160
  )
172
161
 
@@ -199,9 +188,9 @@ class MetaDataMixin:
199
188
  :param element: The element to list metadata on.
200
189
  :param load_parents: Also include all metadata from the element's parents in the response.
201
190
  """
202
- assert element and isinstance(
203
- element, Element | CachedElement
204
- ), "element shouldn't be null and should be of type Element or CachedElement"
191
+ assert element and isinstance(element, Element | CachedElement), (
192
+ "element shouldn't be null and should be of type Element or CachedElement"
193
+ )
205
194
 
206
195
  query_params = {}
207
196
  if load_parents is not None:
@@ -1,5 +1,11 @@
1
+ from collections.abc import Iterator
1
2
  from enum import Enum
2
3
 
4
+ from arkindex_worker.cache import unsupported_cache
5
+
6
+ # Increases the number of elements returned per page by the API
7
+ PROCESS_ELEMENTS_PAGE_SIZE = 500
8
+
3
9
 
4
10
  class ActivityState(Enum):
5
11
  """
@@ -66,3 +72,21 @@ class ProcessMode(Enum):
66
72
  """
67
73
  Export processes.
68
74
  """
75
+
76
+
77
+ class ProcessMixin:
78
+ @unsupported_cache
79
+ def list_process_elements(self, with_image: bool = False) -> Iterator[dict]:
80
+ """
81
+ List the elements of a process.
82
+
83
+ :param with_image: whether or not to include zone and image information in the elements response.
84
+ :returns: the process' elements.
85
+ """
86
+ return self.api_client.paginate(
87
+ "ListProcessElements",
88
+ id=self.process_information["id"],
89
+ with_image=with_image,
90
+ allow_missing_data=True,
91
+ page_size=PROCESS_ELEMENTS_PAGE_SIZE,
92
+ )