arkindex-base-worker 0.5.0b3__tar.gz → 0.5.0rc1__tar.gz

This diff compares the contents of two publicly available package versions, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
Files changed (66)
  1. {arkindex_base_worker-0.5.0b3 → arkindex_base_worker-0.5.0rc1}/PKG-INFO +1 -1
  2. {arkindex_base_worker-0.5.0b3 → arkindex_base_worker-0.5.0rc1}/arkindex_base_worker.egg-info/PKG-INFO +1 -1
  3. {arkindex_base_worker-0.5.0b3 → arkindex_base_worker-0.5.0rc1}/arkindex_base_worker.egg-info/SOURCES.txt +1 -2
  4. {arkindex_base_worker-0.5.0b3 → arkindex_base_worker-0.5.0rc1}/arkindex_worker/cache.py +3 -22
  5. {arkindex_base_worker-0.5.0b3 → arkindex_base_worker-0.5.0rc1}/arkindex_worker/worker/entity.py +17 -126
  6. {arkindex_base_worker-0.5.0b3 → arkindex_base_worker-0.5.0rc1}/arkindex_worker/worker/metadata.py +0 -11
  7. {arkindex_base_worker-0.5.0b3 → arkindex_base_worker-0.5.0rc1}/pyproject.toml +1 -1
  8. {arkindex_base_worker-0.5.0b3 → arkindex_base_worker-0.5.0rc1}/tests/test_cache.py +1 -2
  9. arkindex_base_worker-0.5.0b3/tests/test_elements_worker/test_entity_create.py → arkindex_base_worker-0.5.0rc1/tests/test_elements_worker/test_entity.py +220 -227
  10. {arkindex_base_worker-0.5.0b3 → arkindex_base_worker-0.5.0rc1}/tests/test_elements_worker/test_metadata.py +0 -47
  11. {arkindex_base_worker-0.5.0b3 → arkindex_base_worker-0.5.0rc1}/tests/test_merge.py +0 -7
  12. arkindex_base_worker-0.5.0b3/tests/test_elements_worker/test_entity_list_and_check.py +0 -293
  13. {arkindex_base_worker-0.5.0b3 → arkindex_base_worker-0.5.0rc1}/LICENSE +0 -0
  14. {arkindex_base_worker-0.5.0b3 → arkindex_base_worker-0.5.0rc1}/README.md +0 -0
  15. {arkindex_base_worker-0.5.0b3 → arkindex_base_worker-0.5.0rc1}/arkindex_base_worker.egg-info/dependency_links.txt +0 -0
  16. {arkindex_base_worker-0.5.0b3 → arkindex_base_worker-0.5.0rc1}/arkindex_base_worker.egg-info/requires.txt +0 -0
  17. {arkindex_base_worker-0.5.0b3 → arkindex_base_worker-0.5.0rc1}/arkindex_base_worker.egg-info/top_level.txt +0 -0
  18. {arkindex_base_worker-0.5.0b3 → arkindex_base_worker-0.5.0rc1}/arkindex_worker/__init__.py +0 -0
  19. {arkindex_base_worker-0.5.0b3 → arkindex_base_worker-0.5.0rc1}/arkindex_worker/image.py +0 -0
  20. {arkindex_base_worker-0.5.0b3 → arkindex_base_worker-0.5.0rc1}/arkindex_worker/models.py +0 -0
  21. {arkindex_base_worker-0.5.0b3 → arkindex_base_worker-0.5.0rc1}/arkindex_worker/utils.py +0 -0
  22. {arkindex_base_worker-0.5.0b3 → arkindex_base_worker-0.5.0rc1}/arkindex_worker/worker/__init__.py +0 -0
  23. {arkindex_base_worker-0.5.0b3 → arkindex_base_worker-0.5.0rc1}/arkindex_worker/worker/base.py +0 -0
  24. {arkindex_base_worker-0.5.0b3 → arkindex_base_worker-0.5.0rc1}/arkindex_worker/worker/classification.py +0 -0
  25. {arkindex_base_worker-0.5.0b3 → arkindex_base_worker-0.5.0rc1}/arkindex_worker/worker/corpus.py +0 -0
  26. {arkindex_base_worker-0.5.0b3 → arkindex_base_worker-0.5.0rc1}/arkindex_worker/worker/dataset.py +0 -0
  27. {arkindex_base_worker-0.5.0b3 → arkindex_base_worker-0.5.0rc1}/arkindex_worker/worker/element.py +0 -0
  28. {arkindex_base_worker-0.5.0b3 → arkindex_base_worker-0.5.0rc1}/arkindex_worker/worker/image.py +0 -0
  29. {arkindex_base_worker-0.5.0b3 → arkindex_base_worker-0.5.0rc1}/arkindex_worker/worker/process.py +0 -0
  30. {arkindex_base_worker-0.5.0b3 → arkindex_base_worker-0.5.0rc1}/arkindex_worker/worker/task.py +0 -0
  31. {arkindex_base_worker-0.5.0b3 → arkindex_base_worker-0.5.0rc1}/arkindex_worker/worker/training.py +0 -0
  32. {arkindex_base_worker-0.5.0b3 → arkindex_base_worker-0.5.0rc1}/arkindex_worker/worker/transcription.py +0 -0
  33. {arkindex_base_worker-0.5.0b3 → arkindex_base_worker-0.5.0rc1}/examples/standalone/python/worker.py +0 -0
  34. {arkindex_base_worker-0.5.0b3 → arkindex_base_worker-0.5.0rc1}/examples/tooled/python/worker.py +0 -0
  35. {arkindex_base_worker-0.5.0b3 → arkindex_base_worker-0.5.0rc1}/hooks/pre_gen_project.py +0 -0
  36. {arkindex_base_worker-0.5.0b3 → arkindex_base_worker-0.5.0rc1}/setup.cfg +0 -0
  37. {arkindex_base_worker-0.5.0b3 → arkindex_base_worker-0.5.0rc1}/tests/__init__.py +0 -0
  38. {arkindex_base_worker-0.5.0b3 → arkindex_base_worker-0.5.0rc1}/tests/conftest.py +0 -0
  39. {arkindex_base_worker-0.5.0b3 → arkindex_base_worker-0.5.0rc1}/tests/test_base_worker.py +0 -0
  40. {arkindex_base_worker-0.5.0b3 → arkindex_base_worker-0.5.0rc1}/tests/test_dataset_worker.py +0 -0
  41. {arkindex_base_worker-0.5.0b3 → arkindex_base_worker-0.5.0rc1}/tests/test_element.py +0 -0
  42. {arkindex_base_worker-0.5.0b3 → arkindex_base_worker-0.5.0rc1}/tests/test_elements_worker/__init__.py +0 -0
  43. {arkindex_base_worker-0.5.0b3 → arkindex_base_worker-0.5.0rc1}/tests/test_elements_worker/test_classification.py +0 -0
  44. {arkindex_base_worker-0.5.0b3 → arkindex_base_worker-0.5.0rc1}/tests/test_elements_worker/test_cli.py +0 -0
  45. {arkindex_base_worker-0.5.0b3 → arkindex_base_worker-0.5.0rc1}/tests/test_elements_worker/test_corpus.py +0 -0
  46. {arkindex_base_worker-0.5.0b3 → arkindex_base_worker-0.5.0rc1}/tests/test_elements_worker/test_dataset.py +0 -0
  47. {arkindex_base_worker-0.5.0b3 → arkindex_base_worker-0.5.0rc1}/tests/test_elements_worker/test_element.py +0 -0
  48. {arkindex_base_worker-0.5.0b3 → arkindex_base_worker-0.5.0rc1}/tests/test_elements_worker/test_element_create_multiple.py +0 -0
  49. {arkindex_base_worker-0.5.0b3 → arkindex_base_worker-0.5.0rc1}/tests/test_elements_worker/test_element_create_single.py +0 -0
  50. {arkindex_base_worker-0.5.0b3 → arkindex_base_worker-0.5.0rc1}/tests/test_elements_worker/test_element_list_children.py +0 -0
  51. {arkindex_base_worker-0.5.0b3 → arkindex_base_worker-0.5.0rc1}/tests/test_elements_worker/test_element_list_parents.py +0 -0
  52. {arkindex_base_worker-0.5.0b3 → arkindex_base_worker-0.5.0rc1}/tests/test_elements_worker/test_image.py +0 -0
  53. {arkindex_base_worker-0.5.0b3 → arkindex_base_worker-0.5.0rc1}/tests/test_elements_worker/test_process.py +0 -0
  54. {arkindex_base_worker-0.5.0b3 → arkindex_base_worker-0.5.0rc1}/tests/test_elements_worker/test_task.py +0 -0
  55. {arkindex_base_worker-0.5.0b3 → arkindex_base_worker-0.5.0rc1}/tests/test_elements_worker/test_training.py +0 -0
  56. {arkindex_base_worker-0.5.0b3 → arkindex_base_worker-0.5.0rc1}/tests/test_elements_worker/test_transcription_create.py +0 -0
  57. {arkindex_base_worker-0.5.0b3 → arkindex_base_worker-0.5.0rc1}/tests/test_elements_worker/test_transcription_create_with_elements.py +0 -0
  58. {arkindex_base_worker-0.5.0b3 → arkindex_base_worker-0.5.0rc1}/tests/test_elements_worker/test_transcription_list.py +0 -0
  59. {arkindex_base_worker-0.5.0b3 → arkindex_base_worker-0.5.0rc1}/tests/test_elements_worker/test_worker.py +0 -0
  60. {arkindex_base_worker-0.5.0b3 → arkindex_base_worker-0.5.0rc1}/tests/test_image.py +0 -0
  61. {arkindex_base_worker-0.5.0b3 → arkindex_base_worker-0.5.0rc1}/tests/test_utils.py +0 -0
  62. {arkindex_base_worker-0.5.0b3 → arkindex_base_worker-0.5.0rc1}/worker-demo/tests/__init__.py +0 -0
  63. {arkindex_base_worker-0.5.0b3 → arkindex_base_worker-0.5.0rc1}/worker-demo/tests/conftest.py +0 -0
  64. {arkindex_base_worker-0.5.0b3 → arkindex_base_worker-0.5.0rc1}/worker-demo/tests/test_worker.py +0 -0
  65. {arkindex_base_worker-0.5.0b3 → arkindex_base_worker-0.5.0rc1}/worker-demo/worker_demo/__init__.py +0 -0
  66. {arkindex_base_worker-0.5.0b3 → arkindex_base_worker-0.5.0rc1}/worker-demo/worker_demo/worker.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: arkindex-base-worker
3
- Version: 0.5.0b3
3
+ Version: 0.5.0rc1
4
4
  Summary: Base Worker to easily build Arkindex ML workflows
5
5
  Author-email: Teklia <contact@teklia.com>
6
6
  Maintainer-email: Teklia <contact@teklia.com>
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: arkindex-base-worker
3
- Version: 0.5.0b3
3
+ Version: 0.5.0rc1
4
4
  Summary: Base Worker to easily build Arkindex ML workflows
5
5
  Author-email: Teklia <contact@teklia.com>
6
6
  Maintainer-email: Teklia <contact@teklia.com>
@@ -46,8 +46,7 @@ tests/test_elements_worker/test_element_create_multiple.py
46
46
  tests/test_elements_worker/test_element_create_single.py
47
47
  tests/test_elements_worker/test_element_list_children.py
48
48
  tests/test_elements_worker/test_element_list_parents.py
49
- tests/test_elements_worker/test_entity_create.py
50
- tests/test_elements_worker/test_entity_list_and_check.py
49
+ tests/test_elements_worker/test_entity.py
51
50
  tests/test_elements_worker/test_image.py
52
51
  tests/test_elements_worker/test_metadata.py
53
52
  tests/test_elements_worker/test_process.py
@@ -206,23 +206,6 @@ class CachedClassification(Model):
206
206
  table_name = "classifications"
207
207
 
208
208
 
209
- class CachedEntity(Model):
210
- """
211
- Cache entity table
212
- """
213
-
214
- id = UUIDField(primary_key=True)
215
- type = CharField(max_length=50)
216
- name = TextField()
217
- validated = BooleanField(default=False)
218
- metas = JSONField(null=True)
219
- worker_run_id = UUIDField(null=True)
220
-
221
- class Meta:
222
- database = db
223
- table_name = "entities"
224
-
225
-
226
209
  class CachedTranscriptionEntity(Model):
227
210
  """
228
211
  Cache transcription entity table
@@ -231,14 +214,14 @@ class CachedTranscriptionEntity(Model):
231
214
  transcription = ForeignKeyField(
232
215
  CachedTranscription, backref="transcription_entities"
233
216
  )
234
- entity = ForeignKeyField(CachedEntity, backref="transcription_entities")
217
+ type = CharField(max_length=50)
235
218
  offset = IntegerField(constraints=[Check("offset >= 0")])
236
219
  length = IntegerField(constraints=[Check("length > 0")])
237
220
  worker_run_id = UUIDField(null=True)
238
221
  confidence = FloatField(null=True)
239
222
 
240
223
  class Meta:
241
- primary_key = CompositeKey("transcription", "entity")
224
+ primary_key = CompositeKey("transcription", "type")
242
225
  database = db
243
226
  table_name = "transcription_entities"
244
227
 
@@ -272,12 +255,11 @@ MODELS = [
272
255
  CachedElement,
273
256
  CachedTranscription,
274
257
  CachedClassification,
275
- CachedEntity,
276
258
  CachedTranscriptionEntity,
277
259
  CachedDataset,
278
260
  CachedDatasetElement,
279
261
  ]
280
- SQL_VERSION = 3
262
+ SQL_VERSION = 4
281
263
 
282
264
 
283
265
  def init_cache_db(path: Path):
@@ -365,7 +347,6 @@ def merge_parents_cache(paths: list, current_database: Path):
365
347
  f"REPLACE INTO elements SELECT * FROM source_{idx}.elements;",
366
348
  f"REPLACE INTO transcriptions SELECT * FROM source_{idx}.transcriptions;",
367
349
  f"REPLACE INTO classifications SELECT * FROM source_{idx}.classifications;",
368
- f"REPLACE INTO entities SELECT * FROM source_{idx}.entities;",
369
350
  f"REPLACE INTO transcription_entities SELECT * FROM source_{idx}.transcription_entities;",
370
351
  f"REPLACE INTO datasets SELECT * FROM source_{idx}.datasets;",
371
352
  f"REPLACE INTO dataset_elements SELECT * FROM source_{idx}.dataset_elements;",
@@ -11,16 +11,14 @@ from peewee import IntegrityError
11
11
  from arkindex.exceptions import ErrorResponse
12
12
  from arkindex_worker import logger
13
13
  from arkindex_worker.cache import (
14
- CachedEntity,
15
14
  CachedTranscriptionEntity,
16
15
  unsupported_cache,
17
16
  )
18
- from arkindex_worker.models import Element, Transcription
17
+ from arkindex_worker.models import Transcription
19
18
  from arkindex_worker.utils import pluralize
20
19
 
21
20
 
22
21
  class Entity(TypedDict):
23
- name: str
24
22
  type_id: str
25
23
  length: int
26
24
  offset: int
@@ -126,88 +124,20 @@ class EntityMixin:
126
124
  # Create the type if non-existent
127
125
  self.create_entity_type(entity_type)
128
126
 
129
- def create_entity(
130
- self,
131
- name: str,
132
- type: str,
133
- metas=None,
134
- validated=None,
135
- ):
136
- """
137
- Create an entity on the given corpus.
138
- If cache support is enabled, a [CachedEntity][arkindex_worker.cache.CachedEntity] will also be created.
139
-
140
- :param name: Name of the entity.
141
- :param type: Type of the entity.
142
- """
143
- assert name and isinstance(name, str), (
144
- "name shouldn't be null and should be of type str"
145
- )
146
- assert type and isinstance(type, str), (
147
- "type shouldn't be null and should be of type str"
148
- )
149
- metas = metas or {}
150
- if metas:
151
- assert isinstance(metas, dict), "metas should be of type dict"
152
- if validated is not None:
153
- assert isinstance(validated, bool), "validated should be of type bool"
154
- if self.is_read_only:
155
- logger.warning("Cannot create entity as this worker is in read-only mode")
156
- return
157
-
158
- # Retrieve entity_type ID
159
- if not self.entity_types:
160
- # Load entity_types of corpus
161
- self.list_corpus_entity_types()
162
-
163
- entity_type_id = self.entity_types.get(type)
164
- assert entity_type_id, f"Entity type `{type}` not found in the corpus."
165
-
166
- entity = self.api_client.request(
167
- "CreateEntity",
168
- body={
169
- "name": name,
170
- "type_id": entity_type_id,
171
- "metas": metas,
172
- "validated": validated,
173
- "corpus": self.corpus_id,
174
- "worker_run_id": self.worker_run_id,
175
- },
176
- )
177
-
178
- if self.use_cache:
179
- # Store entity in local cache
180
- try:
181
- to_insert = [
182
- {
183
- "id": entity["id"],
184
- "type": type,
185
- "name": name,
186
- "validated": validated if validated is not None else False,
187
- "metas": metas,
188
- "worker_run_id": self.worker_run_id,
189
- }
190
- ]
191
- CachedEntity.insert_many(to_insert).execute()
192
- except IntegrityError as e:
193
- logger.warning(f"Couldn't save created entity in local cache: {e}")
194
-
195
- return entity["id"]
196
-
197
127
  def create_transcription_entity(
198
128
  self,
199
129
  transcription: Transcription,
200
- entity: str,
130
+ type_id: str,
201
131
  offset: int,
202
132
  length: int,
203
133
  confidence: float | None = None,
204
134
  ) -> dict[str, str | int] | None:
205
135
  """
206
- Create a link between an existing entity and an existing transcription.
136
+ Create an entity on an existing transcription.
207
137
  If cache support is enabled, a `CachedTranscriptionEntity` will also be created.
208
138
 
209
139
  :param transcription: Transcription to create the entity on.
210
- :param entity: UUID of the existing entity.
140
+ :param type_id: UUID of the entity type.
211
141
  :param offset: Starting position of the entity in the transcription's text,
212
142
  as a 0-based index.
213
143
  :param length: Length of the entity in the transcription's text.
@@ -218,8 +148,8 @@ class EntityMixin:
218
148
  assert transcription and isinstance(transcription, Transcription), (
219
149
  "transcription shouldn't be null and should be a Transcription"
220
150
  )
221
- assert entity and isinstance(entity, str), (
222
- "entity shouldn't be null and should be of type str"
151
+ assert type_id and isinstance(type_id, str), (
152
+ "type_id shouldn't be null and should be of type str"
223
153
  )
224
154
  assert offset is not None and isinstance(offset, int) and offset >= 0, (
225
155
  "offset shouldn't be null and should be a positive integer"
@@ -237,7 +167,7 @@ class EntityMixin:
237
167
  return
238
168
 
239
169
  body = {
240
- "entity": entity,
170
+ "type_id": type_id,
241
171
  "length": length,
242
172
  "offset": offset,
243
173
  "worker_run_id": self.worker_run_id,
@@ -245,7 +175,7 @@ class EntityMixin:
245
175
  if confidence is not None:
246
176
  body["confidence"] = confidence
247
177
 
248
- transcription_ent = self.api_client.request(
178
+ tr_entity = self.api_client.request(
249
179
  "CreateTranscriptionEntity",
250
180
  id=transcription.id,
251
181
  body=body,
@@ -256,7 +186,7 @@ class EntityMixin:
256
186
  try:
257
187
  CachedTranscriptionEntity.create(
258
188
  transcription=transcription.id,
259
- entity=entity,
189
+ type=tr_entity["type"]["name"],
260
190
  offset=offset,
261
191
  length=length,
262
192
  worker_run_id=self.worker_run_id,
@@ -267,7 +197,7 @@ class EntityMixin:
267
197
  f"Couldn't save created transcription entity in local cache: {e}"
268
198
  )
269
199
 
270
- return transcription_ent
200
+ return tr_entity
271
201
 
272
202
  @unsupported_cache
273
203
  def create_transcription_entities(
@@ -276,14 +206,11 @@ class EntityMixin:
276
206
  entities: list[Entity],
277
207
  ) -> list[dict[str, str]]:
278
208
  """
279
- Create multiple entities attached to a transcription in a single API request.
209
+ Create multiple entities on a transcription in a single API request.
280
210
 
281
211
  :param transcription: Transcription to create the entity on.
282
212
  :param entities: List of dicts, one per element. Each dict can have the following keys:
283
213
 
284
- name (str)
285
- Required. Name of the entity.
286
-
287
214
  type_id (str)
288
215
  Required. ID of the EntityType of the entity.
289
216
 
@@ -296,7 +223,7 @@ class EntityMixin:
296
223
  confidence (float or None)
297
224
  Optional confidence score, between 0.0 and 1.0.
298
225
 
299
- :return: List of dicts, with each dict having a two keys, `transcription_entity_id` and `entity_id`, holding the UUID of each created object.
226
+ :return: List of strings, holding the UUID of each created object.
300
227
  """
301
228
  assert transcription and isinstance(transcription, Transcription), (
302
229
  "transcription shouldn't be null and should be of type Transcription"
@@ -311,11 +238,6 @@ class EntityMixin:
311
238
  f"Entity at index {index} in entities: Should be of type dict"
312
239
  )
313
240
 
314
- name = entity.get("name")
315
- assert name and isinstance(name, str), (
316
- f"Entity at index {index} in entities: name shouldn't be null and should be of type str"
317
- )
318
-
319
241
  type_id = entity.get("type_id")
320
242
  assert type_id and isinstance(type_id, str), (
321
243
  f"Entity at index {index} in entities: type_id shouldn't be null and should be of type str"
@@ -339,7 +261,7 @@ class EntityMixin:
339
261
  )
340
262
 
341
263
  assert len(entities) == len(
342
- set(map(itemgetter("offset", "length", "name", "type_id"), entities))
264
+ set(map(itemgetter("offset", "length", "type_id"), entities))
343
265
  ), "entities should be unique"
344
266
 
345
267
  if self.is_read_only:
@@ -348,16 +270,16 @@ class EntityMixin:
348
270
  )
349
271
  return
350
272
 
351
- created_entities = self.api_client.request(
273
+ created_tr_entities = self.api_client.request(
352
274
  "CreateTranscriptionEntities",
353
275
  id=transcription.id,
354
276
  body={
355
277
  "worker_run_id": self.worker_run_id,
356
- "entities": entities,
278
+ "transcription_entities": entities,
357
279
  },
358
- )["entities"]
280
+ )["transcription_entities"]
359
281
 
360
- return created_entities
282
+ return created_tr_entities
361
283
 
362
284
  def list_transcription_entities(
363
285
  self,
@@ -412,34 +334,3 @@ class EntityMixin:
412
334
  return self.api_client.paginate(
413
335
  "ListTranscriptionEntities", id=transcription.id, **query_params
414
336
  )
415
-
416
- def list_corpus_entities(
417
- self,
418
- name: str | None = None,
419
- parent: Element | None = None,
420
- ):
421
- """
422
- List all entities in the worker's corpus and store them in the ``self.entities`` cache.
423
- :param name: Filter entities by part of their name (case-insensitive)
424
- :param parent: Restrict entities to those linked to all transcriptions of an element and all its descendants. Note that links to metadata are ignored.
425
- """
426
- query_params = {}
427
-
428
- if name is not None:
429
- assert name and isinstance(name, str), "name should be of type str"
430
- query_params["name"] = name
431
-
432
- if parent is not None:
433
- assert isinstance(parent, Element), "parent should be of type Element"
434
- query_params["parent"] = parent.id
435
-
436
- self.entities = {
437
- entity["id"]: entity
438
- for entity in self.api_client.paginate(
439
- "ListCorpusEntities", id=self.corpus_id, **query_params
440
- )
441
- }
442
- count = len(self.entities)
443
- logger.info(
444
- f"Loaded {count} {pluralize('entity', count)} in corpus ({self.corpus_id})"
445
- )
@@ -64,7 +64,6 @@ class MetaDataMixin:
64
64
  type: MetaType,
65
65
  name: str,
66
66
  value: str,
67
- entity: str | None = None,
68
67
  ) -> str:
69
68
  """
70
69
  Create a metadata on the given element through API.
@@ -73,7 +72,6 @@ class MetaDataMixin:
73
72
  :param type: Type of the metadata.
74
73
  :param name: Name of the metadata.
75
74
  :param value: Value of the metadata.
76
- :param entity: UUID of an entity this metadata is related to.
77
75
  :returns: UUID of the created metadata.
78
76
  """
79
77
  assert element and isinstance(element, Element | CachedElement), (
@@ -88,8 +86,6 @@ class MetaDataMixin:
88
86
  assert value and isinstance(value, str), (
89
87
  "value shouldn't be null and should be of type str"
90
88
  )
91
- if entity:
92
- assert isinstance(entity, str), "entity should be of type str"
93
89
  if self.is_read_only:
94
90
  logger.warning("Cannot create metadata as this worker is in read-only mode")
95
91
  return
@@ -101,7 +97,6 @@ class MetaDataMixin:
101
97
  "type": type.value,
102
98
  "name": name,
103
99
  "value": value,
104
- "entity_id": entity,
105
100
  "worker_run_id": self.worker_run_id,
106
101
  },
107
102
  )
@@ -125,7 +120,6 @@ class MetaDataMixin:
125
120
  - type: MetaType
126
121
  - name: str
127
122
  - value: str | int | float
128
- - entity_id: str | None
129
123
  :param batch_size: The size of each batch, which will be used to split the publication to avoid API errors.
130
124
 
131
125
  :returns: A list of dicts as returned in the ``metadata_list`` field by the ``CreateMetaDataBulk`` API endpoint.
@@ -157,16 +151,11 @@ class MetaDataMixin:
157
151
  metadata.get("value"), str | float | int
158
152
  ), "value shouldn't be null and should be of type (str or float or int)"
159
153
 
160
- assert metadata.get("entity_id") is None or isinstance(
161
- metadata.get("entity_id"), str
162
- ), "entity_id should be None or a str"
163
-
164
154
  metas.append(
165
155
  {
166
156
  "type": metadata.get("type").value,
167
157
  "name": metadata.get("name"),
168
158
  "value": metadata.get("value"),
169
- "entity_id": metadata.get("entity_id"),
170
159
  }
171
160
  )
172
161
 
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "arkindex-base-worker"
7
- version = "0.5.0b3"
7
+ version = "0.5.0rc1"
8
8
  description = "Base Worker to easily build Arkindex ML workflows"
9
9
  license = { file = "LICENSE" }
10
10
  dependencies = [
@@ -60,9 +60,8 @@ def test_create_tables(tmp_path):
60
60
  CREATE TABLE "dataset_elements" ("id" TEXT NOT NULL PRIMARY KEY, "element_id" TEXT NOT NULL, "dataset_id" TEXT NOT NULL, "set_name" VARCHAR(255) NOT NULL, FOREIGN KEY ("element_id") REFERENCES "elements" ("id"), FOREIGN KEY ("dataset_id") REFERENCES "datasets" ("id"))
61
61
  CREATE TABLE "datasets" ("id" TEXT NOT NULL PRIMARY KEY, "name" VARCHAR(255) NOT NULL, "state" VARCHAR(255) NOT NULL DEFAULT 'open', "sets" TEXT NOT NULL)
62
62
  CREATE TABLE "elements" ("id" TEXT NOT NULL PRIMARY KEY, "parent_id" TEXT, "type" VARCHAR(50) NOT NULL, "image_id" TEXT, "polygon" text, "rotation_angle" INTEGER NOT NULL, "mirrored" INTEGER NOT NULL, "initial" INTEGER NOT NULL, "worker_version_id" TEXT, "worker_run_id" TEXT, "confidence" REAL, FOREIGN KEY ("image_id") REFERENCES "images" ("id"))
63
- CREATE TABLE "entities" ("id" TEXT NOT NULL PRIMARY KEY, "type" VARCHAR(50) NOT NULL, "name" TEXT NOT NULL, "validated" INTEGER NOT NULL, "metas" text, "worker_run_id" TEXT)
64
63
  CREATE TABLE "images" ("id" TEXT NOT NULL PRIMARY KEY, "width" INTEGER NOT NULL, "height" INTEGER NOT NULL, "url" TEXT NOT NULL)
65
- CREATE TABLE "transcription_entities" ("transcription_id" TEXT NOT NULL, "entity_id" TEXT NOT NULL, "offset" INTEGER NOT NULL CHECK (offset >= 0), "length" INTEGER NOT NULL CHECK (length > 0), "worker_run_id" TEXT, "confidence" REAL, PRIMARY KEY ("transcription_id", "entity_id"), FOREIGN KEY ("transcription_id") REFERENCES "transcriptions" ("id"), FOREIGN KEY ("entity_id") REFERENCES "entities" ("id"))
64
+ CREATE TABLE "transcription_entities" ("transcription_id" TEXT NOT NULL, "type" VARCHAR(50) NOT NULL, "offset" INTEGER NOT NULL CHECK (offset >= 0), "length" INTEGER NOT NULL CHECK (length > 0), "worker_run_id" TEXT, "confidence" REAL, PRIMARY KEY ("transcription_id", "type"), FOREIGN KEY ("transcription_id") REFERENCES "transcriptions" ("id"))
66
65
  CREATE TABLE "transcriptions" ("id" TEXT NOT NULL PRIMARY KEY, "element_id" TEXT NOT NULL, "text" TEXT NOT NULL, "confidence" REAL, "orientation" VARCHAR(50) NOT NULL, "worker_version_id" TEXT, "worker_run_id" TEXT, FOREIGN KEY ("element_id") REFERENCES "elements" ("id"))"""
67
66
 
68
67
  actual_schema = "\n".join(