nucliadb 6.2.1.post2864__py3-none-any.whl → 6.2.1.post2869__py3-none-any.whl

This diff compares the contents of two publicly released versions of this package, as they appear in their respective public registries, and is provided for informational purposes only.
nucliadb/train/nodes.py CHANGED
@@ -28,6 +28,12 @@ from nucliadb.common.datamanagers.resources import KB_RESOURCE_SLUG_BASE
 from nucliadb.common.maindb.driver import Driver, Transaction
 from nucliadb.ingest.orm.entities import EntitiesManager
 from nucliadb.ingest.orm.knowledgebox import KnowledgeBox
+from nucliadb.train.resource import (
+    generate_train_resource,
+    iterate_fields,
+    iterate_paragraphs,
+    iterate_sentences,
+)
 from nucliadb_protos.train_pb2 import (
     GetFieldsRequest,
     GetParagraphsRequest,
@@ -87,11 +93,11 @@ class TrainShardManager(manager.KBShardManager):
                 # Filter by uuid
                 resource = await kb.get(request.uuid)
                 if resource:
-                    async for sentence in resource.iterate_sentences(request.metadata):
+                    async for sentence in iterate_sentences(resource, request.metadata):
                         yield sentence
             else:
                 async for resource in kb.iterate_resources():
-                    async for sentence in resource.iterate_sentences(request.metadata):
+                    async for sentence in iterate_sentences(resource, request.metadata):
                         yield sentence
 
     async def kb_paragraphs(self, request: GetParagraphsRequest) -> AsyncIterator[TrainParagraph]:
@@ -101,11 +107,11 @@ class TrainShardManager(manager.KBShardManager):
                 # Filter by uuid
                 resource = await kb.get(request.uuid)
                 if resource:
-                    async for paragraph in resource.iterate_paragraphs(request.metadata):
+                    async for paragraph in iterate_paragraphs(resource, request.metadata):
                         yield paragraph
             else:
                 async for resource in kb.iterate_resources():
-                    async for paragraph in resource.iterate_paragraphs(request.metadata):
+                    async for paragraph in iterate_paragraphs(resource, request.metadata):
                         yield paragraph
 
     async def kb_fields(self, request: GetFieldsRequest) -> AsyncIterator[TrainField]:
@@ -115,11 +121,11 @@ class TrainShardManager(manager.KBShardManager):
                 # Filter by uuid
                 resource = await kb.get(request.uuid)
                 if resource:
-                    async for field in resource.iterate_fields(request.metadata):
+                    async for field in iterate_fields(resource, request.metadata):
                         yield field
             else:
                 async for resource in kb.iterate_resources():
-                    async for field in resource.iterate_fields(request.metadata):
+                    async for field in iterate_fields(resource, request.metadata):
                         yield field
 
     async def kb_resources(self, request: GetResourcesRequest) -> AsyncIterator[TrainResource]:
@@ -132,4 +138,4 @@ class TrainShardManager(manager.KBShardManager):
             if rid is not None:
                 resource = await kb.get(rid.decode())
                 if resource is not None:
-                    yield await resource.generate_train_resource(request.metadata)
+                    yield await generate_train_resource(resource, request.metadata)
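
The pattern of this change is uniform: the train iterators that used to be methods on the ORM Resource object now live in the new nucliadb.train.resource module as free functions that take the resource as their first argument. A minimal before/after sketch of a call site (resource and request stand in for the real servicer context; stream_sentences is a hypothetical wrapper, not part of the package):

# Before (6.2.1.post2864): iterators were methods on the Resource instance
#     async for sentence in resource.iterate_sentences(request.metadata): ...

# After (6.2.1.post2869): free functions taking the resource explicitly
from nucliadb.train.resource import iterate_sentences

async def stream_sentences(resource, request):
    # Same iteration behavior as before; only the call shape changes.
    async for sentence in iterate_sentences(resource, request.metadata):
        yield sentence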
nucliadb/train/resource.py ADDED
@@ -0,0 +1,380 @@
+# Copyright (C) 2021 Bosutech XXI S.L.
+#
+# nucliadb is offered under the AGPL v3.0 and as commercial software.
+# For commercial licensing, contact us at info@nuclia.com.
+#
+# AGPL:
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+from __future__ import annotations
+
+from typing import AsyncIterator, MutableMapping, Optional
+
+from nucliadb.common import datamanagers
+from nucliadb.ingest.orm.resource import Resource
+from nucliadb_protos.resources_pb2 import (
+    FieldID,
+    FieldMetadata,
+    ParagraphAnnotation,
+)
+from nucliadb_protos.train_pb2 import (
+    EnabledMetadata,
+    TrainField,
+    TrainMetadata,
+    TrainParagraph,
+    TrainResource,
+    TrainSentence,
+)
+from nucliadb_protos.train_pb2 import Position as TrainPosition
+
+
+async def iterate_sentences(
+    resource: Resource,
+    enabled_metadata: EnabledMetadata,
+) -> AsyncIterator[TrainSentence]:  # pragma: no cover
+    fields = await resource.get_fields(force=True)
+    metadata = TrainMetadata()
+    userdefinedparagraphclass: dict[str, ParagraphAnnotation] = {}
+    if enabled_metadata.labels:
+        if resource.basic is None:
+            resource.basic = await resource.get_basic()
+        if resource.basic is not None:
+            metadata.labels.resource.extend(resource.basic.usermetadata.classifications)
+            for fieldmetadata in resource.basic.fieldmetadata:
+                field_id = resource.generate_field_id(fieldmetadata.field)
+                for annotationparagraph in fieldmetadata.paragraphs:
+                    userdefinedparagraphclass[annotationparagraph.key] = annotationparagraph
+
+    for (type_id, field_id), field in fields.items():
+        fieldid = FieldID(field_type=type_id, field=field_id)
+        field_key = resource.generate_field_id(fieldid)
+        fm = await field.get_field_metadata()
+        extracted_text = None
+        vo = None
+        text = None
+
+        if enabled_metadata.vector:
+            # XXX: Given that nobody requested any particular vectorset, we'll
+            # return any
+            vectorset_id = None
+            async with datamanagers.with_ro_transaction() as txn:
+                async for vectorset_id, vs in datamanagers.vectorsets.iter(
+                    txn=txn, kbid=resource.kb.kbid
+                ):
+                    break
+            assert vectorset_id is not None, "All KBs must have at least a vectorset"
+            vo = await field.get_vectors(vectorset_id, vs.storage_key_kind)
+
+        extracted_text = await field.get_extracted_text()
+
+        if fm is None:
+            continue
+
+        field_metadatas: list[tuple[Optional[str], FieldMetadata]] = [(None, fm.metadata)]
+        for subfield_metadata, splitted_metadata in fm.split_metadata.items():
+            field_metadatas.append((subfield_metadata, splitted_metadata))
+
+        for subfield, field_metadata in field_metadatas:
+            if enabled_metadata.labels:
+                metadata.labels.ClearField("field")
+                metadata.labels.field.extend(field_metadata.classifications)
+
+            entities: dict[str, str] = {}
+            if enabled_metadata.entities:
+                _update_entities_dict(entities, field_metadata)
+
+            precomputed_vectors = {}
+            if vo is not None:
+                if subfield is not None:
+                    vectors = vo.split_vectors[subfield]
+                    base_vector_key = f"{resource.uuid}/{field_key}/{subfield}"
+                else:
+                    vectors = vo.vectors
+                    base_vector_key = f"{resource.uuid}/{field_key}"
+                for index, vector in enumerate(vectors.vectors):
+                    vector_key = f"{base_vector_key}/{index}/{vector.start}-{vector.end}"
+                    precomputed_vectors[vector_key] = vector.vector
+
+            if extracted_text is not None:
+                if subfield is not None:
+                    text = extracted_text.split_text[subfield]
+                else:
+                    text = extracted_text.text
+
+            for paragraph in field_metadata.paragraphs:
+                if subfield is not None:
+                    paragraph_key = (
+                        f"{resource.uuid}/{field_key}/{subfield}/{paragraph.start}-{paragraph.end}"
+                    )
+                else:
+                    paragraph_key = f"{resource.uuid}/{field_key}/{paragraph.start}-{paragraph.end}"
+
+                if enabled_metadata.labels:
+                    metadata.labels.ClearField("field")
+                    metadata.labels.paragraph.extend(paragraph.classifications)
+                    if paragraph_key in userdefinedparagraphclass:
+                        metadata.labels.paragraph.extend(
+                            userdefinedparagraphclass[paragraph_key].classifications
+                        )
+
+                for index, sentence in enumerate(paragraph.sentences):
+                    if subfield is not None:
+                        sentence_key = f"{resource.uuid}/{field_key}/{subfield}/{index}/{sentence.start}-{sentence.end}"
+                    else:
+                        sentence_key = (
+                            f"{resource.uuid}/{field_key}/{index}/{sentence.start}-{sentence.end}"
+                        )
+
+                    if vo is not None:
+                        metadata.ClearField("vector")
+                        vector_tmp = precomputed_vectors.get(sentence_key)
+                        if vector_tmp:
+                            metadata.vector.extend(vector_tmp)
+
+                    if extracted_text is not None and text is not None:
+                        metadata.text = text[sentence.start : sentence.end]
+
+                    metadata.ClearField("entities")
+                    metadata.ClearField("entity_positions")
+                    if enabled_metadata.entities and text is not None:
+                        local_text = text[sentence.start : sentence.end]
+                        add_entities_to_metadata(entities, local_text, metadata)
+
+                    pb_sentence = TrainSentence()
+                    pb_sentence.uuid = resource.uuid
+                    pb_sentence.field.CopyFrom(fieldid)
+                    pb_sentence.paragraph = paragraph_key
+                    pb_sentence.sentence = sentence_key
+                    pb_sentence.metadata.CopyFrom(metadata)
+                    yield pb_sentence
+
+
+async def iterate_paragraphs(
+    resource: Resource, enabled_metadata: EnabledMetadata
+) -> AsyncIterator[TrainParagraph]:
+    fields = await resource.get_fields(force=True)
+    metadata = TrainMetadata()
+    userdefinedparagraphclass: dict[str, ParagraphAnnotation] = {}
+    if enabled_metadata.labels:
+        if resource.basic is None:
+            resource.basic = await resource.get_basic()
+        if resource.basic is not None:
+            metadata.labels.resource.extend(resource.basic.usermetadata.classifications)
+            for fieldmetadata in resource.basic.fieldmetadata:
+                field_id = resource.generate_field_id(fieldmetadata.field)
+                for annotationparagraph in fieldmetadata.paragraphs:
+                    userdefinedparagraphclass[annotationparagraph.key] = annotationparagraph
+
+    for (type_id, field_id), field in fields.items():
+        fieldid = FieldID(field_type=type_id, field=field_id)
+        field_key = resource.generate_field_id(fieldid)
+        fm = await field.get_field_metadata()
+        extracted_text = None
+        text = None
+
+        extracted_text = await field.get_extracted_text()
+
+        if fm is None:
+            continue
+
+        field_metadatas: list[tuple[Optional[str], FieldMetadata]] = [(None, fm.metadata)]
+        for subfield_metadata, splitted_metadata in fm.split_metadata.items():
+            field_metadatas.append((subfield_metadata, splitted_metadata))
+
+        for subfield, field_metadata in field_metadatas:
+            if enabled_metadata.labels:
+                metadata.labels.ClearField("field")
+                metadata.labels.field.extend(field_metadata.classifications)
+
+            entities: dict[str, str] = {}
+            if enabled_metadata.entities:
+                _update_entities_dict(entities, field_metadata)
+
+            if extracted_text is not None:
+                if subfield is not None:
+                    text = extracted_text.split_text[subfield]
+                else:
+                    text = extracted_text.text
+
+            for paragraph in field_metadata.paragraphs:
+                if subfield is not None:
+                    paragraph_key = (
+                        f"{resource.uuid}/{field_key}/{subfield}/{paragraph.start}-{paragraph.end}"
+                    )
+                else:
+                    paragraph_key = f"{resource.uuid}/{field_key}/{paragraph.start}-{paragraph.end}"
+
+                if enabled_metadata.labels:
+                    metadata.labels.ClearField("paragraph")
+                    metadata.labels.paragraph.extend(paragraph.classifications)
+
+                if extracted_text is not None and text is not None:
+                    metadata.text = text[paragraph.start : paragraph.end]
+
+                metadata.ClearField("entities")
+                metadata.ClearField("entity_positions")
+                if enabled_metadata.entities and text is not None:
+                    local_text = text[paragraph.start : paragraph.end]
+                    add_entities_to_metadata(entities, local_text, metadata)
+
+                if paragraph_key in userdefinedparagraphclass:
+                    metadata.labels.paragraph.extend(
+                        userdefinedparagraphclass[paragraph_key].classifications
+                    )
+
+                pb_paragraph = TrainParagraph()
+                pb_paragraph.uuid = resource.uuid
+                pb_paragraph.field.CopyFrom(fieldid)
+                pb_paragraph.paragraph = paragraph_key
+                pb_paragraph.metadata.CopyFrom(metadata)
+
+                yield pb_paragraph
+
+
+async def iterate_fields(
+    resource: Resource, enabled_metadata: EnabledMetadata
+) -> AsyncIterator[TrainField]:
+    fields = await resource.get_fields(force=True)
+    metadata = TrainMetadata()
+    if enabled_metadata.labels:
+        if resource.basic is None:
+            resource.basic = await resource.get_basic()
+        if resource.basic is not None:
+            metadata.labels.resource.extend(resource.basic.usermetadata.classifications)
+
+    for (type_id, field_id), field in fields.items():
+        fieldid = FieldID(field_type=type_id, field=field_id)
+        fm = await field.get_field_metadata()
+        extracted_text = None
+
+        if enabled_metadata.text:
+            extracted_text = await field.get_extracted_text()
+
+        if fm is None:
+            continue
+
+        field_metadatas: list[tuple[Optional[str], FieldMetadata]] = [(None, fm.metadata)]
+        for subfield_metadata, splitted_metadata in fm.split_metadata.items():
+            field_metadatas.append((subfield_metadata, splitted_metadata))
+
+        for subfield, splitted_metadata in field_metadatas:
+            if enabled_metadata.labels:
+                metadata.labels.ClearField("field")
+                metadata.labels.field.extend(splitted_metadata.classifications)
+
+            if extracted_text is not None:
+                if subfield is not None:
+                    metadata.text = extracted_text.split_text[subfield]
+                else:
+                    metadata.text = extracted_text.text
+
+            if enabled_metadata.entities:
+                metadata.ClearField("entities")
+                _update_entities_dict(metadata.entities, splitted_metadata)
+
+            pb_field = TrainField()
+            pb_field.uuid = resource.uuid
+            pb_field.field.CopyFrom(fieldid)
+            pb_field.metadata.CopyFrom(metadata)
+            yield pb_field
+
+
+async def generate_train_resource(
+    resource: Resource, enabled_metadata: EnabledMetadata
+) -> TrainResource:
+    fields = await resource.get_fields(force=True)
+    metadata = TrainMetadata()
+    if enabled_metadata.labels:
+        if resource.basic is None:
+            resource.basic = await resource.get_basic()
+        if resource.basic is not None:
+            metadata.labels.resource.extend(resource.basic.usermetadata.classifications)
+
+    metadata.labels.ClearField("field")
+    metadata.ClearField("entities")
+
+    for (_, _), field in fields.items():
+        extracted_text = None
+        fm = await field.get_field_metadata()
+
+        if enabled_metadata.text:
+            extracted_text = await field.get_extracted_text()
+
+        if extracted_text is not None:
+            metadata.text += extracted_text.text
+            for text in extracted_text.split_text.values():
+                metadata.text += f" {text}"
+
+        if fm is None:
+            continue
+
+        field_metadatas: list[tuple[Optional[str], FieldMetadata]] = [(None, fm.metadata)]
+        for subfield_metadata, splitted_metadata in fm.split_metadata.items():
+            field_metadatas.append((subfield_metadata, splitted_metadata))
+
+        for _, splitted_metadata in field_metadatas:
+            if enabled_metadata.labels:
+                metadata.labels.field.extend(splitted_metadata.classifications)
+
+            if enabled_metadata.entities:
+                _update_entities_dict(metadata.entities, splitted_metadata)
+
+    pb_resource = TrainResource()
+    pb_resource.uuid = resource.uuid
+    if resource.basic is not None:
+        pb_resource.title = resource.basic.title
+        pb_resource.icon = resource.basic.icon
+        pb_resource.slug = resource.basic.slug
+        pb_resource.modified.CopyFrom(resource.basic.modified)
+        pb_resource.created.CopyFrom(resource.basic.created)
+    pb_resource.metadata.CopyFrom(metadata)
+    return pb_resource
+
+
+def add_entities_to_metadata(entities: dict[str, str], local_text: str, metadata: TrainMetadata) -> None:
+    for entity_key, entity_value in entities.items():
+        if entity_key not in local_text:
+            # Add the entity only if found in text
+            continue
+        metadata.entities[entity_key] = entity_value
+
+        # Add positions for the entity relative to the local text
+        poskey = f"{entity_value}/{entity_key}"
+        metadata.entity_positions[poskey].entity = entity_key
+        last_occurrence_end = 0
+        for _ in range(local_text.count(entity_key)):
+            start = local_text.index(entity_key, last_occurrence_end)
+            end = start + len(entity_key)
+            metadata.entity_positions[poskey].positions.append(TrainPosition(start=start, end=end))
+            last_occurrence_end = end
+
+
+def _update_entities_dict(target_entites_dict: MutableMapping[str, str], field_metadata: FieldMetadata):
+    """
+    Update the entities dict with the entities from the field metadata.
+    Method created to ease the transition from legacy ner field to new entities field.
+    """
+    # Data Augmentation + Processor entities
+    # This will overwrite entities detected from more than one data augmentation task
+    # TODO: Change TrainMetadata proto to accept multiple entities with the same text
+    entity_map = {
+        entity.text: entity.label
+        for data_augmentation_task_id, entities_wrapper in field_metadata.entities.items()
+        for entity in entities_wrapper.entities
+    }
+    target_entites_dict.update(entity_map)
+
+    # Legacy processor entities
+    # TODO: Remove once processor doesn't use this anymore and remove the positions and ner fields from the message
+    target_entites_dict.update(field_metadata.ner)
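
The position bookkeeping in add_entities_to_metadata above is plain left-to-right substring scanning: each occurrence of an entity's text inside the local span yields one (start, end) pair, and the search resumes where the previous match ended. A self-contained sketch of just that loop, collecting into a plain list instead of the TrainMetadata protobuf (find_entity_positions is an illustrative name, not part of the package):

def find_entity_positions(local_text: str, entity_key: str) -> list[tuple[int, int]]:
    # str.count returns the number of non-overlapping occurrences, and each
    # str.index call resumes where the previous match ended, so the loop
    # visits every occurrence exactly once.
    positions: list[tuple[int, int]] = []
    last_occurrence_end = 0
    for _ in range(local_text.count(entity_key)):
        start = local_text.index(entity_key, last_occurrence_end)
        end = start + len(entity_key)
        positions.append((start, end))
        last_occurrence_end = end
    return positions

assert find_entity_positions("abc abc", "abc") == [(0, 3), (4, 7)]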
nucliadb-6.2.1.post2864.dist-info/METADATA → nucliadb-6.2.1.post2869.dist-info/METADATA RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: nucliadb
-Version: 6.2.1.post2864
+Version: 6.2.1.post2869
 Home-page: https://docs.nuclia.dev/docs/management/nucliadb/intro
 Author: NucliaDB Community
 Author-email: nucliadb@nuclia.com
@@ -22,10 +22,10 @@ Classifier: Programming Language :: Python :: 3.12
 Classifier: Programming Language :: Python :: 3 :: Only
 Requires-Python: >=3.9, <4
 Description-Content-Type: text/markdown
-Requires-Dist: nucliadb-telemetry[all]>=6.2.1.post2864
-Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.2.1.post2864
-Requires-Dist: nucliadb-protos>=6.2.1.post2864
-Requires-Dist: nucliadb-models>=6.2.1.post2864
+Requires-Dist: nucliadb-telemetry[all]>=6.2.1.post2869
+Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.2.1.post2869
+Requires-Dist: nucliadb-protos>=6.2.1.post2869
+Requires-Dist: nucliadb-models>=6.2.1.post2869
 Requires-Dist: nucliadb-admin-assets>=1.0.0.post1224
 Requires-Dist: nucliadb-node-binding>=2.26.0
 Requires-Dist: nuclia-models>=0.24.2
nucliadb-6.2.1.post2864.dist-info/RECORD → nucliadb-6.2.1.post2869.dist-info/RECORD RENAMED
@@ -79,7 +79,7 @@ nucliadb/common/datamanagers/resources.py,sha256=5EJk7P-G4A_YiobiUexz_yuZUTuxS5z
 nucliadb/common/datamanagers/rollover.py,sha256=c_DE3jtZusNL_9aOVjHOB9PV5OSVg7GJ5J-Ny0goHBE,7833
 nucliadb/common/datamanagers/synonyms.py,sha256=zk3GEH38KF5vV_VcuL6DCg-2JwgXJfQl7Io6VPqv2cw,1566
 nucliadb/common/datamanagers/utils.py,sha256=McHlXvE4P3x-bBY3pr0n8djbTDQvI1G5WusJrnRdhLA,1827
-nucliadb/common/datamanagers/vectorsets.py,sha256=XgHNQRw13GpWWymE6qu_ymdzuwL6hDiBKq50fN_sEMM,4007
+nucliadb/common/datamanagers/vectorsets.py,sha256=ciYb5uD435Zo8ZbqgPUAszFW9Svp_-R2hY2FEhQ411Y,4304
 nucliadb/common/external_index_providers/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
 nucliadb/common/external_index_providers/base.py,sha256=yfPkCigT4unXFvAyzy1tXSy2UgWC481GcZAS9bdE4NI,8871
 nucliadb/common/external_index_providers/exceptions.py,sha256=nDhhOIkb66hjCrBk4Spvl2vN1SuW5gbwrMCDmrdjHHE,1209
@@ -115,7 +115,7 @@ nucliadb/ingest/cache.py,sha256=w7jMMzamOmQ7gwXna6Dqm6isRNBVv6l5BTBlTxaYWjE,1005
 nucliadb/ingest/partitions.py,sha256=2NIhMYbNT0TNBL6bX1UMSi7vxFGICstCKEqsB0TXHOE,2410
 nucliadb/ingest/processing.py,sha256=gg1DqbMFwqdOsmCSGsZc2abRdYz86xOZJun9vrHOCzs,20618
 nucliadb/ingest/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-nucliadb/ingest/serialize.py,sha256=l2cIIHgo0rgkbaYvAY5slzjr8keVo_3Vb3B6DI120IY,15272
+nucliadb/ingest/serialize.py,sha256=03q9TBC9kbqbVq59SSL4ok1e3ThU0zeuYGdqY-B1V2M,15889
 nucliadb/ingest/settings.py,sha256=0B-wQNa8FLqtNcQgRzh-fuIuGptM816XHcbH1NQKfmE,3050
 nucliadb/ingest/utils.py,sha256=l1myURu3r8oA11dx3GpHw-gNTUc1AFX8xdPm9Lgl2rA,2275
 nucliadb/ingest/consumer/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
@@ -128,7 +128,7 @@ nucliadb/ingest/consumer/service.py,sha256=EZM1sABW_7bj6j2UgKUHUuK-EGIEYnLdtPAn8
 nucliadb/ingest/consumer/shard_creator.py,sha256=19wf-Bu_9hb_muCDVblamWuvLr09e5dMu9Id5I4-rGw,4324
 nucliadb/ingest/consumer/utils.py,sha256=jpX8D4lKzuPCpArQLZeX_Zczq3pfen_zAf8sPJfOEZU,2642
 nucliadb/ingest/fields/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
-nucliadb/ingest/fields/base.py,sha256=GuyZaumtaaAzoy86-mewBMhNX9DaSDBIK4pCqrxZlDc,19836
+nucliadb/ingest/fields/base.py,sha256=b6QpVPsCiDirDiYG3-yOCMaSNznJSHmQB0z6J_eDIyw,20657
 nucliadb/ingest/fields/conversation.py,sha256=OcQOHvi72Pm0OyNGwxLo9gONo8f1NhwASq0_gS-E64A,7021
 nucliadb/ingest/fields/exceptions.py,sha256=LBZ-lw11f42Pk-ck-NSN9mSJ2kOw-NeRwb-UE31ILTQ,1171
 nucliadb/ingest/fields/file.py,sha256=1v4jLg3balUua2VmSV8hHkAwPFShTUCOzufZvIUQcQw,4740
@@ -136,13 +136,13 @@ nucliadb/ingest/fields/generic.py,sha256=elgtqv15aJUq3zY7X_g0bli_2BpcwPArVvzhe54
 nucliadb/ingest/fields/link.py,sha256=kN_gjRUEEj5cy8K_BwPijYg3TiWhedc24apXYlTbRJs,4172
 nucliadb/ingest/fields/text.py,sha256=tFvSQJAe0W7ePpp2_WDfLiE2yglR1OTU0Zht9acvOFw,1594
 nucliadb/ingest/orm/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
-nucliadb/ingest/orm/brain.py,sha256=Hzq-3aarKaUCiUoa8H83unRUfduRE9TsQH1dEq0mvZY,28841
-nucliadb/ingest/orm/broker_message.py,sha256=JYYUJIZEL_EqovQuw6u-FmEkjyoYlxIXJq9hFekOiks,6441
+nucliadb/ingest/orm/brain.py,sha256=UND5EsNUdd7XdjScYqRqg4r_xCx3l-My8alGw5M9CWg,28398
+nucliadb/ingest/orm/broker_message.py,sha256=ZEMueoGuuRKO4tHgzc0P0AM1Ls1TTYey_4UvRQf0BpY,6915
 nucliadb/ingest/orm/entities.py,sha256=2PslT1FZ6yCvJtjR0UpKTSzxJrtS-C_gZx4ZTWHunTc,15759
 nucliadb/ingest/orm/exceptions.py,sha256=k4Esv4NtL4TrGTcsQpwrSfDhPQpiYcRbB1SpYmBX5MY,1432
-nucliadb/ingest/orm/knowledgebox.py,sha256=UpWJrVaVfCtk8R4qfSR6h6vzwOKXa8Teuwkna5QSljE,24508
+nucliadb/ingest/orm/knowledgebox.py,sha256=jWRBGic3KE1NRJzvUMpsRRLL6GHu9t28WsTb2DKtNhk,24901
 nucliadb/ingest/orm/metrics.py,sha256=OkwMSPKLZcKba0ZTwtTiIxwBgaLMX5ydhGieKvi2y7E,1096
-nucliadb/ingest/orm/resource.py,sha256=jvaKLsTlHtmIWUjjWCu8XBF7qQl5hoUihAa8sHDpLV8,59540
+nucliadb/ingest/orm/resource.py,sha256=KhucZzQzUbTBUm8_9gaCqxH68Fy1Q2u804IfTcjAIIk,43970
 nucliadb/ingest/orm/utils.py,sha256=vCe_9UxHu26JDFGLwQ0wH-XyzJIpQCTK-Ow9dtZR5Vg,2716
 nucliadb/ingest/orm/processor/__init__.py,sha256=Aqd9wCNTvggkMkCY3WvoI8spdr94Jnqk-0iq9XpLs18,922
 nucliadb/ingest/orm/processor/auditing.py,sha256=TeYhXGJRyQ7ROytbb2u8R0fIh_FYi3HgTu3S1ribY3U,4623
@@ -152,7 +152,7 @@ nucliadb/ingest/orm/processor/processor.py,sha256=2FxAetUvtHvg6l-24xYrmBdsyqc0RU
 nucliadb/ingest/orm/processor/sequence_manager.py,sha256=uqEphtI1Ir_yk9jRl2gPf7BlzzXWovbARY5MNZSBI_8,1704
 nucliadb/ingest/service/__init__.py,sha256=MME_G_ERxzJR6JW_hfE2qcfXpmpH1kdG-S0a-M0qRm8,2043
 nucliadb/ingest/service/exceptions.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
-nucliadb/ingest/service/writer.py,sha256=4xXwZ8PFMTnHWxRV18Ic57bjI4qwybwHID9cc1Kxm94,22729
+nucliadb/ingest/service/writer.py,sha256=aBLLpPUJLlIf-VjAczBCUrcb-zMxRZOFHXkA0QE1pgw,22952
 nucliadb/middleware/__init__.py,sha256=A8NBlBuEkunCFMKpR9gnfNELsVn0Plc55BIQMbWDM8Q,2202
 nucliadb/migrator/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
 nucliadb/migrator/command.py,sha256=dKbJ1tAmP6X4lMVRSSlz351euaqs2wBPpOczLjATUes,2089
@@ -165,7 +165,7 @@ nucliadb/migrator/settings.py,sha256=jOUX0ZMunCXN8HpF9xXN0aunJYRhu4Vdr_ffjRIqwtw
 nucliadb/migrator/utils.py,sha256=NgUreUvON8_nWEzTxELBMWlfV7E6-6qi-g0DMEbVEz4,2885
 nucliadb/models/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
 nucliadb/models/responses.py,sha256=qnuOoc7TrVSUnpikfTwHLKez47_DE4mSFzpxrwtqijA,1599
-nucliadb/purge/__init__.py,sha256=tcXwO99714cqflLVJyZzOv6_64H9pt7r6V0UogDd4oA,10389
+nucliadb/purge/__init__.py,sha256=ijcigiWz38ohXmVVwDU87aCki1BkmAIQRjDoNQ3LPRM,11647
 nucliadb/purge/orphan_shards.py,sha256=fA5yqRRN-M50OIk8dkAi1_ShFVjwDYEYqzMA9dYP0eU,9227
 nucliadb/reader/__init__.py,sha256=C5Efic7WlGm2U2C5WOyquMFbIj2Pojwe_8mwzVYnOzE,1304
 nucliadb/reader/app.py,sha256=Se-BFTE6d1v1msLzQn4q5XIhjnSxa2ckDSHdvm7NRf8,3096
@@ -274,8 +274,9 @@ nucliadb/train/app.py,sha256=TiRttTvekLuZdIvi46E4HyuumDTkR4G4Luqq3fEdjes,2824
 nucliadb/train/generator.py,sha256=0_zqWsLUHmJZl0lXhGorO5CWSkl42-k78dqb1slZ5h0,3904
 nucliadb/train/lifecycle.py,sha256=aCNaRURu0ZOUJaWLTZuEjwTstnB9MuLtzxOMztQoGxc,1773
 nucliadb/train/models.py,sha256=BmgmMjDsu_1Ih5JDAqo6whhume90q0ASJcDP9dkMQm8,1198
-nucliadb/train/nodes.py,sha256=ha0AsGupmyvxUpoc1THQ6-eN7ziPkjM_gkKgKYT0SCg,5782
+nucliadb/train/nodes.py,sha256=_89ZIpBb0HnR2jejvuO6aPsgHVSGbasPWz0lkGmVnvU,5925
 nucliadb/train/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+nucliadb/train/resource.py,sha256=3qQ_9Zdt5JAbtD-wpmt7OeDGRNKS-fQdKAuIQfznZm0,16219
 nucliadb/train/run.py,sha256=evz6CKVfJOzkbHMoaYz2mTMlKjJnNOb1O8zBBWMpeBw,1400
 nucliadb/train/servicer.py,sha256=scbmq8FriKsJGkOcoZB2Fg_IyIExn9Ux4W30mGDlkJQ,5728
 nucliadb/train/settings.py,sha256=rrLtgdBmuthtIObLuZUaeuo4VBGU2PJRazquQbtPBeI,1383
@@ -338,9 +339,9 @@ nucliadb/writer/tus/local.py,sha256=7jYa_w9b-N90jWgN2sQKkNcomqn6JMVBOVeDOVYJHto,
 nucliadb/writer/tus/s3.py,sha256=vF0NkFTXiXhXq3bCVXXVV-ED38ECVoUeeYViP8uMqcU,8357
 nucliadb/writer/tus/storage.py,sha256=ToqwjoYnjI4oIcwzkhha_MPxi-k4Jk3Lt55zRwaC1SM,2903
 nucliadb/writer/tus/utils.py,sha256=MSdVbRsRSZVdkaum69_0wku7X3p5wlZf4nr6E0GMKbw,2556
-nucliadb-6.2.1.post2864.dist-info/METADATA,sha256=-W5qHhu9x0clURMbD6zCYn6SsU9etVobB7qbn9jrebo,4689
-nucliadb-6.2.1.post2864.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
-nucliadb-6.2.1.post2864.dist-info/entry_points.txt,sha256=XqGfgFDuY3zXQc8ewXM2TRVjTModIq851zOsgrmaXx4,1268
-nucliadb-6.2.1.post2864.dist-info/top_level.txt,sha256=hwYhTVnX7jkQ9gJCkVrbqEG1M4lT2F_iPQND1fCzF80,20
-nucliadb-6.2.1.post2864.dist-info/zip-safe,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
-nucliadb-6.2.1.post2864.dist-info/RECORD,,
+nucliadb-6.2.1.post2869.dist-info/METADATA,sha256=yxkdjP13oz_FLAljFOB2S9kRxsEfkv6H27RhZS-7Dls,4689
+nucliadb-6.2.1.post2869.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
+nucliadb-6.2.1.post2869.dist-info/entry_points.txt,sha256=XqGfgFDuY3zXQc8ewXM2TRVjTModIq851zOsgrmaXx4,1268
+nucliadb-6.2.1.post2869.dist-info/top_level.txt,sha256=hwYhTVnX7jkQ9gJCkVrbqEG1M4lT2F_iPQND1fCzF80,20
+nucliadb-6.2.1.post2869.dist-info/zip-safe,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
+nucliadb-6.2.1.post2869.dist-info/RECORD,,