nucliadb 6.9.0.post5076__py3-none-any.whl → 6.9.0.post5085__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their public registry, and is provided for informational purposes only.

Potentially problematic release: this version of nucliadb might be problematic.

@@ -0,0 +1,106 @@
+ # Copyright (C) 2021 Bosutech XXI S.L.
+ #
+ # nucliadb is offered under the AGPL v3.0 and as commercial software.
+ # For commercial licensing, contact us at info@nuclia.com.
+ #
+ # AGPL:
+ # This program is free software: you can redistribute it and/or modify
+ # it under the terms of the GNU Affero General Public License as
+ # published by the Free Software Foundation, either version 3 of the
+ # License, or (at your option) any later version.
+ #
+ # This program is distributed in the hope that it will be useful,
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ # GNU Affero General Public License for more details.
+ #
+ # You should have received a copy of the GNU Affero General Public License
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
+ #
+
+ """Migration #39
+
+ Backfill splits metadata on conversation fields
+
+ """
+
+ import logging
+ from typing import cast
+
+ from nucliadb.common.maindb.driver import Transaction
+ from nucliadb.common.maindb.pg import PGTransaction
+ from nucliadb.ingest.fields.conversation import (
+     CONVERSATION_SPLITS_METADATA,
+     Conversation,
+ )
+ from nucliadb.ingest.orm.knowledgebox import KnowledgeBox as KnowledgeBoxORM
+ from nucliadb.migrator.context import ExecutionContext
+ from nucliadb_protos import resources_pb2
+ from nucliadb_protos.resources_pb2 import SplitMetadata, SplitsMetadata
+ from nucliadb_utils.storages.storage import Storage
+
+ logger = logging.getLogger(__name__)
+
+
+ async def migrate(context: ExecutionContext) -> None: ...
+
+
+ async def migrate_kb(context: ExecutionContext, kbid: str) -> None:
+     BATCH_SIZE = 100
+     start = ""
+     while True:
+         to_fix: list[tuple[str, str]] = []
+         async with context.kv_driver.rw_transaction() as txn:
+             txn = cast(PGTransaction, txn)
+             async with txn.connection.cursor() as cur:
+                 # Retrieve a bunch of conversation fields
+                 await cur.execute(
+                     """
+                     SELECT key FROM resources
+                     WHERE key ~ ('^/kbs/' || %s || '/r/[^/]*/f/c/[^/]*$')
+                     AND key > %s
+                     ORDER BY key
+                     LIMIT %s""",
+                     (kbid, start, BATCH_SIZE),
+                 )
+                 rows = await cur.fetchall()
+                 if len(rows) == 0:
+                     return
+                 for row in rows:
+                     key = row[0]
+                     start = key
+                     rid = key.split("/")[4]
+                     field_id = key.split("/")[7]
+                     to_fix.append((rid, field_id))
+
+         for rid, field_id in to_fix:
+             async with context.kv_driver.rw_transaction() as txn2:
+                 splits_metadata = await build_splits_metadata(
+                     txn2, context.blob_storage, kbid, rid, field_id
+                 )
+                 splits_metadata_key = CONVERSATION_SPLITS_METADATA.format(
+                     kbid=kbid, uuid=rid, type="c", field=field_id
+                 )
+                 await txn2.set(splits_metadata_key, splits_metadata.SerializeToString())
+                 await txn2.commit()
+
+
+ async def build_splits_metadata(
+     txn: Transaction, storage: Storage, kbid: str, rid: str, field_id: str
+ ) -> SplitsMetadata:
+     splits_metadata = SplitsMetadata()
+     kb_orm = KnowledgeBoxORM(txn, storage, kbid)
+     resource_obj = await kb_orm.get(rid)
+     if resource_obj is None:
+         return splits_metadata
+     field_obj: Conversation = await resource_obj.get_field(
+         field_id, resources_pb2.FieldType.CONVERSATION, load=False
+     )
+     conv_metadata = await field_obj.get_metadata()
+     for i in range(1, conv_metadata.pages + 1):
+         page = await field_obj.get_value(page=i)
+         if page is None:
+             continue
+         for message in page.messages:
+             splits_metadata.metadata.setdefault(message.ident, SplitMetadata())
+     return splits_metadata
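
The migration iterates maindb keys of the form /kbs/{kbid}/r/{rid}/f/c/{field} and rebuilds a SplitsMetadata entry for each conversation field from its stored pages. A quick sanity check of the key-parsing arithmetic it relies on (hypothetical ids, not taken from the package):

    key = "/kbs/mykb/r/abc123/f/c/chat"
    parts = key.split("/")
    # parts == ['', 'kbs', 'mykb', 'r', 'abc123', 'f', 'c', 'chat']
    assert parts[4] == "abc123"  # resource id (rid)
    assert parts[7] == "chat"    # conversation field id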
@@ -21,7 +21,7 @@ import uuid
  from typing import Any, Optional

  from nucliadb.ingest.fields.base import Field
- from nucliadb_protos.resources_pb2 import CloudFile, FieldConversation
+ from nucliadb_protos.resources_pb2 import CloudFile, FieldConversation, SplitMetadata, SplitsMetadata
  from nucliadb_protos.resources_pb2 import Conversation as PBConversation
  from nucliadb_utils.storages.storage import StorageField

@@ -30,6 +30,7 @@ MAX_CONVERSATION_MESSAGES = 50 * 1024
  PAGE_SIZE = 200

  CONVERSATION_PAGE_VALUE = "/kbs/{kbid}/r/{uuid}/f/{type}/{field}/{page}"
+ CONVERSATION_SPLITS_METADATA = "/kbs/{kbid}/r/{uuid}/f/{type}/{field}/splits_metadata"
  CONVERSATION_METADATA = "/kbs/{kbid}/r/{uuid}/f/{type}/{field}"


@@ -54,9 +55,22 @@ class Conversation(Field[PBConversation]):
      ):
          super(Conversation, self).__init__(id, resource, pb, value)
          self.value = {}
+         self._splits_metadata: Optional[SplitsMetadata] = None
          self.metadata = None

+     async def delete_value(self):
+         await self.resource.txn.delete_by_prefix(
+             CONVERSATION_METADATA.format(kbid=self.kbid, uuid=self.uuid, type=self.type, field=self.id)
+         )
+         self._splits_metadata = None
+         self.metadata = None
+         self.value.clear()
+
      async def set_value(self, payload: PBConversation):
+         if payload.replace_field:
+             # As we need to overwrite the value of the conversation, first delete any previous data.
+             await self.delete_value()
+
          metadata = await self.get_metadata()
          metadata.extract_strategy = payload.extract_strategy
          metadata.split_strategy = payload.split_strategy
@@ -72,10 +86,13 @@ class Conversation(Field[PBConversation]):
              last_page = PBConversation()
              metadata.pages += 1

+         self._splits_metadata = await self.get_splits_metadata()
+
          # Make sure message attachment files are on our region. This is needed
          # to support the hybrid-onprem deployment as the attachments must be stored
          # at the storage services of the client's premises.
          for message in payload.messages:
+             self._splits_metadata.metadata.setdefault(message.ident, SplitMetadata())
              new_message_files = []
              for idx, file in enumerate(message.content.attachments):
                  if self.storage.needs_move(file, self.kbid):
@@ -119,6 +136,7 @@ class Conversation(Field[PBConversation]):

          # Finally, set the metadata
          await self.db_set_metadata(metadata)
+         await self.set_splits_metadata(self._splits_metadata)

      async def get_value(self, page: Optional[int] = None) -> Optional[PBConversation]:
          # If no page was requested, force fetch of metadata
@@ -205,3 +223,29 @@ class Conversation(Field[PBConversation]):
          self.metadata = payload
          self.resource.modified = True
          self._created = False
+
+     async def get_splits_metadata(self) -> SplitsMetadata:
+         if self._splits_metadata is None:
+             field_key = CONVERSATION_SPLITS_METADATA.format(
+                 kbid=self.kbid,
+                 uuid=self.uuid,
+                 type=self.type,
+                 field=self.id,
+             )
+             payload = await self.resource.txn.get(field_key)
+             if payload is None:
+                 return SplitsMetadata()
+             self._splits_metadata = SplitsMetadata()
+             self._splits_metadata.ParseFromString(payload)
+         return self._splits_metadata
+
+     async def set_splits_metadata(self, payload: SplitsMetadata) -> None:
+         key = CONVERSATION_SPLITS_METADATA.format(
+             kbid=self.kbid,
+             uuid=self.uuid,
+             type=self.type,
+             field=self.id,
+         )
+         await self.resource.txn.set(key, payload.SerializeToString())
+         self._splits_metadata = payload
+         self.resource.modified = True
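
The new get_splits_metadata/set_splits_metadata pair persists a SplitsMetadata protobuf under the .../splits_metadata key, mapping each message ident to a SplitMetadata entry, with an in-object cache in _splits_metadata. A minimal sketch of the stored value's round-trip, assuming the standard protobuf Python behavior that accessing a missing key in a message-valued map inserts a default entry:

    from nucliadb_protos.resources_pb2 import SplitsMetadata

    sm = SplitsMetadata()
    sm.metadata["msg-1"]  # registers an empty SplitMetadata for ident "msg-1"
    raw = sm.SerializeToString()  # the bytes stored under .../splits_metadata

    restored = SplitsMetadata()
    restored.ParseFromString(raw)
    assert set(restored.metadata) == {"msg-1"}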
@@ -217,6 +217,7 @@ class ResourceBrain:
          replace_field: bool,
          skip_paragraphs_index: Optional[bool],
          skip_texts_index: Optional[bool],
+         append_splits: Optional[set[str]] = None,
      ) -> None:
          # We need to add the extracted text to the texts section of the Resource so that
          # the paragraphs can be indexed
@@ -234,6 +235,7 @@ class ResourceBrain:
              user_field_metadata,
              replace_field=replace_field,
              skip_paragraphs=skip_paragraphs_index,
+             append_splits=append_splits,
          )

      @observer.wrap({"type": "apply_field_paragraphs"})
@@ -246,6 +248,7 @@ class ResourceBrain:
          user_field_metadata: Optional[UserFieldMetadata],
          replace_field: bool,
          skip_paragraphs: Optional[bool],
+         append_splits: Optional[set[str]] = None,
      ) -> None:
          if skip_paragraphs is not None:
              self.brain.skip_paragraphs = skip_paragraphs
@@ -254,7 +257,12 @@ class ResourceBrain:
          paragraph_pages = ParagraphPages(page_positions) if page_positions else None
          # Splits of the field
          for subfield, field_metadata in field_computed_metadata.split_metadata.items():
-             extracted_text_str = extracted_text.split_text[subfield] if extracted_text else None
+             if should_skip_split_indexing(subfield, replace_field, append_splits):
+                 continue
+             if subfield not in extracted_text.split_text:
+                 # No extracted text for this split
+                 continue
+             extracted_text_str = extracted_text.split_text[subfield]
              for idx, paragraph in enumerate(field_metadata.paragraphs):
                  key = f"{self.rid}/{field_key}/{subfield}/{paragraph.start}-{paragraph.end}"
                  denied_classifications = set(user_paragraph_classifications.denied.get(key, []))
@@ -308,7 +316,7 @@ class ResourceBrain:
                  self.brain.paragraphs[field_key].paragraphs[key].CopyFrom(p)

          # Main field
-         extracted_text_str = extracted_text.text if extracted_text else None
+         extracted_text_str = extracted_text.text
          for idx, paragraph in enumerate(field_computed_metadata.metadata.paragraphs):
              key = f"{self.rid}/{field_key}/{paragraph.start}-{paragraph.end}"
              denied_classifications = set(user_paragraph_classifications.denied.get(key, []))
@@ -496,9 +504,12 @@ class ResourceBrain:
          replace_field: bool = False,
          # cut to specific dimension if specified
          vector_dimension: Optional[int] = None,
+         append_splits: Optional[set[str]] = None,
      ):
          fid = ids.FieldId.from_string(f"{self.rid}/{field_id}")
          for subfield, vectors in vo.split_vectors.items():
+             if should_skip_split_indexing(subfield, replace_field, append_splits):
+                 continue
              _field_id = ids.FieldId(
                  rid=fid.rid,
                  type=fid.type,
@@ -792,3 +803,10 @@ class ParagraphPages:
          if len(self._materialized) > 0:
              return self._materialized[-1]
          return 0
+
+
+ def should_skip_split_indexing(
+     split: str, replace_field: bool, append_splits: Optional[set[str]]
+ ) -> bool:
+     # When replacing the whole field, reindex all splits. Otherwise, only index the splits being appended.
+     return not replace_field and append_splits is not None and split not in append_splits
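
should_skip_split_indexing only skips work during an append operation; on a full field replace, or when no append set is passed, every split is indexed. A few illustrative cases (hypothetical split ids):

    # Full replace: nothing is skipped.
    assert not should_skip_split_indexing("m1", replace_field=True, append_splits=None)
    # Appending "m2": the pre-existing split "m1" is skipped, "m2" is indexed.
    assert should_skip_split_indexing("m1", replace_field=False, append_splits={"m2"})
    assert not should_skip_split_indexing("m2", replace_field=False, append_splits={"m2"})
    # No append information available: index everything.
    assert not should_skip_split_indexing("m1", replace_field=False, append_splits=None)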
@@ -25,6 +25,7 @@ from typing import Optional, Sequence
  from nidx_protos.noderesources_pb2 import Resource as IndexMessage

  from nucliadb.common import datamanagers
+ from nucliadb.ingest.fields.conversation import Conversation
  from nucliadb.ingest.fields.exceptions import FieldAuthorNotFound
  from nucliadb.ingest.fields.file import File
  from nucliadb.ingest.orm.brain_v2 import ResourceBrain
@@ -32,6 +33,7 @@ from nucliadb.ingest.orm.metrics import index_message_observer as observer
  from nucliadb.ingest.orm.resource import Resource, get_file_page_positions
  from nucliadb_protos.knowledgebox_pb2 import VectorSetConfig
  from nucliadb_protos.resources_pb2 import Basic, FieldID, FieldType
+ from nucliadb_protos.utils_pb2 import ExtractedText
  from nucliadb_protos.writer_pb2 import BrokerMessage


@@ -69,6 +71,7 @@ class IndexMessageBuilder:
          relations: bool = True,
          replace: bool = True,
          vectorset_configs: Optional[list[VectorSetConfig]] = None,
+         append_splits: Optional[set[str]] = None,
      ):
          field = await self.resource.get_field(fieldid.field, fieldid.field_type)
          extracted_text = await field.get_extracted_text()
@@ -120,6 +123,7 @@ class IndexMessageBuilder:
                  replace_field=replace_paragraphs,
                  skip_paragraphs_index=skip_paragraphs_index,
                  skip_texts_index=skip_texts_index,
+                 append_splits=append_splits,
              )
          if vectors:
              assert vectorset_configs is not None
@@ -137,6 +141,7 @@ class IndexMessageBuilder:
                      vectorset=vectorset_config.vectorset_id,
                      replace_field=replace,
                      vector_dimension=dimension,
+                     append_splits=append_splits,
                  )
          if relations:
              await asyncio.to_thread(
@@ -214,6 +219,19 @@ class IndexMessageBuilder:
          for fieldid in fields_to_index:
              if fieldid in message.delete_fields:
                  continue
+
+             # For conversation fields, we only replace the full field if it is not an append-messages
+             # operation. All other field types are always fully replaced upon modification.
+             replace_field = True
+             modified_splits = None
+             if fieldid.field_type == FieldType.CONVERSATION:
+                 modified_splits = await get_bm_modified_split_ids(fieldid, message, self.resource)
+                 stored_splits = await get_stored_split_ids(fieldid, self.resource)
+                 is_append_messages_op = modified_splits.issubset(stored_splits) and 0 < len(
+                     modified_splits
+                 ) < len(stored_splits)
+                 replace_field = not is_append_messages_op
+
              await self._apply_field_index_data(
                  self.brain,
                  fieldid,
@@ -222,8 +240,9 @@ class IndexMessageBuilder:
                  paragraphs=needs_paragraphs_update(fieldid, message),
                  relations=needs_relations_update(fieldid, message),
                  vectors=needs_vectors_update(fieldid, message),
-                 replace=True,
+                 replace=replace_field,
                  vectorset_configs=vectorsets_configs,
+                 append_splits=modified_splits,
              )
          return self.brain.brain

@@ -354,6 +373,37 @@ def needs_vectors_update(
      return any(field_vectors.field == field_id for field_vectors in message.field_vectors)


+ async def get_bm_modified_split_ids(
+     conversation_field_id: FieldID,
+     message: BrokerMessage,
+     resource: Resource,
+ ) -> set[str]:
+     message_etw = next(
+         (etw for etw in message.extracted_text if etw.field == conversation_field_id), None
+     )
+     if message_etw is None:
+         return set()
+     storage = resource.storage
+     if message_etw.HasField("file"):
+         raw_payload = await storage.downloadbytescf(message_etw.file)
+         message_extracted_text = ExtractedText()
+         message_extracted_text.ParseFromString(raw_payload.read())
+         raw_payload.flush()
+     else:
+         message_extracted_text = message_etw.body
+     return set(message_extracted_text.split_text.keys())
+
+
+ async def get_stored_split_ids(
+     conversation_field_id: FieldID,
+     resource: Resource,
+ ) -> set[str]:
+     fid = conversation_field_id
+     conv: Conversation = await resource.get_field(fid.field, fid.field_type, load=False)
+     splits_metadata = await conv.get_splits_metadata()
+     return set(splits_metadata.metadata)
+
+
  def needs_relations_update(
      field_id: FieldID,
      message: BrokerMessage,
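
get_bm_modified_split_ids and get_stored_split_ids feed the append-detection rule above: a broker message is treated as an append-messages operation only when the split ids it carries form a non-empty, strict subset of the splits already recorded for the field. Reduced to plain sets (hypothetical idents):

    stored = {"m1", "m2", "m3"}  # idents already in the field's splits metadata
    modified = {"m2", "m3"}      # idents carried by the broker message

    is_append = modified.issubset(stored) and 0 < len(modified) < len(stored)
    assert is_append  # indexed incrementally, without replacing the field

    # Touching every stored split is a full rewrite, not an append.
    modified_all = {"m1", "m2", "m3"}
    assert not (modified_all.issubset(stored) and 0 < len(modified_all) < len(stored))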
@@ -249,9 +249,10 @@ async def parse_conversation_field_adapter(
      writer: BrokerMessage,
      toprocess: PushPayload,
      resource_classifications: ResourceClassifications,
+     replace_field: bool = False,
  ):
      return await parse_conversation_field(
-         field_id, field_payload, writer, toprocess, kbid, rid, resource_classifications
+         field_id, field_payload, writer, toprocess, kbid, rid, resource_classifications, replace_field
      )


@@ -380,7 +381,9 @@ async def add_resource_field_conversation_rslug_prefix(
      field_id: FieldIdString,
      field_payload: models.InputConversationField,
  ) -> ResourceFieldAdded:
-     return await add_field_to_resource_by_slug(request, kbid, rslug, field_id, field_payload)
+     return await add_field_to_resource_by_slug(
+         request, kbid, rslug, field_id, field_payload, replace_field=True
+     )


  @api.put(
@@ -399,7 +402,7 @@ async def add_resource_field_conversation_rid_prefix(
      field_id: FieldIdString,
      field_payload: models.InputConversationField,
  ) -> ResourceFieldAdded:
-     return await add_field_to_resource(request, kbid, rid, field_id, field_payload)
+     return await add_field_to_resource(request, kbid, rid, field_id, field_payload, replace_field=True)


  @api.put(
@@ -466,7 +469,9 @@ async def append_messages_to_conversation_field_rslug_prefix(
          field = models.InputConversationField(messages=messages)
      except pydantic.ValidationError as e:
          raise HTTPException(status_code=422, detail=str(e))
-     return await add_field_to_resource_by_slug(request, kbid, rslug, field_id, field)
+     return await add_field_to_resource_by_slug(
+         request, kbid, rslug, field_id, field, replace_field=False
+     )


  @api.put(
@@ -489,7 +494,7 @@ async def append_messages_to_conversation_field_rid_prefix(
          field = models.InputConversationField(messages=messages)
      except pydantic.ValidationError as e:
          raise HTTPException(status_code=422, detail=str(e))
-     return await add_field_to_resource(request, kbid, rid, field_id, field)
+     return await add_field_to_resource(request, kbid, rid, field_id, field, replace_field=False)


  @api.delete(
@@ -228,6 +228,7 @@ async def parse_fields(
              kbid,
              uuid,
              resource_classifications,
+             replace_field=True,
          )


@@ -431,19 +432,15 @@ async def parse_conversation_field(
      kbid: str,
      uuid: str,
      resource_classifications: ResourceClassifications,
+     replace_field: bool,
  ) -> None:
-     # Make sure that the max number of messages is not exceeded
-     current_message_count = await get_current_conversation_message_count(kbid, uuid, key)
-     if len(conversation_field.messages) + current_message_count > MAX_CONVERSATION_MESSAGES:
-         raise HTTPException(
-             status_code=422,
-             detail=f"Conversation fields cannot have more than {MAX_CONVERSATION_MESSAGES} messages.",
-         )
-
+     if not replace_field:
+         # Appending messages to the conversation
+         await _conversation_append_checks(kbid, uuid, key, conversation_field)
      classif_labels = resource_classifications.for_field(key, resources_pb2.FieldType.CONVERSATION)
      storage = await get_storage(service_name=SERVICE_NAME)
      processing = get_processing()
-     field_value = resources_pb2.Conversation()
+     field_value = resources_pb2.Conversation(replace_field=replace_field)
      convs = processing_models.PushConversation()
      for message in conversation_field.messages:
          cm = resources_pb2.Message()
@@ -554,13 +551,31 @@ async def get_stored_resource_classifications(
      return rc


- async def get_current_conversation_message_count(kbid: str, rid: str, field_id: str) -> int:
+ async def _conversation_append_checks(
+     kbid: str, rid: str, field_id: str, input: models.InputConversationField
+ ):
      async with datamanagers.with_ro_transaction() as txn:
          resource_obj = await datamanagers.resources.get_resource(txn, kbid=kbid, rid=rid)
          if resource_obj is None:
-             return 0
-         field_obj: Conversation = await resource_obj.get_field(
+             return
+         conv: Conversation = await resource_obj.get_field(
              field_id, resources_pb2.FieldType.CONVERSATION, load=False
          )
-         metadata = await field_obj.get_metadata()
-         return metadata.total
+
+         # Make sure that the max number of messages is not exceeded
+         current_message_count = (await conv.get_metadata()).total
+         if len(input.messages) + current_message_count > MAX_CONVERSATION_MESSAGES:
+             raise HTTPException(
+                 status_code=422,
+                 detail=f"Conversation fields cannot have more than {MAX_CONVERSATION_MESSAGES} messages.",
+             )
+
+         # Make sure input messages use unique idents
+         existing_message_ids = set((await conv.get_splits_metadata()).metadata.keys())
+         input_message_ids = {message.ident for message in input.messages}
+         intersection = input_message_ids.intersection(existing_message_ids)
+         if intersection != set():
+             raise HTTPException(
+                 status_code=422,
+                 detail=f"Message identifiers must be unique field={field_id}: {list(intersection)[:50]}",
+             )
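
_conversation_append_checks now enforces both the message-count cap and ident uniqueness before an append is accepted. The uniqueness guard, reduced to plain set operations (hypothetical idents):

    existing_message_ids = {"m1", "m2"}  # from the stored splits metadata
    input_message_ids = {"m2", "m3"}     # idents in the append request

    intersection = input_message_ids.intersection(existing_message_ids)
    if intersection:
        # The writer rejects the request with HTTP 422,
        # listing up to 50 of the clashing idents.
        print(f"Message identifiers must be unique: {list(intersection)[:50]}")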
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: nucliadb
- Version: 6.9.0.post5076
+ Version: 6.9.0.post5085
  Summary: NucliaDB
  Author-email: Nuclia <nucliadb@nuclia.com>
  License-Expression: AGPL-3.0-or-later
@@ -19,11 +19,11 @@ Classifier: Programming Language :: Python :: 3.12
  Classifier: Programming Language :: Python :: 3 :: Only
  Requires-Python: <4,>=3.9
  Description-Content-Type: text/markdown
- Requires-Dist: nucliadb-telemetry[all]>=6.9.0.post5076
- Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.9.0.post5076
- Requires-Dist: nucliadb-protos>=6.9.0.post5076
- Requires-Dist: nucliadb-models>=6.9.0.post5076
- Requires-Dist: nidx-protos>=6.9.0.post5076
+ Requires-Dist: nucliadb-telemetry[all]>=6.9.0.post5085
+ Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.9.0.post5085
+ Requires-Dist: nucliadb-protos>=6.9.0.post5085
+ Requires-Dist: nucliadb-models>=6.9.0.post5085
+ Requires-Dist: nidx-protos>=6.9.0.post5085
  Requires-Dist: nucliadb-admin-assets>=1.0.0.post1224
  Requires-Dist: nuclia-models>=0.50.0
  Requires-Dist: uvicorn[standard]
@@ -34,6 +34,7 @@ migrations/0035_rollover_nidx_texts_4.py,sha256=W0_AUd01pjMpYMDC3yqF6HzDLgcnnPpr
  migrations/0036_backfill_catalog_slug.py,sha256=toYqxH_EfUFqoVn_cOdR5Fg8bWZU5BoFMfPBSf74LKU,2957
  migrations/0037_backfill_catalog_facets.py,sha256=IH7H4OZ4tzws6xEh7Qro0bPDHDYOoVViEUj-JwPPe1U,2791
  migrations/0038_backfill_catalog_field_labels.py,sha256=F519nYngJDb1Mtwf-OQpweDPWKPxAlqdxy5E-DyQrhA,3492
+ migrations/0039_backfill_converation_splits_metadata.py,sha256=NtL9S6Kx8mbSjNJLjIsc-6vVNymD0YKlF9vEkGUEDds,3958
  migrations/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
  migrations/pg/0001_bootstrap.py,sha256=3O_P17l0d0h48nebN6VQLXzM_B7S7zvDpaLR0koVgWE,1274
  migrations/pg/0002_catalog.py,sha256=Rsleecu351Ty19kYZgOpqX5G3MEAY8nMxCJrAeuS2Mw,1690
@@ -154,18 +155,18 @@ nucliadb/ingest/consumer/shard_creator.py,sha256=UKIk0yaS_jC_nGQqymn9NGJWzwZEqhI
  nucliadb/ingest/consumer/utils.py,sha256=jpX8D4lKzuPCpArQLZeX_Zczq3pfen_zAf8sPJfOEZU,2642
  nucliadb/ingest/fields/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
  nucliadb/ingest/fields/base.py,sha256=D8NzawonF7hivDW9zvQBbV938TKA6e2OCGqV4kS96RU,24405
- nucliadb/ingest/fields/conversation.py,sha256=ShdPapTIG7sA05YlG1Dj2CsAwNwibsqRSQrsuZnr8YI,7723
+ nucliadb/ingest/fields/conversation.py,sha256=KkOvNM1rZFQRg2RsfGd3Jrz3lpx0HpGpN1cmlpz_mZw,9563
  nucliadb/ingest/fields/exceptions.py,sha256=sZBk21BSrXFdOdo1qUdCAyD-9YMYakSLdn4_WdIPCIQ,1217
  nucliadb/ingest/fields/file.py,sha256=1v4jLg3balUua2VmSV8hHkAwPFShTUCOzufZvIUQcQw,4740
  nucliadb/ingest/fields/generic.py,sha256=elgtqv15aJUq3zY7X_g0bli_2BpcwPArVvzhe54Y4Ig,1547
  nucliadb/ingest/fields/link.py,sha256=kN_gjRUEEj5cy8K_BwPijYg3TiWhedc24apXYlTbRJs,4172
  nucliadb/ingest/fields/text.py,sha256=2grxo8twWbpXEd_iwUMBw9q0dWorVmlPONmY5d1ThwQ,1684
  nucliadb/ingest/orm/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
- nucliadb/ingest/orm/brain_v2.py,sha256=8MAo1N_nhoGy73TvKBuaw-NaMxIanRPCNttw6dFY4jk,33647
+ nucliadb/ingest/orm/brain_v2.py,sha256=mBze0LBft9pPiEtTIlfwCDVk597RSBXsx0Pte4ciTTg,34456
  nucliadb/ingest/orm/broker_message.py,sha256=XWaiZgDOz94NPOPT-hqbRr5ZkpVimUw6PjUJNftfoVw,7514
  nucliadb/ingest/orm/entities.py,sha256=kXyeF6XOpFKhEsGLcY-GLIk21Exp0cJst4XQQ9jJoug,14791
  nucliadb/ingest/orm/exceptions.py,sha256=gsp7TtVNQPiIEh-zf_UEJClwuFU0iu-5vzj0OrKMScg,1550
- nucliadb/ingest/orm/index_message.py,sha256=DlGLuuuCsXR_rqxd6CEZMYuOx1TIiq1mR5ue114rGUk,14473
+ nucliadb/ingest/orm/index_message.py,sha256=mWlpQ0-KChSVIbHewVE8sXCe-7LiPIIh0cBqr3axU8o,16554
  nucliadb/ingest/orm/knowledgebox.py,sha256=OG9dmfklYf1PgTHwQd_iFZOociLEvUSMMv1ZKeUgecE,23910
  nucliadb/ingest/orm/metrics.py,sha256=OiuggTh-n3kZHA2G73NEUdIlh8c3yFrbusI88DK-Mko,1273
  nucliadb/ingest/orm/resource.py,sha256=zQeZyZ-tCxr-DhonLobfZRkz_iEew0Y-cGfXeNNIHG0,40432
@@ -360,7 +361,7 @@ nucliadb/writer/api/constants.py,sha256=SCdqGDbEmpdczQdTfbTlpHzVjbLqccPtMQ25MPIF
  nucliadb/writer/api/utils.py,sha256=wIQHlU8RQiIGVLI72suvyVIKlCU44Unh0Ae0IiN6Qwo,1313
  nucliadb/writer/api/v1/__init__.py,sha256=akI9A_jloNLb0dU4T5zjfdyvmSAiDeIdjAlzNx74FlU,1128
  nucliadb/writer/api/v1/export_import.py,sha256=v0sU55TtRSqDzwkDgcwv2uSaqKCuQTtGcMpYoHQYBQA,8192
- nucliadb/writer/api/v1/field.py,sha256=qcuniSwR9tR9vn5abpK3rB_olpuUTEj_0LcL_1eAiLw,18972
+ nucliadb/writer/api/v1/field.py,sha256=nO3IEV6v5hokdIo5HoaecdwDqvr1PzCJlh5DafzcNTw,19130
  nucliadb/writer/api/v1/knowledgebox.py,sha256=kioqjD3yN-y1cDTgmXAAOwivXHX9NXxwblcSzGqJup0,9533
  nucliadb/writer/api/v1/learning_config.py,sha256=DTLEzKJ3dHvi8pbZscjElUqCH_ZvLc6WZgvalFqHo10,4450
  nucliadb/writer/api/v1/resource.py,sha256=IfcT6HXnR5sC5wSnQSuKmFzEWcLTh7OzZEAV4hYmXnA,20442
373
374
  nucliadb/writer/resource/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
374
375
  nucliadb/writer/resource/audit.py,sha256=FvxMZPzrNHtd31HgpZEvxzwAkbxJTZRhPLqRYYJi3tA,1426
375
376
  nucliadb/writer/resource/basic.py,sha256=44GK8M9EEVoAUfGiabdLrrpENqeFwNn7qwxF2AHhQGg,10504
376
- nucliadb/writer/resource/field.py,sha256=eM2KFxhcG3u6-ldniZDSYqGzhJ5bpWgIQBGXXFwskqw,22195
377
+ nucliadb/writer/resource/field.py,sha256=kJFxOgmizGbEuTRPb5o0cNqonZ8sa9ehVlSfRk-ektY,22866
377
378
  nucliadb/writer/resource/origin.py,sha256=pvhUDdU0mlWPUcpoQi4LDUJaRtfjzVVrA8XcGVI_N8k,2021
378
379
  nucliadb/writer/tus/__init__.py,sha256=Kera0BtxoDX0ngPftXiMjNgjrhtQ3l2XFc5nJqSBOJY,5498
379
380
  nucliadb/writer/tus/azure.py,sha256=yxoRi4PhGDikTqVK3PiuVyguy8H9DOS66JpZCY4hpUY,4177
@@ -384,8 +385,8 @@ nucliadb/writer/tus/local.py,sha256=7jYa_w9b-N90jWgN2sQKkNcomqn6JMVBOVeDOVYJHto,
384
385
  nucliadb/writer/tus/s3.py,sha256=vu1BGg4VqJ_x2P1u2BxqPKlSfw5orT_a3R-Ln5oPUpU,8483
385
386
  nucliadb/writer/tus/storage.py,sha256=ToqwjoYnjI4oIcwzkhha_MPxi-k4Jk3Lt55zRwaC1SM,2903
386
387
  nucliadb/writer/tus/utils.py,sha256=MSdVbRsRSZVdkaum69_0wku7X3p5wlZf4nr6E0GMKbw,2556
387
- nucliadb-6.9.0.post5076.dist-info/METADATA,sha256=E3lmHN9Srx1TW2AEn1K9e9l9io_KlWZ-iNlZQS1SvhM,4158
388
- nucliadb-6.9.0.post5076.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
389
- nucliadb-6.9.0.post5076.dist-info/entry_points.txt,sha256=XqGfgFDuY3zXQc8ewXM2TRVjTModIq851zOsgrmaXx4,1268
390
- nucliadb-6.9.0.post5076.dist-info/top_level.txt,sha256=hwYhTVnX7jkQ9gJCkVrbqEG1M4lT2F_iPQND1fCzF80,20
391
- nucliadb-6.9.0.post5076.dist-info/RECORD,,
388
+ nucliadb-6.9.0.post5085.dist-info/METADATA,sha256=hSS20ZkqaZcgSta1Oq5me923rWzrspnZ2gamC_J_c84,4158
389
+ nucliadb-6.9.0.post5085.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
390
+ nucliadb-6.9.0.post5085.dist-info/entry_points.txt,sha256=XqGfgFDuY3zXQc8ewXM2TRVjTModIq851zOsgrmaXx4,1268
391
+ nucliadb-6.9.0.post5085.dist-info/top_level.txt,sha256=hwYhTVnX7jkQ9gJCkVrbqEG1M4lT2F_iPQND1fCzF80,20
392
+ nucliadb-6.9.0.post5085.dist-info/RECORD,,