nucliadb 6.2.0.post2679__py3-none-any.whl → 6.2.1__py3-none-any.whl

This diff compares the contents of publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registries.
Files changed (105)
  1. migrations/0028_extracted_vectors_reference.py +61 -0
  2. migrations/0029_backfill_field_status.py +149 -0
  3. migrations/0030_label_deduplication.py +60 -0
  4. nucliadb/common/cluster/manager.py +41 -331
  5. nucliadb/common/cluster/rebalance.py +2 -2
  6. nucliadb/common/cluster/rollover.py +12 -71
  7. nucliadb/common/cluster/settings.py +3 -0
  8. nucliadb/common/cluster/standalone/utils.py +0 -43
  9. nucliadb/common/cluster/utils.py +0 -16
  10. nucliadb/common/counters.py +1 -0
  11. nucliadb/common/datamanagers/fields.py +48 -7
  12. nucliadb/common/datamanagers/vectorsets.py +11 -2
  13. nucliadb/common/external_index_providers/base.py +2 -1
  14. nucliadb/common/external_index_providers/pinecone.py +3 -5
  15. nucliadb/common/ids.py +18 -4
  16. nucliadb/common/models_utils/from_proto.py +479 -0
  17. nucliadb/common/models_utils/to_proto.py +60 -0
  18. nucliadb/common/nidx.py +76 -37
  19. nucliadb/export_import/models.py +3 -3
  20. nucliadb/health.py +0 -7
  21. nucliadb/ingest/app.py +0 -8
  22. nucliadb/ingest/consumer/auditing.py +1 -1
  23. nucliadb/ingest/consumer/shard_creator.py +1 -1
  24. nucliadb/ingest/fields/base.py +83 -21
  25. nucliadb/ingest/orm/brain.py +55 -56
  26. nucliadb/ingest/orm/broker_message.py +12 -2
  27. nucliadb/ingest/orm/entities.py +6 -17
  28. nucliadb/ingest/orm/knowledgebox.py +44 -22
  29. nucliadb/ingest/orm/processor/data_augmentation.py +7 -29
  30. nucliadb/ingest/orm/processor/processor.py +5 -2
  31. nucliadb/ingest/orm/resource.py +222 -413
  32. nucliadb/ingest/processing.py +8 -2
  33. nucliadb/ingest/serialize.py +77 -46
  34. nucliadb/ingest/service/writer.py +2 -56
  35. nucliadb/ingest/settings.py +1 -4
  36. nucliadb/learning_proxy.py +6 -4
  37. nucliadb/purge/__init__.py +102 -12
  38. nucliadb/purge/orphan_shards.py +6 -4
  39. nucliadb/reader/api/models.py +3 -3
  40. nucliadb/reader/api/v1/__init__.py +1 -0
  41. nucliadb/reader/api/v1/download.py +2 -2
  42. nucliadb/reader/api/v1/knowledgebox.py +3 -3
  43. nucliadb/reader/api/v1/resource.py +23 -12
  44. nucliadb/reader/api/v1/services.py +4 -4
  45. nucliadb/reader/api/v1/vectorsets.py +48 -0
  46. nucliadb/search/api/v1/ask.py +11 -1
  47. nucliadb/search/api/v1/feedback.py +3 -3
  48. nucliadb/search/api/v1/knowledgebox.py +8 -13
  49. nucliadb/search/api/v1/search.py +3 -2
  50. nucliadb/search/api/v1/suggest.py +0 -2
  51. nucliadb/search/predict.py +6 -4
  52. nucliadb/search/requesters/utils.py +1 -2
  53. nucliadb/search/search/chat/ask.py +77 -13
  54. nucliadb/search/search/chat/prompt.py +16 -5
  55. nucliadb/search/search/chat/query.py +74 -34
  56. nucliadb/search/search/exceptions.py +2 -7
  57. nucliadb/search/search/find.py +9 -5
  58. nucliadb/search/search/find_merge.py +10 -4
  59. nucliadb/search/search/graph_strategy.py +884 -0
  60. nucliadb/search/search/hydrator.py +6 -0
  61. nucliadb/search/search/merge.py +79 -24
  62. nucliadb/search/search/query.py +74 -245
  63. nucliadb/search/search/query_parser/exceptions.py +11 -1
  64. nucliadb/search/search/query_parser/fetcher.py +405 -0
  65. nucliadb/search/search/query_parser/models.py +0 -3
  66. nucliadb/search/search/query_parser/parser.py +22 -21
  67. nucliadb/search/search/rerankers.py +1 -42
  68. nucliadb/search/search/shards.py +19 -0
  69. nucliadb/standalone/api_router.py +2 -14
  70. nucliadb/standalone/settings.py +4 -0
  71. nucliadb/train/generators/field_streaming.py +7 -3
  72. nucliadb/train/lifecycle.py +3 -6
  73. nucliadb/train/nodes.py +14 -12
  74. nucliadb/train/resource.py +380 -0
  75. nucliadb/writer/api/constants.py +20 -16
  76. nucliadb/writer/api/v1/__init__.py +1 -0
  77. nucliadb/writer/api/v1/export_import.py +1 -1
  78. nucliadb/writer/api/v1/field.py +13 -7
  79. nucliadb/writer/api/v1/knowledgebox.py +3 -46
  80. nucliadb/writer/api/v1/resource.py +20 -13
  81. nucliadb/writer/api/v1/services.py +10 -1
  82. nucliadb/writer/api/v1/upload.py +61 -34
  83. nucliadb/writer/{vectorsets.py → api/v1/vectorsets.py} +99 -47
  84. nucliadb/writer/back_pressure.py +17 -46
  85. nucliadb/writer/resource/basic.py +9 -7
  86. nucliadb/writer/resource/field.py +42 -9
  87. nucliadb/writer/settings.py +2 -2
  88. nucliadb/writer/tus/gcs.py +11 -10
  89. {nucliadb-6.2.0.post2679.dist-info → nucliadb-6.2.1.dist-info}/METADATA +11 -14
  90. {nucliadb-6.2.0.post2679.dist-info → nucliadb-6.2.1.dist-info}/RECORD +94 -96
  91. {nucliadb-6.2.0.post2679.dist-info → nucliadb-6.2.1.dist-info}/WHEEL +1 -1
  92. nucliadb/common/cluster/discovery/base.py +0 -178
  93. nucliadb/common/cluster/discovery/k8s.py +0 -301
  94. nucliadb/common/cluster/discovery/manual.py +0 -57
  95. nucliadb/common/cluster/discovery/single.py +0 -51
  96. nucliadb/common/cluster/discovery/types.py +0 -32
  97. nucliadb/common/cluster/discovery/utils.py +0 -67
  98. nucliadb/common/cluster/standalone/grpc_node_binding.py +0 -349
  99. nucliadb/common/cluster/standalone/index_node.py +0 -123
  100. nucliadb/common/cluster/standalone/service.py +0 -84
  101. nucliadb/standalone/introspect.py +0 -208
  102. nucliadb-6.2.0.post2679.dist-info/zip-safe +0 -1
  103. /nucliadb/common/{cluster/discovery → models_utils}/__init__.py +0 -0
  104. {nucliadb-6.2.0.post2679.dist-info → nucliadb-6.2.1.dist-info}/entry_points.txt +0 -0
  105. {nucliadb-6.2.0.post2679.dist-info → nucliadb-6.2.1.dist-info}/top_level.txt +0 -0
nucliadb/ingest/orm/broker_message.py

@@ -20,11 +20,13 @@
 
 from typing import cast
 
+from nucliadb.common import datamanagers
 from nucliadb.ingest.fields.base import Field
 from nucliadb.ingest.fields.conversation import Conversation
 from nucliadb.ingest.fields.file import File
 from nucliadb.ingest.fields.link import Link
 from nucliadb.ingest.orm.resource import Resource
+from nucliadb_protos.knowledgebox_pb2 import VectorSetConfig
 from nucliadb_protos.resources_pb2 import (
     ExtractedTextWrapper,
     ExtractedVectorsWrapper,
@@ -90,7 +92,12 @@ class _BrokerMessageBuilder:
             self.bm.link_extracted_data.append(link_extracted_data)
 
         # Field vectors
-        await self.generate_field_vectors(type_id, field_id, field)
+        async for vectorset_id, vs in datamanagers.vectorsets.iter(
+            resource.txn, kbid=resource.kb.kbid
+        ):
+            await self.generate_field_vectors(
+                type_id, field_id, field, vectorset_id, vs.storage_key_kind
+            )
 
         # Large metadata
         await self.generate_field_large_computed_metadata(type_id, field_id, field)
@@ -155,13 +162,16 @@ class _BrokerMessageBuilder:
         type_id: FieldType.ValueType,
         field_id: str,
         field: Field,
+        vectorset: str,
+        storage_key_kind: VectorSetConfig.StorageKeyKind.ValueType,
     ):
-        vo = await field.get_vectors()
+        vo = await field.get_vectors(vectorset, storage_key_kind)
         if vo is None:
             return
         evw = ExtractedVectorsWrapper()
         evw.field.field = field_id
         evw.field.field_type = type_id
+        evw.vectorset_id = vectorset
         evw.vectors.CopyFrom(vo)
         self.bm.field_vectors.append(evw)
 
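Note: `generate_field_vectors` now takes the vectorset id and its `storage_key_kind`, which lines up with migration `0028_extracted_vectors_reference.py` in the file list. As a rough illustration of why the kind matters, here is a hypothetical sketch of the two key layouts; the paths and the `LEGACY` name are assumptions for illustration, not nucliadb's actual storage schema:

    # Hypothetical sketch, not nucliadb's real key schema: a legacy-style key
    # stores one extracted-vectors blob per field, while a vectorset-prefixed
    # key namespaces the blob so several embedding models can coexist on the
    # same field.
    def extracted_vectors_key(kbid: str, rid: str, field: str, vectorset: str, kind: str) -> str:
        if kind == "LEGACY":  # assumed name for the pre-vectorsets kind
            return f"kbs/{kbid}/r/{rid}/e/{field}/extracted_vectors"
        return f"kbs/{kbid}/r/{rid}/e/{field}/{vectorset}/extracted_vectors"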
nucliadb/ingest/orm/entities.py

@@ -26,7 +26,6 @@ from nucliadb.common.cluster.base import AbstractIndexNode
 from nucliadb.common.cluster.exceptions import (
     AlreadyExists,
     EntitiesGroupNotFound,
-    NodeError,
 )
 from nucliadb.common.cluster.utils import get_shard_manager
 from nucliadb.common.datamanagers.entities import (
@@ -37,6 +36,7 @@ from nucliadb.common.datamanagers.entities import (
 from nucliadb.common.maindb.driver import Transaction
 from nucliadb.ingest.orm.knowledgebox import KnowledgeBox
 from nucliadb.ingest.settings import settings
+from nucliadb.search.search.shards import query_shard
 from nucliadb_protos.knowledgebox_pb2 import (
     DeletedEntitiesGroups,
     EntitiesGroup,
@@ -53,9 +53,6 @@ from nucliadb_protos.nodereader_pb2 import (
 )
 from nucliadb_protos.utils_pb2 import RelationNode
 from nucliadb_protos.writer_pb2 import GetEntitiesResponse
-from nucliadb_telemetry import errors
-from nucliadb_utils import const
-from nucliadb_utils.utilities import has_feature
 
 from .exceptions import EntityManagementException
 
@@ -218,20 +215,15 @@ class EntitiesManager:
                     ],
                 ),
             )
-            response = await node.reader.Search(request)  # type: ignore
+            response = await query_shard(node, shard_id, request)
             return response.relation
 
         results = await shard_manager.apply_for_all_shards(
             self.kbid,
             do_entities_search,
             settings.relation_search_timeout,
-            use_nidx=has_feature(const.Features.NIDX_READS, context={"kbid": self.kbid}),
             use_read_replica_nodes=self.use_read_replica_nodes,
         )
-        for result in results:
-            if isinstance(result, Exception):
-                errors.capture_exception(result)
-                raise NodeError("Error while querying relation index")
 
         entities = {}
         for result in results:
@@ -307,6 +299,7 @@ class EntitiesManager:
         shard_manager = get_shard_manager()
 
         async def query_indexed_entities_group_names(node: AbstractIndexNode, shard_id: str) -> set[str]:
+            """Search all relation types"""
             request = SearchRequest(
                 shard=shard_id,
                 result_per_page=0,
@@ -315,25 +308,21 @@ class EntitiesManager:
                 paragraph=False,
                 faceted=Faceted(labels=["/e"]),
             )
-            response: SearchResponse = await node.reader.Search(request)  # type: ignore
+            response: SearchResponse = await query_shard(node, shard_id, request)
             try:
                 facetresults = response.document.facets["/e"].facetresults
-                return {facet.tag.split("/")[-1] for facet in facetresults}
             except KeyError:
                 # No entities found
                 return set()
+            else:
+                return {facet.tag.split("/")[-1] for facet in facetresults}
 
         results = await shard_manager.apply_for_all_shards(
             self.kbid,
             query_indexed_entities_group_names,
             settings.relation_types_timeout,
-            use_nidx=has_feature(const.Features.NIDX_READS, context={"kbid": self.kbid}),
             use_read_replica_nodes=self.use_read_replica_nodes,
         )
-        for result in results:
-            if isinstance(result, Exception):
-                errors.capture_exception(result)
-                raise NodeError("Error while looking for relations types")
 
         if not results:
             return set()
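Both Search call sites in this file now go through `query_shard` (added in `nucliadb/search/search/shards.py`, +19 -0 in the file list), and the per-result exception loops are gone. A plausible shape for such a helper, assuming it centralizes the error capture callers used to repeat inline; the real implementation may differ:

    # Sketch only: the signature is inferred from the call sites in this diff;
    # the error handling is an assumption about what shards.py centralizes.
    from nucliadb.common.cluster.base import AbstractIndexNode
    from nucliadb_protos.nodereader_pb2 import SearchRequest, SearchResponse
    from nucliadb_telemetry import errors

    async def query_shard(
        node: AbstractIndexNode, shard_id: str, request: SearchRequest
    ) -> SearchResponse:
        try:
            return await node.reader.Search(request)  # type: ignore
        except Exception as exc:
            # Capture once here instead of in every caller's result loop
            errors.capture_exception(exc)
            raise

The `try/except/else` reshuffle above is the usual narrowing idiom: only the `facets["/e"]` lookup is guarded by `except KeyError`, and the set comprehension runs in `else` once the lookup has succeeded.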
nucliadb/ingest/orm/knowledgebox.py

@@ -27,7 +27,6 @@ from grpc.aio import AioRpcError
 
 from nucliadb.common import datamanagers
 from nucliadb.common.cluster.exceptions import ShardNotFound
-from nucliadb.common.cluster.manager import get_index_node
 from nucliadb.common.cluster.utils import get_shard_manager
 
 # XXX: this keys shouldn't be exposed outside datamanagers
@@ -49,7 +48,6 @@ from nucliadb.ingest.orm.exceptions import (
 from nucliadb.ingest.orm.metrics import processor_observer
 from nucliadb.ingest.orm.resource import Resource
 from nucliadb.ingest.orm.utils import choose_matryoshka_dimension, compute_paragraph_key
-from nucliadb.ingest.settings import settings
 from nucliadb.migrator.utils import get_latest_version
 from nucliadb_protos import knowledgebox_pb2, noderesources_pb2, nodewriter_pb2, writer_pb2
 from nucliadb_protos.knowledgebox_pb2 import (
@@ -58,8 +56,10 @@ from nucliadb_protos.knowledgebox_pb2 import (
     KnowledgeBoxConfig,
     SemanticModelMetadata,
     StoredExternalIndexProviderMetadata,
+    VectorSetPurge,
 )
 from nucliadb_protos.resources_pb2 import Basic
+from nucliadb_utils.settings import is_onprem_nucliadb
 from nucliadb_utils.storages.storage import Storage
 from nucliadb_utils.utilities import (
     get_audit,
@@ -74,6 +74,9 @@ KB_KEYS = "/kbs/{kbid}/"
 KB_TO_DELETE_BASE = "/kbtodelete/"
 KB_TO_DELETE_STORAGE_BASE = "/storagetodelete/"
 
+RESOURCE_TO_DELETE_STORAGE_BASE = "/resourcestoragetodelete"
+RESOURCE_TO_DELETE_STORAGE = f"{RESOURCE_TO_DELETE_STORAGE_BASE}/{{kbid}}/{{uuid}}"
+
 KB_TO_DELETE = f"{KB_TO_DELETE_BASE}{{kbid}}"
 KB_TO_DELETE_STORAGE = f"{KB_TO_DELETE_STORAGE_BASE}{{kbid}}"
 
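The doubled braces in the new key template survive f-string interpolation as literal placeholders, to be filled in later with `.format()`:

    RESOURCE_TO_DELETE_STORAGE_BASE = "/resourcestoragetodelete"
    RESOURCE_TO_DELETE_STORAGE = f"{RESOURCE_TO_DELETE_STORAGE_BASE}/{{kbid}}/{{uuid}}"

    # The f-string only expands the single-braced name; {{...}} becomes {...}
    assert RESOURCE_TO_DELETE_STORAGE == "/resourcestoragetodelete/{kbid}/{uuid}"
    assert RESOURCE_TO_DELETE_STORAGE.format(kbid="kb1", uuid="r1") == "/resourcestoragetodelete/kb1/r1"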
@@ -100,9 +103,9 @@ class KnowledgeBox:
         *,
         kbid: str,
         slug: str,
+        semantic_models: dict[str, SemanticModelMetadata],
         title: str = "",
         description: str = "",
-        semantic_models: Optional[dict[str, SemanticModelMetadata]] = None,
         external_index_provider: CreateExternalIndexProviderMetadata = CreateExternalIndexProviderMetadata(),
         hidden_resources_enabled: bool = False,
         hidden_resources_hide_on_creation: bool = False,
@@ -117,7 +120,7 @@
             raise KnowledgeBoxCreationError(
                 "Cannot hide new resources if the hidden resources feature is disabled"
             )
-        if semantic_models is None or len(semantic_models) == 0:
+        if len(semantic_models) == 0:
             raise KnowledgeBoxCreationError("KB must define at least one semantic model")
 
         rollback_ops: list[Callable[[], Coroutine[Any, Any, Any]]] = []
@@ -143,6 +146,7 @@
         kb_shards.actual = -1
 
         vs_external_indexes = []
+
         for vectorset_id, semantic_model in semantic_models.items():  # type: ignore
             # if this KB uses a matryoshka model, we can choose a different
             # dimension
@@ -169,6 +173,7 @@
                     vector_dimension=dimension,
                 ),
                 matryoshka_dimensions=semantic_model.matryoshka_dimensions,
+                storage_key_kind=knowledgebox_pb2.VectorSetConfig.StorageKeyKind.VECTORSET_PREFIX,
             )
             await datamanagers.vectorsets.set(txn, kbid=kbid, config=vectorset_config)
 
@@ -339,6 +344,8 @@
         if exists is False:
             logger.error(f"{kbid} KB does not exists on Storage")
 
+        nidx_api = get_nidx_api_client()
+
         async with driver.transaction() as txn:
             storage_to_delete = KB_TO_DELETE_STORAGE.format(kbid=kbid)
             await txn.set(storage_to_delete, b"")
@@ -351,25 +358,17 @@
                 logger.warning(f"Shards not found for KB while purging it", extra={"kbid": kbid})
             else:
                 for shard in shards_obj.shards:
-                    # Delete the shard on nodes
-                    for replica in shard.replicas:
-                        node = get_index_node(replica.node)
-                        if node is None:
-                            logger.error(
-                                f"No node {replica.node} found, let's continue. Some shards may stay orphaned",
-                                extra={"kbid": kbid},
-                            )
-                            continue
+                    if shard.nidx_shard_id:
                         try:
-                            await node.delete_shard(replica.shard.id)
+                            await nidx_api.DeleteShard(noderesources_pb2.ShardId(id=shard.nidx_shard_id))
                             logger.debug(
-                                f"Succeded deleting shard from nodeid={replica.node} at {node.address}",
-                                extra={"kbid": kbid, "node_id": replica.node},
+                                f"Succeded deleting shard",
+                                extra={"kbid": kbid, "shard_id": shard.nidx_shard_id},
                             )
                         except AioRpcError as exc:
                             if exc.code() == StatusCode.NOT_FOUND:
                                 continue
-                            raise ShardNotFound(f"{exc.details()} @ {node.address}")
+                            raise ShardNotFound(f"{exc.details()} @ shard {shard.nidx_shard_id}")
 
             await txn.commit()
         await cls.delete_all_kb_keys(driver, kbid)
@@ -415,9 +414,16 @@
             logger.exception("Error deleting slug")
 
     async def storage_delete_resource(self, uuid: str):
-        await self.storage.delete_resource(
-            self.kbid, uuid, max_parallel=settings.ingest_delete_resource_storage_max_parallel
-        )
+        if is_onprem_nucliadb():
+            await self.storage.delete_resource(self.kbid, uuid)
+        else:
+            # Deleting from storage can be slow, so we schedule its deletion and the purge cronjob
+            # will take care of it
+            await self.schedule_delete_resource(self.kbid, uuid)
+
+    async def schedule_delete_resource(self, kbid: str, uuid: str):
+        key = RESOURCE_TO_DELETE_STORAGE.format(kbid=kbid, uuid=uuid)
+        await self.txn.set(key, b"")
 
     async def delete_resource(self, uuid: str):
         with processor_observer({"type": "delete_resource_maindb"}):
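With this change, hosted (non-onprem) deployments stop deleting resource blobs inline: `schedule_delete_resource` only writes a marker key, and the purge job (`nucliadb/purge/__init__.py`, +102 -12 in the file list) is expected to sweep it. A hypothetical sketch of such a sweep, under the assumption of a prefix-scanning transaction API (`txn.keys` and `txn.delete` are assumed names, not confirmed by this diff):

    # Hypothetical purge sweep; the real logic in purge/__init__.py may differ.
    async def purge_resource_storage(driver, storage) -> None:
        async with driver.transaction() as txn:
            async for key in txn.keys(RESOURCE_TO_DELETE_STORAGE_BASE):
                # Key format per RESOURCE_TO_DELETE_STORAGE:
                # /resourcestoragetodelete/{kbid}/{uuid}
                _, _, kbid, uuid = key.split("/")
                await storage.delete_resource(kbid, uuid)  # the slow blob deletion
                await txn.delete(key)
            await txn.commit()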
@@ -479,6 +485,12 @@
             self.txn, kbid=self.kbid, vectorset_id=config.vectorset_id
         ):
             raise VectorSetConflict(f"Vectorset {config.vectorset_id} already exists")
+
+        # To ensure we always set the storage key kind, we overwrite it with the
+        # correct value. This whole enum business is to maintain bw/c with KBs
+        # pre-vectorsets, so any new vectorset should use the vectorset prefix
+        # key kind
+        config.storage_key_kind = knowledgebox_pb2.VectorSetConfig.StorageKeyKind.VECTORSET_PREFIX
         await datamanagers.vectorsets.set(self.txn, kbid=self.kbid, config=config)
 
         # Remove the async deletion mark if it exists, just in case there was a previous deletion
@@ -491,11 +503,21 @@
         await shard_manager.create_vectorset(self.kbid, config)
 
     async def delete_vectorset(self, vectorset_id: str):
-        await datamanagers.vectorsets.delete(self.txn, kbid=self.kbid, vectorset_id=vectorset_id)
+        vectorset_count = await datamanagers.vectorsets.count(self.txn, kbid=self.kbid)
+        if vectorset_count == 1:
+            raise VectorSetConflict("Deletion of your last vectorset is not allowed")
+
+        deleted = await datamanagers.vectorsets.delete(
+            self.txn, kbid=self.kbid, vectorset_id=vectorset_id
+        )
+        if deleted is None:
+            # already deleted
+            return
 
         # mark vectorset for async deletion
         deletion_mark_key = KB_VECTORSET_TO_DELETE.format(kbid=self.kbid, vectorset=vectorset_id)
-        await self.txn.set(deletion_mark_key, b"")
+        payload = VectorSetPurge(storage_key_kind=deleted.storage_key_kind)
+        await self.txn.set(deletion_mark_key, payload.SerializeToString())
 
         shard_manager = get_shard_manager()
         await shard_manager.delete_vectorset(self.kbid, vectorset_id)
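The deletion mark is no longer an empty value: it now stores a serialized `VectorSetPurge` carrying the vectorset's `storage_key_kind`, so the asynchronous purge can tell which key layout it has to clean up. Reading it back is a standard protobuf round-trip (the consumer's exact shape is assumed; `ParseFromString`/`SerializeToString` are the real protobuf API):

    from nucliadb_protos.knowledgebox_pb2 import VectorSetPurge

    async def read_purge_mark(txn, deletion_mark_key: str) -> VectorSetPurge:
        purge = VectorSetPurge()
        purge.ParseFromString(await txn.get(deletion_mark_key))
        return purge  # .storage_key_kind drives which key layout gets cleaned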
nucliadb/ingest/orm/processor/data_augmentation.py

@@ -20,13 +20,13 @@
 
 import logging
 from dataclasses import dataclass, field
-from typing import Optional, Sequence
+from typing import Optional
 
 from nucliadb.ingest.orm.resource import Resource
 from nucliadb.ingest.processing import ProcessingEngine, PushPayload, Source
 from nucliadb_models.text import PushTextFormat, Text
 from nucliadb_protos import resources_pb2, writer_pb2
-from nucliadb_protos.resources_pb2 import FieldID, FieldType
+from nucliadb_protos.resources_pb2 import FieldType
 from nucliadb_utils.utilities import Utility, get_partitioning, get_utility
 
 logger = logging.getLogger("ingest-processor")
@@ -50,7 +50,7 @@ async def get_generated_fields(bm: writer_pb2.BrokerMessage, resource: Resource)
     ingest the processed thing later).
 
     Given a broker message and a resource, this function returns the list of
-    generated fields, that can be empty.
+    generated fields, that can be empty. It skips fields with errors.
 
     """
     generated_fields = GeneratedFields()
@@ -60,34 +60,12 @@ async def get_generated_fields(bm: writer_pb2.BrokerMessage, resource: Resource)
         return generated_fields
 
     # search all fields
-
-    all_fields = await resource.get_all_field_ids(for_update=False)
-    fields: Sequence[FieldID]
-    if all_fields is None:
-        fields = []
-    else:
-        fields = all_fields.fields
-
-    for field_id in bm.texts:
-        field = FieldID(field_type=FieldType.TEXT, field=field_id)
-        if field not in fields:
+    for field_id, text in bm.texts.items():
+        errors = [e for e in bm.errors if e.field_type == FieldType.TEXT and e.field == field_id]
+        has_error = len(errors) > 0
+        if text.generated_by.WhichOneof("author") == "data_augmentation" and not has_error:
             generated_fields.texts.append(field_id)
 
-    for field_id in bm.links:
-        field = FieldID(field_type=FieldType.LINK, field=field_id)
-        if field not in fields:
-            generated_fields.links.append(field_id)
-
-    for field_id in bm.files:
-        field = FieldID(field_type=FieldType.FILE, field=field_id)
-        if field not in fields:
-            generated_fields.files.append(field_id)
-
-    for field_id in bm.conversations:
-        field = FieldID(field_type=FieldType.CONVERSATION, field=field_id)
-        if field not in fields:
-            generated_fields.conversations.append(field_id)
-
     return generated_fields
 
 
nucliadb/ingest/orm/processor/processor.py

@@ -275,7 +275,6 @@
 
         if message.source == writer_pb2.BrokerMessage.MessageSource.WRITER:
             resource = await kb.get(uuid)
-
             if resource is None:
                 # It's a new resource
                 resource = await kb.add_resource(uuid, message.slug, message.basic)
@@ -737,7 +736,11 @@ def has_vectors_operation(index_message: PBBrainResource) -> bool:
     """
     Returns True if the index message has any vectors to index or to delete.
     """
-    if len(index_message.sentences_to_delete) > 0 or len(index_message.paragraphs_to_delete) > 0:
+    if (
+        len(index_message.sentences_to_delete) > 0
+        or len(index_message.paragraphs_to_delete) > 0
+        or any([len(deletions.items) for deletions in index_message.vector_prefixes_to_delete.values()])
+    ):
         return True
     for field_paragraphs in index_message.paragraphs.values():
         for paragraph in field_paragraphs.paragraphs.values():
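The rewritten condition also counts pending per-vectorset deletions. The `any([len(...)])` form works because nonzero lengths are truthy; an equivalent, slightly more idiomatic spelling (behavior-preserving, offered only as a style note) would be:

    # A generator expression avoids building the intermediate list
    if any(
        len(deletions.items) > 0
        for deletions in index_message.vector_prefixes_to_delete.values()
    ):
        return True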