nucliadb 6.3.5.post3985__py3-none-any.whl → 6.3.5.post3995__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,409 @@
1
+ # Copyright (C) 2021 Bosutech XXI S.L.
2
+ #
3
+ # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
+ # For commercial licensing, contact us at info@nuclia.com.
5
+ #
6
+ # AGPL:
7
+ # This program is free software: you can redistribute it and/or modify
8
+ # it under the terms of the GNU Affero General Public License as
9
+ # published by the Free Software Foundation, either version 3 of the
10
+ # License, or (at your option) any later version.
11
+ #
12
+ # This program is distributed in the hope that it will be useful,
13
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
+ # GNU Affero General Public License for more details.
16
+ #
17
+ # You should have received a copy of the GNU Affero General Public License
18
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
+ #
20
+
21
+
22
+ import asyncio
23
+ from typing import Optional
24
+
25
+ from nucliadb.common import datamanagers
26
+ from nucliadb.ingest.fields.exceptions import FieldAuthorNotFound
27
+ from nucliadb.ingest.fields.file import File
28
+ from nucliadb.ingest.orm.brain_v2 import ResourceBrainV2 as ResourceBrain
29
+ from nucliadb.ingest.orm.resource import Resource, get_file_page_positions
30
+ from nucliadb_protos.knowledgebox_pb2 import VectorSetConfig
31
+ from nucliadb_protos.noderesources_pb2 import Resource as IndexMessage
32
+ from nucliadb_protos.resources_pb2 import Basic, FieldID, FieldType
33
+ from nucliadb_protos.writer_pb2 import BrokerMessage
34
+ from nucliadb_utils import const
35
+ from nucliadb_utils.utilities import has_feature
36
+
37
+
38
class IndexMessageBuilder:
    """Incrementally builds an index message (``noderesources_pb2.Resource``)
    for a resource, by applying resource-level and field-level index data onto
    a ``ResourceBrain`` accumulator.

    The three public entry points cover the three indexing scenarios:
    ``for_writer_bm`` (writer broker messages), ``for_processor_bm``
    (processor broker messages) and ``full`` (reindex everything).
    """

    def __init__(self, resource: Resource):
        self.resource = resource
        # The brain accumulates all generated index data; its `.brain`
        # attribute is the protobuf index message returned to callers.
        self.brain = ResourceBrain(resource.uuid)

    async def _apply_resource_index_data(self, brain: ResourceBrain) -> None:
        """Add the resource-level metadata (basic, relations, origin, security)
        to the index message."""
        # Set the metadata at the resource level
        basic = await self.resource.get_basic()
        assert basic is not None
        user_relations = await self.resource.get_user_relations()
        origin = await self.resource.get_origin()
        security = await self.resource.get_security()
        # CPU-bound protobuf crunching is pushed to a thread so the event
        # loop is not blocked.
        await asyncio.to_thread(
            brain.generate_resource_indexing_metadata,
            basic,
            user_relations,
            origin,
            self.resource._previous_status,
            security,
        )

    async def _apply_field_index_data(
        self,
        brain: ResourceBrain,
        fieldid: FieldID,
        basic: Basic,
        texts: bool = True,
        paragraphs: bool = True,
        vectors: bool = True,
        relations: bool = True,
        replace: bool = True,
        vectorset_configs: Optional[list[VectorSetConfig]] = None,
    ):
        """Add the index data of a single field to the index message.

        The boolean flags select which indexes (texts, paragraphs, vectors,
        relations) are generated; `replace` indicates whether previously
        indexed data for the field must be replaced. `vectorset_configs` is
        required when `vectors` is True.
        """
        field = await self.resource.get_field(fieldid.field, fieldid.field_type)
        extracted_text = await field.get_extracted_text()
        field_computed_metadata = await field.get_field_metadata()
        # User-provided (annotated) metadata for this field, if any.
        user_field_metadata = next(
            (fm for fm in basic.fieldmetadata if fm.field == fieldid),
            None,
        )
        if texts or paragraphs:
            # We need to compute the texts when we're going to generate the paragraphs too, but we may not
            # want to index them always.
            skip_index_texts = not texts
            replace_texts = replace and not skip_index_texts

            if extracted_text is not None:
                try:
                    field_author = await field.generated_by()
                except FieldAuthorNotFound:
                    field_author = None
                await asyncio.to_thread(
                    brain.generate_texts_index_message,
                    self.resource.generate_field_id(fieldid),
                    extracted_text,
                    field_computed_metadata,
                    basic.usermetadata,
                    field_author,
                    replace_field=replace_texts,
                    skip_index=skip_index_texts,
                )
        if paragraphs or vectors:
            # The paragraphs are needed to generate the vectors. However, we don't need to index them
            # in all cases.
            skip_index_paragraphs = not paragraphs
            replace_paragraphs = replace and not skip_index_paragraphs

            # We need to compute the paragraphs when we're going to generate the vectors too.
            if extracted_text is not None and field_computed_metadata is not None:
                # Page positions are only meaningful for file fields.
                page_positions = (
                    await get_file_page_positions(field) if isinstance(field, File) else None
                )
                await asyncio.to_thread(
                    brain.generate_paragraphs_index_message,
                    self.resource.generate_field_id(fieldid),
                    field_computed_metadata,
                    extracted_text,
                    page_positions,
                    user_field_metadata,
                    replace_field=replace_paragraphs,
                    skip_index=skip_index_paragraphs,
                )
                # NOTE(review): vectors generation is nested under the
                # extracted-text/metadata check, as paragraphs are a
                # prerequisite for vectors — confirm against upstream source.
                if vectors:
                    assert vectorset_configs is not None
                    for vectorset_config in vectorset_configs:
                        vo = await field.get_vectors(
                            vectorset=vectorset_config.vectorset_id,
                            storage_key_kind=vectorset_config.storage_key_kind,
                        )
                        if vo is not None:
                            dimension = vectorset_config.vectorset_index_config.vector_dimension
                            await asyncio.to_thread(
                                brain.generate_vectors_index_message,
                                self.resource.generate_field_id(fieldid),
                                vo,
                                vectorset=vectorset_config.vectorset_id,
                                replace_field=replace,
                                vector_dimension=dimension,
                            )
        if relations:
            await asyncio.to_thread(
                brain.generate_relations_index_message,
                self.resource.generate_field_id(fieldid),
                field_computed_metadata,
                basic.usermetadata,
                replace_field=replace,
            )

    def _apply_field_deletions(
        self,
        brain: ResourceBrain,
        field_ids: list[FieldID],
    ) -> None:
        """Mark the given fields as deleted in the index message."""
        for field_id in field_ids:
            brain.delete_field(self.resource.generate_field_id(field_id))

    async def for_writer_bm(
        self,
        messages: list[BrokerMessage],
        resource_created: bool,
    ) -> IndexMessage:
        """
        Builds the index message for the broker messages coming from the writer.
        The writer messages are not adding new vectors to the index.
        """
        assert all(message.source == BrokerMessage.MessageSource.WRITER for message in messages)

        deleted_fields = get_bm_deleted_fields(messages)
        self._apply_field_deletions(self.brain, deleted_fields)
        await self._apply_resource_index_data(self.brain)
        basic = await self.get_basic()
        prefilter_update = needs_prefilter_update(messages)
        if prefilter_update:
            # Changes on some metadata at the resource level that is used for filtering require that we reindex all the fields
            # in the texts index (as it is the one used for prefiltering).
            fields_to_index = [
                FieldID(field=field_id, field_type=field_type)
                for field_type, field_id in await self.resource.get_fields(force=True)
            ]
        else:
            # Simply process the fields that are in the message
            fields_to_index = get_bm_modified_fields(messages)
        for fieldid in fields_to_index:
            if fieldid in deleted_fields:
                continue
            await self._apply_field_index_data(
                self.brain,
                fieldid,
                basic,
                texts=prefilter_update or needs_texts_update(fieldid, messages),
                paragraphs=needs_paragraphs_update(fieldid, messages),
                relations=False,  # Relations at the field level are not modified by the writer
                vectors=False,  # Vectors are never added by the writer
                replace=not resource_created,
            )
        return self.brain.brain

    async def for_processor_bm(
        self,
        messages: list[BrokerMessage],
    ) -> IndexMessage:
        """
        Builds the index message for the broker messages coming from the processor.
        The processor can index new data to any index.
        """
        assert all(message.source == BrokerMessage.MessageSource.PROCESSOR for message in messages)
        deleted_fields = get_bm_deleted_fields(messages)
        self._apply_field_deletions(self.brain, deleted_fields)
        await self._apply_resource_index_data(self.brain)
        basic = await self.get_basic()
        fields_to_index = get_bm_modified_fields(messages)
        vectorsets_configs = await self.get_vectorsets_configs()
        for fieldid in fields_to_index:
            if fieldid in deleted_fields:
                continue
            await self._apply_field_index_data(
                self.brain,
                fieldid,
                basic,
                texts=needs_texts_update(fieldid, messages),
                paragraphs=needs_paragraphs_update(fieldid, messages),
                relations=needs_relations_update(fieldid, messages),
                vectors=needs_vectors_update(fieldid, messages),
                replace=True,
                vectorset_configs=vectorsets_configs,
            )
        return self.brain.brain

    async def full(self, reindex: bool) -> IndexMessage:
        """Build the full index message for the resource, covering every
        field and every index. `reindex` controls whether previously indexed
        data must be replaced."""
        await self._apply_resource_index_data(self.brain)
        basic = await self.get_basic()
        fields_to_index = [
            FieldID(field=field_id, field_type=field_type)
            for field_type, field_id in await self.resource.get_fields(force=True)
        ]
        vectorsets_configs = await self.get_vectorsets_configs()
        for fieldid in fields_to_index:
            await self._apply_field_index_data(
                self.brain,
                fieldid,
                basic,
                texts=True,
                paragraphs=True,
                relations=True,
                vectors=True,
                replace=reindex,
                vectorset_configs=vectorsets_configs,
            )
        return self.brain.brain

    async def get_basic(self) -> Basic:
        """Return the resource's basic metadata, asserting it exists."""
        basic = await self.resource.get_basic()
        assert basic is not None
        return basic

    async def get_vectorsets_configs(self) -> list[VectorSetConfig]:
        """
        Get the vectorsets config for the resource.
        """
        vectorset_configs = [
            vectorset_config
            async for _, vectorset_config in datamanagers.vectorsets.iter(
                self.resource.txn, kbid=self.resource.kb.kbid
            )
        ]
        return vectorset_configs
264
+
265
+
266
def get_bm_deleted_fields(
    messages: list[BrokerMessage],
) -> list[FieldID]:
    """Collect the field ids deleted across all broker messages.

    Duplicates are dropped while preserving first-seen order (a list is used
    because protobuf messages are not hashable).
    """
    unique_deletions: list[FieldID] = []
    all_deletions = (field for message in messages for field in message.delete_fields)
    for deletion in all_deletions:
        if deletion not in unique_deletions:
            unique_deletions.append(deletion)
    return unique_deletions
275
+
276
+
277
def get_bm_modified_fields(messages: list[BrokerMessage]) -> list[FieldID]:
    """Return the id of every field added or modified by the given broker
    messages, deduplicated.

    What counts as "modified" depends on the common source of the messages:
    processor messages also contribute fields carrying computed metadata,
    extracted text or vectors, while writer messages also contribute fields
    with user annotations.
    """
    source = get_messages_source(messages)
    touched: set[tuple[str, FieldType.ValueType]] = set()
    for message in messages:
        # Added or modified fields always need indexing
        touched.update((link_id, FieldType.LINK) for link_id in message.links)
        touched.update((file_id, FieldType.FILE) for file_id in message.files)
        touched.update((conv_id, FieldType.CONVERSATION) for conv_id in message.conversations)
        touched.update((text_id, FieldType.TEXT) for text_id in message.texts)
        if message.HasField("basic"):
            # Title and summary are generic fields: include them only when
            # they actually changed (non-empty in the message).
            if message.basic.title != "":
                touched.add(("title", FieldType.GENERIC))
            if message.basic.summary != "":
                touched.add(("summary", FieldType.GENERIC))

        if source == BrokerMessage.MessageSource.PROCESSOR:
            # Messages with field metadata, extracted text or field vectors need indexing
            for computed in message.field_metadata:
                touched.add((computed.field.field, computed.field.field_type))
            for extracted in message.extracted_text:
                touched.add((extracted.field.field, extracted.field.field_type))
            for vectors in message.field_vectors:
                touched.add((vectors.field.field, vectors.field.field_type))

        if source == BrokerMessage.MessageSource.WRITER:
            # Any field that has fieldmetadata annotations should be considered as modified
            # and needs to be reindexed
            if message.HasField("basic"):
                for annotated in message.basic.fieldmetadata:
                    touched.add((annotated.field.field, annotated.field.field_type))
    return [FieldID(field=field, field_type=field_type) for field, field_type in touched]
313
+
314
+
315
def get_messages_source(messages: list[BrokerMessage]) -> BrokerMessage.MessageSource.ValueType:
    """Return the single source (writer/processor) shared by all messages."""
    distinct_sources = {message.source for message in messages}
    # A batch must never mix writer and processor messages.
    assert len(distinct_sources) == 1
    return distinct_sources.pop()
318
+
319
+
320
def needs_prefilter_update(messages: list[BrokerMessage]) -> bool:
    """True when at least one message requests a full resource reindex."""
    for message in messages:
        if message.reindex:
            return True
    return False
322
+
323
+
324
def needs_paragraphs_update(field_id: FieldID, messages: list[BrokerMessage]) -> bool:
    """True when the paragraphs index for *field_id* must be regenerated:
    user paragraph annotations, new extracted text or new computed metadata."""
    if has_paragraph_annotations(field_id, messages):
        return True
    if has_new_extracted_text(field_id, messages):
        return True
    return has_new_field_metadata(field_id, messages)
330
+
331
+
332
def has_paragraph_annotations(field_id: FieldID, messages: list[BrokerMessage]) -> bool:
    """True when some message carries user paragraph annotations for *field_id*.

    Only the first fieldmetadata entry matching the field is considered in
    each message.
    """
    for message in messages:
        for fieldmetadata in message.basic.fieldmetadata:
            if fieldmetadata.field == field_id:
                if len(fieldmetadata.paragraphs) > 0:
                    return True
                break  # only the first matching entry counts per message
    return False
343
+
344
+
345
def has_new_field_metadata(
    field_id: FieldID,
    messages: list[BrokerMessage],
) -> bool:
    """True when any message carries computed field metadata for *field_id*."""
    return any(
        field_metadata.field == field_id
        for message in messages
        for field_metadata in message.field_metadata
    )
354
+
355
+
356
def has_new_extracted_text(
    field_id: FieldID,
    messages: list[BrokerMessage],
) -> bool:
    """True when any message carries new extracted text for *field_id*."""
    return any(
        extracted_text.field == field_id
        for message in messages
        for extracted_text in message.extracted_text
    )
365
+
366
+
367
def needs_texts_update(
    field_id: FieldID,
    messages: list[BrokerMessage],
) -> bool:
    """True when the texts index for *field_id* must be regenerated:
    new extracted text or new computed field metadata."""
    if has_new_extracted_text(field_id, messages):
        return True
    return has_new_field_metadata(field_id, messages)
372
+
373
+
374
def needs_vectors_update(
    field_id: FieldID,
    messages: list[BrokerMessage],
) -> bool:
    """True when any message carries new vectors for *field_id*."""
    return any(
        field_vectors.field == field_id
        for message in messages
        for field_vectors in message.field_vectors
    )
383
+
384
+
385
def needs_relations_update(
    field_id: FieldID,
    messages: list[BrokerMessage],
) -> bool:
    """True when the relations index for *field_id* must be regenerated:
    new computed field metadata or new extracted text."""
    if has_new_field_metadata(field_id, messages):
        return True
    return has_new_extracted_text(field_id, messages)
390
+
391
+
392
async def get_resource_index_message(
    resource: Resource,
    reindex: bool = False,
) -> IndexMessage:
    """
    Get the full index message for a resource.

    Dispatches between the new builder-based generation (behind the
    INDEX_MESSAGE_GENERATION_V2 feature flag, evaluated per-kbid) and the
    legacy Resource.generate_index_message path.
    """
    if has_feature(
        const.Features.INDEX_MESSAGE_GENERATION_V2,
        context={
            "kbid": resource.kb.kbid,
        },
    ):
        im_builder = IndexMessageBuilder(resource)
        return await im_builder.full(reindex=reindex)
    else:
        # TODO: remove this code when we remove the old index message generation
        return (await resource.generate_index_message(reindex=reindex)).brain
@@ -22,6 +22,6 @@ from nucliadb_telemetry import metrics
22
22
 
23
23
  processor_observer = metrics.Observer(
24
24
  "nucliadb_ingest_processor",
25
- labels={"type": ""},
25
+ labels={"type": "", "source": ""},
26
26
  error_mappings={"kb_conflict": KnowledgeBoxConflict},
27
27
  )
@@ -23,8 +23,8 @@ from dataclasses import dataclass, field
23
23
  from typing import Optional
24
24
 
25
25
  from nucliadb.ingest.orm.resource import Resource
26
- from nucliadb.ingest.processing import ProcessingEngine, PushPayload, Source
27
- from nucliadb_models.text import PushTextFormat, Text
26
+ from nucliadb.ingest.processing import ProcessingEngine
27
+ from nucliadb.models.internal.processing import PushPayload, PushTextFormat, Source, Text
28
28
  from nucliadb_protos import resources_pb2, writer_pb2
29
29
  from nucliadb_protos.resources_pb2 import FieldType
30
30
  from nucliadb_utils.utilities import Utility, get_partitioning, get_utility
@@ -23,6 +23,7 @@ from typing import cast
23
23
  from nucliadb.common.maindb.driver import Transaction
24
24
  from nucliadb.common.maindb.pg import PGDriver, PGTransaction
25
25
  from nucliadb.common.maindb.utils import get_driver
26
+ from nucliadb_protos.noderesources_pb2 import Resource as IndexMessage
26
27
  from nucliadb_telemetry import metrics
27
28
 
28
29
  from ..resource import Resource
@@ -39,7 +40,7 @@ def pgcatalog_enabled(kbid):
39
40
 
40
41
 
41
42
  @observer.wrap({"type": "update"})
42
- async def pgcatalog_update(txn: Transaction, kbid: str, resource: Resource):
43
+ async def pgcatalog_update(txn: Transaction, kbid: str, resource: Resource, index_message: IndexMessage):
43
44
  if not pgcatalog_enabled(kbid):
44
45
  return
45
46
 
@@ -69,7 +70,7 @@ async def pgcatalog_update(txn: Transaction, kbid: str, resource: Resource):
69
70
  "title": resource.basic.title,
70
71
  "created_at": created_at,
71
72
  "modified_at": modified_at,
72
- "labels": list(resource.indexer.brain.labels),
73
+ "labels": list(index_message.labels),
73
74
  },
74
75
  )
75
76
 
@@ -38,6 +38,7 @@ from nucliadb.ingest.orm.exceptions import (
38
38
  ResourceNotIndexable,
39
39
  SequenceOrderViolation,
40
40
  )
41
+ from nucliadb.ingest.orm.index_message import IndexMessageBuilder
41
42
  from nucliadb.ingest.orm.knowledgebox import KnowledgeBox
42
43
  from nucliadb.ingest.orm.metrics import processor_observer
43
44
  from nucliadb.ingest.orm.processor import sequence_manager
@@ -312,25 +313,11 @@ class Processor:
312
313
  await self.apply_resource(message, resource, update=(not created))
313
314
 
314
315
  # index message
315
-
316
- if resource:
317
- if any(needs_reindex(m) for m in messages):
318
- # when reindexing, let's just generate full new index message
319
- # TODO - This should be improved in the future as it's not optimal for very large resources:
320
- # As of now, there are some API operations that require fully reindexing all the fields of a resource.
321
- # An example of this is classification label changes - we need to reindex all the fields of a resource to
322
- # propagate the label changes to the index.
323
- resource.replace_indexer(await resource.generate_index_message(reindex=True))
324
- else:
325
- # TODO - Ideally we should only update the fields that have been changed in the current transaction.
326
- await resource.compute_global_text()
327
- await resource.compute_global_tags(resource.indexer)
328
- await resource.compute_security(resource.indexer)
329
-
330
316
  if resource and resource.modified:
331
- await pgcatalog_update(txn, kbid, resource)
317
+ index_message = await self.generate_index_message(resource, messages, created)
318
+ await pgcatalog_update(txn, kbid, resource, index_message)
332
319
  await self.index_resource( # noqa
333
- resource=resource,
320
+ index_message=index_message,
334
321
  txn=txn,
335
322
  uuid=uuid,
336
323
  kbid=kbid,
@@ -451,7 +438,7 @@ class Processor:
451
438
  @processor_observer.wrap({"type": "index_resource"})
452
439
  async def index_resource(
453
440
  self,
454
- resource: Resource,
441
+ index_message: PBBrainResource,
455
442
  txn: Transaction,
456
443
  uuid: str,
457
444
  kbid: str,
@@ -460,9 +447,8 @@ class Processor:
460
447
  kb: KnowledgeBox,
461
448
  source: nodewriter_pb2.IndexMessageSource.ValueType,
462
449
  ) -> None:
463
- validate_indexable_resource(resource.indexer.brain)
450
+ validate_indexable_resource(index_message)
464
451
  shard = await self.get_or_assign_resource_shard(txn, kb, uuid)
465
- index_message = resource.indexer.brain
466
452
  external_index_manager = await get_external_index_manager(kbid=kbid)
467
453
  if external_index_manager is not None:
468
454
  await self.external_index_add_resource(external_index_manager, uuid, index_message)
@@ -476,6 +462,56 @@ class Processor:
476
462
  source=source,
477
463
  )
478
464
 
465
+ async def generate_index_message_v2(
466
+ self,
467
+ resource: Resource,
468
+ messages: list[writer_pb2.BrokerMessage],
469
+ resource_created: bool,
470
+ ) -> PBBrainResource:
471
+ builder = IndexMessageBuilder(resource)
472
+ message_source = messages_source(messages)
473
+ if message_source == nodewriter_pb2.IndexMessageSource.WRITER:
474
+ with processor_observer({"type": "generate_index_message", "source": "writer"}):
475
+ return await builder.for_writer_bm(messages, resource_created)
476
+ elif message_source == nodewriter_pb2.IndexMessageSource.PROCESSOR:
477
+ with processor_observer({"type": "generate_index_message", "source": "processor"}):
478
+ return await builder.for_processor_bm(messages)
479
+ else: # pragma: no cover
480
+ raise InvalidBrokerMessage(f"Unknown broker message source: {message_source}")
481
+
482
+ async def generate_index_message_v1(
483
+ self,
484
+ resource: Resource,
485
+ messages: list[writer_pb2.BrokerMessage],
486
+ ) -> PBBrainResource:
487
+ if any(needs_reindex(m) for m in messages):
488
+ # when reindexing, let's just generate full new index message
489
+ # TODO - This should be improved in the future as it's not optimal for very large resources:
490
+ # As of now, there are some API operations that require fully reindexing all the fields of a resource.
491
+ # An example of this is classification label changes - we need to reindex all the fields of a resource to
492
+ # propagate the label changes to the index.
493
+ resource.replace_indexer(await resource.generate_index_message(reindex=True))
494
+ else:
495
+ # TODO - Ideally we should only update the fields that have been changed in the current transaction.
496
+ await resource.compute_global_text()
497
+ await resource.compute_global_tags(resource.indexer)
498
+ await resource.compute_security(resource.indexer)
499
+ return resource.indexer.brain
500
+
501
+ async def generate_index_message(
502
+ self,
503
+ resource: Resource,
504
+ messages: list[writer_pb2.BrokerMessage],
505
+ resource_created: bool = False,
506
+ ) -> PBBrainResource:
507
+ if has_feature(
508
+ const.Features.INDEX_MESSAGE_GENERATION_V2,
509
+ context={"kbid": resource.kb.kbid},
510
+ ):
511
+ return await self.generate_index_message_v2(resource, messages, resource_created)
512
+ else:
513
+ return await self.generate_index_message_v1(resource, messages)
514
+
479
515
  async def external_index_delete_resource(
480
516
  self, external_index_manager: ExternalIndexManager, resource_uuid: str
481
517
  ):
@@ -564,7 +600,10 @@ class Processor:
564
600
  resource: Resource,
565
601
  update: bool = False,
566
602
  ):
567
- """Apply broker message to resource object in the database"""
603
+ """
604
+ Apply broker message to resource object in the persistence layers (maindb and storage).
605
+ DO NOT add any indexing logic here.
606
+ """
568
607
  if update:
569
608
  await self.maybe_update_resource_basic(resource, message)
570
609
 
@@ -675,30 +714,9 @@ class Processor:
675
714
  try:
676
715
  async with self.driver.transaction() as txn:
677
716
  kb.txn = resource.txn = txn
678
-
679
- shard_id = await datamanagers.resources.get_resource_shard_id(
680
- txn, kbid=kb.kbid, rid=resource.uuid
681
- )
682
- shard = None
683
- if shard_id is not None:
684
- shard = await kb.get_resource_shard(shard_id)
685
- if shard is None:
686
- logger.warning(
687
- "Unable to mark resource as error, shard is None. "
688
- "This should not happen so you did something special to get here."
689
- )
690
- return
691
-
692
717
  resource.basic.metadata.status = resources_pb2.Metadata.Status.ERROR
693
718
  await resource.set_basic(resource.basic)
694
719
  await txn.commit()
695
-
696
- resource.indexer.set_processing_status(
697
- basic=resource.basic, previous_status=resource._previous_status
698
- )
699
- await self.index_node_shard_manager.add_resource(
700
- shard, resource.indexer.brain, seqid, partition=partition, kb=kb.kbid
701
- )
702
720
  except Exception:
703
721
  logger.warning("Error while marking resource as error", exc_info=True)
704
722
 
@@ -745,11 +763,7 @@ def has_vectors_operation(index_message: PBBrainResource) -> bool:
745
763
  """
746
764
  Returns True if the index message has any vectors to index or to delete.
747
765
  """
748
- if (
749
- len(index_message.sentences_to_delete) > 0
750
- or len(index_message.paragraphs_to_delete) > 0
751
- or any([len(deletions.items) for deletions in index_message.vector_prefixes_to_delete.values()])
752
- ):
766
+ if any([len(deletions.items) for deletions in index_message.vector_prefixes_to_delete.values()]):
753
767
  return True
754
768
  for field_paragraphs in index_message.paragraphs.values():
755
769
  for paragraph in field_paragraphs.paragraphs.values():