nucliadb 6.9.0.post5009__py3-none-any.whl → 6.9.0.post5018__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of nucliadb might be problematic. Click here for more details.
- nucliadb/ingest/orm/index_message.py +75 -110
- nucliadb/ingest/orm/processor/processor.py +66 -124
- {nucliadb-6.9.0.post5009.dist-info → nucliadb-6.9.0.post5018.dist-info}/METADATA +6 -6
- {nucliadb-6.9.0.post5009.dist-info → nucliadb-6.9.0.post5018.dist-info}/RECORD +7 -7
- {nucliadb-6.9.0.post5009.dist-info → nucliadb-6.9.0.post5018.dist-info}/WHEEL +0 -0
- {nucliadb-6.9.0.post5009.dist-info → nucliadb-6.9.0.post5018.dist-info}/entry_points.txt +0 -0
- {nucliadb-6.9.0.post5009.dist-info → nucliadb-6.9.0.post5018.dist-info}/top_level.txt +0 -0
|
@@ -20,7 +20,7 @@
|
|
|
20
20
|
|
|
21
21
|
|
|
22
22
|
import asyncio
|
|
23
|
-
from typing import Optional
|
|
23
|
+
from typing import Optional, Sequence
|
|
24
24
|
|
|
25
25
|
from nidx_protos.noderesources_pb2 import Resource as IndexMessage
|
|
26
26
|
|
|
@@ -150,7 +150,7 @@ class IndexMessageBuilder:
|
|
|
150
150
|
def _apply_field_deletions(
|
|
151
151
|
self,
|
|
152
152
|
brain: ResourceBrain,
|
|
153
|
-
field_ids:
|
|
153
|
+
field_ids: Sequence[FieldID],
|
|
154
154
|
) -> None:
|
|
155
155
|
for field_id in field_ids:
|
|
156
156
|
brain.delete_field(self.resource.generate_field_id(field_id))
|
|
@@ -158,20 +158,19 @@ class IndexMessageBuilder:
|
|
|
158
158
|
@observer.wrap({"type": "writer_bm"})
|
|
159
159
|
async def for_writer_bm(
|
|
160
160
|
self,
|
|
161
|
-
|
|
161
|
+
message: BrokerMessage,
|
|
162
162
|
resource_created: bool,
|
|
163
163
|
) -> IndexMessage:
|
|
164
164
|
"""
|
|
165
|
-
Builds the index message for the broker
|
|
165
|
+
Builds the index message for the broker message coming from the writer.
|
|
166
166
|
The writer messages are not adding new vectors to the index.
|
|
167
167
|
"""
|
|
168
|
-
assert
|
|
168
|
+
assert message.source == BrokerMessage.MessageSource.WRITER
|
|
169
169
|
|
|
170
|
-
|
|
171
|
-
self._apply_field_deletions(self.brain, deleted_fields)
|
|
170
|
+
self._apply_field_deletions(self.brain, message.delete_fields)
|
|
172
171
|
await self._apply_resource_index_data(self.brain)
|
|
173
172
|
basic = await self.get_basic()
|
|
174
|
-
prefilter_update = needs_prefilter_update(
|
|
173
|
+
prefilter_update = needs_prefilter_update(message)
|
|
175
174
|
if prefilter_update:
|
|
176
175
|
# Changes on some metadata at the resource level that is used for filtering require that we reindex all the fields
|
|
177
176
|
# in the texts index (as it is the one used for prefiltering).
|
|
@@ -181,16 +180,16 @@ class IndexMessageBuilder:
|
|
|
181
180
|
]
|
|
182
181
|
else:
|
|
183
182
|
# Simply process the fields that are in the message
|
|
184
|
-
fields_to_index = get_bm_modified_fields(
|
|
183
|
+
fields_to_index = get_bm_modified_fields(message)
|
|
185
184
|
for fieldid in fields_to_index:
|
|
186
|
-
if fieldid in
|
|
185
|
+
if fieldid in message.delete_fields:
|
|
187
186
|
continue
|
|
188
187
|
await self._apply_field_index_data(
|
|
189
188
|
self.brain,
|
|
190
189
|
fieldid,
|
|
191
190
|
basic,
|
|
192
|
-
texts=prefilter_update or needs_texts_update(fieldid,
|
|
193
|
-
paragraphs=needs_paragraphs_update(fieldid,
|
|
191
|
+
texts=prefilter_update or needs_texts_update(fieldid, message),
|
|
192
|
+
paragraphs=needs_paragraphs_update(fieldid, message),
|
|
194
193
|
relations=False, # Relations at the field level are not modified by the writer
|
|
195
194
|
vectors=False, # Vectors are never added by the writer
|
|
196
195
|
replace=not resource_created,
|
|
@@ -200,30 +199,29 @@ class IndexMessageBuilder:
|
|
|
200
199
|
@observer.wrap({"type": "processor_bm"})
|
|
201
200
|
async def for_processor_bm(
|
|
202
201
|
self,
|
|
203
|
-
|
|
202
|
+
message: BrokerMessage,
|
|
204
203
|
) -> IndexMessage:
|
|
205
204
|
"""
|
|
206
205
|
Builds the index message for the broker messages coming from the processor.
|
|
207
206
|
The processor can index new data to any index.
|
|
208
207
|
"""
|
|
209
|
-
assert
|
|
210
|
-
|
|
211
|
-
self._apply_field_deletions(self.brain, deleted_fields)
|
|
208
|
+
assert message.source == BrokerMessage.MessageSource.PROCESSOR
|
|
209
|
+
self._apply_field_deletions(self.brain, message.delete_fields)
|
|
212
210
|
await self._apply_resource_index_data(self.brain)
|
|
213
211
|
basic = await self.get_basic()
|
|
214
|
-
fields_to_index = get_bm_modified_fields(
|
|
212
|
+
fields_to_index = get_bm_modified_fields(message)
|
|
215
213
|
vectorsets_configs = await self.get_vectorsets_configs()
|
|
216
214
|
for fieldid in fields_to_index:
|
|
217
|
-
if fieldid in
|
|
215
|
+
if fieldid in message.delete_fields:
|
|
218
216
|
continue
|
|
219
217
|
await self._apply_field_index_data(
|
|
220
218
|
self.brain,
|
|
221
219
|
fieldid,
|
|
222
220
|
basic,
|
|
223
|
-
texts=needs_texts_update(fieldid,
|
|
224
|
-
paragraphs=needs_paragraphs_update(fieldid,
|
|
225
|
-
relations=needs_relations_update(fieldid,
|
|
226
|
-
vectors=needs_vectors_update(fieldid,
|
|
221
|
+
texts=needs_texts_update(fieldid, message),
|
|
222
|
+
paragraphs=needs_paragraphs_update(fieldid, message),
|
|
223
|
+
relations=needs_relations_update(fieldid, message),
|
|
224
|
+
vectors=needs_vectors_update(fieldid, message),
|
|
227
225
|
replace=True,
|
|
228
226
|
vectorset_configs=vectorsets_configs,
|
|
229
227
|
)
|
|
@@ -270,130 +268,97 @@ class IndexMessageBuilder:
|
|
|
270
268
|
return vectorset_configs
|
|
271
269
|
|
|
272
270
|
|
|
273
|
-
def
|
|
274
|
-
messages: list[BrokerMessage],
|
|
275
|
-
) -> list[FieldID]:
|
|
276
|
-
deleted = []
|
|
277
|
-
for message in messages:
|
|
278
|
-
for field in message.delete_fields:
|
|
279
|
-
if field not in deleted:
|
|
280
|
-
deleted.append(field)
|
|
281
|
-
return deleted
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
def get_bm_modified_fields(messages: list[BrokerMessage]) -> list[FieldID]:
|
|
285
|
-
message_source = get_messages_source(messages)
|
|
271
|
+
def get_bm_modified_fields(message: BrokerMessage) -> list[FieldID]:
|
|
286
272
|
modified = set()
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
273
|
+
# Added or modified fields need indexing
|
|
274
|
+
for link in message.links:
|
|
275
|
+
modified.add((link, FieldType.LINK))
|
|
276
|
+
for file in message.files:
|
|
277
|
+
modified.add((file, FieldType.FILE))
|
|
278
|
+
for conv in message.conversations:
|
|
279
|
+
modified.add((conv, FieldType.CONVERSATION))
|
|
280
|
+
for text in message.texts:
|
|
281
|
+
modified.add((text, FieldType.TEXT))
|
|
282
|
+
if message.HasField("basic"):
|
|
283
|
+
# Add title and summary only if they have changed
|
|
284
|
+
if message.basic.title != "":
|
|
285
|
+
modified.add(("title", FieldType.GENERIC))
|
|
286
|
+
if message.basic.summary != "":
|
|
287
|
+
modified.add(("summary", FieldType.GENERIC))
|
|
288
|
+
|
|
289
|
+
if message.source == BrokerMessage.MessageSource.PROCESSOR:
|
|
290
|
+
# Messages with field metadata, extracted text or field vectors need indexing
|
|
291
|
+
for fm in message.field_metadata:
|
|
292
|
+
modified.add((fm.field.field, fm.field.field_type))
|
|
293
|
+
for et in message.extracted_text:
|
|
294
|
+
modified.add((et.field.field, et.field.field_type))
|
|
295
|
+
for fv in message.field_vectors:
|
|
296
|
+
modified.add((fv.field.field, fv.field.field_type))
|
|
297
|
+
|
|
298
|
+
if message.source == BrokerMessage.MessageSource.WRITER:
|
|
299
|
+
# Any field that has fieldmetadata annotations should be considered as modified
|
|
300
|
+
# and needs to be reindexed
|
|
297
301
|
if message.HasField("basic"):
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
modified.add(("title", FieldType.GENERIC))
|
|
301
|
-
if message.basic.summary != "":
|
|
302
|
-
modified.add(("summary", FieldType.GENERIC))
|
|
303
|
-
|
|
304
|
-
if message_source == BrokerMessage.MessageSource.PROCESSOR:
|
|
305
|
-
# Messages with field metadata, extracted text or field vectors need indexing
|
|
306
|
-
for fm in message.field_metadata:
|
|
307
|
-
modified.add((fm.field.field, fm.field.field_type))
|
|
308
|
-
for et in message.extracted_text:
|
|
309
|
-
modified.add((et.field.field, et.field.field_type))
|
|
310
|
-
for fv in message.field_vectors:
|
|
311
|
-
modified.add((fv.field.field, fv.field.field_type))
|
|
312
|
-
|
|
313
|
-
if message_source == BrokerMessage.MessageSource.WRITER:
|
|
314
|
-
# Any field that has fieldmetadata annotations should be considered as modified
|
|
315
|
-
# and needs to be reindexed
|
|
316
|
-
if message.HasField("basic"):
|
|
317
|
-
for ufm in message.basic.fieldmetadata:
|
|
318
|
-
modified.add((ufm.field.field, ufm.field.field_type))
|
|
302
|
+
for ufm in message.basic.fieldmetadata:
|
|
303
|
+
modified.add((ufm.field.field, ufm.field.field_type))
|
|
319
304
|
return [FieldID(field=field, field_type=field_type) for field, field_type in modified]
|
|
320
305
|
|
|
321
306
|
|
|
322
|
-
def
|
|
323
|
-
|
|
324
|
-
return messages[0].source
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
def needs_prefilter_update(messages: list[BrokerMessage]) -> bool:
|
|
328
|
-
return any(message.reindex for message in messages)
|
|
307
|
+
def needs_prefilter_update(message: BrokerMessage) -> bool:
|
|
308
|
+
return message.reindex
|
|
329
309
|
|
|
330
310
|
|
|
331
|
-
def needs_paragraphs_update(field_id: FieldID,
|
|
311
|
+
def needs_paragraphs_update(field_id: FieldID, message: BrokerMessage) -> bool:
|
|
332
312
|
return (
|
|
333
|
-
has_paragraph_annotations(field_id,
|
|
334
|
-
or has_new_extracted_text(field_id,
|
|
335
|
-
or has_new_field_metadata(field_id,
|
|
313
|
+
has_paragraph_annotations(field_id, message)
|
|
314
|
+
or has_new_extracted_text(field_id, message)
|
|
315
|
+
or has_new_field_metadata(field_id, message)
|
|
336
316
|
)
|
|
337
317
|
|
|
338
318
|
|
|
339
|
-
def has_paragraph_annotations(field_id: FieldID,
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
if len(ufm.paragraphs) > 0:
|
|
348
|
-
return True
|
|
349
|
-
return False
|
|
319
|
+
def has_paragraph_annotations(field_id: FieldID, message: BrokerMessage) -> bool:
|
|
320
|
+
ufm = next(
|
|
321
|
+
(fm for fm in message.basic.fieldmetadata if fm.field == field_id),
|
|
322
|
+
None,
|
|
323
|
+
)
|
|
324
|
+
if ufm is None:
|
|
325
|
+
return False
|
|
326
|
+
return len(ufm.paragraphs) > 0
|
|
350
327
|
|
|
351
328
|
|
|
352
329
|
def has_new_field_metadata(
|
|
353
330
|
field_id: FieldID,
|
|
354
|
-
|
|
331
|
+
message: BrokerMessage,
|
|
355
332
|
) -> bool:
|
|
356
|
-
for
|
|
357
|
-
for field_metadata in message.field_metadata:
|
|
358
|
-
if field_metadata.field == field_id:
|
|
359
|
-
return True
|
|
360
|
-
return False
|
|
333
|
+
return any(field_metadata.field == field_id for field_metadata in message.field_metadata)
|
|
361
334
|
|
|
362
335
|
|
|
363
336
|
def has_new_extracted_text(
|
|
364
337
|
field_id: FieldID,
|
|
365
|
-
|
|
338
|
+
message: BrokerMessage,
|
|
366
339
|
) -> bool:
|
|
367
|
-
for
|
|
368
|
-
for extracted_text in message.extracted_text:
|
|
369
|
-
if extracted_text.field == field_id:
|
|
370
|
-
return True
|
|
371
|
-
return False
|
|
340
|
+
return any(extracted_text.field == field_id for extracted_text in message.extracted_text)
|
|
372
341
|
|
|
373
342
|
|
|
374
343
|
def needs_texts_update(
|
|
375
344
|
field_id: FieldID,
|
|
376
|
-
|
|
345
|
+
message: BrokerMessage,
|
|
377
346
|
) -> bool:
|
|
378
|
-
return has_new_extracted_text(field_id,
|
|
347
|
+
return has_new_extracted_text(field_id, message) or has_new_field_metadata(field_id, message)
|
|
379
348
|
|
|
380
349
|
|
|
381
350
|
def needs_vectors_update(
|
|
382
351
|
field_id: FieldID,
|
|
383
|
-
|
|
352
|
+
message: BrokerMessage,
|
|
384
353
|
) -> bool:
|
|
385
|
-
for
|
|
386
|
-
for field_vectors in message.field_vectors:
|
|
387
|
-
if field_vectors.field == field_id:
|
|
388
|
-
return True
|
|
389
|
-
return False
|
|
354
|
+
return any(field_vectors.field == field_id for field_vectors in message.field_vectors)
|
|
390
355
|
|
|
391
356
|
|
|
392
357
|
def needs_relations_update(
|
|
393
358
|
field_id: FieldID,
|
|
394
|
-
|
|
359
|
+
message: BrokerMessage,
|
|
395
360
|
) -> bool:
|
|
396
|
-
return has_new_field_metadata(field_id,
|
|
361
|
+
return has_new_field_metadata(field_id, message) or has_new_extracted_text(field_id, message)
|
|
397
362
|
|
|
398
363
|
|
|
399
364
|
async def get_resource_index_message(
|
|
@@ -141,8 +141,6 @@ class Processor:
|
|
|
141
141
|
and can not use the txn id
|
|
142
142
|
"""
|
|
143
143
|
|
|
144
|
-
messages: dict[str, list[writer_pb2.BrokerMessage]]
|
|
145
|
-
|
|
146
144
|
def __init__(
|
|
147
145
|
self,
|
|
148
146
|
driver: Driver,
|
|
@@ -150,7 +148,6 @@ class Processor:
|
|
|
150
148
|
pubsub: Optional[PubSubDriver] = None,
|
|
151
149
|
partition: Optional[str] = None,
|
|
152
150
|
):
|
|
153
|
-
self.messages = {}
|
|
154
151
|
self.driver = driver
|
|
155
152
|
self.storage = storage
|
|
156
153
|
self.partition = partition
|
|
@@ -179,18 +176,12 @@ class Processor:
|
|
|
179
176
|
if message.type == writer_pb2.BrokerMessage.MessageType.DELETE:
|
|
180
177
|
await self.delete_resource(message, seqid, partition, transaction_check)
|
|
181
178
|
elif message.type == writer_pb2.BrokerMessage.MessageType.AUTOCOMMIT:
|
|
182
|
-
await self.txn(
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
# XXX Should this be removed?
|
|
189
|
-
await self.multi(message, seqid)
|
|
190
|
-
elif message.type == writer_pb2.BrokerMessage.MessageType.COMMIT:
|
|
191
|
-
await self.commit(message, seqid, partition)
|
|
192
|
-
elif message.type == writer_pb2.BrokerMessage.MessageType.ROLLBACK:
|
|
193
|
-
await self.rollback(message, seqid, partition)
|
|
179
|
+
await self.txn(message, seqid, partition, transaction_check)
|
|
180
|
+
else: # pragma: no cover
|
|
181
|
+
logger.error(
|
|
182
|
+
f"Unsupported message type: {message.type}",
|
|
183
|
+
extra={"seqid": seqid, "partition": partition},
|
|
184
|
+
)
|
|
194
185
|
|
|
195
186
|
async def get_resource_uuid(self, kb: KnowledgeBox, message: writer_pb2.BrokerMessage) -> str:
|
|
196
187
|
if message.uuid is None:
|
|
@@ -242,7 +233,6 @@ class Processor:
|
|
|
242
233
|
await self.notify_abort(
|
|
243
234
|
partition=partition,
|
|
244
235
|
seqid=seqid,
|
|
245
|
-
multi=message.multiid,
|
|
246
236
|
kbid=message.kbid,
|
|
247
237
|
rid=message.uuid,
|
|
248
238
|
source=message.source,
|
|
@@ -256,7 +246,6 @@ class Processor:
|
|
|
256
246
|
await self.notify_commit(
|
|
257
247
|
partition=partition,
|
|
258
248
|
seqid=seqid,
|
|
259
|
-
multi=message.multiid,
|
|
260
249
|
message=message,
|
|
261
250
|
write_type=writer_pb2.Notification.WriteType.DELETED,
|
|
262
251
|
)
|
|
@@ -277,15 +266,12 @@ class Processor:
|
|
|
277
266
|
@processor_observer.wrap({"type": "txn"})
|
|
278
267
|
async def txn(
|
|
279
268
|
self,
|
|
280
|
-
|
|
269
|
+
message: writer_pb2.BrokerMessage,
|
|
281
270
|
seqid: int,
|
|
282
271
|
partition: str,
|
|
283
272
|
transaction_check: bool = True,
|
|
284
273
|
) -> None:
|
|
285
|
-
|
|
286
|
-
return None
|
|
287
|
-
|
|
288
|
-
kbid = messages[0].kbid
|
|
274
|
+
kbid = message.kbid
|
|
289
275
|
if not await datamanagers.atomic.kb.exists_kb(kbid=kbid):
|
|
290
276
|
logger.info(f"KB {kbid} is deleted: skiping txn")
|
|
291
277
|
if transaction_check:
|
|
@@ -296,58 +282,55 @@ class Processor:
|
|
|
296
282
|
|
|
297
283
|
async with self.driver.rw_transaction() as txn:
|
|
298
284
|
try:
|
|
299
|
-
multi = messages[0].multiid
|
|
300
285
|
kb = KnowledgeBox(txn, self.storage, kbid)
|
|
301
|
-
uuid = await self.get_resource_uuid(kb,
|
|
286
|
+
uuid = await self.get_resource_uuid(kb, message)
|
|
287
|
+
|
|
302
288
|
resource: Optional[Resource] = None
|
|
303
289
|
handled_exception = None
|
|
304
290
|
created = False
|
|
305
291
|
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
# It's an update from processor for an existing resource
|
|
329
|
-
...
|
|
330
|
-
|
|
331
|
-
generated_fields = await get_generated_fields(message, resource)
|
|
332
|
-
if generated_fields.is_not_empty():
|
|
333
|
-
await send_generated_fields_to_process(
|
|
334
|
-
kbid, resource, generated_fields, message
|
|
335
|
-
)
|
|
336
|
-
# TODO: remove this when processor sends the field set
|
|
337
|
-
for generated_text in generated_fields.texts:
|
|
338
|
-
message.texts[
|
|
339
|
-
generated_text
|
|
340
|
-
].generated_by.data_augmentation.SetInParent()
|
|
341
|
-
|
|
292
|
+
if message.source == writer_pb2.BrokerMessage.MessageSource.WRITER:
|
|
293
|
+
resource = await kb.get(uuid)
|
|
294
|
+
if resource is None:
|
|
295
|
+
# It's a new resource
|
|
296
|
+
resource = await kb.add_resource(uuid, message.slug, message.basic)
|
|
297
|
+
created = True
|
|
298
|
+
else:
|
|
299
|
+
# It's an update from writer for an existing resource
|
|
300
|
+
...
|
|
301
|
+
|
|
302
|
+
elif message.source == writer_pb2.BrokerMessage.MessageSource.PROCESSOR:
|
|
303
|
+
resource = await kb.get(uuid)
|
|
304
|
+
if resource is None:
|
|
305
|
+
logger.info(
|
|
306
|
+
f"Processor message for resource received but the resource does not exist, ignoring.",
|
|
307
|
+
extra={
|
|
308
|
+
"kbid": kbid,
|
|
309
|
+
"rid": uuid,
|
|
310
|
+
"seqid": seqid,
|
|
311
|
+
},
|
|
312
|
+
)
|
|
313
|
+
return None
|
|
342
314
|
else:
|
|
343
|
-
|
|
315
|
+
# It's an update from processor for an existing resource
|
|
316
|
+
...
|
|
344
317
|
|
|
345
|
-
|
|
346
|
-
|
|
318
|
+
generated_fields = await get_generated_fields(message, resource)
|
|
319
|
+
if generated_fields.is_not_empty():
|
|
320
|
+
await send_generated_fields_to_process(kbid, resource, generated_fields, message)
|
|
321
|
+
# TODO: remove this when processor sends the field set
|
|
322
|
+
for generated_text in generated_fields.texts:
|
|
323
|
+
message.texts[generated_text].generated_by.data_augmentation.SetInParent()
|
|
324
|
+
|
|
325
|
+
else: # pragma: no cover
|
|
326
|
+
raise InvalidBrokerMessage(f"Unknown broker message source: {message.source}")
|
|
327
|
+
|
|
328
|
+
# apply changes from the broker message to the resource
|
|
329
|
+
await self.apply_resource(message, resource, update=(not created))
|
|
347
330
|
|
|
348
331
|
# index message
|
|
349
332
|
if resource and resource.modified:
|
|
350
|
-
index_message = await self.generate_index_message(resource,
|
|
333
|
+
index_message = await self.generate_index_message(resource, message, created)
|
|
351
334
|
try:
|
|
352
335
|
warnings = await self.index_resource(
|
|
353
336
|
index_message=index_message,
|
|
@@ -357,7 +340,7 @@ class Processor:
|
|
|
357
340
|
seqid=seqid,
|
|
358
341
|
partition=partition,
|
|
359
342
|
kb=kb,
|
|
360
|
-
source=
|
|
343
|
+
source=to_index_message_source(message),
|
|
361
344
|
)
|
|
362
345
|
# Save indexing warnings
|
|
363
346
|
for field_id, warning in warnings:
|
|
@@ -385,7 +368,6 @@ class Processor:
|
|
|
385
368
|
await self.notify_commit(
|
|
386
369
|
partition=partition,
|
|
387
370
|
seqid=seqid,
|
|
388
|
-
multi=multi,
|
|
389
371
|
message=message,
|
|
390
372
|
write_type=(
|
|
391
373
|
writer_pb2.Notification.WriteType.CREATED
|
|
@@ -398,7 +380,6 @@ class Processor:
|
|
|
398
380
|
await self.notify_abort(
|
|
399
381
|
partition=partition,
|
|
400
382
|
seqid=seqid,
|
|
401
|
-
multi=multi,
|
|
402
383
|
kbid=kbid,
|
|
403
384
|
rid=uuid,
|
|
404
385
|
source=message.source,
|
|
@@ -418,7 +399,6 @@ class Processor:
|
|
|
418
399
|
await self.notify_abort(
|
|
419
400
|
partition=partition,
|
|
420
401
|
seqid=seqid,
|
|
421
|
-
multi=multi,
|
|
422
402
|
kbid=kbid,
|
|
423
403
|
rid=uuid,
|
|
424
404
|
source=message.source,
|
|
@@ -428,11 +408,10 @@ class Processor:
|
|
|
428
408
|
# As we are in the middle of a transaction, we cannot let the exception raise directly
|
|
429
409
|
# as we need to do some cleanup. The exception will be reraised at the end of the function
|
|
430
410
|
# and then handled by the top caller, so errors can be handled in the same place.
|
|
431
|
-
await self.deadletter(
|
|
411
|
+
await self.deadletter(message, partition, seqid)
|
|
432
412
|
await self.notify_abort(
|
|
433
413
|
partition=partition,
|
|
434
414
|
seqid=seqid,
|
|
435
|
-
multi=multi,
|
|
436
415
|
kbid=kbid,
|
|
437
416
|
rid=uuid,
|
|
438
417
|
source=message.source,
|
|
@@ -518,17 +497,16 @@ class Processor:
|
|
|
518
497
|
async def generate_index_message(
|
|
519
498
|
self,
|
|
520
499
|
resource: Resource,
|
|
521
|
-
|
|
500
|
+
message: writer_pb2.BrokerMessage,
|
|
522
501
|
resource_created: bool,
|
|
523
502
|
) -> PBBrainResource:
|
|
524
503
|
builder = IndexMessageBuilder(resource)
|
|
525
|
-
|
|
526
|
-
|
|
527
|
-
|
|
528
|
-
|
|
529
|
-
return await builder.for_processor_bm(messages)
|
|
504
|
+
if message.source == writer_pb2.BrokerMessage.MessageSource.WRITER:
|
|
505
|
+
return await builder.for_writer_bm(message, resource_created)
|
|
506
|
+
elif message.source == writer_pb2.BrokerMessage.MessageSource.PROCESSOR:
|
|
507
|
+
return await builder.for_processor_bm(message)
|
|
530
508
|
else: # pragma: no cover
|
|
531
|
-
raise InvalidBrokerMessage(f"Unknown broker message source: {
|
|
509
|
+
raise InvalidBrokerMessage(f"Unknown broker message source: {message.source}")
|
|
532
510
|
|
|
533
511
|
async def external_index_delete_resource(
|
|
534
512
|
self, external_index_manager: ExternalIndexManager, resource_uuid: str
|
|
@@ -581,35 +559,8 @@ class Processor:
|
|
|
581
559
|
resource_uuid=resource_uuid, resource_data=index_message
|
|
582
560
|
)
|
|
583
561
|
|
|
584
|
-
async def
|
|
585
|
-
self.
|
|
586
|
-
|
|
587
|
-
async def commit(self, message: writer_pb2.BrokerMessage, seqid: int, partition: str) -> None:
|
|
588
|
-
if message.multiid not in self.messages:
|
|
589
|
-
# Error
|
|
590
|
-
logger.error(f"Closed multi {message.multiid}")
|
|
591
|
-
await self.deadletter([message], partition, seqid)
|
|
592
|
-
else:
|
|
593
|
-
await self.txn(self.messages[message.multiid], seqid, partition)
|
|
594
|
-
|
|
595
|
-
async def rollback(self, message: writer_pb2.BrokerMessage, seqid: int, partition: str) -> None:
|
|
596
|
-
# Error
|
|
597
|
-
logger.error(f"Closed multi {message.multiid}")
|
|
598
|
-
del self.messages[message.multiid]
|
|
599
|
-
await self.notify_abort(
|
|
600
|
-
partition=partition,
|
|
601
|
-
seqid=seqid,
|
|
602
|
-
multi=message.multiid,
|
|
603
|
-
kbid=message.kbid,
|
|
604
|
-
rid=message.uuid,
|
|
605
|
-
source=message.source,
|
|
606
|
-
)
|
|
607
|
-
|
|
608
|
-
async def deadletter(
|
|
609
|
-
self, messages: list[writer_pb2.BrokerMessage], partition: str, seqid: int
|
|
610
|
-
) -> None:
|
|
611
|
-
for seq, message in enumerate(messages):
|
|
612
|
-
await self.storage.deadletter(message, seq, seqid, partition)
|
|
562
|
+
async def deadletter(self, message: writer_pb2.BrokerMessage, partition: str, seqid: int) -> None:
|
|
563
|
+
await self.storage.deadletter(message, 0, seqid, partition)
|
|
613
564
|
|
|
614
565
|
@processor_observer.wrap({"type": "apply_resource"})
|
|
615
566
|
async def apply_resource(
|
|
@@ -669,7 +620,6 @@ class Processor:
|
|
|
669
620
|
*,
|
|
670
621
|
partition: str,
|
|
671
622
|
seqid: int,
|
|
672
|
-
multi: str,
|
|
673
623
|
message: writer_pb2.BrokerMessage,
|
|
674
624
|
write_type: writer_pb2.Notification.WriteType.ValueType,
|
|
675
625
|
):
|
|
@@ -677,7 +627,7 @@ class Processor:
|
|
|
677
627
|
notification = writer_pb2.Notification(
|
|
678
628
|
partition=int(partition),
|
|
679
629
|
seqid=seqid,
|
|
680
|
-
multi=
|
|
630
|
+
multi="",
|
|
681
631
|
uuid=message.uuid,
|
|
682
632
|
kbid=message.kbid,
|
|
683
633
|
action=writer_pb2.Notification.Action.COMMIT,
|
|
@@ -697,7 +647,6 @@ class Processor:
|
|
|
697
647
|
*,
|
|
698
648
|
partition: str,
|
|
699
649
|
seqid: int,
|
|
700
|
-
multi: str,
|
|
701
650
|
kbid: str,
|
|
702
651
|
rid: str,
|
|
703
652
|
source: writer_pb2.BrokerMessage.MessageSource.ValueType,
|
|
@@ -705,7 +654,7 @@ class Processor:
|
|
|
705
654
|
message = writer_pb2.Notification(
|
|
706
655
|
partition=int(partition),
|
|
707
656
|
seqid=seqid,
|
|
708
|
-
multi=
|
|
657
|
+
multi="",
|
|
709
658
|
uuid=rid,
|
|
710
659
|
kbid=kbid,
|
|
711
660
|
action=writer_pb2.Notification.ABORT,
|
|
@@ -758,23 +707,16 @@ class Processor:
|
|
|
758
707
|
return kbobj
|
|
759
708
|
|
|
760
709
|
|
|
761
|
-
def
|
|
762
|
-
|
|
763
|
-
|
|
764
|
-
|
|
765
|
-
|
|
766
|
-
(message.source == writer_pb2.BrokerMessage.MessageSource.PROCESSOR for message in messages)
|
|
767
|
-
)
|
|
768
|
-
if from_writer:
|
|
769
|
-
source = nodewriter_pb2.IndexMessageSource.WRITER
|
|
770
|
-
elif from_processor:
|
|
771
|
-
source = nodewriter_pb2.IndexMessageSource.PROCESSOR
|
|
710
|
+
def to_index_message_source(message: writer_pb2.BrokerMessage):
|
|
711
|
+
if message.source == writer_pb2.BrokerMessage.MessageSource.WRITER:
|
|
712
|
+
return nodewriter_pb2.IndexMessageSource.WRITER
|
|
713
|
+
elif message.source == writer_pb2.BrokerMessage.MessageSource.PROCESSOR:
|
|
714
|
+
return nodewriter_pb2.IndexMessageSource.PROCESSOR
|
|
772
715
|
else: # pragma: no cover
|
|
773
|
-
msg = "Processor received
|
|
716
|
+
msg = f"Processor received a broker message with unexpected source! {message.source}"
|
|
774
717
|
logger.error(msg)
|
|
775
718
|
errors.capture_exception(Exception(msg))
|
|
776
|
-
|
|
777
|
-
return source
|
|
719
|
+
return nodewriter_pb2.IndexMessageSource.PROCESSOR
|
|
778
720
|
|
|
779
721
|
|
|
780
722
|
def has_vectors_operation(index_message: PBBrainResource) -> bool:
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: nucliadb
|
|
3
|
-
Version: 6.9.0.
|
|
3
|
+
Version: 6.9.0.post5018
|
|
4
4
|
Summary: NucliaDB
|
|
5
5
|
Author-email: Nuclia <nucliadb@nuclia.com>
|
|
6
6
|
License-Expression: AGPL-3.0-or-later
|
|
@@ -19,11 +19,11 @@ Classifier: Programming Language :: Python :: 3.12
|
|
|
19
19
|
Classifier: Programming Language :: Python :: 3 :: Only
|
|
20
20
|
Requires-Python: <4,>=3.9
|
|
21
21
|
Description-Content-Type: text/markdown
|
|
22
|
-
Requires-Dist: nucliadb-telemetry[all]>=6.9.0.
|
|
23
|
-
Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.9.0.
|
|
24
|
-
Requires-Dist: nucliadb-protos>=6.9.0.
|
|
25
|
-
Requires-Dist: nucliadb-models>=6.9.0.
|
|
26
|
-
Requires-Dist: nidx-protos>=6.9.0.
|
|
22
|
+
Requires-Dist: nucliadb-telemetry[all]>=6.9.0.post5018
|
|
23
|
+
Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.9.0.post5018
|
|
24
|
+
Requires-Dist: nucliadb-protos>=6.9.0.post5018
|
|
25
|
+
Requires-Dist: nucliadb-models>=6.9.0.post5018
|
|
26
|
+
Requires-Dist: nidx-protos>=6.9.0.post5018
|
|
27
27
|
Requires-Dist: nucliadb-admin-assets>=1.0.0.post1224
|
|
28
28
|
Requires-Dist: nuclia-models>=0.50.0
|
|
29
29
|
Requires-Dist: uvicorn[standard]
|
|
@@ -165,7 +165,7 @@ nucliadb/ingest/orm/brain_v2.py,sha256=8MAo1N_nhoGy73TvKBuaw-NaMxIanRPCNttw6dFY4
|
|
|
165
165
|
nucliadb/ingest/orm/broker_message.py,sha256=XWaiZgDOz94NPOPT-hqbRr5ZkpVimUw6PjUJNftfoVw,7514
|
|
166
166
|
nucliadb/ingest/orm/entities.py,sha256=kXyeF6XOpFKhEsGLcY-GLIk21Exp0cJst4XQQ9jJoug,14791
|
|
167
167
|
nucliadb/ingest/orm/exceptions.py,sha256=gsp7TtVNQPiIEh-zf_UEJClwuFU0iu-5vzj0OrKMScg,1550
|
|
168
|
-
nucliadb/ingest/orm/index_message.py,sha256=
|
|
168
|
+
nucliadb/ingest/orm/index_message.py,sha256=DlGLuuuCsXR_rqxd6CEZMYuOx1TIiq1mR5ue114rGUk,14473
|
|
169
169
|
nucliadb/ingest/orm/knowledgebox.py,sha256=OG9dmfklYf1PgTHwQd_iFZOociLEvUSMMv1ZKeUgecE,23910
|
|
170
170
|
nucliadb/ingest/orm/metrics.py,sha256=OiuggTh-n3kZHA2G73NEUdIlh8c3yFrbusI88DK-Mko,1273
|
|
171
171
|
nucliadb/ingest/orm/resource.py,sha256=zQeZyZ-tCxr-DhonLobfZRkz_iEew0Y-cGfXeNNIHG0,40432
|
|
@@ -173,7 +173,7 @@ nucliadb/ingest/orm/utils.py,sha256=fCQRuyecgqhaY7mcBG93oaXMkzkKb9BFjOcy4-ZiSNw,
|
|
|
173
173
|
nucliadb/ingest/orm/processor/__init__.py,sha256=xhDNKCxY0XNOlIVKEtM8QT75vDUkJIt7K-_VgGbbOQU,904
|
|
174
174
|
nucliadb/ingest/orm/processor/auditing.py,sha256=gxn5v30KVaH0TnIjo715mWjzKGJ-DMviElEXJG9BNN4,4612
|
|
175
175
|
nucliadb/ingest/orm/processor/data_augmentation.py,sha256=v-pj4GbBWSuO8dQyahs5UDr5ghsyfhCZDS0ftKd6ZYc,5179
|
|
176
|
-
nucliadb/ingest/orm/processor/processor.py,sha256=
|
|
176
|
+
nucliadb/ingest/orm/processor/processor.py,sha256=3wlGiCXXaVpqLyCJ3AHWHfpKTwtywU78_LkNtpqOiuQ,31391
|
|
177
177
|
nucliadb/ingest/orm/processor/sequence_manager.py,sha256=kUH0bCuM6NqpA0xSwfyb9igig3Btu57pc8VYnKggqx4,1693
|
|
178
178
|
nucliadb/ingest/service/__init__.py,sha256=LHQFUkdmNBOWqBG0Md9sMMI7g5TQZ-hLAnhw6ZblrJg,2002
|
|
179
179
|
nucliadb/ingest/service/exceptions.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
|
|
@@ -384,8 +384,8 @@ nucliadb/writer/tus/local.py,sha256=7jYa_w9b-N90jWgN2sQKkNcomqn6JMVBOVeDOVYJHto,
|
|
|
384
384
|
nucliadb/writer/tus/s3.py,sha256=vu1BGg4VqJ_x2P1u2BxqPKlSfw5orT_a3R-Ln5oPUpU,8483
|
|
385
385
|
nucliadb/writer/tus/storage.py,sha256=ToqwjoYnjI4oIcwzkhha_MPxi-k4Jk3Lt55zRwaC1SM,2903
|
|
386
386
|
nucliadb/writer/tus/utils.py,sha256=MSdVbRsRSZVdkaum69_0wku7X3p5wlZf4nr6E0GMKbw,2556
|
|
387
|
-
nucliadb-6.9.0.
|
|
388
|
-
nucliadb-6.9.0.
|
|
389
|
-
nucliadb-6.9.0.
|
|
390
|
-
nucliadb-6.9.0.
|
|
391
|
-
nucliadb-6.9.0.
|
|
387
|
+
nucliadb-6.9.0.post5018.dist-info/METADATA,sha256=iWXIog9j2LOgX1hJGg8ECJs3fUuGQgTBvQ7oFrW4Ip0,4158
|
|
388
|
+
nucliadb-6.9.0.post5018.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
389
|
+
nucliadb-6.9.0.post5018.dist-info/entry_points.txt,sha256=XqGfgFDuY3zXQc8ewXM2TRVjTModIq851zOsgrmaXx4,1268
|
|
390
|
+
nucliadb-6.9.0.post5018.dist-info/top_level.txt,sha256=hwYhTVnX7jkQ9gJCkVrbqEG1M4lT2F_iPQND1fCzF80,20
|
|
391
|
+
nucliadb-6.9.0.post5018.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|