nucliadb 6.6.0.post4546__py3-none-any.whl → 6.6.1.post4568__py3-none-any.whl

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package versions exactly as they appear in their respective public registries.
@@ -107,6 +107,9 @@ class CappedPromptContext:
107
107
  def __getitem__(self, key: str) -> str:
108
108
  return self.output.__getitem__(key)
109
109
 
110
+ def __contains__(self, key: str) -> bool:
111
+ return key in self.output
112
+
110
113
  def __delitem__(self, key: str) -> None:
111
114
  try:
112
115
  self.output.__delitem__(key)
@@ -395,7 +398,10 @@ def parse_text_block_id(text_block_id: str) -> TextBlockId:
395
398
 
396
399
 
397
400
  async def extend_prompt_context_with_origin_metadata(
398
- context, kbid, text_block_ids: list[TextBlockId], augmented_context: AugmentedContext
401
+ context: CappedPromptContext,
402
+ kbid,
403
+ text_block_ids: list[TextBlockId],
404
+ augmented_context: AugmentedContext,
399
405
  ):
400
406
  async def _get_origin(kbid: str, rid: str) -> tuple[str, Optional[Origin]]:
401
407
  origin = None
@@ -411,7 +417,7 @@ async def extend_prompt_context_with_origin_metadata(
411
417
  rid_to_origin = {rid: origin for rid, origin in origins if origin is not None}
412
418
  for tb_id in text_block_ids:
413
419
  origin = rid_to_origin.get(tb_id.rid)
414
- if origin is not None and tb_id.full() in context.output:
420
+ if origin is not None and tb_id.full() in context:
415
421
  text = context.output.pop(tb_id.full())
416
422
  extended_text = text + f"\n\nDOCUMENT METADATA AT ORIGIN:\n{to_yaml(origin)}"
417
423
  context[tb_id.full()] = extended_text
@@ -424,7 +430,10 @@ async def extend_prompt_context_with_origin_metadata(
424
430
 
425
431
 
426
432
  async def extend_prompt_context_with_classification_labels(
427
- context, kbid, text_block_ids: list[TextBlockId], augmented_context: AugmentedContext
433
+ context: CappedPromptContext,
434
+ kbid: str,
435
+ text_block_ids: list[TextBlockId],
436
+ augmented_context: AugmentedContext,
428
437
  ):
429
438
  async def _get_labels(kbid: str, _id: TextBlockId) -> tuple[TextBlockId, list[tuple[str, str]]]:
430
439
  fid = _id if isinstance(_id, FieldId) else _id.field_id
@@ -449,7 +458,7 @@ async def extend_prompt_context_with_classification_labels(
449
458
  tb_id_to_labels = {tb_id: labels for tb_id, labels in classif_labels if len(labels) > 0}
450
459
  for tb_id in text_block_ids:
451
460
  labels = tb_id_to_labels.get(tb_id)
452
- if labels is not None and tb_id.full() in context.output:
461
+ if labels is not None and tb_id.full() in context:
453
462
  text = context.output.pop(tb_id.full())
454
463
 
455
464
  labels_text = "DOCUMENT CLASSIFICATION LABELS:"
@@ -467,7 +476,10 @@ async def extend_prompt_context_with_classification_labels(
467
476
 
468
477
 
469
478
  async def extend_prompt_context_with_ner(
470
- context, kbid, text_block_ids: list[TextBlockId], augmented_context: AugmentedContext
479
+ context: CappedPromptContext,
480
+ kbid: str,
481
+ text_block_ids: list[TextBlockId],
482
+ augmented_context: AugmentedContext,
471
483
  ):
472
484
  async def _get_ners(kbid: str, _id: TextBlockId) -> tuple[TextBlockId, dict[str, set[str]]]:
473
485
  fid = _id if isinstance(_id, FieldId) else _id.field_id
@@ -494,7 +506,7 @@ async def extend_prompt_context_with_ner(
494
506
  tb_id_to_ners = {tb_id: ners for tb_id, ners in nerss if len(ners) > 0}
495
507
  for tb_id in text_block_ids:
496
508
  ners = tb_id_to_ners.get(tb_id)
497
- if ners is not None and tb_id.full() in context.output:
509
+ if ners is not None and tb_id.full() in context:
498
510
  text = context.output.pop(tb_id.full())
499
511
 
500
512
  ners_text = "DOCUMENT NAMED ENTITIES (NERs):"
@@ -515,7 +527,10 @@ async def extend_prompt_context_with_ner(
515
527
 
516
528
 
517
529
  async def extend_prompt_context_with_extra_metadata(
518
- context, kbid, text_block_ids: list[TextBlockId], augmented_context: AugmentedContext
530
+ context: CappedPromptContext,
531
+ kbid: str,
532
+ text_block_ids: list[TextBlockId],
533
+ augmented_context: AugmentedContext,
519
534
  ):
520
535
  async def _get_extra(kbid: str, rid: str) -> tuple[str, Optional[Extra]]:
521
536
  extra = None
@@ -531,7 +546,7 @@ async def extend_prompt_context_with_extra_metadata(
531
546
  rid_to_extra = {rid: extra for rid, extra in extras if extra is not None}
532
547
  for tb_id in text_block_ids:
533
548
  extra = rid_to_extra.get(tb_id.rid)
534
- if extra is not None and tb_id.full() in context.output:
549
+ if extra is not None and tb_id.full() in context:
535
550
  text = context.output.pop(tb_id.full())
536
551
  extended_text = text + f"\n\nDOCUMENT EXTRA METADATA:\n{to_yaml(extra)}"
537
552
  context[tb_id.full()] = extended_text
@@ -600,7 +615,7 @@ async def field_extension_prompt_context(
600
615
  if tb_id.startswith(field.full()):
601
616
  del context[tb_id]
602
617
  # Add the extracted text of each field to the beginning of the context.
603
- if field.full() not in context.output:
618
+ if field.full() not in context:
604
619
  context[field.full()] = extracted_text
605
620
  augmented_context.fields[field.full()] = AugmentedTextBlock(
606
621
  id=field.full(),
@@ -610,7 +625,7 @@ async def field_extension_prompt_context(
610
625
 
611
626
  # Add the extracted text of each paragraph to the end of the context.
612
627
  for paragraph in ordered_paragraphs:
613
- if paragraph.id not in context.output:
628
+ if paragraph.id not in context:
614
629
  context[paragraph.id] = _clean_paragraph_text(paragraph)
615
630
 
616
631
 
@@ -668,7 +683,7 @@ async def neighbouring_paragraphs_prompt_context(
668
683
  if field_extracted_text is None:
669
684
  continue
670
685
  ptext = _get_paragraph_text(field_extracted_text, pid)
671
- if ptext:
686
+ if ptext and pid.full() not in context:
672
687
  context[pid.full()] = ptext
673
688
 
674
689
  # Now add the neighbouring paragraphs
@@ -702,8 +717,8 @@ async def neighbouring_paragraphs_prompt_context(
702
717
  npid = field_pids[neighbour_index]
703
718
  except IndexError:
704
719
  continue
705
- if npid in retrieved_paragraphs_ids or npid.full() in context.output:
706
- # Already added above
720
+ if npid in retrieved_paragraphs_ids or npid.full() in context:
721
+ # Already added
707
722
  continue
708
723
  ptext = _get_paragraph_text(field_extracted_text, npid)
709
724
  if not ptext:
@@ -742,7 +757,8 @@ async def conversation_prompt_context(
742
757
  storage = await get_storage()
743
758
  kb = KnowledgeBoxORM(txn, storage, kbid)
744
759
  for paragraph in ordered_paragraphs:
745
- context[paragraph.id] = _clean_paragraph_text(paragraph)
760
+ if paragraph.id not in context:
761
+ context[paragraph.id] = _clean_paragraph_text(paragraph)
746
762
 
747
763
  # If the paragraph is a conversation and it matches semantically, we assume we
748
764
  # have matched with the question, therefore try to include the answer to the
@@ -780,7 +796,7 @@ async def conversation_prompt_context(
780
796
  text = message.content.text.strip()
781
797
  pid = f"{rid}/{field_type}/{field_id}/{ident}/0-{len(text) + 1}"
782
798
  attachments.extend(message.content.attachments_fields)
783
- if pid in context.output:
799
+ if pid in context:
784
800
  continue
785
801
  context[pid] = text
786
802
  augmented_context.paragraphs[pid] = AugmentedTextBlock(
@@ -802,7 +818,7 @@ async def conversation_prompt_context(
802
818
  text = message.content.text.strip()
803
819
  attachments.extend(message.content.attachments_fields)
804
820
  pid = f"{rid}/{field_type}/{field_id}/{ident}/0-{len(text) + 1}"
805
- if pid in context.output:
821
+ if pid in context:
806
822
  continue
807
823
  context[pid] = text
808
824
  augmented_context.paragraphs[pid] = AugmentedTextBlock(
@@ -834,7 +850,7 @@ async def conversation_prompt_context(
834
850
  text = message.content.text.strip()
835
851
  attachments.extend(message.content.attachments_fields)
836
852
  pid = f"{rid}/{field_type}/{field_id}/{message.ident}/0-{len(message.content.text) + 1}"
837
- if pid in context.output:
853
+ if pid in context:
838
854
  continue
839
855
  context[pid] = text
840
856
  augmented_context.paragraphs[pid] = AugmentedTextBlock(
@@ -854,7 +870,7 @@ async def conversation_prompt_context(
854
870
  extracted_text = await field.get_extracted_text()
855
871
  if extracted_text is not None:
856
872
  pid = f"{rid}/{field_type}/{attachment.field_id}/0-{len(extracted_text.text) + 1}"
857
- if pid in context.output:
873
+ if pid in context:
858
874
  continue
859
875
  text = f"Attachment {attachment.field_id}: {extracted_text.text}\n\n"
860
876
  context[pid] = text
@@ -977,9 +993,9 @@ async def hierarchy_prompt_context(
977
993
  paragraph_text = _clean_paragraph_text(paragraph)
978
994
  context[paragraph.id] = paragraph_text
979
995
  if paragraph.id in augmented_paragraphs:
980
- field_id = ParagraphId.from_string(paragraph.id).field_id.full()
981
- augmented_context.fields[field_id] = AugmentedTextBlock(
982
- id=field_id, text=paragraph_text, augmentation_type=TextBlockAugmentationType.HIERARCHY
996
+ pid = ParagraphId.from_string(paragraph.id)
997
+ augmented_context.paragraphs[pid.full()] = AugmentedTextBlock(
998
+ id=pid.full(), text=paragraph_text, augmentation_type=TextBlockAugmentationType.HIERARCHY
983
999
  )
984
1000
  return
985
1001
 
@@ -19,7 +19,7 @@
19
19
  #
20
20
  import json
21
21
  from enum import Enum
22
- from typing import Any, AsyncIterable, Optional, Union
22
+ from typing import Any, Optional, Union
23
23
 
24
24
  import aiohttp
25
25
  from fastapi.datastructures import QueryParams
@@ -171,21 +171,13 @@ async def chat_streaming_generator(
171
171
  user_query: str,
172
172
  is_json: bool,
173
173
  ):
174
- stream: AsyncIterable[bytes]
175
- if is_json:
176
- # ndjson: stream lines
177
- stream = predict_response.content
178
- else:
179
- # plain text: stream chunks (last chunk is status)
180
- stream = predict_response.content.iter_any()
181
-
182
174
  first = True
183
175
  status_code = AnswerStatusCode.ERROR.value
184
176
  text_answer = ""
185
177
  json_object = None
186
178
  metrics = AskMetrics()
187
179
  with metrics.time(PREDICT_ANSWER_METRIC):
188
- async for chunk in stream:
180
+ async for chunk in predict_response.content:
189
181
  if first:
190
182
  metrics.record_first_chunk_yielded()
191
183
  first = False
@@ -211,7 +203,11 @@ async def chat_streaming_generator(
211
203
 
212
204
  if is_json is False and chunk: # Ensure chunk is not empty before decoding
213
205
  # If response is text the status_code comes at the last chunk of data
214
- status_code = chunk.decode()
206
+ last_chunk = chunk.decode()
207
+ if last_chunk[-1] == "0":
208
+ status_code = "0"
209
+ else:
210
+ status_code = last_chunk[-2:]
215
211
 
216
212
  audit_predict_proxy_endpoint(
217
213
  headers=predict_response.headers,
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nucliadb
3
- Version: 6.6.0.post4546
3
+ Version: 6.6.1.post4568
4
4
  Summary: NucliaDB
5
5
  Author-email: Nuclia <nucliadb@nuclia.com>
6
6
  License-Expression: AGPL-3.0-or-later
@@ -19,11 +19,11 @@ Classifier: Programming Language :: Python :: 3.12
19
19
  Classifier: Programming Language :: Python :: 3 :: Only
20
20
  Requires-Python: <4,>=3.9
21
21
  Description-Content-Type: text/markdown
22
- Requires-Dist: nucliadb-telemetry[all]>=6.6.0.post4546
23
- Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.6.0.post4546
24
- Requires-Dist: nucliadb-protos>=6.6.0.post4546
25
- Requires-Dist: nucliadb-models>=6.6.0.post4546
26
- Requires-Dist: nidx-protos>=6.6.0.post4546
22
+ Requires-Dist: nucliadb-telemetry[all]>=6.6.1.post4568
23
+ Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.6.1.post4568
24
+ Requires-Dist: nucliadb-protos>=6.6.1.post4568
25
+ Requires-Dist: nucliadb-models>=6.6.1.post4568
26
+ Requires-Dist: nidx-protos>=6.6.1.post4568
27
27
  Requires-Dist: nucliadb-admin-assets>=1.0.0.post1224
28
28
  Requires-Dist: nuclia-models>=0.24.2
29
29
  Requires-Dist: uvicorn[standard]
@@ -255,7 +255,7 @@ nucliadb/search/search/merge.py,sha256=XiRBsxhYPshPV7lZXD-9E259KZOPIf4I2tKosY0lP
255
255
  nucliadb/search/search/metrics.py,sha256=3I6IN0qDSmqIvUaWJmT3rt-Jyjs6LcvnKI8ZqCiuJPY,3501
256
256
  nucliadb/search/search/paragraphs.py,sha256=pNAEiYqJGGUVcEf7xf-PFMVqz0PX4Qb-WNG-_zPGN2o,7799
257
257
  nucliadb/search/search/pgcatalog.py,sha256=_AiyW6it66UX6BsZbM3-230IQhiEG4utoKYboviyOFI,16799
258
- nucliadb/search/search/predict_proxy.py,sha256=Q12I3VIAQqFgzBe9UeVEiAjUAdVT8NBfNDXWiP-pn1M,8858
258
+ nucliadb/search/search/predict_proxy.py,sha256=jJj9LrxBOFo5AMn_EB7MNo1hqiTN-PkeBYINIpVKzl8,8735
259
259
  nucliadb/search/search/query.py,sha256=0qIQdt548L3jtKOyKo06aGJ73SLBxAW3N38_Hc1M3Uw,11528
260
260
  nucliadb/search/search/rank_fusion.py,sha256=xZtXhbmKb_56gs73u6KkFm2efvTATOSMmpOV2wrAIqE,9613
261
261
  nucliadb/search/search/rerankers.py,sha256=E2J1QdKAojqbhHM3KAyaOXKf6tJyETUxKs4tf_BEyqk,7472
@@ -266,7 +266,7 @@ nucliadb/search/search/chat/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn
266
266
  nucliadb/search/search/chat/ask.py,sha256=0sgfiCbNaCZrTvYaRGtf5xL6VnzRgzofINiEP4IvhWs,38278
267
267
  nucliadb/search/search/chat/exceptions.py,sha256=Siy4GXW2L7oPhIR86H3WHBhE9lkV4A4YaAszuGGUf54,1356
268
268
  nucliadb/search/search/chat/images.py,sha256=PA8VWxT5_HUGfW1ULhKTK46UBsVyINtWWqEM1ulzX1E,3095
269
- nucliadb/search/search/chat/prompt.py,sha256=SNsCtB9mZTODjnUMAH8YfPxn05Kjl2d5xTIteNxyVcI,52783
269
+ nucliadb/search/search/chat/prompt.py,sha256=QwHULUDqe_pS2HZvQH1vzqpYEHQG_-UagXCNtLLtJEI,52997
270
270
  nucliadb/search/search/chat/query.py,sha256=3jMPNbiFEOoS0ydMOPYkSx1qVlvAv51npzadWXDwkMs,16650
271
271
  nucliadb/search/search/query_parser/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
272
272
  nucliadb/search/search/query_parser/exceptions.py,sha256=sVl9gRNzhE-s480LBBVkiXzNRbKhYRQN5F3it5tNNp8,939
@@ -375,8 +375,8 @@ nucliadb/writer/tus/local.py,sha256=7jYa_w9b-N90jWgN2sQKkNcomqn6JMVBOVeDOVYJHto,
375
375
  nucliadb/writer/tus/s3.py,sha256=vF0NkFTXiXhXq3bCVXXVV-ED38ECVoUeeYViP8uMqcU,8357
376
376
  nucliadb/writer/tus/storage.py,sha256=ToqwjoYnjI4oIcwzkhha_MPxi-k4Jk3Lt55zRwaC1SM,2903
377
377
  nucliadb/writer/tus/utils.py,sha256=MSdVbRsRSZVdkaum69_0wku7X3p5wlZf4nr6E0GMKbw,2556
378
- nucliadb-6.6.0.post4546.dist-info/METADATA,sha256=zs4NGKqWTtCkWXM1xaPWUC6ejZFyPIDyVNbk5ff3rfM,4158
379
- nucliadb-6.6.0.post4546.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
380
- nucliadb-6.6.0.post4546.dist-info/entry_points.txt,sha256=XqGfgFDuY3zXQc8ewXM2TRVjTModIq851zOsgrmaXx4,1268
381
- nucliadb-6.6.0.post4546.dist-info/top_level.txt,sha256=hwYhTVnX7jkQ9gJCkVrbqEG1M4lT2F_iPQND1fCzF80,20
382
- nucliadb-6.6.0.post4546.dist-info/RECORD,,
378
+ nucliadb-6.6.1.post4568.dist-info/METADATA,sha256=69Tlgt2FRwmj4e1dsiHz-ps5GGEJxz_zyWnnDXMrKcg,4158
379
+ nucliadb-6.6.1.post4568.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
380
+ nucliadb-6.6.1.post4568.dist-info/entry_points.txt,sha256=XqGfgFDuY3zXQc8ewXM2TRVjTModIq851zOsgrmaXx4,1268
381
+ nucliadb-6.6.1.post4568.dist-info/top_level.txt,sha256=hwYhTVnX7jkQ9gJCkVrbqEG1M4lT2F_iPQND1fCzF80,20
382
+ nucliadb-6.6.1.post4568.dist-info/RECORD,,