PyPI - nucliadb - Versions diffs - 6.6.0.post4546__py3-none-any.whl → 6.6.1.post4568__py3-none-any.whl - Mend

nucliadb 6.6.0.post4546__py3-none-any.whl → 6.6.1.post4568__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

nucliadb/search/search/chat/prompt.py CHANGED Viewed

@@ -107,6 +107,9 @@ class CappedPromptContext:
     def __getitem__(self, key: str) -> str:
         return self.output.__getitem__(key)
+    def __contains__(self, key: str) -> bool:
+        return key in self.output
     def __delitem__(self, key: str) -> None:
         try:
             self.output.__delitem__(key)
@@ -395,7 +398,10 @@ def parse_text_block_id(text_block_id: str) -> TextBlockId:
 async def extend_prompt_context_with_origin_metadata(
-    context, kbid, text_block_ids: list[TextBlockId], augmented_context: AugmentedContext
+    context: CappedPromptContext,
+    kbid,
+    text_block_ids: list[TextBlockId],
+    augmented_context: AugmentedContext,
 ):
     async def _get_origin(kbid: str, rid: str) -> tuple[str, Optional[Origin]]:
         origin = None
@@ -411,7 +417,7 @@ async def extend_prompt_context_with_origin_metadata(
     rid_to_origin = {rid: origin for rid, origin in origins if origin is not None}
     for tb_id in text_block_ids:
         origin = rid_to_origin.get(tb_id.rid)
-        if origin is not None and tb_id.full() in context.output:
+        if origin is not None and tb_id.full() in context:
             text = context.output.pop(tb_id.full())
             extended_text = text + f"\n\nDOCUMENT METADATA AT ORIGIN:\n{to_yaml(origin)}"
             context[tb_id.full()] = extended_text
@@ -424,7 +430,10 @@ async def extend_prompt_context_with_origin_metadata(
 async def extend_prompt_context_with_classification_labels(
-    context, kbid, text_block_ids: list[TextBlockId], augmented_context: AugmentedContext
+    context: CappedPromptContext,
+    kbid: str,
+    text_block_ids: list[TextBlockId],
+    augmented_context: AugmentedContext,
 ):
     async def _get_labels(kbid: str, _id: TextBlockId) -> tuple[TextBlockId, list[tuple[str, str]]]:
         fid = _id if isinstance(_id, FieldId) else _id.field_id
@@ -449,7 +458,7 @@ async def extend_prompt_context_with_classification_labels(
     tb_id_to_labels = {tb_id: labels for tb_id, labels in classif_labels if len(labels) > 0}
     for tb_id in text_block_ids:
         labels = tb_id_to_labels.get(tb_id)
-        if labels is not None and tb_id.full() in context.output:
+        if labels is not None and tb_id.full() in context:
             text = context.output.pop(tb_id.full())
             labels_text = "DOCUMENT CLASSIFICATION LABELS:"
@@ -467,7 +476,10 @@ async def extend_prompt_context_with_classification_labels(
 async def extend_prompt_context_with_ner(
-    context, kbid, text_block_ids: list[TextBlockId], augmented_context: AugmentedContext
+    context: CappedPromptContext,
+    kbid: str,
+    text_block_ids: list[TextBlockId],
+    augmented_context: AugmentedContext,
 ):
     async def _get_ners(kbid: str, _id: TextBlockId) -> tuple[TextBlockId, dict[str, set[str]]]:
         fid = _id if isinstance(_id, FieldId) else _id.field_id
@@ -494,7 +506,7 @@ async def extend_prompt_context_with_ner(
     tb_id_to_ners = {tb_id: ners for tb_id, ners in nerss if len(ners) > 0}
     for tb_id in text_block_ids:
         ners = tb_id_to_ners.get(tb_id)
-        if ners is not None and tb_id.full() in context.output:
+        if ners is not None and tb_id.full() in context:
             text = context.output.pop(tb_id.full())
             ners_text = "DOCUMENT NAMED ENTITIES (NERs):"
@@ -515,7 +527,10 @@ async def extend_prompt_context_with_ner(
 async def extend_prompt_context_with_extra_metadata(
-    context, kbid, text_block_ids: list[TextBlockId], augmented_context: AugmentedContext
+    context: CappedPromptContext,
+    kbid: str,
+    text_block_ids: list[TextBlockId],
+    augmented_context: AugmentedContext,
 ):
     async def _get_extra(kbid: str, rid: str) -> tuple[str, Optional[Extra]]:
         extra = None
@@ -531,7 +546,7 @@ async def extend_prompt_context_with_extra_metadata(
     rid_to_extra = {rid: extra for rid, extra in extras if extra is not None}
     for tb_id in text_block_ids:
         extra = rid_to_extra.get(tb_id.rid)
-        if extra is not None and tb_id.full() in context.output:
+        if extra is not None and tb_id.full() in context:
             text = context.output.pop(tb_id.full())
             extended_text = text + f"\n\nDOCUMENT EXTRA METADATA:\n{to_yaml(extra)}"
             context[tb_id.full()] = extended_text
@@ -600,7 +615,7 @@ async def field_extension_prompt_context(
             if tb_id.startswith(field.full()):
                 del context[tb_id]
         # Add the extracted text of each field to the beginning of the context.
-        if field.full() not in context.output:
+        if field.full() not in context:
             context[field.full()] = extracted_text
             augmented_context.fields[field.full()] = AugmentedTextBlock(
                 id=field.full(),
@@ -610,7 +625,7 @@ async def field_extension_prompt_context(
     # Add the extracted text of each paragraph to the end of the context.
     for paragraph in ordered_paragraphs:
-        if paragraph.id not in context.output:
+        if paragraph.id not in context:
             context[paragraph.id] = _clean_paragraph_text(paragraph)
@@ -668,7 +683,7 @@ async def neighbouring_paragraphs_prompt_context(
         if field_extracted_text is None:
             continue
         ptext = _get_paragraph_text(field_extracted_text, pid)
-        if ptext:
+        if ptext and pid.full() not in context:
             context[pid.full()] = ptext
         # Now add the neighbouring paragraphs
@@ -702,8 +717,8 @@ async def neighbouring_paragraphs_prompt_context(
                 npid = field_pids[neighbour_index]
             except IndexError:
                 continue
-            if npid in retrieved_paragraphs_ids or npid.full() in context.output:
-                # Already added above
+            if npid in retrieved_paragraphs_ids or npid.full() in context:
+                # Already added
                 continue
             ptext = _get_paragraph_text(field_extracted_text, npid)
             if not ptext:
@@ -742,7 +757,8 @@ async def conversation_prompt_context(
         storage = await get_storage()
         kb = KnowledgeBoxORM(txn, storage, kbid)
         for paragraph in ordered_paragraphs:
-            context[paragraph.id] = _clean_paragraph_text(paragraph)
+            if paragraph.id not in context:
+                context[paragraph.id] = _clean_paragraph_text(paragraph)
             # If the paragraph is a conversation and it matches semantically, we assume we
             # have matched with the question, therefore try to include the answer to the
@@ -780,7 +796,7 @@ async def conversation_prompt_context(
                                 text = message.content.text.strip()
                             pid = f"{rid}/{field_type}/{field_id}/{ident}/0-{len(text) + 1}"
                             attachments.extend(message.content.attachments_fields)
-                            if pid in context.output:
+                            if pid in context:
                                 continue
                             context[pid] = text
                             augmented_context.paragraphs[pid] = AugmentedTextBlock(
@@ -802,7 +818,7 @@ async def conversation_prompt_context(
                             text = message.content.text.strip()
                         attachments.extend(message.content.attachments_fields)
                         pid = f"{rid}/{field_type}/{field_id}/{ident}/0-{len(text) + 1}"
-                        if pid in context.output:
+                        if pid in context:
                             continue
                         context[pid] = text
                         augmented_context.paragraphs[pid] = AugmentedTextBlock(
@@ -834,7 +850,7 @@ async def conversation_prompt_context(
                         text = message.content.text.strip()
                         attachments.extend(message.content.attachments_fields)
                         pid = f"{rid}/{field_type}/{field_id}/{message.ident}/0-{len(message.content.text) + 1}"
-                        if pid in context.output:
+                        if pid in context:
                             continue
                         context[pid] = text
                         augmented_context.paragraphs[pid] = AugmentedTextBlock(
@@ -854,7 +870,7 @@ async def conversation_prompt_context(
                         extracted_text = await field.get_extracted_text()
                         if extracted_text is not None:
                             pid = f"{rid}/{field_type}/{attachment.field_id}/0-{len(extracted_text.text) + 1}"
-                            if pid in context.output:
+                            if pid in context:
                                 continue
                             text = f"Attachment {attachment.field_id}: {extracted_text.text}\n\n"
                             context[pid] = text
@@ -977,9 +993,9 @@ async def hierarchy_prompt_context(
         paragraph_text = _clean_paragraph_text(paragraph)
         context[paragraph.id] = paragraph_text
         if paragraph.id in augmented_paragraphs:
-            field_id = ParagraphId.from_string(paragraph.id).field_id.full()
-            augmented_context.fields[field_id] = AugmentedTextBlock(
-                id=field_id, text=paragraph_text, augmentation_type=TextBlockAugmentationType.HIERARCHY
+            pid = ParagraphId.from_string(paragraph.id)
+            augmented_context.paragraphs[pid.full()] = AugmentedTextBlock(
+                id=pid.full(), text=paragraph_text, augmentation_type=TextBlockAugmentationType.HIERARCHY
             )
     return

nucliadb/search/search/predict_proxy.py CHANGED Viewed

@@ -19,7 +19,7 @@
 #
 import json
 from enum import Enum
-from typing import Any, AsyncIterable, Optional, Union
+from typing import Any, Optional, Union
 import aiohttp
 from fastapi.datastructures import QueryParams
@@ -171,21 +171,13 @@ async def chat_streaming_generator(
     user_query: str,
     is_json: bool,
 ):
-    stream: AsyncIterable[bytes]
-    if is_json:
-        # ndjson: stream lines
-        stream = predict_response.content
-    else:
-        # plain text: stream chunks (last chunk is status)
-        stream = predict_response.content.iter_any()
     first = True
     status_code = AnswerStatusCode.ERROR.value
     text_answer = ""
     json_object = None
     metrics = AskMetrics()
     with metrics.time(PREDICT_ANSWER_METRIC):
-        async for chunk in stream:
+        async for chunk in predict_response.content:
             if first:
                 metrics.record_first_chunk_yielded()
                 first = False
@@ -211,7 +203,11 @@ async def chat_streaming_generator(
     if is_json is False and chunk:  # Ensure chunk is not empty before decoding
         # If response is text the status_code comes at the last chunk of data
-        status_code = chunk.decode()
+        last_chunk = chunk.decode()
+        if last_chunk[-1] == "0":
+            status_code = "0"
+        else:
+            status_code = last_chunk[-2:]
     audit_predict_proxy_endpoint(
         headers=predict_response.headers,

{nucliadb-6.6.0.post4546.dist-info → nucliadb-6.6.1.post4568.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: nucliadb
-Version: 6.6.0.post4546
+Version: 6.6.1.post4568
 Summary: NucliaDB
 Author-email: Nuclia <nucliadb@nuclia.com>
 License-Expression: AGPL-3.0-or-later
@@ -19,11 +19,11 @@ Classifier: Programming Language :: Python :: 3.12
 Classifier: Programming Language :: Python :: 3 :: Only
 Requires-Python: <4,>=3.9
 Description-Content-Type: text/markdown
-Requires-Dist: nucliadb-telemetry[all]>=6.6.0.post4546
-Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.6.0.post4546
-Requires-Dist: nucliadb-protos>=6.6.0.post4546
-Requires-Dist: nucliadb-models>=6.6.0.post4546
-Requires-Dist: nidx-protos>=6.6.0.post4546
+Requires-Dist: nucliadb-telemetry[all]>=6.6.1.post4568
+Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.6.1.post4568
+Requires-Dist: nucliadb-protos>=6.6.1.post4568
+Requires-Dist: nucliadb-models>=6.6.1.post4568
+Requires-Dist: nidx-protos>=6.6.1.post4568
 Requires-Dist: nucliadb-admin-assets>=1.0.0.post1224
 Requires-Dist: nuclia-models>=0.24.2
 Requires-Dist: uvicorn[standard]

{nucliadb-6.6.0.post4546.dist-info → nucliadb-6.6.1.post4568.dist-info}/RECORD RENAMED Viewed

@@ -255,7 +255,7 @@ nucliadb/search/search/merge.py,sha256=XiRBsxhYPshPV7lZXD-9E259KZOPIf4I2tKosY0lP
 nucliadb/search/search/metrics.py,sha256=3I6IN0qDSmqIvUaWJmT3rt-Jyjs6LcvnKI8ZqCiuJPY,3501
 nucliadb/search/search/paragraphs.py,sha256=pNAEiYqJGGUVcEf7xf-PFMVqz0PX4Qb-WNG-_zPGN2o,7799
 nucliadb/search/search/pgcatalog.py,sha256=_AiyW6it66UX6BsZbM3-230IQhiEG4utoKYboviyOFI,16799
-nucliadb/search/search/predict_proxy.py,sha256=Q12I3VIAQqFgzBe9UeVEiAjUAdVT8NBfNDXWiP-pn1M,8858
+nucliadb/search/search/predict_proxy.py,sha256=jJj9LrxBOFo5AMn_EB7MNo1hqiTN-PkeBYINIpVKzl8,8735
 nucliadb/search/search/query.py,sha256=0qIQdt548L3jtKOyKo06aGJ73SLBxAW3N38_Hc1M3Uw,11528
 nucliadb/search/search/rank_fusion.py,sha256=xZtXhbmKb_56gs73u6KkFm2efvTATOSMmpOV2wrAIqE,9613
 nucliadb/search/search/rerankers.py,sha256=E2J1QdKAojqbhHM3KAyaOXKf6tJyETUxKs4tf_BEyqk,7472
@@ -266,7 +266,7 @@ nucliadb/search/search/chat/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn
 nucliadb/search/search/chat/ask.py,sha256=0sgfiCbNaCZrTvYaRGtf5xL6VnzRgzofINiEP4IvhWs,38278
 nucliadb/search/search/chat/exceptions.py,sha256=Siy4GXW2L7oPhIR86H3WHBhE9lkV4A4YaAszuGGUf54,1356
 nucliadb/search/search/chat/images.py,sha256=PA8VWxT5_HUGfW1ULhKTK46UBsVyINtWWqEM1ulzX1E,3095
-nucliadb/search/search/chat/prompt.py,sha256=SNsCtB9mZTODjnUMAH8YfPxn05Kjl2d5xTIteNxyVcI,52783
+nucliadb/search/search/chat/prompt.py,sha256=QwHULUDqe_pS2HZvQH1vzqpYEHQG_-UagXCNtLLtJEI,52997
 nucliadb/search/search/chat/query.py,sha256=3jMPNbiFEOoS0ydMOPYkSx1qVlvAv51npzadWXDwkMs,16650
 nucliadb/search/search/query_parser/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
 nucliadb/search/search/query_parser/exceptions.py,sha256=sVl9gRNzhE-s480LBBVkiXzNRbKhYRQN5F3it5tNNp8,939
@@ -375,8 +375,8 @@ nucliadb/writer/tus/local.py,sha256=7jYa_w9b-N90jWgN2sQKkNcomqn6JMVBOVeDOVYJHto,
 nucliadb/writer/tus/s3.py,sha256=vF0NkFTXiXhXq3bCVXXVV-ED38ECVoUeeYViP8uMqcU,8357
 nucliadb/writer/tus/storage.py,sha256=ToqwjoYnjI4oIcwzkhha_MPxi-k4Jk3Lt55zRwaC1SM,2903
 nucliadb/writer/tus/utils.py,sha256=MSdVbRsRSZVdkaum69_0wku7X3p5wlZf4nr6E0GMKbw,2556
-nucliadb-6.6.0.post4546.dist-info/METADATA,sha256=zs4NGKqWTtCkWXM1xaPWUC6ejZFyPIDyVNbk5ff3rfM,4158
-nucliadb-6.6.0.post4546.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-nucliadb-6.6.0.post4546.dist-info/entry_points.txt,sha256=XqGfgFDuY3zXQc8ewXM2TRVjTModIq851zOsgrmaXx4,1268
-nucliadb-6.6.0.post4546.dist-info/top_level.txt,sha256=hwYhTVnX7jkQ9gJCkVrbqEG1M4lT2F_iPQND1fCzF80,20
-nucliadb-6.6.0.post4546.dist-info/RECORD,,
+nucliadb-6.6.1.post4568.dist-info/METADATA,sha256=69Tlgt2FRwmj4e1dsiHz-ps5GGEJxz_zyWnnDXMrKcg,4158
+nucliadb-6.6.1.post4568.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+nucliadb-6.6.1.post4568.dist-info/entry_points.txt,sha256=XqGfgFDuY3zXQc8ewXM2TRVjTModIq851zOsgrmaXx4,1268
+nucliadb-6.6.1.post4568.dist-info/top_level.txt,sha256=hwYhTVnX7jkQ9gJCkVrbqEG1M4lT2F_iPQND1fCzF80,20
+nucliadb-6.6.1.post4568.dist-info/RECORD,,

{nucliadb-6.6.0.post4546.dist-info → nucliadb-6.6.1.post4568.dist-info}/WHEEL RENAMED Viewed

File without changes

{nucliadb-6.6.0.post4546.dist-info → nucliadb-6.6.1.post4568.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{nucliadb-6.6.0.post4546.dist-info → nucliadb-6.6.1.post4568.dist-info}/top_level.txt RENAMED Viewed

File without changes

nucliadb 6.6.0.post4546__py3-none-any.whl → 6.6.1.post4568__py3-none-any.whl

nucliadb 6.6.0.post4546__py3-none-any.whl → 6.6.1.post4568__py3-none-any.whl