nucliadb 6.6.0.post4546__py3-none-any.whl → 6.6.1.post649__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- migrations/0038_backfill_catalog_field_labels.py +90 -0
- nucliadb/ingest/orm/processor/pgcatalog.py +17 -2
- nucliadb/ingest/orm/resource.py +16 -4
- nucliadb/ingest/processing.py +10 -3
- nucliadb/search/api/v1/ask.py +10 -1
- nucliadb/search/api/v1/resource/ask.py +16 -12
- nucliadb/search/api/v1/summarize.py +7 -2
- nucliadb/search/predict.py +10 -6
- nucliadb/search/search/chat/ask.py +26 -1
- nucliadb/search/search/chat/prompt.py +63 -21
- nucliadb/search/search/pgcatalog.py +1 -1
- nucliadb/search/search/predict_proxy.py +8 -11
- nucliadb/search/search/query_parser/old_filters.py +1 -1
- nucliadb/search/search/summarize.py +4 -2
- {nucliadb-6.6.0.post4546.dist-info → nucliadb-6.6.1.post649.dist-info}/METADATA +7 -7
- {nucliadb-6.6.0.post4546.dist-info → nucliadb-6.6.1.post649.dist-info}/RECORD +19 -18
- {nucliadb-6.6.0.post4546.dist-info → nucliadb-6.6.1.post649.dist-info}/WHEEL +0 -0
- {nucliadb-6.6.0.post4546.dist-info → nucliadb-6.6.1.post649.dist-info}/entry_points.txt +0 -0
- {nucliadb-6.6.0.post4546.dist-info → nucliadb-6.6.1.post649.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,90 @@
|
|
1
|
+
# Copyright (C) 2021 Bosutech XXI S.L.
|
2
|
+
#
|
3
|
+
# nucliadb is offered under the AGPL v3.0 and as commercial software.
|
4
|
+
# For commercial licensing, contact us at info@nuclia.com.
|
5
|
+
#
|
6
|
+
# AGPL:
|
7
|
+
# This program is free software: you can redistribute it and/or modify
|
8
|
+
# it under the terms of the GNU Affero General Public License as
|
9
|
+
# published by the Free Software Foundation, either version 3 of the
|
10
|
+
# License, or (at your option) any later version.
|
11
|
+
#
|
12
|
+
# This program is distributed in the hope that it will be useful,
|
13
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
15
|
+
# GNU Affero General Public License for more details.
|
16
|
+
#
|
17
|
+
# You should have received a copy of the GNU Affero General Public License
|
18
|
+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
|
+
#
|
20
|
+
|
21
|
+
"""Migration #38
|
22
|
+
|
23
|
+
Backfill the catalog with labels from fields metadata
|
24
|
+
|
25
|
+
"""
|
26
|
+
|
27
|
+
import logging
|
28
|
+
from typing import cast
|
29
|
+
|
30
|
+
from nucliadb.common import datamanagers
|
31
|
+
from nucliadb.common.maindb.pg import PGDriver, PGTransaction
|
32
|
+
from nucliadb.ingest.orm.index_message import get_resource_index_message
|
33
|
+
from nucliadb.ingest.orm.processor.pgcatalog import pgcatalog_update
|
34
|
+
from nucliadb.migrator.context import ExecutionContext
|
35
|
+
from nucliadb_protos import resources_pb2
|
36
|
+
|
37
|
+
logger = logging.getLogger(__name__)
|
38
|
+
|
39
|
+
|
40
|
+
async def migrate(context: ExecutionContext) -> None: ...
|
41
|
+
|
42
|
+
|
43
|
+
async def migrate_kb(context: ExecutionContext, kbid: str) -> None:
|
44
|
+
if not isinstance(context.kv_driver, PGDriver):
|
45
|
+
return
|
46
|
+
|
47
|
+
BATCH_SIZE = 100
|
48
|
+
async with context.kv_driver.transaction() as txn:
|
49
|
+
txn = cast(PGTransaction, txn)
|
50
|
+
start = ""
|
51
|
+
while True:
|
52
|
+
async with txn.connection.cursor() as cur:
|
53
|
+
# Get list of resources except those already in the catalog
|
54
|
+
await cur.execute(
|
55
|
+
"""
|
56
|
+
SELECT key, value FROM resources
|
57
|
+
WHERE key ~ ('^/kbs/' || %s || '/r/[^/]*$')
|
58
|
+
AND key > %s
|
59
|
+
ORDER BY key
|
60
|
+
LIMIT %s""",
|
61
|
+
(kbid, start, BATCH_SIZE),
|
62
|
+
)
|
63
|
+
|
64
|
+
to_index = []
|
65
|
+
rows = await cur.fetchall()
|
66
|
+
if len(rows) == 0:
|
67
|
+
return
|
68
|
+
for key, basic_pb in rows:
|
69
|
+
start = key
|
70
|
+
|
71
|
+
# Only reindex resources with labels in field computed metadata
|
72
|
+
basic = resources_pb2.Basic()
|
73
|
+
basic.ParseFromString(basic_pb)
|
74
|
+
if basic.computedmetadata.field_classifications:
|
75
|
+
to_index.append(key)
|
76
|
+
|
77
|
+
logger.info(f"Reindexing {len(to_index)} catalog entries from {start}")
|
78
|
+
# Index each resource
|
79
|
+
for key in to_index:
|
80
|
+
rid = key.split("/")[4]
|
81
|
+
resource = await datamanagers.resources.get_resource(txn, kbid=kbid, rid=rid)
|
82
|
+
if resource is None:
|
83
|
+
logger.warning(f"Could not load resource {rid} for kbid {kbid}")
|
84
|
+
continue
|
85
|
+
|
86
|
+
index_message = await get_resource_index_message(resource, reindex=False)
|
87
|
+
await pgcatalog_update(txn, kbid, resource, index_message)
|
88
|
+
|
89
|
+
if to_index:
|
90
|
+
await txn.commit()
|
@@ -65,6 +65,21 @@ async def pgcatalog_update(txn: Transaction, kbid: str, resource: Resource, inde
|
|
65
65
|
modified_at = created_at
|
66
66
|
|
67
67
|
async with _pg_transaction(txn).connection.cursor() as cur:
|
68
|
+
# Do not index canceled labels
|
69
|
+
cancelled_labels = {
|
70
|
+
f"/l/{clf.labelset}/{clf.label}"
|
71
|
+
for clf in resource.basic.usermetadata.classifications
|
72
|
+
if clf.cancelled_by_user
|
73
|
+
}
|
74
|
+
|
75
|
+
# Labels from the resource and classification labels from each field
|
76
|
+
labels = [label for label in index_message.labels]
|
77
|
+
for classification in resource.basic.computedmetadata.field_classifications:
|
78
|
+
for clf in classification.classifications:
|
79
|
+
label = f"/l/{clf.labelset}/{clf.label}"
|
80
|
+
if label not in cancelled_labels:
|
81
|
+
labels.append(label)
|
82
|
+
|
68
83
|
await cur.execute(
|
69
84
|
"""
|
70
85
|
INSERT INTO catalog
|
@@ -83,7 +98,7 @@ async def pgcatalog_update(txn: Transaction, kbid: str, resource: Resource, inde
|
|
83
98
|
"title": resource.basic.title,
|
84
99
|
"created_at": created_at,
|
85
100
|
"modified_at": modified_at,
|
86
|
-
"labels":
|
101
|
+
"labels": labels,
|
87
102
|
"slug": resource.basic.slug,
|
88
103
|
},
|
89
104
|
)
|
@@ -99,7 +114,7 @@ async def pgcatalog_update(txn: Transaction, kbid: str, resource: Resource, inde
|
|
99
114
|
{
|
100
115
|
"kbid": resource.kb.kbid,
|
101
116
|
"rid": resource.uuid,
|
102
|
-
"facets": list(extract_facets(
|
117
|
+
"facets": list(extract_facets(labels)),
|
103
118
|
},
|
104
119
|
)
|
105
120
|
|
nucliadb/ingest/orm/resource.py
CHANGED
@@ -602,7 +602,7 @@ class Resource:
|
|
602
602
|
FieldType.LINK,
|
603
603
|
load=False,
|
604
604
|
)
|
605
|
-
maybe_update_basic_thumbnail(self.basic, link_extracted_data.link_thumbnail)
|
605
|
+
maybe_update_basic_thumbnail(self.basic, link_extracted_data.link_thumbnail, self.kb.kbid)
|
606
606
|
|
607
607
|
await field_link.set_link_extracted_data(link_extracted_data)
|
608
608
|
|
@@ -661,7 +661,7 @@ class Resource:
|
|
661
661
|
# uri can change after extraction
|
662
662
|
await field_file.set_file_extracted_data(file_extracted_data)
|
663
663
|
maybe_update_basic_icon(self.basic, file_extracted_data.icon)
|
664
|
-
maybe_update_basic_thumbnail(self.basic, file_extracted_data.file_thumbnail)
|
664
|
+
maybe_update_basic_thumbnail(self.basic, file_extracted_data.file_thumbnail, self.kb.kbid)
|
665
665
|
self.modified = True
|
666
666
|
|
667
667
|
async def _should_update_resource_title_from_file_metadata(self) -> bool:
|
@@ -722,7 +722,9 @@ class Resource:
|
|
722
722
|
)
|
723
723
|
await field_obj.set_field_metadata(field_metadata)
|
724
724
|
|
725
|
-
maybe_update_basic_thumbnail(
|
725
|
+
maybe_update_basic_thumbnail(
|
726
|
+
self.basic, field_metadata.metadata.metadata.thumbnail, self.kb.kbid
|
727
|
+
)
|
726
728
|
|
727
729
|
update_basic_computedmetadata_classifications(self.basic, field_metadata)
|
728
730
|
self.modified = True
|
@@ -879,13 +881,23 @@ def maybe_update_basic_icon(basic: PBBasic, mimetype: Optional[str]) -> bool:
|
|
879
881
|
return True
|
880
882
|
|
881
883
|
|
882
|
-
def maybe_update_basic_thumbnail(basic: PBBasic, thumbnail: Optional[CloudFile]) -> bool:
|
884
|
+
def maybe_update_basic_thumbnail(basic: PBBasic, thumbnail: Optional[CloudFile], kbid: str) -> bool:
|
883
885
|
if basic.thumbnail or thumbnail is None:
|
884
886
|
return False
|
885
887
|
basic.thumbnail = CloudLink.format_reader_download_uri(thumbnail.uri)
|
888
|
+
fix_kbid_in_thumbnail(basic, kbid)
|
886
889
|
return True
|
887
890
|
|
888
891
|
|
892
|
+
def fix_kbid_in_thumbnail(basic: PBBasic, kbid: str):
|
893
|
+
if basic.thumbnail.startswith("/kb/") and not basic.thumbnail.startswith(f"/kb/{kbid}/"):
|
894
|
+
# Replace the kbid in the thumbnail if it doesn't match the current kbid. This is necessary for
|
895
|
+
# resources that have been backed up and we are restoring them to a different kbid.
|
896
|
+
parts = basic.thumbnail.split("/", 3)
|
897
|
+
parts[2] = kbid
|
898
|
+
basic.thumbnail = "/".join(parts)
|
899
|
+
|
900
|
+
|
889
901
|
def update_basic_languages(basic: Basic, languages: list[str]) -> bool:
|
890
902
|
if len(languages) == 0:
|
891
903
|
return False
|
nucliadb/ingest/processing.py
CHANGED
@@ -25,7 +25,7 @@ import uuid
|
|
25
25
|
from collections import defaultdict
|
26
26
|
from contextlib import AsyncExitStack
|
27
27
|
from enum import Enum
|
28
|
-
from typing import Any, Optional
|
28
|
+
from typing import Any, Optional
|
29
29
|
|
30
30
|
import aiohttp
|
31
31
|
import backoff
|
@@ -49,10 +49,14 @@ from nucliadb_utils.utilities import Utility, clean_utility, get_utility, set_ut
|
|
49
49
|
|
50
50
|
logger = logging.getLogger(__name__)
|
51
51
|
|
52
|
-
_T = TypeVar("_T")
|
53
52
|
|
53
|
+
class ProcessingAPIUnavailableError(SendToProcessError): ...
|
54
54
|
|
55
|
-
|
55
|
+
|
56
|
+
RETRIABLE_EXCEPTIONS = (
|
57
|
+
aiohttp.client_exceptions.ClientConnectorError,
|
58
|
+
ProcessingAPIUnavailableError,
|
59
|
+
)
|
56
60
|
MAX_TRIES = 4
|
57
61
|
|
58
62
|
|
@@ -409,6 +413,9 @@ class ProcessingEngine:
|
|
409
413
|
raise LimitsExceededError(resp.status, data["detail"])
|
410
414
|
elif resp.status == 429:
|
411
415
|
raise LimitsExceededError(resp.status, "Rate limited")
|
416
|
+
elif resp.status in (502, 503):
|
417
|
+
logger.warning(f"Processing engine is not available, retrying. Status: {resp.status}")
|
418
|
+
raise ProcessingAPIUnavailableError()
|
412
419
|
else:
|
413
420
|
error_text = await resp.text()
|
414
421
|
logger.warning(f"Error sending to process: {resp.status} {error_text}")
|
nucliadb/search/api/v1/ask.py
CHANGED
@@ -59,6 +59,7 @@ async def ask_knowledgebox_endpoint(
|
|
59
59
|
kbid: str,
|
60
60
|
item: AskRequest,
|
61
61
|
x_ndb_client: NucliaDBClientType = Header(NucliaDBClientType.API),
|
62
|
+
x_show_consumption: bool = Header(default=False),
|
62
63
|
x_nucliadb_user: str = Header(""),
|
63
64
|
x_forwarded_for: str = Header(""),
|
64
65
|
x_synchronous: bool = Header(
|
@@ -97,7 +98,13 @@ async def ask_knowledgebox_endpoint(
|
|
97
98
|
return HTTPClientError(status_code=422, detail=detail)
|
98
99
|
|
99
100
|
return await create_ask_response(
|
100
|
-
kbid,
|
101
|
+
kbid=kbid,
|
102
|
+
ask_request=item,
|
103
|
+
user_id=x_nucliadb_user,
|
104
|
+
client_type=x_ndb_client,
|
105
|
+
origin=x_forwarded_for,
|
106
|
+
x_synchronous=x_synchronous,
|
107
|
+
extra_predict_headers={"X-Show-Consumption": str(x_show_consumption).lower()},
|
101
108
|
)
|
102
109
|
|
103
110
|
|
@@ -110,6 +117,7 @@ async def create_ask_response(
|
|
110
117
|
origin: str,
|
111
118
|
x_synchronous: bool,
|
112
119
|
resource: Optional[str] = None,
|
120
|
+
extra_predict_headers: Optional[dict[str, str]] = None,
|
113
121
|
) -> Response:
|
114
122
|
maybe_log_request_payload(kbid, "/ask", ask_request)
|
115
123
|
ask_request.max_tokens = parse_max_tokens(ask_request.max_tokens)
|
@@ -122,6 +130,7 @@ async def create_ask_response(
|
|
122
130
|
client_type=client_type,
|
123
131
|
origin=origin,
|
124
132
|
resource=resource,
|
133
|
+
extra_predict_headers=extra_predict_headers,
|
125
134
|
)
|
126
135
|
except AnswerJsonSchemaTooLong as err:
|
127
136
|
return HTTPClientError(status_code=400, detail=str(err))
|
@@ -48,6 +48,7 @@ async def resource_ask_endpoint_by_uuid(
|
|
48
48
|
kbid: str,
|
49
49
|
rid: str,
|
50
50
|
item: AskRequest,
|
51
|
+
x_show_consumption: bool = Header(default=False),
|
51
52
|
x_ndb_client: NucliaDBClientType = Header(NucliaDBClientType.API),
|
52
53
|
x_nucliadb_user: str = Header(""),
|
53
54
|
x_forwarded_for: str = Header(""),
|
@@ -58,13 +59,14 @@ async def resource_ask_endpoint_by_uuid(
|
|
58
59
|
),
|
59
60
|
) -> Union[StreamingResponse, HTTPClientError, Response]:
|
60
61
|
return await create_ask_response(
|
61
|
-
kbid,
|
62
|
-
item,
|
63
|
-
x_nucliadb_user,
|
64
|
-
x_ndb_client,
|
65
|
-
x_forwarded_for,
|
66
|
-
x_synchronous,
|
62
|
+
kbid=kbid,
|
63
|
+
ask_request=item,
|
64
|
+
user_id=x_nucliadb_user,
|
65
|
+
client_type=x_ndb_client,
|
66
|
+
origin=x_forwarded_for,
|
67
|
+
x_synchronous=x_synchronous,
|
67
68
|
resource=rid,
|
69
|
+
extra_predict_headers={"X-Show-Consumption": str(x_show_consumption).lower()},
|
68
70
|
)
|
69
71
|
|
70
72
|
|
@@ -83,6 +85,7 @@ async def resource_ask_endpoint_by_slug(
|
|
83
85
|
kbid: str,
|
84
86
|
slug: str,
|
85
87
|
item: AskRequest,
|
88
|
+
x_show_consumption: bool = Header(default=False),
|
86
89
|
x_ndb_client: NucliaDBClientType = Header(NucliaDBClientType.API),
|
87
90
|
x_nucliadb_user: str = Header(""),
|
88
91
|
x_forwarded_for: str = Header(""),
|
@@ -96,11 +99,12 @@ async def resource_ask_endpoint_by_slug(
|
|
96
99
|
if resource_id is None:
|
97
100
|
return HTTPClientError(status_code=404, detail="Resource not found")
|
98
101
|
return await create_ask_response(
|
99
|
-
kbid,
|
100
|
-
item,
|
101
|
-
x_nucliadb_user,
|
102
|
-
x_ndb_client,
|
103
|
-
x_forwarded_for,
|
104
|
-
x_synchronous,
|
102
|
+
kbid=kbid,
|
103
|
+
ask_request=item,
|
104
|
+
user_id=x_nucliadb_user,
|
105
|
+
client_type=x_ndb_client,
|
106
|
+
origin=x_forwarded_for,
|
107
|
+
x_synchronous=x_synchronous,
|
105
108
|
resource=resource_id,
|
109
|
+
extra_predict_headers={"X-Show-Consumption": str(x_show_consumption).lower()},
|
106
110
|
)
|
@@ -19,7 +19,7 @@
|
|
19
19
|
#
|
20
20
|
from typing import Union
|
21
21
|
|
22
|
-
from fastapi import Request
|
22
|
+
from fastapi import Header, Request
|
23
23
|
from fastapi_versioning import version
|
24
24
|
|
25
25
|
from nucliadb.common.datamanagers.exceptions import KnowledgeBoxNotFound
|
@@ -47,9 +47,14 @@ async def summarize_endpoint(
|
|
47
47
|
request: Request,
|
48
48
|
kbid: str,
|
49
49
|
item: SummarizeRequest,
|
50
|
+
x_show_consumption: bool = Header(default=False),
|
50
51
|
) -> Union[SummarizedResponse, HTTPClientError]:
|
51
52
|
try:
|
52
|
-
return await summarize(
|
53
|
+
return await summarize(
|
54
|
+
kbid=kbid,
|
55
|
+
request=item,
|
56
|
+
extra_predict_headers={"X-Show-Consumption": str(x_show_consumption).lower()},
|
57
|
+
)
|
53
58
|
except KnowledgeBoxNotFound:
|
54
59
|
return HTTPClientError(status_code=404, detail="Knowledge box not found")
|
55
60
|
except NoResourcesToSummarize:
|
nucliadb/search/predict.py
CHANGED
@@ -293,7 +293,7 @@ class PredictEngine:
|
|
293
293
|
|
294
294
|
@predict_observer.wrap({"type": "chat_ndjson"})
|
295
295
|
async def chat_query_ndjson(
|
296
|
-
self, kbid: str, item: ChatModel
|
296
|
+
self, kbid: str, item: ChatModel, extra_headers: Optional[dict[str, str]] = None
|
297
297
|
) -> tuple[str, str, AsyncGenerator[GenerativeChunk, None]]:
|
298
298
|
"""
|
299
299
|
Chat query using the new stream format
|
@@ -314,7 +314,7 @@ class PredictEngine:
|
|
314
314
|
"POST",
|
315
315
|
url=self.get_predict_url(CHAT, kbid),
|
316
316
|
json=item.model_dump(),
|
317
|
-
headers=headers,
|
317
|
+
headers={**headers, **(extra_headers or {})},
|
318
318
|
timeout=None,
|
319
319
|
)
|
320
320
|
await self.check_response(kbid, resp, expected_status=200)
|
@@ -396,7 +396,9 @@ class PredictEngine:
|
|
396
396
|
return convert_relations(data)
|
397
397
|
|
398
398
|
@predict_observer.wrap({"type": "summarize"})
|
399
|
-
async def summarize(
|
399
|
+
async def summarize(
|
400
|
+
self, kbid: str, item: SummarizeModel, extra_headers: Optional[dict[str, str]] = None
|
401
|
+
) -> SummarizedResponse:
|
400
402
|
try:
|
401
403
|
self.check_nua_key_is_configured_for_onprem()
|
402
404
|
except NUAKeyMissingError:
|
@@ -407,7 +409,7 @@ class PredictEngine:
|
|
407
409
|
"POST",
|
408
410
|
url=self.get_predict_url(SUMMARIZE, kbid),
|
409
411
|
json=item.model_dump(),
|
410
|
-
headers=self.get_predict_headers(kbid),
|
412
|
+
headers={**self.get_predict_headers(kbid), **(extra_headers or {})},
|
411
413
|
timeout=None,
|
412
414
|
)
|
413
415
|
await self.check_response(kbid, resp, expected_status=200)
|
@@ -489,7 +491,7 @@ class DummyPredictEngine(PredictEngine):
|
|
489
491
|
return RephraseResponse(rephrased_query=DUMMY_REPHRASE_QUERY, use_chat_history=None)
|
490
492
|
|
491
493
|
async def chat_query_ndjson(
|
492
|
-
self, kbid: str, item: ChatModel
|
494
|
+
self, kbid: str, item: ChatModel, extra_headers: Optional[dict[str, str]] = None
|
493
495
|
) -> tuple[str, str, AsyncGenerator[GenerativeChunk, None]]:
|
494
496
|
self.calls.append(("chat_query_ndjson", item))
|
495
497
|
|
@@ -559,7 +561,9 @@ class DummyPredictEngine(PredictEngine):
|
|
559
561
|
else:
|
560
562
|
return DUMMY_RELATION_NODE
|
561
563
|
|
562
|
-
async def summarize(
|
564
|
+
async def summarize(
|
565
|
+
self, kbid: str, item: SummarizeModel, extra_headers: Optional[dict[str, str]] = None
|
566
|
+
) -> SummarizedResponse:
|
563
567
|
self.calls.append(("summarize", (kbid, item)))
|
564
568
|
response = SummarizedResponse(
|
565
569
|
summary="global summary",
|
@@ -22,6 +22,7 @@ import functools
|
|
22
22
|
import json
|
23
23
|
from typing import AsyncGenerator, Optional, cast
|
24
24
|
|
25
|
+
from nuclia_models.common.consumption import Consumption
|
25
26
|
from nuclia_models.predict.generative_responses import (
|
26
27
|
CitationsGenerativeResponse,
|
27
28
|
GenerativeChunk,
|
@@ -83,6 +84,7 @@ from nucliadb_models.search import (
|
|
83
84
|
ChatModel,
|
84
85
|
ChatOptions,
|
85
86
|
CitationsAskResponseItem,
|
87
|
+
ConsumptionResponseItem,
|
86
88
|
DebugAskResponseItem,
|
87
89
|
ErrorAskResponseItem,
|
88
90
|
FindOptions,
|
@@ -106,6 +108,7 @@ from nucliadb_models.search import (
|
|
106
108
|
StatusAskResponseItem,
|
107
109
|
SyncAskMetadata,
|
108
110
|
SyncAskResponse,
|
111
|
+
TokensDetail,
|
109
112
|
UserPrompt,
|
110
113
|
parse_custom_prompt,
|
111
114
|
parse_rephrase_prompt,
|
@@ -169,6 +172,7 @@ class AskResult:
|
|
169
172
|
self._citations: Optional[CitationsGenerativeResponse] = None
|
170
173
|
self._metadata: Optional[MetaGenerativeResponse] = None
|
171
174
|
self._relations: Optional[Relations] = None
|
175
|
+
self._consumption: Optional[Consumption] = None
|
172
176
|
|
173
177
|
@property
|
174
178
|
def status_code(self) -> AnswerStatusCode:
|
@@ -299,6 +303,20 @@ class AskResult:
|
|
299
303
|
),
|
300
304
|
)
|
301
305
|
|
306
|
+
if self._consumption is not None:
|
307
|
+
yield ConsumptionResponseItem(
|
308
|
+
normalized_tokens=TokensDetail(
|
309
|
+
input=self._consumption.normalized_tokens.input,
|
310
|
+
output=self._consumption.normalized_tokens.output,
|
311
|
+
image=self._consumption.normalized_tokens.image,
|
312
|
+
),
|
313
|
+
customer_key_tokens=TokensDetail(
|
314
|
+
input=self._consumption.customer_key_tokens.input,
|
315
|
+
output=self._consumption.customer_key_tokens.output,
|
316
|
+
image=self._consumption.customer_key_tokens.image,
|
317
|
+
),
|
318
|
+
)
|
319
|
+
|
302
320
|
# Stream out the relations results
|
303
321
|
should_query_relations = (
|
304
322
|
self.ask_request_with_relations and self.status_code == AnswerStatusCode.SUCCESS
|
@@ -341,6 +359,7 @@ class AskResult:
|
|
341
359
|
generative_total=self._metadata.timings.get("generative"),
|
342
360
|
),
|
343
361
|
)
|
362
|
+
|
344
363
|
citations = {}
|
345
364
|
if self._citations is not None:
|
346
365
|
citations = self._citations.citations
|
@@ -373,6 +392,7 @@ class AskResult:
|
|
373
392
|
prequeries=prequeries_results,
|
374
393
|
citations=citations,
|
375
394
|
metadata=metadata,
|
395
|
+
consumption=self._consumption,
|
376
396
|
learning_id=self.nuclia_learning_id or "",
|
377
397
|
augmented_context=self.augmented_context,
|
378
398
|
)
|
@@ -424,6 +444,8 @@ class AskResult:
|
|
424
444
|
self._citations = item
|
425
445
|
elif isinstance(item, MetaGenerativeResponse):
|
426
446
|
self._metadata = item
|
447
|
+
elif isinstance(item, Consumption):
|
448
|
+
self._consumption = item
|
427
449
|
else:
|
428
450
|
logger.warning(
|
429
451
|
f"Unexpected item in predict answer stream: {item}",
|
@@ -486,6 +508,7 @@ async def ask(
|
|
486
508
|
client_type: NucliaDBClientType,
|
487
509
|
origin: str,
|
488
510
|
resource: Optional[str] = None,
|
511
|
+
extra_predict_headers: Optional[dict[str, str]] = None,
|
489
512
|
) -> AskResult:
|
490
513
|
metrics = AskMetrics()
|
491
514
|
chat_history = ask_request.chat_history or []
|
@@ -613,7 +636,9 @@ async def ask(
|
|
613
636
|
nuclia_learning_id,
|
614
637
|
nuclia_learning_model,
|
615
638
|
predict_answer_stream,
|
616
|
-
) = await predict.chat_query_ndjson(
|
639
|
+
) = await predict.chat_query_ndjson(
|
640
|
+
kbid=kbid, item=chat_model, extra_headers=extra_predict_headers
|
641
|
+
)
|
617
642
|
|
618
643
|
auditor = ChatAuditor(
|
619
644
|
kbid=kbid,
|
@@ -69,6 +69,7 @@ from nucliadb_models.search import (
|
|
69
69
|
RagStrategyName,
|
70
70
|
TableImageStrategy,
|
71
71
|
TextBlockAugmentationType,
|
72
|
+
TextPosition,
|
72
73
|
)
|
73
74
|
from nucliadb_protos import resources_pb2
|
74
75
|
from nucliadb_protos.resources_pb2 import ExtractedText, FieldComputedMetadata
|
@@ -107,6 +108,9 @@ class CappedPromptContext:
|
|
107
108
|
def __getitem__(self, key: str) -> str:
|
108
109
|
return self.output.__getitem__(key)
|
109
110
|
|
111
|
+
def __contains__(self, key: str) -> bool:
|
112
|
+
return key in self.output
|
113
|
+
|
110
114
|
def __delitem__(self, key: str) -> None:
|
111
115
|
try:
|
112
116
|
self.output.__delitem__(key)
|
@@ -395,7 +399,10 @@ def parse_text_block_id(text_block_id: str) -> TextBlockId:
|
|
395
399
|
|
396
400
|
|
397
401
|
async def extend_prompt_context_with_origin_metadata(
|
398
|
-
context
|
402
|
+
context: CappedPromptContext,
|
403
|
+
kbid,
|
404
|
+
text_block_ids: list[TextBlockId],
|
405
|
+
augmented_context: AugmentedContext,
|
399
406
|
):
|
400
407
|
async def _get_origin(kbid: str, rid: str) -> tuple[str, Optional[Origin]]:
|
401
408
|
origin = None
|
@@ -411,7 +418,7 @@ async def extend_prompt_context_with_origin_metadata(
|
|
411
418
|
rid_to_origin = {rid: origin for rid, origin in origins if origin is not None}
|
412
419
|
for tb_id in text_block_ids:
|
413
420
|
origin = rid_to_origin.get(tb_id.rid)
|
414
|
-
if origin is not None and tb_id.full() in context
|
421
|
+
if origin is not None and tb_id.full() in context:
|
415
422
|
text = context.output.pop(tb_id.full())
|
416
423
|
extended_text = text + f"\n\nDOCUMENT METADATA AT ORIGIN:\n{to_yaml(origin)}"
|
417
424
|
context[tb_id.full()] = extended_text
|
@@ -424,7 +431,10 @@ async def extend_prompt_context_with_origin_metadata(
|
|
424
431
|
|
425
432
|
|
426
433
|
async def extend_prompt_context_with_classification_labels(
|
427
|
-
context
|
434
|
+
context: CappedPromptContext,
|
435
|
+
kbid: str,
|
436
|
+
text_block_ids: list[TextBlockId],
|
437
|
+
augmented_context: AugmentedContext,
|
428
438
|
):
|
429
439
|
async def _get_labels(kbid: str, _id: TextBlockId) -> tuple[TextBlockId, list[tuple[str, str]]]:
|
430
440
|
fid = _id if isinstance(_id, FieldId) else _id.field_id
|
@@ -449,7 +459,7 @@ async def extend_prompt_context_with_classification_labels(
|
|
449
459
|
tb_id_to_labels = {tb_id: labels for tb_id, labels in classif_labels if len(labels) > 0}
|
450
460
|
for tb_id in text_block_ids:
|
451
461
|
labels = tb_id_to_labels.get(tb_id)
|
452
|
-
if labels is not None and tb_id.full() in context
|
462
|
+
if labels is not None and tb_id.full() in context:
|
453
463
|
text = context.output.pop(tb_id.full())
|
454
464
|
|
455
465
|
labels_text = "DOCUMENT CLASSIFICATION LABELS:"
|
@@ -467,7 +477,10 @@ async def extend_prompt_context_with_classification_labels(
|
|
467
477
|
|
468
478
|
|
469
479
|
async def extend_prompt_context_with_ner(
|
470
|
-
context
|
480
|
+
context: CappedPromptContext,
|
481
|
+
kbid: str,
|
482
|
+
text_block_ids: list[TextBlockId],
|
483
|
+
augmented_context: AugmentedContext,
|
471
484
|
):
|
472
485
|
async def _get_ners(kbid: str, _id: TextBlockId) -> tuple[TextBlockId, dict[str, set[str]]]:
|
473
486
|
fid = _id if isinstance(_id, FieldId) else _id.field_id
|
@@ -494,7 +507,7 @@ async def extend_prompt_context_with_ner(
|
|
494
507
|
tb_id_to_ners = {tb_id: ners for tb_id, ners in nerss if len(ners) > 0}
|
495
508
|
for tb_id in text_block_ids:
|
496
509
|
ners = tb_id_to_ners.get(tb_id)
|
497
|
-
if ners is not None and tb_id.full() in context
|
510
|
+
if ners is not None and tb_id.full() in context:
|
498
511
|
text = context.output.pop(tb_id.full())
|
499
512
|
|
500
513
|
ners_text = "DOCUMENT NAMED ENTITIES (NERs):"
|
@@ -515,7 +528,10 @@ async def extend_prompt_context_with_ner(
|
|
515
528
|
|
516
529
|
|
517
530
|
async def extend_prompt_context_with_extra_metadata(
|
518
|
-
context
|
531
|
+
context: CappedPromptContext,
|
532
|
+
kbid: str,
|
533
|
+
text_block_ids: list[TextBlockId],
|
534
|
+
augmented_context: AugmentedContext,
|
519
535
|
):
|
520
536
|
async def _get_extra(kbid: str, rid: str) -> tuple[str, Optional[Extra]]:
|
521
537
|
extra = None
|
@@ -531,7 +547,7 @@ async def extend_prompt_context_with_extra_metadata(
|
|
531
547
|
rid_to_extra = {rid: extra for rid, extra in extras if extra is not None}
|
532
548
|
for tb_id in text_block_ids:
|
533
549
|
extra = rid_to_extra.get(tb_id.rid)
|
534
|
-
if extra is not None and tb_id.full() in context
|
550
|
+
if extra is not None and tb_id.full() in context:
|
535
551
|
text = context.output.pop(tb_id.full())
|
536
552
|
extended_text = text + f"\n\nDOCUMENT EXTRA METADATA:\n{to_yaml(extra)}"
|
537
553
|
context[tb_id.full()] = extended_text
|
@@ -600,7 +616,7 @@ async def field_extension_prompt_context(
|
|
600
616
|
if tb_id.startswith(field.full()):
|
601
617
|
del context[tb_id]
|
602
618
|
# Add the extracted text of each field to the beginning of the context.
|
603
|
-
if field.full() not in context
|
619
|
+
if field.full() not in context:
|
604
620
|
context[field.full()] = extracted_text
|
605
621
|
augmented_context.fields[field.full()] = AugmentedTextBlock(
|
606
622
|
id=field.full(),
|
@@ -610,7 +626,7 @@ async def field_extension_prompt_context(
|
|
610
626
|
|
611
627
|
# Add the extracted text of each paragraph to the end of the context.
|
612
628
|
for paragraph in ordered_paragraphs:
|
613
|
-
if paragraph.id not in context
|
629
|
+
if paragraph.id not in context:
|
614
630
|
context[paragraph.id] = _clean_paragraph_text(paragraph)
|
615
631
|
|
616
632
|
|
@@ -668,7 +684,7 @@ async def neighbouring_paragraphs_prompt_context(
|
|
668
684
|
if field_extracted_text is None:
|
669
685
|
continue
|
670
686
|
ptext = _get_paragraph_text(field_extracted_text, pid)
|
671
|
-
if ptext:
|
687
|
+
if ptext and pid.full() not in context:
|
672
688
|
context[pid.full()] = ptext
|
673
689
|
|
674
690
|
# Now add the neighbouring paragraphs
|
@@ -702,8 +718,8 @@ async def neighbouring_paragraphs_prompt_context(
|
|
702
718
|
npid = field_pids[neighbour_index]
|
703
719
|
except IndexError:
|
704
720
|
continue
|
705
|
-
if npid in retrieved_paragraphs_ids or npid.full() in context
|
706
|
-
# Already added
|
721
|
+
if npid in retrieved_paragraphs_ids or npid.full() in context:
|
722
|
+
# Already added
|
707
723
|
continue
|
708
724
|
ptext = _get_paragraph_text(field_extracted_text, npid)
|
709
725
|
if not ptext:
|
@@ -712,6 +728,7 @@ async def neighbouring_paragraphs_prompt_context(
|
|
712
728
|
augmented_context.paragraphs[npid.full()] = AugmentedTextBlock(
|
713
729
|
id=npid.full(),
|
714
730
|
text=ptext,
|
731
|
+
position=get_text_position(npid, neighbour_index, field_extracted_metadata),
|
715
732
|
parent=pid.full(),
|
716
733
|
augmentation_type=TextBlockAugmentationType.NEIGHBOURING_PARAGRAPHS,
|
717
734
|
)
|
@@ -719,6 +736,30 @@ async def neighbouring_paragraphs_prompt_context(
|
|
719
736
|
metrics.set("neighbouring_paragraphs_ops", len(augmented_context.paragraphs))
|
720
737
|
|
721
738
|
|
739
|
+
def get_text_position(
|
740
|
+
paragraph_id: ParagraphId, index: int, field_metadata: FieldComputedMetadata
|
741
|
+
) -> Optional[TextPosition]:
|
742
|
+
if paragraph_id.field_id.subfield_id:
|
743
|
+
metadata = field_metadata.split_metadata[paragraph_id.field_id.subfield_id]
|
744
|
+
else:
|
745
|
+
metadata = field_metadata.metadata
|
746
|
+
try:
|
747
|
+
pmetadata = metadata.paragraphs[index]
|
748
|
+
except IndexError:
|
749
|
+
return None
|
750
|
+
page_number = None
|
751
|
+
if pmetadata.HasField("page"):
|
752
|
+
page_number = pmetadata.page.page
|
753
|
+
return TextPosition(
|
754
|
+
page_number=page_number,
|
755
|
+
index=index,
|
756
|
+
start=pmetadata.start,
|
757
|
+
end=pmetadata.end,
|
758
|
+
start_seconds=list(pmetadata.start_seconds),
|
759
|
+
end_seconds=list(pmetadata.end_seconds),
|
760
|
+
)
|
761
|
+
|
762
|
+
|
722
763
|
def get_neighbouring_indices(
|
723
764
|
index: int, before: int, after: int, field_pids: list[ParagraphId]
|
724
765
|
) -> list[int]:
|
@@ -742,7 +783,8 @@ async def conversation_prompt_context(
|
|
742
783
|
storage = await get_storage()
|
743
784
|
kb = KnowledgeBoxORM(txn, storage, kbid)
|
744
785
|
for paragraph in ordered_paragraphs:
|
745
|
-
|
786
|
+
if paragraph.id not in context:
|
787
|
+
context[paragraph.id] = _clean_paragraph_text(paragraph)
|
746
788
|
|
747
789
|
# If the paragraph is a conversation and it matches semantically, we assume we
|
748
790
|
# have matched with the question, therefore try to include the answer to the
|
@@ -780,7 +822,7 @@ async def conversation_prompt_context(
|
|
780
822
|
text = message.content.text.strip()
|
781
823
|
pid = f"{rid}/{field_type}/{field_id}/{ident}/0-{len(text) + 1}"
|
782
824
|
attachments.extend(message.content.attachments_fields)
|
783
|
-
if pid in context
|
825
|
+
if pid in context:
|
784
826
|
continue
|
785
827
|
context[pid] = text
|
786
828
|
augmented_context.paragraphs[pid] = AugmentedTextBlock(
|
@@ -802,7 +844,7 @@ async def conversation_prompt_context(
|
|
802
844
|
text = message.content.text.strip()
|
803
845
|
attachments.extend(message.content.attachments_fields)
|
804
846
|
pid = f"{rid}/{field_type}/{field_id}/{ident}/0-{len(text) + 1}"
|
805
|
-
if pid in context
|
847
|
+
if pid in context:
|
806
848
|
continue
|
807
849
|
context[pid] = text
|
808
850
|
augmented_context.paragraphs[pid] = AugmentedTextBlock(
|
@@ -834,7 +876,7 @@ async def conversation_prompt_context(
|
|
834
876
|
text = message.content.text.strip()
|
835
877
|
attachments.extend(message.content.attachments_fields)
|
836
878
|
pid = f"{rid}/{field_type}/{field_id}/{message.ident}/0-{len(message.content.text) + 1}"
|
837
|
-
if pid in context
|
879
|
+
if pid in context:
|
838
880
|
continue
|
839
881
|
context[pid] = text
|
840
882
|
augmented_context.paragraphs[pid] = AugmentedTextBlock(
|
@@ -854,7 +896,7 @@ async def conversation_prompt_context(
|
|
854
896
|
extracted_text = await field.get_extracted_text()
|
855
897
|
if extracted_text is not None:
|
856
898
|
pid = f"{rid}/{field_type}/{attachment.field_id}/0-{len(extracted_text.text) + 1}"
|
857
|
-
if pid in context
|
899
|
+
if pid in context:
|
858
900
|
continue
|
859
901
|
text = f"Attachment {attachment.field_id}: {extracted_text.text}\n\n"
|
860
902
|
context[pid] = text
|
@@ -977,9 +1019,9 @@ async def hierarchy_prompt_context(
|
|
977
1019
|
paragraph_text = _clean_paragraph_text(paragraph)
|
978
1020
|
context[paragraph.id] = paragraph_text
|
979
1021
|
if paragraph.id in augmented_paragraphs:
|
980
|
-
|
981
|
-
augmented_context.
|
982
|
-
id=
|
1022
|
+
pid = ParagraphId.from_string(paragraph.id)
|
1023
|
+
augmented_context.paragraphs[pid.full()] = AugmentedTextBlock(
|
1024
|
+
id=pid.full(), text=paragraph_text, augmentation_type=TextBlockAugmentationType.HIERARCHY
|
983
1025
|
)
|
984
1026
|
return
|
985
1027
|
|
@@ -168,7 +168,7 @@ def _prepare_query_search(query: search_models.CatalogQuery, params: dict[str, A
|
|
168
168
|
# executed per query is not a problem.
|
169
169
|
|
170
170
|
# Remove zero-length words from the split
|
171
|
-
params["query"] = [word for word in SPLIT_REGEX.split(query.query) if word]
|
171
|
+
params["query"] = [word.lower() for word in SPLIT_REGEX.split(query.query) if word]
|
172
172
|
return sql.SQL("regexp_split_to_array(lower(title), '\\W') @> %(query)s")
|
173
173
|
elif query.match == search_models.CatalogQueryMatch.Fuzzy:
|
174
174
|
params["query"] = query.query
|
@@ -19,7 +19,7 @@
|
|
19
19
|
#
|
20
20
|
import json
|
21
21
|
from enum import Enum
|
22
|
-
from typing import Any,
|
22
|
+
from typing import Any, Optional, Union
|
23
23
|
|
24
24
|
import aiohttp
|
25
25
|
from fastapi.datastructures import QueryParams
|
@@ -63,6 +63,7 @@ class PredictProxiedEndpoints(str, Enum):
|
|
63
63
|
|
64
64
|
ALLOWED_HEADERS = [
|
65
65
|
"Accept", # To allow 'application/x-ndjson' on the /chat endpoint
|
66
|
+
"X-show-consumption", # To show token consumption in the response
|
66
67
|
]
|
67
68
|
|
68
69
|
PREDICT_ANSWER_METRIC = "predict_answer_proxy_metric"
|
@@ -171,21 +172,13 @@ async def chat_streaming_generator(
|
|
171
172
|
user_query: str,
|
172
173
|
is_json: bool,
|
173
174
|
):
|
174
|
-
stream: AsyncIterable[bytes]
|
175
|
-
if is_json:
|
176
|
-
# ndjson: stream lines
|
177
|
-
stream = predict_response.content
|
178
|
-
else:
|
179
|
-
# plain text: stream chunks (last chunk is status)
|
180
|
-
stream = predict_response.content.iter_any()
|
181
|
-
|
182
175
|
first = True
|
183
176
|
status_code = AnswerStatusCode.ERROR.value
|
184
177
|
text_answer = ""
|
185
178
|
json_object = None
|
186
179
|
metrics = AskMetrics()
|
187
180
|
with metrics.time(PREDICT_ANSWER_METRIC):
|
188
|
-
async for chunk in
|
181
|
+
async for chunk in predict_response.content:
|
189
182
|
if first:
|
190
183
|
metrics.record_first_chunk_yielded()
|
191
184
|
first = False
|
@@ -211,7 +204,11 @@ async def chat_streaming_generator(
|
|
211
204
|
|
212
205
|
if is_json is False and chunk: # Ensure chunk is not empty before decoding
|
213
206
|
# If response is text the status_code comes at the last chunk of data
|
214
|
-
|
207
|
+
last_chunk = chunk.decode()
|
208
|
+
if last_chunk[-1] == "0":
|
209
|
+
status_code = "0"
|
210
|
+
else:
|
211
|
+
status_code = last_chunk[-2:]
|
215
212
|
|
216
213
|
audit_predict_proxy_endpoint(
|
217
214
|
headers=predict_response.headers,
|
@@ -45,7 +45,9 @@ class NoResourcesToSummarize(Exception):
|
|
45
45
|
pass
|
46
46
|
|
47
47
|
|
48
|
-
async def summarize(
|
48
|
+
async def summarize(
|
49
|
+
kbid: str, request: SummarizeRequest, extra_predict_headers: Optional[dict[str, str]]
|
50
|
+
) -> SummarizedResponse:
|
49
51
|
predict_request = SummarizeModel()
|
50
52
|
predict_request.generative_model = request.generative_model
|
51
53
|
predict_request.user_prompt = request.user_prompt
|
@@ -62,7 +64,7 @@ async def summarize(kbid: str, request: SummarizeRequest) -> SummarizedResponse:
|
|
62
64
|
raise NoResourcesToSummarize()
|
63
65
|
|
64
66
|
predict = get_predict()
|
65
|
-
return await predict.summarize(kbid, predict_request)
|
67
|
+
return await predict.summarize(kbid=kbid, item=predict_request, extra_headers=extra_predict_headers)
|
66
68
|
|
67
69
|
|
68
70
|
async def get_extracted_texts(kbid: str, resource_uuids_or_slugs: list[str]) -> ExtractedTexts:
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: nucliadb
|
3
|
-
Version: 6.6.
|
3
|
+
Version: 6.6.1.post649
|
4
4
|
Summary: NucliaDB
|
5
5
|
Author-email: Nuclia <nucliadb@nuclia.com>
|
6
6
|
License-Expression: AGPL-3.0-or-later
|
@@ -19,13 +19,13 @@ Classifier: Programming Language :: Python :: 3.12
|
|
19
19
|
Classifier: Programming Language :: Python :: 3 :: Only
|
20
20
|
Requires-Python: <4,>=3.9
|
21
21
|
Description-Content-Type: text/markdown
|
22
|
-
Requires-Dist: nucliadb-telemetry[all]>=6.6.
|
23
|
-
Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.6.
|
24
|
-
Requires-Dist: nucliadb-protos>=6.6.
|
25
|
-
Requires-Dist: nucliadb-models>=6.6.
|
26
|
-
Requires-Dist: nidx-protos>=6.6.
|
22
|
+
Requires-Dist: nucliadb-telemetry[all]>=6.6.1.post649
|
23
|
+
Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.6.1.post649
|
24
|
+
Requires-Dist: nucliadb-protos>=6.6.1.post649
|
25
|
+
Requires-Dist: nucliadb-models>=6.6.1.post649
|
26
|
+
Requires-Dist: nidx-protos>=6.6.1.post649
|
27
27
|
Requires-Dist: nucliadb-admin-assets>=1.0.0.post1224
|
28
|
-
Requires-Dist: nuclia-models>=0.
|
28
|
+
Requires-Dist: nuclia-models>=0.43.0
|
29
29
|
Requires-Dist: uvicorn[standard]
|
30
30
|
Requires-Dist: argdantic
|
31
31
|
Requires-Dist: aiohttp>=3.11.11
|
@@ -33,6 +33,7 @@ migrations/0034_rollover_nidx_texts_3.py,sha256=t19QtWUgHxmTaBPoR1DooAby2IYmkLTQ
|
|
33
33
|
migrations/0035_rollover_nidx_texts_4.py,sha256=W0_AUd01pjMpYMDC3yqF6HzDLgcnnPprL80kfyb1WZI,1187
|
34
34
|
migrations/0036_backfill_catalog_slug.py,sha256=mizRM-HfPswKq4iEmqofu4kIT6Gd97ruT3qhb257vZk,2954
|
35
35
|
migrations/0037_backfill_catalog_facets.py,sha256=KAf3VKbKePw7ykDnJi47LyJ7pK1JwYkwMxrsXUnbt9g,2788
|
36
|
+
migrations/0038_backfill_catalog_field_labels.py,sha256=EKJwJfU0p1nDq7s71CpGhaX4t1iD2d1ZCzTmLcUAhDs,3382
|
36
37
|
migrations/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
|
37
38
|
migrations/pg/0001_bootstrap.py,sha256=3O_P17l0d0h48nebN6VQLXzM_B7S7zvDpaLR0koVgWE,1274
|
38
39
|
migrations/pg/0002_catalog.py,sha256=Rsleecu351Ty19kYZgOpqX5G3MEAY8nMxCJrAeuS2Mw,1690
|
@@ -132,7 +133,7 @@ nucliadb/export_import/utils.py,sha256=XV3tJJdhgnVJRSj8AxZjgeipONtB107M185HVJmHp
|
|
132
133
|
nucliadb/ingest/__init__.py,sha256=fsw3C38VP50km3R-nHL775LNGPpJ4JxqXJ2Ib1f5SqE,1011
|
133
134
|
nucliadb/ingest/app.py,sha256=qiPad2eWgudRdLq0tB0MQZOxOezXO7QBK_ZpPNKQZO0,7378
|
134
135
|
nucliadb/ingest/partitions.py,sha256=2NIhMYbNT0TNBL6bX1UMSi7vxFGICstCKEqsB0TXHOE,2410
|
135
|
-
nucliadb/ingest/processing.py,sha256=
|
136
|
+
nucliadb/ingest/processing.py,sha256=gAm591llkscMq0abhxQmpChDZIzto-76Dni4f7Flhfw,21229
|
136
137
|
nucliadb/ingest/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
137
138
|
nucliadb/ingest/serialize.py,sha256=-TIjibJTbMqAowzRvyrG3R209vKqBZqXpdrQL9Dq4lo,16135
|
138
139
|
nucliadb/ingest/settings.py,sha256=5qJICxwYb028a2iAhVbxOJB5X-hWtDLtiya-YhWostw,3179
|
@@ -162,12 +163,12 @@ nucliadb/ingest/orm/exceptions.py,sha256=k4Esv4NtL4TrGTcsQpwrSfDhPQpiYcRbB1SpYmB
|
|
162
163
|
nucliadb/ingest/orm/index_message.py,sha256=DWMTHJoVamUbK8opKl5csDvxfgz7c2j7phG1Ut4yIxk,15724
|
163
164
|
nucliadb/ingest/orm/knowledgebox.py,sha256=_rkeTMIXMhR64gbYtZpFHoUHghV2DTJ2lUBqZsoqC_4,23898
|
164
165
|
nucliadb/ingest/orm/metrics.py,sha256=OiuggTh-n3kZHA2G73NEUdIlh8c3yFrbusI88DK-Mko,1273
|
165
|
-
nucliadb/ingest/orm/resource.py,sha256=
|
166
|
+
nucliadb/ingest/orm/resource.py,sha256=yB0HWC3jc_1b-zXu-3FJCKOdAPPSb1aRBHpbZhsvyQk,37749
|
166
167
|
nucliadb/ingest/orm/utils.py,sha256=fCQRuyecgqhaY7mcBG93oaXMkzkKb9BFjOcy4-ZiSNw,2693
|
167
168
|
nucliadb/ingest/orm/processor/__init__.py,sha256=Aqd9wCNTvggkMkCY3WvoI8spdr94Jnqk-0iq9XpLs18,922
|
168
169
|
nucliadb/ingest/orm/processor/auditing.py,sha256=TeYhXGJRyQ7ROytbb2u8R0fIh_FYi3HgTu3S1ribY3U,4623
|
169
170
|
nucliadb/ingest/orm/processor/data_augmentation.py,sha256=v-pj4GbBWSuO8dQyahs5UDr5ghsyfhCZDS0ftKd6ZYc,5179
|
170
|
-
nucliadb/ingest/orm/processor/pgcatalog.py,sha256=
|
171
|
+
nucliadb/ingest/orm/processor/pgcatalog.py,sha256=VPQ_Evme7xmmGoQ45zt0Am0yPkaD4hxN1r5rEaVt6s8,4633
|
171
172
|
nucliadb/ingest/orm/processor/processor.py,sha256=jaEBwbv--WyoC8zcdxWAyF0dAzVA5crVDJl56Bqv1eI,31444
|
172
173
|
nucliadb/ingest/orm/processor/sequence_manager.py,sha256=uqEphtI1Ir_yk9jRl2gPf7BlzzXWovbARY5MNZSBI_8,1704
|
173
174
|
nucliadb/ingest/service/__init__.py,sha256=LHQFUkdmNBOWqBG0Md9sMMI7g5TQZ-hLAnhw6ZblrJg,2002
|
@@ -212,7 +213,7 @@ nucliadb/search/__init__.py,sha256=tnypbqcH4nBHbGpkINudhKgdLKpwXQCvDtPchUlsyY4,1
|
|
212
213
|
nucliadb/search/app.py,sha256=-WEX1AZRA8R_9aeOo9ovOTwjXW_7VfwWN7N2ccSoqXg,3387
|
213
214
|
nucliadb/search/lifecycle.py,sha256=hiylV-lxsAWkqTCulXBg0EIfMQdejSr8Zar0L_GLFT8,2218
|
214
215
|
nucliadb/search/openapi.py,sha256=t3Wo_4baTrfPftg2BHsyLWNZ1MYn7ZRdW7ht-wFOgRs,1016
|
215
|
-
nucliadb/search/predict.py,sha256=
|
216
|
+
nucliadb/search/predict.py,sha256=xZtZaydg1pzXOSEDg0xyWNbbgA4zMQ59gbHi0wNuAxk,23770
|
216
217
|
nucliadb/search/predict_models.py,sha256=pm4ykuWH9bTXxj5RlI2F6pmXSXOVt64WL_sRlc2u6Tk,6144
|
217
218
|
nucliadb/search/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
218
219
|
nucliadb/search/run.py,sha256=aFb-CXRi_C8YMpP_ivNj8KW1BYhADj88y8K9Lr_nUPI,1402
|
@@ -220,7 +221,7 @@ nucliadb/search/settings.py,sha256=vem3EcyYlTPSim0kEK-xe-erF4BZg0CT_LAb8ZRQAE8,1
|
|
220
221
|
nucliadb/search/utilities.py,sha256=9SsRDw0rJVXVoLBfF7rBb6q080h-thZc7u8uRcTiBeY,1037
|
221
222
|
nucliadb/search/api/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
|
222
223
|
nucliadb/search/api/v1/__init__.py,sha256=DH16OYnw9jQ38OpKlmdXeoq2j40ZPXZRtGvClKOkMhw,1239
|
223
|
-
nucliadb/search/api/v1/ask.py,sha256=
|
224
|
+
nucliadb/search/api/v1/ask.py,sha256=hZUnk1opZuXp1IwTiingSatlUefg2CZ9r_Z9sUwZMaU,5698
|
224
225
|
nucliadb/search/api/v1/catalog.py,sha256=5ZY3d8sVia1traUxVS0Q4aQJmgcOuXzbxis_uY4ulE4,8077
|
225
226
|
nucliadb/search/api/v1/feedback.py,sha256=kNLc4dHz2SXHzV0PwC1WiRAwY88fDptPcP-kO0q-FrQ,2620
|
226
227
|
nucliadb/search/api/v1/find.py,sha256=j6mxEyxjlLnZSqCT_N2LmOJlytsm1vkY4KFFmJRrtP8,10904
|
@@ -230,10 +231,10 @@ nucliadb/search/api/v1/predict_proxy.py,sha256=TnXKAqf_Go-9QVi6L5z4cXjnuNRe7XLJj
|
|
230
231
|
nucliadb/search/api/v1/router.py,sha256=mtT07rBZcVfpa49doaw9b1tj3sdi3qLH0gn9Io6NYM0,988
|
231
232
|
nucliadb/search/api/v1/search.py,sha256=eqlrvRE7IlMpunNwD1RJwt6RgMV01sIDJLgxxE7CFcE,12297
|
232
233
|
nucliadb/search/api/v1/suggest.py,sha256=gaJE60r8-z6TVO05mQRKBITwXn2_ofM3B4-OtpOgZEk,6343
|
233
|
-
nucliadb/search/api/v1/summarize.py,sha256=
|
234
|
+
nucliadb/search/api/v1/summarize.py,sha256=eJzgFJWUO80STx3lHc_0h9RZVaBCWF196nZUecfmqbE,2700
|
234
235
|
nucliadb/search/api/v1/utils.py,sha256=5Ve-frn7LAE2jqAgB85F8RSeqxDlyA08--gS-AdOLS4,1434
|
235
236
|
nucliadb/search/api/v1/resource/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
|
236
|
-
nucliadb/search/api/v1/resource/ask.py,sha256=
|
237
|
+
nucliadb/search/api/v1/resource/ask.py,sha256=PlOXa17lnmj3KA9bARNfDqvnx7Pe9OTnwz-OwgGTUjU,4035
|
237
238
|
nucliadb/search/api/v1/resource/ingestion_agents.py,sha256=AZ5_cH1jbf7d5wh_gz6EHLEKAzEOMrQZwEZAu1Q_3FE,4846
|
238
239
|
nucliadb/search/api/v1/resource/search.py,sha256=PZR7fs5oYD0RKqKoD38NZMAnOJzBv35NB2YOr2xy1ck,4923
|
239
240
|
nucliadb/search/api/v1/resource/utils.py,sha256=-NjZqAQtFEXKpIh8ui5S26ItnJ5rzmmG0BHxGSS9QPw,1141
|
@@ -254,25 +255,25 @@ nucliadb/search/search/ingestion_agents.py,sha256=IK6yOPEF9rST_uoqspdVdPk0pldjDh
|
|
254
255
|
nucliadb/search/search/merge.py,sha256=XiRBsxhYPshPV7lZXD-9E259KZOPIf4I2tKosY0lPo4,22470
|
255
256
|
nucliadb/search/search/metrics.py,sha256=3I6IN0qDSmqIvUaWJmT3rt-Jyjs6LcvnKI8ZqCiuJPY,3501
|
256
257
|
nucliadb/search/search/paragraphs.py,sha256=pNAEiYqJGGUVcEf7xf-PFMVqz0PX4Qb-WNG-_zPGN2o,7799
|
257
|
-
nucliadb/search/search/pgcatalog.py,sha256=
|
258
|
-
nucliadb/search/search/predict_proxy.py,sha256=
|
258
|
+
nucliadb/search/search/pgcatalog.py,sha256=0n_gDihZZhqrDLRHvHzS3IESvMRTcU6YShqizQMyE_Y,16807
|
259
|
+
nucliadb/search/search/predict_proxy.py,sha256=Df8F5K-oS4TIXJc_y8UDViJTo7st5L0kMgxYPFZ39Vk,8806
|
259
260
|
nucliadb/search/search/query.py,sha256=0qIQdt548L3jtKOyKo06aGJ73SLBxAW3N38_Hc1M3Uw,11528
|
260
261
|
nucliadb/search/search/rank_fusion.py,sha256=xZtXhbmKb_56gs73u6KkFm2efvTATOSMmpOV2wrAIqE,9613
|
261
262
|
nucliadb/search/search/rerankers.py,sha256=E2J1QdKAojqbhHM3KAyaOXKf6tJyETUxKs4tf_BEyqk,7472
|
262
263
|
nucliadb/search/search/shards.py,sha256=mc5DK-MoCv9AFhlXlOFHbPvetcyNDzTFOJ5rimK8PC8,2636
|
263
|
-
nucliadb/search/search/summarize.py,sha256=
|
264
|
+
nucliadb/search/search/summarize.py,sha256=S4-mUS8d-rvHFcsr8Pa8N5NTxU6ZTxLFZTMKTTOOpr4,5098
|
264
265
|
nucliadb/search/search/utils.py,sha256=ajRIXfdTF67dBVahQCXW-rSv6gJpUMPt3QhJrWqArTQ,2175
|
265
266
|
nucliadb/search/search/chat/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
|
266
|
-
nucliadb/search/search/chat/ask.py,sha256=
|
267
|
+
nucliadb/search/search/chat/ask.py,sha256=vJ3TSdr-cT_xh43UnoYugqxnHv_-LFSCYoU7o0NnI1M,39368
|
267
268
|
nucliadb/search/search/chat/exceptions.py,sha256=Siy4GXW2L7oPhIR86H3WHBhE9lkV4A4YaAszuGGUf54,1356
|
268
269
|
nucliadb/search/search/chat/images.py,sha256=PA8VWxT5_HUGfW1ULhKTK46UBsVyINtWWqEM1ulzX1E,3095
|
269
|
-
nucliadb/search/search/chat/prompt.py,sha256=
|
270
|
+
nucliadb/search/search/chat/prompt.py,sha256=gmYRC3aK03vrDoBElJP5H5Z7OEeu79k5yTxv3FEkN0I,53866
|
270
271
|
nucliadb/search/search/chat/query.py,sha256=3jMPNbiFEOoS0ydMOPYkSx1qVlvAv51npzadWXDwkMs,16650
|
271
272
|
nucliadb/search/search/query_parser/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
|
272
273
|
nucliadb/search/search/query_parser/exceptions.py,sha256=sVl9gRNzhE-s480LBBVkiXzNRbKhYRQN5F3it5tNNp8,939
|
273
274
|
nucliadb/search/search/query_parser/fetcher.py,sha256=nP4EySj2BvH10QgCvgzvp13Nf22wwfHsdLbDoPlH2cQ,16831
|
274
275
|
nucliadb/search/search/query_parser/models.py,sha256=kAslqX_-zaIdUpcpdNU2a5uQPQh7LC605qWLZ4aZ5T4,5064
|
275
|
-
nucliadb/search/search/query_parser/old_filters.py,sha256=
|
276
|
+
nucliadb/search/search/query_parser/old_filters.py,sha256=GsU3T3-WiSPvjucP7evHkshzAWZOli8qsuXChvWRCY0,9092
|
276
277
|
nucliadb/search/search/query_parser/parsers/__init__.py,sha256=ySCNSdbesLXGZyR88919njulA6UE10_3PhqMG_Yj1o4,1034
|
277
278
|
nucliadb/search/search/query_parser/parsers/ask.py,sha256=eTz8wS-EJHuAagR384h6TT64itymFZRpfZJGX8r6aZM,2771
|
278
279
|
nucliadb/search/search/query_parser/parsers/catalog.py,sha256=JuDiBL2wdjAuEFEPo0e2nQ4VqWjF3FXakT0ziZk3Oes,7495
|
@@ -375,8 +376,8 @@ nucliadb/writer/tus/local.py,sha256=7jYa_w9b-N90jWgN2sQKkNcomqn6JMVBOVeDOVYJHto,
|
|
375
376
|
nucliadb/writer/tus/s3.py,sha256=vF0NkFTXiXhXq3bCVXXVV-ED38ECVoUeeYViP8uMqcU,8357
|
376
377
|
nucliadb/writer/tus/storage.py,sha256=ToqwjoYnjI4oIcwzkhha_MPxi-k4Jk3Lt55zRwaC1SM,2903
|
377
378
|
nucliadb/writer/tus/utils.py,sha256=MSdVbRsRSZVdkaum69_0wku7X3p5wlZf4nr6E0GMKbw,2556
|
378
|
-
nucliadb-6.6.
|
379
|
-
nucliadb-6.6.
|
380
|
-
nucliadb-6.6.
|
381
|
-
nucliadb-6.6.
|
382
|
-
nucliadb-6.6.
|
379
|
+
nucliadb-6.6.1.post649.dist-info/METADATA,sha256=_peNGuFRZE9h5r-n-Aglwzr-hlIijYTLzMDBF5BWsss,4152
|
380
|
+
nucliadb-6.6.1.post649.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
381
|
+
nucliadb-6.6.1.post649.dist-info/entry_points.txt,sha256=XqGfgFDuY3zXQc8ewXM2TRVjTModIq851zOsgrmaXx4,1268
|
382
|
+
nucliadb-6.6.1.post649.dist-info/top_level.txt,sha256=hwYhTVnX7jkQ9gJCkVrbqEG1M4lT2F_iPQND1fCzF80,20
|
383
|
+
nucliadb-6.6.1.post649.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|