nucliadb 6.4.0.post4227__py3-none-any.whl → 6.4.0.post4265__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nucliadb/search/api/v1/find.py +5 -1
- nucliadb/search/api/v1/predict_proxy.py +8 -3
- nucliadb/search/predict.py +6 -4
- nucliadb/search/search/chat/ask.py +15 -24
- nucliadb/search/search/chat/query.py +12 -15
- nucliadb/search/search/find.py +8 -8
- nucliadb/search/search/graph_strategy.py +2 -2
- nucliadb/search/search/metrics.py +41 -21
- nucliadb/search/search/predict_proxy.py +17 -10
- {nucliadb-6.4.0.post4227.dist-info → nucliadb-6.4.0.post4265.dist-info}/METADATA +6 -6
- {nucliadb-6.4.0.post4227.dist-info → nucliadb-6.4.0.post4265.dist-info}/RECORD +14 -14
- {nucliadb-6.4.0.post4227.dist-info → nucliadb-6.4.0.post4265.dist-info}/WHEEL +1 -1
- {nucliadb-6.4.0.post4227.dist-info → nucliadb-6.4.0.post4265.dist-info}/entry_points.txt +0 -0
- {nucliadb-6.4.0.post4227.dist-info → nucliadb-6.4.0.post4265.dist-info}/top_level.txt +0 -0
nucliadb/search/api/v1/find.py
CHANGED
@@ -34,6 +34,7 @@ from nucliadb.search.api.v1.utils import fastapi_query
 from nucliadb.search.search import cache
 from nucliadb.search.search.exceptions import InvalidQueryError
 from nucliadb.search.search.find import find
+from nucliadb.search.search.metrics import Metrics
 from nucliadb.search.search.utils import maybe_log_request_payload, min_score_from_query_params
 from nucliadb_models.common import FieldTypeName
 from nucliadb_models.configuration import FindConfig
@@ -231,10 +232,13 @@ async def _find_endpoint(
     try:
         maybe_log_request_payload(kbid, "/find", item)
         with cache.request_caches():
+            metrics = Metrics("find")
             results, incomplete, _ = await find(
-                kbid, item, x_ndb_client, x_nucliadb_user, x_forwarded_for
+                kbid, item, x_ndb_client, x_nucliadb_user, x_forwarded_for, metrics
             )
             response.status_code = 206 if incomplete else 200
+            if item.debug:
+                results.metrics = metrics.to_dict()
             return results
     except KnowledgeBoxNotFound:
         return HTTPClientError(status_code=404, detail="Knowledge Box not found")
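For orientation, here is a minimal sketch of the pattern this change introduces: a request-scoped Metrics object is created per /find call, passed down the retrieval stack, and its timings are attached to the response only when the client sends debug=true. The run_query callable below is a hypothetical stand-in for the real find() chain; Metrics, time() and to_dict() are the helpers added in nucliadb/search/search/metrics.py further down.

from nucliadb.search.search.metrics import Metrics

async def timed_find(item, run_query):
    metrics = Metrics("find")           # root span for this request
    with metrics.time("index_search"):  # each timed step is recorded under its name
        results = await run_query(item)
    if item.debug:                      # timings are only exposed on demand
        results.metrics = metrics.to_dict()
    return results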
nucliadb/search/api/v1/predict_proxy.py
CHANGED
@@ -21,7 +21,7 @@ import json
 from typing import Union
 
 from fastapi import Request
-from fastapi.responses import
+from fastapi.responses import Response, StreamingResponse
 from fastapi_versioning import version
 
 from nucliadb.common.datamanagers.exceptions import KnowledgeBoxNotFound
@@ -58,14 +58,19 @@ async def predict_proxy_endpoint(
     request: Request,
     kbid: str,
     endpoint: PredictProxiedEndpoints,
-) -> Union[
+) -> Union[Response, StreamingResponse, HTTPClientError]:
     try:
         payload = await request.json()
     except json.JSONDecodeError:
         payload = None
     try:
         return await predict_proxy(
-            kbid,
+            kbid,
+            endpoint,
+            request.method,
+            params=request.query_params,
+            json=payload,
+            headers=dict(request.headers),
         )
     except KnowledgeBoxNotFound:
         return HTTPClientError(status_code=404, detail="Knowledge box not found")
nucliadb/search/predict.py
CHANGED
@@ -262,7 +262,7 @@ class PredictEngine:
         jitter=backoff.random_jitter,
         max_tries=MAX_TRIES,
     )
-    async def make_request(self, method: str, **request_args):
+    async def make_request(self, method: str, **request_args) -> aiohttp.ClientResponse:
         func = getattr(self.session, method.lower())
         return await func(**request_args)
 
@@ -311,8 +311,8 @@ class PredictEngine:
             timeout=None,
         )
         await self.check_response(kbid, resp, expected_status=200)
-        ident = resp.headers.get(NUCLIA_LEARNING_ID_HEADER)
-        model = resp.headers.get(NUCLIA_LEARNING_MODEL_HEADER)
+        ident = resp.headers.get(NUCLIA_LEARNING_ID_HEADER) or "unknown"
+        model = resp.headers.get(NUCLIA_LEARNING_MODEL_HEADER) or "unknown"
         return ident, model, get_chat_ndjson_generator(resp)
 
     @predict_observer.wrap({"type": "query"})
@@ -471,7 +471,9 @@ class DummyPredictEngine(PredictEngine):
 
     async def make_request(self, method: str, **request_args):
         response = Mock(status=200)
-
+        json_data = {"foo": "bar"}
+        response.json = AsyncMock(return_value=json_data)
+        response.read = AsyncMock(return_value=json.dumps(json_data).encode("utf-8"))
         response.headers = {NUCLIA_LEARNING_ID_HEADER: DUMMY_LEARNING_ID}
         return response
 
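The extra stubs matter because different callers consume the dummy response differently: the reworked predict proxy (further down) now reads raw bytes with await response.read(), while other consumers call await response.json(), and an aiohttp-style response must answer both. A self-contained sketch of the same mock, using only unittest.mock; the learning-id value here is a placeholder, in nucliadb it comes from the DUMMY_LEARNING_ID constant.

import asyncio
import json
from unittest.mock import AsyncMock, Mock

def dummy_response():
    response = Mock(status=200)
    json_data = {"foo": "bar"}
    response.json = AsyncMock(return_value=json_data)
    response.read = AsyncMock(return_value=json.dumps(json_data).encode("utf-8"))
    response.headers = {"NUCLIA-LEARNING-ID": "dummy-learning-id"}  # placeholder value
    return response

async def check():
    resp = dummy_response()
    assert await resp.json() == {"foo": "bar"}   # JSON consumers keep working
    assert (await resp.read()).startswith(b"{")  # byte readers (the proxy path) work too

asyncio.run(check())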
nucliadb/search/search/chat/ask.py
CHANGED
@@ -62,7 +62,7 @@ from nucliadb.search.search.exceptions import (
     InvalidQueryError,
 )
 from nucliadb.search.search.graph_strategy import get_graph_results
-from nucliadb.search.search.metrics import
+from nucliadb.search.search.metrics import AskMetrics, Metrics
 from nucliadb.search.search.query_parser.fetcher import Fetcher
 from nucliadb.search.search.query_parser.parsers.ask import fetcher_for_ask, parse_ask
 from nucliadb.search.search.rank_fusion import WeightedCombSum
@@ -140,7 +140,7 @@ class AskResult:
         prompt_context: PromptContext,
         prompt_context_order: PromptContextOrder,
         auditor: ChatAuditor,
-        metrics:
+        metrics: AskMetrics,
         best_matches: list[RetrievalMatch],
         debug_chat_model: Optional[ChatModel],
     ):
@@ -155,7 +155,7 @@ class AskResult:
         self.debug_chat_model = debug_chat_model
         self.prompt_context_order = prompt_context_order
         self.auditor: ChatAuditor = auditor
-        self.metrics:
+        self.metrics: AskMetrics = metrics
         self.best_matches: list[RetrievalMatch] = best_matches
 
         # Computed from the predict chat answer stream
@@ -264,18 +264,11 @@ class AskResult:
             audit_answer = self._answer_text.encode("utf-8")
         else:
             audit_answer = json.dumps(self._object.object).encode("utf-8")
-
-        try:
-            rephrase_time = self.metrics.elapsed("rephrase")
-        except KeyError:
-            # Not all ask requests have a rephrase step
-            rephrase_time = None
-
         self.auditor.audit(
             text_answer=audit_answer,
-            generative_answer_time=self.metrics
+            generative_answer_time=self.metrics["stream_predict_answer"],
             generative_answer_first_chunk_time=self.metrics.get_first_chunk_time() or 0,
-            rephrase_time=
+            rephrase_time=self.metrics.get("rephrase"),
             status_code=self.status_code,
         )
 
@@ -317,7 +310,8 @@ class AskResult:
                     self.prompt_context, self.prompt_context_order
                 ),
                 "predict_request": predict_request,
-            }
+            },
+            metrics=self.metrics.dump(),
         )
 
     async def json(self) -> str:
@@ -382,6 +376,9 @@ class AskResult:
             response.prompt_context = sorted_prompt_context
         if self.debug_chat_model:
             response.predict_request = self.debug_chat_model.model_dump(mode="json")
+        response.debug = {
+            "metrics": self.metrics.dump(),
+        }
         return response.model_dump_json(exclude_none=True, by_alias=True)
 
     async def get_relations_results(self) -> Relations:
@@ -481,7 +478,7 @@ async def ask(
     origin: str,
     resource: Optional[str] = None,
 ) -> AskResult:
-    metrics =
+    metrics = AskMetrics()
     chat_history = ask_request.chat_history or []
     user_context = ask_request.extra_context or []
     user_query = ask_request.query
@@ -515,12 +512,6 @@ async def ask(
             resource=resource,
         )
     except NoRetrievalResultsError as err:
-        try:
-            rephrase_time = metrics.elapsed("rephrase")
-        except KeyError:
-            # Not all ask requests have a rephrase step
-            rephrase_time = None
-
         maybe_audit_chat(
             kbid=kbid,
             user_id=user_id,
@@ -528,7 +519,7 @@ async def ask(
             origin=origin,
             generative_answer_time=0,
             generative_answer_first_chunk_time=0,
-            rephrase_time=
+            rephrase_time=metrics.get("rephrase"),
             user_query=user_query,
             rephrased_query=rephrased_query,
             retrieval_rephrase_query=err.main_query.rephrased_query if err.main_query else None,
@@ -709,7 +700,7 @@ async def retrieval_step(
     client_type: NucliaDBClientType,
     user_id: str,
     origin: str,
-    metrics:
+    metrics: Metrics,
     resource: Optional[str] = None,
 ) -> RetrievalResults:
     """
@@ -745,7 +736,7 @@ async def retrieval_in_kb(
     client_type: NucliaDBClientType,
     user_id: str,
     origin: str,
-    metrics:
+    metrics: Metrics,
 ) -> RetrievalResults:
     prequeries = parse_prequeries(ask_request)
     graph_strategy = parse_graph_strategy(ask_request)
@@ -812,7 +803,7 @@ async def retrieval_in_resource(
     client_type: NucliaDBClientType,
     user_id: str,
     origin: str,
-    metrics:
+    metrics: Metrics,
 ) -> RetrievalResults:
     if any(strategy.name == "full_resource" for strategy in ask_request.rag_strategies):
         # Retrieval is not needed if we are chatting on a specific resource and the full_resource strategy is enabled
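The deleted try/except blocks above were working around the old RAGMetrics.elapsed(), which raised KeyError for steps that never ran. With AskMetrics the timings live in a plain dict, so a step that always happens can be read with [] and an optional one (rephrase does not run for every ask) with .get(), which simply returns None. A hedged sketch, assuming the nucliadb package is importable:

import asyncio

from nucliadb.search.search.metrics import AskMetrics

async def timings_for_audit():
    metrics = AskMetrics()
    with metrics.time("stream_predict_answer"):
        await asyncio.sleep(0)  # stand-in for streaming the generative answer
    generative_answer_time = metrics["stream_predict_answer"]  # always recorded
    rephrase_time = metrics.get("rephrase")                    # None when no rephrase step ran
    return generative_answer_time, rephrase_time

print(asyncio.run(timings_for_audit()))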
nucliadb/search/search/chat/query.py
CHANGED
@@ -33,7 +33,7 @@ from nucliadb.search.search.chat.exceptions import NoRetrievalResultsError
 from nucliadb.search.search.exceptions import IncompleteFindResultsError
 from nucliadb.search.search.find import find
 from nucliadb.search.search.merge import merge_relations_results
-from nucliadb.search.search.metrics import
+from nucliadb.search.search.metrics import Metrics
 from nucliadb.search.search.query_parser.models import ParsedQuery, Query, RelationQuery, UnitRetrieval
 from nucliadb.search.search.query_parser.parsers.unit_retrieval import convert_retrieval_to_proto
 from nucliadb.search.settings import settings
@@ -91,7 +91,7 @@ async def get_find_results(
     ndb_client: NucliaDBClientType,
     user: str,
     origin: str,
-    metrics:
+    metrics: Metrics,
     prequeries_strategy: Optional[PreQueriesStrategy] = None,
 ) -> tuple[KnowledgeboxFindResults, Optional[list[PreQueryResult]], ParsedQuery]:
     prequeries_results = None
@@ -108,7 +108,7 @@ async def get_find_results(
             x_ndb_client=ndb_client,
             x_nucliadb_user=user,
             x_forwarded_for=origin,
-            metrics=metrics,
+            metrics=metrics.child_span("prefilters"),
         )
         prefilter_matching_resources = {
             resource
@@ -133,8 +133,7 @@ async def get_find_results(
             x_ndb_client=ndb_client,
             x_nucliadb_user=user,
             x_forwarded_for=origin,
-
-            metrics=metrics,
+            metrics=metrics.child_span("prequeries"),
         )
 
     prequeries_results = (prefilter_queries_results or []) + (queries_results or [])
@@ -147,7 +146,7 @@ async def get_find_results(
         ndb_client,
         user,
         origin,
-        metrics=metrics,
+        metrics=metrics.child_span("main_query"),
     )
     return main_results, prequeries_results, query_parser
 
@@ -223,7 +222,7 @@ async def run_main_query(
     ndb_client: NucliaDBClientType,
     user: str,
     origin: str,
-    metrics:
+    metrics: Metrics,
 ) -> tuple[KnowledgeboxFindResults, ParsedQuery]:
     find_request = find_request_from_ask_request(item, query)
 
@@ -455,8 +454,7 @@ async def run_prequeries(
     x_ndb_client: NucliaDBClientType,
     x_nucliadb_user: str,
     x_forwarded_for: str,
-
-    metrics: RAGMetrics = RAGMetrics(),
+    metrics: Metrics,
 ) -> list[PreQueryResult]:
     """
     Runs simultaneous find requests for each prequery and returns the merged results according to the normalized weights.
@@ -464,23 +462,22 @@ async def run_prequeries(
     results: list[PreQueryResult] = []
     max_parallel_prequeries = asyncio.Semaphore(settings.prequeries_max_parallel)
 
-    async def _prequery_find(
-        prequery: PreQuery,
-    ):
+    async def _prequery_find(prequery: PreQuery, index: int):
         async with max_parallel_prequeries:
+            prequery_id = prequery.id or f"prequery-{index}"
             find_results, _, _ = await find(
                 kbid,
                 prequery.request,
                 x_ndb_client,
                 x_nucliadb_user,
                 x_forwarded_for,
-                metrics=metrics,
+                metrics=metrics.child_span(prequery_id),
             )
             return prequery, find_results
 
     ops = []
-    for prequery in prequeries:
-        ops.append(asyncio.create_task(_prequery_find(prequery)))
+    for idx, prequery in enumerate(prequeries):
+        ops.append(asyncio.create_task(_prequery_find(prequery, idx)))
     ops_results = await asyncio.gather(*ops)
     for prequery, find_results in ops_results:
         results.append((prequery, find_results))
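A hedged sketch of the span tree this module now builds: the parent metrics object hands a named child span to each sub-retrieval (prefilters, prequeries, main_query, and one prequery-{index} span per prequery), and dump() flattens everything into a single dict keyed by span id. The parent id "ask" mirrors AskMetrics; the loop below stands in for the real find() calls.

from nucliadb.search.search.metrics import Metrics

metrics = Metrics("ask")                      # parent span for the whole request
for idx in range(2):                          # one child per prequery, as in _prequery_find
    with metrics.child_span(f"prequery-{idx}").time("index_search"):
        pass                                  # stand-in for the actual retrieval
with metrics.child_span("main_query").time("index_search"):
    pass

# dump() yields one flat mapping, e.g.:
# {"prequery-0": {"index_search": ...}, "prequery-1": {"index_search": ...},
#  "main_query": {"index_search": ...}, "ask": {}}
print(metrics.dump())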
nucliadb/search/search/find.py
CHANGED
@@ -34,7 +34,7 @@ from nucliadb.search.search.hydrator import (
     TextBlockHydrationOptions,
 )
 from nucliadb.search.search.metrics import (
-
+    Metrics,
 )
 from nucliadb.search.search.query_parser.models import ParsedQuery
 from nucliadb.search.search.query_parser.parsers import parse_find
@@ -64,7 +64,7 @@ async def find(
     x_ndb_client: NucliaDBClientType,
     x_nucliadb_user: str,
     x_forwarded_for: str,
-    metrics:
+    metrics: Metrics,
 ) -> tuple[KnowledgeboxFindResults, bool, ParsedQuery]:
     external_index_manager = await get_external_index_manager(kbid=kbid)
     if external_index_manager is not None:
@@ -85,7 +85,7 @@ async def _index_node_retrieval(
     x_ndb_client: NucliaDBClientType,
     x_nucliadb_user: str,
     x_forwarded_for: str,
-    metrics:
+    metrics: Metrics,
 ) -> tuple[KnowledgeboxFindResults, bool, ParsedQuery]:
     audit = get_audit()
     start_time = time()
@@ -104,7 +104,7 @@ async def _index_node_retrieval(
         rephrased_query,
     ) = await legacy_convert_retrieval_to_proto(parsed)
 
-    with metrics.time("
+    with metrics.time("index_search"):
         results, query_incomplete_results, queried_shards = await node_query(
             kbid, Method.SEARCH, pb_query
         )
@@ -142,8 +142,8 @@ async def _index_node_retrieval(
     search_results.shards = queried_shards
     search_results.autofilters = autofilters
 
-    ndb_time = metrics
-    if metrics
+    ndb_time = metrics["index_search"] + metrics["results_merge"]
+    if metrics["index_search"] > settings.slow_node_query_log_threshold:
         logger.warning(
             "Slow nidx query",
             extra={
@@ -152,7 +152,7 @@ async def _index_node_retrieval(
                 "client": x_ndb_client,
                 "query": item.model_dump_json(),
                 "time": search_time,
-                "
+                "metrics": metrics.to_dict(),
             },
         )
     elif ndb_time > settings.slow_find_log_threshold:
@@ -164,7 +164,7 @@ async def _index_node_retrieval(
                 "client": x_ndb_client,
                 "query": item.model_dump_json(),
                 "time": search_time,
-                "
+                "metrics": metrics.to_dict(),
             },
         )
 
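For the slow-query logging rewritten above, the metrics object doubles as a plain timing store: index_search and results_merge are assumed to have been recorded earlier in the function (only the index_search timer is visible in this hunk), and the real thresholds come from settings. A hedged sketch with made-up threshold values:

from nucliadb.search.search.metrics import Metrics

def classify_query(metrics: Metrics, slow_node: float = 2.0, slow_find: float = 4.0) -> str:
    # slow_node / slow_find mimic settings.slow_node_query_log_threshold and
    # settings.slow_find_log_threshold; the values here are illustrative only
    ndb_time = metrics["index_search"] + metrics["results_merge"]
    if metrics["index_search"] > slow_node:
        return "slow nidx query"
    elif ndb_time > slow_find:
        return "slow find"
    return "ok"

m = Metrics("find")
m.set("index_search", 0.3)    # set() lets a test inject timings without running a query
m.set("results_merge", 0.1)
print(classify_query(m))      # -> ok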
nucliadb/search/search/graph_strategy.py
CHANGED
@@ -43,7 +43,7 @@ from nucliadb.search.search.find_merge import (
     hydrate_and_rerank,
 )
 from nucliadb.search.search.hydrator import ResourceHydrationOptions, TextBlockHydrationOptions
-from nucliadb.search.search.metrics import
+from nucliadb.search.search.metrics import Metrics
 from nucliadb.search.search.rerankers import (
     Reranker,
     RerankingOptions,
@@ -305,8 +305,8 @@ async def get_graph_results(
     origin: str,
     graph_strategy: GraphStrategy,
     text_block_reranker: Reranker,
+    metrics: Metrics,
     generative_model: Optional[str] = None,
-    metrics: RAGMetrics = RAGMetrics(),
     shards: Optional[list[str]] = None,
 ) -> tuple[KnowledgeboxFindResults, FindRequest]:
     relations = Relations(entities={})
nucliadb/search/search/metrics.py
CHANGED
@@ -19,7 +19,7 @@
 #
 import contextlib
 import time
-from typing import Optional
+from typing import Any, Optional, Union
 
 from nucliadb_telemetry import metrics
 
@@ -58,27 +58,55 @@ rag_histogram = metrics.Histogram(
     buckets=buckets,
 )
 
+MetricsData = dict[str, Union[int, float]]
 
-
-
-
-        self.
-        self.
-        self.
+
+class Metrics:
+    def __init__(self: "Metrics", id: str):
+        self.id = id
+        self.child_spans: list[Metrics] = []
+        self._metrics: MetricsData = {}
 
     @contextlib.contextmanager
     def time(self, step: str):
-
+        start_time = time.monotonic()
         try:
             yield
         finally:
-
+            elapsed = time.monotonic() - start_time
+            self._metrics[step] = elapsed
+            rag_histogram.observe(elapsed, labels={"step": step})
+
+    def child_span(self, id: str) -> "Metrics":
+        child_span = Metrics(id)
+        self.child_spans.append(child_span)
+        return child_span
+
+    def set(self, key: str, value: Union[int, float]):
+        self._metrics[key] = value
+
+    def get(self, key: str) -> Optional[Union[int, float]]:
+        return self._metrics.get(key)
+
+    def to_dict(self) -> MetricsData:
+        return self._metrics
 
-    def
-
+    def dump(self) -> dict[str, Any]:
+        result = {}
+        for child in self.child_spans:
+            result.update(child.dump())
+        result[self.id] = self.to_dict()
+        return result
 
-    def
-        return self.
+    def __getitem__(self, key: str) -> Union[int, float]:
+        return self._metrics[key]
+
+
+class AskMetrics(Metrics):
+    def __init__(self: "AskMetrics"):
+        super().__init__(id="ask")
+        self.global_start = time.monotonic()
+        self.first_chunk_yielded_at: Optional[float] = None
 
     def record_first_chunk_yielded(self):
         self.first_chunk_yielded_at = time.monotonic()
@@ -88,11 +116,3 @@ class RAGMetrics:
         if self.first_chunk_yielded_at is None:
             return None
         return self.first_chunk_yielded_at - self.global_start
-
-    def _start(self, step: str):
-        self._start_times[step] = time.monotonic()
-
-    def _end(self, step: str):
-        self._end_times[step] = time.monotonic()
-        elapsed = self.elapsed(step)
-        rag_histogram.observe(elapsed, labels={"step": step})
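Taken together, a hedged usage sketch: every Metrics.time() block stores its elapsed time in the span and also observes the shared rag_histogram with a step label, while AskMetrics additionally keeps a request-global clock so the time to the first streamed chunk can be reported. The chunk source below is hypothetical.

import asyncio

from nucliadb.search.search.metrics import AskMetrics

async def stream_with_metrics(chunks):
    metrics = AskMetrics()                       # starts the request-global clock
    first = True
    with metrics.time("stream_predict_answer"):  # recorded locally and on rag_histogram
        async for chunk in chunks:
            if first:
                metrics.record_first_chunk_yielded()
                first = False
            yield chunk
    print(metrics.get_first_chunk_time(), metrics["stream_predict_answer"])

async def main():
    async def fake_chunks():
        yield b"Hello,"
        yield b" world"
    async for _ in stream_with_metrics(fake_chunks()):
        pass

asyncio.run(main())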
nucliadb/search/search/predict_proxy.py
CHANGED
@@ -21,7 +21,7 @@ from enum import Enum
 from typing import Any, Optional, Union
 
 from fastapi.datastructures import QueryParams
-from fastapi.responses import
+from fastapi.responses import Response, StreamingResponse
 
 from nucliadb.common import datamanagers
 from nucliadb.search.predict import PredictEngine
@@ -42,20 +42,25 @@ class PredictProxiedEndpoints(str, Enum):
     REMI = "remi"
 
 
+ALLOWED_HEADERS = [
+    "Accept",  # To allow 'application/x-ndjson' on the /chat endpoint
+]
+
+
 async def predict_proxy(
     kbid: str,
     endpoint: PredictProxiedEndpoints,
     method: str,
     params: QueryParams,
     json: Optional[Any] = None,
-
+    headers: dict[str, str] = {},
+) -> Union[Response, StreamingResponse]:
     if not await exists_kb(kbid=kbid):
         raise datamanagers.exceptions.KnowledgeBoxNotFound()
 
     predict: PredictEngine = get_predict()
-
-
-    headers = predict.get_predict_headers(kbid)
+    predict_headers = predict.get_predict_headers(kbid)
+    user_headers = {k: v for k, v in headers.items() if k.capitalize() in ALLOWED_HEADERS}
 
     # Proxy the request to predict API
     predict_response = await predict.make_request(
@@ -63,22 +68,24 @@ async def predict_proxy(
         url=predict.get_predict_url(endpoint, kbid),
         json=json,
         params=params,
-        headers=
+        headers={**user_headers, **predict_headers},
     )
 
     # Proxy the response back to the client
     status_code = predict_response.status
-
+    media_type = predict_response.headers.get("Content-Type")
+    response: Union[Response, StreamingResponse]
     if predict_response.headers.get("Transfer-Encoding") == "chunked":
         response = StreamingResponse(
             content=predict_response.content.iter_any(),
             status_code=status_code,
-            media_type=
+            media_type=media_type,
         )
     else:
-        response =
-            content=await predict_response.
+        response = Response(
+            content=await predict_response.read(),
             status_code=status_code,
+            media_type=media_type,
         )
     nuclia_learning_id = predict_response.headers.get("NUCLIA-LEARNING-ID")
     if nuclia_learning_id:
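A hedged sketch of the header handling introduced above: only allow-listed client headers survive the proxy, and because predict_headers is unpacked last, the values coming from get_predict_headers() win on any collision. The X-NUCLIA-KBID header below is an invented example of a predict-side header, not necessarily a real one.

ALLOWED_HEADERS = [
    "Accept",  # the only client header forwarded to the predict API
]

def build_proxy_headers(client_headers: dict[str, str], predict_headers: dict[str, str]) -> dict[str, str]:
    user_headers = {k: v for k, v in client_headers.items() if k.capitalize() in ALLOWED_HEADERS}
    return {**user_headers, **predict_headers}  # predict-side headers take precedence

print(build_proxy_headers(
    {"accept": "application/x-ndjson", "authorization": "Bearer secret"},  # authorization is dropped
    {"X-NUCLIA-KBID": "kb-123"},
))
# -> {'accept': 'application/x-ndjson', 'X-NUCLIA-KBID': 'kb-123'}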
{nucliadb-6.4.0.post4227.dist-info → nucliadb-6.4.0.post4265.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: nucliadb
-Version: 6.4.0.post4227
+Version: 6.4.0.post4265
 Summary: NucliaDB
 Author-email: Nuclia <nucliadb@nuclia.com>
 License: AGPL
@@ -20,11 +20,11 @@ Classifier: Programming Language :: Python :: 3.12
 Classifier: Programming Language :: Python :: 3 :: Only
 Requires-Python: <4,>=3.9
 Description-Content-Type: text/markdown
-Requires-Dist: nucliadb-telemetry[all]>=6.4.0.post4227
-Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.4.0.post4227
-Requires-Dist: nucliadb-protos>=6.4.0.post4227
-Requires-Dist: nucliadb-models>=6.4.0.post4227
-Requires-Dist: nidx-protos>=6.4.0.post4227
+Requires-Dist: nucliadb-telemetry[all]>=6.4.0.post4265
+Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.4.0.post4265
+Requires-Dist: nucliadb-protos>=6.4.0.post4265
+Requires-Dist: nucliadb-models>=6.4.0.post4265
+Requires-Dist: nidx-protos>=6.4.0.post4265
 Requires-Dist: nucliadb-admin-assets>=1.0.0.post1224
 Requires-Dist: nuclia-models>=0.24.2
 Requires-Dist: uvicorn[standard]
{nucliadb-6.4.0.post4227.dist-info → nucliadb-6.4.0.post4265.dist-info}/RECORD
CHANGED
@@ -204,7 +204,7 @@ nucliadb/search/__init__.py,sha256=tnypbqcH4nBHbGpkINudhKgdLKpwXQCvDtPchUlsyY4,1
 nucliadb/search/app.py,sha256=-WEX1AZRA8R_9aeOo9ovOTwjXW_7VfwWN7N2ccSoqXg,3387
 nucliadb/search/lifecycle.py,sha256=hiylV-lxsAWkqTCulXBg0EIfMQdejSr8Zar0L_GLFT8,2218
 nucliadb/search/openapi.py,sha256=t3Wo_4baTrfPftg2BHsyLWNZ1MYn7ZRdW7ht-wFOgRs,1016
-nucliadb/search/predict.py,sha256=
+nucliadb/search/predict.py,sha256=BYkKL2-3-MNT8JnE7y7XTEMKMnynUm2y4VJZP1jRjdQ,22987
 nucliadb/search/predict_models.py,sha256=ZAe0dneUsPmV9uBar57cCFADCGOrYDsJHuqKlA5zWag,5937
 nucliadb/search/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 nucliadb/search/run.py,sha256=aFb-CXRi_C8YMpP_ivNj8KW1BYhADj88y8K9Lr_nUPI,1402
@@ -215,10 +215,10 @@ nucliadb/search/api/v1/__init__.py,sha256=DH16OYnw9jQ38OpKlmdXeoq2j40ZPXZRtGvClK
 nucliadb/search/api/v1/ask.py,sha256=b4tz33HNsfT5DXv_2DMc_jirnFsHuobreWkbAKkzj5o,5337
 nucliadb/search/api/v1/catalog.py,sha256=Nw4wIj4AjGp-p64FFVQFN4v2LFcV3A0UJIxfo3_XGmY,7670
 nucliadb/search/api/v1/feedback.py,sha256=kNLc4dHz2SXHzV0PwC1WiRAwY88fDptPcP-kO0q-FrQ,2620
-nucliadb/search/api/v1/find.py,sha256=
+nucliadb/search/api/v1/find.py,sha256=C4sTGFRS9tQFF8v1zhnHQvnExJoGDYi78bZTRfwhGrc,10831
 nucliadb/search/api/v1/graph.py,sha256=ItVpzJbqfDLjoIo2fTb2mKGCM1Z34sx7CBb3gNmj6IQ,4274
 nucliadb/search/api/v1/knowledgebox.py,sha256=e9xeLPUqnQTx33i4A8xuV93ENvtJGrpjPlLRbGJtAI8,8415
-nucliadb/search/api/v1/predict_proxy.py,sha256=
+nucliadb/search/api/v1/predict_proxy.py,sha256=Q03ZTvWp7Sq0x71t5Br4LHxTiYsRd6-GCb4YuKqhynM,3131
 nucliadb/search/api/v1/router.py,sha256=mtT07rBZcVfpa49doaw9b1tj3sdi3qLH0gn9Io6NYM0,988
 nucliadb/search/api/v1/search.py,sha256=Or-mUvmBAyh0Y55NqTYNXe_BWR0lLLaTSL2ChjJaE2M,12402
 nucliadb/search/api/v1/suggest.py,sha256=Em7ApddZNHMHjL_ZfXmUIVUk504f58J96JlxJXnIxaM,6438
@@ -237,17 +237,17 @@ nucliadb/search/search/cut.py,sha256=ytY0_GY7ocNjfxTb4aosxEp4ZfhQNDP--JkhEMGD298
 nucliadb/search/search/exceptions.py,sha256=klGLgAGGrXcSGix_W6418ZBMqDchAIGjN77ofkOScEI,1039
 nucliadb/search/search/fetch.py,sha256=eiljOKim-4OOEZn-3fyVZSYxztCH156BXYdqlIwVdN4,6181
 nucliadb/search/search/filters.py,sha256=1MkHlJjAQqoRCj7e5cEzK2HvBxGLE17I_omsjiklbtw,6476
-nucliadb/search/search/find.py,sha256=
+nucliadb/search/search/find.py,sha256=i1auc8visRakBwbbZGhyQgXNAmsaAVheisYi2xGjdKY,7925
 nucliadb/search/search/find_merge.py,sha256=c-7IlfjfdmWAvQOyM7IO3bKS1EQpnR4oi6pN6mwrQKw,19815
 nucliadb/search/search/graph_merge.py,sha256=y5V7X-BhjHsKDXE69tzQLIIKGm4XuaFrZXw0odcHVNM,3402
-nucliadb/search/search/graph_strategy.py,sha256=
+nucliadb/search/search/graph_strategy.py,sha256=zYfi1df982ZYOFtYSksnHEJvQn-ZZsCIFSruVZP_934,32891
 nucliadb/search/search/hydrator.py,sha256=-R37gCrGxkyaiHQalnTWHNG_FCx11Zucd7qA1vQCxuw,6985
 nucliadb/search/search/ingestion_agents.py,sha256=NeJr4EEX-bvFFMGvXOOwLv8uU7NuQ-ntJnnrhnKfMzY,3174
 nucliadb/search/search/merge.py,sha256=Abg9YblQJvH2jDvXVT45MNxaIpNa7TTpsiUSJqb3NDc,23307
-nucliadb/search/search/metrics.py,sha256=
+nucliadb/search/search/metrics.py,sha256=3I6IN0qDSmqIvUaWJmT3rt-Jyjs6LcvnKI8ZqCiuJPY,3501
 nucliadb/search/search/paragraphs.py,sha256=pNAEiYqJGGUVcEf7xf-PFMVqz0PX4Qb-WNG-_zPGN2o,7799
 nucliadb/search/search/pgcatalog.py,sha256=s_J98fsX_RuFXwpejpkGqG-tD9ELuzz4YQ6U3ew5h2g,9313
-nucliadb/search/search/predict_proxy.py,sha256=
+nucliadb/search/search/predict_proxy.py,sha256=JwgBeEg1j4LnCjPCvTUrnmOd9LceJAt3iAu4m9cmJBo,3390
 nucliadb/search/search/query.py,sha256=-gvKsyGmKYpsoEVzKkq3HJUMcs_3LD3TYUueOcJsTec,11511
 nucliadb/search/search/rank_fusion.py,sha256=xZtXhbmKb_56gs73u6KkFm2efvTATOSMmpOV2wrAIqE,9613
 nucliadb/search/search/rerankers.py,sha256=PvhExUb8zZYghiFHRgGotw6h6bU--Rft09wE8arvtAw,7424
@@ -255,11 +255,11 @@ nucliadb/search/search/shards.py,sha256=mc5DK-MoCv9AFhlXlOFHbPvetcyNDzTFOJ5rimK8
 nucliadb/search/search/summarize.py,sha256=ksmYPubEQvAQgfPdZHfzB_rR19B2ci4IYZ6jLdHxZo8,4996
 nucliadb/search/search/utils.py,sha256=ajRIXfdTF67dBVahQCXW-rSv6gJpUMPt3QhJrWqArTQ,2175
 nucliadb/search/search/chat/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
-nucliadb/search/search/chat/ask.py,sha256=
+nucliadb/search/search/chat/ask.py,sha256=tkU431_tZpFQ8Au9RpGrHO78D0vCKE-VaBT5BNTL0pA,37393
 nucliadb/search/search/chat/exceptions.py,sha256=Siy4GXW2L7oPhIR86H3WHBhE9lkV4A4YaAszuGGUf54,1356
 nucliadb/search/search/chat/images.py,sha256=PA8VWxT5_HUGfW1ULhKTK46UBsVyINtWWqEM1ulzX1E,3095
 nucliadb/search/search/chat/prompt.py,sha256=Jnja-Ss7skgnnDY8BymVfdeYsFPnIQFL8tEvcRXTKUE,47356
-nucliadb/search/search/chat/query.py,sha256=
+nucliadb/search/search/chat/query.py,sha256=yK7J8vwJT7gKeBPn21uYqQw0tEJJ_HRtGLSt2bnJEuQ,16895
 nucliadb/search/search/query_parser/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
 nucliadb/search/search/query_parser/exceptions.py,sha256=szAOXUZ27oNY-OSa9t2hQ5HHkQQC0EX1FZz_LluJHJE,1224
 nucliadb/search/search/query_parser/fetcher.py,sha256=SkvBRDfSKmuz-QygNKLAU4AhZhhDo1dnOZmt1zA28RA,16851
@@ -368,8 +368,8 @@ nucliadb/writer/tus/local.py,sha256=7jYa_w9b-N90jWgN2sQKkNcomqn6JMVBOVeDOVYJHto,
 nucliadb/writer/tus/s3.py,sha256=vF0NkFTXiXhXq3bCVXXVV-ED38ECVoUeeYViP8uMqcU,8357
 nucliadb/writer/tus/storage.py,sha256=ToqwjoYnjI4oIcwzkhha_MPxi-k4Jk3Lt55zRwaC1SM,2903
 nucliadb/writer/tus/utils.py,sha256=MSdVbRsRSZVdkaum69_0wku7X3p5wlZf4nr6E0GMKbw,2556
-nucliadb-6.4.0.post4227.dist-info/METADATA,sha256=
-nucliadb-6.4.0.post4227.dist-info/WHEEL,sha256=
-nucliadb-6.4.0.post4227.dist-info/entry_points.txt,sha256=
-nucliadb-6.4.0.post4227.dist-info/top_level.txt,sha256=
-nucliadb-6.4.0.post4227.dist-info/RECORD,,
+nucliadb-6.4.0.post4265.dist-info/METADATA,sha256=DdUvK3SLFsfNpySAdkMKFq9721wSRZ-R0n7fFHGEpxQ,4223
+nucliadb-6.4.0.post4265.dist-info/WHEEL,sha256=DnLRTWE75wApRYVsjgc6wsVswC54sMSJhAEd4xhDpBk,91
+nucliadb-6.4.0.post4265.dist-info/entry_points.txt,sha256=XqGfgFDuY3zXQc8ewXM2TRVjTModIq851zOsgrmaXx4,1268
+nucliadb-6.4.0.post4265.dist-info/top_level.txt,sha256=hwYhTVnX7jkQ9gJCkVrbqEG1M4lT2F_iPQND1fCzF80,20
+nucliadb-6.4.0.post4265.dist-info/RECORD,,
{nucliadb-6.4.0.post4227.dist-info → nucliadb-6.4.0.post4265.dist-info}/entry_points.txt
File without changes
{nucliadb-6.4.0.post4227.dist-info → nucliadb-6.4.0.post4265.dist-info}/top_level.txt
File without changes