nucliadb 6.5.1.post4539__py3-none-any.whl → 6.6.1.post649__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,90 @@
1
+ # Copyright (C) 2021 Bosutech XXI S.L.
2
+ #
3
+ # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
+ # For commercial licensing, contact us at info@nuclia.com.
5
+ #
6
+ # AGPL:
7
+ # This program is free software: you can redistribute it and/or modify
8
+ # it under the terms of the GNU Affero General Public License as
9
+ # published by the Free Software Foundation, either version 3 of the
10
+ # License, or (at your option) any later version.
11
+ #
12
+ # This program is distributed in the hope that it will be useful,
13
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
+ # GNU Affero General Public License for more details.
16
+ #
17
+ # You should have received a copy of the GNU Affero General Public License
18
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
+ #
20
+
21
+ """Migration #38
22
+
23
+ Backfill the catalog with labels from fields metadata
24
+
25
+ """
26
+
27
+ import logging
28
+ from typing import cast
29
+
30
+ from nucliadb.common import datamanagers
31
+ from nucliadb.common.maindb.pg import PGDriver, PGTransaction
32
+ from nucliadb.ingest.orm.index_message import get_resource_index_message
33
+ from nucliadb.ingest.orm.processor.pgcatalog import pgcatalog_update
34
+ from nucliadb.migrator.context import ExecutionContext
35
+ from nucliadb_protos import resources_pb2
36
+
37
+ logger = logging.getLogger(__name__)
38
+
39
+
40
async def migrate(context: ExecutionContext) -> None:
    """Global migration step: intentionally a no-op.

    This migration has no cluster-wide work; everything is done per
    Knowledge Box in ``migrate_kb``.
    """
41
+
42
+
43
async def migrate_kb(context: ExecutionContext, kbid: str) -> None:
    """Refresh the catalog entries of one Knowledge Box with field labels.

    Walks the ``resources`` table of the KB in key order, ``BATCH_SIZE`` rows
    at a time, and calls ``pgcatalog_update`` for every resource whose stored
    ``Basic`` message carries field classifications in its computed metadata.

    Does nothing when the KV driver is not a ``PGDriver``. The transaction is
    committed after each batch in which at least one resource was updated.
    """
    if not isinstance(context.kv_driver, PGDriver):
        return

    BATCH_SIZE = 100
    async with context.kv_driver.transaction() as txn:
        txn = cast(PGTransaction, txn)
        start = ""
        while True:
            async with txn.connection.cursor() as cur:
                # Page through the KB's resource keys ('/kbs/<kbid>/r/<rid>'),
                # resuming strictly after the last key of the previous batch.
                await cur.execute(
                    """
                    SELECT key, value FROM resources
                    WHERE key ~ ('^/kbs/' || %s || '/r/[^/]*$')
                    AND key > %s
                    ORDER BY key
                    LIMIT %s""",
                    (kbid, start, BATCH_SIZE),
                )

                batch = await cur.fetchall()
                if not batch:
                    # No rows left for this KB: migration finished
                    return

                # Only resources with labels in field computed metadata need reindexing
                pending_keys = [
                    key for key, basic_pb in batch if _has_field_classifications(basic_pb)
                ]
                # Rows are ordered by key, so the last one is the resume point
                start = batch[-1][0]

            logger.info(f"Reindexing {len(pending_keys)} catalog entries from {start}")
            for resource_key in pending_keys:
                # Key layout is '/kbs/<kbid>/r/<rid>', so the rid is the fifth segment
                rid = resource_key.split("/")[4]
                resource = await datamanagers.resources.get_resource(txn, kbid=kbid, rid=rid)
                if resource is None:
                    logger.warning(f"Could not load resource {rid} for kbid {kbid}")
                    continue

                index_message = await get_resource_index_message(resource, reindex=False)
                await pgcatalog_update(txn, kbid, resource, index_message)

            if pending_keys:
                await txn.commit()


def _has_field_classifications(serialized_basic: bytes) -> bool:
    """Tell whether a serialized ``Basic`` message has computed field classifications."""
    basic = resources_pb2.Basic()
    basic.ParseFromString(serialized_basic)
    return bool(basic.computedmetadata.field_classifications)
@@ -65,6 +65,21 @@ async def pgcatalog_update(txn: Transaction, kbid: str, resource: Resource, inde
65
65
  modified_at = created_at
66
66
 
67
67
  async with _pg_transaction(txn).connection.cursor() as cur:
68
+ # Do not index canceled labels
69
+ cancelled_labels = {
70
+ f"/l/{clf.labelset}/{clf.label}"
71
+ for clf in resource.basic.usermetadata.classifications
72
+ if clf.cancelled_by_user
73
+ }
74
+
75
+ # Labels from the resource and classification labels from each field
76
+ labels = [label for label in index_message.labels]
77
+ for classification in resource.basic.computedmetadata.field_classifications:
78
+ for clf in classification.classifications:
79
+ label = f"/l/{clf.labelset}/{clf.label}"
80
+ if label not in cancelled_labels:
81
+ labels.append(label)
82
+
68
83
  await cur.execute(
69
84
  """
70
85
  INSERT INTO catalog
@@ -83,7 +98,7 @@ async def pgcatalog_update(txn: Transaction, kbid: str, resource: Resource, inde
83
98
  "title": resource.basic.title,
84
99
  "created_at": created_at,
85
100
  "modified_at": modified_at,
86
- "labels": list(index_message.labels),
101
+ "labels": labels,
87
102
  "slug": resource.basic.slug,
88
103
  },
89
104
  )
@@ -99,7 +114,7 @@ async def pgcatalog_update(txn: Transaction, kbid: str, resource: Resource, inde
99
114
  {
100
115
  "kbid": resource.kb.kbid,
101
116
  "rid": resource.uuid,
102
- "facets": list(extract_facets(index_message.labels)),
117
+ "facets": list(extract_facets(labels)),
103
118
  },
104
119
  )
105
120
 
@@ -602,7 +602,7 @@ class Resource:
602
602
  FieldType.LINK,
603
603
  load=False,
604
604
  )
605
- maybe_update_basic_thumbnail(self.basic, link_extracted_data.link_thumbnail)
605
+ maybe_update_basic_thumbnail(self.basic, link_extracted_data.link_thumbnail, self.kb.kbid)
606
606
 
607
607
  await field_link.set_link_extracted_data(link_extracted_data)
608
608
 
@@ -661,7 +661,7 @@ class Resource:
661
661
  # uri can change after extraction
662
662
  await field_file.set_file_extracted_data(file_extracted_data)
663
663
  maybe_update_basic_icon(self.basic, file_extracted_data.icon)
664
- maybe_update_basic_thumbnail(self.basic, file_extracted_data.file_thumbnail)
664
+ maybe_update_basic_thumbnail(self.basic, file_extracted_data.file_thumbnail, self.kb.kbid)
665
665
  self.modified = True
666
666
 
667
667
  async def _should_update_resource_title_from_file_metadata(self) -> bool:
@@ -722,7 +722,9 @@ class Resource:
722
722
  )
723
723
  await field_obj.set_field_metadata(field_metadata)
724
724
 
725
- maybe_update_basic_thumbnail(self.basic, field_metadata.metadata.metadata.thumbnail)
725
+ maybe_update_basic_thumbnail(
726
+ self.basic, field_metadata.metadata.metadata.thumbnail, self.kb.kbid
727
+ )
726
728
 
727
729
  update_basic_computedmetadata_classifications(self.basic, field_metadata)
728
730
  self.modified = True
@@ -879,13 +881,23 @@ def maybe_update_basic_icon(basic: PBBasic, mimetype: Optional[str]) -> bool:
879
881
  return True
880
882
 
881
883
 
882
def maybe_update_basic_thumbnail(basic: PBBasic, thumbnail: Optional[CloudFile], kbid: str) -> bool:
    """Set ``basic.thumbnail`` from ``thumbnail`` when the resource has none yet.

    The stored value is the reader download URI derived from ``thumbnail.uri``,
    passed through ``fix_kbid_in_thumbnail`` with ``kbid``.

    Returns True when the thumbnail was set, False when ``basic`` already had a
    thumbnail or no ``thumbnail`` was provided.
    """
    if thumbnail is not None and not basic.thumbnail:
        basic.thumbnail = CloudLink.format_reader_download_uri(thumbnail.uri)
        fix_kbid_in_thumbnail(basic, kbid)
        return True
    return False
887
890
 
888
891
 
892
def fix_kbid_in_thumbnail(basic: PBBasic, kbid: str):
    """Rewrite the kbid segment of ``basic.thumbnail`` so it points at ``kbid``.

    Only thumbnails of the form ``/kb/<other-kbid>/...`` are touched. This is
    necessary for resources that have been backed up and are being restored
    into a different Knowledge Box. Anything not starting with ``/kb/``, or
    already under ``/kb/<kbid>/``, is left as is.
    """
    current = basic.thumbnail
    if not current.startswith("/kb/") or current.startswith(f"/kb/{kbid}/"):
        return
    # A value starting with "/kb/" always splits into at least
    # ["", "kb", <kbid>]; whatever follows the kbid stays in one piece.
    leading, kb_segment, _stale_kbid, *remainder = current.split("/", 3)
    basic.thumbnail = "/".join([leading, kb_segment, kbid, *remainder])
899
+
900
+
889
901
  def update_basic_languages(basic: Basic, languages: list[str]) -> bool:
890
902
  if len(languages) == 0:
891
903
  return False
@@ -25,7 +25,7 @@ import uuid
25
25
  from collections import defaultdict
26
26
  from contextlib import AsyncExitStack
27
27
  from enum import Enum
28
- from typing import Any, Optional, TypeVar
28
+ from typing import Any, Optional
29
29
 
30
30
  import aiohttp
31
31
  import backoff
@@ -49,10 +49,14 @@ from nucliadb_utils.utilities import Utility, clean_utility, get_utility, set_ut
49
49
 
50
50
  logger = logging.getLogger(__name__)
51
51
 
52
- _T = TypeVar("_T")
53
52
 
53
class ProcessingAPIUnavailableError(SendToProcessError):
    """The processing API answered with a 502 or 503 status.

    Listed in ``RETRIABLE_EXCEPTIONS`` alongside connection errors.
    """
54
54
 
55
- RETRIABLE_EXCEPTIONS = (aiohttp.client_exceptions.ClientConnectorError,)
55
+
56
+ RETRIABLE_EXCEPTIONS = (
57
+ aiohttp.client_exceptions.ClientConnectorError,
58
+ ProcessingAPIUnavailableError,
59
+ )
56
60
  MAX_TRIES = 4
57
61
 
58
62
 
@@ -409,6 +413,9 @@ class ProcessingEngine:
409
413
  raise LimitsExceededError(resp.status, data["detail"])
410
414
  elif resp.status == 429:
411
415
  raise LimitsExceededError(resp.status, "Rate limited")
416
+ elif resp.status in (502, 503):
417
+ logger.warning(f"Processing engine is not available, retrying. Status: {resp.status}")
418
+ raise ProcessingAPIUnavailableError()
412
419
  else:
413
420
  error_text = await resp.text()
414
421
  logger.warning(f"Error sending to process: {resp.status} {error_text}")
@@ -59,6 +59,7 @@ async def ask_knowledgebox_endpoint(
59
59
  kbid: str,
60
60
  item: AskRequest,
61
61
  x_ndb_client: NucliaDBClientType = Header(NucliaDBClientType.API),
62
+ x_show_consumption: bool = Header(default=False),
62
63
  x_nucliadb_user: str = Header(""),
63
64
  x_forwarded_for: str = Header(""),
64
65
  x_synchronous: bool = Header(
@@ -97,7 +98,13 @@ async def ask_knowledgebox_endpoint(
97
98
  return HTTPClientError(status_code=422, detail=detail)
98
99
 
99
100
  return await create_ask_response(
100
- kbid, item, x_nucliadb_user, x_ndb_client, x_forwarded_for, x_synchronous
101
+ kbid=kbid,
102
+ ask_request=item,
103
+ user_id=x_nucliadb_user,
104
+ client_type=x_ndb_client,
105
+ origin=x_forwarded_for,
106
+ x_synchronous=x_synchronous,
107
+ extra_predict_headers={"X-Show-Consumption": str(x_show_consumption).lower()},
101
108
  )
102
109
 
103
110
 
@@ -110,6 +117,7 @@ async def create_ask_response(
110
117
  origin: str,
111
118
  x_synchronous: bool,
112
119
  resource: Optional[str] = None,
120
+ extra_predict_headers: Optional[dict[str, str]] = None,
113
121
  ) -> Response:
114
122
  maybe_log_request_payload(kbid, "/ask", ask_request)
115
123
  ask_request.max_tokens = parse_max_tokens(ask_request.max_tokens)
@@ -122,6 +130,7 @@ async def create_ask_response(
122
130
  client_type=client_type,
123
131
  origin=origin,
124
132
  resource=resource,
133
+ extra_predict_headers=extra_predict_headers,
125
134
  )
126
135
  except AnswerJsonSchemaTooLong as err:
127
136
  return HTTPClientError(status_code=400, detail=str(err))
@@ -199,6 +199,8 @@ async def catalog(
199
199
  response_model=CatalogFacetsResponse,
200
200
  response_model_exclude_unset=True,
201
201
  tags=["Search"],
202
+ summary="Show facets in Knowledge Box",
203
+ description="List all facets in a Knowledge Box and how many resources they apply to",
202
204
  include_in_schema=False,
203
205
  )
204
206
  @requires(NucliaDBRoles.READER)
@@ -133,7 +133,8 @@ async def find_knowledgebox(
133
133
  rank_fusion: RankFusionName = fastapi_query(SearchParamDefaults.rank_fusion),
134
134
  reranker: Union[RerankerName, Reranker] = fastapi_query(SearchParamDefaults.reranker),
135
135
  search_configuration: Optional[str] = Query(
136
- default=None, description="Load find parameters from this configuration"
136
+ default=None,
137
+ description="Load find parameters from this configuration. Parameters in the request override parameters from the configuration.",
137
138
  ),
138
139
  x_ndb_client: NucliaDBClientType = Header(NucliaDBClientType.API),
139
140
  x_nucliadb_user: str = Header(""),
@@ -48,6 +48,7 @@ async def resource_ask_endpoint_by_uuid(
48
48
  kbid: str,
49
49
  rid: str,
50
50
  item: AskRequest,
51
+ x_show_consumption: bool = Header(default=False),
51
52
  x_ndb_client: NucliaDBClientType = Header(NucliaDBClientType.API),
52
53
  x_nucliadb_user: str = Header(""),
53
54
  x_forwarded_for: str = Header(""),
@@ -58,13 +59,14 @@ async def resource_ask_endpoint_by_uuid(
58
59
  ),
59
60
  ) -> Union[StreamingResponse, HTTPClientError, Response]:
60
61
  return await create_ask_response(
61
- kbid,
62
- item,
63
- x_nucliadb_user,
64
- x_ndb_client,
65
- x_forwarded_for,
66
- x_synchronous,
62
+ kbid=kbid,
63
+ ask_request=item,
64
+ user_id=x_nucliadb_user,
65
+ client_type=x_ndb_client,
66
+ origin=x_forwarded_for,
67
+ x_synchronous=x_synchronous,
67
68
  resource=rid,
69
+ extra_predict_headers={"X-Show-Consumption": str(x_show_consumption).lower()},
68
70
  )
69
71
 
70
72
 
@@ -83,6 +85,7 @@ async def resource_ask_endpoint_by_slug(
83
85
  kbid: str,
84
86
  slug: str,
85
87
  item: AskRequest,
88
+ x_show_consumption: bool = Header(default=False),
86
89
  x_ndb_client: NucliaDBClientType = Header(NucliaDBClientType.API),
87
90
  x_nucliadb_user: str = Header(""),
88
91
  x_forwarded_for: str = Header(""),
@@ -96,11 +99,12 @@ async def resource_ask_endpoint_by_slug(
96
99
  if resource_id is None:
97
100
  return HTTPClientError(status_code=404, detail="Resource not found")
98
101
  return await create_ask_response(
99
- kbid,
100
- item,
101
- x_nucliadb_user,
102
- x_ndb_client,
103
- x_forwarded_for,
104
- x_synchronous,
102
+ kbid=kbid,
103
+ ask_request=item,
104
+ user_id=x_nucliadb_user,
105
+ client_type=x_ndb_client,
106
+ origin=x_forwarded_for,
107
+ x_synchronous=x_synchronous,
105
108
  resource=resource_id,
109
+ extra_predict_headers={"X-Show-Consumption": str(x_show_consumption).lower()},
106
110
  )
@@ -19,7 +19,7 @@
19
19
  #
20
20
  from typing import Union
21
21
 
22
- from fastapi import Request
22
+ from fastapi import Header, Request
23
23
  from fastapi_versioning import version
24
24
 
25
25
  from nucliadb.common.datamanagers.exceptions import KnowledgeBoxNotFound
@@ -47,9 +47,14 @@ async def summarize_endpoint(
47
47
  request: Request,
48
48
  kbid: str,
49
49
  item: SummarizeRequest,
50
+ x_show_consumption: bool = Header(default=False),
50
51
  ) -> Union[SummarizedResponse, HTTPClientError]:
51
52
  try:
52
- return await summarize(kbid, item)
53
+ return await summarize(
54
+ kbid=kbid,
55
+ request=item,
56
+ extra_predict_headers={"X-Show-Consumption": str(x_show_consumption).lower()},
57
+ )
53
58
  except KnowledgeBoxNotFound:
54
59
  return HTTPClientError(status_code=404, detail="Knowledge box not found")
55
60
  except NoResourcesToSummarize:
@@ -293,7 +293,7 @@ class PredictEngine:
293
293
 
294
294
  @predict_observer.wrap({"type": "chat_ndjson"})
295
295
  async def chat_query_ndjson(
296
- self, kbid: str, item: ChatModel
296
+ self, kbid: str, item: ChatModel, extra_headers: Optional[dict[str, str]] = None
297
297
  ) -> tuple[str, str, AsyncGenerator[GenerativeChunk, None]]:
298
298
  """
299
299
  Chat query using the new stream format
@@ -314,7 +314,7 @@ class PredictEngine:
314
314
  "POST",
315
315
  url=self.get_predict_url(CHAT, kbid),
316
316
  json=item.model_dump(),
317
- headers=headers,
317
+ headers={**headers, **(extra_headers or {})},
318
318
  timeout=None,
319
319
  )
320
320
  await self.check_response(kbid, resp, expected_status=200)
@@ -396,7 +396,9 @@ class PredictEngine:
396
396
  return convert_relations(data)
397
397
 
398
398
  @predict_observer.wrap({"type": "summarize"})
399
- async def summarize(self, kbid: str, item: SummarizeModel) -> SummarizedResponse:
399
+ async def summarize(
400
+ self, kbid: str, item: SummarizeModel, extra_headers: Optional[dict[str, str]] = None
401
+ ) -> SummarizedResponse:
400
402
  try:
401
403
  self.check_nua_key_is_configured_for_onprem()
402
404
  except NUAKeyMissingError:
@@ -407,7 +409,7 @@ class PredictEngine:
407
409
  "POST",
408
410
  url=self.get_predict_url(SUMMARIZE, kbid),
409
411
  json=item.model_dump(),
410
- headers=self.get_predict_headers(kbid),
412
+ headers={**self.get_predict_headers(kbid), **(extra_headers or {})},
411
413
  timeout=None,
412
414
  )
413
415
  await self.check_response(kbid, resp, expected_status=200)
@@ -489,7 +491,7 @@ class DummyPredictEngine(PredictEngine):
489
491
  return RephraseResponse(rephrased_query=DUMMY_REPHRASE_QUERY, use_chat_history=None)
490
492
 
491
493
  async def chat_query_ndjson(
492
- self, kbid: str, item: ChatModel
494
+ self, kbid: str, item: ChatModel, extra_headers: Optional[dict[str, str]] = None
493
495
  ) -> tuple[str, str, AsyncGenerator[GenerativeChunk, None]]:
494
496
  self.calls.append(("chat_query_ndjson", item))
495
497
 
@@ -559,7 +561,9 @@ class DummyPredictEngine(PredictEngine):
559
561
  else:
560
562
  return DUMMY_RELATION_NODE
561
563
 
562
- async def summarize(self, kbid: str, item: SummarizeModel) -> SummarizedResponse:
564
+ async def summarize(
565
+ self, kbid: str, item: SummarizeModel, extra_headers: Optional[dict[str, str]] = None
566
+ ) -> SummarizedResponse:
563
567
  self.calls.append(("summarize", (kbid, item)))
564
568
  response = SummarizedResponse(
565
569
  summary="global summary",
@@ -22,6 +22,7 @@ import functools
22
22
  import json
23
23
  from typing import AsyncGenerator, Optional, cast
24
24
 
25
+ from nuclia_models.common.consumption import Consumption
25
26
  from nuclia_models.predict.generative_responses import (
26
27
  CitationsGenerativeResponse,
27
28
  GenerativeChunk,
@@ -83,6 +84,7 @@ from nucliadb_models.search import (
83
84
  ChatModel,
84
85
  ChatOptions,
85
86
  CitationsAskResponseItem,
87
+ ConsumptionResponseItem,
86
88
  DebugAskResponseItem,
87
89
  ErrorAskResponseItem,
88
90
  FindOptions,
@@ -106,6 +108,7 @@ from nucliadb_models.search import (
106
108
  StatusAskResponseItem,
107
109
  SyncAskMetadata,
108
110
  SyncAskResponse,
111
+ TokensDetail,
109
112
  UserPrompt,
110
113
  parse_custom_prompt,
111
114
  parse_rephrase_prompt,
@@ -169,6 +172,7 @@ class AskResult:
169
172
  self._citations: Optional[CitationsGenerativeResponse] = None
170
173
  self._metadata: Optional[MetaGenerativeResponse] = None
171
174
  self._relations: Optional[Relations] = None
175
+ self._consumption: Optional[Consumption] = None
172
176
 
173
177
  @property
174
178
  def status_code(self) -> AnswerStatusCode:
@@ -299,6 +303,20 @@ class AskResult:
299
303
  ),
300
304
  )
301
305
 
306
+ if self._consumption is not None:
307
+ yield ConsumptionResponseItem(
308
+ normalized_tokens=TokensDetail(
309
+ input=self._consumption.normalized_tokens.input,
310
+ output=self._consumption.normalized_tokens.output,
311
+ image=self._consumption.normalized_tokens.image,
312
+ ),
313
+ customer_key_tokens=TokensDetail(
314
+ input=self._consumption.customer_key_tokens.input,
315
+ output=self._consumption.customer_key_tokens.output,
316
+ image=self._consumption.customer_key_tokens.image,
317
+ ),
318
+ )
319
+
302
320
  # Stream out the relations results
303
321
  should_query_relations = (
304
322
  self.ask_request_with_relations and self.status_code == AnswerStatusCode.SUCCESS
@@ -341,6 +359,7 @@ class AskResult:
341
359
  generative_total=self._metadata.timings.get("generative"),
342
360
  ),
343
361
  )
362
+
344
363
  citations = {}
345
364
  if self._citations is not None:
346
365
  citations = self._citations.citations
@@ -373,6 +392,7 @@ class AskResult:
373
392
  prequeries=prequeries_results,
374
393
  citations=citations,
375
394
  metadata=metadata,
395
+ consumption=self._consumption,
376
396
  learning_id=self.nuclia_learning_id or "",
377
397
  augmented_context=self.augmented_context,
378
398
  )
@@ -424,6 +444,8 @@ class AskResult:
424
444
  self._citations = item
425
445
  elif isinstance(item, MetaGenerativeResponse):
426
446
  self._metadata = item
447
+ elif isinstance(item, Consumption):
448
+ self._consumption = item
427
449
  else:
428
450
  logger.warning(
429
451
  f"Unexpected item in predict answer stream: {item}",
@@ -486,6 +508,7 @@ async def ask(
486
508
  client_type: NucliaDBClientType,
487
509
  origin: str,
488
510
  resource: Optional[str] = None,
511
+ extra_predict_headers: Optional[dict[str, str]] = None,
489
512
  ) -> AskResult:
490
513
  metrics = AskMetrics()
491
514
  chat_history = ask_request.chat_history or []
@@ -613,7 +636,9 @@ async def ask(
613
636
  nuclia_learning_id,
614
637
  nuclia_learning_model,
615
638
  predict_answer_stream,
616
- ) = await predict.chat_query_ndjson(kbid, chat_model)
639
+ ) = await predict.chat_query_ndjson(
640
+ kbid=kbid, item=chat_model, extra_headers=extra_predict_headers
641
+ )
617
642
 
618
643
  auditor = ChatAuditor(
619
644
  kbid=kbid,
@@ -69,6 +69,7 @@ from nucliadb_models.search import (
69
69
  RagStrategyName,
70
70
  TableImageStrategy,
71
71
  TextBlockAugmentationType,
72
+ TextPosition,
72
73
  )
73
74
  from nucliadb_protos import resources_pb2
74
75
  from nucliadb_protos.resources_pb2 import ExtractedText, FieldComputedMetadata
@@ -107,6 +108,9 @@ class CappedPromptContext:
107
108
  def __getitem__(self, key: str) -> str:
108
109
  return self.output.__getitem__(key)
109
110
 
111
def __contains__(self, key: str) -> bool:
    # Membership test delegated to ``self.output`` so callers can write
    # ``key in context`` instead of reaching into ``context.output``.
    return key in self.output
113
+
110
114
  def __delitem__(self, key: str) -> None:
111
115
  try:
112
116
  self.output.__delitem__(key)
@@ -395,7 +399,10 @@ def parse_text_block_id(text_block_id: str) -> TextBlockId:
395
399
 
396
400
 
397
401
  async def extend_prompt_context_with_origin_metadata(
398
- context, kbid, text_block_ids: list[TextBlockId], augmented_context: AugmentedContext
402
+ context: CappedPromptContext,
403
+ kbid,
404
+ text_block_ids: list[TextBlockId],
405
+ augmented_context: AugmentedContext,
399
406
  ):
400
407
  async def _get_origin(kbid: str, rid: str) -> tuple[str, Optional[Origin]]:
401
408
  origin = None
@@ -411,7 +418,7 @@ async def extend_prompt_context_with_origin_metadata(
411
418
  rid_to_origin = {rid: origin for rid, origin in origins if origin is not None}
412
419
  for tb_id in text_block_ids:
413
420
  origin = rid_to_origin.get(tb_id.rid)
414
- if origin is not None and tb_id.full() in context.output:
421
+ if origin is not None and tb_id.full() in context:
415
422
  text = context.output.pop(tb_id.full())
416
423
  extended_text = text + f"\n\nDOCUMENT METADATA AT ORIGIN:\n{to_yaml(origin)}"
417
424
  context[tb_id.full()] = extended_text
@@ -424,7 +431,10 @@ async def extend_prompt_context_with_origin_metadata(
424
431
 
425
432
 
426
433
  async def extend_prompt_context_with_classification_labels(
427
- context, kbid, text_block_ids: list[TextBlockId], augmented_context: AugmentedContext
434
+ context: CappedPromptContext,
435
+ kbid: str,
436
+ text_block_ids: list[TextBlockId],
437
+ augmented_context: AugmentedContext,
428
438
  ):
429
439
  async def _get_labels(kbid: str, _id: TextBlockId) -> tuple[TextBlockId, list[tuple[str, str]]]:
430
440
  fid = _id if isinstance(_id, FieldId) else _id.field_id
@@ -449,7 +459,7 @@ async def extend_prompt_context_with_classification_labels(
449
459
  tb_id_to_labels = {tb_id: labels for tb_id, labels in classif_labels if len(labels) > 0}
450
460
  for tb_id in text_block_ids:
451
461
  labels = tb_id_to_labels.get(tb_id)
452
- if labels is not None and tb_id.full() in context.output:
462
+ if labels is not None and tb_id.full() in context:
453
463
  text = context.output.pop(tb_id.full())
454
464
 
455
465
  labels_text = "DOCUMENT CLASSIFICATION LABELS:"
@@ -467,7 +477,10 @@ async def extend_prompt_context_with_classification_labels(
467
477
 
468
478
 
469
479
  async def extend_prompt_context_with_ner(
470
- context, kbid, text_block_ids: list[TextBlockId], augmented_context: AugmentedContext
480
+ context: CappedPromptContext,
481
+ kbid: str,
482
+ text_block_ids: list[TextBlockId],
483
+ augmented_context: AugmentedContext,
471
484
  ):
472
485
  async def _get_ners(kbid: str, _id: TextBlockId) -> tuple[TextBlockId, dict[str, set[str]]]:
473
486
  fid = _id if isinstance(_id, FieldId) else _id.field_id
@@ -494,7 +507,7 @@ async def extend_prompt_context_with_ner(
494
507
  tb_id_to_ners = {tb_id: ners for tb_id, ners in nerss if len(ners) > 0}
495
508
  for tb_id in text_block_ids:
496
509
  ners = tb_id_to_ners.get(tb_id)
497
- if ners is not None and tb_id.full() in context.output:
510
+ if ners is not None and tb_id.full() in context:
498
511
  text = context.output.pop(tb_id.full())
499
512
 
500
513
  ners_text = "DOCUMENT NAMED ENTITIES (NERs):"
@@ -515,7 +528,10 @@ async def extend_prompt_context_with_ner(
515
528
 
516
529
 
517
530
  async def extend_prompt_context_with_extra_metadata(
518
- context, kbid, text_block_ids: list[TextBlockId], augmented_context: AugmentedContext
531
+ context: CappedPromptContext,
532
+ kbid: str,
533
+ text_block_ids: list[TextBlockId],
534
+ augmented_context: AugmentedContext,
519
535
  ):
520
536
  async def _get_extra(kbid: str, rid: str) -> tuple[str, Optional[Extra]]:
521
537
  extra = None
@@ -531,7 +547,7 @@ async def extend_prompt_context_with_extra_metadata(
531
547
  rid_to_extra = {rid: extra for rid, extra in extras if extra is not None}
532
548
  for tb_id in text_block_ids:
533
549
  extra = rid_to_extra.get(tb_id.rid)
534
- if extra is not None and tb_id.full() in context.output:
550
+ if extra is not None and tb_id.full() in context:
535
551
  text = context.output.pop(tb_id.full())
536
552
  extended_text = text + f"\n\nDOCUMENT EXTRA METADATA:\n{to_yaml(extra)}"
537
553
  context[tb_id.full()] = extended_text
@@ -600,7 +616,7 @@ async def field_extension_prompt_context(
600
616
  if tb_id.startswith(field.full()):
601
617
  del context[tb_id]
602
618
  # Add the extracted text of each field to the beginning of the context.
603
- if field.full() not in context.output:
619
+ if field.full() not in context:
604
620
  context[field.full()] = extracted_text
605
621
  augmented_context.fields[field.full()] = AugmentedTextBlock(
606
622
  id=field.full(),
@@ -610,7 +626,7 @@ async def field_extension_prompt_context(
610
626
 
611
627
  # Add the extracted text of each paragraph to the end of the context.
612
628
  for paragraph in ordered_paragraphs:
613
- if paragraph.id not in context.output:
629
+ if paragraph.id not in context:
614
630
  context[paragraph.id] = _clean_paragraph_text(paragraph)
615
631
 
616
632
 
@@ -668,7 +684,7 @@ async def neighbouring_paragraphs_prompt_context(
668
684
  if field_extracted_text is None:
669
685
  continue
670
686
  ptext = _get_paragraph_text(field_extracted_text, pid)
671
- if ptext:
687
+ if ptext and pid.full() not in context:
672
688
  context[pid.full()] = ptext
673
689
 
674
690
  # Now add the neighbouring paragraphs
@@ -702,8 +718,8 @@ async def neighbouring_paragraphs_prompt_context(
702
718
  npid = field_pids[neighbour_index]
703
719
  except IndexError:
704
720
  continue
705
- if npid in retrieved_paragraphs_ids or npid.full() in context.output:
706
- # Already added above
721
+ if npid in retrieved_paragraphs_ids or npid.full() in context:
722
+ # Already added
707
723
  continue
708
724
  ptext = _get_paragraph_text(field_extracted_text, npid)
709
725
  if not ptext:
@@ -712,6 +728,7 @@ async def neighbouring_paragraphs_prompt_context(
712
728
  augmented_context.paragraphs[npid.full()] = AugmentedTextBlock(
713
729
  id=npid.full(),
714
730
  text=ptext,
731
+ position=get_text_position(npid, neighbour_index, field_extracted_metadata),
715
732
  parent=pid.full(),
716
733
  augmentation_type=TextBlockAugmentationType.NEIGHBOURING_PARAGRAPHS,
717
734
  )
@@ -719,6 +736,30 @@ async def neighbouring_paragraphs_prompt_context(
719
736
  metrics.set("neighbouring_paragraphs_ops", len(augmented_context.paragraphs))
720
737
 
721
738
 
739
def get_text_position(
    paragraph_id: ParagraphId, index: int, field_metadata: FieldComputedMetadata
) -> Optional[TextPosition]:
    """Build the ``TextPosition`` of the paragraph stored at ``index``.

    The paragraph metadata is read from the split metadata of the paragraph's
    subfield when it has one, otherwise from the field's main metadata.

    Returns None when ``index`` is out of range for that paragraph list. The
    page number is only filled in when the paragraph metadata has a ``page``.
    """
    subfield_id = paragraph_id.field_id.subfield_id
    metadata = (
        field_metadata.split_metadata[subfield_id] if subfield_id else field_metadata.metadata
    )
    try:
        paragraph = metadata.paragraphs[index]
    except IndexError:
        return None
    return TextPosition(
        page_number=paragraph.page.page if paragraph.HasField("page") else None,
        index=index,
        start=paragraph.start,
        end=paragraph.end,
        start_seconds=list(paragraph.start_seconds),
        end_seconds=list(paragraph.end_seconds),
    )
761
+
762
+
722
763
  def get_neighbouring_indices(
723
764
  index: int, before: int, after: int, field_pids: list[ParagraphId]
724
765
  ) -> list[int]:
@@ -742,7 +783,8 @@ async def conversation_prompt_context(
742
783
  storage = await get_storage()
743
784
  kb = KnowledgeBoxORM(txn, storage, kbid)
744
785
  for paragraph in ordered_paragraphs:
745
- context[paragraph.id] = _clean_paragraph_text(paragraph)
786
+ if paragraph.id not in context:
787
+ context[paragraph.id] = _clean_paragraph_text(paragraph)
746
788
 
747
789
  # If the paragraph is a conversation and it matches semantically, we assume we
748
790
  # have matched with the question, therefore try to include the answer to the
@@ -780,7 +822,7 @@ async def conversation_prompt_context(
780
822
  text = message.content.text.strip()
781
823
  pid = f"{rid}/{field_type}/{field_id}/{ident}/0-{len(text) + 1}"
782
824
  attachments.extend(message.content.attachments_fields)
783
- if pid in context.output:
825
+ if pid in context:
784
826
  continue
785
827
  context[pid] = text
786
828
  augmented_context.paragraphs[pid] = AugmentedTextBlock(
@@ -802,7 +844,7 @@ async def conversation_prompt_context(
802
844
  text = message.content.text.strip()
803
845
  attachments.extend(message.content.attachments_fields)
804
846
  pid = f"{rid}/{field_type}/{field_id}/{ident}/0-{len(text) + 1}"
805
- if pid in context.output:
847
+ if pid in context:
806
848
  continue
807
849
  context[pid] = text
808
850
  augmented_context.paragraphs[pid] = AugmentedTextBlock(
@@ -834,7 +876,7 @@ async def conversation_prompt_context(
834
876
  text = message.content.text.strip()
835
877
  attachments.extend(message.content.attachments_fields)
836
878
  pid = f"{rid}/{field_type}/{field_id}/{message.ident}/0-{len(message.content.text) + 1}"
837
- if pid in context.output:
879
+ if pid in context:
838
880
  continue
839
881
  context[pid] = text
840
882
  augmented_context.paragraphs[pid] = AugmentedTextBlock(
@@ -854,7 +896,7 @@ async def conversation_prompt_context(
854
896
  extracted_text = await field.get_extracted_text()
855
897
  if extracted_text is not None:
856
898
  pid = f"{rid}/{field_type}/{attachment.field_id}/0-{len(extracted_text.text) + 1}"
857
- if pid in context.output:
899
+ if pid in context:
858
900
  continue
859
901
  text = f"Attachment {attachment.field_id}: {extracted_text.text}\n\n"
860
902
  context[pid] = text
@@ -977,9 +1019,9 @@ async def hierarchy_prompt_context(
977
1019
  paragraph_text = _clean_paragraph_text(paragraph)
978
1020
  context[paragraph.id] = paragraph_text
979
1021
  if paragraph.id in augmented_paragraphs:
980
- field_id = ParagraphId.from_string(paragraph.id).field_id.full()
981
- augmented_context.fields[field_id] = AugmentedTextBlock(
982
- id=field_id, text=paragraph_text, augmentation_type=TextBlockAugmentationType.HIERARCHY
1022
+ pid = ParagraphId.from_string(paragraph.id)
1023
+ augmented_context.paragraphs[pid.full()] = AugmentedTextBlock(
1024
+ id=pid.full(), text=paragraph_text, augmentation_type=TextBlockAugmentationType.HIERARCHY
983
1025
  )
984
1026
  return
985
1027
 
@@ -168,7 +168,7 @@ def _prepare_query_search(query: search_models.CatalogQuery, params: dict[str, A
168
168
  # executed per query is not a problem.
169
169
 
170
170
  # Remove zero-length words from the split
171
- params["query"] = [word for word in SPLIT_REGEX.split(query.query) if word]
171
+ params["query"] = [word.lower() for word in SPLIT_REGEX.split(query.query) if word]
172
172
  return sql.SQL("regexp_split_to_array(lower(title), '\\W') @> %(query)s")
173
173
  elif query.match == search_models.CatalogQueryMatch.Fuzzy:
174
174
  params["query"] = query.query
@@ -19,7 +19,7 @@
19
19
  #
20
20
  import json
21
21
  from enum import Enum
22
- from typing import Any, AsyncIterable, Optional, Union
22
+ from typing import Any, Optional, Union
23
23
 
24
24
  import aiohttp
25
25
  from fastapi.datastructures import QueryParams
@@ -63,6 +63,7 @@ class PredictProxiedEndpoints(str, Enum):
63
63
 
64
64
  ALLOWED_HEADERS = [
65
65
  "Accept", # To allow 'application/x-ndjson' on the /chat endpoint
66
+ "X-show-consumption", # To show token consumption in the response
66
67
  ]
67
68
 
68
69
  PREDICT_ANSWER_METRIC = "predict_answer_proxy_metric"
@@ -171,21 +172,13 @@ async def chat_streaming_generator(
171
172
  user_query: str,
172
173
  is_json: bool,
173
174
  ):
174
- stream: AsyncIterable[bytes]
175
- if is_json:
176
- # ndjson: stream lines
177
- stream = predict_response.content
178
- else:
179
- # plain text: stream chunks (last chunk is status)
180
- stream = predict_response.content.iter_any()
181
-
182
175
  first = True
183
176
  status_code = AnswerStatusCode.ERROR.value
184
177
  text_answer = ""
185
178
  json_object = None
186
179
  metrics = AskMetrics()
187
180
  with metrics.time(PREDICT_ANSWER_METRIC):
188
- async for chunk in stream:
181
+ async for chunk in predict_response.content:
189
182
  if first:
190
183
  metrics.record_first_chunk_yielded()
191
184
  first = False
@@ -211,7 +204,11 @@ async def chat_streaming_generator(
211
204
 
212
205
  if is_json is False and chunk: # Ensure chunk is not empty before decoding
213
206
  # If response is text the status_code comes at the last chunk of data
214
- status_code = chunk.decode()
207
+ last_chunk = chunk.decode()
208
+ if last_chunk[-1] == "0":
209
+ status_code = "0"
210
+ else:
211
+ status_code = last_chunk[-2:]
215
212
 
216
213
  audit_predict_proxy_endpoint(
217
214
  headers=predict_response.headers,
@@ -212,7 +212,7 @@ def split_labels(
212
212
  else:
213
213
  paragraph_expr = FilterExpression()
214
214
  filter_list = getattr(paragraph_expr, combinator)
215
- filter_list.extend(paragraph)
215
+ filter_list.operands.extend(paragraph)
216
216
 
217
217
  return field_expr, paragraph_expr
218
218
 
@@ -45,7 +45,9 @@ class NoResourcesToSummarize(Exception):
45
45
  pass
46
46
 
47
47
 
48
- async def summarize(kbid: str, request: SummarizeRequest) -> SummarizedResponse:
48
+ async def summarize(
49
+ kbid: str, request: SummarizeRequest, extra_predict_headers: Optional[dict[str, str]]
50
+ ) -> SummarizedResponse:
49
51
  predict_request = SummarizeModel()
50
52
  predict_request.generative_model = request.generative_model
51
53
  predict_request.user_prompt = request.user_prompt
@@ -62,7 +64,7 @@ async def summarize(kbid: str, request: SummarizeRequest) -> SummarizedResponse:
62
64
  raise NoResourcesToSummarize()
63
65
 
64
66
  predict = get_predict()
65
- return await predict.summarize(kbid, predict_request)
67
+ return await predict.summarize(kbid=kbid, item=predict_request, extra_headers=extra_predict_headers)
66
68
 
67
69
 
68
70
  async def get_extracted_texts(kbid: str, resource_uuids_or_slugs: list[str]) -> ExtractedTexts:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nucliadb
3
- Version: 6.5.1.post4539
3
+ Version: 6.6.1.post649
4
4
  Summary: NucliaDB
5
5
  Author-email: Nuclia <nucliadb@nuclia.com>
6
6
  License-Expression: AGPL-3.0-or-later
@@ -19,13 +19,13 @@ Classifier: Programming Language :: Python :: 3.12
19
19
  Classifier: Programming Language :: Python :: 3 :: Only
20
20
  Requires-Python: <4,>=3.9
21
21
  Description-Content-Type: text/markdown
22
- Requires-Dist: nucliadb-telemetry[all]>=6.5.1.post4539
23
- Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.5.1.post4539
24
- Requires-Dist: nucliadb-protos>=6.5.1.post4539
25
- Requires-Dist: nucliadb-models>=6.5.1.post4539
26
- Requires-Dist: nidx-protos>=6.5.1.post4539
22
+ Requires-Dist: nucliadb-telemetry[all]>=6.6.1.post649
23
+ Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.6.1.post649
24
+ Requires-Dist: nucliadb-protos>=6.6.1.post649
25
+ Requires-Dist: nucliadb-models>=6.6.1.post649
26
+ Requires-Dist: nidx-protos>=6.6.1.post649
27
27
  Requires-Dist: nucliadb-admin-assets>=1.0.0.post1224
28
- Requires-Dist: nuclia-models>=0.24.2
28
+ Requires-Dist: nuclia-models>=0.43.0
29
29
  Requires-Dist: uvicorn[standard]
30
30
  Requires-Dist: argdantic
31
31
  Requires-Dist: aiohttp>=3.11.11
@@ -33,6 +33,7 @@ migrations/0034_rollover_nidx_texts_3.py,sha256=t19QtWUgHxmTaBPoR1DooAby2IYmkLTQ
33
33
  migrations/0035_rollover_nidx_texts_4.py,sha256=W0_AUd01pjMpYMDC3yqF6HzDLgcnnPprL80kfyb1WZI,1187
34
34
  migrations/0036_backfill_catalog_slug.py,sha256=mizRM-HfPswKq4iEmqofu4kIT6Gd97ruT3qhb257vZk,2954
35
35
  migrations/0037_backfill_catalog_facets.py,sha256=KAf3VKbKePw7ykDnJi47LyJ7pK1JwYkwMxrsXUnbt9g,2788
36
+ migrations/0038_backfill_catalog_field_labels.py,sha256=EKJwJfU0p1nDq7s71CpGhaX4t1iD2d1ZCzTmLcUAhDs,3382
36
37
  migrations/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
37
38
  migrations/pg/0001_bootstrap.py,sha256=3O_P17l0d0h48nebN6VQLXzM_B7S7zvDpaLR0koVgWE,1274
38
39
  migrations/pg/0002_catalog.py,sha256=Rsleecu351Ty19kYZgOpqX5G3MEAY8nMxCJrAeuS2Mw,1690
@@ -132,7 +133,7 @@ nucliadb/export_import/utils.py,sha256=XV3tJJdhgnVJRSj8AxZjgeipONtB107M185HVJmHp
132
133
  nucliadb/ingest/__init__.py,sha256=fsw3C38VP50km3R-nHL775LNGPpJ4JxqXJ2Ib1f5SqE,1011
133
134
  nucliadb/ingest/app.py,sha256=qiPad2eWgudRdLq0tB0MQZOxOezXO7QBK_ZpPNKQZO0,7378
134
135
  nucliadb/ingest/partitions.py,sha256=2NIhMYbNT0TNBL6bX1UMSi7vxFGICstCKEqsB0TXHOE,2410
135
- nucliadb/ingest/processing.py,sha256=QmkHq-BU4vub7JRWe9VHvQ2DcAmT6-CzgFXuZxXhcBU,20953
136
+ nucliadb/ingest/processing.py,sha256=gAm591llkscMq0abhxQmpChDZIzto-76Dni4f7Flhfw,21229
136
137
  nucliadb/ingest/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
137
138
  nucliadb/ingest/serialize.py,sha256=-TIjibJTbMqAowzRvyrG3R209vKqBZqXpdrQL9Dq4lo,16135
138
139
  nucliadb/ingest/settings.py,sha256=5qJICxwYb028a2iAhVbxOJB5X-hWtDLtiya-YhWostw,3179
@@ -162,12 +163,12 @@ nucliadb/ingest/orm/exceptions.py,sha256=k4Esv4NtL4TrGTcsQpwrSfDhPQpiYcRbB1SpYmB
162
163
  nucliadb/ingest/orm/index_message.py,sha256=DWMTHJoVamUbK8opKl5csDvxfgz7c2j7phG1Ut4yIxk,15724
163
164
  nucliadb/ingest/orm/knowledgebox.py,sha256=_rkeTMIXMhR64gbYtZpFHoUHghV2DTJ2lUBqZsoqC_4,23898
164
165
  nucliadb/ingest/orm/metrics.py,sha256=OiuggTh-n3kZHA2G73NEUdIlh8c3yFrbusI88DK-Mko,1273
165
- nucliadb/ingest/orm/resource.py,sha256=OZEdoaaP56VaybuAbUHexGRMmM9C8-S0340jIHqamcQ,37177
166
+ nucliadb/ingest/orm/resource.py,sha256=yB0HWC3jc_1b-zXu-3FJCKOdAPPSb1aRBHpbZhsvyQk,37749
166
167
  nucliadb/ingest/orm/utils.py,sha256=fCQRuyecgqhaY7mcBG93oaXMkzkKb9BFjOcy4-ZiSNw,2693
167
168
  nucliadb/ingest/orm/processor/__init__.py,sha256=Aqd9wCNTvggkMkCY3WvoI8spdr94Jnqk-0iq9XpLs18,922
168
169
  nucliadb/ingest/orm/processor/auditing.py,sha256=TeYhXGJRyQ7ROytbb2u8R0fIh_FYi3HgTu3S1ribY3U,4623
169
170
  nucliadb/ingest/orm/processor/data_augmentation.py,sha256=v-pj4GbBWSuO8dQyahs5UDr5ghsyfhCZDS0ftKd6ZYc,5179
170
- nucliadb/ingest/orm/processor/pgcatalog.py,sha256=GpzQv0_iWTHbM90J0rAz_QIh_TMv1XbghyDgs8tk_8M,4014
171
+ nucliadb/ingest/orm/processor/pgcatalog.py,sha256=VPQ_Evme7xmmGoQ45zt0Am0yPkaD4hxN1r5rEaVt6s8,4633
171
172
  nucliadb/ingest/orm/processor/processor.py,sha256=jaEBwbv--WyoC8zcdxWAyF0dAzVA5crVDJl56Bqv1eI,31444
172
173
  nucliadb/ingest/orm/processor/sequence_manager.py,sha256=uqEphtI1Ir_yk9jRl2gPf7BlzzXWovbARY5MNZSBI_8,1704
173
174
  nucliadb/ingest/service/__init__.py,sha256=LHQFUkdmNBOWqBG0Md9sMMI7g5TQZ-hLAnhw6ZblrJg,2002
@@ -212,7 +213,7 @@ nucliadb/search/__init__.py,sha256=tnypbqcH4nBHbGpkINudhKgdLKpwXQCvDtPchUlsyY4,1
212
213
  nucliadb/search/app.py,sha256=-WEX1AZRA8R_9aeOo9ovOTwjXW_7VfwWN7N2ccSoqXg,3387
213
214
  nucliadb/search/lifecycle.py,sha256=hiylV-lxsAWkqTCulXBg0EIfMQdejSr8Zar0L_GLFT8,2218
214
215
  nucliadb/search/openapi.py,sha256=t3Wo_4baTrfPftg2BHsyLWNZ1MYn7ZRdW7ht-wFOgRs,1016
215
- nucliadb/search/predict.py,sha256=__0qwIU2CIRYRTYsbG9zZEjXXrxNe8puZWYJIyOT6dg,23492
216
+ nucliadb/search/predict.py,sha256=xZtZaydg1pzXOSEDg0xyWNbbgA4zMQ59gbHi0wNuAxk,23770
216
217
  nucliadb/search/predict_models.py,sha256=pm4ykuWH9bTXxj5RlI2F6pmXSXOVt64WL_sRlc2u6Tk,6144
217
218
  nucliadb/search/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
218
219
  nucliadb/search/run.py,sha256=aFb-CXRi_C8YMpP_ivNj8KW1BYhADj88y8K9Lr_nUPI,1402
@@ -220,20 +221,20 @@ nucliadb/search/settings.py,sha256=vem3EcyYlTPSim0kEK-xe-erF4BZg0CT_LAb8ZRQAE8,1
220
221
  nucliadb/search/utilities.py,sha256=9SsRDw0rJVXVoLBfF7rBb6q080h-thZc7u8uRcTiBeY,1037
221
222
  nucliadb/search/api/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
222
223
  nucliadb/search/api/v1/__init__.py,sha256=DH16OYnw9jQ38OpKlmdXeoq2j40ZPXZRtGvClKOkMhw,1239
223
- nucliadb/search/api/v1/ask.py,sha256=b4tz33HNsfT5DXv_2DMc_jirnFsHuobreWkbAKkzj5o,5337
224
- nucliadb/search/api/v1/catalog.py,sha256=aBNhgg-8Dj4kiB9IYshe46ph1FaeaPCjyxnNPgUu3AQ,7942
224
+ nucliadb/search/api/v1/ask.py,sha256=hZUnk1opZuXp1IwTiingSatlUefg2CZ9r_Z9sUwZMaU,5698
225
+ nucliadb/search/api/v1/catalog.py,sha256=5ZY3d8sVia1traUxVS0Q4aQJmgcOuXzbxis_uY4ulE4,8077
225
226
  nucliadb/search/api/v1/feedback.py,sha256=kNLc4dHz2SXHzV0PwC1WiRAwY88fDptPcP-kO0q-FrQ,2620
226
- nucliadb/search/api/v1/find.py,sha256=iMjyq4y0JOMC_x1B8kUfVdkCoc9G9Ark58kPLLY4HDw,10824
227
+ nucliadb/search/api/v1/find.py,sha256=j6mxEyxjlLnZSqCT_N2LmOJlytsm1vkY4KFFmJRrtP8,10904
227
228
  nucliadb/search/api/v1/graph.py,sha256=gthqxCOn9biE6D6s93jRGLglk0ono8U7OyS390kWiI8,4178
228
229
  nucliadb/search/api/v1/knowledgebox.py,sha256=e9xeLPUqnQTx33i4A8xuV93ENvtJGrpjPlLRbGJtAI8,8415
229
230
  nucliadb/search/api/v1/predict_proxy.py,sha256=TnXKAqf_Go-9QVi6L5z4cXjnuNRe7XLJjF5QH_uwA1I,3504
230
231
  nucliadb/search/api/v1/router.py,sha256=mtT07rBZcVfpa49doaw9b1tj3sdi3qLH0gn9Io6NYM0,988
231
232
  nucliadb/search/api/v1/search.py,sha256=eqlrvRE7IlMpunNwD1RJwt6RgMV01sIDJLgxxE7CFcE,12297
232
233
  nucliadb/search/api/v1/suggest.py,sha256=gaJE60r8-z6TVO05mQRKBITwXn2_ofM3B4-OtpOgZEk,6343
233
- nucliadb/search/api/v1/summarize.py,sha256=VAHJvE6V3xUgEBfqNKhgoxmDqCvh30RnrEIBVhMcNLU,2499
234
+ nucliadb/search/api/v1/summarize.py,sha256=eJzgFJWUO80STx3lHc_0h9RZVaBCWF196nZUecfmqbE,2700
234
235
  nucliadb/search/api/v1/utils.py,sha256=5Ve-frn7LAE2jqAgB85F8RSeqxDlyA08--gS-AdOLS4,1434
235
236
  nucliadb/search/api/v1/resource/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
236
- nucliadb/search/api/v1/resource/ask.py,sha256=nsVzBSanSSlf0Ody6LSTjdEy75Vg283_YhbkAtWEjh8,3637
237
+ nucliadb/search/api/v1/resource/ask.py,sha256=PlOXa17lnmj3KA9bARNfDqvnx7Pe9OTnwz-OwgGTUjU,4035
237
238
  nucliadb/search/api/v1/resource/ingestion_agents.py,sha256=AZ5_cH1jbf7d5wh_gz6EHLEKAzEOMrQZwEZAu1Q_3FE,4846
238
239
  nucliadb/search/api/v1/resource/search.py,sha256=PZR7fs5oYD0RKqKoD38NZMAnOJzBv35NB2YOr2xy1ck,4923
239
240
  nucliadb/search/api/v1/resource/utils.py,sha256=-NjZqAQtFEXKpIh8ui5S26ItnJ5rzmmG0BHxGSS9QPw,1141
@@ -254,25 +255,25 @@ nucliadb/search/search/ingestion_agents.py,sha256=IK6yOPEF9rST_uoqspdVdPk0pldjDh
254
255
  nucliadb/search/search/merge.py,sha256=XiRBsxhYPshPV7lZXD-9E259KZOPIf4I2tKosY0lPo4,22470
255
256
  nucliadb/search/search/metrics.py,sha256=3I6IN0qDSmqIvUaWJmT3rt-Jyjs6LcvnKI8ZqCiuJPY,3501
256
257
  nucliadb/search/search/paragraphs.py,sha256=pNAEiYqJGGUVcEf7xf-PFMVqz0PX4Qb-WNG-_zPGN2o,7799
257
- nucliadb/search/search/pgcatalog.py,sha256=_AiyW6it66UX6BsZbM3-230IQhiEG4utoKYboviyOFI,16799
258
- nucliadb/search/search/predict_proxy.py,sha256=Q12I3VIAQqFgzBe9UeVEiAjUAdVT8NBfNDXWiP-pn1M,8858
258
+ nucliadb/search/search/pgcatalog.py,sha256=0n_gDihZZhqrDLRHvHzS3IESvMRTcU6YShqizQMyE_Y,16807
259
+ nucliadb/search/search/predict_proxy.py,sha256=Df8F5K-oS4TIXJc_y8UDViJTo7st5L0kMgxYPFZ39Vk,8806
259
260
  nucliadb/search/search/query.py,sha256=0qIQdt548L3jtKOyKo06aGJ73SLBxAW3N38_Hc1M3Uw,11528
260
261
  nucliadb/search/search/rank_fusion.py,sha256=xZtXhbmKb_56gs73u6KkFm2efvTATOSMmpOV2wrAIqE,9613
261
262
  nucliadb/search/search/rerankers.py,sha256=E2J1QdKAojqbhHM3KAyaOXKf6tJyETUxKs4tf_BEyqk,7472
262
263
  nucliadb/search/search/shards.py,sha256=mc5DK-MoCv9AFhlXlOFHbPvetcyNDzTFOJ5rimK8PC8,2636
263
- nucliadb/search/search/summarize.py,sha256=ksmYPubEQvAQgfPdZHfzB_rR19B2ci4IYZ6jLdHxZo8,4996
264
+ nucliadb/search/search/summarize.py,sha256=S4-mUS8d-rvHFcsr8Pa8N5NTxU6ZTxLFZTMKTTOOpr4,5098
264
265
  nucliadb/search/search/utils.py,sha256=ajRIXfdTF67dBVahQCXW-rSv6gJpUMPt3QhJrWqArTQ,2175
265
266
  nucliadb/search/search/chat/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
266
- nucliadb/search/search/chat/ask.py,sha256=0sgfiCbNaCZrTvYaRGtf5xL6VnzRgzofINiEP4IvhWs,38278
267
+ nucliadb/search/search/chat/ask.py,sha256=vJ3TSdr-cT_xh43UnoYugqxnHv_-LFSCYoU7o0NnI1M,39368
267
268
  nucliadb/search/search/chat/exceptions.py,sha256=Siy4GXW2L7oPhIR86H3WHBhE9lkV4A4YaAszuGGUf54,1356
268
269
  nucliadb/search/search/chat/images.py,sha256=PA8VWxT5_HUGfW1ULhKTK46UBsVyINtWWqEM1ulzX1E,3095
269
- nucliadb/search/search/chat/prompt.py,sha256=SNsCtB9mZTODjnUMAH8YfPxn05Kjl2d5xTIteNxyVcI,52783
270
+ nucliadb/search/search/chat/prompt.py,sha256=gmYRC3aK03vrDoBElJP5H5Z7OEeu79k5yTxv3FEkN0I,53866
270
271
  nucliadb/search/search/chat/query.py,sha256=3jMPNbiFEOoS0ydMOPYkSx1qVlvAv51npzadWXDwkMs,16650
271
272
  nucliadb/search/search/query_parser/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
272
273
  nucliadb/search/search/query_parser/exceptions.py,sha256=sVl9gRNzhE-s480LBBVkiXzNRbKhYRQN5F3it5tNNp8,939
273
274
  nucliadb/search/search/query_parser/fetcher.py,sha256=nP4EySj2BvH10QgCvgzvp13Nf22wwfHsdLbDoPlH2cQ,16831
274
275
  nucliadb/search/search/query_parser/models.py,sha256=kAslqX_-zaIdUpcpdNU2a5uQPQh7LC605qWLZ4aZ5T4,5064
275
- nucliadb/search/search/query_parser/old_filters.py,sha256=HircRqYEac_90bNCtFIJZ2RKA90kjbpNOQcp_ArBqR0,9083
276
+ nucliadb/search/search/query_parser/old_filters.py,sha256=GsU3T3-WiSPvjucP7evHkshzAWZOli8qsuXChvWRCY0,9092
276
277
  nucliadb/search/search/query_parser/parsers/__init__.py,sha256=ySCNSdbesLXGZyR88919njulA6UE10_3PhqMG_Yj1o4,1034
277
278
  nucliadb/search/search/query_parser/parsers/ask.py,sha256=eTz8wS-EJHuAagR384h6TT64itymFZRpfZJGX8r6aZM,2771
278
279
  nucliadb/search/search/query_parser/parsers/catalog.py,sha256=JuDiBL2wdjAuEFEPo0e2nQ4VqWjF3FXakT0ziZk3Oes,7495
@@ -375,8 +376,8 @@ nucliadb/writer/tus/local.py,sha256=7jYa_w9b-N90jWgN2sQKkNcomqn6JMVBOVeDOVYJHto,
375
376
  nucliadb/writer/tus/s3.py,sha256=vF0NkFTXiXhXq3bCVXXVV-ED38ECVoUeeYViP8uMqcU,8357
376
377
  nucliadb/writer/tus/storage.py,sha256=ToqwjoYnjI4oIcwzkhha_MPxi-k4Jk3Lt55zRwaC1SM,2903
377
378
  nucliadb/writer/tus/utils.py,sha256=MSdVbRsRSZVdkaum69_0wku7X3p5wlZf4nr6E0GMKbw,2556
378
- nucliadb-6.5.1.post4539.dist-info/METADATA,sha256=fB-dfmL9TqpjyMPOiHEu-EIwQLqH-a_j3bl3eTsjbFc,4158
379
- nucliadb-6.5.1.post4539.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
380
- nucliadb-6.5.1.post4539.dist-info/entry_points.txt,sha256=XqGfgFDuY3zXQc8ewXM2TRVjTModIq851zOsgrmaXx4,1268
381
- nucliadb-6.5.1.post4539.dist-info/top_level.txt,sha256=hwYhTVnX7jkQ9gJCkVrbqEG1M4lT2F_iPQND1fCzF80,20
382
- nucliadb-6.5.1.post4539.dist-info/RECORD,,
379
+ nucliadb-6.6.1.post649.dist-info/METADATA,sha256=_peNGuFRZE9h5r-n-Aglwzr-hlIijYTLzMDBF5BWsss,4152
380
+ nucliadb-6.6.1.post649.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
381
+ nucliadb-6.6.1.post649.dist-info/entry_points.txt,sha256=XqGfgFDuY3zXQc8ewXM2TRVjTModIq851zOsgrmaXx4,1268
382
+ nucliadb-6.6.1.post649.dist-info/top_level.txt,sha256=hwYhTVnX7jkQ9gJCkVrbqEG1M4lT2F_iPQND1fCzF80,20
383
+ nucliadb-6.6.1.post649.dist-info/RECORD,,