nucliadb 6.1.0.post2594__py3-none-any.whl → 6.1.0.post2610__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as published to their public registry. It is provided for informational purposes only.
@@ -18,6 +18,7 @@
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 #
 from . import ask  # noqa
+from . import catalog  # noqa
 from . import feedback  # noqa
 from . import find  # noqa
 from . import knowledgebox  # noqa
@@ -0,0 +1,184 @@
+# Copyright (C) 2021 Bosutech XXI S.L.
+#
+# nucliadb is offered under the AGPL v3.0 and as commercial software.
+# For commercial licensing, contact us at info@nuclia.com.
+#
+# AGPL:
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+from time import time
+from typing import Optional, Union
+
+from fastapi import Request, Response
+from fastapi_versioning import version
+
+from nucliadb.common.datamanagers.exceptions import KnowledgeBoxNotFound
+from nucliadb.common.maindb.pg import PGDriver
+from nucliadb.common.maindb.utils import get_driver
+from nucliadb.models.responses import HTTPClientError
+from nucliadb.search import logger
+from nucliadb.search.api.v1.router import KB_PREFIX, api
+from nucliadb.search.api.v1.utils import fastapi_query
+from nucliadb.search.search import cache
+from nucliadb.search.search.exceptions import InvalidQueryError
+from nucliadb.search.search.merge import fetch_resources
+from nucliadb.search.search.pgcatalog import pgcatalog_search
+from nucliadb.search.search.query_parser.parser import parse_catalog
+from nucliadb.search.search.utils import (
+    maybe_log_request_payload,
+)
+from nucliadb_models.common import FieldTypeName
+from nucliadb_models.metadata import ResourceProcessingStatus
+from nucliadb_models.resource import NucliaDBRoles
+from nucliadb_models.search import (
+    CatalogRequest,
+    CatalogResponse,
+    KnowledgeboxSearchResults,
+    ResourceProperties,
+    SearchParamDefaults,
+    SortField,
+    SortOptions,
+    SortOrder,
+)
+from nucliadb_models.utils import DateTime
+from nucliadb_utils.authentication import requires
+from nucliadb_utils.exceptions import LimitsExceededError
+
+
+@api.get(
+    f"/{KB_PREFIX}/{{kbid}}/catalog",
+    status_code=200,
+    summary="List resources of a Knowledge Box",
+    description="List resources of a Knowledge Box",
+    response_model=KnowledgeboxSearchResults,
+    response_model_exclude_unset=True,
+    tags=["Search"],
+)
+@requires(NucliaDBRoles.READER)
+@version(1)
+async def catalog_get(
+    request: Request,
+    response: Response,
+    kbid: str,
+    query: str = fastapi_query(SearchParamDefaults.query),
+    filters: list[str] = fastapi_query(SearchParamDefaults.filters),
+    faceted: list[str] = fastapi_query(SearchParamDefaults.faceted),
+    sort_field: SortField = fastapi_query(SearchParamDefaults.sort_field),
+    sort_limit: Optional[int] = fastapi_query(SearchParamDefaults.sort_limit),
+    sort_order: SortOrder = fastapi_query(SearchParamDefaults.sort_order),
+    page_number: int = fastapi_query(SearchParamDefaults.catalog_page_number),
+    page_size: int = fastapi_query(SearchParamDefaults.catalog_page_size),
+    shards: list[str] = fastapi_query(SearchParamDefaults.shards, deprecated=True),
+    with_status: Optional[ResourceProcessingStatus] = fastapi_query(
+        SearchParamDefaults.with_status, deprecated="Use filters instead"
+    ),
+    debug: bool = fastapi_query(SearchParamDefaults.debug, include_in_schema=False),
+    range_creation_start: Optional[DateTime] = fastapi_query(SearchParamDefaults.range_creation_start),
+    range_creation_end: Optional[DateTime] = fastapi_query(SearchParamDefaults.range_creation_end),
+    range_modification_start: Optional[DateTime] = fastapi_query(
+        SearchParamDefaults.range_modification_start
+    ),
+    range_modification_end: Optional[DateTime] = fastapi_query(
+        SearchParamDefaults.range_modification_end
+    ),
+    hidden: Optional[bool] = fastapi_query(SearchParamDefaults.hidden),
+) -> Union[KnowledgeboxSearchResults, HTTPClientError]:
+    item = CatalogRequest(
+        query=query,
+        filters=filters,
+        faceted=faceted,
+        page_number=page_number,
+        page_size=page_size,
+        shards=shards,
+        debug=debug,
+        with_status=with_status,
+        range_creation_start=range_creation_start,
+        range_creation_end=range_creation_end,
+        range_modification_start=range_modification_start,
+        range_modification_end=range_modification_end,
+        hidden=hidden,
+    )
+    if sort_field:
+        item.sort = SortOptions(field=sort_field, limit=sort_limit, order=sort_order)
+    return await catalog(kbid, item)
+
+
+@api.post(
+    f"/{KB_PREFIX}/{{kbid}}/catalog",
+    status_code=200,
+    summary="List resources of a Knowledge Box",
+    description="List resources of a Knowledge Box",
+    response_model=KnowledgeboxSearchResults,
+    response_model_exclude_unset=True,
+    tags=["Search"],
+)
+@requires(NucliaDBRoles.READER)
+@version(1)
+async def catalog_post(
+    request: Request,
+    kbid: str,
+    item: CatalogRequest,
+) -> Union[CatalogResponse, HTTPClientError]:
+    return await catalog(kbid, item)
+
+
+async def catalog(
+    kbid: str,
+    item: CatalogRequest,
+):
+    """
+    Catalog endpoint is a simplified version of the search endpoint, it only
+    returns bm25 results on titles and it does not support vector search.
+    It is useful for listing resources in a knowledge box.
+    """
+    if not pgcatalog_enabled():  # pragma: no cover
+        return HTTPClientError(status_code=501, detail="PG driver is needed for catalog search")
+
+    maybe_log_request_payload(kbid, "/catalog", item)
+    start_time = time()
+    try:
+        with cache.request_caches():
+            query_parser = parse_catalog(kbid, item)
+
+            catalog_results = CatalogResponse()
+            catalog_results.fulltext = await pgcatalog_search(query_parser)
+            catalog_results.resources = await fetch_resources(
+                resources=[r.rid for r in catalog_results.fulltext.results],
+                kbid=kbid,
+                show=[ResourceProperties.BASIC, ResourceProperties.ERRORS],
+                field_type_filter=list(FieldTypeName),
+                extracted=[],
+            )
+            return catalog_results
+    except InvalidQueryError as exc:
+        return HTTPClientError(status_code=412, detail=str(exc))
+    except KnowledgeBoxNotFound:
+        return HTTPClientError(status_code=404, detail="Knowledge Box not found")
+    except LimitsExceededError as exc:
+        return HTTPClientError(status_code=exc.status_code, detail=exc.detail)
+    finally:
+        duration = time() - start_time
+        if duration > 2:  # pragma: no cover
+            logger.warning(
+                "Slow catalog request",
+                extra={
+                    "kbid": kbid,
+                    "duration": duration,
+                    "query": item.model_dump_json(),
+                },
+            )
+
+
+def pgcatalog_enabled():
+    return isinstance(get_driver(), PGDriver)
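The new module above relocates the `/catalog` endpoints from `search.py` (removed further down) and delegates query parsing to the new `parse_catalog`. A minimal usage sketch of the endpoint, assuming a standalone NucliaDB; the host, the `MY_KB` id, and the role header value are illustrative placeholders, not part of this diff:

    # Hypothetical client call against the catalog endpoint added above.
    import requests

    resp = requests.post(
        "http://localhost:8080/api/v1/kb/MY_KB/catalog",  # placeholder host and kbid
        headers={"X-NUCLIADB-ROLES": "READER"},  # standalone role header
        json={"query": "report", "page_number": 0, "page_size": 20},
    )
    resp.raise_for_status()
    for rid, resource in resp.json()["resources"].items():
        print(rid, resource.get("title"))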
@@ -90,7 +90,6 @@ async def resource_search(
         fields,
         filters,
         faceted,
-        0,
         top_k,
         range_creation_start,
         range_creation_end,
@@ -109,8 +108,7 @@ async def resource_search(
     # We need to merge
     search_results = await merge_paragraphs_results(
         results,
-        count=top_k,
-        page=0,
+        top_k=top_k,
         kbid=kbid,
         highlight_split=highlight,
         min_score=0.0,
@@ -27,21 +27,17 @@ from fastapi_versioning import version
 from pydantic import ValidationError
 
 from nucliadb.common.datamanagers.exceptions import KnowledgeBoxNotFound
-from nucliadb.common.maindb.pg import PGDriver
-from nucliadb.common.maindb.utils import get_driver
 from nucliadb.models.responses import HTTPClientError
-from nucliadb.search import logger, predict
+from nucliadb.search import predict
 from nucliadb.search.api.v1.router import KB_PREFIX, api
 from nucliadb.search.api.v1.utils import fastapi_query
 from nucliadb.search.requesters.utils import Method, debug_nodes_info, node_query
 from nucliadb.search.search import cache
 from nucliadb.search.search.exceptions import InvalidQueryError
-from nucliadb.search.search.merge import fetch_resources, merge_results
-from nucliadb.search.search.pgcatalog import pgcatalog_search
+from nucliadb.search.search.merge import merge_results
 from nucliadb.search.search.query import QueryParser
 from nucliadb.search.search.utils import (
     filter_hidden_resources,
-    maybe_log_request_payload,
     min_score_from_payload,
     min_score_from_query_params,
     should_disable_vector_search,
@@ -50,10 +46,7 @@ from nucliadb_models.common import FieldTypeName
 from nucliadb_models.metadata import ResourceProcessingStatus
 from nucliadb_models.resource import ExtractedDataTypeName, NucliaDBRoles
 from nucliadb_models.search import (
-    CatalogRequest,
-    CatalogResponse,
     KnowledgeboxSearchResults,
-    MinScore,
     NucliaDBClientType,
     ResourceProperties,
     SearchOptions,
@@ -202,156 +195,6 @@ async def search_knowledgebox(
     return await _search_endpoint(response, kbid, item, x_ndb_client, x_nucliadb_user, x_forwarded_for)
 
 
-@api.get(
-    f"/{KB_PREFIX}/{{kbid}}/catalog",
-    status_code=200,
-    summary="List resources of a Knowledge Box",
-    description="List resources of a Knowledge Box",
-    response_model=KnowledgeboxSearchResults,
-    response_model_exclude_unset=True,
-    tags=["Search"],
-)
-@requires(NucliaDBRoles.READER)
-@version(1)
-async def catalog_get(
-    request: Request,
-    response: Response,
-    kbid: str,
-    query: str = fastapi_query(SearchParamDefaults.query),
-    filters: list[str] = fastapi_query(SearchParamDefaults.filters),
-    faceted: list[str] = fastapi_query(SearchParamDefaults.faceted),
-    sort_field: SortField = fastapi_query(SearchParamDefaults.sort_field),
-    sort_limit: Optional[int] = fastapi_query(SearchParamDefaults.sort_limit),
-    sort_order: SortOrder = fastapi_query(SearchParamDefaults.sort_order),
-    page_number: int = fastapi_query(SearchParamDefaults.catalog_page_number),
-    page_size: int = fastapi_query(SearchParamDefaults.catalog_page_size),
-    shards: list[str] = fastapi_query(SearchParamDefaults.shards, deprecated=True),
-    with_status: Optional[ResourceProcessingStatus] = fastapi_query(
-        SearchParamDefaults.with_status, deprecated="Use filters instead"
-    ),
-    debug: bool = fastapi_query(SearchParamDefaults.debug, include_in_schema=False),
-    range_creation_start: Optional[DateTime] = fastapi_query(SearchParamDefaults.range_creation_start),
-    range_creation_end: Optional[DateTime] = fastapi_query(SearchParamDefaults.range_creation_end),
-    range_modification_start: Optional[DateTime] = fastapi_query(
-        SearchParamDefaults.range_modification_start
-    ),
-    range_modification_end: Optional[DateTime] = fastapi_query(
-        SearchParamDefaults.range_modification_end
-    ),
-    hidden: Optional[bool] = fastapi_query(SearchParamDefaults.hidden),
-) -> Union[KnowledgeboxSearchResults, HTTPClientError]:
-    item = CatalogRequest(
-        query=query,
-        filters=filters,
-        faceted=faceted,
-        page_number=page_number,
-        page_size=page_size,
-        shards=shards,
-        debug=debug,
-        with_status=with_status,
-        range_creation_start=range_creation_start,
-        range_creation_end=range_creation_end,
-        range_modification_start=range_modification_start,
-        range_modification_end=range_modification_end,
-        hidden=hidden,
-    )
-    if sort_field:
-        item.sort = SortOptions(field=sort_field, limit=sort_limit, order=sort_order)
-    return await catalog(kbid, item)
-
-
-@api.post(
-    f"/{KB_PREFIX}/{{kbid}}/catalog",
-    status_code=200,
-    summary="List resources of a Knowledge Box",
-    description="List resources of a Knowledge Box",
-    response_model=KnowledgeboxSearchResults,
-    response_model_exclude_unset=True,
-    tags=["Search"],
-)
-@requires(NucliaDBRoles.READER)
-@version(1)
-async def catalog_post(
-    request: Request,
-    kbid: str,
-    item: CatalogRequest,
-) -> Union[CatalogResponse, HTTPClientError]:
-    return await catalog(kbid, item)
-
-
-async def catalog(
-    kbid: str,
-    item: CatalogRequest,
-):
-    """
-    Catalog endpoint is a simplified version of the search endpoint, it only
-    returns bm25 results on titles and it does not support vector search.
-    It is useful for listing resources in a knowledge box.
-    """
-    if not pgcatalog_enabled():  # pragma: no cover
-        return HTTPClientError(status_code=501, detail="PG driver is needed for catalog search")
-
-    maybe_log_request_payload(kbid, "/catalog", item)
-    start_time = time()
-    try:
-        with cache.request_caches():
-            sort = item.sort
-            if sort is None:
-                # By default we sort by creation date (most recent first)
-                sort = SortOptions(
-                    field=SortField.CREATED,
-                    order=SortOrder.DESC,
-                    limit=None,
-                )
-
-            query_parser = QueryParser(
-                kbid=kbid,
-                features=[SearchOptions.FULLTEXT],
-                query=item.query,
-                label_filters=item.filters,
-                keyword_filters=[],
-                faceted=item.faceted,
-                sort=sort,
-                page_number=item.page_number,
-                page_size=item.page_size,
-                min_score=MinScore(bm25=0, semantic=0),
-                fields=["a/title"],
-                with_status=item.with_status,
-                range_creation_start=item.range_creation_start,
-                range_creation_end=item.range_creation_end,
-                range_modification_start=item.range_modification_start,
-                range_modification_end=item.range_modification_end,
-                hidden=item.hidden,
-            )
-            catalog_results = CatalogResponse()
-            catalog_results.fulltext = await pgcatalog_search(query_parser)
-            catalog_results.resources = await fetch_resources(
-                resources=[r.rid for r in catalog_results.fulltext.results],
-                kbid=kbid,
-                show=[ResourceProperties.BASIC, ResourceProperties.ERRORS],
-                field_type_filter=list(FieldTypeName),
-                extracted=[],
-            )
-            return catalog_results
-    except InvalidQueryError as exc:
-        return HTTPClientError(status_code=412, detail=str(exc))
-    except KnowledgeBoxNotFound:
-        return HTTPClientError(status_code=404, detail="Knowledge Box not found")
-    except LimitsExceededError as exc:
-        return HTTPClientError(status_code=exc.status_code, detail=exc.detail)
-    finally:
-        duration = time() - start_time
-        if duration > 2:  # pragma: no cover
-            logger.warning(
-                "Slow catalog request",
-                extra={
-                    "kbid": kbid,
-                    "duration": duration,
-                    "query": item.model_dump_json(),
-                },
-            )
-
-
 @api.post(
     f"/{KB_PREFIX}/{{kbid}}/search",
     status_code=200,
@@ -431,8 +274,7 @@ async def search(
         keyword_filters=[],
         faceted=item.faceted,
         sort=item.sort,
-        page_number=0,
-        page_size=item.top_k,
+        top_k=item.top_k,
         min_score=item.min_score,
         range_creation_start=item.range_creation_start,
        range_creation_end=item.range_creation_end,
@@ -461,8 +303,7 @@ async def search(
     # We need to merge
     search_results = await merge_results(
         results,
-        count=item.top_k,
-        page=0,
+        top_k=item.top_k,
         kbid=kbid,
         show=item.show,
         field_type_filter=item.field_type_filter,
@@ -491,7 +332,3 @@ async def search(
     search_results.shards = queried_shards
     search_results.autofilters = autofilters
     return search_results, incomplete_results
-
-
-def pgcatalog_enabled():
-    return isinstance(get_driver(), PGDriver)
@@ -721,8 +721,7 @@ async def retrieval_in_resource(
             query="",
             label_filters=ask_request.filters,
             keyword_filters=ask_request.keyword_filters,
-            page_number=0,
-            page_size=0,
+            top_k=0,
             min_score=MinScore(),
         ),
         main_query_weight=1.0,
@@ -23,10 +23,8 @@ from typing import TypeVar
 T = TypeVar("T")
 
 
-def cut_page(items: list[T], page_size: int, page_number: int) -> tuple[list[T], bool]:
+def cut_page(items: list[T], top_k: int) -> tuple[list[T], bool]:
     """Return a slice of `items` representing the specified page and a boolean
     indicating whether there is a next page or not"""
-    start = page_size * page_number
-    end = start + page_size
-    next_page = len(items) > end
-    return items[start:end], next_page
+    next_page = len(items) > top_k
+    return items[:top_k], next_page
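A quick check of the simplified helper's behavior (standalone, using the new signature above):

    # cut_page now always keeps the first top_k items and flags whether more remain.
    assert cut_page([1, 2, 3, 4, 5], top_k=3) == ([1, 2, 3], True)
    assert cut_page([1, 2, 3, 4, 5], top_k=5) == ([1, 2, 3, 4, 5], False)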
@@ -122,8 +122,7 @@ async def _index_node_retrieval(
         relation_subgraph_query=pb_query.relations.subgraph,
         min_score_bm25=pb_query.min_score_bm25,
         min_score_semantic=pb_query.min_score_semantic,
-        page_size=item.top_k,
-        page_number=0,
+        top_k=item.top_k,
         show=item.show,
         extracted=item.extracted,
         field_type_filter=item.field_type_filter,
@@ -214,7 +213,7 @@ async def _external_index_retrieval(
             kbid=kbid,
             query=search_request.body,
         ),
-        top_k=query_parser.page_size,
+        top_k=query_parser.top_k,
     )
     find_resources = compose_find_resources(text_blocks, resources)
 
@@ -273,8 +272,7 @@ async def query_parser_from_find_request(
         keyword_filters=item.keyword_filters,
         faceted=None,
         sort=None,
-        page_number=0,
-        page_size=item.top_k,
+        top_k=item.top_k,
         min_score=item.min_score,
         range_creation_start=item.range_creation_start,
         range_creation_end=item.range_creation_end,
@@ -75,8 +75,7 @@ async def build_find_response(
     kbid: str,
     query: str,
     relation_subgraph_query: EntitiesSubgraphRequest,
-    page_size: int,
-    page_number: int,
+    top_k: int,
     min_score_bm25: float,
     min_score_semantic: float,
     rank_fusion_algorithm: RankFusionAlgorithm,
@@ -106,9 +105,9 @@ async def build_find_response(
     # enforced/validated by the query parsing.
     if reranker.needs_extra_results:
         assert reranker.window is not None, "Reranker definition must enforce this condition"
-        text_blocks_page, next_page = cut_page(merged_text_blocks, reranker.window, 0)
+        text_blocks_page, next_page = cut_page(merged_text_blocks, reranker.window)
     else:
-        text_blocks_page, next_page = cut_page(merged_text_blocks, page_size, page_number)
+        text_blocks_page, next_page = cut_page(merged_text_blocks, top_k)
 
     # hydrate and rerank
     resource_hydration_options = ResourceHydrationOptions(
@@ -126,7 +125,7 @@ async def build_find_response(
         text_block_hydration_options=text_block_hydration_options,
         reranker=reranker,
         reranking_options=reranking_options,
-        top_k=page_size,
+        top_k=top_k,
     )
 
     # build relations graph
@@ -144,8 +143,8 @@ async def build_find_response(
         best_matches=best_matches,
         relations=relations,
         total=total_paragraphs,
-        page_number=page_number,
-        page_size=page_size,
+        page_number=0,  # Bw/c with pagination
+        page_size=top_k,
         next_page=next_page,
         min_score=MinScore(bm25=_round(min_score_bm25), semantic=_round(min_score_semantic)),
     )
@@ -24,6 +24,7 @@ from typing import Any, Optional, Set, Union
 
 from nucliadb.common.ids import FieldId, ParagraphId
 from nucliadb.search.search import cache
+from nucliadb.search.search.cut import cut_page
 from nucliadb.search.search.fetch import (
     fetch_resources,
     get_labels_paragraph,
@@ -118,8 +119,7 @@ async def get_sort_value(
 async def merge_documents_results(
     document_responses: list[DocumentSearchResponse],
     resources: list[str],
-    count: int,
-    page: int,
+    top_k: int,
     kbid: str,
     sort: SortOptions,
     min_score: float,
@@ -148,15 +148,9 @@ async def merge_documents_results(
             raw_resource_list.append((result, sort_value))
         total += document_response.total
 
-    skip = page * count
-    end = skip + count
-    length = len(raw_resource_list)
-
-    if length > end:
-        next_page = True
-
-    # We need to cut first and then sort, otherwise pagination will be wrong if the order is DESC
-    raw_resource_list = raw_resource_list[min(skip, length) : min(end, length)]
+    # We need to cut first and then sort, otherwise the page will be wrong if the order is DESC
+    raw_resource_list, has_more = cut_page(raw_resource_list, top_k)
+    next_page = next_page or has_more
     raw_resource_list.sort(key=lambda x: x[1], reverse=(sort.order == SortOrder.DESC))
 
     result_resource_list: list[ResourceResult] = []
@@ -181,8 +175,8 @@ async def merge_documents_results(
         results=result_resource_list,
         query=query,
         total=total,
-        page_number=page,
-        page_size=count,
+        page_number=0,  # Bw/c with pagination
+        page_size=top_k,
         next_page=next_page,
         min_score=min_score,
     )
@@ -258,8 +252,7 @@ async def merge_vectors_results(
     vector_responses: list[VectorSearchResponse],
     resources: list[str],
     kbid: str,
-    count: int,
-    page: int,
+    top_k: int,
     min_score: Optional[float] = None,
 ):
     facets: dict[str, Any] = {}
@@ -276,12 +269,10 @@ async def merge_vectors_results(
     if len(vector_responses) > 1:
         raw_vectors_list.sort(key=lambda x: x.score, reverse=True)
 
-    skip = page * count
-    end_element = skip + count
-    length = len(raw_vectors_list)
+    raw_vectors_list, _ = cut_page(raw_vectors_list, top_k)
 
     result_sentence_list: list[Sentence] = []
-    for result in raw_vectors_list[min(skip, length) : min(end_element, length)]:
+    for result in raw_vectors_list:
         id_count = result.doc_id.id.count("/")
         if id_count == 4:
             rid, field_type, field, index, position = result.doc_id.id.split("/")
@@ -329,8 +320,8 @@ async def merge_vectors_results(
     return Sentences(
         results=result_sentence_list,
         facets=facets,
-        page_number=page,
-        page_size=count,
+        page_number=0,  # Bw/c with pagination
+        page_size=top_k,
         min_score=round(min_score or 0, ndigits=3),
     )
 
@@ -339,8 +330,7 @@ async def merge_paragraph_results(
     paragraph_responses: list[ParagraphSearchResponse],
     resources: list[str],
     kbid: str,
-    count: int,
-    page: int,
+    top_k: int,
     highlight: bool,
     sort: SortOptions,
     min_score: float,
@@ -374,15 +364,11 @@ async def merge_paragraph_results(
 
     raw_paragraph_list.sort(key=lambda x: x[1], reverse=(sort.order == SortOrder.DESC))
 
-    skip = page * count
-    end = skip + count
-    length = len(raw_paragraph_list)
-
-    if length > end:
-        next_page = True
+    raw_paragraph_list, has_more = cut_page(raw_paragraph_list, top_k)
+    next_page = next_page or has_more
 
     result_paragraph_list: list[Paragraph] = []
-    for result, _ in raw_paragraph_list[min(skip, length) : min(end, length)]:
+    for result, _ in raw_paragraph_list:
         _, field_type, field = result.field.split("/")
         text = await get_paragraph_text(
             kbid=kbid,
@@ -435,8 +421,8 @@ async def merge_paragraph_results(
         facets=facets,
         query=query,
         total=total,
-        page_number=page,
-        page_size=count,
+        page_number=0,  # Bw/c with pagination
+        page_size=top_k,
         next_page=next_page,
         min_score=min_score,
     )
@@ -494,8 +480,7 @@ def _merge_relations_results(
 @merge_observer.wrap({"type": "merge"})
 async def merge_results(
     search_responses: list[SearchResponse],
-    count: int,
-    page: int,
+    top_k: int,
     kbid: str,
     show: list[ResourceProperties],
     field_type_filter: list[FieldTypeName],
@@ -520,22 +505,21 @@ async def merge_results(
 
     resources: list[str] = list()
     api_results.fulltext = await merge_documents_results(
-        documents, resources, count, page, kbid, sort, min_score=min_score.bm25
+        documents, resources, top_k, kbid, sort, min_score=min_score.bm25
     )
 
     api_results.paragraphs = await merge_paragraph_results(
         paragraphs,
         resources,
         kbid,
-        count,
-        page,
+        top_k,
         highlight,
         sort,
         min_score=min_score.bm25,
     )
 
     api_results.sentences = await merge_vectors_results(
-        vectors, resources, kbid, count, page, min_score=min_score.semantic
+        vectors, resources, kbid, top_k, min_score=min_score.semantic
     )
 
     api_results.relations = await merge_relations_results(relations, requested_relations)
@@ -546,8 +530,7 @@ async def merge_results(
 
 async def merge_paragraphs_results(
     responses: list[SearchResponse],
-    count: int,
-    page: int,
+    top_k: int,
     kbid: str,
     highlight_split: bool,
     min_score: float,
@@ -563,8 +546,7 @@ async def merge_paragraphs_results(
         paragraphs,
         resources,
         kbid,
-        count,
-        page,
+        top_k,
         highlight=highlight_split,
         sort=SortOptions(
             field=SortField.SCORE,
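The "cut first and then sort" comment preserved in `merge_documents_results` is easy to verify: with a DESC display sort, slicing after sorting would pull items from the tail of the index-ordered list into the page. A toy illustration (standalone sketch, not the real merge code):

    items = [1, 2, 3, 4, 5]  # order as returned by the index
    cut_then_sort = sorted(items[:3], reverse=True)  # [3, 2, 1]
    sort_then_cut = sorted(items, reverse=True)[:3]  # [5, 4, 3]
    assert cut_then_sort != sort_then_cut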
@@ -26,6 +26,7 @@ from psycopg.rows import dict_row
 
 from nucliadb.common.maindb.pg import PGDriver
 from nucliadb.common.maindb.utils import get_driver
+from nucliadb.search.search.query_parser.models import CatalogQuery
 from nucliadb_models.labels import translate_system_to_alias_label
 from nucliadb_models.metadata import ResourceProcessingStatus
 from nucliadb_models.search import (
@@ -37,7 +38,6 @@ from nucliadb_models.search import (
 from nucliadb_telemetry import metrics
 
 from .filters import translate_label
-from .query import QueryParser
 
 observer = metrics.Observer("pg_catalog_search", labels={"op": ""})
 logger = logging.getLogger(__name__)
@@ -79,60 +79,60 @@ def _convert_filter(filter, filter_params):
        raise ValueError(f"Invalid operator {op}")
 
 
-def _prepare_query(query_parser: QueryParser):
+def _prepare_query(catalog_query: CatalogQuery):
     filter_sql = ["kbid = %(kbid)s"]
-    filter_params: dict[str, Any] = {"kbid": query_parser.kbid}
+    filter_params: dict[str, Any] = {"kbid": catalog_query.kbid}
 
-    if query_parser.query:
+    if catalog_query.query:
         # This is doing tokenization inside the SQL server (to keep the index updated). We could move it to
         # the python code at update/query time if it ever becomes a problem but for now, a single regex
         # executed per query is not a problem.
         filter_sql.append(
             "regexp_split_to_array(lower(title), '\\W') @> regexp_split_to_array(lower(%(query)s), '\\W')"
         )
-        filter_params["query"] = query_parser.query
+        filter_params["query"] = catalog_query.query
 
-    if query_parser.range_creation_start:
+    if catalog_query.filters.creation.after:
         filter_sql.append("created_at > %(created_at_start)s")
-        filter_params["created_at_start"] = query_parser.range_creation_start
+        filter_params["created_at_start"] = catalog_query.filters.creation.after
 
-    if query_parser.range_creation_end:
+    if catalog_query.filters.creation.before:
         filter_sql.append("created_at < %(created_at_end)s")
-        filter_params["created_at_end"] = query_parser.range_creation_end
+        filter_params["created_at_end"] = catalog_query.filters.creation.before
 
-    if query_parser.range_modification_start:
+    if catalog_query.filters.modification.after:
         filter_sql.append("modified_at > %(modified_at_start)s")
-        filter_params["modified_at_start"] = query_parser.range_modification_start
+        filter_params["modified_at_start"] = catalog_query.filters.modification.after
 
-    if query_parser.range_modification_end:
+    if catalog_query.filters.modification.before:
         filter_sql.append("modified_at < %(modified_at_end)s")
-        filter_params["modified_at_end"] = query_parser.range_modification_end
+        filter_params["modified_at_end"] = catalog_query.filters.modification.before
 
-    if query_parser.label_filters:
-        filter_sql.append(_convert_filter(query_parser.label_filters, filter_params))
+    if catalog_query.filters.labels:
+        filter_sql.append(_convert_filter(catalog_query.filters.labels, filter_params))
 
     order_sql = ""
-    if query_parser.sort:
-        if query_parser.sort.field == SortField.CREATED:
+    if catalog_query.sort:
+        if catalog_query.sort.field == SortField.CREATED:
             order_field = "created_at"
-        elif query_parser.sort.field == SortField.MODIFIED:
+        elif catalog_query.sort.field == SortField.MODIFIED:
             order_field = "modified_at"
-        elif query_parser.sort.field == SortField.TITLE:
+        elif catalog_query.sort.field == SortField.TITLE:
             order_field = "title"
         else:
             # Deprecated order by score, use created_at instead
            order_field = "created_at"
 
-        if query_parser.sort.order == SortOrder.ASC:
+        if catalog_query.sort.order == SortOrder.ASC:
            order_dir = "ASC"
        else:
            order_dir = "DESC"
 
        order_sql = f" ORDER BY {order_field} {order_dir}"
 
-    if query_parser.with_status:
+    if catalog_query.filters.with_status:
        filter_sql.append("labels && %(status)s")
-        if query_parser.with_status == ResourceProcessingStatus.PROCESSED:
+        if catalog_query.filters.with_status == ResourceProcessingStatus.PROCESSED:
            filter_params["status"] = ["/n/s/PROCESSED", "/n/s/ERROR"]
        else:
            filter_params["status"] = ["/n/s/PENDING"]
@@ -148,18 +148,18 @@ def _pg_driver() -> PGDriver:
 
 
 @observer.wrap({"op": "search"})
-async def pgcatalog_search(query_parser: QueryParser) -> Resources:
+async def pgcatalog_search(catalog_query: CatalogQuery) -> Resources:
     # Prepare SQL query
-    query, query_params = _prepare_query(query_parser)
+    query, query_params = _prepare_query(catalog_query)
 
     async with _pg_driver()._get_connection() as conn, conn.cursor(row_factory=dict_row) as cur:
         facets = {}
 
         # Faceted search
-        if query_parser.faceted:
+        if catalog_query.faceted:
             with observer({"op": "facets"}):
                 tmp_facets: dict[str, dict[str, int]] = {
-                    translate_label(f): defaultdict(int) for f in query_parser.faceted
+                    translate_label(f): defaultdict(int) for f in catalog_query.faceted
                 }
                 facet_filters = " OR ".join(f"label LIKE '{f}/%%'" for f in tmp_facets.keys())
                 for facet in tmp_facets.keys():
@@ -167,7 +167,7 @@ async def pgcatalog_search(query_parser: QueryParser) -> Resources:
                         facet.startswith("/n/s") or facet.startswith("/n/i") or facet.startswith("/l")
                     ):
                         logger.warn(
-                            f"Unexpected facet used at catalog: {facet}, kbid={query_parser.kbid}"
+                            f"Unexpected facet used at catalog: {facet}, kbid={catalog_query.kbid}"
                         )
 
                 await cur.execute(
@@ -201,12 +201,12 @@ async def pgcatalog_search(query_parser: QueryParser) -> Resources:
 
         # Query
         with observer({"op": "query"}):
-            offset = query_parser.page_size * query_parser.page_number
+            offset = catalog_query.page_size * catalog_query.page_number
             await cur.execute(
                 f"{query} LIMIT %(page_size)s OFFSET %(offset)s",
                 {
                     **query_params,
-                    "page_size": query_parser.page_size,
+                    "page_size": catalog_query.page_size,
                     "offset": offset,
                 },
             )
@@ -224,10 +224,10 @@ async def pgcatalog_search(query_parser: QueryParser) -> Resources:
            )
            for r in data
        ],
-        query=query_parser.query,
+        query=catalog_query.query,
        total=total,
-        page_number=query_parser.page_number,
-        page_size=query_parser.page_size,
+        page_number=catalog_query.page_number,
+        page_size=catalog_query.page_size,
        next_page=(offset + len(data) < total),
        min_score=0,
    )
@@ -74,7 +74,6 @@ INDEX_SORTABLE_FIELDS = [
     SortField.MODIFIED,
 ]
 
-MAX_VECTOR_RESULTS_ALLOWED = 2000
 DEFAULT_GENERIC_SEMANTIC_THRESHOLD = 0.7
 
 
@@ -105,8 +104,7 @@ class QueryParser:
         query: str,
         label_filters: Union[list[str], list[Filter]],
         keyword_filters: Union[list[str], list[Filter]],
-        page_number: int,
-        page_size: int,
+        top_k: int,
         min_score: MinScore,
         faceted: Optional[list[str]] = None,
         sort: Optional[SortOptions] = None,
@@ -145,8 +143,7 @@ class QueryParser:
         self.flat_label_filters: list[str] = []
         self.keyword_filters: dict[str, Any] = convert_to_node_filters(keyword_filters)
         self.faceted = faceted or []
-        self.page_number = page_number
-        self.page_size = page_size
+        self.top_k = top_k
         self.min_score = min_score
         self.sort = sort
         self.range_creation_start = range_creation_start
@@ -389,19 +386,13 @@ class QueryParser:
             # have consistent results, we must limit them
             request.result_per_page = self.sort.limit
         else:
-            request.result_per_page = self.page_number * self.page_size + self.page_size
+            request.result_per_page = self.top_k
 
         sort_field = SortFieldMap[self.sort.field] if self.sort else None
         if sort_field is not None:
             request.order.sort_by = sort_field
             request.order.type = SortOrderMap[self.sort.order]  # type: ignore
 
-        if self.has_vector_search and request.result_per_page > MAX_VECTOR_RESULTS_ALLOWED:
-            raise InvalidQueryError(
-                "page_size",
-                f"Pagination of semantic results limit reached: {MAX_VECTOR_RESULTS_ALLOWED}. If you want to paginate through all results, please disable the vector search feature.",  # noqa: E501
-            )
-
     async def parse_min_score(self, request: nodereader_pb2.SearchRequest, incomplete: bool) -> None:
         semantic_min_score = DEFAULT_GENERIC_SEMANTIC_THRESHOLD
         if self.min_score.semantic is not None:
@@ -635,8 +626,7 @@ async def paragraph_query_to_pb(
     fields: list[str],
     filters: list[str],
     faceted: list[str],
-    page_number: int,
-    page_size: int,
+    top_k: int,
     range_creation_start: Optional[datetime] = None,
     range_creation_end: Optional[datetime] = None,
     range_modification_start: Optional[datetime] = None,
@@ -650,7 +640,7 @@ async def paragraph_query_to_pb(
 
     # We need to ask for all and cut later
     request.page_number = 0
-    request.result_per_page = page_number * page_size + page_size
+    request.result_per_page = top_k
 
     request.body = query
 
@@ -19,6 +19,8 @@
 #
 
 from dataclasses import dataclass
+from datetime import datetime
+from typing import Any, Optional
 
 from pydantic import (
     BaseModel,
@@ -27,6 +29,16 @@ from pydantic import (
 
 from nucliadb_models import search as search_models
 
+### Retrieval
+
+# filters
+
+
+class DateTimeFilter(BaseModel):
+    after: Optional[datetime] = None  # aka, start
+    before: Optional[datetime] = None  # aka, end
+
+
 # rank fusion
 
 
@@ -65,3 +77,25 @@ class UnitRetrieval:
     top_k: int
     rank_fusion: RankFusion
     reranker: Reranker
+
+
+### Catalog
+
+
+class CatalogFilters(BaseModel):
+    labels: dict[str, Any] = Field(
+        default_factory=dict, description="Labels filter expression, like, `{and: {not: ...}, ...}`"
+    )
+    creation: DateTimeFilter
+    modification: DateTimeFilter
+    with_status: Optional[search_models.ResourceProcessingStatus] = None
+
+
+class CatalogQuery(BaseModel):
+    kbid: str
+    query: str
+    filters: CatalogFilters
+    sort: search_models.SortOptions
+    faceted: list[str]
+    page_size: int
+    page_number: int
18
18
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
19
  #
20
20
 
21
+ from typing import Any
21
22
 
22
23
  from pydantic import ValidationError
23
24
 
25
+ from nucliadb.search.search.filters import (
26
+ convert_to_node_filters,
27
+ translate_label_filters,
28
+ )
24
29
  from nucliadb.search.search.query_parser.exceptions import ParserError
25
30
  from nucliadb.search.search.query_parser.models import (
31
+ CatalogFilters,
32
+ CatalogQuery,
33
+ DateTimeFilter,
26
34
  MultiMatchBoosterReranker,
27
35
  NoopReranker,
28
36
  PredictReranker,
@@ -32,7 +40,14 @@ from nucliadb.search.search.query_parser.models import (
32
40
  UnitRetrieval,
33
41
  )
34
42
  from nucliadb_models import search as search_models
35
- from nucliadb_models.search import FindRequest
43
+ from nucliadb_models.labels import LABEL_HIDDEN
44
+ from nucliadb_models.search import (
45
+ Filter,
46
+ FindRequest,
47
+ SortField,
48
+ SortOptions,
49
+ SortOrder,
50
+ )
36
51
 
37
52
 
38
53
  def parse_find(item: FindRequest) -> UnitRetrieval:
@@ -69,9 +84,6 @@ class _FindParser:
69
84
  )
70
85
 
71
86
  def _parse_top_k(self) -> int:
72
- # while pagination is still there, FindRequest has a validator that converts
73
- # top_k to page_number and page_size. To get top_k, we can compute it from
74
- # those
75
87
  assert self.item.top_k is not None, "top_k must have an int value"
76
88
  top_k = self.item.top_k
77
89
  return top_k
@@ -129,3 +141,43 @@ class _FindParser:
129
141
  raise ParserError(f"Unknown reranker {self.item.reranker}")
130
142
 
131
143
  return reranking
144
+
145
+
146
+ def parse_catalog(kbid: str, item: search_models.CatalogRequest) -> CatalogQuery:
147
+ if item.hidden:
148
+ hidden_filter = Filter(all=[LABEL_HIDDEN])
149
+ else:
150
+ hidden_filter = Filter(none=[LABEL_HIDDEN])
151
+ label_filters: dict[str, Any] = convert_to_node_filters(item.filters + [hidden_filter]) # type: ignore
152
+ if len(label_filters) > 0:
153
+ label_filters = translate_label_filters(label_filters)
154
+
155
+ sort = item.sort
156
+ if sort is None:
157
+ # By default we sort by creation date (most recent first)
158
+ sort = SortOptions(
159
+ field=SortField.CREATED,
160
+ order=SortOrder.DESC,
161
+ limit=None,
162
+ )
163
+
164
+ return CatalogQuery(
165
+ kbid=kbid,
166
+ query=item.query,
167
+ filters=CatalogFilters(
168
+ labels=label_filters,
169
+ creation=DateTimeFilter(
170
+ after=item.range_creation_start,
171
+ before=item.range_creation_end,
172
+ ),
173
+ modification=DateTimeFilter(
174
+ after=item.range_modification_start,
175
+ before=item.range_modification_end,
176
+ ),
177
+ with_status=item.with_status,
178
+ ),
179
+ sort=sort,
180
+ faceted=item.faceted,
181
+ page_number=item.page_number,
182
+ page_size=item.page_size,
183
+ )
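A sketch of the defaults `parse_catalog` applies (assuming `CatalogRequest`'s other fields keep their model defaults; the kbid is a placeholder):

    from nucliadb_models.search import CatalogRequest

    catalog_query = parse_catalog("MY_KB", CatalogRequest(query="report"))
    assert catalog_query.sort.field == SortField.CREATED  # default sort
    assert catalog_query.sort.order == SortOrder.DESC  # most recent first
    # hidden resources are excluded via an implicit none=[LABEL_HIDDEN] filter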
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: nucliadb
-Version: 6.1.0.post2594
+Version: 6.1.0.post2610
 Home-page: https://docs.nuclia.dev/docs/management/nucliadb/intro
 Author: NucliaDB Community
 Author-email: nucliadb@nuclia.com
@@ -22,10 +22,10 @@ Classifier: Programming Language :: Python :: 3.12
 Classifier: Programming Language :: Python :: 3 :: Only
 Requires-Python: >=3.9, <4
 Description-Content-Type: text/markdown
-Requires-Dist: nucliadb-telemetry[all]>=6.1.0.post2594
-Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.1.0.post2594
-Requires-Dist: nucliadb-protos>=6.1.0.post2594
-Requires-Dist: nucliadb-models>=6.1.0.post2594
+Requires-Dist: nucliadb-telemetry[all]>=6.1.0.post2610
+Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.1.0.post2610
+Requires-Dist: nucliadb-protos>=6.1.0.post2610
+Requires-Dist: nucliadb-models>=6.1.0.post2610
 Requires-Dist: nucliadb-admin-assets>=1.0.0.post1224
 Requires-Dist: nucliadb-node-binding>=2.26.0
 Requires-Dist: uvicorn
@@ -190,52 +190,53 @@ nucliadb/search/run.py,sha256=aFb-CXRi_C8YMpP_ivNj8KW1BYhADj88y8K9Lr_nUPI,1402
 nucliadb/search/settings.py,sha256=vem3EcyYlTPSim0kEK-xe-erF4BZg0CT_LAb8ZRQAE8,1684
 nucliadb/search/utilities.py,sha256=9SsRDw0rJVXVoLBfF7rBb6q080h-thZc7u8uRcTiBeY,1037
 nucliadb/search/api/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
-nucliadb/search/api/v1/__init__.py,sha256=Xep7u4Q7ygHvTGRn2CzRwJRVX1jtoSxUg8usQcVbC2s,1219
+nucliadb/search/api/v1/__init__.py,sha256=NSbOVF6toiHX9WMpGgqpFrjJKT50EcHzOILp-2iHk5I,1249
 nucliadb/search/api/v1/ask.py,sha256=Od2U_gaOZK6dJZ1eDGQQJ3xUVnbBih58VPYVAsQErOw,3902
+nucliadb/search/api/v1/catalog.py,sha256=ubYPS1wmPHzOgH9LR0qJmmV-9ELZPtHRSs5TYJ1pA9A,7117
 nucliadb/search/api/v1/feedback.py,sha256=yrOZeElw6XLu6j_6m3QGHKjEMwZPWa9vtdCud4dNilU,2547
 nucliadb/search/api/v1/find.py,sha256=DsnWkySu_cFajDWJIxN8DYvLL_Rm2yiCjHD8TsqPfRk,9304
 nucliadb/search/api/v1/knowledgebox.py,sha256=PKT1V3vZUnBkGfkxnFGjWPuHwQarVxREDY7lAT_9k1w,8764
 nucliadb/search/api/v1/predict_proxy.py,sha256=QrGzo0hKjtmyGZ6pjlJHYAh4hxwVUIOTcVcerRCw7eE,3047
 nucliadb/search/api/v1/router.py,sha256=mtT07rBZcVfpa49doaw9b1tj3sdi3qLH0gn9Io6NYM0,988
-nucliadb/search/api/v1/search.py,sha256=9QO2-AI1b2WJX8gmURB02cih1ONWjHe0-qnL1SXbF_E,19864
+nucliadb/search/api/v1/search.py,sha256=_5J8lIzLjfFW3j-XeaebaJqcO1vxm0W2oaX4unFJ5e8,13577
 nucliadb/search/api/v1/suggest.py,sha256=SXxRVKT5hDSHNKlBYo8XozHHq9bGyvJOlo286lEruLE,5979
 nucliadb/search/api/v1/summarize.py,sha256=VAHJvE6V3xUgEBfqNKhgoxmDqCvh30RnrEIBVhMcNLU,2499
 nucliadb/search/api/v1/utils.py,sha256=5Ve-frn7LAE2jqAgB85F8RSeqxDlyA08--gS-AdOLS4,1434
 nucliadb/search/api/v1/resource/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
 nucliadb/search/api/v1/resource/ask.py,sha256=XMEP9_Uwy37yaXLcIYKMXGiZYNASD8RTByzQGjd9LPQ,3847
-nucliadb/search/api/v1/resource/search.py,sha256=jgYJnSSult2ah_Jfd78vbGT5URyZPDsX1Gbdj-sQgCE,4851
+nucliadb/search/api/v1/resource/search.py,sha256=X0rQU14r_s4_CPpoE2sc84AJPX68gvCftcP4bosWHhA,4812
 nucliadb/search/requesters/__init__.py,sha256=itSI7dtTwFP55YMX4iK7JzdMHS5CQVUiB1XzQu4UBh8,833
 nucliadb/search/requesters/utils.py,sha256=7ovWSGzhLpZGTMi9x9nMOi7QNCgt2qah-7Kam-cIvUg,8468
 nucliadb/search/search/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
 nucliadb/search/search/cache.py,sha256=n9vkN6Y6Xnr2RBJyoH0WzjzGTJOMfKekU9tfPTWWCPc,6810
-nucliadb/search/search/cut.py,sha256=1lmQpc8p5G8okHcPZ5GKH1F60Qr72HpKGoZI7H15Wzs,1240
+nucliadb/search/search/cut.py,sha256=ytY0_GY7ocNjfxTb4aosxEp4ZfhQNDP--JkhEMGD298,1153
 nucliadb/search/search/exceptions.py,sha256=mbToQ-ghrv8ukLEv8S_-EZrgweWaIZZ5SIpoeuGDk6s,1154
 nucliadb/search/search/fetch.py,sha256=XJHIFnZmXM_8Kb37lb4lg1GYG7cZ1plT-qAIb_QziX4,6184
 nucliadb/search/search/filters.py,sha256=1MkHlJjAQqoRCj7e5cEzK2HvBxGLE17I_omsjiklbtw,6476
-nucliadb/search/search/find.py,sha256=KvRuPwvaZCxgxpHQtetPt9gy5DS9cszLu2oKyiDc3Cg,9891
-nucliadb/search/search/find_merge.py,sha256=oM71fbLamlVmwTuSQm9Z2lRNU63Ak7iHL_6mxwvVwB4,17218
+nucliadb/search/search/find.py,sha256=EPtnb3jJcj6_4Brzk0mRkd9meY26OVGfW5dj-RKk2yU,9829
+nucliadb/search/search/find_merge.py,sha256=_R_YpHAZv5BHh3XABQ8MRd1Ci0seclGYf26yJHJ7H0I,17178
 nucliadb/search/search/hydrator.py,sha256=7Zi44uf2m9b2X_b1aOV2lrWu1Vmbo9lXYgPVUGK0RGI,6728
-nucliadb/search/search/merge.py,sha256=SfAzDKUEAQ2JUf6K6MEhGZZCJXwdsN9vusRIhdg7ajI,20325
+nucliadb/search/search/merge.py,sha256=TATahN22AX23gJ-2hxGiIZLjj6H1AtnIeADN6jC11HY,20079
 nucliadb/search/search/metrics.py,sha256=81X-tahGW4n2CLvUzCPdNxNClmZqUWZjcVOGCUHoiUM,2872
 nucliadb/search/search/paragraphs.py,sha256=pNAEiYqJGGUVcEf7xf-PFMVqz0PX4Qb-WNG-_zPGN2o,7799
-nucliadb/search/search/pgcatalog.py,sha256=cHiUZzrEnCdiM_0F5HCid8xJDDzFDbK-3cDSvGmCT4s,8738
+nucliadb/search/search/pgcatalog.py,sha256=IaNK4dAxdXs38PoIkTdgqMDuZDjeiOtcXn3LeaT-OMw,8855
 nucliadb/search/search/predict_proxy.py,sha256=xBlh6kjuQpWRq7KsBx4pEl2PtnwljjQIiYMaTWpcCSA,3015
-nucliadb/search/search/query.py,sha256=O3ry3PPGjGLtH8AXssyyzDxStu57W5PK-bvUfCOMaqY,38309
+nucliadb/search/search/query.py,sha256=1g_kek5mbxXPrFbs2ptTrFfr-WwZjsjMUHOw9Le2vZ4,37732
 nucliadb/search/search/rank_fusion.py,sha256=tRGo_KlsFsVx1CQEy1iqQ6f0T1Dq1kf0axDXHuuzvvM,6946
 nucliadb/search/search/rerankers.py,sha256=0kAHES9X_FKkP7KSN9NRETFmRPKzwrFAo_54MbyvM7Q,9051
 nucliadb/search/search/shards.py,sha256=mM2aCHWhl_gwkCENXDShPukS-_qnB5tFS3UAJuzM9qA,2182
 nucliadb/search/search/summarize.py,sha256=ksmYPubEQvAQgfPdZHfzB_rR19B2ci4IYZ6jLdHxZo8,4996
 nucliadb/search/search/utils.py,sha256=iF2tbBA56gRMJH1TlE2hMrqeXqjoeOPt4KgRdp2m9Ek,3313
 nucliadb/search/search/chat/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
-nucliadb/search/search/chat/ask.py,sha256=tUPsJpRCj7Sw7wHTpp5Mq1G9UDrYliCkYiIFdZ7qv_Y,33834
+nucliadb/search/search/chat/ask.py,sha256=7yUPEMluZ553O4FdcghyQI3Hw042P3QL06T0AwDctJI,33799
 nucliadb/search/search/chat/exceptions.py,sha256=Siy4GXW2L7oPhIR86H3WHBhE9lkV4A4YaAszuGGUf54,1356
 nucliadb/search/search/chat/images.py,sha256=PA8VWxT5_HUGfW1ULhKTK46UBsVyINtWWqEM1ulzX1E,3095
 nucliadb/search/search/chat/prompt.py,sha256=TIzjI_882hJ--KLKCY8rJomtJ_CMJ-MHYtHqivgG8Lk,46819
 nucliadb/search/search/chat/query.py,sha256=gKtlj2ms81m417Id29-DtHFxE3M4TtJvYNB03gAgpYo,14402
 nucliadb/search/search/query_parser/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
 nucliadb/search/search/query_parser/exceptions.py,sha256=tuzl7ZyvVsRz6u0_3zMe60vx39nd3pi641prs-5nC0E,872
-nucliadb/search/search/query_parser/models.py,sha256=BNHwpItSLCZIDclcDpwDiu-BBQfmEn6xkxCErrMPgVU,1590
-nucliadb/search/search/query_parser/parser.py,sha256=DGVtph_ZlRiLQJJdoH07qrUvur6LQpiozwtgbO-SNqs,4890
+nucliadb/search/search/query_parser/models.py,sha256=-VlCDXUCgOroAZw1Leqhj2VMgRv_CD2w40PXXOBLaUM,2332
+nucliadb/search/search/query_parser/parser.py,sha256=ElKx9JboJCSqBiFiEAVVH-JM0_7ykc_cdY4TbKfAxUg,6296
 nucliadb/standalone/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
 nucliadb/standalone/api_router.py,sha256=zR03TQ-Pd2kXx1jeV83Puw19112Z8Jhln7p1cAn69kg,6699
 nucliadb/standalone/app.py,sha256=mAApNK_iVsQgJyd-mtwCeZq5csSimwnXmlQGH9a70pE,5586
@@ -331,9 +332,9 @@ nucliadb/writer/tus/local.py,sha256=7jYa_w9b-N90jWgN2sQKkNcomqn6JMVBOVeDOVYJHto,
 nucliadb/writer/tus/s3.py,sha256=vF0NkFTXiXhXq3bCVXXVV-ED38ECVoUeeYViP8uMqcU,8357
 nucliadb/writer/tus/storage.py,sha256=ToqwjoYnjI4oIcwzkhha_MPxi-k4Jk3Lt55zRwaC1SM,2903
 nucliadb/writer/tus/utils.py,sha256=MSdVbRsRSZVdkaum69_0wku7X3p5wlZf4nr6E0GMKbw,2556
-nucliadb-6.1.0.post2594.dist-info/METADATA,sha256=yth5qp_Am2HEo9HU5DwRFfNx0wzYxAMekpT3it9DgAY,4390
-nucliadb-6.1.0.post2594.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
-nucliadb-6.1.0.post2594.dist-info/entry_points.txt,sha256=XqGfgFDuY3zXQc8ewXM2TRVjTModIq851zOsgrmaXx4,1268
-nucliadb-6.1.0.post2594.dist-info/top_level.txt,sha256=hwYhTVnX7jkQ9gJCkVrbqEG1M4lT2F_iPQND1fCzF80,20
-nucliadb-6.1.0.post2594.dist-info/zip-safe,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
-nucliadb-6.1.0.post2594.dist-info/RECORD,,
+nucliadb-6.1.0.post2610.dist-info/METADATA,sha256=hq7ECRkXdli7ZqMNQKUI1ZjJyi0ww7yCXEC7asizcCk,4390
+nucliadb-6.1.0.post2610.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
+nucliadb-6.1.0.post2610.dist-info/entry_points.txt,sha256=XqGfgFDuY3zXQc8ewXM2TRVjTModIq851zOsgrmaXx4,1268
+nucliadb-6.1.0.post2610.dist-info/top_level.txt,sha256=hwYhTVnX7jkQ9gJCkVrbqEG1M4lT2F_iPQND1fCzF80,20
+nucliadb-6.1.0.post2610.dist-info/zip-safe,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
+nucliadb-6.1.0.post2610.dist-info/RECORD,,