nucliadb 6.1.0.post2589__py3-none-any.whl → 6.1.0.post2602__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nucliadb/common/ids.py +23 -0
- nucliadb/ingest/fields/base.py +15 -6
- nucliadb/ingest/fields/conversation.py +1 -1
- nucliadb/ingest/fields/file.py +2 -2
- nucliadb/ingest/fields/link.py +2 -2
- nucliadb/ingest/fields/text.py +9 -3
- nucliadb/ingest/orm/brain.py +17 -0
- nucliadb/ingest/orm/processor/processor.py +5 -0
- nucliadb/ingest/orm/resource.py +3 -0
- nucliadb/ingest/processing.py +4 -0
- nucliadb/search/api/v1/find.py +1 -5
- nucliadb/search/api/v1/resource/search.py +8 -10
- nucliadb/search/api/v1/search.py +6 -13
- nucliadb/search/search/chat/ask.py +1 -2
- nucliadb/search/search/find.py +6 -20
- nucliadb/search/search/query_parser/parser.py +2 -1
- {nucliadb-6.1.0.post2589.dist-info → nucliadb-6.1.0.post2602.dist-info}/METADATA +5 -5
- {nucliadb-6.1.0.post2589.dist-info → nucliadb-6.1.0.post2602.dist-info}/RECORD +22 -22
- {nucliadb-6.1.0.post2589.dist-info → nucliadb-6.1.0.post2602.dist-info}/WHEEL +0 -0
- {nucliadb-6.1.0.post2589.dist-info → nucliadb-6.1.0.post2602.dist-info}/entry_points.txt +0 -0
- {nucliadb-6.1.0.post2589.dist-info → nucliadb-6.1.0.post2602.dist-info}/top_level.txt +0 -0
- {nucliadb-6.1.0.post2589.dist-info → nucliadb-6.1.0.post2602.dist-info}/zip-safe +0 -0
nucliadb/common/ids.py
CHANGED
@@ -215,3 +215,26 @@ class VectorId:
|
|
215
215
|
index = int(parts[-2])
|
216
216
|
field_id = FieldId.from_string("/".join(parts[:-2]))
|
217
217
|
return cls(field_id=field_id, index=index, vector_start=start, vector_end=end)
|
218
|
+
|
219
|
+
|
220
|
+
def extract_data_augmentation_id(generated_field_id: str) -> Optional[str]:
|
221
|
+
"""Data augmentation generated fields have a strict id with the following
|
222
|
+
format:
|
223
|
+
`da-{task_id}-{original:field_type}-{original:field_id}[-{original:split}]`
|
224
|
+
|
225
|
+
@return the `task_id`
|
226
|
+
|
227
|
+
ATENTION: we are assuming ids have been properly generated and `-` is not a
|
228
|
+
valid character, otherwise, this extraction would be wrong and a partial id
|
229
|
+
would be returned.
|
230
|
+
|
231
|
+
"""
|
232
|
+
parts = generated_field_id.split("-")
|
233
|
+
|
234
|
+
if len(parts) < 4:
|
235
|
+
return None
|
236
|
+
|
237
|
+
if parts[0] != "da":
|
238
|
+
return None
|
239
|
+
|
240
|
+
return parts[1] or None
|
nucliadb/ingest/fields/base.py
CHANGED
@@ -21,9 +21,9 @@ from __future__ import annotations
|
|
21
21
|
|
22
22
|
import enum
|
23
23
|
from datetime import datetime
|
24
|
-
from typing import Any, Optional, Type
|
24
|
+
from typing import Any, Generic, Optional, Type, TypeVar
|
25
25
|
|
26
|
-
from google.protobuf.message import DecodeError
|
26
|
+
from google.protobuf.message import DecodeError, Message
|
27
27
|
|
28
28
|
from nucliadb.common import datamanagers
|
29
29
|
from nucliadb.ingest.fields.exceptions import InvalidFieldClass, InvalidPBClass
|
@@ -31,6 +31,7 @@ from nucliadb_protos.resources_pb2 import (
|
|
31
31
|
CloudFile,
|
32
32
|
ExtractedTextWrapper,
|
33
33
|
ExtractedVectorsWrapper,
|
34
|
+
FieldAuthor,
|
34
35
|
FieldComputedMetadata,
|
35
36
|
FieldComputedMetadataWrapper,
|
36
37
|
FieldQuestionAnswers,
|
@@ -56,8 +57,11 @@ class FieldTypes(str, enum.Enum):
|
|
56
57
|
QUESTION_ANSWERS = "question_answers"
|
57
58
|
|
58
59
|
|
59
|
-
|
60
|
-
|
60
|
+
PbType = TypeVar("PbType", bound=Message)
|
61
|
+
|
62
|
+
|
63
|
+
class Field(Generic[PbType]):
|
64
|
+
pbklass: Type[PbType]
|
61
65
|
type: str = "x"
|
62
66
|
value: Optional[Any]
|
63
67
|
extracted_text: Optional[ExtractedText]
|
@@ -122,7 +126,7 @@ class Field:
|
|
122
126
|
key = FieldTypes.FIELD_VECTORS.value
|
123
127
|
return self.storage.file_extracted(self.kbid, self.uuid, self.type, self.id, key)
|
124
128
|
|
125
|
-
async def db_get_value(self):
|
129
|
+
async def db_get_value(self) -> Optional[PbType]:
|
126
130
|
if self.value is None:
|
127
131
|
payload = await datamanagers.fields.get_raw(
|
128
132
|
self.resource.txn,
|
@@ -132,7 +136,7 @@ class Field:
|
|
132
136
|
field_id=self.id,
|
133
137
|
)
|
134
138
|
if payload is None:
|
135
|
-
return
|
139
|
+
return None
|
136
140
|
|
137
141
|
self.value = self.pbklass()
|
138
142
|
self.value.ParseFromString(payload)
|
@@ -470,6 +474,11 @@ class Field:
|
|
470
474
|
self.large_computed_metadata = payload
|
471
475
|
return self.large_computed_metadata
|
472
476
|
|
477
|
+
async def generated_by(self) -> FieldAuthor:
|
478
|
+
author = FieldAuthor()
|
479
|
+
author.user.SetInParent()
|
480
|
+
return author
|
481
|
+
|
473
482
|
def serialize(self):
|
474
483
|
return self.value.SerializeToString()
|
475
484
|
|
nucliadb/ingest/fields/file.py
CHANGED
@@ -26,7 +26,7 @@ from nucliadb_utils.storages.storage import StorageField
|
|
26
26
|
FILE_METADATA = "file_metadata"
|
27
27
|
|
28
28
|
|
29
|
-
class File(Field):
|
29
|
+
class File(Field[FieldFile]):
|
30
30
|
pbklass = FieldFile
|
31
31
|
value: FieldFile
|
32
32
|
type: str = "f"
|
@@ -57,7 +57,7 @@ class File(Field):
|
|
57
57
|
|
58
58
|
await self.db_set_value(payload)
|
59
59
|
|
60
|
-
async def get_value(self) -> FieldFile:
|
60
|
+
async def get_value(self) -> Optional[FieldFile]:
|
61
61
|
return await self.db_get_value()
|
62
62
|
|
63
63
|
async def set_file_extracted_data(self, file_extracted_data: FileExtractedData):
|
nucliadb/ingest/fields/link.py
CHANGED
@@ -26,7 +26,7 @@ from nucliadb_utils.storages.storage import StorageField
|
|
26
26
|
LINK_METADATA = "link_metadata"
|
27
27
|
|
28
28
|
|
29
|
-
class Link(Field):
|
29
|
+
class Link(Field[FieldLink]):
|
30
30
|
pbklass = FieldLink
|
31
31
|
value: FieldLink
|
32
32
|
type: str = "u"
|
@@ -45,7 +45,7 @@ class Link(Field):
|
|
45
45
|
async def set_value(self, payload: FieldLink):
|
46
46
|
await self.db_set_value(payload)
|
47
47
|
|
48
|
-
async def get_value(self) -> FieldLink:
|
48
|
+
async def get_value(self) -> Optional[FieldLink]:
|
49
49
|
return await self.db_get_value()
|
50
50
|
|
51
51
|
async def set_link_extracted_data(self, link_extracted_data: LinkExtractedData):
|
nucliadb/ingest/fields/text.py
CHANGED
@@ -19,20 +19,26 @@
|
|
19
19
|
#
|
20
20
|
|
21
21
|
import hashlib
|
22
|
+
from typing import Optional
|
22
23
|
|
23
24
|
from nucliadb.ingest.fields.base import Field
|
24
|
-
from nucliadb_protos.resources_pb2 import FieldText
|
25
|
+
from nucliadb_protos.resources_pb2 import FieldAuthor, FieldText
|
25
26
|
|
26
27
|
|
27
|
-
class Text(Field):
|
28
|
+
class Text(Field[FieldText]):
|
28
29
|
pbklass = FieldText
|
29
30
|
value: FieldText
|
30
31
|
type: str = "t"
|
31
32
|
|
33
|
+
async def generated_by(self) -> FieldAuthor:
|
34
|
+
value = await self.get_value()
|
35
|
+
assert value is not None, "Can't know who generated the field if it has no value!"
|
36
|
+
return value.generated_by
|
37
|
+
|
32
38
|
async def set_value(self, payload: FieldText):
|
33
39
|
if payload.md5 == "":
|
34
40
|
payload.md5 = hashlib.md5(payload.body.encode()).hexdigest()
|
35
41
|
await self.db_set_value(payload)
|
36
42
|
|
37
|
-
async def get_value(self) -> FieldText:
|
43
|
+
async def get_value(self) -> Optional[FieldText]:
|
38
44
|
return await self.db_get_value()
|
nucliadb/ingest/orm/brain.py
CHANGED
@@ -39,6 +39,7 @@ from nucliadb_protos.noderesources_pb2 import Resource as PBBrainResource
|
|
39
39
|
from nucliadb_protos.resources_pb2 import (
|
40
40
|
Basic,
|
41
41
|
ExtractedText,
|
42
|
+
FieldAuthor,
|
42
43
|
FieldComputedMetadata,
|
43
44
|
FieldMetadata,
|
44
45
|
Metadata,
|
@@ -558,6 +559,7 @@ class ResourceBrain:
|
|
558
559
|
field_key: str,
|
559
560
|
metadata: Optional[FieldComputedMetadata],
|
560
561
|
uuid: str,
|
562
|
+
generated_by: FieldAuthor,
|
561
563
|
basic_user_metadata: Optional[UserMetadata] = None,
|
562
564
|
basic_user_fieldmetadata: Optional[UserFieldMetadata] = None,
|
563
565
|
):
|
@@ -573,6 +575,7 @@ class ResourceBrain:
|
|
573
575
|
"l": set(), # classification labels
|
574
576
|
"e": set(), # entities
|
575
577
|
"mt": set(), # mime type
|
578
|
+
"g/da": set(), # generated by
|
576
579
|
}
|
577
580
|
if metadata is not None:
|
578
581
|
for meta in metadata.split_metadata.values():
|
@@ -623,6 +626,20 @@ class ResourceBrain:
|
|
623
626
|
paragraph_annotation.key
|
624
627
|
].labels.append(label)
|
625
628
|
|
629
|
+
if generated_by.WhichOneof("author") == "data_augmentation":
|
630
|
+
field_type, field_id = field_key.split("/")
|
631
|
+
da_task_id = ids.extract_data_augmentation_id(field_id)
|
632
|
+
if da_task_id is None: # pragma: nocover
|
633
|
+
logger.warning(
|
634
|
+
"Data augmentation field id has an unexpected format! Skipping label",
|
635
|
+
extra={
|
636
|
+
"rid": uuid,
|
637
|
+
"field_id": field_id,
|
638
|
+
},
|
639
|
+
)
|
640
|
+
else:
|
641
|
+
labels["g/da"].add(da_task_id)
|
642
|
+
|
626
643
|
self.brain.texts[field_key].labels.extend(flatten_resource_labels(labels))
|
627
644
|
|
628
645
|
|
nucliadb/ingest/orm/processor/processor.py
CHANGED
@@ -300,6 +300,11 @@ class Processor:
|
|
300
300
|
await send_generated_fields_to_process(
|
301
301
|
kbid, resource, generated_fields, message
|
302
302
|
)
|
303
|
+
# TODO: remove this when processor sends the field set
|
304
|
+
for generated_text in generated_fields.texts:
|
305
|
+
message.texts[
|
306
|
+
generated_text
|
307
|
+
].generated_by.data_augmentation.SetInParent()
|
303
308
|
|
304
309
|
else:
|
305
310
|
raise InvalidBrokerMessage(f"Unknown broker message source: {message.source}")
|
nucliadb/ingest/orm/resource.py
CHANGED
@@ -820,10 +820,13 @@ class Resource:
|
|
820
820
|
):
|
821
821
|
valid_user_field_metadata = user_field_metadata
|
822
822
|
break
|
823
|
+
|
824
|
+
generated_by = await fieldobj.generated_by()
|
823
825
|
brain.apply_field_labels(
|
824
826
|
fieldkey,
|
825
827
|
extracted_metadata,
|
826
828
|
self.uuid,
|
829
|
+
generated_by,
|
827
830
|
basic.usermetadata,
|
828
831
|
valid_user_field_metadata,
|
829
832
|
)
|
nucliadb/ingest/processing.py
CHANGED
@@ -114,6 +114,10 @@ class PushPayload(BaseModel):
|
|
114
114
|
|
115
115
|
|
116
116
|
async def start_processing_engine():
|
117
|
+
processing_engine = get_utility(Utility.PROCESSING)
|
118
|
+
if processing_engine is not None:
|
119
|
+
return
|
120
|
+
|
117
121
|
if nuclia_settings.dummy_processing:
|
118
122
|
processing_engine = DummyProcessingEngine()
|
119
123
|
else:
|
nucliadb/search/api/v1/find.py
CHANGED
@@ -82,8 +82,6 @@ async def find_knowledgebox(
|
|
82
82
|
query: str = fastapi_query(SearchParamDefaults.query),
|
83
83
|
fields: list[str] = fastapi_query(SearchParamDefaults.fields),
|
84
84
|
filters: list[str] = fastapi_query(SearchParamDefaults.filters),
|
85
|
-
page_number: int = fastapi_query(SearchParamDefaults.page_number),
|
86
|
-
page_size: int = fastapi_query(SearchParamDefaults.page_size),
|
87
85
|
top_k: Optional[int] = fastapi_query(SearchParamDefaults.top_k),
|
88
86
|
min_score: Optional[float] = Query(
|
89
87
|
default=None,
|
@@ -141,9 +139,7 @@ async def find_knowledgebox(
|
|
141
139
|
query=query,
|
142
140
|
fields=fields,
|
143
141
|
filters=filters,
|
144
|
-
|
145
|
-
page_size=page_size,
|
146
|
-
top_k=top_k,
|
142
|
+
top_k=top_k, # type: ignore
|
147
143
|
min_score=min_score_from_query_params(min_score_bm25, min_score_semantic, min_score),
|
148
144
|
vectorset=vectorset,
|
149
145
|
range_creation_end=range_creation_end,
|
nucliadb/search/api/v1/resource/search.py
CHANGED
@@ -17,7 +17,7 @@
|
|
17
17
|
# You should have received a copy of the GNU Affero General Public License
|
18
18
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
19
|
#
|
20
|
-
from typing import Optional, Union
|
20
|
+
from typing import Optional, Union, cast
|
21
21
|
|
22
22
|
from fastapi import Header, Request, Response
|
23
23
|
from fastapi_versioning import version
|
@@ -64,8 +64,6 @@ async def resource_search(
|
|
64
64
|
faceted: list[str] = fastapi_query(SearchParamDefaults.faceted),
|
65
65
|
sort: Optional[SortField] = fastapi_query(SearchParamDefaults.sort_field, alias="sort_field"),
|
66
66
|
sort_order: SortOrder = fastapi_query(SearchParamDefaults.sort_order),
|
67
|
-
page_number: int = fastapi_query(SearchParamDefaults.page_number),
|
68
|
-
page_size: int = fastapi_query(SearchParamDefaults.page_size),
|
69
67
|
top_k: Optional[int] = fastapi_query(SearchParamDefaults.top_k),
|
70
68
|
range_creation_start: Optional[DateTime] = fastapi_query(SearchParamDefaults.range_creation_start),
|
71
69
|
range_creation_end: Optional[DateTime] = fastapi_query(SearchParamDefaults.range_creation_end),
|
@@ -80,9 +78,9 @@ async def resource_search(
|
|
80
78
|
debug: bool = fastapi_query(SearchParamDefaults.debug),
|
81
79
|
shards: list[str] = fastapi_query(SearchParamDefaults.shards),
|
82
80
|
) -> Union[ResourceSearchResults, HTTPClientError]:
|
83
|
-
|
84
|
-
|
85
|
-
|
81
|
+
top_k = top_k or SearchParamDefaults.top_k # type: ignore
|
82
|
+
top_k = cast(int, top_k)
|
83
|
+
|
86
84
|
with cache.request_caches():
|
87
85
|
try:
|
88
86
|
pb_query = await paragraph_query_to_pb(
|
@@ -92,8 +90,8 @@ async def resource_search(
|
|
92
90
|
fields,
|
93
91
|
filters,
|
94
92
|
faceted,
|
95
|
-
|
96
|
-
|
93
|
+
0,
|
94
|
+
top_k,
|
97
95
|
range_creation_start,
|
98
96
|
range_creation_end,
|
99
97
|
range_modification_start,
|
@@ -111,8 +109,8 @@ async def resource_search(
|
|
111
109
|
# We need to merge
|
112
110
|
search_results = await merge_paragraphs_results(
|
113
111
|
results,
|
114
|
-
count=
|
115
|
-
page=
|
112
|
+
count=top_k,
|
113
|
+
page=0,
|
116
114
|
kbid=kbid,
|
117
115
|
highlight_split=highlight,
|
118
116
|
min_score=0.0,
|
nucliadb/search/api/v1/search.py
CHANGED
@@ -113,8 +113,6 @@ async def search_knowledgebox(
|
|
113
113
|
sort_field: SortField = fastapi_query(SearchParamDefaults.sort_field),
|
114
114
|
sort_limit: Optional[int] = fastapi_query(SearchParamDefaults.sort_limit),
|
115
115
|
sort_order: SortOrder = fastapi_query(SearchParamDefaults.sort_order),
|
116
|
-
page_number: int = fastapi_query(SearchParamDefaults.page_number, deprecated=True),
|
117
|
-
page_size: int = fastapi_query(SearchParamDefaults.page_size, deprecated=True),
|
118
116
|
top_k: int = fastapi_query(SearchParamDefaults.top_k),
|
119
117
|
min_score: Optional[float] = Query(
|
120
118
|
default=None,
|
@@ -178,8 +176,6 @@ async def search_knowledgebox(
|
|
178
176
|
if sort_field is not None
|
179
177
|
else None
|
180
178
|
),
|
181
|
-
page_number=page_number,
|
182
|
-
page_size=page_size,
|
183
179
|
top_k=top_k,
|
184
180
|
min_score=min_score_from_query_params(min_score_bm25, min_score_semantic, min_score),
|
185
181
|
vectorset=vectorset,
|
@@ -227,8 +223,8 @@ async def catalog_get(
|
|
227
223
|
sort_field: SortField = fastapi_query(SearchParamDefaults.sort_field),
|
228
224
|
sort_limit: Optional[int] = fastapi_query(SearchParamDefaults.sort_limit),
|
229
225
|
sort_order: SortOrder = fastapi_query(SearchParamDefaults.sort_order),
|
230
|
-
page_number: int = fastapi_query(SearchParamDefaults.
|
231
|
-
page_size: int = fastapi_query(SearchParamDefaults.
|
226
|
+
page_number: int = fastapi_query(SearchParamDefaults.catalog_page_number),
|
227
|
+
page_size: int = fastapi_query(SearchParamDefaults.catalog_page_size),
|
232
228
|
shards: list[str] = fastapi_query(SearchParamDefaults.shards, deprecated=True),
|
233
229
|
with_status: Optional[ResourceProcessingStatus] = fastapi_query(
|
234
230
|
SearchParamDefaults.with_status, deprecated="Use filters instead"
|
@@ -417,9 +413,6 @@ async def search(
|
|
417
413
|
do_audit: bool = True,
|
418
414
|
with_status: Optional[ResourceProcessingStatus] = None,
|
419
415
|
) -> tuple[KnowledgeboxSearchResults, bool]:
|
420
|
-
if item.page_number > 0:
|
421
|
-
logger.warning("Someone is still using pagination!", extra={"kbid": kbid, "endpoint": "search"})
|
422
|
-
|
423
416
|
audit = get_audit()
|
424
417
|
start_time = time()
|
425
418
|
|
@@ -438,8 +431,8 @@ async def search(
|
|
438
431
|
keyword_filters=[],
|
439
432
|
faceted=item.faceted,
|
440
433
|
sort=item.sort,
|
441
|
-
page_number=
|
442
|
-
page_size=item.
|
434
|
+
page_number=0,
|
435
|
+
page_size=item.top_k,
|
443
436
|
min_score=item.min_score,
|
444
437
|
range_creation_start=item.range_creation_start,
|
445
438
|
range_creation_end=item.range_creation_end,
|
@@ -468,8 +461,8 @@ async def search(
|
|
468
461
|
# We need to merge
|
469
462
|
search_results = await merge_results(
|
470
463
|
results,
|
471
|
-
count=item.
|
472
|
-
page=
|
464
|
+
count=item.top_k,
|
465
|
+
page=0,
|
473
466
|
kbid=kbid,
|
474
467
|
show=item.show,
|
475
468
|
field_type_filter=item.field_type_filter,
|
nucliadb/search/search/chat/ask.py
CHANGED
@@ -883,8 +883,7 @@ def calculate_prequeries_for_json_schema(
|
|
883
883
|
features=features,
|
884
884
|
filters=[],
|
885
885
|
keyword_filters=[],
|
886
|
-
|
887
|
-
page_size=10,
|
886
|
+
top_k=10,
|
888
887
|
min_score=ask_request.min_score,
|
889
888
|
vectorset=ask_request.vectorset,
|
890
889
|
highlight=False,
|
nucliadb/search/search/find.py
CHANGED
@@ -38,14 +38,12 @@ from nucliadb.search.search.metrics import (
|
|
38
38
|
RAGMetrics,
|
39
39
|
)
|
40
40
|
from nucliadb.search.search.query import QueryParser
|
41
|
-
from nucliadb.search.search.query_parser import models as parser_models
|
42
41
|
from nucliadb.search.search.query_parser.parser import parse_find
|
43
42
|
from nucliadb.search.search.rank_fusion import (
|
44
43
|
RankFusionAlgorithm,
|
45
44
|
get_rank_fusion,
|
46
45
|
)
|
47
46
|
from nucliadb.search.search.rerankers import (
|
48
|
-
NoopReranker,
|
49
47
|
Reranker,
|
50
48
|
RerankingOptions,
|
51
49
|
get_reranker,
|
@@ -77,9 +75,6 @@ async def find(
|
|
77
75
|
generative_model: Optional[str] = None,
|
78
76
|
metrics: RAGMetrics = RAGMetrics(),
|
79
77
|
) -> tuple[KnowledgeboxFindResults, bool, QueryParser]:
|
80
|
-
if item.page_number > 0:
|
81
|
-
logger.warning("Someone is still using pagination!", extra={"kbid": kbid, "endpoint": "find"})
|
82
|
-
|
83
78
|
external_index_manager = await get_external_index_manager(kbid=kbid)
|
84
79
|
if external_index_manager is not None:
|
85
80
|
return await _external_index_retrieval(
|
@@ -127,8 +122,8 @@ async def _index_node_retrieval(
|
|
127
122
|
relation_subgraph_query=pb_query.relations.subgraph,
|
128
123
|
min_score_bm25=pb_query.min_score_bm25,
|
129
124
|
min_score_semantic=pb_query.min_score_semantic,
|
130
|
-
page_size=item.
|
131
|
-
page_number=
|
125
|
+
page_size=item.top_k,
|
126
|
+
page_number=0,
|
132
127
|
show=item.show,
|
133
128
|
extracted=item.extracted,
|
134
129
|
field_type_filter=item.field_type_filter,
|
@@ -232,7 +227,7 @@ async def _external_index_retrieval(
|
|
232
227
|
query=item.query,
|
233
228
|
total=0,
|
234
229
|
page_number=0,
|
235
|
-
page_size=
|
230
|
+
page_size=item.top_k,
|
236
231
|
relations=None, # Not implemented for external indexes yet
|
237
232
|
autofilters=[], # Not implemented for external indexes yet
|
238
233
|
min_score=results_min_score,
|
@@ -268,16 +263,7 @@ async def query_parser_from_find_request(
|
|
268
263
|
parsed = parse_find(item)
|
269
264
|
|
270
265
|
rank_fusion = get_rank_fusion(parsed.rank_fusion)
|
271
|
-
|
272
|
-
reranker: Reranker
|
273
|
-
if item.page_number > 0 and isinstance(parsed.reranker, parser_models.Reranker):
|
274
|
-
logger.warning(
|
275
|
-
"Trying to use predict reranker with pagination. Reranker won't be used",
|
276
|
-
extra={"kbid": kbid},
|
277
|
-
)
|
278
|
-
reranker = NoopReranker()
|
279
|
-
else:
|
280
|
-
reranker = get_reranker(parsed.reranker)
|
266
|
+
reranker = get_reranker(parsed.reranker)
|
281
267
|
|
282
268
|
query_parser = QueryParser(
|
283
269
|
kbid=kbid,
|
@@ -287,8 +273,8 @@ async def query_parser_from_find_request(
|
|
287
273
|
keyword_filters=item.keyword_filters,
|
288
274
|
faceted=None,
|
289
275
|
sort=None,
|
290
|
-
page_number=
|
291
|
-
page_size=item.
|
276
|
+
page_number=0,
|
277
|
+
page_size=item.top_k,
|
292
278
|
min_score=item.min_score,
|
293
279
|
range_creation_start=item.range_creation_start,
|
294
280
|
range_creation_end=item.range_creation_end,
|
nucliadb/search/search/query_parser/parser.py
CHANGED
@@ -72,7 +72,8 @@ class _FindParser:
|
|
72
72
|
# while pagination is still there, FindRequest has a validator that converts
|
73
73
|
# top_k to page_number and page_size. To get top_k, we can compute it from
|
74
74
|
# those
|
75
|
-
|
75
|
+
assert self.item.top_k is not None, "top_k must have an int value"
|
76
|
+
top_k = self.item.top_k
|
76
77
|
return top_k
|
77
78
|
|
78
79
|
def _parse_rank_fusion(self) -> RankFusion:
|
{nucliadb-6.1.0.post2589.dist-info → nucliadb-6.1.0.post2602.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: nucliadb
|
3
|
-
Version: 6.1.0.post2589
|
3
|
+
Version: 6.1.0.post2602
|
4
4
|
Home-page: https://docs.nuclia.dev/docs/management/nucliadb/intro
|
5
5
|
Author: NucliaDB Community
|
6
6
|
Author-email: nucliadb@nuclia.com
|
@@ -22,10 +22,10 @@ Classifier: Programming Language :: Python :: 3.12
|
|
22
22
|
Classifier: Programming Language :: Python :: 3 :: Only
|
23
23
|
Requires-Python: >=3.9, <4
|
24
24
|
Description-Content-Type: text/markdown
|
25
|
-
Requires-Dist: nucliadb-telemetry[all]>=6.1.0.post2589
|
26
|
-
Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.1.0.post2589
|
27
|
-
Requires-Dist: nucliadb-protos>=6.1.0.post2589
|
28
|
-
Requires-Dist: nucliadb-models>=6.1.0.post2589
|
25
|
+
Requires-Dist: nucliadb-telemetry[all]>=6.1.0.post2602
|
26
|
+
Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.1.0.post2602
|
27
|
+
Requires-Dist: nucliadb-protos>=6.1.0.post2602
|
28
|
+
Requires-Dist: nucliadb-models>=6.1.0.post2602
|
29
29
|
Requires-Dist: nucliadb-admin-assets>=1.0.0.post1224
|
30
30
|
Requires-Dist: nucliadb-node-binding>=2.26.0
|
31
31
|
Requires-Dist: uvicorn
|
{nucliadb-6.1.0.post2589.dist-info → nucliadb-6.1.0.post2602.dist-info}/RECORD
CHANGED
@@ -37,7 +37,7 @@ nucliadb/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
37
37
|
nucliadb/common/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
|
38
38
|
nucliadb/common/constants.py,sha256=QpigxJh_CtD85Evy0PtV5cVq6x0U_f9xfIcXz1ymkUg,869
|
39
39
|
nucliadb/common/counters.py,sha256=yhJEmmrglTSrDmB8OjaFLkZ__TwhTxayyQrtacnB55I,957
|
40
|
-
nucliadb/common/ids.py,sha256=
|
40
|
+
nucliadb/common/ids.py,sha256=P92d9aNJCd1VpGgbqnab6g35iQalpUIW_eaaxEin2mE,7439
|
41
41
|
nucliadb/common/locking.py,sha256=RL0CabZVPzxHZyUjYeUyLvsJTm7W3J9o4fEgsY_ufNc,5896
|
42
42
|
nucliadb/common/nidx.py,sha256=D74oNdniWjbc6gBBDwZP74NH-egTIORHhbfzgIto8DE,8667
|
43
43
|
nucliadb/common/cluster/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
|
@@ -108,7 +108,7 @@ nucliadb/ingest/__init__.py,sha256=fsw3C38VP50km3R-nHL775LNGPpJ4JxqXJ2Ib1f5SqE,1
|
|
108
108
|
nucliadb/ingest/app.py,sha256=L8MDbURnSdD6yI4yCnSbmnNccsnha-zJEkKSaGk1xMg,7612
|
109
109
|
nucliadb/ingest/cache.py,sha256=w7jMMzamOmQ7gwXna6Dqm6isRNBVv6l5BTBlTxaYWjE,1005
|
110
110
|
nucliadb/ingest/partitions.py,sha256=2NIhMYbNT0TNBL6bX1UMSi7vxFGICstCKEqsB0TXHOE,2410
|
111
|
-
nucliadb/ingest/processing.py,sha256=
|
111
|
+
nucliadb/ingest/processing.py,sha256=x8FGnq2epsGl0QEzdYlgCys9MpxtV5_WO09hc7Wy150,20254
|
112
112
|
nucliadb/ingest/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
113
113
|
nucliadb/ingest/serialize.py,sha256=GSDfrO4JLm-QLKw8LJ7TD1JFcXXvwm-ugXzbCfGh3Fk,15492
|
114
114
|
nucliadb/ingest/settings.py,sha256=SDQpMRsTsNyi6IDxCJy6BZVUSKUzwAMuxf6ktp31VMM,3130
|
@@ -123,27 +123,27 @@ nucliadb/ingest/consumer/service.py,sha256=EZM1sABW_7bj6j2UgKUHUuK-EGIEYnLdtPAn8
|
|
123
123
|
nucliadb/ingest/consumer/shard_creator.py,sha256=19wf-Bu_9hb_muCDVblamWuvLr09e5dMu9Id5I4-rGw,4324
|
124
124
|
nucliadb/ingest/consumer/utils.py,sha256=jpX8D4lKzuPCpArQLZeX_Zczq3pfen_zAf8sPJfOEZU,2642
|
125
125
|
nucliadb/ingest/fields/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
|
126
|
-
nucliadb/ingest/fields/base.py,sha256=
|
127
|
-
nucliadb/ingest/fields/conversation.py,sha256=
|
126
|
+
nucliadb/ingest/fields/base.py,sha256=EdWrNWG7-W5ehyPyJrNULLtoApT_AEsKCcfXwzxYMhU,19257
|
127
|
+
nucliadb/ingest/fields/conversation.py,sha256=OcQOHvi72Pm0OyNGwxLo9gONo8f1NhwASq0_gS-E64A,7021
|
128
128
|
nucliadb/ingest/fields/exceptions.py,sha256=LBZ-lw11f42Pk-ck-NSN9mSJ2kOw-NeRwb-UE31ILTQ,1171
|
129
|
-
nucliadb/ingest/fields/file.py,sha256=
|
129
|
+
nucliadb/ingest/fields/file.py,sha256=1v4jLg3balUua2VmSV8hHkAwPFShTUCOzufZvIUQcQw,4740
|
130
130
|
nucliadb/ingest/fields/generic.py,sha256=elgtqv15aJUq3zY7X_g0bli_2BpcwPArVvzhe54Y4Ig,1547
|
131
|
-
nucliadb/ingest/fields/link.py,sha256=
|
132
|
-
nucliadb/ingest/fields/text.py,sha256=
|
131
|
+
nucliadb/ingest/fields/link.py,sha256=kN_gjRUEEj5cy8K_BwPijYg3TiWhedc24apXYlTbRJs,4172
|
132
|
+
nucliadb/ingest/fields/text.py,sha256=tFvSQJAe0W7ePpp2_WDfLiE2yglR1OTU0Zht9acvOFw,1594
|
133
133
|
nucliadb/ingest/orm/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
|
134
|
-
nucliadb/ingest/orm/brain.py,sha256=
|
134
|
+
nucliadb/ingest/orm/brain.py,sha256=TtHPKZzv_Yz-tYkB1QjzPWUxA7Z_naMVXAfo8iov9Gw,28574
|
135
135
|
nucliadb/ingest/orm/broker_message.py,sha256=JYYUJIZEL_EqovQuw6u-FmEkjyoYlxIXJq9hFekOiks,6441
|
136
136
|
nucliadb/ingest/orm/entities.py,sha256=xWhp_JXxHjUJ-m3I08tLqkpIrThHH0LJDvIVgK4q7wA,15769
|
137
137
|
nucliadb/ingest/orm/exceptions.py,sha256=k4Esv4NtL4TrGTcsQpwrSfDhPQpiYcRbB1SpYmBX5MY,1432
|
138
138
|
nucliadb/ingest/orm/knowledgebox.py,sha256=hKKqvZUG4tdPO3Z29_qvz5-dcX5_X2x5pkNQhDGnR7Q,22746
|
139
139
|
nucliadb/ingest/orm/metrics.py,sha256=OkwMSPKLZcKba0ZTwtTiIxwBgaLMX5ydhGieKvi2y7E,1096
|
140
|
-
nucliadb/ingest/orm/resource.py,sha256=
|
140
|
+
nucliadb/ingest/orm/resource.py,sha256=_m4B14dSpO-lszpoqlhXYL3LrplB9p3NrDZC5kQbXHs,53860
|
141
141
|
nucliadb/ingest/orm/utils.py,sha256=vCe_9UxHu26JDFGLwQ0wH-XyzJIpQCTK-Ow9dtZR5Vg,2716
|
142
142
|
nucliadb/ingest/orm/processor/__init__.py,sha256=Aqd9wCNTvggkMkCY3WvoI8spdr94Jnqk-0iq9XpLs18,922
|
143
143
|
nucliadb/ingest/orm/processor/auditing.py,sha256=TeYhXGJRyQ7ROytbb2u8R0fIh_FYi3HgTu3S1ribY3U,4623
|
144
144
|
nucliadb/ingest/orm/processor/data_augmentation.py,sha256=ZF66gfHHMHCh9n9XXoTSdTXhETnzJlcylfQqhFq54Pw,5775
|
145
145
|
nucliadb/ingest/orm/processor/pgcatalog.py,sha256=f32PIEXWktWzGDws6Ffife37OAfrseP5IOti_Cb4ir8,3012
|
146
|
-
nucliadb/ingest/orm/processor/processor.py,sha256=
|
146
|
+
nucliadb/ingest/orm/processor/processor.py,sha256=Wmh_7KUT2U0oX1pMyzw1jVHNMVGoaBn-6cDYqNoTHtA,30817
|
147
147
|
nucliadb/ingest/orm/processor/sequence_manager.py,sha256=uqEphtI1Ir_yk9jRl2gPf7BlzzXWovbARY5MNZSBI_8,1704
|
148
148
|
nucliadb/ingest/service/__init__.py,sha256=MME_G_ERxzJR6JW_hfE2qcfXpmpH1kdG-S0a-M0qRm8,2043
|
149
149
|
nucliadb/ingest/service/exceptions.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
|
@@ -193,17 +193,17 @@ nucliadb/search/api/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20
|
|
193
193
|
nucliadb/search/api/v1/__init__.py,sha256=Xep7u4Q7ygHvTGRn2CzRwJRVX1jtoSxUg8usQcVbC2s,1219
|
194
194
|
nucliadb/search/api/v1/ask.py,sha256=Od2U_gaOZK6dJZ1eDGQQJ3xUVnbBih58VPYVAsQErOw,3902
|
195
195
|
nucliadb/search/api/v1/feedback.py,sha256=yrOZeElw6XLu6j_6m3QGHKjEMwZPWa9vtdCud4dNilU,2547
|
196
|
-
nucliadb/search/api/v1/find.py,sha256=
|
196
|
+
nucliadb/search/api/v1/find.py,sha256=DsnWkySu_cFajDWJIxN8DYvLL_Rm2yiCjHD8TsqPfRk,9304
|
197
197
|
nucliadb/search/api/v1/knowledgebox.py,sha256=PKT1V3vZUnBkGfkxnFGjWPuHwQarVxREDY7lAT_9k1w,8764
|
198
198
|
nucliadb/search/api/v1/predict_proxy.py,sha256=QrGzo0hKjtmyGZ6pjlJHYAh4hxwVUIOTcVcerRCw7eE,3047
|
199
199
|
nucliadb/search/api/v1/router.py,sha256=mtT07rBZcVfpa49doaw9b1tj3sdi3qLH0gn9Io6NYM0,988
|
200
|
-
nucliadb/search/api/v1/search.py,sha256=
|
200
|
+
nucliadb/search/api/v1/search.py,sha256=9QO2-AI1b2WJX8gmURB02cih1ONWjHe0-qnL1SXbF_E,19864
|
201
201
|
nucliadb/search/api/v1/suggest.py,sha256=SXxRVKT5hDSHNKlBYo8XozHHq9bGyvJOlo286lEruLE,5979
|
202
202
|
nucliadb/search/api/v1/summarize.py,sha256=VAHJvE6V3xUgEBfqNKhgoxmDqCvh30RnrEIBVhMcNLU,2499
|
203
203
|
nucliadb/search/api/v1/utils.py,sha256=5Ve-frn7LAE2jqAgB85F8RSeqxDlyA08--gS-AdOLS4,1434
|
204
204
|
nucliadb/search/api/v1/resource/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
|
205
205
|
nucliadb/search/api/v1/resource/ask.py,sha256=XMEP9_Uwy37yaXLcIYKMXGiZYNASD8RTByzQGjd9LPQ,3847
|
206
|
-
nucliadb/search/api/v1/resource/search.py,sha256=
|
206
|
+
nucliadb/search/api/v1/resource/search.py,sha256=jgYJnSSult2ah_Jfd78vbGT5URyZPDsX1Gbdj-sQgCE,4851
|
207
207
|
nucliadb/search/requesters/__init__.py,sha256=itSI7dtTwFP55YMX4iK7JzdMHS5CQVUiB1XzQu4UBh8,833
|
208
208
|
nucliadb/search/requesters/utils.py,sha256=7ovWSGzhLpZGTMi9x9nMOi7QNCgt2qah-7Kam-cIvUg,8468
|
209
209
|
nucliadb/search/search/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
|
@@ -212,7 +212,7 @@ nucliadb/search/search/cut.py,sha256=1lmQpc8p5G8okHcPZ5GKH1F60Qr72HpKGoZI7H15Wzs
|
|
212
212
|
nucliadb/search/search/exceptions.py,sha256=mbToQ-ghrv8ukLEv8S_-EZrgweWaIZZ5SIpoeuGDk6s,1154
|
213
213
|
nucliadb/search/search/fetch.py,sha256=XJHIFnZmXM_8Kb37lb4lg1GYG7cZ1plT-qAIb_QziX4,6184
|
214
214
|
nucliadb/search/search/filters.py,sha256=1MkHlJjAQqoRCj7e5cEzK2HvBxGLE17I_omsjiklbtw,6476
|
215
|
-
nucliadb/search/search/find.py,sha256=
|
215
|
+
nucliadb/search/search/find.py,sha256=KvRuPwvaZCxgxpHQtetPt9gy5DS9cszLu2oKyiDc3Cg,9891
|
216
216
|
nucliadb/search/search/find_merge.py,sha256=oM71fbLamlVmwTuSQm9Z2lRNU63Ak7iHL_6mxwvVwB4,17218
|
217
217
|
nucliadb/search/search/hydrator.py,sha256=7Zi44uf2m9b2X_b1aOV2lrWu1Vmbo9lXYgPVUGK0RGI,6728
|
218
218
|
nucliadb/search/search/merge.py,sha256=SfAzDKUEAQ2JUf6K6MEhGZZCJXwdsN9vusRIhdg7ajI,20325
|
@@ -227,7 +227,7 @@ nucliadb/search/search/shards.py,sha256=mM2aCHWhl_gwkCENXDShPukS-_qnB5tFS3UAJuzM
|
|
227
227
|
nucliadb/search/search/summarize.py,sha256=ksmYPubEQvAQgfPdZHfzB_rR19B2ci4IYZ6jLdHxZo8,4996
|
228
228
|
nucliadb/search/search/utils.py,sha256=iF2tbBA56gRMJH1TlE2hMrqeXqjoeOPt4KgRdp2m9Ek,3313
|
229
229
|
nucliadb/search/search/chat/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
|
230
|
-
nucliadb/search/search/chat/ask.py,sha256=
|
230
|
+
nucliadb/search/search/chat/ask.py,sha256=tUPsJpRCj7Sw7wHTpp5Mq1G9UDrYliCkYiIFdZ7qv_Y,33834
|
231
231
|
nucliadb/search/search/chat/exceptions.py,sha256=Siy4GXW2L7oPhIR86H3WHBhE9lkV4A4YaAszuGGUf54,1356
|
232
232
|
nucliadb/search/search/chat/images.py,sha256=PA8VWxT5_HUGfW1ULhKTK46UBsVyINtWWqEM1ulzX1E,3095
|
233
233
|
nucliadb/search/search/chat/prompt.py,sha256=TIzjI_882hJ--KLKCY8rJomtJ_CMJ-MHYtHqivgG8Lk,46819
|
@@ -235,7 +235,7 @@ nucliadb/search/search/chat/query.py,sha256=gKtlj2ms81m417Id29-DtHFxE3M4TtJvYNB0
|
|
235
235
|
nucliadb/search/search/query_parser/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
|
236
236
|
nucliadb/search/search/query_parser/exceptions.py,sha256=tuzl7ZyvVsRz6u0_3zMe60vx39nd3pi641prs-5nC0E,872
|
237
237
|
nucliadb/search/search/query_parser/models.py,sha256=BNHwpItSLCZIDclcDpwDiu-BBQfmEn6xkxCErrMPgVU,1590
|
238
|
-
nucliadb/search/search/query_parser/parser.py,sha256=
|
238
|
+
nucliadb/search/search/query_parser/parser.py,sha256=DGVtph_ZlRiLQJJdoH07qrUvur6LQpiozwtgbO-SNqs,4890
|
239
239
|
nucliadb/standalone/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
|
240
240
|
nucliadb/standalone/api_router.py,sha256=zR03TQ-Pd2kXx1jeV83Puw19112Z8Jhln7p1cAn69kg,6699
|
241
241
|
nucliadb/standalone/app.py,sha256=mAApNK_iVsQgJyd-mtwCeZq5csSimwnXmlQGH9a70pE,5586
|
@@ -331,9 +331,9 @@ nucliadb/writer/tus/local.py,sha256=7jYa_w9b-N90jWgN2sQKkNcomqn6JMVBOVeDOVYJHto,
|
|
331
331
|
nucliadb/writer/tus/s3.py,sha256=vF0NkFTXiXhXq3bCVXXVV-ED38ECVoUeeYViP8uMqcU,8357
|
332
332
|
nucliadb/writer/tus/storage.py,sha256=ToqwjoYnjI4oIcwzkhha_MPxi-k4Jk3Lt55zRwaC1SM,2903
|
333
333
|
nucliadb/writer/tus/utils.py,sha256=MSdVbRsRSZVdkaum69_0wku7X3p5wlZf4nr6E0GMKbw,2556
|
334
|
-
nucliadb-6.1.0.
|
335
|
-
nucliadb-6.1.0.post2589.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
|
336
|
-
nucliadb-6.1.0.post2589.dist-info/entry_points.txt,sha256=XqGfgFDuY3zXQc8ewXM2TRVjTModIq851zOsgrmaXx4,1268
|
337
|
-
nucliadb-6.1.0.post2589.dist-info/top_level.txt,sha256=hwYhTVnX7jkQ9gJCkVrbqEG1M4lT2F_iPQND1fCzF80,20
|
338
|
-
nucliadb-6.1.0.post2589.dist-info/zip-safe,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
|
339
|
-
nucliadb-6.1.0.post2589.dist-info/RECORD,,
|
334
|
+
nucliadb-6.1.0.post2602.dist-info/METADATA,sha256=33LSAfHphDUPl-cBPDvkh3CzM5h5lYAb3NZafBn9LdY,4390
|
335
|
+
nucliadb-6.1.0.post2602.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
|
336
|
+
nucliadb-6.1.0.post2602.dist-info/entry_points.txt,sha256=XqGfgFDuY3zXQc8ewXM2TRVjTModIq851zOsgrmaXx4,1268
|
337
|
+
nucliadb-6.1.0.post2602.dist-info/top_level.txt,sha256=hwYhTVnX7jkQ9gJCkVrbqEG1M4lT2F_iPQND1fCzF80,20
|
338
|
+
nucliadb-6.1.0.post2602.dist-info/zip-safe,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
|
339
|
+
nucliadb-6.1.0.post2602.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|