nucliadb 6.7.2.post4874__py3-none-any.whl → 6.10.0.post5705__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- migrations/0023_backfill_pg_catalog.py +8 -4
- migrations/0028_extracted_vectors_reference.py +1 -1
- migrations/0029_backfill_field_status.py +3 -4
- migrations/0032_remove_old_relations.py +2 -3
- migrations/0038_backfill_catalog_field_labels.py +8 -4
- migrations/0039_backfill_converation_splits_metadata.py +106 -0
- migrations/0040_migrate_search_configurations.py +79 -0
- migrations/0041_reindex_conversations.py +137 -0
- migrations/pg/0010_shards_index.py +34 -0
- nucliadb/search/api/v1/resource/utils.py → migrations/pg/0011_catalog_statistics.py +5 -6
- migrations/pg/0012_catalog_statistics_undo.py +26 -0
- nucliadb/backups/create.py +2 -15
- nucliadb/backups/restore.py +4 -15
- nucliadb/backups/tasks.py +4 -1
- nucliadb/common/back_pressure/cache.py +2 -3
- nucliadb/common/back_pressure/materializer.py +7 -13
- nucliadb/common/back_pressure/settings.py +6 -6
- nucliadb/common/back_pressure/utils.py +1 -0
- nucliadb/common/cache.py +9 -9
- nucliadb/common/catalog/__init__.py +79 -0
- nucliadb/common/catalog/dummy.py +36 -0
- nucliadb/common/catalog/interface.py +85 -0
- nucliadb/{search/search/pgcatalog.py → common/catalog/pg.py} +330 -232
- nucliadb/common/catalog/utils.py +56 -0
- nucliadb/common/cluster/manager.py +8 -23
- nucliadb/common/cluster/rebalance.py +484 -112
- nucliadb/common/cluster/rollover.py +36 -9
- nucliadb/common/cluster/settings.py +4 -9
- nucliadb/common/cluster/utils.py +34 -8
- nucliadb/common/context/__init__.py +7 -8
- nucliadb/common/context/fastapi.py +1 -2
- nucliadb/common/datamanagers/__init__.py +2 -4
- nucliadb/common/datamanagers/atomic.py +9 -2
- nucliadb/common/datamanagers/cluster.py +1 -2
- nucliadb/common/datamanagers/fields.py +3 -4
- nucliadb/common/datamanagers/kb.py +6 -6
- nucliadb/common/datamanagers/labels.py +2 -3
- nucliadb/common/datamanagers/resources.py +10 -33
- nucliadb/common/datamanagers/rollover.py +5 -7
- nucliadb/common/datamanagers/search_configurations.py +1 -2
- nucliadb/common/datamanagers/synonyms.py +1 -2
- nucliadb/common/datamanagers/utils.py +4 -4
- nucliadb/common/datamanagers/vectorsets.py +4 -4
- nucliadb/common/external_index_providers/base.py +32 -5
- nucliadb/common/external_index_providers/manager.py +5 -34
- nucliadb/common/external_index_providers/settings.py +1 -27
- nucliadb/common/filter_expression.py +129 -41
- nucliadb/common/http_clients/exceptions.py +8 -0
- nucliadb/common/http_clients/processing.py +16 -23
- nucliadb/common/http_clients/utils.py +3 -0
- nucliadb/common/ids.py +82 -58
- nucliadb/common/locking.py +1 -2
- nucliadb/common/maindb/driver.py +9 -8
- nucliadb/common/maindb/local.py +5 -5
- nucliadb/common/maindb/pg.py +9 -8
- nucliadb/common/nidx.py +22 -5
- nucliadb/common/vector_index_config.py +1 -1
- nucliadb/export_import/datamanager.py +4 -3
- nucliadb/export_import/exporter.py +11 -19
- nucliadb/export_import/importer.py +13 -6
- nucliadb/export_import/tasks.py +2 -0
- nucliadb/export_import/utils.py +6 -18
- nucliadb/health.py +2 -2
- nucliadb/ingest/app.py +8 -8
- nucliadb/ingest/consumer/consumer.py +8 -10
- nucliadb/ingest/consumer/pull.py +10 -8
- nucliadb/ingest/consumer/service.py +5 -30
- nucliadb/ingest/consumer/shard_creator.py +16 -5
- nucliadb/ingest/consumer/utils.py +1 -1
- nucliadb/ingest/fields/base.py +37 -49
- nucliadb/ingest/fields/conversation.py +55 -9
- nucliadb/ingest/fields/exceptions.py +1 -2
- nucliadb/ingest/fields/file.py +22 -8
- nucliadb/ingest/fields/link.py +7 -7
- nucliadb/ingest/fields/text.py +2 -3
- nucliadb/ingest/orm/brain_v2.py +89 -57
- nucliadb/ingest/orm/broker_message.py +2 -4
- nucliadb/ingest/orm/entities.py +10 -209
- nucliadb/ingest/orm/index_message.py +128 -113
- nucliadb/ingest/orm/knowledgebox.py +91 -59
- nucliadb/ingest/orm/processor/auditing.py +1 -3
- nucliadb/ingest/orm/processor/data_augmentation.py +1 -2
- nucliadb/ingest/orm/processor/processor.py +98 -153
- nucliadb/ingest/orm/processor/sequence_manager.py +1 -2
- nucliadb/ingest/orm/resource.py +82 -71
- nucliadb/ingest/orm/utils.py +1 -1
- nucliadb/ingest/partitions.py +12 -1
- nucliadb/ingest/processing.py +17 -17
- nucliadb/ingest/serialize.py +202 -145
- nucliadb/ingest/service/writer.py +15 -114
- nucliadb/ingest/settings.py +36 -15
- nucliadb/ingest/utils.py +1 -2
- nucliadb/learning_proxy.py +23 -26
- nucliadb/metrics_exporter.py +20 -6
- nucliadb/middleware/__init__.py +82 -1
- nucliadb/migrator/datamanager.py +4 -11
- nucliadb/migrator/migrator.py +1 -2
- nucliadb/migrator/models.py +1 -2
- nucliadb/migrator/settings.py +1 -2
- nucliadb/models/internal/augment.py +614 -0
- nucliadb/models/internal/processing.py +19 -19
- nucliadb/openapi.py +2 -2
- nucliadb/purge/__init__.py +3 -8
- nucliadb/purge/orphan_shards.py +1 -2
- nucliadb/reader/__init__.py +5 -0
- nucliadb/reader/api/models.py +6 -13
- nucliadb/reader/api/v1/download.py +59 -38
- nucliadb/reader/api/v1/export_import.py +4 -4
- nucliadb/reader/api/v1/knowledgebox.py +37 -9
- nucliadb/reader/api/v1/learning_config.py +33 -14
- nucliadb/reader/api/v1/resource.py +61 -9
- nucliadb/reader/api/v1/services.py +18 -14
- nucliadb/reader/app.py +3 -1
- nucliadb/reader/reader/notifications.py +1 -2
- nucliadb/search/api/v1/__init__.py +3 -0
- nucliadb/search/api/v1/ask.py +3 -4
- nucliadb/search/api/v1/augment.py +585 -0
- nucliadb/search/api/v1/catalog.py +15 -19
- nucliadb/search/api/v1/find.py +16 -22
- nucliadb/search/api/v1/hydrate.py +328 -0
- nucliadb/search/api/v1/knowledgebox.py +1 -2
- nucliadb/search/api/v1/predict_proxy.py +1 -2
- nucliadb/search/api/v1/resource/ask.py +28 -8
- nucliadb/search/api/v1/resource/ingestion_agents.py +5 -6
- nucliadb/search/api/v1/resource/search.py +9 -11
- nucliadb/search/api/v1/retrieve.py +130 -0
- nucliadb/search/api/v1/search.py +28 -32
- nucliadb/search/api/v1/suggest.py +11 -14
- nucliadb/search/api/v1/summarize.py +1 -2
- nucliadb/search/api/v1/utils.py +2 -2
- nucliadb/search/app.py +3 -2
- nucliadb/search/augmentor/__init__.py +21 -0
- nucliadb/search/augmentor/augmentor.py +232 -0
- nucliadb/search/augmentor/fields.py +704 -0
- nucliadb/search/augmentor/metrics.py +24 -0
- nucliadb/search/augmentor/paragraphs.py +334 -0
- nucliadb/search/augmentor/resources.py +238 -0
- nucliadb/search/augmentor/utils.py +33 -0
- nucliadb/search/lifecycle.py +3 -1
- nucliadb/search/predict.py +33 -19
- nucliadb/search/predict_models.py +8 -9
- nucliadb/search/requesters/utils.py +11 -10
- nucliadb/search/search/cache.py +19 -42
- nucliadb/search/search/chat/ask.py +131 -59
- nucliadb/search/search/chat/exceptions.py +3 -5
- nucliadb/search/search/chat/fetcher.py +201 -0
- nucliadb/search/search/chat/images.py +6 -4
- nucliadb/search/search/chat/old_prompt.py +1375 -0
- nucliadb/search/search/chat/parser.py +510 -0
- nucliadb/search/search/chat/prompt.py +563 -615
- nucliadb/search/search/chat/query.py +453 -32
- nucliadb/search/search/chat/rpc.py +85 -0
- nucliadb/search/search/fetch.py +3 -4
- nucliadb/search/search/filters.py +8 -11
- nucliadb/search/search/find.py +33 -31
- nucliadb/search/search/find_merge.py +124 -331
- nucliadb/search/search/graph_strategy.py +14 -12
- nucliadb/search/search/hydrator/__init__.py +49 -0
- nucliadb/search/search/hydrator/fields.py +217 -0
- nucliadb/search/search/hydrator/images.py +130 -0
- nucliadb/search/search/hydrator/paragraphs.py +323 -0
- nucliadb/search/search/hydrator/resources.py +60 -0
- nucliadb/search/search/ingestion_agents.py +5 -5
- nucliadb/search/search/merge.py +90 -94
- nucliadb/search/search/metrics.py +24 -7
- nucliadb/search/search/paragraphs.py +7 -9
- nucliadb/search/search/predict_proxy.py +44 -18
- nucliadb/search/search/query.py +14 -86
- nucliadb/search/search/query_parser/fetcher.py +51 -82
- nucliadb/search/search/query_parser/models.py +19 -48
- nucliadb/search/search/query_parser/old_filters.py +20 -19
- nucliadb/search/search/query_parser/parsers/ask.py +5 -6
- nucliadb/search/search/query_parser/parsers/catalog.py +7 -11
- nucliadb/search/search/query_parser/parsers/common.py +21 -13
- nucliadb/search/search/query_parser/parsers/find.py +6 -29
- nucliadb/search/search/query_parser/parsers/graph.py +18 -28
- nucliadb/search/search/query_parser/parsers/retrieve.py +207 -0
- nucliadb/search/search/query_parser/parsers/search.py +15 -56
- nucliadb/search/search/query_parser/parsers/unit_retrieval.py +8 -29
- nucliadb/search/search/rank_fusion.py +18 -13
- nucliadb/search/search/rerankers.py +6 -7
- nucliadb/search/search/retrieval.py +300 -0
- nucliadb/search/search/summarize.py +5 -6
- nucliadb/search/search/utils.py +3 -4
- nucliadb/search/settings.py +1 -2
- nucliadb/standalone/api_router.py +1 -1
- nucliadb/standalone/app.py +4 -3
- nucliadb/standalone/auth.py +5 -6
- nucliadb/standalone/lifecycle.py +2 -2
- nucliadb/standalone/run.py +5 -4
- nucliadb/standalone/settings.py +5 -6
- nucliadb/standalone/versions.py +3 -4
- nucliadb/tasks/consumer.py +13 -8
- nucliadb/tasks/models.py +2 -1
- nucliadb/tasks/producer.py +3 -3
- nucliadb/tasks/retries.py +8 -7
- nucliadb/train/api/utils.py +1 -3
- nucliadb/train/api/v1/shards.py +1 -2
- nucliadb/train/api/v1/trainset.py +1 -2
- nucliadb/train/app.py +1 -1
- nucliadb/train/generator.py +4 -4
- nucliadb/train/generators/field_classifier.py +2 -2
- nucliadb/train/generators/field_streaming.py +6 -6
- nucliadb/train/generators/image_classifier.py +2 -2
- nucliadb/train/generators/paragraph_classifier.py +2 -2
- nucliadb/train/generators/paragraph_streaming.py +2 -2
- nucliadb/train/generators/question_answer_streaming.py +2 -2
- nucliadb/train/generators/sentence_classifier.py +4 -10
- nucliadb/train/generators/token_classifier.py +3 -2
- nucliadb/train/generators/utils.py +6 -5
- nucliadb/train/nodes.py +3 -3
- nucliadb/train/resource.py +6 -8
- nucliadb/train/settings.py +3 -4
- nucliadb/train/types.py +11 -11
- nucliadb/train/upload.py +3 -2
- nucliadb/train/uploader.py +1 -2
- nucliadb/train/utils.py +1 -2
- nucliadb/writer/api/v1/export_import.py +4 -1
- nucliadb/writer/api/v1/field.py +15 -14
- nucliadb/writer/api/v1/knowledgebox.py +18 -56
- nucliadb/writer/api/v1/learning_config.py +5 -4
- nucliadb/writer/api/v1/resource.py +9 -20
- nucliadb/writer/api/v1/services.py +10 -132
- nucliadb/writer/api/v1/upload.py +73 -72
- nucliadb/writer/app.py +8 -2
- nucliadb/writer/resource/basic.py +12 -15
- nucliadb/writer/resource/field.py +43 -5
- nucliadb/writer/resource/origin.py +7 -0
- nucliadb/writer/settings.py +2 -3
- nucliadb/writer/tus/__init__.py +2 -3
- nucliadb/writer/tus/azure.py +5 -7
- nucliadb/writer/tus/dm.py +3 -3
- nucliadb/writer/tus/exceptions.py +3 -4
- nucliadb/writer/tus/gcs.py +15 -22
- nucliadb/writer/tus/s3.py +2 -3
- nucliadb/writer/tus/storage.py +3 -3
- {nucliadb-6.7.2.post4874.dist-info → nucliadb-6.10.0.post5705.dist-info}/METADATA +10 -11
- nucliadb-6.10.0.post5705.dist-info/RECORD +410 -0
- nucliadb/common/datamanagers/entities.py +0 -139
- nucliadb/common/external_index_providers/pinecone.py +0 -894
- nucliadb/ingest/orm/processor/pgcatalog.py +0 -129
- nucliadb/search/search/hydrator.py +0 -197
- nucliadb-6.7.2.post4874.dist-info/RECORD +0 -383
- {nucliadb-6.7.2.post4874.dist-info → nucliadb-6.10.0.post5705.dist-info}/WHEEL +0 -0
- {nucliadb-6.7.2.post4874.dist-info → nucliadb-6.10.0.post5705.dist-info}/entry_points.txt +0 -0
- {nucliadb-6.7.2.post4874.dist-info → nucliadb-6.10.0.post5705.dist-info}/top_level.txt +0 -0
```diff
--- a/nucliadb/search/search/query_parser/parsers/ask.py
+++ b/nucliadb/search/search/query_parser/parsers/ask.py
@@ -17,7 +17,8 @@
 # You should have received a copy of the GNU Affero General Public License
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 #
-
+
+from typing_extensions import assert_never
 
 from nucliadb.search.search.query_parser.fetcher import Fetcher
 from nucliadb.search.search.query_parser.models import (
@@ -26,7 +27,7 @@ from nucliadb.search.search.query_parser.models import (
 from nucliadb_models.search import AskRequest, MaxTokens
 
 
-async def parse_ask(kbid: str, item: AskRequest, *, fetcher: Optional[Fetcher] = None) -> Generation:
+async def parse_ask(kbid: str, item: AskRequest, *, fetcher: Fetcher | None = None) -> Generation:
     fetcher = fetcher or fetcher_for_ask(kbid, item)
     parser = _AskParser(kbid, item, fetcher)
     return await parser.parse()
@@ -63,10 +64,8 @@ class _AskParser:
             )
         elif isinstance(self.item.max_tokens, MaxTokens):
             max_tokens = self.item.max_tokens
-        else:  # pragma: no cover
-            # This is a trick so mypy generates an error if this branch can be reached,
-            # that is, if we are missing some ifs
-            _a: int = "a"
+        else:  # pragma: no cover
+            assert_never(self.item.max_tokens)
 
         max_context_tokens = await self.fetcher.get_max_context_tokens(max_tokens)
         max_answer_tokens = self.fetcher.get_max_answer_tokens(max_tokens)
```
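These `ask.py` hunks replace nucliadb's old exhaustiveness trick (an intentionally ill-typed `_a: int = "a"` assignment in the unreachable branch) with `typing_extensions.assert_never`. A minimal sketch of the pattern, using a simplified `MaxTokens` stand-in rather than the real `nucliadb_models` class:

```python
from dataclasses import dataclass

from typing_extensions import assert_never


@dataclass
class MaxTokens:  # simplified stand-in for nucliadb_models.search.MaxTokens
    answer: int


def parse_max_tokens(value: int | MaxTokens) -> MaxTokens:
    if isinstance(value, int):
        # a bare int is shorthand for "max answer tokens"
        return MaxTokens(answer=value)
    elif isinstance(value, MaxTokens):
        return value
    else:  # pragma: no cover
        # mypy narrows `value` to Never once every union member is handled;
        # if a new member is added without a branch, this line becomes a
        # type error, and at runtime assert_never raises AssertionError
        assert_never(value)
```

Unlike the old assignment, which only ever tripped mypy, `assert_never` also fails loudly at runtime if an unhandled value slips through.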
```diff
--- a/nucliadb/search/search/query_parser/parsers/catalog.py
+++ b/nucliadb/search/search/query_parser/parsers/catalog.py
@@ -18,14 +18,13 @@
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 #
 
+from typing_extensions import assert_never
+
 from nucliadb.common import datamanagers
+from nucliadb.common.catalog.interface import CatalogExpression, CatalogQuery
 from nucliadb.common.exceptions import InvalidQueryError
-from nucliadb.common.filter_expression import facet_from_filter
+from nucliadb.common.filter_expression import FacetFilter, facet_from_filter
 from nucliadb.search.search.filters import translate_label
-from nucliadb.search.search.query_parser.models import (
-    CatalogExpression,
-    CatalogQuery,
-)
 from nucliadb_models import search as search_models
 from nucliadb_models.filters import (
     And,
@@ -81,7 +80,6 @@ async def parse_catalog(kbid: str, item: search_models.CatalogRequest) -> Catalo
     sort = SortOptions(
         field=SortField.CREATED,
         order=SortOrder.DESC,
-        limit=None,
     )
 
     if isinstance(item.query, search_models.CatalogQuery):
@@ -185,18 +183,16 @@ async def parse_filter_expression(expr: ResourceFilterExpression, kbid: str) ->
             if rid is None:
                 raise InvalidQueryError("slug", f"Cannot find slug {expr.slug}")
             cat.resource_id = rid
-        else:  # pragma: no cover
+        else:  # pragma: no cover
             # Cannot happen due to model validation
             raise ValueError("Resource needs id or slug")
     elif isinstance(expr, DateCreated):
         cat.date = CatalogExpression.Date(field="created_at", since=expr.since, until=expr.until)
     elif isinstance(expr, DateModified):
         cat.date = CatalogExpression.Date(field="modified_at", since=expr.since, until=expr.until)
-    elif isinstance(expr,
+    elif isinstance(expr, FacetFilter):
         cat.facet = facet_from_filter(expr)
     else:
-        # This is a trick so mypy generates an error if this branch can be reached,
-        # that is, if we are missing some ifs
-        _a: int = "a"
+        assert_never(expr)
 
     return cat
```
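The `parse_filter_expression` hunk above dispatches on the public filter models and maps each date filter onto a `CatalogExpression.Date` naming the catalog column it constrains. A hypothetical sketch of that mapping with simplified dataclass stand-ins (the real types live in `nucliadb_models.filters` and `nucliadb.common.catalog.interface`):

```python
from __future__ import annotations

from dataclasses import dataclass
from datetime import datetime


@dataclass
class DateCreated:  # stand-in for the public filter model
    since: datetime | None = None
    until: datetime | None = None


@dataclass
class DateModified:  # stand-in for the public filter model
    since: datetime | None = None
    until: datetime | None = None


@dataclass
class CatalogDate:  # stand-in for CatalogExpression.Date
    field: str
    since: datetime | None
    until: datetime | None


def parse_date_filter(expr: DateCreated | DateModified) -> CatalogDate:
    # each public filter model names the catalog column it constrains
    if isinstance(expr, DateCreated):
        return CatalogDate("created_at", expr.since, expr.until)
    else:
        return CatalogDate("modified_at", expr.since, expr.until)
```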
```diff
--- a/nucliadb/search/search/query_parser/parsers/common.py
+++ b/nucliadb/search/search/query_parser/parsers/common.py
@@ -19,9 +19,7 @@
 #
 import re
 import string
-from typing import Optional, Union
 
-from nucliadb.common.exceptions import InvalidQueryError
 from nucliadb.search import logger
 from nucliadb.search.search.query_parser.fetcher import Fetcher
 from nucliadb.search.search.query_parser.models import (
@@ -32,15 +30,20 @@ from nucliadb_models import search as search_models
 
 DEFAULT_GENERIC_SEMANTIC_THRESHOLD = 0.7
 
-# -* is an invalid query in tantivy and it won't return results but if you add some whitespaces
-# between - and *, it will actually trigger a tantivy bug and panic
-INVALID_QUERY = re.compile(r"- +\*")
 
+def validate_query_syntax(query: str) -> str:
+    """Filter some queries that panic tantivy, better than returning the 500"""
 
-def validate_query_syntax(query: str) -> None:
-    # Filter some queries that panic tantivy, better than returning the 500
+    # -* is an invalid query in tantivy and it won't return results but if you add some whitespaces
+    # between - and *, it will actually trigger a tantivy bug and panic
+    INVALID_QUERY = re.compile(r"- *\*+")
     if INVALID_QUERY.search(query):
-        raise InvalidQueryError(
+        # remove the * and extra spaces, as it's probably what doesn't have
+        # meaning in both cases: -* and - *
+        fixed = re.sub(INVALID_QUERY, "- ", query)
+        query = fixed
+
+    return query
 
 
 def is_empty_query(request: search_models.BaseSearchRequest) -> bool:
@@ -85,6 +88,7 @@ async def parse_keyword_query(
     fetcher: Fetcher,
 ) -> KeywordQuery:
     query = item.query
+
     # If there was a rephrase with image, we should use the rephrased query for keyword search
     rephrased_query = await fetcher.get_rephrased_query()
     if item.query_image is not None and rephrased_query is not None:
@@ -98,6 +102,10 @@ async def parse_keyword_query(
         query = synonyms_query
         is_synonyms_query = True
 
+    # after all query transformations, pass a validator that can fix some
+    # queries that trigger a panic on the index
+    query = validate_query_syntax(query)
+
     min_score = parse_keyword_min_score(item.min_score)
 
     return KeywordQuery(
@@ -108,7 +116,7 @@ async def parse_keyword_query(
 
 
 async def parse_semantic_query(
-    item: Union[search_models.SearchRequest, search_models.FindRequest],
+    item: search_models.SearchRequest | search_models.FindRequest,
     *,
     fetcher: Fetcher,
 ) -> SemanticQuery:
@@ -121,7 +129,7 @@ async def parse_semantic_query(
 
 
 def parse_keyword_min_score(
-    min_score: Optional[Union[float, search_models.MinScore]],
+    min_score: float | search_models.MinScore | None,
 ) -> float:
     # Keep backward compatibility with the deprecated min_score payload
     # parameter being a float (specifying semantic)
@@ -132,7 +140,7 @@ def parse_keyword_min_score(
 
 
 async def parse_semantic_min_score(
-    min_score: Optional[Union[float, search_models.MinScore]],
+    min_score: float | search_models.MinScore | None,
     *,
     fetcher: Fetcher,
 ):
@@ -161,7 +169,7 @@ async def query_with_synonyms(
     query: str,
     *,
     fetcher: Fetcher,
-) -> Optional[str]:
+) -> str | None:
     """
     Replace the terms in the query with an expression that will make it match with the configured synonyms.
     We're using the Tantivy's query language here: https://docs.rs/tantivy/latest/tantivy/query/struct.QueryParser.html
@@ -183,7 +191,7 @@ async def query_with_synonyms(
     variants: dict[str, str] = {}
     for term, term_synonyms in synonyms.terms.items():
         if len(term_synonyms.synonyms) > 0:
-            variants[term] = "({})".format(" OR ".join([term] + list(term_synonyms.synonyms)))
+            variants[term] = "({})".format(" OR ".join([term, *list(term_synonyms.synonyms)]))
 
     # Split the query into terms
     query_terms = query.split()
```
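The net effect of the `validate_query_syntax` rewrite: instead of rejecting tantivy-panicking fragments with an `InvalidQueryError`, the parser now repairs the query and continues. A condensed, runnable sketch of the new behavior, with the regex and replacement taken from the hunk:

```python
import re

# "-*"-style fragments either return nothing or panic tantivy, so strip the
# meaningless "*" instead of failing the whole request
INVALID_QUERY = re.compile(r"- *\*+")


def validate_query_syntax(query: str) -> str:
    if INVALID_QUERY.search(query):
        # drop the "*" and extra spaces; they carry no meaning in "-*" or "- *"
        query = INVALID_QUERY.sub("- ", query)
    return query


assert validate_query_syntax("everything -*") == "everything - "
assert validate_query_syntax("everything - *") == "everything - "
assert validate_query_syntax("plain query") == "plain query"
```

Note the pattern also widened from `- +\*` to `- *\*+`, so a bare `-*` with no intervening whitespace is caught as well, and the check now runs after all query transformations (synonyms, rephrase) rather than on the raw request.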
```diff
--- a/nucliadb/search/search/query_parser/parsers/find.py
+++ b/nucliadb/search/search/query_parser/parsers/find.py
@@ -18,7 +18,6 @@
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 #
 
-from typing import Optional
 
 from nidx_protos import nodereader_pb2
 from pydantic import ValidationError
@@ -27,7 +26,6 @@ from nucliadb.common.exceptions import InvalidQueryError
 from nucliadb.common.filter_expression import parse_expression
 from nucliadb.common.models_utils.from_proto import RelationNodeTypeMap
 from nucliadb.search.search.metrics import query_parser_observer
-from nucliadb.search.search.query import expand_entities
 from nucliadb.search.search.query_parser.exceptions import InternalParserError
 from nucliadb.search.search.query_parser.fetcher import Fetcher
 from nucliadb.search.search.query_parser.models import (
@@ -57,7 +55,6 @@ from .common import (
     parse_semantic_query,
     parse_top_k,
     should_disable_vector_search,
-    validate_query_syntax,
 )
 
 
@@ -66,7 +63,7 @@ async def parse_find(
     kbid: str,
     item: FindRequest,
     *,
-    fetcher: Optional[Fetcher] = None,
+    fetcher: Fetcher | None = None,
 ) -> ParsedQuery:
     fetcher = fetcher or fetcher_for_find(kbid, item)
     parser = _FindParser(kbid, item, fetcher)
@@ -94,8 +91,8 @@ class _FindParser:
         self.fetcher = fetcher
 
         # cached data while parsing
-        self._query: Optional[Query] = None
-        self._top_k: Optional[int] = None
+        self._query: Query | None = None
+        self._top_k: int | None = None
 
     async def parse(self) -> UnitRetrieval:
         self._validate_request()
@@ -123,11 +120,11 @@ class _FindParser:
         try:
            rank_fusion = self._parse_rank_fusion()
        except ValidationError as exc:
-            raise InternalParserError(f"Parsing error in rank fusion: {str(exc)}") from exc
+            raise InternalParserError(f"Parsing error in rank fusion: {exc!s}") from exc
         try:
             reranker = self._parse_reranker()
         except ValidationError as exc:
-            raise InternalParserError(f"Parsing error in reranker: {str(exc)}") from exc
+            raise InternalParserError(f"Parsing error in reranker: {exc!s}") from exc
 
         # Adjust retrieval windows. Our current implementation assume:
         # `top_k <= reranker.window <= rank_fusion.window`
@@ -146,8 +143,6 @@ class _FindParser:
         return retrieval
 
     def _validate_request(self):
-        validate_query_syntax(self.item.query)
-
         # synonyms are not compatible with vector/graph search
         if (
             self.item.with_synonyms
@@ -173,15 +168,8 @@ class _FindParser:
     async def _parse_relation_query(self) -> RelationQuery:
         detected_entities = await self._get_detected_entities()
 
-        deleted_entity_groups = await self.fetcher.get_deleted_entity_groups()
-
-        meta_cache = await self.fetcher.get_entities_meta_cache()
-        deleted_entities = meta_cache.deleted_entities
-
         return RelationQuery(
-            entry_points=detected_entities,
-            deleted_entity_groups=deleted_entity_groups,
-            deleted_entities=deleted_entities,
+            entry_points=detected_entities, deleted_entity_groups=[], deleted_entities={}
         )
 
     async def _parse_graph_query(self) -> GraphQuery:
@@ -208,9 +196,6 @@ class _FindParser:
         else:
             detected_entities = await self.fetcher.get_detected_entities()
 
-        meta_cache = await self.fetcher.get_entities_meta_cache()
-        detected_entities = expand_entities(meta_cache, detected_entities)
-
         return detected_entities
 
     async def _parse_filters(self) -> Filters:
@@ -256,17 +241,9 @@ class _FindParser:
         else:
             filter_operator = nodereader_pb2.FilterOperator.AND
 
-        autofilter = None
-        if self.item.autofilter:
-            if self._query.relation is not None:
-                autofilter = self._query.relation.entry_points
-            else:
-                autofilter = await self._get_detected_entities()
-
         hidden = await filter_hidden_resources(self.kbid, self.item.show_hidden)
 
         return Filters(
-            autofilter=autofilter,
             facets=[],
             field_expression=field_expr,
             paragraph_expression=paragraph_expr,
```
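`parse_find`, like `parse_ask` above, keeps the injectable-fetcher signature, now spelled as a PEP 604 union. A minimal sketch of the injection pattern under simplified assumptions (the `Fetcher` and factory below are hypothetical stand-ins for the real `query_parser` ones):

```python
from __future__ import annotations

from dataclasses import dataclass


@dataclass
class Fetcher:  # stand-in for nucliadb.search.search.query_parser.fetcher.Fetcher
    kbid: str
    query: str


def fetcher_for_find(kbid: str, query: str) -> Fetcher:
    # hypothetical factory mirroring the fetcher_for_find used in the hunk above
    return Fetcher(kbid=kbid, query=query)


def parse_find(kbid: str, query: str, *, fetcher: Fetcher | None = None) -> Fetcher:
    # callers may inject a pre-built fetcher so its cached lookups are shared
    # across parsing stages; otherwise parse_find creates its own
    return fetcher or fetcher_for_find(kbid, query)
```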
```diff
--- a/nucliadb/search/search/query_parser/parsers/graph.py
+++ b/nucliadb/search/search/query_parser/parsers/graph.py
@@ -18,9 +18,9 @@
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 #
 
-from typing import Optional, Union
 
 from nidx_protos import nodereader_pb2
+from typing_extensions import assert_never
 
 from nucliadb.common.filter_expression import add_and_expression, parse_expression
 from nucliadb.common.models_utils.from_proto import RelationNodeTypeMap, RelationTypeMap
@@ -56,11 +56,11 @@ async def parse_graph_relation_search(
     return pb
 
 
-AnyGraphRequest = Union[
-    graph_requests.GraphSearchRequest,
-    graph_requests.GraphNodesSearchRequest,
-    graph_requests.GraphRelationsSearchRequest,
-]
+AnyGraphRequest = (
+    graph_requests.GraphSearchRequest
+    | graph_requests.GraphNodesSearchRequest
+    | graph_requests.GraphRelationsSearchRequest
+)
 
 
 async def _parse_common(kbid: str, item: AnyGraphRequest) -> nodereader_pb2.GraphSearchRequest:
@@ -78,7 +78,7 @@ async def _parse_common(kbid: str, item: AnyGraphRequest) -> nodereader_pb2.Grap
     return pb
 
 
-async def _parse_filters(kbid: str, item: AnyGraphRequest) -> Optional[nodereader_pb2.FilterExpression]:
+async def _parse_filters(kbid: str, item: AnyGraphRequest) -> nodereader_pb2.FilterExpression | None:
     filter_expr = nodereader_pb2.FilterExpression()
     if item.filter_expression:
         if item.filter_expression.field:
@@ -100,7 +100,7 @@ async def _parse_filters(kbid: str, item: AnyGraphRequest) -> Optional[nodereade
         return None
 
 
-def _parse_security(kbid: str, item: AnyGraphRequest) -> Optional[utils_pb2.Security]:
+def _parse_security(kbid: str, item: AnyGraphRequest) -> utils_pb2.Security | None:
     if item.security is not None and len(item.security.groups) > 0:
         security_pb = utils_pb2.Security()
         for group_id in item.security.groups:
@@ -153,10 +153,8 @@ def parse_path_query(expr: graph_requests.GraphPathQuery) -> nodereader_pb2.Grap
     elif isinstance(expr, graph_requests.Generated):
         _set_generated_to_pb(expr, pb)
 
-    else:  # pragma: no cover
-        # This is a trick so mypy generates an error if this branch can be reached,
-        # that is, if we are missing some ifs
-        _a: int = "a"
+    else:  # pragma: no cover
+        assert_never(expr)
 
     return pb
 
@@ -182,10 +180,8 @@ def _parse_node_query(expr: graph_requests.GraphNodesQuery) -> nodereader_pb2.Gr
     elif isinstance(expr, graph_requests.Generated):
         _set_generated_to_pb(expr, pb)
 
-    else:  # pragma: no cover
-        # This is a trick so mypy generates an error if this branch can be reached,
-        # that is, if we are missing some ifs
-        _a: int = "a"
+    else:  # pragma: no cover
+        assert_never(expr)
 
     return pb
 
@@ -212,10 +208,8 @@ def _parse_relation_query(
     elif isinstance(expr, graph_requests.Generated):
         _set_generated_to_pb(expr, pb)
 
-    else:  # pragma: no cover
-        # This is a trick so mypy generates an error if this branch can be reached,
-        # that is, if we are missing some ifs
-        _a: int = "a"
+    else:  # pragma: no cover
+        assert_never(expr)
 
     return pb
 
@@ -230,10 +224,8 @@ def _set_node_to_pb(node: graph_requests.GraphNode, pb: nodereader_pb2.GraphQuer
         pb.fuzzy.kind = nodereader_pb2.GraphQuery.Node.MatchLocation.PREFIX
         pb.fuzzy.distance = 1
 
-    else:  # pragma: no cover
-        # This is a trick so mypy generates an error if this branch can be reached,
-        # that is, if we are missing some ifs
-        _a: int = "a"
+    else:  # pragma: no cover
+        assert_never(node.match)
 
     if node.type is not None:
         pb.node_type = RelationNodeTypeMap[node.type]
@@ -263,7 +255,5 @@ def _set_generated_to_pb(generated: graph_requests.Generated, pb: nodereader_pb2
 
         pb.facet.facet = facet
 
-    else:  # pragma: no cover
-        # This is a trick so mypy generates an error if this branch can be reached,
-        # that is, if we are missing some ifs
-        _a: int = "a"
+    else:  # pragma: no cover
+        assert_never(generated.by)
```
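The `AnyGraphRequest` rewrite is the PEP 604 spelling of the same union. Both forms are equivalent to type checkers, but a module-level alias is evaluated at import time, so the `|` form requires Python 3.10+. In miniature, with empty stand-in request classes:

```python
class GraphSearchRequest:  # stand-ins for the nucliadb_models graph requests
    ...


class GraphNodesSearchRequest:
    ...


class GraphRelationsSearchRequest:
    ...


# before: AnyGraphRequest = Union[...]; after, the equivalent PEP 604 union:
AnyGraphRequest = (
    GraphSearchRequest
    | GraphNodesSearchRequest
    | GraphRelationsSearchRequest
)


def is_graph_request(obj: object) -> bool:
    # unions built from classes also work with isinstance on Python 3.10+
    return isinstance(obj, AnyGraphRequest)
```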
```diff
--- /dev/null
+++ b/nucliadb/search/search/query_parser/parsers/retrieve.py
@@ -0,0 +1,207 @@
+# Copyright (C) 2021 Bosutech XXI S.L.
+#
+# nucliadb is offered under the AGPL v3.0 and as commercial software.
+# For commercial licensing, contact us at info@nuclia.com.
+#
+# AGPL:
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+from nidx_protos import nodereader_pb2
+from pydantic import ValidationError
+
+from nucliadb.common.exceptions import InvalidQueryError
+from nucliadb.common.filter_expression import parse_expression
+from nucliadb.search.search.metrics import query_parser_observer
+from nucliadb.search.search.query_parser.exceptions import InternalParserError
+from nucliadb.search.search.query_parser.fetcher import Fetcher
+from nucliadb.search.search.query_parser.models import (
+    Filters,
+    GraphQuery,
+    KeywordQuery,
+    Query,
+    RankFusion,
+    ReciprocalRankFusion,
+    SemanticQuery,
+    UnitRetrieval,
+)
+from nucliadb.search.search.query_parser.parsers.common import query_with_synonyms, validate_query_syntax
+from nucliadb.search.search.utils import filter_hidden_resources
+from nucliadb_models import search as search_models
+from nucliadb_models.filters import FilterExpression
+from nucliadb_models.retrieval import RetrievalRequest
+from nucliadb_models.search import MAX_RANK_FUSION_WINDOW
+
+
+@query_parser_observer.wrap({"type": "parse_retrieve"})
+async def parse_retrieve(kbid: str, item: RetrievalRequest) -> UnitRetrieval:
+    fetcher = Fetcher(
+        kbid=kbid,
+        query=item.query.keyword.query if item.query.keyword else "",
+        user_vector=item.query.semantic.query if item.query.semantic else None,
+        vectorset=item.query.semantic.vectorset if item.query.semantic else None,
+        # Retrieve doesn't use images for now
+        query_image=None,
+        # Retrieve doesn't do rephrasing
+        rephrase=False,
+        rephrase_prompt=None,
+        generative_model=None,
+    )
+    parser = _RetrievalParser(kbid, item, fetcher)
+    retrieval = await parser.parse()
+    return retrieval
+
+
+class _RetrievalParser:
+    def __init__(self, kbid: str, item: RetrievalRequest, fetcher: Fetcher):
+        self.kbid = kbid
+        self.item = item
+        self.fetcher = fetcher
+
+    async def parse(self) -> UnitRetrieval:
+        top_k = self.item.top_k
+        query = await self._parse_query()
+        filters = await self._parse_filters()
+        try:
+            rank_fusion = self._parse_rank_fusion()
+        except ValidationError as exc:
+            raise InternalParserError(f"Parsing error in rank fusion: {exc!s}") from exc
+
+        # ensure top_k and rank_fusion are coherent
+        if top_k > rank_fusion.window:
+            raise InvalidQueryError(
+                "rank_fusion.window", "Rank fusion window must be greater or equal to top_k"
+            )
+
+        retrieval = UnitRetrieval(
+            query=query,
+            top_k=top_k,
+            filters=filters,
+            rank_fusion=rank_fusion,
+            reranker=None,
+        )
+        return retrieval
+
+    async def _parse_query(self) -> Query:
+        keyword = None
+        if self.item.query.keyword is not None:
+            keyword_query, is_synonyms_query = await self._parse_keyword_query()
+            keyword = KeywordQuery(
+                query=keyword_query,
+                is_synonyms_query=is_synonyms_query,
+                min_score=self.item.query.keyword.min_score,
+            )
+
+        semantic = None
+        if self.item.query.semantic is not None:
+            vectorset, query_vector = await self._parse_semantic_query()
+            semantic = SemanticQuery(
+                query=query_vector,
+                vectorset=vectorset,
+                min_score=self.item.query.semantic.min_score,
+            )
+
+        graph = None
+        if self.item.query.graph is not None:
+            graph = GraphQuery(query=self.item.query.graph.query)
+
+        return Query(keyword=keyword, semantic=semantic, graph=graph)
+
+    async def _parse_keyword_query(self) -> tuple[str, bool]:
+        assert self.item.query.keyword is not None
+        keyword_query = self.item.query.keyword.query
+        is_synonyms_query = False
+        if self.item.query.keyword.with_synonyms:
+            synonyms_query = await query_with_synonyms(keyword_query, fetcher=self.fetcher)
+            if synonyms_query is not None:
+                keyword_query = synonyms_query
+                is_synonyms_query = True
+
+        # after all query transformations, pass a validator that can fix some
+        # queries that trigger a panic on the index
+        keyword_query = validate_query_syntax(keyword_query)
+        return keyword_query, is_synonyms_query
+
+    async def _parse_semantic_query(self) -> tuple[str, list[float]]:
+        # Make sure the vectorset exists in the KB
+        assert self.item.query.semantic is not None
+        vectorset = self.item.query.semantic.vectorset
+        await self.fetcher.validate_vectorset(self.kbid, vectorset)
+
+        # Calculate the matryoshka dimension if applicable
+        user_vector = self.item.query.semantic.query
+        matryoshka_dimension = await self.fetcher.get_matryoshka_dimension_cached(self.kbid, vectorset)
+        if matryoshka_dimension is not None:
+            if len(user_vector) < matryoshka_dimension:
+                raise InvalidQueryError(
+                    "vector",
+                    f"Invalid vector length, please check valid embedding size for {vectorset} model",
+                )
+
+        # KB using a matryoshka embeddings model, cut the query vector
+        # accordingly
+        query_vector = user_vector[:matryoshka_dimension]
+        return vectorset, query_vector
+
+    async def _parse_filters(self) -> Filters:
+        filters = Filters()
+        if self.item.filters is None:
+            return filters
+
+        if self.item.filters.filter_expression is not None:
+            if self.item.filters.filter_expression.field is not None:
+                filters.field_expression = await parse_expression(
+                    self.item.filters.filter_expression.field,
+                    self.kbid,
+                )
+            if self.item.filters.filter_expression.paragraph is not None:
+                filters.paragraph_expression = await parse_expression(
+                    self.item.filters.filter_expression.paragraph,
+                    self.kbid,
+                )
+            if self.item.filters.filter_expression.operator == FilterExpression.Operator.OR:
+                filter_operator = nodereader_pb2.FilterOperator.OR
+            else:
+                filter_operator = nodereader_pb2.FilterOperator.AND
+            filters.filter_expression_operator = filter_operator
+
+        filters.hidden = await filter_hidden_resources(self.kbid, self.item.filters.show_hidden)
+        filters.security = self.item.filters.security
+        filters.with_duplicates = self.item.filters.with_duplicates
+
+        return filters
+
+    def _parse_rank_fusion(self) -> RankFusion:
+        rank_fusion: RankFusion
+
+        top_k = self.item.top_k
+        window = min(top_k, MAX_RANK_FUSION_WINDOW)
+
+        if isinstance(self.item.rank_fusion, search_models.RankFusionName):
+            if self.item.rank_fusion == search_models.RankFusionName.RECIPROCAL_RANK_FUSION:
+                rank_fusion = ReciprocalRankFusion(window=window)
+            else:
+                raise InternalParserError(f"Unknown rank fusion algorithm: {self.item.rank_fusion}")
+
+        elif isinstance(self.item.rank_fusion, search_models.ReciprocalRankFusion):
+            user_window = self.item.rank_fusion.window
+            rank_fusion = ReciprocalRankFusion(
+                k=self.item.rank_fusion.k,
+                boosting=self.item.rank_fusion.boosting,
+                window=min(max(user_window or 0, top_k), 500),
+            )
+
+        else:
+            raise InternalParserError(f"Unknown rank fusion {self.item.rank_fusion}")
+
+        return rank_fusion
```
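The matryoshka handling in `_parse_semantic_query`, reduced to its core: matryoshka-style embedding models are trained so that a prefix of each vector is itself a usable embedding, so the parser cuts the user's query vector down to the vectorset's configured dimension and rejects vectors that are too short. A plain-Python sketch without the nucliadb types:

```python
def cut_to_matryoshka_dimension(user_vector: list[float], dimension: int | None) -> list[float]:
    """Truncate a query vector to the vectorset's matryoshka dimension."""
    if dimension is not None and len(user_vector) < dimension:
        raise ValueError(f"vector has {len(user_vector)} dimensions, expected at least {dimension}")
    # slicing with None keeps the whole vector, mirroring the parser's
    # user_vector[:matryoshka_dimension] when no matryoshka config exists
    return user_vector[:dimension]


assert cut_to_matryoshka_dimension([0.1, 0.2, 0.3, 0.4], 2) == [0.1, 0.2]
assert cut_to_matryoshka_dimension([0.1, 0.2], None) == [0.1, 0.2]
```

Similarly, `_parse_rank_fusion` clamps the effective window with `min(max(user_window or 0, top_k), 500)`: at least `top_k`, at most the hard cap.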