nucliadb 6.7.2.post4874__py3-none-any.whl → 6.10.0.post5705__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- migrations/0023_backfill_pg_catalog.py +8 -4
- migrations/0028_extracted_vectors_reference.py +1 -1
- migrations/0029_backfill_field_status.py +3 -4
- migrations/0032_remove_old_relations.py +2 -3
- migrations/0038_backfill_catalog_field_labels.py +8 -4
- migrations/0039_backfill_converation_splits_metadata.py +106 -0
- migrations/0040_migrate_search_configurations.py +79 -0
- migrations/0041_reindex_conversations.py +137 -0
- migrations/pg/0010_shards_index.py +34 -0
- nucliadb/search/api/v1/resource/utils.py → migrations/pg/0011_catalog_statistics.py +5 -6
- migrations/pg/0012_catalog_statistics_undo.py +26 -0
- nucliadb/backups/create.py +2 -15
- nucliadb/backups/restore.py +4 -15
- nucliadb/backups/tasks.py +4 -1
- nucliadb/common/back_pressure/cache.py +2 -3
- nucliadb/common/back_pressure/materializer.py +7 -13
- nucliadb/common/back_pressure/settings.py +6 -6
- nucliadb/common/back_pressure/utils.py +1 -0
- nucliadb/common/cache.py +9 -9
- nucliadb/common/catalog/__init__.py +79 -0
- nucliadb/common/catalog/dummy.py +36 -0
- nucliadb/common/catalog/interface.py +85 -0
- nucliadb/{search/search/pgcatalog.py → common/catalog/pg.py} +330 -232
- nucliadb/common/catalog/utils.py +56 -0
- nucliadb/common/cluster/manager.py +8 -23
- nucliadb/common/cluster/rebalance.py +484 -112
- nucliadb/common/cluster/rollover.py +36 -9
- nucliadb/common/cluster/settings.py +4 -9
- nucliadb/common/cluster/utils.py +34 -8
- nucliadb/common/context/__init__.py +7 -8
- nucliadb/common/context/fastapi.py +1 -2
- nucliadb/common/datamanagers/__init__.py +2 -4
- nucliadb/common/datamanagers/atomic.py +9 -2
- nucliadb/common/datamanagers/cluster.py +1 -2
- nucliadb/common/datamanagers/fields.py +3 -4
- nucliadb/common/datamanagers/kb.py +6 -6
- nucliadb/common/datamanagers/labels.py +2 -3
- nucliadb/common/datamanagers/resources.py +10 -33
- nucliadb/common/datamanagers/rollover.py +5 -7
- nucliadb/common/datamanagers/search_configurations.py +1 -2
- nucliadb/common/datamanagers/synonyms.py +1 -2
- nucliadb/common/datamanagers/utils.py +4 -4
- nucliadb/common/datamanagers/vectorsets.py +4 -4
- nucliadb/common/external_index_providers/base.py +32 -5
- nucliadb/common/external_index_providers/manager.py +5 -34
- nucliadb/common/external_index_providers/settings.py +1 -27
- nucliadb/common/filter_expression.py +129 -41
- nucliadb/common/http_clients/exceptions.py +8 -0
- nucliadb/common/http_clients/processing.py +16 -23
- nucliadb/common/http_clients/utils.py +3 -0
- nucliadb/common/ids.py +82 -58
- nucliadb/common/locking.py +1 -2
- nucliadb/common/maindb/driver.py +9 -8
- nucliadb/common/maindb/local.py +5 -5
- nucliadb/common/maindb/pg.py +9 -8
- nucliadb/common/nidx.py +22 -5
- nucliadb/common/vector_index_config.py +1 -1
- nucliadb/export_import/datamanager.py +4 -3
- nucliadb/export_import/exporter.py +11 -19
- nucliadb/export_import/importer.py +13 -6
- nucliadb/export_import/tasks.py +2 -0
- nucliadb/export_import/utils.py +6 -18
- nucliadb/health.py +2 -2
- nucliadb/ingest/app.py +8 -8
- nucliadb/ingest/consumer/consumer.py +8 -10
- nucliadb/ingest/consumer/pull.py +10 -8
- nucliadb/ingest/consumer/service.py +5 -30
- nucliadb/ingest/consumer/shard_creator.py +16 -5
- nucliadb/ingest/consumer/utils.py +1 -1
- nucliadb/ingest/fields/base.py +37 -49
- nucliadb/ingest/fields/conversation.py +55 -9
- nucliadb/ingest/fields/exceptions.py +1 -2
- nucliadb/ingest/fields/file.py +22 -8
- nucliadb/ingest/fields/link.py +7 -7
- nucliadb/ingest/fields/text.py +2 -3
- nucliadb/ingest/orm/brain_v2.py +89 -57
- nucliadb/ingest/orm/broker_message.py +2 -4
- nucliadb/ingest/orm/entities.py +10 -209
- nucliadb/ingest/orm/index_message.py +128 -113
- nucliadb/ingest/orm/knowledgebox.py +91 -59
- nucliadb/ingest/orm/processor/auditing.py +1 -3
- nucliadb/ingest/orm/processor/data_augmentation.py +1 -2
- nucliadb/ingest/orm/processor/processor.py +98 -153
- nucliadb/ingest/orm/processor/sequence_manager.py +1 -2
- nucliadb/ingest/orm/resource.py +82 -71
- nucliadb/ingest/orm/utils.py +1 -1
- nucliadb/ingest/partitions.py +12 -1
- nucliadb/ingest/processing.py +17 -17
- nucliadb/ingest/serialize.py +202 -145
- nucliadb/ingest/service/writer.py +15 -114
- nucliadb/ingest/settings.py +36 -15
- nucliadb/ingest/utils.py +1 -2
- nucliadb/learning_proxy.py +23 -26
- nucliadb/metrics_exporter.py +20 -6
- nucliadb/middleware/__init__.py +82 -1
- nucliadb/migrator/datamanager.py +4 -11
- nucliadb/migrator/migrator.py +1 -2
- nucliadb/migrator/models.py +1 -2
- nucliadb/migrator/settings.py +1 -2
- nucliadb/models/internal/augment.py +614 -0
- nucliadb/models/internal/processing.py +19 -19
- nucliadb/openapi.py +2 -2
- nucliadb/purge/__init__.py +3 -8
- nucliadb/purge/orphan_shards.py +1 -2
- nucliadb/reader/__init__.py +5 -0
- nucliadb/reader/api/models.py +6 -13
- nucliadb/reader/api/v1/download.py +59 -38
- nucliadb/reader/api/v1/export_import.py +4 -4
- nucliadb/reader/api/v1/knowledgebox.py +37 -9
- nucliadb/reader/api/v1/learning_config.py +33 -14
- nucliadb/reader/api/v1/resource.py +61 -9
- nucliadb/reader/api/v1/services.py +18 -14
- nucliadb/reader/app.py +3 -1
- nucliadb/reader/reader/notifications.py +1 -2
- nucliadb/search/api/v1/__init__.py +3 -0
- nucliadb/search/api/v1/ask.py +3 -4
- nucliadb/search/api/v1/augment.py +585 -0
- nucliadb/search/api/v1/catalog.py +15 -19
- nucliadb/search/api/v1/find.py +16 -22
- nucliadb/search/api/v1/hydrate.py +328 -0
- nucliadb/search/api/v1/knowledgebox.py +1 -2
- nucliadb/search/api/v1/predict_proxy.py +1 -2
- nucliadb/search/api/v1/resource/ask.py +28 -8
- nucliadb/search/api/v1/resource/ingestion_agents.py +5 -6
- nucliadb/search/api/v1/resource/search.py +9 -11
- nucliadb/search/api/v1/retrieve.py +130 -0
- nucliadb/search/api/v1/search.py +28 -32
- nucliadb/search/api/v1/suggest.py +11 -14
- nucliadb/search/api/v1/summarize.py +1 -2
- nucliadb/search/api/v1/utils.py +2 -2
- nucliadb/search/app.py +3 -2
- nucliadb/search/augmentor/__init__.py +21 -0
- nucliadb/search/augmentor/augmentor.py +232 -0
- nucliadb/search/augmentor/fields.py +704 -0
- nucliadb/search/augmentor/metrics.py +24 -0
- nucliadb/search/augmentor/paragraphs.py +334 -0
- nucliadb/search/augmentor/resources.py +238 -0
- nucliadb/search/augmentor/utils.py +33 -0
- nucliadb/search/lifecycle.py +3 -1
- nucliadb/search/predict.py +33 -19
- nucliadb/search/predict_models.py +8 -9
- nucliadb/search/requesters/utils.py +11 -10
- nucliadb/search/search/cache.py +19 -42
- nucliadb/search/search/chat/ask.py +131 -59
- nucliadb/search/search/chat/exceptions.py +3 -5
- nucliadb/search/search/chat/fetcher.py +201 -0
- nucliadb/search/search/chat/images.py +6 -4
- nucliadb/search/search/chat/old_prompt.py +1375 -0
- nucliadb/search/search/chat/parser.py +510 -0
- nucliadb/search/search/chat/prompt.py +563 -615
- nucliadb/search/search/chat/query.py +453 -32
- nucliadb/search/search/chat/rpc.py +85 -0
- nucliadb/search/search/fetch.py +3 -4
- nucliadb/search/search/filters.py +8 -11
- nucliadb/search/search/find.py +33 -31
- nucliadb/search/search/find_merge.py +124 -331
- nucliadb/search/search/graph_strategy.py +14 -12
- nucliadb/search/search/hydrator/__init__.py +49 -0
- nucliadb/search/search/hydrator/fields.py +217 -0
- nucliadb/search/search/hydrator/images.py +130 -0
- nucliadb/search/search/hydrator/paragraphs.py +323 -0
- nucliadb/search/search/hydrator/resources.py +60 -0
- nucliadb/search/search/ingestion_agents.py +5 -5
- nucliadb/search/search/merge.py +90 -94
- nucliadb/search/search/metrics.py +24 -7
- nucliadb/search/search/paragraphs.py +7 -9
- nucliadb/search/search/predict_proxy.py +44 -18
- nucliadb/search/search/query.py +14 -86
- nucliadb/search/search/query_parser/fetcher.py +51 -82
- nucliadb/search/search/query_parser/models.py +19 -48
- nucliadb/search/search/query_parser/old_filters.py +20 -19
- nucliadb/search/search/query_parser/parsers/ask.py +5 -6
- nucliadb/search/search/query_parser/parsers/catalog.py +7 -11
- nucliadb/search/search/query_parser/parsers/common.py +21 -13
- nucliadb/search/search/query_parser/parsers/find.py +6 -29
- nucliadb/search/search/query_parser/parsers/graph.py +18 -28
- nucliadb/search/search/query_parser/parsers/retrieve.py +207 -0
- nucliadb/search/search/query_parser/parsers/search.py +15 -56
- nucliadb/search/search/query_parser/parsers/unit_retrieval.py +8 -29
- nucliadb/search/search/rank_fusion.py +18 -13
- nucliadb/search/search/rerankers.py +6 -7
- nucliadb/search/search/retrieval.py +300 -0
- nucliadb/search/search/summarize.py +5 -6
- nucliadb/search/search/utils.py +3 -4
- nucliadb/search/settings.py +1 -2
- nucliadb/standalone/api_router.py +1 -1
- nucliadb/standalone/app.py +4 -3
- nucliadb/standalone/auth.py +5 -6
- nucliadb/standalone/lifecycle.py +2 -2
- nucliadb/standalone/run.py +5 -4
- nucliadb/standalone/settings.py +5 -6
- nucliadb/standalone/versions.py +3 -4
- nucliadb/tasks/consumer.py +13 -8
- nucliadb/tasks/models.py +2 -1
- nucliadb/tasks/producer.py +3 -3
- nucliadb/tasks/retries.py +8 -7
- nucliadb/train/api/utils.py +1 -3
- nucliadb/train/api/v1/shards.py +1 -2
- nucliadb/train/api/v1/trainset.py +1 -2
- nucliadb/train/app.py +1 -1
- nucliadb/train/generator.py +4 -4
- nucliadb/train/generators/field_classifier.py +2 -2
- nucliadb/train/generators/field_streaming.py +6 -6
- nucliadb/train/generators/image_classifier.py +2 -2
- nucliadb/train/generators/paragraph_classifier.py +2 -2
- nucliadb/train/generators/paragraph_streaming.py +2 -2
- nucliadb/train/generators/question_answer_streaming.py +2 -2
- nucliadb/train/generators/sentence_classifier.py +4 -10
- nucliadb/train/generators/token_classifier.py +3 -2
- nucliadb/train/generators/utils.py +6 -5
- nucliadb/train/nodes.py +3 -3
- nucliadb/train/resource.py +6 -8
- nucliadb/train/settings.py +3 -4
- nucliadb/train/types.py +11 -11
- nucliadb/train/upload.py +3 -2
- nucliadb/train/uploader.py +1 -2
- nucliadb/train/utils.py +1 -2
- nucliadb/writer/api/v1/export_import.py +4 -1
- nucliadb/writer/api/v1/field.py +15 -14
- nucliadb/writer/api/v1/knowledgebox.py +18 -56
- nucliadb/writer/api/v1/learning_config.py +5 -4
- nucliadb/writer/api/v1/resource.py +9 -20
- nucliadb/writer/api/v1/services.py +10 -132
- nucliadb/writer/api/v1/upload.py +73 -72
- nucliadb/writer/app.py +8 -2
- nucliadb/writer/resource/basic.py +12 -15
- nucliadb/writer/resource/field.py +43 -5
- nucliadb/writer/resource/origin.py +7 -0
- nucliadb/writer/settings.py +2 -3
- nucliadb/writer/tus/__init__.py +2 -3
- nucliadb/writer/tus/azure.py +5 -7
- nucliadb/writer/tus/dm.py +3 -3
- nucliadb/writer/tus/exceptions.py +3 -4
- nucliadb/writer/tus/gcs.py +15 -22
- nucliadb/writer/tus/s3.py +2 -3
- nucliadb/writer/tus/storage.py +3 -3
- {nucliadb-6.7.2.post4874.dist-info → nucliadb-6.10.0.post5705.dist-info}/METADATA +10 -11
- nucliadb-6.10.0.post5705.dist-info/RECORD +410 -0
- nucliadb/common/datamanagers/entities.py +0 -139
- nucliadb/common/external_index_providers/pinecone.py +0 -894
- nucliadb/ingest/orm/processor/pgcatalog.py +0 -129
- nucliadb/search/search/hydrator.py +0 -197
- nucliadb-6.7.2.post4874.dist-info/RECORD +0 -383
- {nucliadb-6.7.2.post4874.dist-info → nucliadb-6.10.0.post5705.dist-info}/WHEEL +0 -0
- {nucliadb-6.7.2.post4874.dist-info → nucliadb-6.10.0.post5705.dist-info}/entry_points.txt +0 -0
- {nucliadb-6.7.2.post4874.dist-info → nucliadb-6.10.0.post5705.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,201 @@
|
|
|
1
|
+
# Copyright (C) 2021 Bosutech XXI S.L.
|
|
2
|
+
#
|
|
3
|
+
# nucliadb is offered under the AGPL v3.0 and as commercial software.
|
|
4
|
+
# For commercial licensing, contact us at info@nuclia.com.
|
|
5
|
+
#
|
|
6
|
+
# AGPL:
|
|
7
|
+
# This program is free software: you can redistribute it and/or modify
|
|
8
|
+
# it under the terms of the GNU Affero General Public License as
|
|
9
|
+
# published by the Free Software Foundation, either version 3 of the
|
|
10
|
+
# License, or (at your option) any later version.
|
|
11
|
+
#
|
|
12
|
+
# This program is distributed in the hope that it will be useful,
|
|
13
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
14
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
15
|
+
# GNU Affero General Public License for more details.
|
|
16
|
+
#
|
|
17
|
+
# You should have received a copy of the GNU Affero General Public License
|
|
18
|
+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
19
|
+
#
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
from google.protobuf.json_format import ParseDict
|
|
23
|
+
|
|
24
|
+
from nucliadb.common.exceptions import InvalidQueryError
|
|
25
|
+
from nucliadb.search import logger
|
|
26
|
+
from nucliadb.search.predict import SendToPredictError, convert_relations
|
|
27
|
+
from nucliadb.search.predict_models import QueryModel
|
|
28
|
+
from nucliadb.search.search.chat import rpc
|
|
29
|
+
from nucliadb.search.search.query_parser.fetcher import Fetcher
|
|
30
|
+
from nucliadb.search.utilities import get_predict
|
|
31
|
+
from nucliadb_models.internal.predict import QueryInfo
|
|
32
|
+
from nucliadb_models.search import Image, MaxTokens
|
|
33
|
+
from nucliadb_protos import knowledgebox_pb2, utils_pb2
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
class RAOFetcher(Fetcher):
    """Fetcher whose query information comes from the module-level
    ``query_information`` helper and whose labelsets come from ``rpc``.

    The query information and the resolved vectorset are each computed at
    most once per instance and then served from instance attributes.
    """

    def __init__(
        self,
        kbid: str,
        *,
        query: str,
        user_vector: list[float] | None,
        vectorset: str | None,
        rephrase: bool,
        rephrase_prompt: str | None,
        generative_model: str | None,
        query_image: Image | None,
    ):
        """Forward every argument to the base fetcher and reset the caches."""
        super().__init__(
            kbid,
            query=query,
            user_vector=user_vector,
            vectorset=vectorset,
            rephrase=rephrase,
            rephrase_prompt=rephrase_prompt,
            generative_model=generative_model,
            query_image=query_image,
        )

        # Lazily populated by query_information() / get_vectorset()
        self._query_info: QueryInfo | None = None
        self._vectorset: str | None = None

    async def query_information(self) -> QueryInfo:
        """Return the query information, requesting it only on first use."""
        if self._query_info is not None:
            return self._query_info

        # NOTE: inside a method body this name resolves to the module-level
        # function, not to this method.
        fetched = await query_information(
            kbid=self.kbid,
            query=self.query,
            semantic_model=self.user_vectorset,
            generative_model=self.generative_model,
            rephrase=self.rephrase,
            rephrase_prompt=self.rephrase_prompt,
            query_image=self.query_image,
        )
        self._query_info = fetched
        return fetched

    # Retrieval

    async def get_rephrased_query(self) -> str | None:
        """Return the ``rephrased_query`` attribute of the query information."""
        return (await self.query_information()).rephrased_query

    async def get_detected_entities(self) -> list[utils_pb2.RelationNode]:
        """Return the entities of the query information converted with
        ``convert_relations``, or an empty list when there are none.
        """
        entities = (await self.query_information()).entities
        if entities is None:
            return []
        return convert_relations(entities.model_dump())

    async def get_semantic_min_score(self) -> float | None:
        """Return the semantic threshold registered for the resolved
        vectorset, or None when there is no entry for it.
        """
        info = await self.query_information()
        chosen_vectorset = await self.get_vectorset()
        return info.semantic_thresholds.get(chosen_vectorset)

    async def get_vectorset(self) -> str:
        """Resolve (and remember) the vectorset to use.

        The user-provided vectorset wins; otherwise the first vectorset key
        present in the query information's sentence vectors is used.

        Raises:
            SendToPredictError: if no sentence vectors are available.
        """
        if self._vectorset is not None:
            return self._vectorset

        if self.user_vectorset is not None:
            self._vectorset = self.user_vectorset
            return self._vectorset

        # when it's not provided, we get the default from Predict API
        info = await self.query_information()
        sentence = info.sentence
        if sentence is None or len(sentence.vectors) == 0:
            logger.error(
                "Asking for a vectorset but /query didn't return one", extra={"kbid": self.kbid}
            )
            raise SendToPredictError("Predict API didn't return a sentence vectorset")

        # vectors field is enforced by the data model to have at least one key
        self._vectorset = next(iter(sentence.vectors.keys()))
        assert self._vectorset is not None
        return self._vectorset

    async def get_query_vector(self) -> list[float]:
        """Return the query vector for the resolved vectorset.

        A user-provided vector is returned as is. Otherwise the vector is
        looked up in the sentence vectors of the query information.

        Raises:
            SendToPredictError: if there is no sentence, or the sentence has
                no vector for the resolved vectorset.
        """
        if self.user_vector is not None:
            return self.user_vector

        info = await self.query_information()
        sentence = info.sentence
        if sentence is None:
            logger.error(
                "Asking for a semantic query vector but /query didn't return a sentence",
                extra={"kbid": self.kbid},
            )
            raise SendToPredictError("Predict API didn't return a sentence for semantic search")

        chosen_vectorset = await self.get_vectorset()
        vectors = sentence.vectors
        if chosen_vectorset not in vectors:
            log_extra = {
                "kbid": self.kbid,
                "vectorset": chosen_vectorset,
                "predict_vectorsets": ",".join(vectors.keys()),
            }
            logger.error(
                "Predict is not responding with a valid query nucliadb vectorset",
                extra=log_extra,
            )
            raise SendToPredictError("Predict API didn't return the requested vectorset")

        return vectors[chosen_vectorset]

    async def get_classification_labels(self) -> knowledgebox_pb2.Labels:
        """Fetch the labelsets through ``rpc`` and return them as a
        ``knowledgebox_pb2.Labels`` message.
        """
        api_labelsets = await rpc.labelsets(self.kbid)

        # TODO(decoupled-ask): remove this conversion and refactor code to use API models instead of protobuf
        pb_labels = knowledgebox_pb2.Labels()
        for name, api_labelset in api_labelsets.labelsets.items():
            ParseDict(api_labelset.model_dump(), pb_labels.labelset[name])

        return pb_labels

    # Generative

    async def get_visual_llm_enabled(self) -> bool:
        """Return the ``visual_llm`` attribute of the query information.

        Raises:
            SendToPredictError: if no query information is available.
        """
        info = await self.query_information()
        if info is None:
            raise SendToPredictError("Error while using predict's query endpoint")

        return info.visual_llm

    async def get_max_context_tokens(self, max_tokens: MaxTokens | None) -> int:
        """Return the context token budget to use.

        The requested ``max_tokens.context`` is used when given; otherwise
        the ``max_context`` reported in the query information is used.

        Raises:
            SendToPredictError: if no query information is available.
            InvalidQueryError: if the requested context exceeds ``max_context``.
        """
        info = await self.query_information()
        if info is None:
            raise SendToPredictError("Error while using predict's query endpoint")

        model_max = info.max_context
        requested = None if max_tokens is None else max_tokens.context
        if requested is None:
            return model_max

        if requested > model_max:
            raise InvalidQueryError(
                "max_tokens.context",
                f"Max context tokens is higher than the model's limit of {model_max}",
            )
        return requested

    def get_max_answer_tokens(self, max_tokens: MaxTokens | None) -> int | None:
        """Return ``max_tokens.answer``, or None when ``max_tokens`` is None."""
        if max_tokens is None:
            return None
        return max_tokens.answer
|
|
179
|
+
|
|
180
|
+
|
|
181
|
+
async def query_information(
    kbid: str,
    query: str,
    semantic_model: str | None,
    generative_model: str | None = None,
    rephrase: bool = False,
    rephrase_prompt: str | None = None,
    query_image: Image | None = None,
) -> QueryInfo:
    """Build a ``QueryModel`` from the arguments and return the result of the
    predict utility's ``query`` call for ``kbid``.

    ``semantic_model`` is sent as a single-element list; a falsy value
    (None or empty string) is sent as None instead.
    """
    # NOTE: When moving /ask to RAO, this will need to change to whatever client/utility is used
    # to call NUA predict (internally or externally in the case of onprem).
    predict_client = get_predict()

    if semantic_model:
        semantic_models = [semantic_model]
    else:
        semantic_models = None

    request = QueryModel(
        text=query,
        semantic_models=semantic_models,
        generative_model=generative_model,
        rephrase=rephrase,
        rephrase_prompt=rephrase_prompt,
        query_image=query_image,
    )
    return await predict_client.query(kbid, request)
|
|
@@ -19,7 +19,6 @@
|
|
|
19
19
|
|
|
20
20
|
import base64
|
|
21
21
|
from io import BytesIO
|
|
22
|
-
from typing import Optional
|
|
23
22
|
|
|
24
23
|
from nucliadb.common.ids import ParagraphId
|
|
25
24
|
from nucliadb.ingest.fields.file import File
|
|
@@ -29,7 +28,8 @@ from nucliadb_utils.storages.storage import Storage
|
|
|
29
28
|
from nucliadb_utils.utilities import get_storage
|
|
30
29
|
|
|
31
30
|
|
|
32
|
-
|
|
31
|
+
# DEPRECATED(decoupled-ask): remove once old_prompt.py is removed
|
|
32
|
+
async def get_page_image(kbid: str, paragraph_id: ParagraphId, page_number: int) -> Image | None:
|
|
33
33
|
storage = await get_storage(service_name=SERVICE_NAME)
|
|
34
34
|
sf = storage.file_extracted(
|
|
35
35
|
kbid=kbid,
|
|
@@ -48,7 +48,8 @@ async def get_page_image(kbid: str, paragraph_id: ParagraphId, page_number: int)
|
|
|
48
48
|
return image
|
|
49
49
|
|
|
50
50
|
|
|
51
|
-
|
|
51
|
+
# DEPRECATED(decoupled-ask): remove once old_prompt.py is removed
|
|
52
|
+
async def get_paragraph_image(kbid: str, paragraph_id: ParagraphId, reference: str) -> Image | None:
|
|
52
53
|
storage = await get_storage(service_name=SERVICE_NAME)
|
|
53
54
|
sf = storage.file_extracted(
|
|
54
55
|
kbid=kbid,
|
|
@@ -67,7 +68,8 @@ async def get_paragraph_image(kbid: str, paragraph_id: ParagraphId, reference: s
|
|
|
67
68
|
return image
|
|
68
69
|
|
|
69
70
|
|
|
70
|
-
|
|
71
|
+
# DEPRECATED(decoupled-ask): remove once old_prompt.py is removed
|
|
72
|
+
async def get_file_thumbnail_image(file: File) -> Image | None:
|
|
71
73
|
fed = await file.get_file_extracted_data()
|
|
72
74
|
if fed is None or not fed.HasField("file_thumbnail"):
|
|
73
75
|
return None
|