nucliadb 6.3.7.post4081__py3-none-any.whl → 6.3.7.post4114__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nucliadb/common/context/__init__.py +90 -25
- nucliadb/common/context/fastapi.py +4 -2
- nucliadb/ingest/consumer/consumer.py +3 -4
- nucliadb/search/api/v1/find.py +5 -5
- nucliadb/search/api/v1/search.py +2 -10
- nucliadb/search/search/chat/ask.py +6 -3
- nucliadb/search/search/chat/query.py +21 -17
- nucliadb/search/search/find.py +14 -5
- nucliadb/search/search/find_merge.py +27 -13
- nucliadb/search/search/merge.py +17 -18
- nucliadb/search/search/query_parser/models.py +22 -27
- nucliadb/search/search/query_parser/parsers/common.py +32 -21
- nucliadb/search/search/query_parser/parsers/find.py +31 -8
- nucliadb/search/search/query_parser/parsers/search.py +33 -10
- nucliadb/search/search/query_parser/parsers/unit_retrieval.py +207 -115
- nucliadb/search/search/utils.py +2 -42
- nucliadb/train/app.py +0 -3
- nucliadb/train/lifecycle.py +16 -11
- {nucliadb-6.3.7.post4081.dist-info → nucliadb-6.3.7.post4114.dist-info}/METADATA +6 -6
- {nucliadb-6.3.7.post4081.dist-info → nucliadb-6.3.7.post4114.dist-info}/RECORD +23 -23
- {nucliadb-6.3.7.post4081.dist-info → nucliadb-6.3.7.post4114.dist-info}/WHEEL +1 -1
- {nucliadb-6.3.7.post4081.dist-info → nucliadb-6.3.7.post4114.dist-info}/entry_points.txt +0 -0
- {nucliadb-6.3.7.post4081.dist-info → nucliadb-6.3.7.post4114.dist-info}/top_level.txt +0 -0
@@ -19,154 +19,246 @@
|
|
19
19
|
#
|
20
20
|
from typing import Optional
|
21
21
|
|
22
|
-
from nucliadb.search.search.filters import
|
23
|
-
|
24
|
-
|
25
|
-
from nucliadb.search.search.metrics import (
|
26
|
-
node_features,
|
27
|
-
query_parser_observer,
|
28
|
-
)
|
29
|
-
from nucliadb.search.search.query import (
|
30
|
-
apply_entities_filter,
|
31
|
-
get_sort_field_proto,
|
32
|
-
)
|
22
|
+
from nucliadb.search.search.filters import translate_label
|
23
|
+
from nucliadb.search.search.metrics import node_features, query_parser_observer
|
24
|
+
from nucliadb.search.search.query import apply_entities_filter, get_sort_field_proto
|
33
25
|
from nucliadb.search.search.query_parser.filter_expression import add_and_expression
|
34
26
|
from nucliadb.search.search.query_parser.models import ParsedQuery, PredictReranker, UnitRetrieval
|
35
27
|
from nucliadb_models.labels import LABEL_HIDDEN, translate_system_to_alias_label
|
36
|
-
from nucliadb_models.search import
|
37
|
-
SortOrderMap,
|
38
|
-
)
|
28
|
+
from nucliadb_models.search import SortOrderMap
|
39
29
|
from nucliadb_protos import nodereader_pb2, utils_pb2
|
40
30
|
from nucliadb_protos.nodereader_pb2 import SearchRequest
|
41
31
|
|
42
32
|
|
43
33
|
@query_parser_observer.wrap({"type": "convert_retrieval_to_proto"})
|
44
|
-
async def
|
34
|
+
async def legacy_convert_retrieval_to_proto(
|
45
35
|
parsed: ParsedQuery,
|
46
36
|
) -> tuple[SearchRequest, bool, list[str], Optional[str]]:
|
47
|
-
|
37
|
+
converter = _Converter(parsed.retrieval)
|
38
|
+
request = converter.into_search_request()
|
48
39
|
|
49
|
-
|
40
|
+
# XXX: legacy values that were returned by QueryParser but not always
|
41
|
+
# needed. We should find a better abstraction
|
50
42
|
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
43
|
+
incomplete = is_incomplete(parsed.retrieval)
|
44
|
+
autofilter = converter._autofilter
|
45
|
+
|
46
|
+
rephrased_query = None
|
47
|
+
if parsed.retrieval.query.semantic:
|
48
|
+
rephrased_query = await parsed.fetcher.get_rephrased_query()
|
55
49
|
|
56
|
-
|
57
|
-
request.document = True
|
58
|
-
node_features.inc({"type": "documents"})
|
59
|
-
if parsed.retrieval.query.keyword:
|
60
|
-
request.paragraph = True
|
61
|
-
node_features.inc({"type": "paragraphs"})
|
50
|
+
return request, incomplete, autofilter, rephrased_query
|
62
51
|
|
63
|
-
|
64
|
-
|
65
|
-
|
52
|
+
|
53
|
+
@query_parser_observer.wrap({"type": "convert_retrieval_to_proto"})
|
54
|
+
def convert_retrieval_to_proto(retrieval: UnitRetrieval) -> SearchRequest:
|
55
|
+
converter = _Converter(retrieval)
|
56
|
+
request = converter.into_search_request()
|
57
|
+
return request
|
58
|
+
|
59
|
+
|
60
|
+
class _Converter:
|
61
|
+
def __init__(self, retrieval: UnitRetrieval):
|
62
|
+
self.req = nodereader_pb2.SearchRequest()
|
63
|
+
self.retrieval = retrieval
|
64
|
+
|
65
|
+
self._autofilter: list[str] = []
|
66
|
+
|
67
|
+
def into_search_request(self) -> nodereader_pb2.SearchRequest:
|
68
|
+
"""Generate a SearchRequest proto from a retrieval operation."""
|
69
|
+
self._apply_text_queries()
|
70
|
+
self._apply_semantic_query()
|
71
|
+
self._apply_relation_query()
|
72
|
+
self._apply_filters()
|
73
|
+
self._apply_top_k()
|
74
|
+
return self.req
|
75
|
+
|
76
|
+
def _apply_text_queries(self):
|
77
|
+
text_query = self.retrieval.query.keyword or self.retrieval.query.fulltext
|
78
|
+
if text_query is None:
|
79
|
+
return
|
80
|
+
|
81
|
+
if self.retrieval.query.keyword and self.retrieval.query.fulltext:
|
82
|
+
assert self.retrieval.query.keyword == self.retrieval.query.fulltext, (
|
83
|
+
"search proto doesn't support different queries for fulltext and keyword search"
|
84
|
+
)
|
85
|
+
|
86
|
+
if self.retrieval.query.fulltext:
|
87
|
+
self.req.document = True
|
88
|
+
node_features.inc({"type": "documents"})
|
89
|
+
if self.retrieval.query.keyword:
|
90
|
+
self.req.paragraph = True
|
91
|
+
node_features.inc({"type": "paragraphs"})
|
92
|
+
|
93
|
+
self.req.min_score_bm25 = text_query.min_score
|
66
94
|
|
67
95
|
if text_query.is_synonyms_query:
|
68
|
-
|
96
|
+
self.req.advanced_query = text_query.query
|
69
97
|
else:
|
70
|
-
|
98
|
+
self.req.body = text_query.query
|
71
99
|
|
72
100
|
# sort order
|
73
101
|
sort_field = get_sort_field_proto(text_query.order_by)
|
74
102
|
if sort_field is not None:
|
75
|
-
|
76
|
-
|
103
|
+
self.req.order.sort_by = sort_field
|
104
|
+
self.req.order.type = SortOrderMap[text_query.sort] # type: ignore
|
105
|
+
|
106
|
+
def _apply_semantic_query(self):
|
107
|
+
if self.retrieval.query.semantic is None:
|
108
|
+
return
|
77
109
|
|
78
|
-
if parsed.retrieval.query.semantic:
|
79
110
|
node_features.inc({"type": "vectors"})
|
80
111
|
|
81
|
-
|
112
|
+
self.req.min_score_semantic = self.retrieval.query.semantic.min_score
|
82
113
|
|
83
|
-
query_vector =
|
114
|
+
query_vector = self.retrieval.query.semantic.query
|
84
115
|
if query_vector is not None:
|
85
|
-
|
86
|
-
|
116
|
+
self.req.vectorset = self.retrieval.query.semantic.vectorset
|
117
|
+
self.req.vector.extend(query_vector)
|
87
118
|
|
88
|
-
|
89
|
-
|
119
|
+
def _apply_relation_query(self):
|
120
|
+
"""Relation queries are the legacy way to query the knowledge graph.
|
121
|
+
Given a set of entry points and some subtypes and entities to exclude
|
122
|
+
from search, it'd find the distance 1 neighbours (BFS)."""
|
90
123
|
|
91
|
-
|
92
|
-
|
93
|
-
request.relation_subgraph.deleted_groups.extend(
|
94
|
-
parsed.retrieval.query.relation.deleted_entity_groups
|
95
|
-
)
|
96
|
-
for group_id, deleted_entities in parsed.retrieval.query.relation.deleted_entities.items():
|
97
|
-
request.relation_subgraph.deleted_entities.append(
|
98
|
-
nodereader_pb2.EntitiesSubgraphRequest.DeletedEntities(
|
99
|
-
node_subtype=group_id, node_values=deleted_entities
|
100
|
-
)
|
101
|
-
)
|
124
|
+
if self.retrieval.query.relation is None:
|
125
|
+
return
|
102
126
|
|
103
|
-
|
104
|
-
|
105
|
-
request.with_duplicates = parsed.retrieval.filters.with_duplicates
|
106
|
-
|
107
|
-
request.faceted.labels.extend([translate_label(facet) for facet in parsed.retrieval.filters.facets])
|
108
|
-
|
109
|
-
if (
|
110
|
-
parsed.retrieval.filters.security is not None
|
111
|
-
and len(parsed.retrieval.filters.security.groups) > 0
|
112
|
-
):
|
113
|
-
security_pb = utils_pb2.Security()
|
114
|
-
for group_id in parsed.retrieval.filters.security.groups:
|
115
|
-
if group_id not in security_pb.access_groups:
|
116
|
-
security_pb.access_groups.append(group_id)
|
117
|
-
request.security.CopyFrom(security_pb)
|
118
|
-
|
119
|
-
if parsed.retrieval.filters.field_expression:
|
120
|
-
request.field_filter.CopyFrom(parsed.retrieval.filters.field_expression)
|
121
|
-
if parsed.retrieval.filters.paragraph_expression:
|
122
|
-
request.paragraph_filter.CopyFrom(parsed.retrieval.filters.paragraph_expression)
|
123
|
-
request.filter_operator = parsed.retrieval.filters.filter_expression_operator
|
124
|
-
|
125
|
-
autofilter = []
|
126
|
-
if parsed.retrieval.filters.autofilter:
|
127
|
-
entity_filters = apply_entities_filter(request, parsed.retrieval.filters.autofilter)
|
128
|
-
autofilter.extend([translate_system_to_alias_label(e) for e in entity_filters])
|
129
|
-
|
130
|
-
if parsed.retrieval.filters.hidden is not None:
|
131
|
-
expr = nodereader_pb2.FilterExpression()
|
132
|
-
if parsed.retrieval.filters.hidden:
|
133
|
-
expr.facet.facet = LABEL_HIDDEN
|
134
|
-
else:
|
135
|
-
expr.bool_not.facet.facet = LABEL_HIDDEN
|
136
|
-
|
137
|
-
add_and_expression(request.field_filter, expr)
|
138
|
-
|
139
|
-
# top_k
|
140
|
-
|
141
|
-
# Adjust requested page size depending on rank fusion and reranking algorithms.
|
142
|
-
#
|
143
|
-
# Some rerankers want more results than the requested by the user so
|
144
|
-
# reranking can have more choices.
|
145
|
-
|
146
|
-
rank_fusion_window = 0
|
147
|
-
if parsed.retrieval.rank_fusion is not None:
|
148
|
-
rank_fusion_window = parsed.retrieval.rank_fusion.window
|
149
|
-
|
150
|
-
reranker_window = 0
|
151
|
-
if parsed.retrieval.reranker is not None and isinstance(parsed.retrieval.reranker, PredictReranker):
|
152
|
-
reranker_window = parsed.retrieval.reranker.window
|
153
|
-
|
154
|
-
request.result_per_page = max(
|
155
|
-
request.result_per_page,
|
156
|
-
rank_fusion_window,
|
157
|
-
reranker_window,
|
158
|
-
)
|
127
|
+
node_features.inc({"type": "relations"})
|
159
128
|
|
160
|
-
|
161
|
-
|
129
|
+
# Entry points are source or target nodes we want to search for. We want
|
130
|
+
# any undirected path containing any entry point
|
131
|
+
entry_points_queries = []
|
132
|
+
for entry_point in self.retrieval.query.relation.entry_points:
|
133
|
+
q = nodereader_pb2.GraphQuery.PathQuery()
|
134
|
+
if entry_point.value:
|
135
|
+
q.path.source.value = entry_point.value
|
136
|
+
q.path.source.node_type = entry_point.ntype
|
137
|
+
if entry_point.subtype:
|
138
|
+
q.path.source.node_subtype = entry_point.subtype
|
139
|
+
q.path.undirected = True
|
140
|
+
entry_points_queries.append(q)
|
141
|
+
|
142
|
+
# A query can specify nodes marked as deleted in the db (but not
|
143
|
+
# removed from the index). We want to exclude any path containing any of
|
144
|
+
# those nodes.
|
145
|
+
#
|
146
|
+
# The request groups values per subtype (to optimize request size) but,
|
147
|
+
# as we don't support OR at node value level, we'll split them.
|
148
|
+
deleted_nodes_queries = []
|
149
|
+
for subtype, deleted_entities in self.retrieval.query.relation.deleted_entities.items():
|
150
|
+
if len(deleted_entities) == 0:
|
151
|
+
continue
|
152
|
+
for deleted_entity_value in deleted_entities:
|
153
|
+
q = nodereader_pb2.GraphQuery.PathQuery()
|
154
|
+
q.path.source.value = deleted_entity_value
|
155
|
+
q.path.source.node_subtype = subtype
|
156
|
+
q.path.undirected = True
|
157
|
+
deleted_nodes_queries.append(q)
|
158
|
+
|
159
|
+
# Subtypes can also be marked as deleted in the db (but kept in the
|
160
|
+
# index). We also want to exclude any triplet containing a node with such
|
161
|
+
# subtypes
|
162
|
+
excluded_subtypes_queries = []
|
163
|
+
for deleted_subtype in self.retrieval.query.relation.deleted_entity_groups:
|
164
|
+
q = nodereader_pb2.GraphQuery.PathQuery()
|
165
|
+
q.path.source.node_subtype = deleted_subtype
|
166
|
+
q.path.undirected = True
|
167
|
+
excluded_subtypes_queries.append(q)
|
168
|
+
|
169
|
+
subqueries = []
|
170
|
+
|
171
|
+
if len(entry_points_queries) > 0:
|
172
|
+
if len(entry_points_queries) == 1:
|
173
|
+
q = entry_points_queries[0]
|
174
|
+
else:
|
175
|
+
q = nodereader_pb2.GraphQuery.PathQuery()
|
176
|
+
q.bool_or.operands.extend(entry_points_queries)
|
177
|
+
subqueries.append(q)
|
178
|
+
|
179
|
+
if len(deleted_nodes_queries) > 0:
|
180
|
+
q = nodereader_pb2.GraphQuery.PathQuery()
|
181
|
+
if len(deleted_nodes_queries) == 1:
|
182
|
+
q.bool_not.CopyFrom(deleted_nodes_queries[0])
|
183
|
+
else:
|
184
|
+
q.bool_not.bool_or.operands.extend(deleted_nodes_queries)
|
185
|
+
subqueries.append(q)
|
186
|
+
|
187
|
+
if len(excluded_subtypes_queries) > 0:
|
188
|
+
q = nodereader_pb2.GraphQuery.PathQuery()
|
189
|
+
if len(excluded_subtypes_queries) == 1:
|
190
|
+
q.bool_not.CopyFrom(excluded_subtypes_queries[0])
|
191
|
+
else:
|
192
|
+
q.bool_not.bool_or.operands.extend(excluded_subtypes_queries)
|
193
|
+
subqueries.append(q)
|
194
|
+
|
195
|
+
if len(subqueries) == 0:
|
196
|
+
# don't set anything, no graph query
|
197
|
+
pass
|
198
|
+
elif len(subqueries) == 1:
|
199
|
+
q = subqueries[0]
|
200
|
+
self.req.graph_search.query.path.CopyFrom(q)
|
201
|
+
else:
|
202
|
+
self.req.graph_search.query.path.bool_and.operands.extend(subqueries)
|
162
203
|
|
163
|
-
|
204
|
+
def _apply_filters(self):
|
205
|
+
self.req.with_duplicates = self.retrieval.filters.with_duplicates
|
164
206
|
|
165
|
-
|
166
|
-
|
167
|
-
|
207
|
+
self.req.faceted.labels.extend(
|
208
|
+
[translate_label(facet) for facet in self.retrieval.filters.facets]
|
209
|
+
)
|
168
210
|
|
169
|
-
|
211
|
+
if (
|
212
|
+
self.retrieval.filters.security is not None
|
213
|
+
and len(self.retrieval.filters.security.groups) > 0
|
214
|
+
):
|
215
|
+
security_pb = utils_pb2.Security()
|
216
|
+
for group_id in self.retrieval.filters.security.groups:
|
217
|
+
if group_id not in security_pb.access_groups:
|
218
|
+
security_pb.access_groups.append(group_id)
|
219
|
+
self.req.security.CopyFrom(security_pb)
|
220
|
+
|
221
|
+
if self.retrieval.filters.field_expression:
|
222
|
+
self.req.field_filter.CopyFrom(self.retrieval.filters.field_expression)
|
223
|
+
if self.retrieval.filters.paragraph_expression:
|
224
|
+
self.req.paragraph_filter.CopyFrom(self.retrieval.filters.paragraph_expression)
|
225
|
+
self.req.filter_operator = self.retrieval.filters.filter_expression_operator
|
226
|
+
|
227
|
+
if self.retrieval.filters.autofilter:
|
228
|
+
entity_filters = apply_entities_filter(self.req, self.retrieval.filters.autofilter)
|
229
|
+
self._autofilter.extend([translate_system_to_alias_label(e) for e in entity_filters])
|
230
|
+
|
231
|
+
if self.retrieval.filters.hidden is not None:
|
232
|
+
expr = nodereader_pb2.FilterExpression()
|
233
|
+
if self.retrieval.filters.hidden:
|
234
|
+
expr.facet.facet = LABEL_HIDDEN
|
235
|
+
else:
|
236
|
+
expr.bool_not.facet.facet = LABEL_HIDDEN
|
237
|
+
|
238
|
+
add_and_expression(self.req.field_filter, expr)
|
239
|
+
|
240
|
+
def _apply_top_k(self):
|
241
|
+
"""Adjust requested page size depending on rank fusion and reranking
|
242
|
+
algorithms.
|
243
|
+
|
244
|
+
Some rerankers want more results than requested by the user so
|
245
|
+
reranking can have more choices.
|
246
|
+
"""
|
247
|
+
top_k = self.retrieval.top_k
|
248
|
+
|
249
|
+
rank_fusion_window = 0
|
250
|
+
if self.retrieval.rank_fusion is not None:
|
251
|
+
rank_fusion_window = self.retrieval.rank_fusion.window
|
252
|
+
|
253
|
+
reranker_window = 0
|
254
|
+
if self.retrieval.reranker is not None and isinstance(self.retrieval.reranker, PredictReranker):
|
255
|
+
reranker_window = self.retrieval.reranker.window
|
256
|
+
|
257
|
+
self.req.result_per_page = max(
|
258
|
+
top_k,
|
259
|
+
rank_fusion_window,
|
260
|
+
reranker_window,
|
261
|
+
)
|
170
262
|
|
171
263
|
|
172
264
|
def is_incomplete(retrieval: UnitRetrieval) -> bool:
|
nucliadb/search/search/utils.py
CHANGED
@@ -18,12 +18,12 @@
|
|
18
18
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
19
|
#
|
20
20
|
import logging
|
21
|
-
from typing import Optional
|
21
|
+
from typing import Optional
|
22
22
|
|
23
23
|
from pydantic import BaseModel
|
24
24
|
|
25
25
|
from nucliadb.common.datamanagers.atomic import kb
|
26
|
-
from nucliadb_models.search import
|
26
|
+
from nucliadb_models.search import MinScore
|
27
27
|
from nucliadb_utils import const
|
28
28
|
from nucliadb_utils.utilities import has_feature
|
29
29
|
|
@@ -39,36 +39,6 @@ async def filter_hidden_resources(kbid: str, show_hidden: bool) -> Optional[bool
|
|
39
39
|
return None # None = No filtering, show all resources
|
40
40
|
|
41
41
|
|
42
|
-
def is_empty_query(request: BaseSearchRequest) -> bool:
|
43
|
-
return len(request.query) == 0
|
44
|
-
|
45
|
-
|
46
|
-
def has_user_vectors(request: BaseSearchRequest) -> bool:
|
47
|
-
return request.vector is not None and len(request.vector) > 0
|
48
|
-
|
49
|
-
|
50
|
-
def is_exact_match_only_query(request: BaseSearchRequest) -> bool:
|
51
|
-
"""
|
52
|
-
'"something"' -> True
|
53
|
-
'foo "something" else' -> False
|
54
|
-
"""
|
55
|
-
query = request.query.strip()
|
56
|
-
return len(query) > 0 and query.startswith('"') and query.endswith('"')
|
57
|
-
|
58
|
-
|
59
|
-
def should_disable_vector_search(request: BaseSearchRequest) -> bool:
|
60
|
-
if has_user_vectors(request):
|
61
|
-
return False
|
62
|
-
|
63
|
-
if is_exact_match_only_query(request):
|
64
|
-
return True
|
65
|
-
|
66
|
-
if is_empty_query(request):
|
67
|
-
return True
|
68
|
-
|
69
|
-
return False
|
70
|
-
|
71
|
-
|
72
42
|
def min_score_from_query_params(
|
73
43
|
min_score_bm25: float,
|
74
44
|
min_score_semantic: Optional[float],
|
@@ -79,16 +49,6 @@ def min_score_from_query_params(
|
|
79
49
|
return MinScore(bm25=min_score_bm25, semantic=semantic)
|
80
50
|
|
81
51
|
|
82
|
-
def min_score_from_payload(min_score: Optional[Union[float, MinScore]]) -> MinScore:
|
83
|
-
# Keep backward compatibility with the deprecated
|
84
|
-
# min_score payload parameter being a float
|
85
|
-
if min_score is None:
|
86
|
-
return MinScore(bm25=0, semantic=None)
|
87
|
-
elif isinstance(min_score, float):
|
88
|
-
return MinScore(bm25=0, semantic=min_score)
|
89
|
-
return min_score
|
90
|
-
|
91
|
-
|
92
52
|
def maybe_log_request_payload(kbid: str, endpoint: str, item: BaseModel):
|
93
53
|
if has_feature(const.Features.LOG_REQUEST_PAYLOADS, context={"kbid": kbid}, default=False):
|
94
54
|
logger.info(
|
nucliadb/train/app.py
CHANGED
@@ -33,18 +33,15 @@ from nucliadb_telemetry.fastapi.utils import (
|
|
33
33
|
client_disconnect_handler,
|
34
34
|
global_exception_handler,
|
35
35
|
)
|
36
|
-
from nucliadb_utils.audit.stream import AuditMiddleware
|
37
36
|
from nucliadb_utils.authentication import NucliaCloudAuthenticationBackend
|
38
37
|
from nucliadb_utils.fastapi.openapi import extend_openapi
|
39
38
|
from nucliadb_utils.fastapi.versioning import VersionedFastAPI
|
40
39
|
from nucliadb_utils.settings import running_settings
|
41
|
-
from nucliadb_utils.utilities import get_audit
|
42
40
|
|
43
41
|
middleware = []
|
44
42
|
middleware.extend(
|
45
43
|
[
|
46
44
|
Middleware(AuthenticationMiddleware, backend=NucliaCloudAuthenticationBackend()),
|
47
|
-
Middleware(AuditMiddleware, audit_utility_getter=get_audit),
|
48
45
|
]
|
49
46
|
)
|
50
47
|
|
nucliadb/train/lifecycle.py
CHANGED
@@ -22,32 +22,37 @@ from contextlib import asynccontextmanager
|
|
22
22
|
|
23
23
|
from fastapi import FastAPI
|
24
24
|
|
25
|
+
from nucliadb.common.context import ApplicationContext
|
25
26
|
from nucliadb.common.context.fastapi import inject_app_context
|
26
|
-
from nucliadb.common.nidx import start_nidx_utility, stop_nidx_utility
|
27
27
|
from nucliadb.train import SERVICE_NAME
|
28
28
|
from nucliadb.train.utils import (
|
29
|
-
start_shard_manager,
|
29
|
+
start_shard_manager as start_train_shard_manager,
|
30
|
+
)
|
31
|
+
from nucliadb.train.utils import (
|
30
32
|
start_train_grpc,
|
31
|
-
stop_shard_manager,
|
32
33
|
stop_train_grpc,
|
33
34
|
)
|
35
|
+
from nucliadb.train.utils import (
|
36
|
+
stop_shard_manager as stop_train_shard_manager,
|
37
|
+
)
|
34
38
|
from nucliadb_telemetry.utils import clean_telemetry, setup_telemetry
|
35
|
-
from nucliadb_utils.utilities import start_audit_utility, stop_audit_utility
|
36
39
|
|
37
40
|
|
38
41
|
@asynccontextmanager
|
39
42
|
async def lifespan(app: FastAPI):
|
40
43
|
await setup_telemetry(SERVICE_NAME)
|
41
|
-
await
|
42
|
-
await start_shard_manager()
|
44
|
+
await start_train_shard_manager()
|
43
45
|
await start_train_grpc(SERVICE_NAME)
|
44
|
-
await start_audit_utility(SERVICE_NAME)
|
45
46
|
try:
|
46
|
-
|
47
|
+
context = ApplicationContext(
|
48
|
+
service_name="train",
|
49
|
+
partitioning=False,
|
50
|
+
nats_manager=False,
|
51
|
+
transaction=False,
|
52
|
+
)
|
53
|
+
async with inject_app_context(app, context):
|
47
54
|
yield
|
48
55
|
finally:
|
49
|
-
await stop_audit_utility()
|
50
56
|
await stop_train_grpc()
|
51
|
-
await
|
52
|
-
await stop_nidx_utility()
|
57
|
+
await stop_train_shard_manager()
|
53
58
|
await clean_telemetry(SERVICE_NAME)
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: nucliadb
|
3
|
-
Version: 6.3.7.
|
3
|
+
Version: 6.3.7.post4114
|
4
4
|
Summary: NucliaDB
|
5
5
|
Author-email: Nuclia <nucliadb@nuclia.com>
|
6
6
|
License: AGPL
|
@@ -20,11 +20,11 @@ Classifier: Programming Language :: Python :: 3.12
|
|
20
20
|
Classifier: Programming Language :: Python :: 3 :: Only
|
21
21
|
Requires-Python: <4,>=3.9
|
22
22
|
Description-Content-Type: text/markdown
|
23
|
-
Requires-Dist: nucliadb-telemetry[all]>=6.3.7.
|
24
|
-
Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.3.7.
|
25
|
-
Requires-Dist: nucliadb-protos>=6.3.7.
|
26
|
-
Requires-Dist: nucliadb-models>=6.3.7.
|
27
|
-
Requires-Dist: nidx-protos>=6.3.7.
|
23
|
+
Requires-Dist: nucliadb-telemetry[all]>=6.3.7.post4114
|
24
|
+
Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.3.7.post4114
|
25
|
+
Requires-Dist: nucliadb-protos>=6.3.7.post4114
|
26
|
+
Requires-Dist: nucliadb-models>=6.3.7.post4114
|
27
|
+
Requires-Dist: nidx-protos>=6.3.7.post4114
|
28
28
|
Requires-Dist: nucliadb-admin-assets>=1.0.0.post1224
|
29
29
|
Requires-Dist: nuclia-models>=0.24.2
|
30
30
|
Requires-Dist: uvicorn[standard]
|
@@ -70,8 +70,8 @@ nucliadb/common/cluster/settings.py,sha256=JPwV_0U_i618Tn66GWUq6qCKNjy4TWkGEGld9
|
|
70
70
|
nucliadb/common/cluster/utils.py,sha256=K0BZ75m12Tbi7KS1Tr3LqXJSPhJzUHkBMo4j8W8nQpA,4639
|
71
71
|
nucliadb/common/cluster/standalone/__init__.py,sha256=itSI7dtTwFP55YMX4iK7JzdMHS5CQVUiB1XzQu4UBh8,833
|
72
72
|
nucliadb/common/cluster/standalone/utils.py,sha256=af3r-x_GF7A6dwIAhZLR-r-SZQEVxsFrDKeMfUTA6G0,1908
|
73
|
-
nucliadb/common/context/__init__.py,sha256=
|
74
|
-
nucliadb/common/context/fastapi.py,sha256=
|
73
|
+
nucliadb/common/context/__init__.py,sha256=C3SsVpwyqHj2PPAUgtf0GVmq9KVQi3q5dJCsMIs9fpY,5999
|
74
|
+
nucliadb/common/context/fastapi.py,sha256=mH_8n5t7quNSPivNM2JS5EQf2sTVJsdzXW6LaY7EHAA,1629
|
75
75
|
nucliadb/common/datamanagers/__init__.py,sha256=jksw4pXyXb05SG3EN-BPBrhc1u1Ge_m21PYqD7NYQEs,2118
|
76
76
|
nucliadb/common/datamanagers/atomic.py,sha256=WihdtBWQIAuElZQjh1xQ--q5dJowwlkovqsW-OB_t2k,3230
|
77
77
|
nucliadb/common/datamanagers/cluster.py,sha256=iU0b7AESm1Yi8Wp3pIKgqixZGNMjeBrxSpvEKsaZKgY,1831
|
@@ -126,7 +126,7 @@ nucliadb/ingest/settings.py,sha256=0B-wQNa8FLqtNcQgRzh-fuIuGptM816XHcbH1NQKfmE,3
|
|
126
126
|
nucliadb/ingest/utils.py,sha256=l1myURu3r8oA11dx3GpHw-gNTUc1AFX8xdPm9Lgl2rA,2275
|
127
127
|
nucliadb/ingest/consumer/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
|
128
128
|
nucliadb/ingest/consumer/auditing.py,sha256=QLffHz49oz9h9P080oBB7eTBL4cqWI-JTTDeg3SmFQ8,7264
|
129
|
-
nucliadb/ingest/consumer/consumer.py,sha256=
|
129
|
+
nucliadb/ingest/consumer/consumer.py,sha256=OgS1fr5Yo55u-XbC6zypTH1aJ562Y1vZHnPDlJJpCXQ,13703
|
130
130
|
nucliadb/ingest/consumer/materializer.py,sha256=7ofLbwjldJA8TWXDRZRM4U5EviZt3qNSQ8oadmkzS0Y,3840
|
131
131
|
nucliadb/ingest/consumer/metrics.py,sha256=ji1l_4cKiHJthQd8YNem1ft4iMbw9KThmVvJmLcv3Xg,1075
|
132
132
|
nucliadb/ingest/consumer/pull.py,sha256=EYT0ImngMQgatStG68p2GSrPQBbJxeuq8nFm8DdAbwk,9280
|
@@ -211,12 +211,12 @@ nucliadb/search/api/v1/__init__.py,sha256=DH16OYnw9jQ38OpKlmdXeoq2j40ZPXZRtGvClK
|
|
211
211
|
nucliadb/search/api/v1/ask.py,sha256=b4tz33HNsfT5DXv_2DMc_jirnFsHuobreWkbAKkzj5o,5337
|
212
212
|
nucliadb/search/api/v1/catalog.py,sha256=Nw4wIj4AjGp-p64FFVQFN4v2LFcV3A0UJIxfo3_XGmY,7670
|
213
213
|
nucliadb/search/api/v1/feedback.py,sha256=kNLc4dHz2SXHzV0PwC1WiRAwY88fDptPcP-kO0q-FrQ,2620
|
214
|
-
nucliadb/search/api/v1/find.py,sha256=
|
214
|
+
nucliadb/search/api/v1/find.py,sha256=JFbGDRFBHBTApYR1qHp9RngbE_QDb96fXORMdjcN6lg,10654
|
215
215
|
nucliadb/search/api/v1/graph.py,sha256=ItVpzJbqfDLjoIo2fTb2mKGCM1Z34sx7CBb3gNmj6IQ,4274
|
216
216
|
nucliadb/search/api/v1/knowledgebox.py,sha256=rWhx3PYWryingu19qwwFDbVvVYynq5Ky23FSlzmTutQ,8721
|
217
217
|
nucliadb/search/api/v1/predict_proxy.py,sha256=QrGzo0hKjtmyGZ6pjlJHYAh4hxwVUIOTcVcerRCw7eE,3047
|
218
218
|
nucliadb/search/api/v1/router.py,sha256=mtT07rBZcVfpa49doaw9b1tj3sdi3qLH0gn9Io6NYM0,988
|
219
|
-
nucliadb/search/api/v1/search.py,sha256=
|
219
|
+
nucliadb/search/api/v1/search.py,sha256=JOFlc4FE10Ai63NTQpRxc0djty9iBnSvnTsI493cLkM,12567
|
220
220
|
nucliadb/search/api/v1/suggest.py,sha256=Pwyxyk3Vu7aKU8vl2_rKhuE40ngnjZwAXS1rAilPDtM,6506
|
221
221
|
nucliadb/search/api/v1/summarize.py,sha256=VAHJvE6V3xUgEBfqNKhgoxmDqCvh30RnrEIBVhMcNLU,2499
|
222
222
|
nucliadb/search/api/v1/utils.py,sha256=5Ve-frn7LAE2jqAgB85F8RSeqxDlyA08--gS-AdOLS4,1434
|
@@ -233,13 +233,13 @@ nucliadb/search/search/cut.py,sha256=ytY0_GY7ocNjfxTb4aosxEp4ZfhQNDP--JkhEMGD298
|
|
233
233
|
nucliadb/search/search/exceptions.py,sha256=klGLgAGGrXcSGix_W6418ZBMqDchAIGjN77ofkOScEI,1039
|
234
234
|
nucliadb/search/search/fetch.py,sha256=XJHIFnZmXM_8Kb37lb4lg1GYG7cZ1plT-qAIb_QziX4,6184
|
235
235
|
nucliadb/search/search/filters.py,sha256=1MkHlJjAQqoRCj7e5cEzK2HvBxGLE17I_omsjiklbtw,6476
|
236
|
-
nucliadb/search/search/find.py,sha256=
|
237
|
-
nucliadb/search/search/find_merge.py,sha256=
|
236
|
+
nucliadb/search/search/find.py,sha256=2VEL4EaYiXa4gh3u16RljKcIQKHInRhA8Zt5xhw063o,8263
|
237
|
+
nucliadb/search/search/find_merge.py,sha256=R40z-jIRM2Lmcxs4ox3YwHy96uDXhJNd8vUP5KXgtSE,18413
|
238
238
|
nucliadb/search/search/graph_merge.py,sha256=OiUNiXOWwrUVKqStuRcoUJwvDbDYamqIgiAy_FwPdMI,3405
|
239
239
|
nucliadb/search/search/graph_strategy.py,sha256=hwof-jxYELI6EYmvccDViDda3urE6E7v24-_-IsEF3E,32916
|
240
240
|
nucliadb/search/search/hydrator.py,sha256=-R37gCrGxkyaiHQalnTWHNG_FCx11Zucd7qA1vQCxuw,6985
|
241
241
|
nucliadb/search/search/ingestion_agents.py,sha256=NeJr4EEX-bvFFMGvXOOwLv8uU7NuQ-ntJnnrhnKfMzY,3174
|
242
|
-
nucliadb/search/search/merge.py,sha256=
|
242
|
+
nucliadb/search/search/merge.py,sha256=SqjhYlBJARCs3ZAPJ6rwRabqYdMAvEAWCTkBRSNC4zY,23310
|
243
243
|
nucliadb/search/search/metrics.py,sha256=HJVQPLOIwLuc733G4keqEgx1-Dcg97hyWGKZQTojiSE,2973
|
244
244
|
nucliadb/search/search/paragraphs.py,sha256=pNAEiYqJGGUVcEf7xf-PFMVqz0PX4Qb-WNG-_zPGN2o,7799
|
245
245
|
nucliadb/search/search/pgcatalog.py,sha256=s_J98fsX_RuFXwpejpkGqG-tD9ELuzz4YQ6U3ew5h2g,9313
|
@@ -249,27 +249,27 @@ nucliadb/search/search/rank_fusion.py,sha256=tRGo_KlsFsVx1CQEy1iqQ6f0T1Dq1kf0axD
|
|
249
249
|
nucliadb/search/search/rerankers.py,sha256=PvhExUb8zZYghiFHRgGotw6h6bU--Rft09wE8arvtAw,7424
|
250
250
|
nucliadb/search/search/shards.py,sha256=OEtN1p9WX_cMX8t-myaafpmFAPTpUEOutR7z1sDuNcY,3242
|
251
251
|
nucliadb/search/search/summarize.py,sha256=ksmYPubEQvAQgfPdZHfzB_rR19B2ci4IYZ6jLdHxZo8,4996
|
252
|
-
nucliadb/search/search/utils.py,sha256=
|
252
|
+
nucliadb/search/search/utils.py,sha256=ajRIXfdTF67dBVahQCXW-rSv6gJpUMPt3QhJrWqArTQ,2175
|
253
253
|
nucliadb/search/search/chat/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
|
254
|
-
nucliadb/search/search/chat/ask.py,sha256=
|
254
|
+
nucliadb/search/search/chat/ask.py,sha256=M8e2BCTCDXvMAoUBdsIQq2U-e_LfQTMcDekJsEdIk4M,37368
|
255
255
|
nucliadb/search/search/chat/exceptions.py,sha256=Siy4GXW2L7oPhIR86H3WHBhE9lkV4A4YaAszuGGUf54,1356
|
256
256
|
nucliadb/search/search/chat/images.py,sha256=PA8VWxT5_HUGfW1ULhKTK46UBsVyINtWWqEM1ulzX1E,3095
|
257
257
|
nucliadb/search/search/chat/prompt.py,sha256=Jnja-Ss7skgnnDY8BymVfdeYsFPnIQFL8tEvcRXTKUE,47356
|
258
|
-
nucliadb/search/search/chat/query.py,sha256=
|
258
|
+
nucliadb/search/search/chat/query.py,sha256=wi3LqXSNq-lGpDgFsbMEjvVyoHba2GAPwsCiNriZee4,16821
|
259
259
|
nucliadb/search/search/query_parser/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
|
260
260
|
nucliadb/search/search/query_parser/exceptions.py,sha256=szAOXUZ27oNY-OSa9t2hQ5HHkQQC0EX1FZz_LluJHJE,1224
|
261
261
|
nucliadb/search/search/query_parser/fetcher.py,sha256=SkvBRDfSKmuz-QygNKLAU4AhZhhDo1dnOZmt1zA28RA,16851
|
262
262
|
nucliadb/search/search/query_parser/filter_expression.py,sha256=rws5vsKTofX2iMUK4yvjmLZFxtcbWbyhIcwen4j0rQg,6578
|
263
|
-
nucliadb/search/search/query_parser/models.py,sha256=
|
263
|
+
nucliadb/search/search/query_parser/models.py,sha256=tJLq7C8rKyvwHnIw65BAjf_XXYShzBhaeqTqaDdkgZY,4736
|
264
264
|
nucliadb/search/search/query_parser/old_filters.py,sha256=-zbfN-RsXoj_DRjh3Lfp-wShwFXgkISawzVptVzja-A,9071
|
265
265
|
nucliadb/search/search/query_parser/parsers/__init__.py,sha256=ySCNSdbesLXGZyR88919njulA6UE10_3PhqMG_Yj1o4,1034
|
266
266
|
nucliadb/search/search/query_parser/parsers/ask.py,sha256=eTz8wS-EJHuAagR384h6TT64itymFZRpfZJGX8r6aZM,2771
|
267
267
|
nucliadb/search/search/query_parser/parsers/catalog.py,sha256=XdBiTweGTQkj8m_V_i2xbwp7P5pPO8K1Tud692XKhMw,7149
|
268
|
-
nucliadb/search/search/query_parser/parsers/common.py,sha256=
|
269
|
-
nucliadb/search/search/query_parser/parsers/find.py,sha256=
|
268
|
+
nucliadb/search/search/query_parser/parsers/common.py,sha256=o3028wUnK78lOmFK0jtmpvx2Y1Jh_atBYBoO5VD-qJ4,6359
|
269
|
+
nucliadb/search/search/query_parser/parsers/find.py,sha256=pY0EihnZ2PuBds5IUDjvyRTI_f5VhcmdHpXhFApAuhw,12012
|
270
270
|
nucliadb/search/search/query_parser/parsers/graph.py,sha256=QJs-pybNXPsMSEkIHctb0Q0xQG-aArks8BtUxbJL5rU,9386
|
271
|
-
nucliadb/search/search/query_parser/parsers/search.py,sha256=
|
272
|
-
nucliadb/search/search/query_parser/parsers/unit_retrieval.py,sha256=
|
271
|
+
nucliadb/search/search/query_parser/parsers/search.py,sha256=tqfUd0r6F8JUKbcPfey6qRHcJFHC6xe6FdJWydVdn3E,10493
|
272
|
+
nucliadb/search/search/query_parser/parsers/unit_retrieval.py,sha256=mibkQyhGlICTgCtArBWv8aPONR9QMQMod5zV5V-Yr2A,10929
|
273
273
|
nucliadb/standalone/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
|
274
274
|
nucliadb/standalone/api_router.py,sha256=hgq9FXpihzgjHkwcVGfGCSwyXy67fqXTfLFHuINzIi0,5567
|
275
275
|
nucliadb/standalone/app.py,sha256=mAApNK_iVsQgJyd-mtwCeZq5csSimwnXmlQGH9a70pE,5586
|
@@ -296,9 +296,9 @@ nucliadb/tests/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,83
|
|
296
296
|
nucliadb/tests/config.py,sha256=JN_Jhgj-fwM9_8IeO9pwxr6C1PiwRDrXxm67Y38rU30,2080
|
297
297
|
nucliadb/tests/vectors.py,sha256=CcNKx-E8LPpyvRyljbmb-Tn_wST9Juw2CBoogWrKiTk,62843
|
298
298
|
nucliadb/train/__init__.py,sha256=NVwe5yULoHXb80itIJT8YJYEz2xbiOPQ7_OMys6XJw8,1301
|
299
|
-
nucliadb/train/app.py,sha256=
|
299
|
+
nucliadb/train/app.py,sha256=z6xlGVVVaJmZZmLPIVTgkjD-wIz5b0NYlXAQp7hBHYw,2652
|
300
300
|
nucliadb/train/generator.py,sha256=B3bdkH-IWiqib-ALpU_g8wFe0xOmdE0kEPxvki28yEU,4229
|
301
|
-
nucliadb/train/lifecycle.py,sha256=
|
301
|
+
nucliadb/train/lifecycle.py,sha256=3HadM4GRsYb2m-v4jtdr9C-KBEBx8GlrJDArPYi3SWQ,1960
|
302
302
|
nucliadb/train/models.py,sha256=BmgmMjDsu_1Ih5JDAqo6whhume90q0ASJcDP9dkMQm8,1198
|
303
303
|
nucliadb/train/nodes.py,sha256=HROQMRw2g5sJTnuBagh3B0id3iWonRJ68tg3skOme9k,5748
|
304
304
|
nucliadb/train/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
@@ -365,8 +365,8 @@ nucliadb/writer/tus/local.py,sha256=7jYa_w9b-N90jWgN2sQKkNcomqn6JMVBOVeDOVYJHto,
|
|
365
365
|
nucliadb/writer/tus/s3.py,sha256=vF0NkFTXiXhXq3bCVXXVV-ED38ECVoUeeYViP8uMqcU,8357
|
366
366
|
nucliadb/writer/tus/storage.py,sha256=ToqwjoYnjI4oIcwzkhha_MPxi-k4Jk3Lt55zRwaC1SM,2903
|
367
367
|
nucliadb/writer/tus/utils.py,sha256=MSdVbRsRSZVdkaum69_0wku7X3p5wlZf4nr6E0GMKbw,2556
|
368
|
-
nucliadb-6.3.7.
|
369
|
-
nucliadb-6.3.7.
|
370
|
-
nucliadb-6.3.7.
|
371
|
-
nucliadb-6.3.7.
|
372
|
-
nucliadb-6.3.7.
|
368
|
+
nucliadb-6.3.7.post4114.dist-info/METADATA,sha256=Fc0VYLTAEVFwMzXgIJb-aIMwPwgmRXUTZo1EFnZeBrs,4226
|
369
|
+
nucliadb-6.3.7.post4114.dist-info/WHEEL,sha256=SmOxYU7pzNKBqASvQJ7DjX3XGUF92lrGhMb3R6_iiqI,91
|
370
|
+
nucliadb-6.3.7.post4114.dist-info/entry_points.txt,sha256=XqGfgFDuY3zXQc8ewXM2TRVjTModIq851zOsgrmaXx4,1268
|
371
|
+
nucliadb-6.3.7.post4114.dist-info/top_level.txt,sha256=hwYhTVnX7jkQ9gJCkVrbqEG1M4lT2F_iPQND1fCzF80,20
|
372
|
+
nucliadb-6.3.7.post4114.dist-info/RECORD,,
|
File without changes
|
File without changes
|