nucliadb 6.2.0.post2675__py3-none-any.whl → 6.2.1__py3-none-any.whl
This diff covers publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as published in their respective public registries.
- migrations/0028_extracted_vectors_reference.py +61 -0
- migrations/0029_backfill_field_status.py +149 -0
- migrations/0030_label_deduplication.py +60 -0
- nucliadb/common/cluster/manager.py +41 -331
- nucliadb/common/cluster/rebalance.py +2 -2
- nucliadb/common/cluster/rollover.py +12 -71
- nucliadb/common/cluster/settings.py +3 -0
- nucliadb/common/cluster/standalone/utils.py +0 -43
- nucliadb/common/cluster/utils.py +0 -16
- nucliadb/common/counters.py +1 -0
- nucliadb/common/datamanagers/fields.py +48 -7
- nucliadb/common/datamanagers/vectorsets.py +11 -2
- nucliadb/common/external_index_providers/base.py +2 -1
- nucliadb/common/external_index_providers/pinecone.py +3 -5
- nucliadb/common/ids.py +18 -4
- nucliadb/common/models_utils/from_proto.py +479 -0
- nucliadb/common/models_utils/to_proto.py +60 -0
- nucliadb/common/nidx.py +76 -37
- nucliadb/export_import/models.py +3 -3
- nucliadb/health.py +0 -7
- nucliadb/ingest/app.py +0 -8
- nucliadb/ingest/consumer/auditing.py +1 -1
- nucliadb/ingest/consumer/shard_creator.py +1 -1
- nucliadb/ingest/fields/base.py +83 -21
- nucliadb/ingest/orm/brain.py +55 -56
- nucliadb/ingest/orm/broker_message.py +12 -2
- nucliadb/ingest/orm/entities.py +6 -17
- nucliadb/ingest/orm/knowledgebox.py +44 -22
- nucliadb/ingest/orm/processor/data_augmentation.py +7 -29
- nucliadb/ingest/orm/processor/processor.py +5 -2
- nucliadb/ingest/orm/resource.py +222 -413
- nucliadb/ingest/processing.py +8 -2
- nucliadb/ingest/serialize.py +77 -46
- nucliadb/ingest/service/writer.py +2 -56
- nucliadb/ingest/settings.py +1 -4
- nucliadb/learning_proxy.py +6 -4
- nucliadb/purge/__init__.py +102 -12
- nucliadb/purge/orphan_shards.py +6 -4
- nucliadb/reader/api/models.py +3 -3
- nucliadb/reader/api/v1/__init__.py +1 -0
- nucliadb/reader/api/v1/download.py +2 -2
- nucliadb/reader/api/v1/knowledgebox.py +3 -3
- nucliadb/reader/api/v1/resource.py +23 -12
- nucliadb/reader/api/v1/services.py +4 -4
- nucliadb/reader/api/v1/vectorsets.py +48 -0
- nucliadb/search/api/v1/ask.py +11 -1
- nucliadb/search/api/v1/feedback.py +3 -3
- nucliadb/search/api/v1/knowledgebox.py +8 -13
- nucliadb/search/api/v1/search.py +3 -2
- nucliadb/search/api/v1/suggest.py +0 -2
- nucliadb/search/predict.py +6 -4
- nucliadb/search/requesters/utils.py +1 -2
- nucliadb/search/search/chat/ask.py +77 -13
- nucliadb/search/search/chat/prompt.py +16 -5
- nucliadb/search/search/chat/query.py +74 -34
- nucliadb/search/search/exceptions.py +2 -7
- nucliadb/search/search/find.py +9 -5
- nucliadb/search/search/find_merge.py +10 -4
- nucliadb/search/search/graph_strategy.py +884 -0
- nucliadb/search/search/hydrator.py +6 -0
- nucliadb/search/search/merge.py +79 -24
- nucliadb/search/search/query.py +74 -245
- nucliadb/search/search/query_parser/exceptions.py +11 -1
- nucliadb/search/search/query_parser/fetcher.py +405 -0
- nucliadb/search/search/query_parser/models.py +0 -3
- nucliadb/search/search/query_parser/parser.py +22 -21
- nucliadb/search/search/rerankers.py +1 -42
- nucliadb/search/search/shards.py +19 -0
- nucliadb/standalone/api_router.py +2 -14
- nucliadb/standalone/settings.py +4 -0
- nucliadb/train/generators/field_streaming.py +7 -3
- nucliadb/train/lifecycle.py +3 -6
- nucliadb/train/nodes.py +14 -12
- nucliadb/train/resource.py +380 -0
- nucliadb/writer/api/constants.py +20 -16
- nucliadb/writer/api/v1/__init__.py +1 -0
- nucliadb/writer/api/v1/export_import.py +1 -1
- nucliadb/writer/api/v1/field.py +13 -7
- nucliadb/writer/api/v1/knowledgebox.py +3 -46
- nucliadb/writer/api/v1/resource.py +20 -13
- nucliadb/writer/api/v1/services.py +10 -1
- nucliadb/writer/api/v1/upload.py +61 -34
- nucliadb/writer/{vectorsets.py → api/v1/vectorsets.py} +99 -47
- nucliadb/writer/back_pressure.py +17 -46
- nucliadb/writer/resource/basic.py +9 -7
- nucliadb/writer/resource/field.py +42 -9
- nucliadb/writer/settings.py +2 -2
- nucliadb/writer/tus/gcs.py +11 -10
- {nucliadb-6.2.0.post2675.dist-info → nucliadb-6.2.1.dist-info}/METADATA +11 -14
- {nucliadb-6.2.0.post2675.dist-info → nucliadb-6.2.1.dist-info}/RECORD +94 -96
- {nucliadb-6.2.0.post2675.dist-info → nucliadb-6.2.1.dist-info}/WHEEL +1 -1
- nucliadb/common/cluster/discovery/base.py +0 -178
- nucliadb/common/cluster/discovery/k8s.py +0 -301
- nucliadb/common/cluster/discovery/manual.py +0 -57
- nucliadb/common/cluster/discovery/single.py +0 -51
- nucliadb/common/cluster/discovery/types.py +0 -32
- nucliadb/common/cluster/discovery/utils.py +0 -67
- nucliadb/common/cluster/standalone/grpc_node_binding.py +0 -349
- nucliadb/common/cluster/standalone/index_node.py +0 -123
- nucliadb/common/cluster/standalone/service.py +0 -84
- nucliadb/standalone/introspect.py +0 -208
- nucliadb-6.2.0.post2675.dist-info/zip-safe +0 -1
- /nucliadb/common/{cluster/discovery → models_utils}/__init__.py +0 -0
- {nucliadb-6.2.0.post2675.dist-info → nucliadb-6.2.1.dist-info}/entry_points.txt +0 -0
- {nucliadb-6.2.0.post2675.dist-info → nucliadb-6.2.1.dist-info}/top_level.txt +0 -0
nucliadb/common/cluster/manager.py

@@ -27,31 +27,25 @@ import backoff
 from nucliadb.common import datamanagers
 from nucliadb.common.cluster.base import AbstractIndexNode
 from nucliadb.common.cluster.exceptions import (
-    ExhaustedNodesError,
     NodeClusterSmall,
     NodeError,
     NodesUnsync,
-    NoHealthyNodeAvailable,
     ShardNotFound,
     ShardsNotFound,
 )
 from nucliadb.common.maindb.driver import Transaction
-from nucliadb.common.nidx import
+from nucliadb.common.nidx import get_nidx, get_nidx_api_client, get_nidx_fake_node
 from nucliadb_protos import (
     knowledgebox_pb2,
-    nodereader_pb2,
     noderesources_pb2,
     nodewriter_pb2,
     writer_pb2,
 )
 from nucliadb_protos.nodewriter_pb2 import IndexMessage, IndexMessageSource, NewShardRequest, TypeMessage
 from nucliadb_telemetry import errors
-from nucliadb_utils.utilities import
+from nucliadb_utils.utilities import get_storage

-from .index_node import IndexNode
 from .settings import settings
-from .standalone.index_node import ProxyStandaloneIndexNode
-from .standalone.utils import get_self, get_standalone_node_id, is_index_node

 logger = logging.getLogger(__name__)

@@ -60,67 +54,11 @@ READ_REPLICA_INDEX_NODES: dict[str, set[str]] = {}


 def get_index_nodes(include_secondary: bool = False) -> list[AbstractIndexNode]:
-
-    if not include_secondary:
-        return [inode for inode in all_nodes if inode.primary_id is None]
-    return all_nodes
+    return [get_nidx_fake_node()]


 def get_index_node(node_id: str) -> Optional[AbstractIndexNode]:
-    return
-
-
-def clear_index_nodes():
-    INDEX_NODES.clear()
-    READ_REPLICA_INDEX_NODES.clear()
-
-
-def get_read_replica_node_ids(node_id: str) -> list[str]:
-    return list(READ_REPLICA_INDEX_NODES.get(node_id, set()))
-
-
-def add_index_node(
-    *,
-    id: str,
-    address: str,
-    shard_count: int,
-    available_disk: int,
-    dummy: bool = False,
-    primary_id: Optional[str] = None,
-) -> AbstractIndexNode:
-    if settings.standalone_mode:
-        if is_index_node() and id == get_standalone_node_id():
-            node = get_self()
-        else:
-            node = ProxyStandaloneIndexNode(
-                id=id,
-                address=address,
-                shard_count=shard_count,
-                available_disk=available_disk,
-                dummy=dummy,
-            )
-    else:
-        node = IndexNode(  # type: ignore
-            id=id,
-            address=address,
-            shard_count=shard_count,
-            available_disk=available_disk,
-            dummy=dummy,
-            primary_id=primary_id,
-        )
-    INDEX_NODES[id] = node
-    if primary_id is not None:
-        if primary_id not in READ_REPLICA_INDEX_NODES:
-            READ_REPLICA_INDEX_NODES[primary_id] = set()
-        READ_REPLICA_INDEX_NODES[primary_id].add(id)
-    return node
-
-
-def remove_index_node(node_id: str, primary_id: Optional[str] = None) -> None:
-    INDEX_NODES.pop(node_id, None)
-    if primary_id is not None and primary_id in READ_REPLICA_INDEX_NODES:
-        if node_id in READ_REPLICA_INDEX_NODES[primary_id]:
-            READ_REPLICA_INDEX_NODES[primary_id].remove(node_id)
+    return get_nidx_fake_node()


 class KBShardManager:
@@ -145,16 +83,13 @@ class KBShardManager:
         aw: Callable[[AbstractIndexNode, str], Awaitable[Any]],
         timeout: float,
         *,
-        use_nidx: bool,
         use_read_replica_nodes: bool = False,
     ) -> list[Any]:
         shards = await self.get_shards_by_kbid(kbid)
         ops = []

         for shard_obj in shards:
-            node, shard_id = choose_node(
-                shard_obj, use_nidx=use_nidx, use_read_replica_nodes=use_read_replica_nodes
-            )
+            node, shard_id = choose_node(shard_obj, use_read_replica_nodes=use_read_replica_nodes)
             if shard_id is None:
                 raise ShardNotFound("Found a node but not a shard")

@@ -169,6 +104,14 @@ class KBShardManager:
             errors.capture_exception(exc)
             raise NodeError("Node unavailable for operation") from exc

+        for result in results:
+            if isinstance(result, Exception):
+                errors.capture_exception(result)
+                raise NodeError(
+                    f"Error while applying {aw.__name__} for all shards. Other similar errors may have been shadowed.\n"
+                    f"{type(result).__name__}: {result}"
+                ) from result
+
         return results

     # TODO: move to data manager
@@ -190,27 +133,12 @@ class KBShardManager:
         txn: Transaction,
         kbid: str,
     ) -> writer_pb2.ShardObject:
-        try:
-            check_enough_nodes()
-        except NodeClusterSmall as err:
-            errors.capture_exception(err)
-            logger.error(
-                f"Shard creation for kbid={kbid} failed: Replication requirements could not be met."
-            )
-            raise
-
         kb_shards = await datamanagers.cluster.get_kb_shards(txn, kbid=kbid, for_update=True)
         if kb_shards is None:
             msg = ("Attempting to create a shard for a KB when it has no stored shards in maindb",)
             logger.error(msg, extra={"kbid": kbid})
             raise ShardsNotFound(msg)

-        existing_kb_nodes = [replica.node for shard in kb_shards.shards for replica in shard.replicas]
-        nodes = sorted_primary_nodes(
-            avoid_nodes=existing_kb_nodes,
-            ignore_nodes=settings.drain_nodes,
-        )
-
         vectorsets = {
             vectorset_id: vectorset_config.vectorset_index_config
             async for vectorset_id, vectorset_config in datamanagers.vectorsets.iter(txn, kbid=kbid)
@@ -220,64 +148,14 @@ class KBShardManager:

         shard = writer_pb2.ShardObject(shard=shard_uuid, read_only=False)
         try:
-
-
-
-
-
-
-
-
-                raise ExhaustedNodesError()
-
-            node = get_index_node(node_id)
-            if node is None:
-                logger.error(f"Node {node_id} is not found or not available")
-                continue
-
-            try:
-                if not vectorsets:
-                    # bw/c KBs without vectorsets
-                    is_matryoshka = len(kb_shards.model.matryoshka_dimensions) > 0
-                    vector_index_config = nodewriter_pb2.VectorIndexConfig(
-                        similarity=kb_shards.similarity,
-                        vector_type=nodewriter_pb2.VectorType.DENSE_F32,
-                        vector_dimension=kb_shards.model.vector_dimension,
-                        normalize_vectors=is_matryoshka,
-                    )
-
-                    shard_created = await node.new_shard(
-                        kbid,
-                        vector_index_config=vector_index_config,
-                    )
-
-                else:
-                    shard_created = await node.new_shard_with_vectorsets(
-                        kbid,
-                        vectorsets_configs=vectorsets,
-                    )
-
-            except Exception as exc:
-                errors.capture_exception(exc)
-                logger.exception(
-                    f"Error creating new shard for KB", extra={"kbid": kbid, "node_id": node}
-                )
-                continue
-
-            replica = writer_pb2.ShardReplica(node=str(node_id))
-            replica.shard.CopyFrom(shard_created)
-            shard.replicas.append(replica)
-            replicas_created += 1
-
-            nidx_api = get_nidx_api_client()
-            if nidx_api:
-                req = NewShardRequest(
-                    kbid=kbid,
-                    vectorsets_configs=vectorsets,
-                )
-
-                resp = await nidx_api.NewShard(req)  # type: ignore
-                shard.nidx_shard_id = resp.id
+            nidx_api = get_nidx_api_client()
+            req = NewShardRequest(
+                kbid=kbid,
+                vectorsets_configs=vectorsets,
+            )
+
+            resp = await nidx_api.NewShard(req)  # type: ignore
+            shard.nidx_shard_id = resp.id

         except Exception as exc:
             errors.capture_exception(exc)
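
Shard creation thus collapses to a single call against the nidx API client, with no per-node replica placement. A condensed sketch of the new path, wrapped in a hypothetical helper (error handling elided):

    async def create_nidx_shard(kbid: str, shard_uuid: str, vectorsets) -> writer_pb2.ShardObject:
        # Hypothetical helper condensing the new creation path shown above:
        # one NewShard call against nidx; only the nidx shard id is recorded.
        shard = writer_pb2.ShardObject(shard=shard_uuid, read_only=False)
        resp = await get_nidx_api_client().NewShard(
            NewShardRequest(kbid=kbid, vectorsets_configs=vectorsets)
        )
        shard.nidx_shard_id = resp.id
        return shard
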
@@ -300,43 +178,15 @@ class KBShardManager:
         return shard

     async def rollback_shard(self, shard: writer_pb2.ShardObject):
-        for shard_replica in shard.replicas:
-            node_id = shard_replica.node
-            replica_id = shard_replica.shard.id
-            node = get_index_node(node_id)
-            if node is not None:
-                try:
-                    logger.info(
-                        "Deleting shard replica",
-                        extra={"shard": replica_id, "node": node_id},
-                    )
-                    await node.delete_shard(replica_id)
-                except Exception as rollback_error:
-                    errors.capture_exception(rollback_error)
-                    logger.error(
-                        f"New shard rollback error. Node: {node_id} Shard: {replica_id}",
-                        exc_info=True,
-                    )
-
         nidx_api = get_nidx_api_client()
-
-
-
-
-
-
-
-
-            )
-
-    def indexing_replicas(self, shard: writer_pb2.ShardObject) -> list[tuple[str, str]]:
-        """
-        Returns the replica ids and nodes for the shard replicas
-        """
-        result = []
-        for replica in shard.replicas:
-            result.append((replica.shard.id, replica.node))
-        return result
+        try:
+            await nidx_api.DeleteShard(noderesources_pb2.ShardId(id=shard.nidx_shard_id))
+        except Exception as rollback_error:
+            errors.capture_exception(rollback_error)
+            logger.error(
+                f"New shard rollback error. Nidx Shard: {shard.nidx_shard_id}",
+                exc_info=True,
+            )

     async def delete_resource(
         self,
@@ -346,29 +196,16 @@ class KBShardManager:
         partition: str,
         kb: str,
     ) -> None:
-        indexing = get_indexing()
         storage = await get_storage()
         nidx = get_nidx()

         await storage.delete_indexing(resource_uid=uuid, txid=txid, kb=kb, logical_shard=shard.shard)

-
-
-
-
-
-            indexpb.resource = uuid
-            indexpb.typemessage = nodewriter_pb2.TypeMessage.DELETION
-            indexpb.partition = partition
-            indexpb.kbid = kb
-            await indexing.index(indexpb, node_id)
-
-        if nidx is not None and shard.nidx_shard_id:
-            nidxpb: nodewriter_pb2.IndexMessage = nodewriter_pb2.IndexMessage()
-            nidxpb.shard = shard.nidx_shard_id
-            nidxpb.resource = uuid
-            nidxpb.typemessage = nodewriter_pb2.TypeMessage.DELETION
-            await nidx.index(nidxpb)
+        nidxpb: nodewriter_pb2.IndexMessage = nodewriter_pb2.IndexMessage()
+        nidxpb.shard = shard.nidx_shard_id
+        nidxpb.resource = uuid
+        nidxpb.typemessage = nodewriter_pb2.TypeMessage.DELETION
+        await nidx.index(nidxpb)

     async def add_resource(
         self,
@@ -389,7 +226,6 @@ class KBShardManager:
         reindex_id = uuid.uuid4().hex

         storage = await get_storage()
-        indexing = get_indexing()
         nidx = get_nidx()
         indexpb = IndexMessage()

@@ -412,14 +248,8 @@ class KBShardManager:
         indexpb.source = source
         indexpb.resource = resource.resource.uuid

-
-
-            indexpb.shard = replica_id
-            await indexing.index(indexpb, node_id)
-
-        if nidx is not None and shard.nidx_shard_id:
-            indexpb.shard = shard.nidx_shard_id
-            await nidx.index(indexpb)
+        indexpb.shard = shard.nidx_shard_id
+        await nidx.index(indexpb)

     def should_create_new_shard(self, num_paragraphs: int) -> bool:
         return num_paragraphs > settings.max_shard_paragraphs
@@ -451,12 +281,8 @@ class KBShardManager:
         )

         await self.apply_for_all_shards(
-            kbid, _create_vectorset, timeout=10,
+            kbid, _create_vectorset, timeout=10, use_read_replica_nodes=False
         )
-        if NIDX_ENABLED:
-            await self.apply_for_all_shards(
-                kbid, _create_vectorset, timeout=10, use_nidx=True, use_read_replica_nodes=False
-            )

     async def delete_vectorset(self, kbid: str, vectorset_id: str):
         """Delete a vectorset from all KB shards"""
@@ -469,12 +295,8 @@ class KBShardManager:
         )

         await self.apply_for_all_shards(
-            kbid, _delete_vectorset, timeout=10,
+            kbid, _delete_vectorset, timeout=10, use_read_replica_nodes=False
         )
-        if NIDX_ENABLED:
-            await self.apply_for_all_shards(
-                kbid, _delete_vectorset, timeout=10, use_nidx=True, use_read_replica_nodes=False
-            )


 class StandaloneKBShardManager(KBShardManager):
@@ -485,27 +307,6 @@ class StandaloneKBShardManager(KBShardManager):
         self._lock = asyncio.Lock()
         self._change_count: dict[tuple[str, str], int] = {}

-    async def _resource_change_event(self, kbid: str, node_id: str, shard_id: str) -> None:
-        if (node_id, shard_id) not in self._change_count:
-            self._change_count[(node_id, shard_id)] = 0
-        self._change_count[(node_id, shard_id)] += 1
-        if self._change_count[(node_id, shard_id)] < self.max_ops_before_checks:
-            return
-
-        self._change_count[(node_id, shard_id)] = 0
-        async with self._lock:
-            index_node: Optional[ProxyStandaloneIndexNode] = get_index_node(node_id)  # type: ignore
-            if index_node is None:
-                return
-            shard_info: noderesources_pb2.Shard = await index_node.reader.GetShard(
-                nodereader_pb2.GetShardRequest(shard_id=noderesources_pb2.ShardId(id=shard_id))
-            )
-            await self.maybe_create_new_shard(
-                kbid,
-                shard_info.paragraphs,
-            )
-            await index_node.writer.GC(noderesources_pb2.ShardId(id=shard_id))
-
     @backoff.on_exception(backoff.expo, NodesUnsync, jitter=backoff.random_jitter, max_tries=5)
     async def delete_resource(
         self,
@@ -518,16 +319,6 @@ class StandaloneKBShardManager(KBShardManager):
         req = noderesources_pb2.ResourceID()
         req.uuid = uuid

-        for shardreplica in shard.replicas:
-            req.shard_id = shardreplica.shard.id
-            index_node = get_index_node(shardreplica.node)
-            if index_node is None:  # pragma: no cover
-                raise NodesUnsync(f"Node {shardreplica.node} is not found or not available")
-            await index_node.writer.RemoveResource(req)  # type: ignore
-            asyncio.create_task(
-                self._resource_change_event(kb, shardreplica.node, shardreplica.shard.id)
-            )
-
         nidx = get_nidx()
         if nidx is not None and shard.nidx_shard_id:
             indexpb: nodewriter_pb2.IndexMessage = nodewriter_pb2.IndexMessage()
@@ -551,16 +342,6 @@ class StandaloneKBShardManager(KBShardManager):
         Calls the node writer's SetResource method directly to store the resource in the node.
         There is no queuing for standalone nodes at the moment -- indexing is done synchronously.
         """
-        index_node = None
-        for shardreplica in shard.replicas:
-            resource.shard_id = resource.resource.shard_id = shardreplica.shard.id
-            index_node = get_index_node(shardreplica.node)
-            if index_node is None:  # pragma: no cover
-                raise NodesUnsync(f"Node {shardreplica.node} is not found or not available")
-            await index_node.writer.SetResource(resource)  # type: ignore
-            asyncio.create_task(
-                self._resource_change_event(kb, shardreplica.node, shardreplica.shard.id)
-            )

         nidx = get_nidx()
         if nidx is not None and shard.nidx_shard_id:
@@ -587,89 +368,18 @@ class StandaloneKBShardManager(KBShardManager):
         pass


-def get_all_shard_nodes(
-    shard: writer_pb2.ShardObject,
-    *,
-    use_read_replicas: bool,
-) -> list[tuple[AbstractIndexNode, str]]:
-    """Return a list of all nodes containing `shard` with the shard replica id.
-    If `use_read_replicas`, read replica nodes will be returned too.
-
-    """
-    nodes = []
-    for shard_replica_pb in shard.replicas:
-        node_id = shard_replica_pb.node
-        shard_replica_id = shard_replica_pb.shard.id
-
-        node = get_index_node(node_id)
-        if node is not None:
-            nodes.append((node, shard_replica_id))
-
-        if use_read_replicas:
-            for read_replica_node_id in get_read_replica_node_ids(node_id):
-                read_replica_node = get_index_node(read_replica_node_id)
-                if read_replica_node is not None:
-                    nodes.append((read_replica_node, shard_replica_id))
-
-    return nodes
-
-
 def choose_node(
     shard: writer_pb2.ShardObject,
     *,
-    use_nidx: bool,
     target_shard_replicas: Optional[list[str]] = None,
     use_read_replica_nodes: bool = False,
 ) -> tuple[AbstractIndexNode, str]:
-
-
-    - when enabled, read replica nodes are preferred over primaries
-    - if there's more than one option with the same score, a random choice will
-      be made between them.
-
-    According to these rules and considering we use read replica nodes, a read
-    replica node containing a shard replica from `target_shard_replicas` is the
-    most preferent, while a primary node with a shard not in
-    `target_shard_replicas` is the least preferent.
-
-    """
-
-    # Use nidx if requested and enabled, fallback to node
-    if shard.nidx_shard_id and use_nidx:
-        fake_node = get_nidx_fake_node()
-        if fake_node:
-            return fake_node, shard.nidx_shard_id
-
-    target_shard_replicas = target_shard_replicas or []
-
-    shard_nodes = get_all_shard_nodes(shard, use_read_replicas=use_read_replica_nodes)
-
-    if len(shard_nodes) == 0:
-        raise NoHealthyNodeAvailable("Could not find a node to query")
-
-    # Ranking values
-    IN_TARGET_SHARD_REPLICAS = 0b10
-    IS_READ_REPLICA_NODE = 0b01
-
-    ranked_nodes: dict[int, list[tuple[AbstractIndexNode, str]]] = {}
-    for node, shard_replica_id in shard_nodes:
-        score = 0
-        if shard_replica_id in target_shard_replicas:
-            score |= IN_TARGET_SHARD_REPLICAS
-        if node.is_read_replica():
-            score |= IS_READ_REPLICA_NODE
-
-        ranked_nodes.setdefault(score, []).append((node, shard_replica_id))
-
-    top = ranked_nodes[max(ranked_nodes)]
-    # As shard replica ids are random numbers, we sort by shard replica id and choose its
-    # node to make sure we choose in deterministically but we don't favour any node in particular
-    top.sort(key=lambda x: x[1])
-    selected_node, shard_replica_id = top[0]
-    return selected_node, shard_replica_id
+    fake_node = get_nidx_fake_node()
+    return fake_node, shard.nidx_shard_id


 def check_enough_nodes():
+    return True
     """
     It raises an exception if it can't find enough nodes for the configured replicas.
     """
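
choose_node loses its replica-ranking logic and its use_nidx flag entirely; callers such as the rebalance hunks below just pass the shard. A sketch of an updated call site, wrapped in a hypothetical helper:

    async def fetch_shard(shard_obj: writer_pb2.ShardObject) -> nodereader_pb2.Shard:
        # Hypothetical caller updated for the new signature: no use_nidx flag; the
        # returned node is always the nidx fake node paired with shard.nidx_shard_id.
        node, shard_id = choose_node(shard_obj, use_read_replica_nodes=False)
        return await node.reader.GetShard(
            nodereader_pb2.GetShardRequest(shard_id=noderesources_pb2.ShardId(id=shard_id))
        )
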
nucliadb/common/cluster/rebalance.py

@@ -52,7 +52,7 @@ async def get_shards_paragraphs(kbid: str) -> list[tuple[str, int]]:
     results = {}
     for shard_meta in kb_shards.shards:
         # Rebalance using node as source of truth. But it will rebalance nidx
-        node, shard_id = choose_node(shard_meta
+        node, shard_id = choose_node(shard_meta)
         shard_data: nodereader_pb2.Shard = await node.reader.GetShard(
             nodereader_pb2.GetShardRequest(shard_id=noderesources_pb2.ShardId(id=shard_id))  # type: ignore
         )
@@ -102,7 +102,7 @@ async def move_set_of_kb_resources(
     from_shard = [s for s in kb_shards.shards if s.shard == from_shard_id][0]
     to_shard = [s for s in kb_shards.shards if s.shard == to_shard_id][0]

-    from_node, from_shard_replica_id = choose_node(from_shard
+    from_node, from_shard_replica_id = choose_node(from_shard)
     search_response: nodereader_pb2.SearchResponse = await from_node.reader.Search(  # type: ignore
         nodereader_pb2.SearchRequest(
             shard=from_shard_replica_id,
nucliadb/common/cluster/rollover.py

@@ -24,7 +24,6 @@ from datetime import datetime
 from typing import Optional

 from nucliadb.common import datamanagers, locking
-from nucliadb.common.cluster import manager as cluster_manager
 from nucliadb.common.context import ApplicationContext
 from nucliadb.common.datamanagers.rollover import RolloverState, RolloverStateNotFoundError
 from nucliadb.common.external_index_providers.base import ExternalIndexManager
@@ -32,11 +31,9 @@ from nucliadb.common.external_index_providers.manager import (
     get_external_index_manager,
 )
 from nucliadb.common.nidx import get_nidx_fake_node
-from nucliadb_protos import
+from nucliadb_protos import writer_pb2
 from nucliadb_telemetry import errors

-from .manager import get_index_node
-from .settings import settings
 from .utils import (
     delete_resource_from_shard,
     get_resource,
@@ -138,63 +135,19 @@ async def create_rollover_shards(
     # create new shards
     created_shards = []
     try:
-        nodes = cluster_manager.sorted_primary_nodes(ignore_nodes=drain_nodes)
         for shard in kb_shards.shards:
             shard.ClearField("replicas")
-
-
-
-
-
-
-
-
-
-
-
-                    continue
-
-            vectorsets = {
-                vectorset_id: vectorset_config.vectorset_index_config
-                async for vectorset_id, vectorset_config in datamanagers.vectorsets.iter(
-                    txn, kbid=kbid
-                )
-            }
-            try:
-                if not vectorsets:
-                    is_matryoshka = len(kb_shards.model.matryoshka_dimensions) > 0
-                    vector_index_config = nodewriter_pb2.VectorIndexConfig(
-                        similarity=kb_shards.similarity,
-                        vector_type=nodewriter_pb2.VectorType.DENSE_F32,
-                        vector_dimension=kb_shards.model.vector_dimension,
-                        normalize_vectors=is_matryoshka,
-                    )
-                    shard_created = await node.new_shard(
-                        kbid,
-                        vector_index_config=vector_index_config,
-                    )
-                else:
-                    shard_created = await node.new_shard_with_vectorsets(
-                        kbid,
-                        vectorsets_configs=vectorsets,
-                    )
-            except Exception as e:
-                errors.capture_exception(e)
-                logger.exception(f"Error creating new shard at {node}")
-                continue
-
-            replica = writer_pb2.ShardReplica(node=str(node_id))
-            replica.shard.CopyFrom(shard_created)
-            shard.replicas.append(replica)
-            created_shards.append(shard)
-            replicas_created += 1
-
-            if nidx_node:
-                nidx_shard = await nidx_node.new_shard_with_vectorsets(
-                    kbid,
-                    vectorsets_configs=vectorsets,
-                )
-                shard.nidx_shard_id = nidx_shard.id
+            vectorsets = {
+                vectorset_id: vectorset_config.vectorset_index_config
+                async for vectorset_id, vectorset_config in datamanagers.vectorsets.iter(txn, kbid=kbid)
+            }
+
+            nidx_shard = await nidx_node.new_shard_with_vectorsets(
+                kbid,
+                vectorsets_configs=vectorsets,
+            )
+            shard.nidx_shard_id = nidx_shard.id
+            created_shards.append(shard)

     except Exception as e:
         errors.capture_exception(e)
@@ -621,16 +574,6 @@ async def clean_rollover_status(app_context: ApplicationContext, kbid: str) -> N
         await txn.commit()


-async def wait_for_cluster_ready() -> None:
-    node_ready_checks = 0
-    while len(cluster_manager.INDEX_NODES) == 0:
-        if node_ready_checks > 10:
-            raise Exception("No index nodes available")
-        logger.info("Waiting for index nodes to be available")
-        await asyncio.sleep(1)
-        node_ready_checks += 1
-
-
 async def rollover_kb_index(
     app_context: ApplicationContext, kbid: str, drain_nodes: Optional[list[str]] = None
 ) -> None:
@@ -654,8 +597,6 @@ async def rollover_kb_index(
     - Validate that all resources are in the new kb index
     - Clean up indexed data
     """
-    await wait_for_cluster_ready()
-
     extra = {"kbid": kbid, "external_index_provider": None}
     external = await get_external_index_manager(kbid, for_rollover=True)
     if external is not None:
nucliadb/common/cluster/settings.py

@@ -90,6 +90,9 @@ class Settings(BaseSettings):
     nidx_searcher_address: Optional[str] = Field(
         default=None, description="NIDX gRPC searcher API address"
     )
+    nidx_indexer_address: Optional[str] = Field(
+        default=None, description="NIDX gRPC indexer API address"
+    )


 settings = Settings()
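
Since Settings extends pydantic's BaseSettings, the new field should be configurable through the environment like its siblings. A sketch, assuming the default env-var name mapping and a hypothetical address value:

    # Sketch, assuming pydantic BaseSettings' default env-var naming:
    #   export NIDX_INDEXER_ADDRESS="localhost:10000"
    from nucliadb.common.cluster.settings import settings

    print(settings.nidx_indexer_address)  # "localhost:10000" when the variable is set
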
|