nucliadb 6.4.0.post4127__py3-none-any.whl → 6.4.0.post4132__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33)
  1. nucliadb/common/cluster/grpc_node_dummy.py +1 -18
  2. nucliadb/common/cluster/manager.py +26 -21
  3. nucliadb/common/cluster/rebalance.py +7 -7
  4. nucliadb/common/cluster/rollover.py +12 -5
  5. nucliadb/common/nidx.py +0 -44
  6. nucliadb/ingest/consumer/auditing.py +5 -5
  7. nucliadb/ingest/consumer/shard_creator.py +5 -4
  8. nucliadb/ingest/orm/entities.py +4 -5
  9. nucliadb/metrics_exporter.py +0 -19
  10. nucliadb/purge/orphan_shards.py +17 -14
  11. nucliadb/search/api/v1/knowledgebox.py +6 -14
  12. nucliadb/search/api/v1/resource/search.py +2 -5
  13. nucliadb/search/api/v1/search.py +2 -6
  14. nucliadb/search/api/v1/suggest.py +1 -2
  15. nucliadb/search/requesters/utils.py +14 -33
  16. nucliadb/search/search/find.py +2 -8
  17. nucliadb/search/search/shards.py +9 -25
  18. nucliadb/train/generator.py +9 -11
  19. nucliadb/train/generators/field_classifier.py +3 -5
  20. nucliadb/train/generators/field_streaming.py +3 -5
  21. nucliadb/train/generators/image_classifier.py +1 -4
  22. nucliadb/train/generators/paragraph_classifier.py +3 -5
  23. nucliadb/train/generators/paragraph_streaming.py +3 -5
  24. nucliadb/train/generators/question_answer_streaming.py +3 -5
  25. nucliadb/train/generators/sentence_classifier.py +3 -5
  26. nucliadb/train/generators/token_classifier.py +3 -5
  27. nucliadb/train/nodes.py +2 -4
  28. {nucliadb-6.4.0.post4127.dist-info → nucliadb-6.4.0.post4132.dist-info}/METADATA +6 -6
  29. {nucliadb-6.4.0.post4127.dist-info → nucliadb-6.4.0.post4132.dist-info}/RECORD +32 -33
  30. nucliadb/common/cluster/base.py +0 -146
  31. {nucliadb-6.4.0.post4127.dist-info → nucliadb-6.4.0.post4132.dist-info}/WHEEL +0 -0
  32. {nucliadb-6.4.0.post4127.dist-info → nucliadb-6.4.0.post4132.dist-info}/entry_points.txt +0 -0
  33. {nucliadb-6.4.0.post4127.dist-info → nucliadb-6.4.0.post4132.dist-info}/top_level.txt +0 -0
nucliadb/common/cluster/grpc_node_dummy.py CHANGED
@@ -19,22 +19,15 @@
 #
 from typing import Any

-from nidx_protos.nodereader_pb2 import (
-    EdgeList,
-    RelationEdge,
-)
+from nidx_protos.noderesources_pb2 import Shard as NodeResourcesShard
 from nidx_protos.noderesources_pb2 import (
-    EmptyResponse,
     ShardCreated,
     ShardId,
     ShardIds,
     VectorSetList,
 )
-from nidx_protos.noderesources_pb2 import Shard as NodeResourcesShard
 from nidx_protos.nodewriter_pb2 import OpStatus

-from nucliadb_protos.utils_pb2 import Relation
-

 class DummyWriterStub:  # pragma: no cover
     def __init__(self: "DummyWriterStub"):
@@ -77,10 +70,6 @@ class DummyWriterStub:  # pragma: no cover
         result.vectorsets.append("base")
         return result

-    async def GC(self, request: ShardId) -> EmptyResponse:  # pragma: no cover
-        self.calls.setdefault("GC", []).append(request)
-        return EmptyResponse()
-

 class DummyReaderStub:  # pragma: no cover
     def __init__(self: "DummyReaderStub"):
@@ -89,9 +78,3 @@ class DummyReaderStub:  # pragma: no cover
     async def GetShard(self, data):  # pragma: no cover
         self.calls.setdefault("GetShard", []).append(data)
         return NodeResourcesShard(shard_id="shard", fields=2, paragraphs=2, sentences=2)
-
-    async def RelationEdges(self, data):  # pragma: no cover
-        self.calls.setdefault("RelationEdges", []).append(data)
-        result = EdgeList()
-        result.list.append(RelationEdge(edge_type=Relation.RelationType.ENTITY, property="dummy"))
-        return result
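
The dummy stubs above record every RPC invocation in a `calls` dict so tests can assert on them. A minimal usage sketch, assuming a test harness like the following (the helper name and assertions are illustrative, not part of this diff):

    import asyncio

    from nidx_protos.nodereader_pb2 import GetShardRequest

    from nucliadb.common.cluster.grpc_node_dummy import DummyReaderStub


    async def check_dummy_reader() -> None:
        stub = DummyReaderStub()
        shard = await stub.GetShard(GetShardRequest())
        # The stub returns fixed counts and records each call under its method name.
        assert shard.fields == 2
        assert len(stub.calls["GetShard"]) == 1


    asyncio.run(check_dummy_reader())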
nucliadb/common/cluster/manager.py CHANGED
@@ -23,17 +23,21 @@ import uuid
 from typing import Any, Awaitable, Callable, Optional

 from nidx_protos import noderesources_pb2, nodewriter_pb2
-from nidx_protos.nodewriter_pb2 import IndexMessage, IndexMessageSource, NewShardRequest, TypeMessage
+from nidx_protos.nodewriter_pb2 import (
+    IndexMessage,
+    IndexMessageSource,
+    NewShardRequest,
+    NewVectorSetRequest,
+    TypeMessage,
+)

 from nucliadb.common import datamanagers
-from nucliadb.common.cluster.base import AbstractIndexNode
 from nucliadb.common.cluster.exceptions import (
     NodeError,
-    ShardNotFound,
     ShardsNotFound,
 )
 from nucliadb.common.maindb.driver import Transaction
-from nucliadb.common.nidx import get_nidx, get_nidx_api_client, get_nidx_fake_node
+from nucliadb.common.nidx import get_nidx, get_nidx_api_client
 from nucliadb.common.vector_index_config import nucliadb_index_config_to_nidx
 from nucliadb_protos import knowledgebox_pb2, writer_pb2
 from nucliadb_telemetry import errors
@@ -63,18 +67,14 @@ class KBShardManager:
     async def apply_for_all_shards(
         self,
         kbid: str,
-        aw: Callable[[AbstractIndexNode, str], Awaitable[Any]],
+        aw: Callable[[str], Awaitable[Any]],
         timeout: float,
     ) -> list[Any]:
         shards = await self.get_shards_by_kbid(kbid)
         ops = []

         for shard_obj in shards:
-            node, shard_id = choose_node(shard_obj)
-            if shard_id is None:
-                raise ShardNotFound("Found a node but not a shard")
-
-            ops.append(aw(node, shard_id))
+            ops.append(aw(shard_obj.nidx_shard_id))

         try:
             results = await asyncio.wait_for(
@@ -252,10 +252,18 @@ class KBShardManager:
     async def create_vectorset(self, kbid: str, config: knowledgebox_pb2.VectorSetConfig):
         """Create a new vectorset in all KB shards."""

-        async def _create_vectorset(node: AbstractIndexNode, shard_id: str):
+        async def _create_vectorset(shard_id: str):
             vectorset_id = config.vectorset_id
             index_config = nucliadb_index_config_to_nidx(config.vectorset_index_config)
-            result = await node.add_vectorset(shard_id, vectorset_id, index_config)
+
+            req = NewVectorSetRequest(
+                id=noderesources_pb2.VectorSetID(
+                    shard=noderesources_pb2.ShardId(id=shard_id), vectorset=vectorset_id
+                ),
+                config=index_config,
+            )
+
+            result = await get_nidx_api_client().AddVectorSet(req)
             if result.status != result.Status.OK:
                 raise NodeError(
                     f"Unable to create vectorset {vectorset_id} in kb {kbid} shard {shard_id}"
@@ -266,8 +274,12 @@ class KBShardManager:
     async def delete_vectorset(self, kbid: str, vectorset_id: str):
         """Delete a vectorset from all KB shards"""

-        async def _delete_vectorset(node: AbstractIndexNode, shard_id: str):
-            result = await node.remove_vectorset(shard_id, vectorset_id)
+        async def _delete_vectorset(shard_id: str):
+            req = noderesources_pb2.VectorSetID()
+            req.shard.id = shard_id
+            req.vectorset = vectorset_id
+
+            result = await get_nidx_api_client().RemoveVectorSet(req)
             if result.status != result.Status.OK:
                 raise NodeError(
                     f"Unable to delete vectorset {vectorset_id} in kb {kbid} shard {shard_id}"
@@ -341,10 +353,3 @@ class StandaloneKBShardManager(KBShardManager):
                     await storage.delete_upload(storage_key, storage.indexing_bucket)
                 except Exception:
                     pass
-
-
-def choose_node(
-    shard: writer_pb2.ShardObject,
-) -> tuple[AbstractIndexNode, str]:
-    fake_node = get_nidx_fake_node()
-    return fake_node, shard.nidx_shard_id
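
With `choose_node` deleted, callbacks passed to `KBShardManager.apply_for_all_shards` receive only the nidx shard id. A minimal sketch of the new contract (the helper name is illustrative; the `GetShard` call mirrors the ones used elsewhere in this diff):

    from nidx_protos import nodereader_pb2, noderesources_pb2

    from nucliadb.common.nidx import get_nidx_api_client


    async def count_paragraphs(shard_id: str) -> int:
        # Callbacks now talk to nidx directly instead of receiving an
        # AbstractIndexNode plus a shard id.
        shard = await get_nidx_api_client().GetShard(
            nodereader_pb2.GetShardRequest(shard_id=noderesources_pb2.ShardId(id=shard_id))
        )
        return shard.paragraphs

    # Used as: results = await shard_manager.apply_for_all_shards(kbid, count_paragraphs, timeout=10.0)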
nucliadb/common/cluster/rebalance.py CHANGED
@@ -23,9 +23,9 @@ import logging
 from nidx_protos import nodereader_pb2, noderesources_pb2

 from nucliadb.common import datamanagers, locking
-from nucliadb.common.cluster.manager import choose_node
 from nucliadb.common.cluster.utils import get_shard_manager
 from nucliadb.common.context import ApplicationContext
+from nucliadb.common.nidx import get_nidx_api_client, get_nidx_searcher_client
 from nucliadb_telemetry import errors
 from nucliadb_telemetry.logs import setup_logging
 from nucliadb_telemetry.utils import setup_telemetry
@@ -51,9 +51,10 @@ async def get_shards_paragraphs(kbid: str) -> list[tuple[str, int]]:
     results = {}
     for shard_meta in kb_shards.shards:
         # Rebalance using node as source of truth. But it will rebalance nidx
-        node, shard_id = choose_node(shard_meta)
-        shard_data: nodereader_pb2.Shard = await node.reader.GetShard(
-            nodereader_pb2.GetShardRequest(shard_id=noderesources_pb2.ShardId(id=shard_id))  # type: ignore
+        shard_data: nodereader_pb2.Shard = await get_nidx_api_client().GetShard(
+            nodereader_pb2.GetShardRequest(
+                shard_id=noderesources_pb2.ShardId(id=shard_meta.nidx_shard_id)
+            )  # type: ignore
         )
         results[shard_meta.shard] = shard_data.paragraphs

@@ -101,16 +102,15 @@ async def move_set_of_kb_resources(
     from_shard = [s for s in kb_shards.shards if s.shard == from_shard_id][0]
     to_shard = [s for s in kb_shards.shards if s.shard == to_shard_id][0]

-    from_node, from_shard_replica_id = choose_node(from_shard)
     request = nodereader_pb2.SearchRequest(
-        shard=from_shard_replica_id,
+        shard=from_shard.nidx_shard_id,
        paragraph=False,
        document=True,
        result_per_page=count,
    )
    request.field_filter.field.field_type = "a"
    request.field_filter.field.field_id = "title"
-    search_response: nodereader_pb2.SearchResponse = await from_node.reader.Search(request)  # type: ignore
+    search_response: nodereader_pb2.SearchResponse = await get_nidx_searcher_client().Search(request)

    for result in search_response.document.results:
        resource_id = result.uuid
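
The same direct-client pattern applies on the query side: `get_nidx_searcher_client()` replaces `from_node.reader`. A sketch under the same assumptions as the hunk above (the helper name is illustrative; the request fields mirror the ones in this diff):

    from nidx_protos import nodereader_pb2

    from nucliadb.common.nidx import get_nidx_searcher_client


    async def resource_ids_in_shard(nidx_shard_id: str, count: int) -> list[str]:
        request = nodereader_pb2.SearchRequest(
            shard=nidx_shard_id,
            paragraph=False,
            document=True,
            result_per_page=count,
        )
        response = await get_nidx_searcher_client().Search(request)
        # Document results carry the resource uuid, which the rebalancer moves.
        return [result.uuid for result in response.document.results]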
nucliadb/common/cluster/rollover.py CHANGED
@@ -23,6 +23,10 @@ import logging
 from datetime import datetime
 from typing import Optional

+from nidx_protos.nodewriter_pb2 import (
+    NewShardRequest,
+)
+
 from nucliadb.common import datamanagers, locking
 from nucliadb.common.context import ApplicationContext
 from nucliadb.common.datamanagers.rollover import RolloverState, RolloverStateNotFoundError
@@ -30,10 +34,10 @@ from nucliadb.common.external_index_providers.base import ExternalIndexManager
 from nucliadb.common.external_index_providers.manager import (
     get_external_index_manager,
 )
-from nucliadb.common.nidx import get_nidx_fake_node
+from nucliadb.common.nidx import get_nidx_api_client
 from nucliadb.common.vector_index_config import nucliadb_index_config_to_nidx
 from nucliadb.migrator.settings import settings
-from nucliadb_protos import writer_pb2
+from nucliadb_protos import utils_pb2, writer_pb2
 from nucliadb_telemetry import errors

 from .utils import (
@@ -109,7 +113,6 @@ async def create_rollover_shards(

     logger.info("Creating rollover shards", extra={"kbid": kbid})
     sm = app_context.shard_manager
-    nidx_node = get_nidx_fake_node()

     async with datamanagers.with_ro_transaction() as txn:
         try:
@@ -143,10 +146,14 @@ async def create_rollover_shards(
                 async for vectorset_id, vectorset_config in datamanagers.vectorsets.iter(txn, kbid=kbid)
             }

-            nidx_shard = await nidx_node.new_shard_with_vectorsets(
-                kbid,
+            req = NewShardRequest(
+                kbid=kbid,
+                release_channel=utils_pb2.ReleaseChannel.STABLE,
                 vectorsets_configs=vectorsets,
             )
+
+            nidx_shard = await get_nidx_api_client().NewShard(req)
+
             shard.nidx_shard_id = nidx_shard.id
             created_shards.append(shard)

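Rollover shard creation now builds a `NewShardRequest` explicitly instead of going through the fake node's `new_shard_with_vectorsets`. A condensed sketch of the call (the vectorset config construction is elided; see the hunk above for the real mapping):

    from nidx_protos.nodewriter_pb2 import NewShardRequest

    from nucliadb.common.nidx import get_nidx_api_client
    from nucliadb_protos import utils_pb2


    async def create_nidx_shard(kbid: str, vectorsets: dict) -> str:
        req = NewShardRequest(
            kbid=kbid,
            release_channel=utils_pb2.ReleaseChannel.STABLE,
            vectorsets_configs=vectorsets,
        )
        created = await get_nidx_api_client().NewShard(req)
        return created.id  # stored as shard.nidx_shard_id
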
nucliadb/common/nidx.py CHANGED
@@ -26,7 +26,6 @@ from nidx_protos.nodewriter_pb2 import (
     IndexMessage,
 )

-from nucliadb.common.cluster.base import AbstractIndexNode
 from nucliadb.common.cluster.settings import settings
 from nucliadb.ingest.settings import DriverConfig
 from nucliadb.ingest.settings import settings as ingest_settings
@@ -244,46 +243,3 @@ def get_nidx_searcher_client() -> "NidxSearcherStub":
         return nidx.searcher_client
     else:
         raise Exception("nidx not initialized")
-
-
-# TODO: Remove the index node abstraction
-class NodeNidxAdapter:
-    def __init__(self, api_client, searcher_client):
-        # API methods
-        self.GetShard = api_client.GetShard
-        self.NewShard = api_client.NewShard
-        self.DeleteShard = api_client.DeleteShard
-        self.ListShards = api_client.ListShards
-        self.AddVectorSet = api_client.AddVectorSet
-        self.RemoveVectorSet = api_client.RemoveVectorSet
-        self.ListVectorSets = api_client.ListVectorSets
-        self.GetMetadata = api_client.GetMetadata
-
-        # Searcher methods
-        self.Search = searcher_client.Search
-        self.Suggest = searcher_client.Suggest
-        self.GraphSearch = searcher_client.GraphSearch
-        self.Paragraphs = searcher_client.Paragraphs
-        self.Documents = searcher_client.Documents
-
-
-class FakeNode(AbstractIndexNode):
-    def __init__(self, api_client, searcher_client):
-        self.client = NodeNidxAdapter(api_client, searcher_client)
-
-    @property
-    def reader(self):
-        return self.client
-
-    @property
-    def writer(self):
-        return self.client
-
-    @property
-    def id(self):
-        return "nidx"
-
-
-def get_nidx_fake_node() -> FakeNode:
-    nidx = get_nidx()
-    return FakeNode(nidx.api_client, nidx.searcher_client)
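
This deletion removes the last indirection layer: `NodeNidxAdapter` and `FakeNode` only forwarded attribute lookups to the two gRPC stubs. A sketch of the before/after calling convention:

    from nucliadb.common.nidx import get_nidx_api_client, get_nidx_searcher_client

    # Before (removed in this release):
    #     node = get_nidx_fake_node()
    #     shard = await node.reader.GetShard(req)
    #
    # After: callers pick the stub they need directly.
    #     shard = await get_nidx_api_client().GetShard(req)
    #     response = await get_nidx_searcher_client().Search(search_request)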
nucliadb/ingest/consumer/auditing.py CHANGED
@@ -27,9 +27,9 @@ from nidx_protos import nodereader_pb2, noderesources_pb2

 from nucliadb.common import datamanagers
 from nucliadb.common.cluster.exceptions import ShardsNotFound
-from nucliadb.common.cluster.manager import choose_node
 from nucliadb.common.cluster.utils import get_shard_manager
 from nucliadb.common.constants import AVG_PARAGRAPH_SIZE_BYTES
+from nucliadb.common.nidx import get_nidx_api_client
 from nucliadb_protos import audit_pb2, writer_pb2
 from nucliadb_utils import const
 from nucliadb_utils.audit.audit import AuditStorage
@@ -114,10 +114,10 @@ class IndexAuditHandler:
         total_paragraphs = 0

         for shard_obj in shard_groups:
-            # TODO: Uses node for auditing, don't want to suddenly change metrics
-            node, shard_id = choose_node(shard_obj)
-            shard: nodereader_pb2.Shard = await node.reader.GetShard(
-                nodereader_pb2.GetShardRequest(shard_id=noderesources_pb2.ShardId(id=shard_id))  # type: ignore
+            shard: nodereader_pb2.Shard = await get_nidx_api_client().GetShard(
+                nodereader_pb2.GetShardRequest(
+                    shard_id=noderesources_pb2.ShardId(id=shard_obj.nidx_shard_id)
+                )
             )

             total_fields += shard.fields
nucliadb/ingest/consumer/shard_creator.py CHANGED
@@ -25,9 +25,9 @@ from functools import partial
 from nidx_protos import nodereader_pb2, noderesources_pb2

 from nucliadb.common import locking
-from nucliadb.common.cluster.manager import choose_node
 from nucliadb.common.cluster.utils import get_shard_manager
 from nucliadb.common.maindb.driver import Driver
+from nucliadb.common.nidx import get_nidx_api_client
 from nucliadb_protos import writer_pb2
 from nucliadb_utils import const
 from nucliadb_utils.cache.pubsub import PubSubDriver
@@ -105,8 +105,9 @@ class ShardCreatorHandler:
         async with locking.distributed_lock(locking.NEW_SHARD_LOCK.format(kbid=kbid)):
             # remember, a lock will do at least 1+ reads and 1 write.
             # with heavy writes, this adds some simple k/v pressure
-            node, shard_id = choose_node(current_shard)
-            shard: nodereader_pb2.Shard = await node.reader.GetShard(
-                nodereader_pb2.GetShardRequest(shard_id=noderesources_pb2.ShardId(id=shard_id))  # type: ignore
+            shard: nodereader_pb2.Shard = await get_nidx_api_client().GetShard(
+                nodereader_pb2.GetShardRequest(
+                    shard_id=noderesources_pb2.ShardId(id=current_shard.nidx_shard_id)
+                )  # type: ignore
             )
             await self.shard_manager.maybe_create_new_shard(kbid, shard.paragraphs)
nucliadb/ingest/orm/entities.py CHANGED
@@ -30,7 +30,6 @@ from nidx_protos.nodereader_pb2 import (
 )

 from nucliadb.common import datamanagers
-from nucliadb.common.cluster.base import AbstractIndexNode
 from nucliadb.common.cluster.exceptions import (
     AlreadyExists,
     EntitiesGroupNotFound,
@@ -203,7 +202,7 @@ class EntitiesManager:
     async def get_indexed_entities_group(self, group: str) -> Optional[EntitiesGroup]:
         shard_manager = get_shard_manager()

-        async def do_entities_search(node: AbstractIndexNode, shard_id: str) -> GraphSearchResponse:
+        async def do_entities_search(shard_id: str) -> GraphSearchResponse:
             request = GraphSearchRequest()
             # XXX: this is a wild guess. Are those enough or too many?
             request.top_k = 500
@@ -211,7 +210,7 @@ class EntitiesManager:
             request.query.path.path.source.node_type = RelationNode.NodeType.ENTITY
             request.query.path.path.source.node_subtype = group
             request.query.path.path.undirected = True
-            response = await graph_search_shard(node, shard_id, request)
+            response = await graph_search_shard(shard_id, request)
             return response

         results = await shard_manager.apply_for_all_shards(
@@ -293,7 +292,7 @@ class EntitiesManager:
     ) -> set[str]:
         shard_manager = get_shard_manager()

-        async def query_indexed_entities_group_names(node: AbstractIndexNode, shard_id: str) -> set[str]:
+        async def query_indexed_entities_group_names(shard_id: str) -> set[str]:
             """Search all relation types"""
             request = SearchRequest(
                 shard=shard_id,
@@ -303,7 +302,7 @@ class EntitiesManager:
                 paragraph=False,
                 faceted=Faceted(labels=["/e"]),
             )
-            response: SearchResponse = await query_shard(node, shard_id, request)
+            response: SearchResponse = await query_shard(shard_id, request)
             try:
                 facetresults = response.document.facets["/e"].facetresults
             except KeyError:
nucliadb/metrics_exporter.py CHANGED
@@ -22,40 +22,22 @@ from __future__ import annotations
 import asyncio
 from typing import AsyncGenerator, Callable, Tuple, cast

-from nidx_protos.noderesources_pb2 import EmptyQuery, NodeMetadata
-
 from nucliadb import logger
 from nucliadb.common import datamanagers
 from nucliadb.common.context import ApplicationContext
 from nucliadb.common.maindb.pg import PGDriver
 from nucliadb.common.maindb.utils import get_driver
-from nucliadb.common.nidx import get_nidx_api_client
 from nucliadb.migrator.datamanager import MigrationsDataManager
 from nucliadb_telemetry import metrics
 from nucliadb_telemetry.logs import setup_logging
 from nucliadb_telemetry.utils import setup_telemetry
 from nucliadb_utils.fastapi.run import serve_metrics

-SHARD_COUNT = metrics.Gauge("nucliadb_node_shard_count", labels={"node": ""})
-
 MIGRATION_COUNT = metrics.Gauge("nucliadb_migration", labels={"type": "", "version": ""})

 PENDING_RESOURCE_COUNT = metrics.Gauge("nucliadb_pending_resources_count")


-async def update_node_metrics(context: ApplicationContext):
-    """
-    Report the number of shards in each node.
-    """
-    # Clear previoulsy set values so that we report only the current state
-    SHARD_COUNT.gauge.clear()
-
-    nidx_api = get_nidx_api_client()
-    metadata: NodeMetadata = await nidx_api.GetMetadata(EmptyQuery())
-
-    SHARD_COUNT.set(metadata.shard_count, labels={"node": "nidx"})
-
-
 async def iter_kbids(context: ApplicationContext) -> AsyncGenerator[str, None]:
     """
     Return a list of all KB ids.
@@ -127,7 +109,6 @@ async def run_exporter(context: ApplicationContext):
     # Schedule exporter tasks
     tasks = []
     for export_task, interval in [
-        (update_node_metrics, 10),
         (update_migration_metrics, 60 * 3),
         (update_resource_metrics, 60 * 5),
     ]:
nucliadb/purge/orphan_shards.py CHANGED
@@ -23,14 +23,17 @@ import importlib.metadata
 from typing import Optional

 from grpc.aio import AioRpcError
+from nidx_protos import nodereader_pb2, noderesources_pb2

 from nucliadb.common import datamanagers
-from nucliadb.common.cluster import manager
-from nucliadb.common.cluster.base import AbstractIndexNode
 from nucliadb.common.cluster.utils import setup_cluster, teardown_cluster
 from nucliadb.common.maindb.driver import Driver
 from nucliadb.common.maindb.utils import setup_driver, teardown_driver
-from nucliadb.common.nidx import start_nidx_utility, stop_nidx_utility
+from nucliadb.common.nidx import (
+    get_nidx_api_client,
+    start_nidx_utility,
+    stop_nidx_utility,
+)
 from nucliadb.ingest import logger
 from nucliadb_telemetry import errors
 from nucliadb_telemetry.logs import setup_logging
@@ -69,9 +72,8 @@ async def detect_orphan_shards(driver: Driver) -> dict[str, ShardKb]:

     orphan_shard_ids = indexed_shards.keys() - stored_shards.keys()
     orphan_shards: dict[str, ShardKb] = {}
-    node = manager.get_nidx_fake_node()
     for shard_id in orphan_shard_ids:
-        kbid = await _get_kbid(node, shard_id) or UNKNOWN_KB
+        kbid = await _get_kbid(shard_id) or UNKNOWN_KB
         # Shards with knwon KB ids can be checked and ignore those comming from
         # an ongoing migration/rollover (ongoing or finished)
         if kbid != UNKNOWN_KB:
@@ -84,15 +86,15 @@ async def detect_orphan_shards(driver: Driver) -> dict[str, ShardKb]:
             orphan_shards[shard_id] = kbid

     for shard_id in orphan_shard_ids:
-        kbid = await _get_kbid(node, shard_id) or UNKNOWN_KB
+        kbid = await _get_kbid(shard_id) or UNKNOWN_KB
         orphan_shards[shard_id] = kbid
     return orphan_shards


 async def _get_indexed_shards() -> dict[str, ShardKb]:
-    nidx = manager.get_nidx_fake_node()
-    shards = await nidx.list_shards()
-    return {shard_id: UNKNOWN_KB for shard_id in shards}
+    shards = await get_nidx_api_client().ListShards(noderesources_pb2.EmptyQuery())
+
+    return {shard.id: UNKNOWN_KB for shard in shards.ids}


 async def _get_stored_shards(driver: Driver) -> dict[str, ShardKb]:
@@ -111,16 +113,17 @@ async def _get_stored_shards(driver: Driver) -> dict[str, ShardKb]:
     return stored_shards


-async def _get_kbid(node: AbstractIndexNode, shard_id: str) -> Optional[str]:
+async def _get_kbid(shard_id: str) -> Optional[str]:
     kbid = None
     try:
-        shard_pb = await node.get_shard(shard_id)
+        req = nodereader_pb2.GetShardRequest()
+        req.shard_id.id = shard_id
+        shard_pb = await get_nidx_api_client().GetShard(req)
     except AioRpcError as grpc_error:
         logger.error(
             "Can't get shard while looking for orphans in nidx, is there something broken?",
             exc_info=grpc_error,
             extra={
-                "node_id": node.id,
                 "shard_id": shard_id,
             },
         )
@@ -156,7 +159,6 @@ async def purge_orphan_shards(driver: Driver):
     orphan_shards = await detect_orphan_shards(driver)
     logger.info(f"Found {len(orphan_shards)} orphan shards. Purge starts...")

-    node = manager.get_nidx_fake_node()
     for shard_id, kbid in orphan_shards.items():
         logger.info(
             "Deleting orphan shard from index node",
@@ -165,7 +167,8 @@ async def purge_orphan_shards(driver: Driver):
                 "kbid": kbid,
             },
         )
-        await node.delete_shard(shard_id)
+        req = noderesources_pb2.ShardId(id=shard_id)
+        await get_nidx_api_client().DeleteShard(req)


 def parse_arguments():
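
Shard enumeration and deletion also go straight to the API client now. A sketch combining the two calls introduced above (the helper names are illustrative):

    from nidx_protos import noderesources_pb2

    from nucliadb.common.nidx import get_nidx_api_client


    async def indexed_shard_ids() -> list[str]:
        # ListShards takes an EmptyQuery; each entry in the response's
        # .ids collection exposes the shard id as .id.
        shards = await get_nidx_api_client().ListShards(noderesources_pb2.EmptyQuery())
        return [shard.id for shard in shards.ids]


    async def drop_shard(shard_id: str) -> None:
        await get_nidx_api_client().DeleteShard(noderesources_pb2.ShardId(id=shard_id))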
nucliadb/search/api/v1/knowledgebox.py CHANGED
@@ -28,7 +28,6 @@ from nidx_protos.noderesources_pb2 import Shard

 from nucliadb.common import datamanagers
 from nucliadb.common.cluster.exceptions import ShardsNotFound
-from nucliadb.common.cluster.manager import choose_node
 from nucliadb.common.cluster.utils import get_shard_manager
 from nucliadb.common.constants import AVG_PARAGRAPH_SIZE_BYTES
 from nucliadb.common.counters import IndexCounts
@@ -164,19 +163,12 @@ async def get_node_index_counts(kbid: str) -> tuple[IndexCounts, list[str]]:
     ops = []
     queried_shards = []
     for shard_object in shard_groups:
-        try:
-            node, shard_id = choose_node(shard_object)
-        except KeyError:
-            raise HTTPException(
-                status_code=500,
-                detail="Couldn't retrieve counters right now, node not found",
-            )
-        else:
-            if shard_id is not None:
-                # At least one node is alive for this shard group
-                # let's add it ot the query list if has a valid value
-                ops.append(get_shard(node, shard_id))
-                queried_shards.append(shard_id)
+        shard_id = shard_object.nidx_shard_id
+        if shard_id is not None:
+            # At least one node is alive for this shard group
+            # let's add it ot the query list if has a valid value
+            ops.append(get_shard(shard_id))
+            queried_shards.append(shard_id)

     if not ops:
         logger.info(f"No node found for any of this resources shards {kbid}")
nucliadb/search/api/v1/resource/search.py CHANGED
@@ -27,7 +27,7 @@ from pydantic import ValidationError
 from nucliadb.models.responses import HTTPClientError
 from nucliadb.search.api.v1.router import KB_PREFIX, RESOURCE_PREFIX, api
 from nucliadb.search.api.v1.utils import fastapi_query
-from nucliadb.search.requesters.utils import Method, debug_nodes_info, node_query
+from nucliadb.search.requesters.utils import Method, node_query
 from nucliadb.search.search import cache
 from nucliadb.search.search.exceptions import InvalidQueryError
 from nucliadb.search.search.merge import merge_paragraphs_results
@@ -110,7 +110,7 @@ async def resource_search(
         detail = json.loads(exc.json())
         return HTTPClientError(status_code=422, detail=detail)

-    results, incomplete_results, queried_nodes = await node_query(kbid, Method.SEARCH, pb_query)
+    results, incomplete_results, queried_shards = await node_query(kbid, Method.SEARCH, pb_query)

     # We need to merge
     search_results = await merge_paragraphs_results(
@@ -122,9 +122,6 @@ async def resource_search(
     )

     response.status_code = 206 if incomplete_results else 200
-    if debug:
-        search_results.nodes = debug_nodes_info(queried_nodes)

-    queried_shards = [shard_id for _, shard_id in queried_nodes]
     search_results.shards = queried_shards
     return search_results
nucliadb/search/api/v1/search.py CHANGED
@@ -32,7 +32,7 @@ from nucliadb.models.responses import HTTPClientError
 from nucliadb.search import predict
 from nucliadb.search.api.v1.router import KB_PREFIX, api
 from nucliadb.search.api.v1.utils import fastapi_query
-from nucliadb.search.requesters.utils import Method, debug_nodes_info, node_query
+from nucliadb.search.requesters.utils import Method, node_query
 from nucliadb.search.search import cache
 from nucliadb.search.search.exceptions import InvalidQueryError
 from nucliadb.search.search.merge import merge_results
@@ -265,7 +265,7 @@ async def search(
     pb_query, incomplete_results, autofilters, _ = await legacy_convert_retrieval_to_proto(parsed)

     # We need to query all nodes
-    results, query_incomplete_results, queried_nodes = await node_query(kbid, Method.SEARCH, pb_query)
+    results, query_incomplete_results, queried_shards = await node_query(kbid, Method.SEARCH, pb_query)
     incomplete_results = incomplete_results or query_incomplete_results

     # We need to merge
@@ -290,10 +290,6 @@ async def search(
         len(search_results.resources),
     )

-    if item.debug:
-        search_results.nodes = debug_nodes_info(queried_nodes)
-
-    queried_shards = [shard_id for _, shard_id in queried_nodes]
     search_results.shards = queried_shards
     search_results.autofilters = autofilters
     return search_results, incomplete_results
nucliadb/search/api/v1/suggest.py CHANGED
@@ -160,7 +160,7 @@ async def suggest(
         range_modification_end,
         hidden,
     )
-    results, incomplete_results, queried_nodes = await node_query(kbid, Method.SUGGEST, pb_query)
+    results, incomplete_results, queried_shards = await node_query(kbid, Method.SUGGEST, pb_query)

     # We need to merge
     search_results = await merge_suggest_results(
@@ -171,7 +171,6 @@ async def suggest(
     )

     response.status_code = 206 if incomplete_results else 200

-    queried_shards = [shard_id for _, shard_id in queried_nodes]
     if debug and queried_shards:
         search_results.shards = queried_shards
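
Across the search endpoints, `node_query` now returns the queried nidx shard ids directly, so the per-endpoint `[shard_id for _, shard_id in queried_nodes]` unpacking (and the `debug_nodes_info` output) disappears. A sketch of the updated contract (the helper name is illustrative):

    from nucliadb.search.requesters.utils import Method, node_query


    async def queried_shard_ids(kbid: str, pb_query) -> list[str]:
        # The third element is already a plain list of shard ids.
        results, incomplete, queried_shards = await node_query(kbid, Method.SUGGEST, pb_query)
        return queried_shards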