nucliadb 6.4.0.post4127__py3-none-any.whl → 6.4.0.post4132__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nucliadb/common/cluster/grpc_node_dummy.py +1 -18
- nucliadb/common/cluster/manager.py +26 -21
- nucliadb/common/cluster/rebalance.py +7 -7
- nucliadb/common/cluster/rollover.py +12 -5
- nucliadb/common/nidx.py +0 -44
- nucliadb/ingest/consumer/auditing.py +5 -5
- nucliadb/ingest/consumer/shard_creator.py +5 -4
- nucliadb/ingest/orm/entities.py +4 -5
- nucliadb/metrics_exporter.py +0 -19
- nucliadb/purge/orphan_shards.py +17 -14
- nucliadb/search/api/v1/knowledgebox.py +6 -14
- nucliadb/search/api/v1/resource/search.py +2 -5
- nucliadb/search/api/v1/search.py +2 -6
- nucliadb/search/api/v1/suggest.py +1 -2
- nucliadb/search/requesters/utils.py +14 -33
- nucliadb/search/search/find.py +2 -8
- nucliadb/search/search/shards.py +9 -25
- nucliadb/train/generator.py +9 -11
- nucliadb/train/generators/field_classifier.py +3 -5
- nucliadb/train/generators/field_streaming.py +3 -5
- nucliadb/train/generators/image_classifier.py +1 -4
- nucliadb/train/generators/paragraph_classifier.py +3 -5
- nucliadb/train/generators/paragraph_streaming.py +3 -5
- nucliadb/train/generators/question_answer_streaming.py +3 -5
- nucliadb/train/generators/sentence_classifier.py +3 -5
- nucliadb/train/generators/token_classifier.py +3 -5
- nucliadb/train/nodes.py +2 -4
- {nucliadb-6.4.0.post4127.dist-info → nucliadb-6.4.0.post4132.dist-info}/METADATA +6 -6
- {nucliadb-6.4.0.post4127.dist-info → nucliadb-6.4.0.post4132.dist-info}/RECORD +32 -33
- nucliadb/common/cluster/base.py +0 -146
- {nucliadb-6.4.0.post4127.dist-info → nucliadb-6.4.0.post4132.dist-info}/WHEEL +0 -0
- {nucliadb-6.4.0.post4127.dist-info → nucliadb-6.4.0.post4132.dist-info}/entry_points.txt +0 -0
- {nucliadb-6.4.0.post4127.dist-info → nucliadb-6.4.0.post4132.dist-info}/top_level.txt +0 -0
nucliadb/common/cluster/grpc_node_dummy.py
CHANGED
@@ -19,22 +19,15 @@
 #
 from typing import Any

-from nidx_protos.nodereader_pb2 import (
-    EdgeList,
-    RelationEdge,
-)
+from nidx_protos.noderesources_pb2 import Shard as NodeResourcesShard
 from nidx_protos.noderesources_pb2 import (
-    EmptyResponse,
     ShardCreated,
     ShardId,
     ShardIds,
     VectorSetList,
 )
-from nidx_protos.noderesources_pb2 import Shard as NodeResourcesShard
 from nidx_protos.nodewriter_pb2 import OpStatus

-from nucliadb_protos.utils_pb2 import Relation
-

 class DummyWriterStub:  # pragma: no cover
     def __init__(self: "DummyWriterStub"):
@@ -77,10 +70,6 @@ class DummyWriterStub:  # pragma: no cover
         result.vectorsets.append("base")
         return result

-    async def GC(self, request: ShardId) -> EmptyResponse:  # pragma: no cover
-        self.calls.setdefault("GC", []).append(request)
-        return EmptyResponse()
-

 class DummyReaderStub:  # pragma: no cover
     def __init__(self: "DummyReaderStub"):
@@ -89,9 +78,3 @@ class DummyReaderStub:  # pragma: no cover
     async def GetShard(self, data):  # pragma: no cover
         self.calls.setdefault("GetShard", []).append(data)
         return NodeResourcesShard(shard_id="shard", fields=2, paragraphs=2, sentences=2)
-
-    async def RelationEdges(self, data):  # pragma: no cover
-        self.calls.setdefault("RelationEdges", []).append(data)
-        result = EdgeList()
-        result.list.append(RelationEdge(edge_type=Relation.RelationType.ENTITY, property="dummy"))
-        return result
nucliadb/common/cluster/manager.py
CHANGED
@@ -23,17 +23,21 @@ import uuid
 from typing import Any, Awaitable, Callable, Optional

 from nidx_protos import noderesources_pb2, nodewriter_pb2
-from nidx_protos.nodewriter_pb2 import IndexMessage, IndexMessageSource, NewShardRequest, TypeMessage
+from nidx_protos.nodewriter_pb2 import (
+    IndexMessage,
+    IndexMessageSource,
+    NewShardRequest,
+    NewVectorSetRequest,
+    TypeMessage,
+)

 from nucliadb.common import datamanagers
-from nucliadb.common.cluster.base import AbstractIndexNode
 from nucliadb.common.cluster.exceptions import (
     NodeError,
-    ShardNotFound,
     ShardsNotFound,
 )
 from nucliadb.common.maindb.driver import Transaction
-from nucliadb.common.nidx import get_nidx, get_nidx_api_client, get_nidx_fake_node
+from nucliadb.common.nidx import get_nidx, get_nidx_api_client
 from nucliadb.common.vector_index_config import nucliadb_index_config_to_nidx
 from nucliadb_protos import knowledgebox_pb2, writer_pb2
 from nucliadb_telemetry import errors
@@ -63,18 +67,14 @@ class KBShardManager:
     async def apply_for_all_shards(
         self,
         kbid: str,
-        aw: Callable[[AbstractIndexNode, str], Awaitable[Any]],
+        aw: Callable[[str], Awaitable[Any]],
         timeout: float,
     ) -> list[Any]:
         shards = await self.get_shards_by_kbid(kbid)
         ops = []

         for shard_obj in shards:
-            node, shard_id = choose_node(shard_obj)
-            if shard_id is None:
-                raise ShardNotFound("Found a node but not a shard")
-
-            ops.append(aw(node, shard_id))
+            ops.append(aw(shard_obj.nidx_shard_id))

         try:
             results = await asyncio.wait_for(
@@ -252,10 +252,18 @@ class KBShardManager:
     async def create_vectorset(self, kbid: str, config: knowledgebox_pb2.VectorSetConfig):
         """Create a new vectorset in all KB shards."""

-        async def _create_vectorset(node: AbstractIndexNode, shard_id: str):
+        async def _create_vectorset(shard_id: str):
             vectorset_id = config.vectorset_id
             index_config = nucliadb_index_config_to_nidx(config.vectorset_index_config)
-            result = await node.add_vectorset(shard_id, vectorset_id, index_config)
+
+            req = NewVectorSetRequest(
+                id=noderesources_pb2.VectorSetID(
+                    shard=noderesources_pb2.ShardId(id=shard_id), vectorset=vectorset_id
+                ),
+                config=index_config,
+            )
+
+            result = await get_nidx_api_client().AddVectorSet(req)
             if result.status != result.Status.OK:
                 raise NodeError(
                     f"Unable to create vectorset {vectorset_id} in kb {kbid} shard {shard_id}"
@@ -266,8 +274,12 @@ class KBShardManager:
     async def delete_vectorset(self, kbid: str, vectorset_id: str):
         """Delete a vectorset from all KB shards"""

-        async def _delete_vectorset(node: AbstractIndexNode, shard_id: str):
-            result = await node.remove_vectorset(shard_id, vectorset_id)
+        async def _delete_vectorset(shard_id: str):
+            req = noderesources_pb2.VectorSetID()
+            req.shard.id = shard_id
+            req.vectorset = vectorset_id
+
+            result = await get_nidx_api_client().RemoveVectorSet(req)
             if result.status != result.Status.OK:
                 raise NodeError(
                     f"Unable to delete vectorset {vectorset_id} in kb {kbid} shard {shard_id}"
@@ -341,10 +353,3 @@ class StandaloneKBShardManager(KBShardManager):
                 await storage.delete_upload(storage_key, storage.indexing_bucket)
             except Exception:
                 pass
-
-
-def choose_node(
-    shard: writer_pb2.ShardObject,
-) -> tuple[AbstractIndexNode, str]:
-    fake_node = get_nidx_fake_node()
-    return fake_node, shard.nidx_shard_id
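Net effect for manager.py: per-shard callbacks lose the index-node argument and vectorset operations hit the nidx API client directly. A minimal sketch of the new callback shape (hypothetical count_paragraphs helper; assumes an initialized nidx utility):

    from nidx_protos import nodereader_pb2, noderesources_pb2

    from nucliadb.common.cluster.utils import get_shard_manager
    from nucliadb.common.nidx import get_nidx_api_client


    async def count_paragraphs(shard_id: str) -> int:
        # Callbacks now receive only the nidx shard id, not (node, shard_id)
        shard = await get_nidx_api_client().GetShard(
            nodereader_pb2.GetShardRequest(shard_id=noderesources_pb2.ShardId(id=shard_id))
        )
        return shard.paragraphs


    async def total_paragraphs(kbid: str) -> int:
        sm = get_shard_manager()
        counts = await sm.apply_for_all_shards(kbid, count_paragraphs, timeout=10.0)
        return sum(counts)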
nucliadb/common/cluster/rebalance.py
CHANGED
@@ -23,9 +23,9 @@ import logging
 from nidx_protos import nodereader_pb2, noderesources_pb2

 from nucliadb.common import datamanagers, locking
-from nucliadb.common.cluster.manager import choose_node
 from nucliadb.common.cluster.utils import get_shard_manager
 from nucliadb.common.context import ApplicationContext
+from nucliadb.common.nidx import get_nidx_api_client, get_nidx_searcher_client
 from nucliadb_telemetry import errors
 from nucliadb_telemetry.logs import setup_logging
 from nucliadb_telemetry.utils import setup_telemetry
@@ -51,9 +51,10 @@ async def get_shards_paragraphs(kbid: str) -> list[tuple[str, int]]:
     results = {}
     for shard_meta in kb_shards.shards:
         # Rebalance using node as source of truth. But it will rebalance nidx
-        node, shard_id = choose_node(shard_meta)
-        shard_data: nodereader_pb2.Shard = await node.reader.GetShard(
-            nodereader_pb2.GetShardRequest(shard_id=noderesources_pb2.ShardId(id=shard_id))  # type: ignore
+        shard_data: nodereader_pb2.Shard = await get_nidx_api_client().GetShard(
+            nodereader_pb2.GetShardRequest(
+                shard_id=noderesources_pb2.ShardId(id=shard_meta.nidx_shard_id)
+            )  # type: ignore
         )
         results[shard_meta.shard] = shard_data.paragraphs

@@ -101,16 +102,15 @@ async def move_set_of_kb_resources(
     from_shard = [s for s in kb_shards.shards if s.shard == from_shard_id][0]
     to_shard = [s for s in kb_shards.shards if s.shard == to_shard_id][0]

-    from_node, from_shard_replica_id = choose_node(from_shard)
     request = nodereader_pb2.SearchRequest(
-        shard=from_shard_replica_id,
+        shard=from_shard.nidx_shard_id,
         paragraph=False,
         document=True,
         result_per_page=count,
     )
     request.field_filter.field.field_type = "a"
     request.field_filter.field.field_id = "title"
-    search_response: nodereader_pb2.SearchResponse = await from_node.reader.Search(request)
+    search_response: nodereader_pb2.SearchResponse = await get_nidx_searcher_client().Search(request)

     for result in search_response.document.results:
         resource_id = result.uuid
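The read path splits the same way: shard metadata goes through the nidx API client, queries through the searcher client. A sketch of the new single-shard search (assumes nidx is initialized; nidx_shard_id comes from the KB's shard objects):

    from nidx_protos import nodereader_pb2

    from nucliadb.common.nidx import get_nidx_searcher_client


    async def first_resource_ids(nidx_shard_id: str, count: int) -> list[str]:
        # Document-only search against one nidx shard, as move_set_of_kb_resources does
        request = nodereader_pb2.SearchRequest(
            shard=nidx_shard_id,
            paragraph=False,
            document=True,
            result_per_page=count,
        )
        response = await get_nidx_searcher_client().Search(request)
        return [result.uuid for result in response.document.results]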
nucliadb/common/cluster/rollover.py
CHANGED
@@ -23,6 +23,10 @@ import logging
 from datetime import datetime
 from typing import Optional

+from nidx_protos.nodewriter_pb2 import (
+    NewShardRequest,
+)
+
 from nucliadb.common import datamanagers, locking
 from nucliadb.common.context import ApplicationContext
 from nucliadb.common.datamanagers.rollover import RolloverState, RolloverStateNotFoundError
@@ -30,10 +34,10 @@ from nucliadb.common.external_index_providers.base import ExternalIndexManager
 from nucliadb.common.external_index_providers.manager import (
     get_external_index_manager,
 )
-from nucliadb.common.nidx import get_nidx_fake_node
+from nucliadb.common.nidx import get_nidx_api_client
 from nucliadb.common.vector_index_config import nucliadb_index_config_to_nidx
 from nucliadb.migrator.settings import settings
-from nucliadb_protos import writer_pb2
+from nucliadb_protos import utils_pb2, writer_pb2
 from nucliadb_telemetry import errors

 from .utils import (
@@ -109,7 +113,6 @@ async def create_rollover_shards(

     logger.info("Creating rollover shards", extra={"kbid": kbid})
     sm = app_context.shard_manager
-    nidx_node = get_nidx_fake_node()

     async with datamanagers.with_ro_transaction() as txn:
         try:
@@ -143,10 +146,14 @@
                 async for vectorset_id, vectorset_config in datamanagers.vectorsets.iter(txn, kbid=kbid)
             }

-            nidx_shard = await nidx_node.new_shard(
-                kbid,
+            req = NewShardRequest(
+                kbid=kbid,
+                release_channel=utils_pb2.ReleaseChannel.STABLE,
                 vectorsets_configs=vectorsets,
             )
+
+            nidx_shard = await get_nidx_api_client().NewShard(req)
+
             shard.nidx_shard_id = nidx_shard.id
             created_shards.append(shard)

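Rollover shard creation now builds the NewShardRequest explicitly instead of going through the fake node. A sketch (assumes vectorsets maps vectorset ids to their index configs, as built in create_rollover_shards):

    from nidx_protos.nodewriter_pb2 import NewShardRequest

    from nucliadb.common.nidx import get_nidx_api_client
    from nucliadb_protos import utils_pb2


    async def new_nidx_shard(kbid: str, vectorsets: dict) -> str:
        req = NewShardRequest(
            kbid=kbid,
            release_channel=utils_pb2.ReleaseChannel.STABLE,
            vectorsets_configs=vectorsets,
        )
        # The returned shard's id becomes ShardObject.nidx_shard_id
        shard = await get_nidx_api_client().NewShard(req)
        return shard.id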
nucliadb/common/nidx.py
CHANGED
@@ -26,7 +26,6 @@ from nidx_protos.nodewriter_pb2 import (
     IndexMessage,
 )

-from nucliadb.common.cluster.base import AbstractIndexNode
 from nucliadb.common.cluster.settings import settings
 from nucliadb.ingest.settings import DriverConfig
 from nucliadb.ingest.settings import settings as ingest_settings
@@ -244,46 +243,3 @@ def get_nidx_searcher_client() -> "NidxSearcherStub":
         return nidx.searcher_client
     else:
         raise Exception("nidx not initialized")
-
-
-# TODO: Remove the index node abstraction
-class NodeNidxAdapter:
-    def __init__(self, api_client, searcher_client):
-        # API methods
-        self.GetShard = api_client.GetShard
-        self.NewShard = api_client.NewShard
-        self.DeleteShard = api_client.DeleteShard
-        self.ListShards = api_client.ListShards
-        self.AddVectorSet = api_client.AddVectorSet
-        self.RemoveVectorSet = api_client.RemoveVectorSet
-        self.ListVectorSets = api_client.ListVectorSets
-        self.GetMetadata = api_client.GetMetadata
-
-        # Searcher methods
-        self.Search = searcher_client.Search
-        self.Suggest = searcher_client.Suggest
-        self.GraphSearch = searcher_client.GraphSearch
-        self.Paragraphs = searcher_client.Paragraphs
-        self.Documents = searcher_client.Documents
-
-
-class FakeNode(AbstractIndexNode):
-    def __init__(self, api_client, searcher_client):
-        self.client = NodeNidxAdapter(api_client, searcher_client)
-
-    @property
-    def reader(self):
-        return self.client
-
-    @property
-    def writer(self):
-        return self.client
-
-    @property
-    def id(self):
-        return "nidx"
-
-
-def get_nidx_fake_node() -> FakeNode:
-    nidx = get_nidx()
-    return FakeNode(nidx.api_client, nidx.searcher_client)
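With NodeNidxAdapter, FakeNode, and get_nidx_fake_node gone, callers use the two clients directly. A migration sketch (the old names come from the removed adapter above):

    from nucliadb.common.nidx import get_nidx_api_client, get_nidx_searcher_client

    # Before (removed):
    #     node = get_nidx_fake_node()
    #     shard = await node.reader.GetShard(request)  # reader and writer were the same adapter
    #     hits = await node.reader.Search(search_request)
    #
    # After:
    #     shard = await get_nidx_api_client().GetShard(request)
    #     hits = await get_nidx_searcher_client().Search(search_request)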
nucliadb/ingest/consumer/auditing.py
CHANGED
@@ -27,9 +27,9 @@ from nidx_protos import nodereader_pb2, noderesources_pb2

 from nucliadb.common import datamanagers
 from nucliadb.common.cluster.exceptions import ShardsNotFound
-from nucliadb.common.cluster.manager import choose_node
 from nucliadb.common.cluster.utils import get_shard_manager
 from nucliadb.common.constants import AVG_PARAGRAPH_SIZE_BYTES
+from nucliadb.common.nidx import get_nidx_api_client
 from nucliadb_protos import audit_pb2, writer_pb2
 from nucliadb_utils import const
 from nucliadb_utils.audit.audit import AuditStorage
@@ -114,10 +114,10 @@ class IndexAuditHandler:
         total_paragraphs = 0

         for shard_obj in shard_groups:
-            node, shard_id = choose_node(shard_obj)
-
-            shard: nodereader_pb2.Shard = await node.reader.GetShard(
-                nodereader_pb2.GetShardRequest(shard_id=noderesources_pb2.ShardId(id=shard_id))  # type: ignore
+            shard: nodereader_pb2.Shard = await get_nidx_api_client().GetShard(
+                nodereader_pb2.GetShardRequest(
+                    shard_id=noderesources_pb2.ShardId(id=shard_obj.nidx_shard_id)
+                )
             )

             total_fields += shard.fields
nucliadb/ingest/consumer/shard_creator.py
CHANGED
@@ -25,9 +25,9 @@ from functools import partial
 from nidx_protos import nodereader_pb2, noderesources_pb2

 from nucliadb.common import locking
-from nucliadb.common.cluster.manager import choose_node
 from nucliadb.common.cluster.utils import get_shard_manager
 from nucliadb.common.maindb.driver import Driver
+from nucliadb.common.nidx import get_nidx_api_client
 from nucliadb_protos import writer_pb2
 from nucliadb_utils import const
 from nucliadb_utils.cache.pubsub import PubSubDriver
@@ -105,8 +105,9 @@ class ShardCreatorHandler:
         async with locking.distributed_lock(locking.NEW_SHARD_LOCK.format(kbid=kbid)):
             # remember, a lock will do at least 1+ reads and 1 write.
             # with heavy writes, this adds some simple k/v pressure
-            node, shard_id = choose_node(current_shard)
-            shard: nodereader_pb2.Shard = await node.reader.GetShard(
-                nodereader_pb2.GetShardRequest(shard_id=noderesources_pb2.ShardId(id=shard_id))  # type: ignore
+            shard: nodereader_pb2.Shard = await get_nidx_api_client().GetShard(
+                nodereader_pb2.GetShardRequest(
+                    shard_id=noderesources_pb2.ShardId(id=current_shard.nidx_shard_id)
+                )  # type: ignore
             )
             await self.shard_manager.maybe_create_new_shard(kbid, shard.paragraphs)
nucliadb/ingest/orm/entities.py
CHANGED
@@ -30,7 +30,6 @@ from nidx_protos.nodereader_pb2 import (
 )

 from nucliadb.common import datamanagers
-from nucliadb.common.cluster.base import AbstractIndexNode
 from nucliadb.common.cluster.exceptions import (
     AlreadyExists,
     EntitiesGroupNotFound,
@@ -203,7 +202,7 @@ class EntitiesManager:
     async def get_indexed_entities_group(self, group: str) -> Optional[EntitiesGroup]:
         shard_manager = get_shard_manager()

-        async def do_entities_search(node: AbstractIndexNode, shard_id: str) -> GraphSearchResponse:
+        async def do_entities_search(shard_id: str) -> GraphSearchResponse:
             request = GraphSearchRequest()
             # XXX: this is a wild guess. Are those enough or too many?
             request.top_k = 500
@@ -211,7 +210,7 @@ class EntitiesManager:
             request.query.path.path.source.node_type = RelationNode.NodeType.ENTITY
             request.query.path.path.source.node_subtype = group
             request.query.path.path.undirected = True
-            response = await graph_search_shard(node, shard_id, request)
+            response = await graph_search_shard(shard_id, request)
             return response

         results = await shard_manager.apply_for_all_shards(
@@ -293,7 +292,7 @@ class EntitiesManager:
     ) -> set[str]:
         shard_manager = get_shard_manager()

-        async def query_indexed_entities_group_names(node: AbstractIndexNode, shard_id: str) -> set[str]:
+        async def query_indexed_entities_group_names(shard_id: str) -> set[str]:
             """Search all relation types"""
             request = SearchRequest(
                 shard=shard_id,
@@ -303,7 +302,7 @@ class EntitiesManager:
                 paragraph=False,
                 faceted=Faceted(labels=["/e"]),
             )
-            response: SearchResponse = await query_shard(node, shard_id, request)
+            response: SearchResponse = await query_shard(shard_id, request)
             try:
                 facetresults = response.document.facets["/e"].facetresults
             except KeyError:
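EntitiesManager keeps its per-shard callbacks, but they now take only the shard id, matching the new apply_for_all_shards signature. For reference, a sketch of the graph query those callbacks build (import paths for RelationNode assumed from nucliadb_protos.utils_pb2):

    from nidx_protos.nodereader_pb2 import GraphSearchRequest
    from nucliadb_protos.utils_pb2 import RelationNode


    def build_entities_query(group: str) -> GraphSearchRequest:
        # Mirrors do_entities_search: find entity nodes of a given subtype
        request = GraphSearchRequest()
        request.top_k = 500
        request.query.path.path.source.node_type = RelationNode.NodeType.ENTITY
        request.query.path.path.source.node_subtype = group
        request.query.path.path.undirected = True
        return request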
nucliadb/metrics_exporter.py
CHANGED
@@ -22,40 +22,22 @@ from __future__ import annotations
 import asyncio
 from typing import AsyncGenerator, Callable, Tuple, cast

-from nidx_protos.noderesources_pb2 import EmptyQuery, NodeMetadata
-
 from nucliadb import logger
 from nucliadb.common import datamanagers
 from nucliadb.common.context import ApplicationContext
 from nucliadb.common.maindb.pg import PGDriver
 from nucliadb.common.maindb.utils import get_driver
-from nucliadb.common.nidx import get_nidx_api_client
 from nucliadb.migrator.datamanager import MigrationsDataManager
 from nucliadb_telemetry import metrics
 from nucliadb_telemetry.logs import setup_logging
 from nucliadb_telemetry.utils import setup_telemetry
 from nucliadb_utils.fastapi.run import serve_metrics

-SHARD_COUNT = metrics.Gauge("nucliadb_node_shard_count", labels={"node": ""})
-
 MIGRATION_COUNT = metrics.Gauge("nucliadb_migration", labels={"type": "", "version": ""})

 PENDING_RESOURCE_COUNT = metrics.Gauge("nucliadb_pending_resources_count")


-async def update_node_metrics(context: ApplicationContext):
-    """
-    Report the number of shards in each node.
-    """
-    # Clear previoulsy set values so that we report only the current state
-    SHARD_COUNT.gauge.clear()
-
-    nidx_api = get_nidx_api_client()
-    metadata: NodeMetadata = await nidx_api.GetMetadata(EmptyQuery())
-
-    SHARD_COUNT.set(metadata.shard_count, labels={"node": "nidx"})
-
-
 async def iter_kbids(context: ApplicationContext) -> AsyncGenerator[str, None]:
     """
     Return a list of all KB ids.
@@ -127,7 +109,6 @@ async def run_exporter(context: ApplicationContext):
     # Schedule exporter tasks
     tasks = []
     for export_task, interval in [
-        (update_node_metrics, 10),
         (update_migration_metrics, 60 * 3),
         (update_resource_metrics, 60 * 5),
     ]:
nucliadb/purge/orphan_shards.py
CHANGED
@@ -23,14 +23,17 @@ import importlib.metadata
 from typing import Optional

 from grpc.aio import AioRpcError
+from nidx_protos import nodereader_pb2, noderesources_pb2

 from nucliadb.common import datamanagers
-from nucliadb.common.cluster import manager
-from nucliadb.common.cluster.base import AbstractIndexNode
 from nucliadb.common.cluster.utils import setup_cluster, teardown_cluster
 from nucliadb.common.maindb.driver import Driver
 from nucliadb.common.maindb.utils import setup_driver, teardown_driver
-from nucliadb.common.nidx import start_nidx_utility, stop_nidx_utility
+from nucliadb.common.nidx import (
+    get_nidx_api_client,
+    start_nidx_utility,
+    stop_nidx_utility,
+)
 from nucliadb.ingest import logger
 from nucliadb_telemetry import errors
 from nucliadb_telemetry.logs import setup_logging
@@ -69,9 +72,8 @@ async def detect_orphan_shards(driver: Driver) -> dict[str, ShardKb]:

     orphan_shard_ids = indexed_shards.keys() - stored_shards.keys()
     orphan_shards: dict[str, ShardKb] = {}
-    node = manager.get_nidx_fake_node()
     for shard_id in orphan_shard_ids:
-        kbid = await _get_kbid(node, shard_id) or UNKNOWN_KB
+        kbid = await _get_kbid(shard_id) or UNKNOWN_KB
         # Shards with knwon KB ids can be checked and ignore those comming from
         # an ongoing migration/rollover (ongoing or finished)
         if kbid != UNKNOWN_KB:
@@ -84,15 +86,15 @@ async def detect_orphan_shards(driver: Driver) -> dict[str, ShardKb]:
             orphan_shards[shard_id] = kbid

     for shard_id in orphan_shard_ids:
-        kbid = await _get_kbid(node, shard_id) or UNKNOWN_KB
+        kbid = await _get_kbid(shard_id) or UNKNOWN_KB
         orphan_shards[shard_id] = kbid
     return orphan_shards


 async def _get_indexed_shards() -> dict[str, ShardKb]:
-    node = manager.get_nidx_fake_node()
-    shards = await node.writer.ListShards(noderesources_pb2.EmptyQuery())
-    return {shard.id: UNKNOWN_KB for shard in shards.ids}
+    shards = await get_nidx_api_client().ListShards(noderesources_pb2.EmptyQuery())
+
+    return {shard.id: UNKNOWN_KB for shard in shards.ids}


 async def _get_stored_shards(driver: Driver) -> dict[str, ShardKb]:
@@ -111,16 +113,17 @@ async def _get_stored_shards(driver: Driver) -> dict[str, ShardKb]:
     return stored_shards


-async def _get_kbid(node: AbstractIndexNode, shard_id: str) -> Optional[str]:
+async def _get_kbid(shard_id: str) -> Optional[str]:
     kbid = None
     try:
-        shard_pb = await node.reader.GetShard(nodereader_pb2.GetShardRequest(shard_id=noderesources_pb2.ShardId(id=shard_id)))
+        req = nodereader_pb2.GetShardRequest()
+        req.shard_id.id = shard_id
+        shard_pb = await get_nidx_api_client().GetShard(req)
     except AioRpcError as grpc_error:
         logger.error(
             "Can't get shard while looking for orphans in nidx, is there something broken?",
             exc_info=grpc_error,
             extra={
-                "node_id": node.id,
                 "shard_id": shard_id,
             },
         )
@@ -156,7 +159,6 @@ async def purge_orphan_shards(driver: Driver):
     orphan_shards = await detect_orphan_shards(driver)
     logger.info(f"Found {len(orphan_shards)} orphan shards. Purge starts...")

-    node = manager.get_nidx_fake_node()
     for shard_id, kbid in orphan_shards.items():
         logger.info(
             "Deleting orphan shard from index node",
@@ -165,7 +167,8 @@ async def purge_orphan_shards(driver: Driver):
                 "kbid": kbid,
             },
         )
-        await node.writer.DeleteShard(noderesources_pb2.ShardId(id=shard_id))
+        req = noderesources_pb2.ShardId(id=shard_id)
+        await get_nidx_api_client().DeleteShard(req)


 def parse_arguments():
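Orphan purging now lists and deletes shards straight on the nidx API. A condensed sketch of the detect-and-delete loop (assumes stored_shard_ids was collected from maindb, as _get_stored_shards does):

    from nidx_protos import noderesources_pb2

    from nucliadb.common.nidx import get_nidx_api_client


    async def purge_orphans(stored_shard_ids: set[str]) -> None:
        api = get_nidx_api_client()
        indexed = await api.ListShards(noderesources_pb2.EmptyQuery())
        for shard in indexed.ids:
            if shard.id not in stored_shard_ids:
                # Present in nidx but unknown to maindb: an orphan
                await api.DeleteShard(noderesources_pb2.ShardId(id=shard.id))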
nucliadb/search/api/v1/knowledgebox.py
CHANGED
@@ -28,7 +28,6 @@ from nidx_protos.noderesources_pb2 import Shard

 from nucliadb.common import datamanagers
 from nucliadb.common.cluster.exceptions import ShardsNotFound
-from nucliadb.common.cluster.manager import choose_node
 from nucliadb.common.cluster.utils import get_shard_manager
 from nucliadb.common.constants import AVG_PARAGRAPH_SIZE_BYTES
 from nucliadb.common.counters import IndexCounts
@@ -164,19 +163,12 @@ async def get_node_index_counts(kbid: str) -> tuple[IndexCounts, list[str]]:
     ops = []
     queried_shards = []
     for shard_object in shard_groups:
-        try:
-            node, shard_id = choose_node(shard_object)
-        except KeyError:
-            logger.warning(
-                "Error getting node for shard",
-                extra={"kbid": kbid},
-            )
-        else:
-            if shard_id is not None:
-                # At least one node is alive for this shard group
-                # let's add it ot the query list if has a valid value
-                ops.append(get_shard(node, shard_id))
-                queried_shards.append(shard_id)
+        shard_id = shard_object.nidx_shard_id
+        if shard_id is not None:
+            # At least one node is alive for this shard group
+            # let's add it ot the query list if has a valid value
+            ops.append(get_shard(shard_id))
+            queried_shards.append(shard_id)

     if not ops:
         logger.info(f"No node found for any of this resources shards {kbid}")
nucliadb/search/api/v1/resource/search.py
CHANGED
@@ -27,7 +27,7 @@ from pydantic import ValidationError
 from nucliadb.models.responses import HTTPClientError
 from nucliadb.search.api.v1.router import KB_PREFIX, RESOURCE_PREFIX, api
 from nucliadb.search.api.v1.utils import fastapi_query
-from nucliadb.search.requesters.utils import Method, debug_nodes_info, node_query
+from nucliadb.search.requesters.utils import Method, node_query
 from nucliadb.search.search import cache
 from nucliadb.search.search.exceptions import InvalidQueryError
 from nucliadb.search.search.merge import merge_paragraphs_results
@@ -110,7 +110,7 @@ async def resource_search(
         detail = json.loads(exc.json())
         return HTTPClientError(status_code=422, detail=detail)

-    results, incomplete_results, queried_nodes = await node_query(kbid, Method.SEARCH, pb_query)
+    results, incomplete_results, queried_shards = await node_query(kbid, Method.SEARCH, pb_query)

     # We need to merge
     search_results = await merge_paragraphs_results(
@@ -122,9 +122,6 @@ async def resource_search(
     )

     response.status_code = 206 if incomplete_results else 200
-    if debug:
-        search_results.nodes = debug_nodes_info(queried_nodes)

-    queried_shards = [shard_id for _, shard_id in queried_nodes]
     search_results.shards = queried_shards
     return search_results
nucliadb/search/api/v1/search.py
CHANGED
@@ -32,7 +32,7 @@ from nucliadb.models.responses import HTTPClientError
 from nucliadb.search import predict
 from nucliadb.search.api.v1.router import KB_PREFIX, api
 from nucliadb.search.api.v1.utils import fastapi_query
-from nucliadb.search.requesters.utils import Method, debug_nodes_info, node_query
+from nucliadb.search.requesters.utils import Method, node_query
 from nucliadb.search.search import cache
 from nucliadb.search.search.exceptions import InvalidQueryError
 from nucliadb.search.search.merge import merge_results
@@ -265,7 +265,7 @@ async def search(
     pb_query, incomplete_results, autofilters, _ = await legacy_convert_retrieval_to_proto(parsed)

     # We need to query all nodes
-    results, query_incomplete_results, queried_nodes = await node_query(kbid, Method.SEARCH, pb_query)
+    results, query_incomplete_results, queried_shards = await node_query(kbid, Method.SEARCH, pb_query)
     incomplete_results = incomplete_results or query_incomplete_results

     # We need to merge
@@ -290,10 +290,6 @@ async def search(
         len(search_results.resources),
     )

-    if item.debug:
-        search_results.nodes = debug_nodes_info(queried_nodes)
-
-    queried_shards = [shard_id for _, shard_id in queried_nodes]
     search_results.shards = queried_shards
     search_results.autofilters = autofilters
     return search_results, incomplete_results
nucliadb/search/api/v1/suggest.py
CHANGED
@@ -160,7 +160,7 @@ async def suggest(
         range_modification_end,
         hidden,
     )
-    results, incomplete_results, queried_nodes = await node_query(kbid, Method.SUGGEST, pb_query)
+    results, incomplete_results, queried_shards = await node_query(kbid, Method.SUGGEST, pb_query)

     # We need to merge
     search_results = await merge_suggest_results(
@@ -171,7 +171,6 @@ async def suggest(

     response.status_code = 206 if incomplete_results else 200

-    queried_shards = [shard_id for _, shard_id in queried_nodes]
     if debug and queried_shards:
         search_results.shards = queried_shards

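Across the search endpoints the change is uniform: node_query now returns the queried nidx shard ids directly, so the (node, shard_id) unzip and the debug_nodes_info step disappear. A sketch of a call site after the change (hypothetical run_search wrapper):

    from nucliadb.search.requesters.utils import Method, node_query


    async def run_search(kbid: str, pb_query):
        # The third return value is now the list of queried nidx shard ids
        results, incomplete, queried_shards = await node_query(kbid, Method.SEARCH, pb_query)
        return results, incomplete, queried_shards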