PyPI - nucliadb - Versions diffs - 6.2.1.post2971__py3-none-any.whl → 6.2.1.post2972__py3-none-any.whl - Mend

nucliadb 6.2.1.post2971__py3-none-any.whl → 6.2.1.post2972__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (36) hide show

nucliadb/common/cluster/manager.py +33 -331
nucliadb/common/cluster/rebalance.py +2 -2
nucliadb/common/cluster/rollover.py +12 -71
nucliadb/common/cluster/standalone/utils.py +0 -43
nucliadb/common/cluster/utils.py +0 -16
nucliadb/common/nidx.py +21 -23
nucliadb/health.py +0 -7
nucliadb/ingest/app.py +0 -8
nucliadb/ingest/consumer/auditing.py +1 -1
nucliadb/ingest/consumer/shard_creator.py +1 -1
nucliadb/ingest/orm/entities.py +3 -6
nucliadb/purge/orphan_shards.py +6 -4
nucliadb/search/api/v1/knowledgebox.py +1 -5
nucliadb/search/requesters/utils.py +1 -2
nucliadb/search/search/shards.py +19 -0
nucliadb/standalone/introspect.py +0 -25
nucliadb/train/lifecycle.py +0 -6
nucliadb/train/nodes.py +1 -5
nucliadb/writer/back_pressure.py +17 -46
nucliadb/writer/settings.py +2 -2
{nucliadb-6.2.1.post2971.dist-info → nucliadb-6.2.1.post2972.dist-info}/METADATA +5 -7
{nucliadb-6.2.1.post2971.dist-info → nucliadb-6.2.1.post2972.dist-info}/RECORD +26 -36
nucliadb/common/cluster/discovery/__init__.py +0 -19
nucliadb/common/cluster/discovery/base.py +0 -178
nucliadb/common/cluster/discovery/k8s.py +0 -301
nucliadb/common/cluster/discovery/manual.py +0 -57
nucliadb/common/cluster/discovery/single.py +0 -51
nucliadb/common/cluster/discovery/types.py +0 -32
nucliadb/common/cluster/discovery/utils.py +0 -67
nucliadb/common/cluster/standalone/grpc_node_binding.py +0 -349
nucliadb/common/cluster/standalone/index_node.py +0 -123
nucliadb/common/cluster/standalone/service.py +0 -84
{nucliadb-6.2.1.post2971.dist-info → nucliadb-6.2.1.post2972.dist-info}/WHEEL +0 -0
{nucliadb-6.2.1.post2971.dist-info → nucliadb-6.2.1.post2972.dist-info}/entry_points.txt +0 -0
{nucliadb-6.2.1.post2971.dist-info → nucliadb-6.2.1.post2972.dist-info}/top_level.txt +0 -0
{nucliadb-6.2.1.post2971.dist-info → nucliadb-6.2.1.post2972.dist-info}/zip-safe +0 -0

nucliadb/common/cluster/standalone/utils.py CHANGED Viewed

@@ -19,13 +19,10 @@
 import logging
 import os
-import shutil
 import uuid
-from socket import gethostname
 from nucliadb.common.cluster.settings import StandaloneNodeRole
 from nucliadb.common.cluster.settings import settings as cluster_settings
-from nucliadb.common.cluster.standalone.index_node import StandaloneIndexNode
 logger = logging.getLogger(__name__)
@@ -46,46 +43,6 @@ def get_standalone_node_id() -> str:
         return str(uuid.UUID(bytes=f.read()))
-_SELF_INDEX_NODE = None
-def get_self() -> StandaloneIndexNode:
-    """
-    This returns an instance of the standalone index node
-    so when API requests come into this mode, we don't
-    make another grpc request since this node can service it directly.
-    """
-    if not is_index_node():
-        raise Exception("This node is not an Index Node. You should not reach this code path.")
-    global _SELF_INDEX_NODE
-    node_id = get_standalone_node_id()
-    if _SELF_INDEX_NODE is None or node_id != _SELF_INDEX_NODE.id:
-        if "NUCLIADB_SERVICE_HOST" in os.environ:
-            hn = os.environ["HOSTNAME"]
-            ns = os.environ.get("NAMESPACE", "nucliadb")
-            host = f"{hn}.{ns}"
-        else:
-            host = gethostname()
-        _SELF_INDEX_NODE = StandaloneIndexNode(id=node_id, address=host, shard_count=0, available_disk=0)
-    try:
-        _, _, available_disk = shutil.disk_usage(cluster_settings.data_path)
-        _SELF_INDEX_NODE.available_disk = available_disk
-    except FileNotFoundError:  # pragma: no cover
-        ...
-    try:
-        _shards_dir = os.path.join(cluster_settings.data_path, "shards")
-        _SELF_INDEX_NODE.shard_count = len(
-            [
-                shard_dir
-                for shard_dir in os.listdir(_shards_dir)
-                if os.path.isdir(os.path.join(_shards_dir, shard_dir))
-            ]
-        )
-    except FileNotFoundError:  # pragma: no cover
-        ...
-    return _SELF_INDEX_NODE
 def is_index_node() -> bool:
     return cluster_settings.standalone_node_role in (
         StandaloneNodeRole.ALL,

nucliadb/common/cluster/utils.py CHANGED Viewed

@@ -23,20 +23,11 @@ from typing import TYPE_CHECKING, Optional, Union
 import backoff
 from nucliadb.common import datamanagers
-from nucliadb.common.cluster.discovery.utils import (
-    setup_cluster_discovery,
-    teardown_cluster_discovery,
-)
 from nucliadb.common.cluster.manager import (
     KBShardManager,
     StandaloneKBShardManager,
-    clear_index_nodes,
 )
 from nucliadb.common.cluster.settings import settings
-from nucliadb.common.cluster.standalone.service import (
-    start_grpc as start_standalone_grpc,
-)
-from nucliadb.common.cluster.standalone.utils import is_index_node
 from nucliadb.ingest.orm.resource import Resource
 from nucliadb_protos import nodereader_pb2, writer_pb2
 from nucliadb_utils import const
@@ -62,12 +53,8 @@ async def setup_cluster() -> Union[KBShardManager, StandaloneKBShardManager]:
             # already setup
             return get_utility(Utility.SHARD_MANAGER)
-        await setup_cluster_discovery()
         mng: Union[KBShardManager, StandaloneKBShardManager]
         if settings.standalone_mode:
-            if is_index_node():
-                server = await start_standalone_grpc()
-                set_utility(_STANDALONE_SERVER, server)
             mng = StandaloneKBShardManager()
         else:
             mng = KBShardManager()
@@ -76,7 +63,6 @@ async def setup_cluster() -> Union[KBShardManager, StandaloneKBShardManager]:
 async def teardown_cluster():
-    await teardown_cluster_discovery()
     if get_utility(Utility.SHARD_MANAGER):
         clean_utility(Utility.SHARD_MANAGER)
@@ -85,8 +71,6 @@ async def teardown_cluster():
         await std_server.stop(None)
         clean_utility(_STANDALONE_SERVER)
-    clear_index_nodes()
 def get_shard_manager() -> KBShardManager:
     return get_utility(Utility.SHARD_MANAGER)  # type: ignore

nucliadb/common/nidx.py CHANGED Viewed

@@ -37,12 +37,10 @@ from nucliadb_utils.settings import FileBackendConfig, indexing_settings, storag
 from nucliadb_utils.storages.settings import settings as extended_storage_settings
 from nucliadb_utils.utilities import Utility, clean_utility, get_utility, set_utility
-NIDX_ENABLED = bool(os.environ.get("NIDX_ENABLED"))
 class NidxUtility:
-    api_client = None
-    searcher_client = None
+    api_client: NidxApiStub
+    searcher_client: NidxSearcherStub
     async def initialize(self):
         raise NotImplementedError()
@@ -98,6 +96,9 @@ class NidxBindingUtility(NidxUtility):
         self.config = {
             "METADATA__DATABASE_URL": ingest_settings.driver_pg_url,
+            "SEARCHER__METADATA_REFRESH_INTERVAL": str(
+                indexing_settings.index_searcher_refresh_interval
+            ),
             **_storage_config("INDEXER", None),
             **_storage_config("STORAGE", "nidx"),
         }
@@ -158,11 +159,8 @@ class NidxServiceUtility(NidxUtility):
         return res.seq
-async def start_nidx_utility() -> Optional[NidxUtility]:
-    if not NIDX_ENABLED:
-        return None
-    nidx = get_nidx()
+async def start_nidx_utility() -> NidxUtility:
+    nidx = get_utility(Utility.NIDX)
     if nidx:
         return nidx
@@ -178,30 +176,33 @@ async def start_nidx_utility() -> Optional[NidxUtility]:
 async def stop_nidx_utility():
-    nidx_utility = get_nidx()
+    nidx_utility = get_utility(Utility.NIDX)
     if nidx_utility:
         clean_utility(Utility.NIDX)
         await nidx_utility.finalize()
-def get_nidx() -> Optional[NidxUtility]:
-    return get_utility(Utility.NIDX)
+def get_nidx() -> NidxUtility:
+    nidx = get_utility(Utility.NIDX)
+    if nidx is None:
+        raise Exception("nidx not initialized")
+    return nidx
-def get_nidx_api_client() -> Optional["NidxApiStub"]:
+def get_nidx_api_client() -> "NidxApiStub":
     nidx = get_nidx()
-    if nidx:
+    if nidx.api_client:
         return nidx.api_client
     else:
-        return None
+        raise Exception("nidx not initialized")
-def get_nidx_searcher_client() -> Optional["NidxSearcherStub"]:
+def get_nidx_searcher_client() -> "NidxSearcherStub":
     nidx = get_nidx()
-    if nidx:
+    if nidx.searcher_client:
         return nidx.searcher_client
     else:
-        return None
+        raise Exception("nidx not initialized")
 # TODO: Remove the index node abstraction
@@ -252,9 +253,6 @@ class FakeNode(AbstractIndexNode):
         return "nidx"
-def get_nidx_fake_node() -> Optional[FakeNode]:
+def get_nidx_fake_node() -> FakeNode:
     nidx = get_nidx()
-    if nidx:
-        return FakeNode(nidx.api_client, nidx.searcher_client)
-    else:
-        return None
+    return FakeNode(nidx.api_client, nidx.searcher_client)

nucliadb/health.py CHANGED Viewed

@@ -40,13 +40,6 @@ def nats_manager_healthy() -> bool:
     return nats_manager.healthy()
-def nodes_health_check() -> bool:
-    from nucliadb.common.cluster import manager
-    from nucliadb.ingest.settings import DriverConfig, settings
-    return len(manager.INDEX_NODES) > 0 or settings.driver == DriverConfig.LOCAL
 def pubsub_check() -> bool:
     driver: Optional[PubSubDriver] = get_utility(Utility.PUBSUB)
     if driver is None:

nucliadb/ingest/app.py CHANGED Viewed

@@ -22,10 +22,6 @@ import importlib.metadata
 from typing import Awaitable, Callable
 from nucliadb import health
-from nucliadb.common.cluster.discovery.utils import (
-    setup_cluster_discovery,
-    teardown_cluster_discovery,
-)
 from nucliadb.common.cluster.settings import settings as cluster_settings
 from nucliadb.common.cluster.utils import setup_cluster, teardown_cluster
 from nucliadb.common.context import ApplicationContext
@@ -89,13 +85,9 @@ async def initialize() -> list[Callable[[], Awaitable[None]]]:
         )
         finalizers.append(stop_nats_manager)
-        await setup_cluster_discovery()
-        finalizers.append(teardown_cluster_discovery)
     health.register_health_checks(
         [
             health.nats_manager_healthy,
-            health.nodes_health_check,
             health.pubsub_check,
         ]
     )

nucliadb/ingest/consumer/auditing.py CHANGED Viewed

@@ -113,7 +113,7 @@ class IndexAuditHandler:
         for shard_obj in shard_groups:
             # TODO: Uses node for auditing, don't want to suddenly change metrics
-            node, shard_id = choose_node(shard_obj, use_nidx=False)
+            node, shard_id = choose_node(shard_obj)
             shard: nodereader_pb2.Shard = await node.reader.GetShard(
                 nodereader_pb2.GetShardRequest(shard_id=noderesources_pb2.ShardId(id=shard_id))  # type: ignore
             )

nucliadb/ingest/consumer/shard_creator.py CHANGED Viewed

@@ -103,7 +103,7 @@ class ShardCreatorHandler:
         async with locking.distributed_lock(locking.NEW_SHARD_LOCK.format(kbid=kbid)):
             # remember, a lock will do at least 1+ reads and 1 write.
             # with heavy writes, this adds some simple k/v pressure
-            node, shard_id = choose_node(current_shard, use_nidx=True)
+            node, shard_id = choose_node(current_shard)
             shard: nodereader_pb2.Shard = await node.reader.GetShard(
                 nodereader_pb2.GetShardRequest(shard_id=noderesources_pb2.ShardId(id=shard_id))  # type: ignore
             )

nucliadb/ingest/orm/entities.py CHANGED Viewed

@@ -37,6 +37,7 @@ from nucliadb.common.datamanagers.entities import (
 from nucliadb.common.maindb.driver import Transaction
 from nucliadb.ingest.orm.knowledgebox import KnowledgeBox
 from nucliadb.ingest.settings import settings
+from nucliadb.search.search.shards import query_shard
 from nucliadb_protos.knowledgebox_pb2 import (
     DeletedEntitiesGroups,
     EntitiesGroup,
@@ -54,8 +55,6 @@ from nucliadb_protos.nodereader_pb2 import (
 from nucliadb_protos.utils_pb2 import RelationNode
 from nucliadb_protos.writer_pb2 import GetEntitiesResponse
 from nucliadb_telemetry import errors
-from nucliadb_utils import const
-from nucliadb_utils.utilities import has_feature
 from .exceptions import EntityManagementException
@@ -218,14 +217,13 @@ class EntitiesManager:
                     ],
                 ),
             )
-            response = await node.reader.Search(request)  # type: ignore
+            response = await query_shard(node, shard_id, request)
             return response.relation
         results = await shard_manager.apply_for_all_shards(
             self.kbid,
             do_entities_search,
             settings.relation_search_timeout,
-            use_nidx=has_feature(const.Features.NIDX_READS, context={"kbid": self.kbid}),
             use_read_replica_nodes=self.use_read_replica_nodes,
         )
         for result in results:
@@ -315,7 +313,7 @@ class EntitiesManager:
                 paragraph=False,
                 faceted=Faceted(labels=["/e"]),
             )
-            response: SearchResponse = await node.reader.Search(request)  # type: ignore
+            response: SearchResponse = await query_shard(node, shard_id, request)
             try:
                 facetresults = response.document.facets["/e"].facetresults
                 return {facet.tag.split("/")[-1] for facet in facetresults}
@@ -327,7 +325,6 @@ class EntitiesManager:
             self.kbid,
             query_indexed_entities_group_names,
             settings.relation_types_timeout,
-            use_nidx=has_feature(const.Features.NIDX_READS, context={"kbid": self.kbid}),
             use_read_replica_nodes=self.use_read_replica_nodes,
         )
         for result in results:

nucliadb/purge/orphan_shards.py CHANGED Viewed

@@ -33,6 +33,7 @@ from nucliadb.common.cluster.manager import KBShardManager
 from nucliadb.common.cluster.utils import setup_cluster, teardown_cluster
 from nucliadb.common.maindb.driver import Driver
 from nucliadb.common.maindb.utils import setup_driver, teardown_driver
+from nucliadb.common.nidx import start_nidx_utility, stop_nidx_utility
 from nucliadb.ingest import logger
 from nucliadb_telemetry import errors
 from nucliadb_telemetry.logs import setup_logging
@@ -135,10 +136,9 @@ async def _get_stored_shards(driver: Driver) -> dict[str, ShardLocation]:
                 continue
             else:
                 for shard_object_pb in kb_shards:
-                    for shard_replica_pb in shard_object_pb.replicas:
-                        shard_replica_id = shard_replica_pb.shard.id
-                        node_id = shard_replica_pb.node
-                        stored_shards[shard_replica_id] = ShardLocation(kbid=kbid, node_id=node_id)
+                    stored_shards[shard_object_pb.nidx_shard_id] = ShardLocation(
+                        kbid=kbid, node_id="nidx"
+                    )
     return stored_shards
@@ -241,6 +241,7 @@ async def main():
     """
     args = parse_arguments()
+    await start_nidx_utility()
     await setup_cluster()
     driver = await setup_driver()
@@ -253,6 +254,7 @@ async def main():
     finally:
         await teardown_driver()
         await teardown_cluster()
+        await stop_nidx_utility()
 def run() -> int:  # pragma: no cover

nucliadb/search/api/v1/knowledgebox.py CHANGED Viewed

@@ -48,9 +48,7 @@ from nucliadb_protos.noderesources_pb2 import Shard
 from nucliadb_protos.writer_pb2 import ShardObject as PBShardObject
 from nucliadb_protos.writer_pb2 import Shards
 from nucliadb_telemetry import errors
-from nucliadb_utils import const
 from nucliadb_utils.authentication import requires, requires_one
-from nucliadb_utils.utilities import has_feature
 MAX_PARAGRAPHS_FOR_SMALL_KB = 250_000
@@ -166,9 +164,7 @@ async def get_node_index_counts(kbid: str) -> tuple[IndexCounts, list[str]]:
     queried_shards = []
     for shard_object in shard_groups:
         try:
-            node, shard_id = choose_node(
-                shard_object, use_nidx=has_feature(const.Features.NIDX_READS, context={"kbid": kbid})
-            )
+            node, shard_id = choose_node(shard_object)
         except KeyError:
             raise HTTPException(
                 status_code=500,

nucliadb/search/requesters/utils.py CHANGED Viewed

@@ -123,7 +123,6 @@ async def node_query(
         try:
             node, shard_id = cluster_manager.choose_node(
                 shard_obj,
-                use_nidx=has_feature(const.Features.NIDX_READS, context={"kbid": kbid}),
                 use_read_replica_nodes=use_read_replica_nodes,
                 target_shard_replicas=target_shard_replicas,
             )
@@ -224,7 +223,7 @@ def validate_node_query_results(results: list[Any]) -> Optional[HTTPException]:
                     )
             else:
                 errors.capture_exception(result)
-                logger.exception("Error while querying shard data", exc_info=result)
+                logger.exception(f"Error while querying shard data {result}", exc_info=result)
             return HTTPException(status_code=status_code, detail=reason)

nucliadb/search/search/shards.py CHANGED Viewed

@@ -19,6 +19,10 @@
 #
 import asyncio
+import backoff
+from grpc import StatusCode
+from grpc.aio import AioRpcError
 from nucliadb.common.cluster.base import AbstractIndexNode
 from nucliadb_protos.nodereader_pb2 import (
     GetShardRequest,
@@ -39,6 +43,15 @@ node_observer = metrics.Observer(
 )
+def should_giveup(e: Exception):
+    if isinstance(e, AioRpcError) and e.code() != StatusCode.NOT_FOUND:
+        return True
+    return False
+@backoff.on_exception(
+    backoff.expo, Exception, jitter=None, factor=0.1, max_tries=3, giveup=should_giveup
+)
 async def query_shard(node: AbstractIndexNode, shard: str, query: SearchRequest) -> SearchResponse:
     req = SearchRequest()
     req.CopyFrom(query)
@@ -47,6 +60,9 @@ async def query_shard(node: AbstractIndexNode, shard: str, query: SearchRequest)
         return await node.reader.Search(req)  # type: ignore
+@backoff.on_exception(
+    backoff.expo, Exception, jitter=None, factor=0.1, max_tries=3, giveup=should_giveup
+)
 async def get_shard(node: AbstractIndexNode, shard_id: str) -> Shard:
     req = GetShardRequest()
     req.shard_id.id = shard_id
@@ -54,6 +70,9 @@ async def get_shard(node: AbstractIndexNode, shard_id: str) -> Shard:
         return await node.reader.GetShard(req)  # type: ignore
+@backoff.on_exception(
+    backoff.expo, Exception, jitter=None, factor=0.1, max_tries=3, giveup=should_giveup
+)
 async def suggest_shard(node: AbstractIndexNode, shard: str, query: SuggestRequest) -> SuggestResponse:
     req = SuggestRequest()
     req.CopyFrom(query)

nucliadb/standalone/introspect.py CHANGED Viewed

@@ -32,7 +32,6 @@ import psutil
 from fastapi import FastAPI
 from pydantic import BaseModel
-from nucliadb.common.cluster import manager as cluster_manager
 from nucliadb.standalone.settings import Settings
 from nucliadb_telemetry.settings import LogOutputType, LogSettings
@@ -83,7 +82,6 @@ async def stream_tar(app: FastAPI) -> AsyncGenerator[bytes, None]:
         with tarfile.open(tar_file, mode="w:gz") as tar:
             await add_system_info(temp_dir, tar)
             await add_dependencies(temp_dir, tar)
-            await add_cluster_info(temp_dir, tar)
             settings: Settings = app.settings.copy()  # type: ignore
             await add_settings(temp_dir, tar, settings)
             if settings.log_output_type == LogOutputType.FILE:
@@ -145,29 +143,6 @@ def _add_dependencies_to_tar(temp_dir: str, tar: tarfile.TarFile):
     tar.add(dependendies_file, arcname="dependencies.txt")
-async def add_cluster_info(temp_dir: str, tar: tarfile.TarFile):
-    loop = asyncio.get_event_loop()
-    await loop.run_in_executor(None, _add_cluster_info_to_tar, temp_dir, tar)
-def _add_cluster_info_to_tar(temp_dir: str, tar: tarfile.TarFile):
-    cluster_info = ClusterInfo(
-        nodes=[
-            NodeInfo(
-                id=node.id,
-                address=node.address,
-                shard_count=node.shard_count,
-                primary_id=node.primary_id,
-            )
-            for node in cluster_manager.get_index_nodes()
-        ]
-    )
-    cluster_info_file = os.path.join(temp_dir, "cluster_info.txt")
-    with open(cluster_info_file, "w") as f:
-        f.write(cluster_info.model_dump_json(indent=4))
-    tar.add(cluster_info_file, arcname="cluster_info.txt")
 async def add_settings(temp_dir: str, tar: tarfile.TarFile, settings: Settings):
     loop = asyncio.get_event_loop()
     await loop.run_in_executor(None, _add_settings_to_tar, temp_dir, tar, settings)

nucliadb/train/lifecycle.py CHANGED Viewed

@@ -22,10 +22,6 @@ from contextlib import asynccontextmanager
 from fastapi import FastAPI
-from nucliadb.common.cluster.discovery.utils import (
-    setup_cluster_discovery,
-    teardown_cluster_discovery,
-)
 from nucliadb.train import SERVICE_NAME
 from nucliadb.train.utils import (
     start_shard_manager,
@@ -40,7 +36,6 @@ from nucliadb_utils.utilities import start_audit_utility, stop_audit_utility
 @asynccontextmanager
 async def lifespan(app: FastAPI):
     await setup_telemetry(SERVICE_NAME)
-    await setup_cluster_discovery()
     await start_shard_manager()
     await start_train_grpc(SERVICE_NAME)
     await start_audit_utility(SERVICE_NAME)
@@ -50,5 +45,4 @@ async def lifespan(app: FastAPI):
     await stop_audit_utility()
     await stop_train_grpc()
     await stop_shard_manager()
-    await teardown_cluster_discovery()
     await clean_telemetry(SERVICE_NAME)

nucliadb/train/nodes.py CHANGED Viewed

@@ -45,9 +45,7 @@ from nucliadb_protos.train_pb2 import (
     TrainSentence,
 )
 from nucliadb_protos.writer_pb2 import ShardObject
-from nucliadb_utils import const
 from nucliadb_utils.storages.storage import Storage
-from nucliadb_utils.utilities import has_feature
 class TrainShardManager(manager.KBShardManager):
@@ -63,9 +61,7 @@ class TrainShardManager(manager.KBShardManager):
         except StopIteration:
             raise KeyError("Shard not found")
-        node_obj, shard_id = manager.choose_node(
-            shard_object, use_nidx=has_feature(const.Features.NIDX_READS, context={"kbid": kbid})
-        )
+        node_obj, shard_id = manager.choose_node(shard_object)
         return node_obj, shard_id
     async def get_kb_obj(self, txn: Transaction, kbid: str) -> Optional[KnowledgeBox]:

nucliadb/writer/back_pressure.py CHANGED Viewed

@@ -30,7 +30,6 @@ from cachetools import TTLCache
 from fastapi import HTTPException, Request
 from nucliadb.common import datamanagers
-from nucliadb.common.cluster.manager import get_index_nodes
 from nucliadb.common.context import ApplicationContext
 from nucliadb.common.context.fastapi import get_app_context
 from nucliadb.common.http_clients.processing import ProcessingHTTPClient
@@ -168,7 +167,7 @@ class Materializer:
         self.ingest_check_interval = ingest_check_interval
         self.ingest_pending: int = 0
-        self.indexing_pending: dict[str, int] = {}
+        self.indexing_pending: int = 0
         self._tasks: list[asyncio.Task] = []
         self._running = False
@@ -232,7 +231,7 @@ class Materializer:
         response = await self.processing_http_client.stats(kbid=kbid, timeout=0.5)
         return response.incomplete
-    def get_indexing_pending(self) -> dict[str, int]:
+    def get_indexing_pending(self) -> int:
         return self.indexing_pending
     def get_ingest_pending(self) -> int:
@@ -241,20 +240,18 @@ class Materializer:
     async def _get_indexing_pending_task(self):
         try:
             while True:
-                for node in get_index_nodes():
-                    try:
-                        with back_pressure_observer({"type": "get_indexing_pending"}):
-                            self.indexing_pending[node.id] = await get_nats_consumer_pending_messages(
-                                self.nats_manager,
-                                stream=const.Streams.INDEX.name,
-                                consumer=const.Streams.INDEX.group.format(node=node.id),
-                            )
-                    except Exception:
-                        logger.exception(
-                            "Error getting pending messages to index",
-                            exc_info=True,
-                            extra={"node_id": node.id},
+                try:
+                    with back_pressure_observer({"type": "get_indexing_pending"}):
+                        self.indexing_pending = await get_nats_consumer_pending_messages(
+                            self.nats_manager,
+                            stream="nidx",
+                            consumer="nidx",
                         )
+                except Exception:
+                    logger.exception(
+                        "Error getting pending messages to index",
+                        exc_info=True,
+                    )
                 await asyncio.sleep(self.indexing_check_interval)
         except asyncio.CancelledError:
             pass
@@ -386,7 +383,7 @@ async def check_indexing_behind(
     context: ApplicationContext,
     kbid: str,
     resource_uuid: Optional[str],
-    pending_by_node: dict[str, int],
+    pending: int,
 ):
     """
     If a resource uuid is provided, it will check the nodes that have the replicas
@@ -398,36 +395,10 @@ async def check_indexing_behind(
         # Indexing back pressure is disabled
         return
-    if len(pending_by_node) == 0:
-        logger.warning("No nodes found to check for pending messages")
-        return
-    # Get nodes that are involved in the indexing of the request
-    if resource_uuid is not None:
-        nodes_to_check = await get_nodes_for_resource_shard(context, kbid, resource_uuid)
-    else:
-        nodes_to_check = await get_nodes_for_kb_active_shards(context, kbid)
-    if len(nodes_to_check) == 0:
-        logger.warning(
-            "No nodes found to check for pending messages",
-            extra={"kbid": kbid, "resource_uuid": resource_uuid},
-        )
-        return
-    # Get the highest pending value
-    highest_pending = 0
-    for node in nodes_to_check:
-        if node not in pending_by_node:
-            logger.warning("Node not found in pending messages", extra={"node": node})
-            continue
-        if pending_by_node[node] > highest_pending:
-            highest_pending = pending_by_node[node]
-    if highest_pending > max_pending:
+    if pending > max_pending:
         try_after = estimate_try_after(
             rate=settings.indexing_rate,
-            pending=highest_pending,
+            pending=pending,
             max_wait=settings.max_wait_time,
         )
         data = BackPressureData(type="indexing", try_after=try_after)
@@ -437,7 +408,7 @@ async def check_indexing_behind(
                 "kbid": kbid,
                 "resource_uuid": resource_uuid,
                 "try_after": try_after,
-                "pending": highest_pending,
+                "pending": pending,
             },
         )
         raise BackPressureException(data)

nucliadb/writer/settings.py CHANGED Viewed

@@ -36,7 +36,7 @@ class BackPressureSettings(BaseSettings):
         alias="back_pressure_enabled",
     )
     indexing_rate: float = Field(
-        default=4,
+        default=10,
         description="Estimation of the indexing rate in messages per second. This is used to calculate the try again in time",  # noqa
     )
     ingest_rate: float = Field(
@@ -48,7 +48,7 @@ class BackPressureSettings(BaseSettings):
         description="Estimation of the processing rate in messages per second. This is used to calculate the try again in time",  # noqa
     )
     max_indexing_pending: int = Field(
-        default=200,
+        default=1000,
         description="Max number of messages pending to index in a node queue before rate limiting writes. Set to 0 to disable indexing back pressure checks",  # noqa
         alias="back_pressure_max_indexing_pending",
     )

nucliadb 6.2.1.post2971__py3-none-any.whl → 6.2.1.post2972__py3-none-any.whl

nucliadb 6.2.1.post2971__py3-none-any.whl → 6.2.1.post2972__py3-none-any.whl