PyPI - nucliadb - Versions diffs - 6.4.0.post4200__py3-none-any.whl → 6.4.0.post4204__py3-none-any.whl - Mend

nucliadb 6.4.0.post4200__py3-none-any.whl → 6.4.0.post4204__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (17) hide show

nucliadb/writer/api/v1/upload.py CHANGED Viewed

@@ -32,6 +32,7 @@ from fastapi_versioning import version
 from starlette.requests import Request as StarletteRequest
 from nucliadb.common import datamanagers
+from nucliadb.common.back_pressure import maybe_back_pressure
 from nucliadb.ingest.orm.utils import set_title
 from nucliadb.models.internal.processing import PushPayload, Source
 from nucliadb.models.responses import HTTPClientError
@@ -43,7 +44,6 @@ from nucliadb.writer.api.v1.resource import (
     validate_rid_exists_or_raise_error,
 )
 from nucliadb.writer.api.v1.slug import ensure_slug_uniqueness, noop_context_manager
-from nucliadb.writer.back_pressure import maybe_back_pressure
 from nucliadb.writer.resource.audit import parse_audit
 from nucliadb.writer.resource.basic import parse_basic_creation, parse_user_classifications
 from nucliadb.writer.resource.field import (
@@ -215,7 +215,7 @@ async def _tus_post(
             detail="Cannot hide a resource: the KB does not have hidden resources enabled",
         )
-    await maybe_back_pressure(request, kbid, resource_uuid=path_rid)
+    await maybe_back_pressure(kbid, resource_uuid=path_rid)
     dm = get_dm()
     storage_manager = get_storage_manager()
@@ -713,7 +713,7 @@ async def _upload(
     if path_rid is not None:
         await validate_rid_exists_or_raise_error(kbid, path_rid)
-    await maybe_back_pressure(request, kbid, resource_uuid=path_rid)
+    await maybe_back_pressure(kbid, resource_uuid=path_rid)
     md5_user = x_md5
     path, rid, valid_field = await validate_field_upload(kbid, path_rid, field, md5_user)

nucliadb/writer/lifecycle.py CHANGED Viewed

@@ -21,12 +21,12 @@ from contextlib import asynccontextmanager
 from fastapi import FastAPI
+from nucliadb.common.back_pressure import start_materializer, stop_materializer
+from nucliadb.common.back_pressure.settings import settings as back_pressure_settings
 from nucliadb.common.context.fastapi import inject_app_context
 from nucliadb.ingest.processing import start_processing_engine, stop_processing_engine
 from nucliadb.ingest.utils import start_ingest, stop_ingest
 from nucliadb.writer import SERVICE_NAME
-from nucliadb.writer.back_pressure import start_materializer, stop_materializer
-from nucliadb.writer.settings import back_pressure_settings
 from nucliadb.writer.tus import finalize as storage_finalize
 from nucliadb.writer.tus import initialize as storage_initialize
 from nucliadb_telemetry.utils import clean_telemetry, setup_telemetry

nucliadb/writer/settings.py CHANGED Viewed

@@ -19,7 +19,6 @@
 #
 from typing import Optional
-from pydantic import Field
 from pydantic_settings import BaseSettings
@@ -29,54 +28,4 @@ class Settings(BaseSettings):
     dm_redis_port: Optional[int] = None
-class BackPressureSettings(BaseSettings):
-    enabled: bool = Field(
-        default=False,
-        description="Enable or disable back pressure.",
-        alias="back_pressure_enabled",
-    )
-    indexing_rate: float = Field(
-        default=10,
-        description="Estimation of the indexing rate in messages per second. This is used to calculate the try again in time",  # noqa
-    )
-    ingest_rate: float = Field(
-        default=4,
-        description="Estimation of the ingest processed consumer rate in messages per second. This is used to calculate the try again in time",  # noqa
-    )
-    processing_rate: float = Field(
-        default=1,
-        description="Estimation of the processing rate in messages per second. This is used to calculate the try again in time",  # noqa
-    )
-    max_indexing_pending: int = Field(
-        default=1000,
-        description="Max number of messages pending to index in a node queue before rate limiting writes. Set to 0 to disable indexing back pressure checks",  # noqa
-        alias="back_pressure_max_indexing_pending",
-    )
-    max_ingest_pending: int = Field(
-        # Disabled by default
-        default=0,
-        description="Max number of messages pending to be ingested by processed consumers before rate limiting writes. Set to 0 to disable ingest back pressure checks",  # noqa
-        alias="back_pressure_max_ingest_pending",
-    )
-    max_processing_pending: int = Field(
-        default=1000,
-        description="Max number of messages pending to process per Knowledge Box before rate limiting writes. Set to 0 to disable processing back pressure checks",  # noqa
-        alias="back_pressure_max_processing_pending",
-    )
-    indexing_check_interval: int = Field(
-        default=30,
-        description="Interval in seconds to check the indexing pending messages",
-    )
-    ingest_check_interval: int = Field(
-        default=30,
-        description="Interval in seconds to check the ingest pending messages",
-    )
-    max_wait_time: int = Field(
-        default=60,
-        description="Max time in seconds to wait before trying again after back pressure",
-    )
 settings = Settings()
-back_pressure_settings = BackPressureSettings()

{nucliadb-6.4.0.post4200.dist-info → nucliadb-6.4.0.post4204.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: nucliadb
-Version: 6.4.0.post4200
+Version: 6.4.0.post4204
 Summary: NucliaDB
 Author-email: Nuclia <nucliadb@nuclia.com>
 License: AGPL
@@ -20,11 +20,11 @@ Classifier: Programming Language :: Python :: 3.12
 Classifier: Programming Language :: Python :: 3 :: Only
 Requires-Python: <4,>=3.9
 Description-Content-Type: text/markdown
-Requires-Dist: nucliadb-telemetry[all]>=6.4.0.post4200
-Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.4.0.post4200
-Requires-Dist: nucliadb-protos>=6.4.0.post4200
-Requires-Dist: nucliadb-models>=6.4.0.post4200
-Requires-Dist: nidx-protos>=6.4.0.post4200
+Requires-Dist: nucliadb-telemetry[all]>=6.4.0.post4204
+Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.4.0.post4204
+Requires-Dist: nucliadb-protos>=6.4.0.post4204
+Requires-Dist: nucliadb-models>=6.4.0.post4204
+Requires-Dist: nidx-protos>=6.4.0.post4204
 Requires-Dist: nucliadb-admin-assets>=1.0.0.post1224
 Requires-Dist: nuclia-models>=0.24.2
 Requires-Dist: uvicorn[standard]

{nucliadb-6.4.0.post4200.dist-info → nucliadb-6.4.0.post4204.dist-info}/RECORD RENAMED Viewed

@@ -60,6 +60,11 @@ nucliadb/common/ids.py,sha256=4QjoIofes_vtKj2HsFWZf8VVIVWXxdkYtLpx1n618Us,8239
 nucliadb/common/locking.py,sha256=RL0CabZVPzxHZyUjYeUyLvsJTm7W3J9o4fEgsY_ufNc,5896
 nucliadb/common/nidx.py,sha256=3EeQGjM_gxK0l_Rb54fspFWVNnzUiKF-_GMxTiiDC8Q,9116
 nucliadb/common/vector_index_config.py,sha256=LqGwhrDCp1q1vBow3scd1Chhr4GLYjYnGL72FKvOYYc,1552
+nucliadb/common/back_pressure/__init__.py,sha256=paAcAZcfGRTyURF9lnn3vX0vcwakTEVswG_xcdGBH-U,928
+nucliadb/common/back_pressure/cache.py,sha256=ANvXglWzI5naAD6N4E_fNi17qS6KNyAhjLeh6WlZZ84,2931
+nucliadb/common/back_pressure/materializer.py,sha256=YzYfN7xI5nlmSowbdLktWIkrJJb3Q2vEmoyz9O3eb2s,11667
+nucliadb/common/back_pressure/settings.py,sha256=3qNOzbI0KC6LMy-wMilXRSBfZu6CCpGHod26MTgAZ2o,3082
+nucliadb/common/back_pressure/utils.py,sha256=aZeP1XSkdgaRgZC76yR9Kje3511ZUCp7KB-XzcvhMYY,2018
 nucliadb/common/cluster/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
 nucliadb/common/cluster/exceptions.py,sha256=t7v_l93t44l2tQpdQXgO_w-c4YZRcaayOz1A2i0w4RQ,1258
 nucliadb/common/cluster/grpc_node_dummy.py,sha256=JkufazWzMA4KFEU8EBkMbiiDW4C8lLcRhiiCxP7aCQY,2949
@@ -328,28 +333,27 @@ nucliadb/train/generators/token_classifier.py,sha256=DdyMbrpxIVGWdTcz3SEN_3HwxKf
 nucliadb/train/generators/utils.py,sha256=ZNwvEVPZr-eP0MW3ABN7a11hPQKaa0NdVaRcgBcTp5w,3601
 nucliadb/writer/__init__.py,sha256=S298mrZL3vr62OrBqi97mdLxgR5cReMlRJgnaQHZV7s,1304
 nucliadb/writer/app.py,sha256=ABBO8-u4pDAa61b3mCdD0TFhuHAYcxMkgpZSGgWARuE,2736
-nucliadb/writer/back_pressure.py,sha256=4OwFGq9pvAbChB3WBZAY36lclfD-gD2ouC6YsKA4bIo,16892
 nucliadb/writer/exceptions.py,sha256=-Z7LW--eid7PNeKFuzo9kAlbLEBMUosxE-UVIgGD3SA,929
-nucliadb/writer/lifecycle.py,sha256=OYyhUZ1ejlybPzO-O_EsInjdifKiPiEzooy2d_2DW3k,2550
+nucliadb/writer/lifecycle.py,sha256=P1b_KoNkMTeF1IbyDCh_zhexWbeYe5LH6p2iFSJPiN4,2576
 nucliadb/writer/openapi.py,sha256=thqCO1ht_RJgOkXs-aIsv8aXJrU5z8wo2n05l2_LqMs,1032
 nucliadb/writer/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 nucliadb/writer/run.py,sha256=euVZ_rtHDXs-O1kB-Pt1Id8eft9CYVpWH3zJzEoEqls,1448
-nucliadb/writer/settings.py,sha256=pA9aMAvY8H6zvsxAOdGY8SZLrThDvJ8KLhluGI0GxnQ,3288
+nucliadb/writer/settings.py,sha256=gKtCTDF2E1m6lYL0Iv4WwY4VZuvw1Dsa-uIBZxCHTdU,1071
 nucliadb/writer/utilities.py,sha256=AZ5qEny1Xm0IDsFtH13oJa2usvJZK8f0FdgF1LrnLCw,1036
 nucliadb/writer/api/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
 nucliadb/writer/api/constants.py,sha256=qWEDjFUycrEZnSJyLnNK4PQNodU2oVmkO4NycaEZtio,1738
 nucliadb/writer/api/utils.py,sha256=wIQHlU8RQiIGVLI72suvyVIKlCU44Unh0Ae0IiN6Qwo,1313
 nucliadb/writer/api/v1/__init__.py,sha256=akI9A_jloNLb0dU4T5zjfdyvmSAiDeIdjAlzNx74FlU,1128
-nucliadb/writer/api/v1/export_import.py,sha256=elf-EQY5DD3mhw8kWb9tQpDcbrF9sY6VFYqxQOjuVP0,8201
-nucliadb/writer/api/v1/field.py,sha256=KOOBqBJzwsNczn_isxl-YFBL-bmduz3rzSDWMbAJefc,18523
+nucliadb/writer/api/v1/export_import.py,sha256=v0sU55TtRSqDzwkDgcwv2uSaqKCuQTtGcMpYoHQYBQA,8192
+nucliadb/writer/api/v1/field.py,sha256=OicvLF1bnkJj1ixALFLuhvFX6NCMFpORROcFcS9nKpk,18505
 nucliadb/writer/api/v1/knowledgebox.py,sha256=PHEYDFa-sN5JrI8-EiVVg5FDOsRuCLT43kyAB4xt-xA,9530
 nucliadb/writer/api/v1/learning_config.py,sha256=CKBjqcbewkfPwGUPLDWzZSpro6XkmCaVppe5Qtpu5Go,3117
-nucliadb/writer/api/v1/resource.py,sha256=jxphiyeXJq342BR1R8pRQ81L0i3Tczf_Yarqx_DqvWs,19786
+nucliadb/writer/api/v1/resource.py,sha256=IaKHwP4M4Pm3xXj_xcnQCnTzKtXj_xj-r7YOHdH-89I,19750
 nucliadb/writer/api/v1/router.py,sha256=RjuoWLpZer6Kl2BW_wznpNo6XL3BOpdTGqXZCn3QrrQ,1034
 nucliadb/writer/api/v1/services.py,sha256=3AUjk-SmvqJx76v7y89DZx6oyasojPliGYeniRQjpcU,13337
 nucliadb/writer/api/v1/slug.py,sha256=xlVBDBpRi9bNulpBHZwhyftVvulfE0zFm1XZIWl-AKY,2389
 nucliadb/writer/api/v1/transaction.py,sha256=d2Vbgnkk_-FLGSTt3vfldwiJIUf0XoyD0wP1jQNz_DY,2430
-nucliadb/writer/api/v1/upload.py,sha256=fwWXA5BuLPuGKhOcuyf0CdutWJITjJ6fAvDzV_X9VsU,33809
+nucliadb/writer/api/v1/upload.py,sha256=vdKurdxRU7vYlcQIXf5RNTuX-G0waBSak2HnNRmAbLk,33791
 nucliadb/writer/api/v1/vectorsets.py,sha256=F3iMViL5G95_Tns4aO2SOA0DwAzxK2_P8MXxtd_XLRE,6973
 nucliadb/writer/resource/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
 nucliadb/writer/resource/audit.py,sha256=FvxMZPzrNHtd31HgpZEvxzwAkbxJTZRhPLqRYYJi3tA,1426
@@ -365,8 +369,8 @@ nucliadb/writer/tus/local.py,sha256=7jYa_w9b-N90jWgN2sQKkNcomqn6JMVBOVeDOVYJHto,
 nucliadb/writer/tus/s3.py,sha256=vF0NkFTXiXhXq3bCVXXVV-ED38ECVoUeeYViP8uMqcU,8357
 nucliadb/writer/tus/storage.py,sha256=ToqwjoYnjI4oIcwzkhha_MPxi-k4Jk3Lt55zRwaC1SM,2903
 nucliadb/writer/tus/utils.py,sha256=MSdVbRsRSZVdkaum69_0wku7X3p5wlZf4nr6E0GMKbw,2556
-nucliadb-6.4.0.post4200.dist-info/METADATA,sha256=mubKUtJdgnEEdrwTcMBVP2xDBYfxCTsqDlxFa3TQugU,4223
-nucliadb-6.4.0.post4200.dist-info/WHEEL,sha256=0CuiUZ_p9E4cD6NyLD6UG80LBXYyiSYZOKDm5lp32xk,91
-nucliadb-6.4.0.post4200.dist-info/entry_points.txt,sha256=XqGfgFDuY3zXQc8ewXM2TRVjTModIq851zOsgrmaXx4,1268
-nucliadb-6.4.0.post4200.dist-info/top_level.txt,sha256=hwYhTVnX7jkQ9gJCkVrbqEG1M4lT2F_iPQND1fCzF80,20
-nucliadb-6.4.0.post4200.dist-info/RECORD,,
+nucliadb-6.4.0.post4204.dist-info/METADATA,sha256=fAIY46KkkEIlOfObcPGceV3ZnO74SMottRW6kUPOFnU,4223
+nucliadb-6.4.0.post4204.dist-info/WHEEL,sha256=0CuiUZ_p9E4cD6NyLD6UG80LBXYyiSYZOKDm5lp32xk,91
+nucliadb-6.4.0.post4204.dist-info/entry_points.txt,sha256=XqGfgFDuY3zXQc8ewXM2TRVjTModIq851zOsgrmaXx4,1268
+nucliadb-6.4.0.post4204.dist-info/top_level.txt,sha256=hwYhTVnX7jkQ9gJCkVrbqEG1M4lT2F_iPQND1fCzF80,20
+nucliadb-6.4.0.post4204.dist-info/RECORD,,

nucliadb/writer/back_pressure.py DELETED Viewed

@@ -1,485 +0,0 @@
-# Copyright (C) 2021 Bosutech XXI S.L.
-#
-# nucliadb is offered under the AGPL v3.0 and as commercial software.
-# For commercial licensing, contact us at info@nuclia.com.
-#
-# AGPL:
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU Affero General Public License as
-# published by the Free Software Foundation, either version 3 of the
-# License, or (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU Affero General Public License for more details.
-#
-# You should have received a copy of the GNU Affero General Public License
-# along with this program. If not, see <http://www.gnu.org/licenses/>.
-#
-import asyncio
-import contextlib
-import threading
-from dataclasses import dataclass
-from datetime import datetime, timedelta
-from typing import Optional
-from cachetools import TTLCache
-from fastapi import HTTPException, Request
-from nucliadb.common import datamanagers
-from nucliadb.common.context import ApplicationContext
-from nucliadb.common.context.fastapi import get_app_context
-from nucliadb.common.http_clients.processing import ProcessingHTTPClient
-from nucliadb.writer import logger
-from nucliadb.writer.settings import back_pressure_settings as settings
-from nucliadb_protos.writer_pb2 import ShardObject
-from nucliadb_telemetry import metrics
-from nucliadb_utils import const
-from nucliadb_utils.nats import NatsConnectionManager
-from nucliadb_utils.settings import is_onprem_nucliadb
-__all__ = ["maybe_back_pressure"]
-back_pressure_observer = metrics.Observer("nucliadb_back_pressure", labels={"type": ""})
-RATE_LIMITED_REQUESTS_COUNTER = metrics.Counter(
-    "nucliadb_rate_limited_requests", labels={"type": "", "cached": ""}
-)
-@dataclass
-class BackPressureData:
-    type: str
-    try_after: datetime
-class BackPressureException(Exception):
-    def __init__(self, data: BackPressureData):
-        self.data = data
-def is_back_pressure_enabled() -> bool:
-    return settings.enabled
-class BackPressureCache:
-    """
-    Global cache for storing already computed try again in times.
-    It allows us to avoid making the same calculations multiple
-    times if back pressure has been applied.
-    """
-    def __init__(self):
-        self._cache = TTLCache(maxsize=1024, ttl=5 * 60)
-        self._lock = threading.Lock()
-    def get(self, key: str) -> Optional[BackPressureData]:
-        with self._lock:
-            data = self._cache.get(key, None)
-            if data is None:
-                return None
-            if datetime.utcnow() >= data.try_after:
-                # The key has expired, so remove it from the cache
-                self._cache.pop(key, None)
-                return None
-            return data
-    def set(self, key: str, data: BackPressureData):
-        with self._lock:
-            self._cache[key] = data
-_cache = BackPressureCache()
-@contextlib.contextmanager
-def cached_back_pressure(kbid: str, resource_uuid: Optional[str] = None):
-    """
-    Context manager that handles the caching of the try again in time so that
-    we don't recompute try again times if we have already applied back pressure.
-    """
-    cache_key = "-".join([kbid, resource_uuid or ""])
-    data: Optional[BackPressureData] = _cache.get(cache_key)
-    if data is not None:
-        try_after = data.try_after
-        back_pressure_type = data.type
-        RATE_LIMITED_REQUESTS_COUNTER.inc({"type": back_pressure_type, "cached": "true"})
-        logger.info(
-            "Back pressure applied from cache",
-            extra={
-                "type": back_pressure_type,
-                "try_after": try_after,
-                "kbid": kbid,
-                "resource_uuid": resource_uuid,
-            },
-        )
-        raise HTTPException(
-            status_code=429,
-            detail={
-                "message": f"Too many messages pending to ingest. Retry after {try_after}",
-                "try_after": try_after.timestamp(),
-                "back_pressure_type": back_pressure_type,
-            },
-        )
-    try:
-        yield
-    except BackPressureException as exc:
-        try_after = exc.data.try_after
-        back_pressure_type = exc.data.type
-        RATE_LIMITED_REQUESTS_COUNTER.inc({"type": back_pressure_type, "cached": "false"})
-        _cache.set(cache_key, exc.data)
-        raise HTTPException(
-            status_code=429,
-            detail={
-                "message": f"Too many messages pending to ingest. Retry after {try_after}",
-                "try_after": try_after.timestamp(),
-                "back_pressure_type": back_pressure_type,
-            },
-        )
-class Materializer:
-    """
-    Singleton class that will run in the background gathering the different
-    stats to apply back pressure and materializing it in memory. This allows us
-    to do stale-reads when checking if back pressure is needed for a particular
-    request - thus not slowing it down.
-    """
-    def __init__(
-        self,
-        nats_manager: NatsConnectionManager,
-        indexing_check_interval: int = 30,
-        ingest_check_interval: int = 30,
-    ):
-        self.nats_manager = nats_manager
-        self.processing_http_client = ProcessingHTTPClient()
-        self.indexing_check_interval = indexing_check_interval
-        self.ingest_check_interval = ingest_check_interval
-        self.ingest_pending: int = 0
-        self.indexing_pending: int = 0
-        self._tasks: list[asyncio.Task] = []
-        self._running = False
-        self.processing_pending_cache = TTLCache(maxsize=1024, ttl=60)  # type: ignore
-        self.processing_pending_locks: dict[str, asyncio.Lock] = {}
-    async def start(self):
-        self._tasks.append(asyncio.create_task(self._get_indexing_pending_task()))
-        self._tasks.append(asyncio.create_task(self._get_ingest_pending_task()))
-        self._running = True
-    async def stop(self):
-        for task in self._tasks:
-            task.cancel()
-        self._tasks.clear()
-        await self.processing_http_client.close()
-        self._running = False
-    @property
-    def running(self) -> bool:
-        return self._running
-    async def get_processing_pending(self, kbid: str) -> int:
-        """
-        We don't materialize the pending messages for every kbid, but values are cached for some time.
-        """
-        cached = self.processing_pending_cache.get(kbid)
-        if cached is not None:
-            return cached
-        lock = self.processing_pending_locks.setdefault(kbid, asyncio.Lock())
-        async with lock:
-            # Check again if the value has been cached while we were waiting for the lock
-            cached = self.processing_pending_cache.get(kbid)
-            if cached is not None:
-                return cached
-            # Get the pending messages and cache the result
-            try:
-                with back_pressure_observer({"type": "get_processing_pending"}):
-                    pending = await self._get_processing_pending(kbid)
-            except Exception:
-                # Do not cache if there was an error
-                logger.exception(
-                    "Error getting pending messages to process. Back pressure on proccessing for KB can't be applied.",
-                    exc_info=True,
-                    extra={"kbid": kbid},
-                )
-                return 0
-            if pending > 0:
-                logger.info(
-                    f"Processing returned {pending} pending messages for KB",
-                    extra={"kbid": kbid},
-                )
-            self.processing_pending_cache[kbid] = pending
-            return pending
-    async def _get_processing_pending(self, kbid: str) -> int:
-        response = await self.processing_http_client.stats(kbid=kbid, timeout=0.5)
-        return response.incomplete
-    def get_indexing_pending(self) -> int:
-        return self.indexing_pending
-    def get_ingest_pending(self) -> int:
-        return self.ingest_pending
-    async def _get_indexing_pending_task(self):
-        try:
-            while True:
-                try:
-                    with back_pressure_observer({"type": "get_indexing_pending"}):
-                        self.indexing_pending = await get_nats_consumer_pending_messages(
-                            self.nats_manager,
-                            stream="nidx",
-                            consumer="nidx",
-                        )
-                except Exception:
-                    logger.exception(
-                        "Error getting pending messages to index",
-                        exc_info=True,
-                    )
-                await asyncio.sleep(self.indexing_check_interval)
-        except asyncio.CancelledError:
-            pass
-    async def _get_ingest_pending_task(self):
-        try:
-            while True:
-                try:
-                    with back_pressure_observer({"type": "get_ingest_pending"}):
-                        self.ingest_pending = await get_nats_consumer_pending_messages(
-                            self.nats_manager,
-                            stream=const.Streams.INGEST_PROCESSED.name,
-                            consumer=const.Streams.INGEST_PROCESSED.group,
-                        )
-                except Exception:
-                    logger.exception(
-                        "Error getting pending messages to ingest",
-                        exc_info=True,
-                    )
-                await asyncio.sleep(self.ingest_check_interval)
-        except asyncio.CancelledError:
-            pass
-MATERIALIZER: Optional[Materializer] = None
-materializer_lock = threading.Lock()
-async def start_materializer(context: ApplicationContext):
-    global MATERIALIZER
-    if MATERIALIZER is not None:
-        logger.info("Materializer already started")
-        return
-    with materializer_lock:
-        if MATERIALIZER is not None:
-            return
-        logger.info("Initializing materializer")
-        try:
-            nats_manager = context.nats_manager
-        except AttributeError:
-            logger.warning(
-                "Could not initialize materializer. Nats manager not found or not initialized yet"
-            )
-            return
-        materializer = Materializer(
-            nats_manager,
-            indexing_check_interval=settings.indexing_check_interval,
-            ingest_check_interval=settings.ingest_check_interval,
-        )
-        await materializer.start()
-        MATERIALIZER = materializer
-async def stop_materializer():
-    global MATERIALIZER
-    if MATERIALIZER is None or not MATERIALIZER.running:
-        logger.info("Materializer already stopped")
-        return
-    with materializer_lock:
-        if MATERIALIZER is None:
-            return
-        logger.info("Stopping materializer")
-        await MATERIALIZER.stop()
-        MATERIALIZER = None
-def get_materializer() -> Materializer:
-    global MATERIALIZER
-    if MATERIALIZER is None:
-        raise RuntimeError("Materializer not initialized")
-    return MATERIALIZER
-async def maybe_back_pressure(request: Request, kbid: str, resource_uuid: Optional[str] = None) -> None:
-    """
-    This function does system checks to see if we need to put back pressure on writes.
-    In that case, a HTTP 429 will be raised with the estimated time to try again.
-    """
-    if not is_back_pressure_enabled() or is_onprem_nucliadb():
-        return
-    await back_pressure_checks(request, kbid, resource_uuid)
-async def back_pressure_checks(request: Request, kbid: str, resource_uuid: Optional[str] = None):
-    """
-    Will raise a 429 if back pressure is needed:
-    - If the processing engine is behind.
-    - If ingest processed consumer is behind.
-    - If the indexing on nodes affected by the request (kbid, and resource_uuid) is behind.
-    """
-    context = get_app_context(request.app)
-    materializer = get_materializer()
-    with cached_back_pressure(kbid, resource_uuid):
-        check_ingest_behind(materializer.get_ingest_pending())
-        await check_indexing_behind(context, kbid, resource_uuid, materializer.get_indexing_pending())
-        await check_processing_behind(materializer, kbid)
-async def check_processing_behind(materializer: Materializer, kbid: str):
-    """
-    This function checks if the processing engine is behind and may raise a 429
-    if it is further behind than the configured threshold.
-    """
-    max_pending = settings.max_processing_pending
-    if max_pending <= 0:
-        # Processing back pressure is disabled
-        return
-    kb_pending = await materializer.get_processing_pending(kbid)
-    if kb_pending > max_pending:
-        try_after = estimate_try_after(
-            rate=settings.processing_rate,
-            pending=kb_pending,
-            max_wait=settings.max_wait_time,
-        )
-        data = BackPressureData(type="processing", try_after=try_after)
-        logger.info(
-            "Processing back pressure applied",
-            extra={
-                "kbid": kbid,
-                "try_after": try_after,
-                "pending": kb_pending,
-            },
-        )
-        raise BackPressureException(data)
-async def check_indexing_behind(
-    context: ApplicationContext,
-    kbid: str,
-    resource_uuid: Optional[str],
-    pending: int,
-):
-    """
-    If a resource uuid is provided, it will check the nodes that have the replicas
-    of the resource's shard, otherwise it will check the nodes of all active shards
-    for the KnowledgeBox.
-    """
-    max_pending = settings.max_indexing_pending
-    if max_pending <= 0:
-        # Indexing back pressure is disabled
-        return
-    if pending > max_pending:
-        try_after = estimate_try_after(
-            rate=settings.indexing_rate,
-            pending=pending,
-            max_wait=settings.max_wait_time,
-        )
-        data = BackPressureData(type="indexing", try_after=try_after)
-        logger.info(
-            "Indexing back pressure applied",
-            extra={
-                "kbid": kbid,
-                "resource_uuid": resource_uuid,
-                "try_after": try_after,
-                "pending": pending,
-            },
-        )
-        raise BackPressureException(data)
-def check_ingest_behind(ingest_pending: int):
-    max_pending = settings.max_ingest_pending
-    if max_pending <= 0:
-        # Ingest back pressure is disabled
-        return
-    if ingest_pending > max_pending:
-        try_after = estimate_try_after(
-            rate=settings.ingest_rate,
-            pending=ingest_pending,
-            max_wait=settings.max_wait_time,
-        )
-        data = BackPressureData(type="ingest", try_after=try_after)
-        logger.info(
-            "Ingest back pressure applied",
-            extra={"try_after": try_after, "pending": ingest_pending},
-        )
-        raise BackPressureException(data)
-def estimate_try_after(rate: float, pending: int, max_wait: int) -> datetime:
-    """
-    This function estimates the time to try again based on the rate and the number of pending messages.
-    """
-    delta_seconds = min(pending / rate, max_wait)
-    return datetime.utcnow() + timedelta(seconds=delta_seconds)
-async def get_nats_consumer_pending_messages(
-    nats_manager: NatsConnectionManager, *, stream: str, consumer: str
-) -> int:
-    # get raw js client
-    js = nats_manager.js
-    consumer_info = await js.consumer_info(stream, consumer)
-    return consumer_info.num_pending
-async def get_kb_active_shard(context: ApplicationContext, kbid: str) -> Optional[ShardObject]:
-    async with context.kv_driver.transaction(read_only=True) as txn:
-        return await context.shard_manager.get_current_active_shard(txn, kbid)
-async def get_resource_shard(
-    context: ApplicationContext, kbid: str, resource_uuid: str
-) -> Optional[ShardObject]:
-    async with datamanagers.with_ro_transaction() as txn:
-        shard_id = await datamanagers.resources.get_resource_shard_id(txn, kbid=kbid, rid=resource_uuid)
-        if shard_id is None:
-            # Resource does not exist
-            logger.debug(
-                "Resource shard not found",
-                extra={"kbid": kbid, "resource_uuid": resource_uuid},
-            )
-            return None
-        all_shards = await datamanagers.cluster.get_kb_shards(txn, kbid=kbid)
-        if all_shards is None:
-            # KB doesn't exist or has been deleted
-            logger.debug("No shards found for KB", extra={"kbid": kbid})
-            return None
-    for shard in all_shards.shards:
-        if shard.shard == shard_id:
-            return shard
-    else:
-        logger.error(
-            "Resource shard not found",
-            extra={"kbid": kbid, "resource_uuid": resource_uuid, "shard_id": shard_id},
-        )
-        return None

{nucliadb-6.4.0.post4200.dist-info → nucliadb-6.4.0.post4204.dist-info}/WHEEL RENAMED Viewed

File without changes

nucliadb 6.4.0.post4200__py3-none-any.whl → 6.4.0.post4204__py3-none-any.whl

nucliadb 6.4.0.post4200__py3-none-any.whl → 6.4.0.post4204__py3-none-any.whl