nucliadb 6.4.0.post4200__py3-none-any.whl → 6.4.0.post4204__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -32,6 +32,7 @@ from fastapi_versioning import version
32
32
  from starlette.requests import Request as StarletteRequest
33
33
 
34
34
  from nucliadb.common import datamanagers
35
+ from nucliadb.common.back_pressure import maybe_back_pressure
35
36
  from nucliadb.ingest.orm.utils import set_title
36
37
  from nucliadb.models.internal.processing import PushPayload, Source
37
38
  from nucliadb.models.responses import HTTPClientError
@@ -43,7 +44,6 @@ from nucliadb.writer.api.v1.resource import (
43
44
  validate_rid_exists_or_raise_error,
44
45
  )
45
46
  from nucliadb.writer.api.v1.slug import ensure_slug_uniqueness, noop_context_manager
46
- from nucliadb.writer.back_pressure import maybe_back_pressure
47
47
  from nucliadb.writer.resource.audit import parse_audit
48
48
  from nucliadb.writer.resource.basic import parse_basic_creation, parse_user_classifications
49
49
  from nucliadb.writer.resource.field import (
@@ -215,7 +215,7 @@ async def _tus_post(
215
215
  detail="Cannot hide a resource: the KB does not have hidden resources enabled",
216
216
  )
217
217
 
218
- await maybe_back_pressure(request, kbid, resource_uuid=path_rid)
218
+ await maybe_back_pressure(kbid, resource_uuid=path_rid)
219
219
 
220
220
  dm = get_dm()
221
221
  storage_manager = get_storage_manager()
@@ -713,7 +713,7 @@ async def _upload(
713
713
  if path_rid is not None:
714
714
  await validate_rid_exists_or_raise_error(kbid, path_rid)
715
715
 
716
- await maybe_back_pressure(request, kbid, resource_uuid=path_rid)
716
+ await maybe_back_pressure(kbid, resource_uuid=path_rid)
717
717
 
718
718
  md5_user = x_md5
719
719
  path, rid, valid_field = await validate_field_upload(kbid, path_rid, field, md5_user)
@@ -21,12 +21,12 @@ from contextlib import asynccontextmanager
21
21
 
22
22
  from fastapi import FastAPI
23
23
 
24
+ from nucliadb.common.back_pressure import start_materializer, stop_materializer
25
+ from nucliadb.common.back_pressure.settings import settings as back_pressure_settings
24
26
  from nucliadb.common.context.fastapi import inject_app_context
25
27
  from nucliadb.ingest.processing import start_processing_engine, stop_processing_engine
26
28
  from nucliadb.ingest.utils import start_ingest, stop_ingest
27
29
  from nucliadb.writer import SERVICE_NAME
28
- from nucliadb.writer.back_pressure import start_materializer, stop_materializer
29
- from nucliadb.writer.settings import back_pressure_settings
30
30
  from nucliadb.writer.tus import finalize as storage_finalize
31
31
  from nucliadb.writer.tus import initialize as storage_initialize
32
32
  from nucliadb_telemetry.utils import clean_telemetry, setup_telemetry
@@ -19,7 +19,6 @@
19
19
  #
20
20
  from typing import Optional
21
21
 
22
- from pydantic import Field
23
22
  from pydantic_settings import BaseSettings
24
23
 
25
24
 
@@ -29,54 +28,4 @@ class Settings(BaseSettings):
29
28
  dm_redis_port: Optional[int] = None
30
29
 
31
30
 
32
- class BackPressureSettings(BaseSettings):
33
- enabled: bool = Field(
34
- default=False,
35
- description="Enable or disable back pressure.",
36
- alias="back_pressure_enabled",
37
- )
38
- indexing_rate: float = Field(
39
- default=10,
40
- description="Estimation of the indexing rate in messages per second. This is used to calculate the try again in time", # noqa
41
- )
42
- ingest_rate: float = Field(
43
- default=4,
44
- description="Estimation of the ingest processed consumer rate in messages per second. This is used to calculate the try again in time", # noqa
45
- )
46
- processing_rate: float = Field(
47
- default=1,
48
- description="Estimation of the processing rate in messages per second. This is used to calculate the try again in time", # noqa
49
- )
50
- max_indexing_pending: int = Field(
51
- default=1000,
52
- description="Max number of messages pending to index in a node queue before rate limiting writes. Set to 0 to disable indexing back pressure checks", # noqa
53
- alias="back_pressure_max_indexing_pending",
54
- )
55
- max_ingest_pending: int = Field(
56
- # Disabled by default
57
- default=0,
58
- description="Max number of messages pending to be ingested by processed consumers before rate limiting writes. Set to 0 to disable ingest back pressure checks", # noqa
59
- alias="back_pressure_max_ingest_pending",
60
- )
61
- max_processing_pending: int = Field(
62
- default=1000,
63
- description="Max number of messages pending to process per Knowledge Box before rate limiting writes. Set to 0 to disable processing back pressure checks", # noqa
64
- alias="back_pressure_max_processing_pending",
65
- )
66
- indexing_check_interval: int = Field(
67
- default=30,
68
- description="Interval in seconds to check the indexing pending messages",
69
- )
70
- ingest_check_interval: int = Field(
71
- default=30,
72
- description="Interval in seconds to check the ingest pending messages",
73
- )
74
- max_wait_time: int = Field(
75
- default=60,
76
- description="Max time in seconds to wait before trying again after back pressure",
77
- )
78
-
79
-
80
31
  settings = Settings()
81
-
82
- back_pressure_settings = BackPressureSettings()
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nucliadb
3
- Version: 6.4.0.post4200
3
+ Version: 6.4.0.post4204
4
4
  Summary: NucliaDB
5
5
  Author-email: Nuclia <nucliadb@nuclia.com>
6
6
  License: AGPL
@@ -20,11 +20,11 @@ Classifier: Programming Language :: Python :: 3.12
20
20
  Classifier: Programming Language :: Python :: 3 :: Only
21
21
  Requires-Python: <4,>=3.9
22
22
  Description-Content-Type: text/markdown
23
- Requires-Dist: nucliadb-telemetry[all]>=6.4.0.post4200
24
- Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.4.0.post4200
25
- Requires-Dist: nucliadb-protos>=6.4.0.post4200
26
- Requires-Dist: nucliadb-models>=6.4.0.post4200
27
- Requires-Dist: nidx-protos>=6.4.0.post4200
23
+ Requires-Dist: nucliadb-telemetry[all]>=6.4.0.post4204
24
+ Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.4.0.post4204
25
+ Requires-Dist: nucliadb-protos>=6.4.0.post4204
26
+ Requires-Dist: nucliadb-models>=6.4.0.post4204
27
+ Requires-Dist: nidx-protos>=6.4.0.post4204
28
28
  Requires-Dist: nucliadb-admin-assets>=1.0.0.post1224
29
29
  Requires-Dist: nuclia-models>=0.24.2
30
30
  Requires-Dist: uvicorn[standard]
@@ -60,6 +60,11 @@ nucliadb/common/ids.py,sha256=4QjoIofes_vtKj2HsFWZf8VVIVWXxdkYtLpx1n618Us,8239
60
60
  nucliadb/common/locking.py,sha256=RL0CabZVPzxHZyUjYeUyLvsJTm7W3J9o4fEgsY_ufNc,5896
61
61
  nucliadb/common/nidx.py,sha256=3EeQGjM_gxK0l_Rb54fspFWVNnzUiKF-_GMxTiiDC8Q,9116
62
62
  nucliadb/common/vector_index_config.py,sha256=LqGwhrDCp1q1vBow3scd1Chhr4GLYjYnGL72FKvOYYc,1552
63
+ nucliadb/common/back_pressure/__init__.py,sha256=paAcAZcfGRTyURF9lnn3vX0vcwakTEVswG_xcdGBH-U,928
64
+ nucliadb/common/back_pressure/cache.py,sha256=ANvXglWzI5naAD6N4E_fNi17qS6KNyAhjLeh6WlZZ84,2931
65
+ nucliadb/common/back_pressure/materializer.py,sha256=YzYfN7xI5nlmSowbdLktWIkrJJb3Q2vEmoyz9O3eb2s,11667
66
+ nucliadb/common/back_pressure/settings.py,sha256=3qNOzbI0KC6LMy-wMilXRSBfZu6CCpGHod26MTgAZ2o,3082
67
+ nucliadb/common/back_pressure/utils.py,sha256=aZeP1XSkdgaRgZC76yR9Kje3511ZUCp7KB-XzcvhMYY,2018
63
68
  nucliadb/common/cluster/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
64
69
  nucliadb/common/cluster/exceptions.py,sha256=t7v_l93t44l2tQpdQXgO_w-c4YZRcaayOz1A2i0w4RQ,1258
65
70
  nucliadb/common/cluster/grpc_node_dummy.py,sha256=JkufazWzMA4KFEU8EBkMbiiDW4C8lLcRhiiCxP7aCQY,2949
@@ -328,28 +333,27 @@ nucliadb/train/generators/token_classifier.py,sha256=DdyMbrpxIVGWdTcz3SEN_3HwxKf
328
333
  nucliadb/train/generators/utils.py,sha256=ZNwvEVPZr-eP0MW3ABN7a11hPQKaa0NdVaRcgBcTp5w,3601
329
334
  nucliadb/writer/__init__.py,sha256=S298mrZL3vr62OrBqi97mdLxgR5cReMlRJgnaQHZV7s,1304
330
335
  nucliadb/writer/app.py,sha256=ABBO8-u4pDAa61b3mCdD0TFhuHAYcxMkgpZSGgWARuE,2736
331
- nucliadb/writer/back_pressure.py,sha256=4OwFGq9pvAbChB3WBZAY36lclfD-gD2ouC6YsKA4bIo,16892
332
336
  nucliadb/writer/exceptions.py,sha256=-Z7LW--eid7PNeKFuzo9kAlbLEBMUosxE-UVIgGD3SA,929
333
- nucliadb/writer/lifecycle.py,sha256=OYyhUZ1ejlybPzO-O_EsInjdifKiPiEzooy2d_2DW3k,2550
337
+ nucliadb/writer/lifecycle.py,sha256=P1b_KoNkMTeF1IbyDCh_zhexWbeYe5LH6p2iFSJPiN4,2576
334
338
  nucliadb/writer/openapi.py,sha256=thqCO1ht_RJgOkXs-aIsv8aXJrU5z8wo2n05l2_LqMs,1032
335
339
  nucliadb/writer/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
336
340
  nucliadb/writer/run.py,sha256=euVZ_rtHDXs-O1kB-Pt1Id8eft9CYVpWH3zJzEoEqls,1448
337
- nucliadb/writer/settings.py,sha256=pA9aMAvY8H6zvsxAOdGY8SZLrThDvJ8KLhluGI0GxnQ,3288
341
+ nucliadb/writer/settings.py,sha256=gKtCTDF2E1m6lYL0Iv4WwY4VZuvw1Dsa-uIBZxCHTdU,1071
338
342
  nucliadb/writer/utilities.py,sha256=AZ5qEny1Xm0IDsFtH13oJa2usvJZK8f0FdgF1LrnLCw,1036
339
343
  nucliadb/writer/api/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
340
344
  nucliadb/writer/api/constants.py,sha256=qWEDjFUycrEZnSJyLnNK4PQNodU2oVmkO4NycaEZtio,1738
341
345
  nucliadb/writer/api/utils.py,sha256=wIQHlU8RQiIGVLI72suvyVIKlCU44Unh0Ae0IiN6Qwo,1313
342
346
  nucliadb/writer/api/v1/__init__.py,sha256=akI9A_jloNLb0dU4T5zjfdyvmSAiDeIdjAlzNx74FlU,1128
343
- nucliadb/writer/api/v1/export_import.py,sha256=elf-EQY5DD3mhw8kWb9tQpDcbrF9sY6VFYqxQOjuVP0,8201
344
- nucliadb/writer/api/v1/field.py,sha256=KOOBqBJzwsNczn_isxl-YFBL-bmduz3rzSDWMbAJefc,18523
347
+ nucliadb/writer/api/v1/export_import.py,sha256=v0sU55TtRSqDzwkDgcwv2uSaqKCuQTtGcMpYoHQYBQA,8192
348
+ nucliadb/writer/api/v1/field.py,sha256=OicvLF1bnkJj1ixALFLuhvFX6NCMFpORROcFcS9nKpk,18505
345
349
  nucliadb/writer/api/v1/knowledgebox.py,sha256=PHEYDFa-sN5JrI8-EiVVg5FDOsRuCLT43kyAB4xt-xA,9530
346
350
  nucliadb/writer/api/v1/learning_config.py,sha256=CKBjqcbewkfPwGUPLDWzZSpro6XkmCaVppe5Qtpu5Go,3117
347
- nucliadb/writer/api/v1/resource.py,sha256=jxphiyeXJq342BR1R8pRQ81L0i3Tczf_Yarqx_DqvWs,19786
351
+ nucliadb/writer/api/v1/resource.py,sha256=IaKHwP4M4Pm3xXj_xcnQCnTzKtXj_xj-r7YOHdH-89I,19750
348
352
  nucliadb/writer/api/v1/router.py,sha256=RjuoWLpZer6Kl2BW_wznpNo6XL3BOpdTGqXZCn3QrrQ,1034
349
353
  nucliadb/writer/api/v1/services.py,sha256=3AUjk-SmvqJx76v7y89DZx6oyasojPliGYeniRQjpcU,13337
350
354
  nucliadb/writer/api/v1/slug.py,sha256=xlVBDBpRi9bNulpBHZwhyftVvulfE0zFm1XZIWl-AKY,2389
351
355
  nucliadb/writer/api/v1/transaction.py,sha256=d2Vbgnkk_-FLGSTt3vfldwiJIUf0XoyD0wP1jQNz_DY,2430
352
- nucliadb/writer/api/v1/upload.py,sha256=fwWXA5BuLPuGKhOcuyf0CdutWJITjJ6fAvDzV_X9VsU,33809
356
+ nucliadb/writer/api/v1/upload.py,sha256=vdKurdxRU7vYlcQIXf5RNTuX-G0waBSak2HnNRmAbLk,33791
353
357
  nucliadb/writer/api/v1/vectorsets.py,sha256=F3iMViL5G95_Tns4aO2SOA0DwAzxK2_P8MXxtd_XLRE,6973
354
358
  nucliadb/writer/resource/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
355
359
  nucliadb/writer/resource/audit.py,sha256=FvxMZPzrNHtd31HgpZEvxzwAkbxJTZRhPLqRYYJi3tA,1426
@@ -365,8 +369,8 @@ nucliadb/writer/tus/local.py,sha256=7jYa_w9b-N90jWgN2sQKkNcomqn6JMVBOVeDOVYJHto,
365
369
  nucliadb/writer/tus/s3.py,sha256=vF0NkFTXiXhXq3bCVXXVV-ED38ECVoUeeYViP8uMqcU,8357
366
370
  nucliadb/writer/tus/storage.py,sha256=ToqwjoYnjI4oIcwzkhha_MPxi-k4Jk3Lt55zRwaC1SM,2903
367
371
  nucliadb/writer/tus/utils.py,sha256=MSdVbRsRSZVdkaum69_0wku7X3p5wlZf4nr6E0GMKbw,2556
368
- nucliadb-6.4.0.post4200.dist-info/METADATA,sha256=mubKUtJdgnEEdrwTcMBVP2xDBYfxCTsqDlxFa3TQugU,4223
369
- nucliadb-6.4.0.post4200.dist-info/WHEEL,sha256=0CuiUZ_p9E4cD6NyLD6UG80LBXYyiSYZOKDm5lp32xk,91
370
- nucliadb-6.4.0.post4200.dist-info/entry_points.txt,sha256=XqGfgFDuY3zXQc8ewXM2TRVjTModIq851zOsgrmaXx4,1268
371
- nucliadb-6.4.0.post4200.dist-info/top_level.txt,sha256=hwYhTVnX7jkQ9gJCkVrbqEG1M4lT2F_iPQND1fCzF80,20
372
- nucliadb-6.4.0.post4200.dist-info/RECORD,,
372
+ nucliadb-6.4.0.post4204.dist-info/METADATA,sha256=fAIY46KkkEIlOfObcPGceV3ZnO74SMottRW6kUPOFnU,4223
373
+ nucliadb-6.4.0.post4204.dist-info/WHEEL,sha256=0CuiUZ_p9E4cD6NyLD6UG80LBXYyiSYZOKDm5lp32xk,91
374
+ nucliadb-6.4.0.post4204.dist-info/entry_points.txt,sha256=XqGfgFDuY3zXQc8ewXM2TRVjTModIq851zOsgrmaXx4,1268
375
+ nucliadb-6.4.0.post4204.dist-info/top_level.txt,sha256=hwYhTVnX7jkQ9gJCkVrbqEG1M4lT2F_iPQND1fCzF80,20
376
+ nucliadb-6.4.0.post4204.dist-info/RECORD,,
@@ -1,485 +0,0 @@
1
- # Copyright (C) 2021 Bosutech XXI S.L.
2
- #
3
- # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
- # For commercial licensing, contact us at info@nuclia.com.
5
- #
6
- # AGPL:
7
- # This program is free software: you can redistribute it and/or modify
8
- # it under the terms of the GNU Affero General Public License as
9
- # published by the Free Software Foundation, either version 3 of the
10
- # License, or (at your option) any later version.
11
- #
12
- # This program is distributed in the hope that it will be useful,
13
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
- # GNU Affero General Public License for more details.
16
- #
17
- # You should have received a copy of the GNU Affero General Public License
18
- # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
- #
20
-
21
- import asyncio
22
- import contextlib
23
- import threading
24
- from dataclasses import dataclass
25
- from datetime import datetime, timedelta
26
- from typing import Optional
27
-
28
- from cachetools import TTLCache
29
- from fastapi import HTTPException, Request
30
-
31
- from nucliadb.common import datamanagers
32
- from nucliadb.common.context import ApplicationContext
33
- from nucliadb.common.context.fastapi import get_app_context
34
- from nucliadb.common.http_clients.processing import ProcessingHTTPClient
35
- from nucliadb.writer import logger
36
- from nucliadb.writer.settings import back_pressure_settings as settings
37
- from nucliadb_protos.writer_pb2 import ShardObject
38
- from nucliadb_telemetry import metrics
39
- from nucliadb_utils import const
40
- from nucliadb_utils.nats import NatsConnectionManager
41
- from nucliadb_utils.settings import is_onprem_nucliadb
42
-
43
- __all__ = ["maybe_back_pressure"]
44
-
45
-
46
- back_pressure_observer = metrics.Observer("nucliadb_back_pressure", labels={"type": ""})
47
-
48
-
49
- RATE_LIMITED_REQUESTS_COUNTER = metrics.Counter(
50
- "nucliadb_rate_limited_requests", labels={"type": "", "cached": ""}
51
- )
52
-
53
-
54
- @dataclass
55
- class BackPressureData:
56
- type: str
57
- try_after: datetime
58
-
59
-
60
- class BackPressureException(Exception):
61
- def __init__(self, data: BackPressureData):
62
- self.data = data
63
-
64
-
65
- def is_back_pressure_enabled() -> bool:
66
- return settings.enabled
67
-
68
-
69
- class BackPressureCache:
70
- """
71
- Global cache for storing already computed try again in times.
72
-
73
- It allows us to avoid making the same calculations multiple
74
- times if back pressure has been applied.
75
- """
76
-
77
- def __init__(self):
78
- self._cache = TTLCache(maxsize=1024, ttl=5 * 60)
79
- self._lock = threading.Lock()
80
-
81
- def get(self, key: str) -> Optional[BackPressureData]:
82
- with self._lock:
83
- data = self._cache.get(key, None)
84
- if data is None:
85
- return None
86
- if datetime.utcnow() >= data.try_after:
87
- # The key has expired, so remove it from the cache
88
- self._cache.pop(key, None)
89
- return None
90
- return data
91
-
92
- def set(self, key: str, data: BackPressureData):
93
- with self._lock:
94
- self._cache[key] = data
95
-
96
-
97
- _cache = BackPressureCache()
98
-
99
-
100
- @contextlib.contextmanager
101
- def cached_back_pressure(kbid: str, resource_uuid: Optional[str] = None):
102
- """
103
- Context manager that handles the caching of the try again in time so that
104
- we don't recompute try again times if we have already applied back pressure.
105
- """
106
-
107
- cache_key = "-".join([kbid, resource_uuid or ""])
108
-
109
- data: Optional[BackPressureData] = _cache.get(cache_key)
110
- if data is not None:
111
- try_after = data.try_after
112
- back_pressure_type = data.type
113
- RATE_LIMITED_REQUESTS_COUNTER.inc({"type": back_pressure_type, "cached": "true"})
114
- logger.info(
115
- "Back pressure applied from cache",
116
- extra={
117
- "type": back_pressure_type,
118
- "try_after": try_after,
119
- "kbid": kbid,
120
- "resource_uuid": resource_uuid,
121
- },
122
- )
123
- raise HTTPException(
124
- status_code=429,
125
- detail={
126
- "message": f"Too many messages pending to ingest. Retry after {try_after}",
127
- "try_after": try_after.timestamp(),
128
- "back_pressure_type": back_pressure_type,
129
- },
130
- )
131
- try:
132
- yield
133
- except BackPressureException as exc:
134
- try_after = exc.data.try_after
135
- back_pressure_type = exc.data.type
136
- RATE_LIMITED_REQUESTS_COUNTER.inc({"type": back_pressure_type, "cached": "false"})
137
- _cache.set(cache_key, exc.data)
138
- raise HTTPException(
139
- status_code=429,
140
- detail={
141
- "message": f"Too many messages pending to ingest. Retry after {try_after}",
142
- "try_after": try_after.timestamp(),
143
- "back_pressure_type": back_pressure_type,
144
- },
145
- )
146
-
147
-
148
- class Materializer:
149
- """
150
- Singleton class that will run in the background gathering the different
151
- stats to apply back pressure and materializing it in memory. This allows us
152
- to do stale-reads when checking if back pressure is needed for a particular
153
- request - thus not slowing it down.
154
- """
155
-
156
- def __init__(
157
- self,
158
- nats_manager: NatsConnectionManager,
159
- indexing_check_interval: int = 30,
160
- ingest_check_interval: int = 30,
161
- ):
162
- self.nats_manager = nats_manager
163
- self.processing_http_client = ProcessingHTTPClient()
164
-
165
- self.indexing_check_interval = indexing_check_interval
166
- self.ingest_check_interval = ingest_check_interval
167
-
168
- self.ingest_pending: int = 0
169
- self.indexing_pending: int = 0
170
-
171
- self._tasks: list[asyncio.Task] = []
172
- self._running = False
173
-
174
- self.processing_pending_cache = TTLCache(maxsize=1024, ttl=60) # type: ignore
175
- self.processing_pending_locks: dict[str, asyncio.Lock] = {}
176
-
177
- async def start(self):
178
- self._tasks.append(asyncio.create_task(self._get_indexing_pending_task()))
179
- self._tasks.append(asyncio.create_task(self._get_ingest_pending_task()))
180
- self._running = True
181
-
182
- async def stop(self):
183
- for task in self._tasks:
184
- task.cancel()
185
- self._tasks.clear()
186
- await self.processing_http_client.close()
187
- self._running = False
188
-
189
- @property
190
- def running(self) -> bool:
191
- return self._running
192
-
193
- async def get_processing_pending(self, kbid: str) -> int:
194
- """
195
- We don't materialize the pending messages for every kbid, but values are cached for some time.
196
- """
197
- cached = self.processing_pending_cache.get(kbid)
198
- if cached is not None:
199
- return cached
200
-
201
- lock = self.processing_pending_locks.setdefault(kbid, asyncio.Lock())
202
- async with lock:
203
- # Check again if the value has been cached while we were waiting for the lock
204
- cached = self.processing_pending_cache.get(kbid)
205
- if cached is not None:
206
- return cached
207
-
208
- # Get the pending messages and cache the result
209
- try:
210
- with back_pressure_observer({"type": "get_processing_pending"}):
211
- pending = await self._get_processing_pending(kbid)
212
- except Exception:
213
- # Do not cache if there was an error
214
- logger.exception(
215
- "Error getting pending messages to process. Back pressure on proccessing for KB can't be applied.",
216
- exc_info=True,
217
- extra={"kbid": kbid},
218
- )
219
- return 0
220
-
221
- if pending > 0:
222
- logger.info(
223
- f"Processing returned {pending} pending messages for KB",
224
- extra={"kbid": kbid},
225
- )
226
- self.processing_pending_cache[kbid] = pending
227
- return pending
228
-
229
- async def _get_processing_pending(self, kbid: str) -> int:
230
- response = await self.processing_http_client.stats(kbid=kbid, timeout=0.5)
231
- return response.incomplete
232
-
233
- def get_indexing_pending(self) -> int:
234
- return self.indexing_pending
235
-
236
- def get_ingest_pending(self) -> int:
237
- return self.ingest_pending
238
-
239
- async def _get_indexing_pending_task(self):
240
- try:
241
- while True:
242
- try:
243
- with back_pressure_observer({"type": "get_indexing_pending"}):
244
- self.indexing_pending = await get_nats_consumer_pending_messages(
245
- self.nats_manager,
246
- stream="nidx",
247
- consumer="nidx",
248
- )
249
- except Exception:
250
- logger.exception(
251
- "Error getting pending messages to index",
252
- exc_info=True,
253
- )
254
- await asyncio.sleep(self.indexing_check_interval)
255
- except asyncio.CancelledError:
256
- pass
257
-
258
- async def _get_ingest_pending_task(self):
259
- try:
260
- while True:
261
- try:
262
- with back_pressure_observer({"type": "get_ingest_pending"}):
263
- self.ingest_pending = await get_nats_consumer_pending_messages(
264
- self.nats_manager,
265
- stream=const.Streams.INGEST_PROCESSED.name,
266
- consumer=const.Streams.INGEST_PROCESSED.group,
267
- )
268
- except Exception:
269
- logger.exception(
270
- "Error getting pending messages to ingest",
271
- exc_info=True,
272
- )
273
- await asyncio.sleep(self.ingest_check_interval)
274
- except asyncio.CancelledError:
275
- pass
276
-
277
-
278
- MATERIALIZER: Optional[Materializer] = None
279
- materializer_lock = threading.Lock()
280
-
281
-
282
- async def start_materializer(context: ApplicationContext):
283
- global MATERIALIZER
284
- if MATERIALIZER is not None:
285
- logger.info("Materializer already started")
286
- return
287
- with materializer_lock:
288
- if MATERIALIZER is not None:
289
- return
290
- logger.info("Initializing materializer")
291
- try:
292
- nats_manager = context.nats_manager
293
- except AttributeError:
294
- logger.warning(
295
- "Could not initialize materializer. Nats manager not found or not initialized yet"
296
- )
297
- return
298
- materializer = Materializer(
299
- nats_manager,
300
- indexing_check_interval=settings.indexing_check_interval,
301
- ingest_check_interval=settings.ingest_check_interval,
302
- )
303
- await materializer.start()
304
- MATERIALIZER = materializer
305
-
306
-
307
- async def stop_materializer():
308
- global MATERIALIZER
309
- if MATERIALIZER is None or not MATERIALIZER.running:
310
- logger.info("Materializer already stopped")
311
- return
312
- with materializer_lock:
313
- if MATERIALIZER is None:
314
- return
315
- logger.info("Stopping materializer")
316
- await MATERIALIZER.stop()
317
- MATERIALIZER = None
318
-
319
-
320
- def get_materializer() -> Materializer:
321
- global MATERIALIZER
322
- if MATERIALIZER is None:
323
- raise RuntimeError("Materializer not initialized")
324
- return MATERIALIZER
325
-
326
-
327
- async def maybe_back_pressure(request: Request, kbid: str, resource_uuid: Optional[str] = None) -> None:
328
- """
329
- This function does system checks to see if we need to put back pressure on writes.
330
- In that case, a HTTP 429 will be raised with the estimated time to try again.
331
- """
332
- if not is_back_pressure_enabled() or is_onprem_nucliadb():
333
- return
334
- await back_pressure_checks(request, kbid, resource_uuid)
335
-
336
-
337
- async def back_pressure_checks(request: Request, kbid: str, resource_uuid: Optional[str] = None):
338
- """
339
- Will raise a 429 if back pressure is needed:
340
- - If the processing engine is behind.
341
- - If ingest processed consumer is behind.
342
- - If the indexing on nodes affected by the request (kbid, and resource_uuid) is behind.
343
- """
344
- context = get_app_context(request.app)
345
- materializer = get_materializer()
346
- with cached_back_pressure(kbid, resource_uuid):
347
- check_ingest_behind(materializer.get_ingest_pending())
348
- await check_indexing_behind(context, kbid, resource_uuid, materializer.get_indexing_pending())
349
- await check_processing_behind(materializer, kbid)
350
-
351
-
352
- async def check_processing_behind(materializer: Materializer, kbid: str):
353
- """
354
- This function checks if the processing engine is behind and may raise a 429
355
- if it is further behind than the configured threshold.
356
- """
357
- max_pending = settings.max_processing_pending
358
- if max_pending <= 0:
359
- # Processing back pressure is disabled
360
- return
361
-
362
- kb_pending = await materializer.get_processing_pending(kbid)
363
- if kb_pending > max_pending:
364
- try_after = estimate_try_after(
365
- rate=settings.processing_rate,
366
- pending=kb_pending,
367
- max_wait=settings.max_wait_time,
368
- )
369
- data = BackPressureData(type="processing", try_after=try_after)
370
- logger.info(
371
- "Processing back pressure applied",
372
- extra={
373
- "kbid": kbid,
374
- "try_after": try_after,
375
- "pending": kb_pending,
376
- },
377
- )
378
- raise BackPressureException(data)
379
-
380
-
381
- async def check_indexing_behind(
382
- context: ApplicationContext,
383
- kbid: str,
384
- resource_uuid: Optional[str],
385
- pending: int,
386
- ):
387
- """
388
- If a resource uuid is provided, it will check the nodes that have the replicas
389
- of the resource's shard, otherwise it will check the nodes of all active shards
390
- for the KnowledgeBox.
391
- """
392
- max_pending = settings.max_indexing_pending
393
- if max_pending <= 0:
394
- # Indexing back pressure is disabled
395
- return
396
-
397
- if pending > max_pending:
398
- try_after = estimate_try_after(
399
- rate=settings.indexing_rate,
400
- pending=pending,
401
- max_wait=settings.max_wait_time,
402
- )
403
- data = BackPressureData(type="indexing", try_after=try_after)
404
- logger.info(
405
- "Indexing back pressure applied",
406
- extra={
407
- "kbid": kbid,
408
- "resource_uuid": resource_uuid,
409
- "try_after": try_after,
410
- "pending": pending,
411
- },
412
- )
413
- raise BackPressureException(data)
414
-
415
-
416
- def check_ingest_behind(ingest_pending: int):
417
- max_pending = settings.max_ingest_pending
418
- if max_pending <= 0:
419
- # Ingest back pressure is disabled
420
- return
421
-
422
- if ingest_pending > max_pending:
423
- try_after = estimate_try_after(
424
- rate=settings.ingest_rate,
425
- pending=ingest_pending,
426
- max_wait=settings.max_wait_time,
427
- )
428
- data = BackPressureData(type="ingest", try_after=try_after)
429
- logger.info(
430
- "Ingest back pressure applied",
431
- extra={"try_after": try_after, "pending": ingest_pending},
432
- )
433
- raise BackPressureException(data)
434
-
435
-
436
- def estimate_try_after(rate: float, pending: int, max_wait: int) -> datetime:
437
- """
438
- This function estimates the time to try again based on the rate and the number of pending messages.
439
- """
440
- delta_seconds = min(pending / rate, max_wait)
441
- return datetime.utcnow() + timedelta(seconds=delta_seconds)
442
-
443
-
444
- async def get_nats_consumer_pending_messages(
445
- nats_manager: NatsConnectionManager, *, stream: str, consumer: str
446
- ) -> int:
447
- # get raw js client
448
- js = nats_manager.js
449
- consumer_info = await js.consumer_info(stream, consumer)
450
- return consumer_info.num_pending
451
-
452
-
453
- async def get_kb_active_shard(context: ApplicationContext, kbid: str) -> Optional[ShardObject]:
454
- async with context.kv_driver.transaction(read_only=True) as txn:
455
- return await context.shard_manager.get_current_active_shard(txn, kbid)
456
-
457
-
458
- async def get_resource_shard(
459
- context: ApplicationContext, kbid: str, resource_uuid: str
460
- ) -> Optional[ShardObject]:
461
- async with datamanagers.with_ro_transaction() as txn:
462
- shard_id = await datamanagers.resources.get_resource_shard_id(txn, kbid=kbid, rid=resource_uuid)
463
- if shard_id is None:
464
- # Resource does not exist
465
- logger.debug(
466
- "Resource shard not found",
467
- extra={"kbid": kbid, "resource_uuid": resource_uuid},
468
- )
469
- return None
470
-
471
- all_shards = await datamanagers.cluster.get_kb_shards(txn, kbid=kbid)
472
- if all_shards is None:
473
- # KB doesn't exist or has been deleted
474
- logger.debug("No shards found for KB", extra={"kbid": kbid})
475
- return None
476
-
477
- for shard in all_shards.shards:
478
- if shard.shard == shard_id:
479
- return shard
480
- else:
481
- logger.error(
482
- "Resource shard not found",
483
- extra={"kbid": kbid, "resource_uuid": resource_uuid, "shard_id": shard_id},
484
- )
485
- return None