nucliadb 6.4.0.post4210__py3-none-any.whl → 6.4.0.post4213__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
nucliadb/ingest/app.py CHANGED
@@ -103,7 +103,7 @@ async def initialize_pull_workers() -> list[Callable[[], Awaitable[None]]]:
103
103
  finalizers = await initialize_grpc()
104
104
  pull_workers = await consumer_service.start_pull_workers(SERVICE_NAME)
105
105
 
106
- return [pull_workers] + finalizers
106
+ return pull_workers + finalizers
107
107
 
108
108
 
109
109
  async def main_consumer(): # pragma: no cover
@@ -117,7 +117,7 @@ async def main_consumer(): # pragma: no cover
117
117
  ingest_consumers = await consumer_service.start_ingest_consumers(SERVICE_NAME)
118
118
 
119
119
  await run_until_exit(
120
- [grpc_health_finalizer, pull_workers, ingest_consumers, metrics_server.shutdown] + finalizers
120
+ [grpc_health_finalizer, ingest_consumers, metrics_server.shutdown] + pull_workers + finalizers
121
121
  )
122
122
 
123
123
 
@@ -19,11 +19,14 @@
19
19
  #
20
20
  import asyncio
21
21
  import base64
22
+ from datetime import datetime, timezone
22
23
  from typing import Optional
23
24
 
24
25
  from aiohttp.client_exceptions import ClientConnectorError
25
26
 
26
27
  from nucliadb.common import datamanagers
28
+ from nucliadb.common.back_pressure.materializer import BackPressureMaterializer
29
+ from nucliadb.common.back_pressure.utils import BackPressureException
27
30
  from nucliadb.common.http_clients.processing import ProcessingHTTPClient, get_nua_api_id
28
31
  from nucliadb.common.maindb.driver import Driver
29
32
  from nucliadb.ingest import logger, logger_activity
@@ -57,6 +60,7 @@ class PullWorker:
57
60
  local_subscriber: bool = False,
58
61
  pull_time_empty_backoff: float = 5.0,
59
62
  pull_api_timeout: int = 60,
63
+ back_pressure: Optional[BackPressureMaterializer] = None,
60
64
  ):
61
65
  self.partition = partition
62
66
  self.pull_time_error_backoff = pull_time_error_backoff
@@ -65,6 +69,7 @@ class PullWorker:
65
69
  self.local_subscriber = local_subscriber
66
70
 
67
71
  self.processor = Processor(driver, storage, pubsub, partition)
72
+ self.back_pressure = back_pressure
68
73
 
69
74
  def __str__(self) -> str:
70
75
  return f"PullWorker(partition={self.partition})"
@@ -112,11 +117,29 @@ class PullWorker:
112
117
  transaction_check=False,
113
118
  )
114
119
 
120
async def back_pressure_check(self) -> None:
    """
    Block until the back pressure materializer stops reporting pressure.

    Does nothing when no materializer was configured (`self.back_pressure`
    is None). Otherwise the indexing and ingest checks are retried, sleeping
    between attempts, until both pass.

    Any error other than BackPressureException is reported and swallowed so
    that a broken back pressure check never prevents the caller from moving
    on.
    """
    if self.back_pressure is None:
        return

    # Floor for every wait. If `try_after` is already in the past while the
    # checks still raise, this keeps the loop from spinning and flooding the
    # log with warnings.
    min_sleep_seconds = 1.0

    while True:
        try:
            self.back_pressure.check_indexing()
            self.back_pressure.check_ingest()
        except BackPressureException as exc:
            # `try_after` is presumably the earliest moment a retry makes
            # sense (confirm against BackPressureException's data model), so
            # the wait is the time remaining until then. Subtracting the
            # other way round gives a negative delay, which asyncio.sleep
            # treats as "do not wait at all".
            remaining = (exc.data.try_after - datetime.now(timezone.utc)).total_seconds()
            sleep_time = max(remaining, min_sleep_seconds)
            logger.warning(f"Back pressure active! Sleeping for {sleep_time} seconds", exc_info=True)
            await asyncio.sleep(sleep_time)
        except Exception as e:
            # Best effort: report the failure and let the caller proceed.
            errors.capture_exception(e)
            logger.exception("Error while checking back pressure. Moving on")
            return
        else:
            # Both checks passed: no back pressure right now.
            return
136
+
115
137
  async def loop(self):
116
138
  """
117
139
  Run this forever
118
140
  """
119
141
  while True:
142
+ await self.back_pressure_check()
120
143
  try:
121
144
  await self._loop()
122
145
  except ReallyStopPulling:
@@ -22,18 +22,22 @@ import sys
22
22
  from functools import partial
23
23
  from typing import Awaitable, Callable, Optional
24
24
 
25
+ from nucliadb.common.back_pressure.materializer import BackPressureMaterializer
26
+ from nucliadb.common.back_pressure.settings import settings as back_pressure_settings
27
+ from nucliadb.common.back_pressure.utils import is_back_pressure_enabled
25
28
  from nucliadb.common.maindb.utils import setup_driver
26
29
  from nucliadb.ingest import SERVICE_NAME, logger
27
30
  from nucliadb.ingest.consumer.consumer import IngestConsumer, IngestProcessedConsumer
28
31
  from nucliadb.ingest.consumer.pull import PullWorker
29
32
  from nucliadb.ingest.settings import settings
30
33
  from nucliadb_utils.exceptions import ConfigurationError
31
- from nucliadb_utils.settings import transaction_settings
34
+ from nucliadb_utils.settings import indexing_settings, transaction_settings
32
35
  from nucliadb_utils.utilities import (
33
36
  get_audit,
34
37
  get_nats_manager,
35
38
  get_pubsub,
36
39
  get_storage,
40
+ start_nats_manager,
37
41
  )
38
42
 
39
43
  from .auditing import IndexAuditHandler, ResourceWritesAuditHandler
@@ -54,12 +58,38 @@ async def _exit_tasks(tasks: list[asyncio.Task]) -> None:
54
58
  await asyncio.gather(*tasks, return_exceptions=True)
55
59
 
56
60
 
61
async def start_back_pressure() -> BackPressureMaterializer:
    """
    Create and start a back pressure materializer.

    A NATS manager is started for SERVICE_NAME using the index jetstream
    servers and auth taken from `indexing_settings`. The materializer is
    built on top of it with the check intervals from the back pressure
    settings, started, and returned to the caller.
    """
    manager = await start_nats_manager(
        SERVICE_NAME,
        indexing_settings.index_jetstream_servers,
        indexing_settings.index_jetstream_auth,
    )
    intervals = {
        "indexing_check_interval": back_pressure_settings.indexing_check_interval,
        "ingest_check_interval": back_pressure_settings.ingest_check_interval,
    }
    materializer = BackPressureMaterializer(manager, **intervals)
    await materializer.start()
    return materializer
74
+
75
+
76
async def stop_back_pressure(materializer: BackPressureMaterializer) -> None:
    """
    Stop the given materializer, then finalize its NATS manager.

    The manager is looked up on the materializer only after `stop()` has
    completed.
    """
    await materializer.stop()
    manager = materializer.nats_manager
    await manager.finalize()
79
+
80
+
57
81
  async def start_pull_workers(
58
82
  service_name: Optional[str] = None,
59
- ) -> Callable[[], Awaitable[None]]:
83
+ ) -> list[Callable[[], Awaitable[None]]]:
84
+ finalizers: list[Callable[[], Awaitable[None]]] = []
85
+
60
86
  driver = await setup_driver()
61
87
  pubsub = await get_pubsub()
62
88
  storage = await get_storage(service_name=service_name or SERVICE_NAME)
89
+ back_pressure = None
90
+ if is_back_pressure_enabled():
91
+ back_pressure = await start_back_pressure()
92
+ finalizers.append(partial(stop_back_pressure, back_pressure))
63
93
  tasks = []
64
94
  for partition in settings.partitions:
65
95
  worker = PullWorker(
@@ -70,12 +100,14 @@ async def start_pull_workers(
70
100
  pubsub=pubsub,
71
101
  local_subscriber=transaction_settings.transaction_local,
72
102
  pull_api_timeout=settings.pull_api_timeout,
103
+ back_pressure=back_pressure,
73
104
  )
74
105
  task = asyncio.create_task(worker.loop())
75
106
  task.add_done_callback(_handle_task_result)
76
107
  tasks.append(task)
77
-
78
- return partial(_exit_tasks, tasks)
108
+ if len(tasks):
109
+ finalizers.append(partial(_exit_tasks, tasks))
110
+ return finalizers
79
111
 
80
112
 
81
113
  async def start_ingest_consumers(
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nucliadb
3
- Version: 6.4.0.post4210
3
+ Version: 6.4.0.post4213
4
4
  Summary: NucliaDB
5
5
  Author-email: Nuclia <nucliadb@nuclia.com>
6
6
  License: AGPL
@@ -20,11 +20,11 @@ Classifier: Programming Language :: Python :: 3.12
20
20
  Classifier: Programming Language :: Python :: 3 :: Only
21
21
  Requires-Python: <4,>=3.9
22
22
  Description-Content-Type: text/markdown
23
- Requires-Dist: nucliadb-telemetry[all]>=6.4.0.post4210
24
- Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.4.0.post4210
25
- Requires-Dist: nucliadb-protos>=6.4.0.post4210
26
- Requires-Dist: nucliadb-models>=6.4.0.post4210
27
- Requires-Dist: nidx-protos>=6.4.0.post4210
23
+ Requires-Dist: nucliadb-telemetry[all]>=6.4.0.post4213
24
+ Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.4.0.post4213
25
+ Requires-Dist: nucliadb-protos>=6.4.0.post4213
26
+ Requires-Dist: nucliadb-models>=6.4.0.post4213
27
+ Requires-Dist: nidx-protos>=6.4.0.post4213
28
28
  Requires-Dist: nucliadb-admin-assets>=1.0.0.post1224
29
29
  Requires-Dist: nuclia-models>=0.24.2
30
30
  Requires-Dist: uvicorn[standard]
@@ -122,7 +122,7 @@ nucliadb/export_import/models.py,sha256=dbjScNkiMRv4X3Ktudy1JRliD25bfoDTy3JmEZgQ
122
122
  nucliadb/export_import/tasks.py,sha256=DWbdqY97ffoyfipelGXz3Jqz1iam6JCjQSh367Fc3NA,2947
123
123
  nucliadb/export_import/utils.py,sha256=8XOVMYXXw8b4ikojG7RjQ4tKN3Xu7nfu2yCUOqD50sk,23216
124
124
  nucliadb/ingest/__init__.py,sha256=fsw3C38VP50km3R-nHL775LNGPpJ4JxqXJ2Ib1f5SqE,1011
125
- nucliadb/ingest/app.py,sha256=KCptzFq1Msq4eHFxvEol4TFwSLdmkG2v1EfQ3C8PhyY,7547
125
+ nucliadb/ingest/app.py,sha256=BKmjpdBEskHcRIHwOnI_jG4gFGs6dV0KKVH9MLJeA48,7546
126
126
  nucliadb/ingest/partitions.py,sha256=2NIhMYbNT0TNBL6bX1UMSi7vxFGICstCKEqsB0TXHOE,2410
127
127
  nucliadb/ingest/processing.py,sha256=QmkHq-BU4vub7JRWe9VHvQ2DcAmT6-CzgFXuZxXhcBU,20953
128
128
  nucliadb/ingest/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -134,8 +134,8 @@ nucliadb/ingest/consumer/auditing.py,sha256=xK21DIa_ZAiOJVVbnkmT4jgCRGshNGyPyxsq
134
134
  nucliadb/ingest/consumer/consumer.py,sha256=OgS1fr5Yo55u-XbC6zypTH1aJ562Y1vZHnPDlJJpCXQ,13703
135
135
  nucliadb/ingest/consumer/materializer.py,sha256=tgD_rDI2twQzcz8kKNiW_L4YIth16IGh9mUfD5wiSD4,3858
136
136
  nucliadb/ingest/consumer/metrics.py,sha256=ji1l_4cKiHJthQd8YNem1ft4iMbw9KThmVvJmLcv3Xg,1075
137
- nucliadb/ingest/consumer/pull.py,sha256=EYT0ImngMQgatStG68p2GSrPQBbJxeuq8nFm8DdAbwk,9280
138
- nucliadb/ingest/consumer/service.py,sha256=BLM_dmKZkFBsYl3sj4MZZp5M3kkxHLuO7sE18PqIatw,6538
137
+ nucliadb/ingest/consumer/pull.py,sha256=vv1AyN0EhVgbgnZyT0D_1_IB4hWy7jPd4lAWPAOHGNc,10374
138
+ nucliadb/ingest/consumer/service.py,sha256=mWzMQS1QkWmJNrkIahEZsn7jb8NbY9FRvPz89NeTT-4,7842
139
139
  nucliadb/ingest/consumer/shard_creator.py,sha256=w0smEu01FU_2cjZnsfBRNqT_Ntho11X17zTMST-vKbc,4359
140
140
  nucliadb/ingest/consumer/utils.py,sha256=jpX8D4lKzuPCpArQLZeX_Zczq3pfen_zAf8sPJfOEZU,2642
141
141
  nucliadb/ingest/fields/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
@@ -369,8 +369,8 @@ nucliadb/writer/tus/local.py,sha256=7jYa_w9b-N90jWgN2sQKkNcomqn6JMVBOVeDOVYJHto,
369
369
  nucliadb/writer/tus/s3.py,sha256=vF0NkFTXiXhXq3bCVXXVV-ED38ECVoUeeYViP8uMqcU,8357
370
370
  nucliadb/writer/tus/storage.py,sha256=ToqwjoYnjI4oIcwzkhha_MPxi-k4Jk3Lt55zRwaC1SM,2903
371
371
  nucliadb/writer/tus/utils.py,sha256=MSdVbRsRSZVdkaum69_0wku7X3p5wlZf4nr6E0GMKbw,2556
372
- nucliadb-6.4.0.post4210.dist-info/METADATA,sha256=SB9gIMgWxoWNtUEexRLH85E0PL-MnroGhJ6aOambTT4,4223
373
- nucliadb-6.4.0.post4210.dist-info/WHEEL,sha256=0CuiUZ_p9E4cD6NyLD6UG80LBXYyiSYZOKDm5lp32xk,91
374
- nucliadb-6.4.0.post4210.dist-info/entry_points.txt,sha256=XqGfgFDuY3zXQc8ewXM2TRVjTModIq851zOsgrmaXx4,1268
375
- nucliadb-6.4.0.post4210.dist-info/top_level.txt,sha256=hwYhTVnX7jkQ9gJCkVrbqEG1M4lT2F_iPQND1fCzF80,20
376
- nucliadb-6.4.0.post4210.dist-info/RECORD,,
372
+ nucliadb-6.4.0.post4213.dist-info/METADATA,sha256=PAI_c9PMh-wJWIS4SmAAltmQcXStRUi6tKINdrNKJRM,4223
373
+ nucliadb-6.4.0.post4213.dist-info/WHEEL,sha256=0CuiUZ_p9E4cD6NyLD6UG80LBXYyiSYZOKDm5lp32xk,91
374
+ nucliadb-6.4.0.post4213.dist-info/entry_points.txt,sha256=XqGfgFDuY3zXQc8ewXM2TRVjTModIq851zOsgrmaXx4,1268
375
+ nucliadb-6.4.0.post4213.dist-info/top_level.txt,sha256=hwYhTVnX7jkQ9gJCkVrbqEG1M4lT2F_iPQND1fCzF80,20
376
+ nucliadb-6.4.0.post4213.dist-info/RECORD,,