nucliadb 6.4.0.post4210__py3-none-any.whl → 6.4.0.post4213__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nucliadb/ingest/app.py +2 -2
- nucliadb/ingest/consumer/pull.py +23 -0
- nucliadb/ingest/consumer/service.py +36 -4
- {nucliadb-6.4.0.post4210.dist-info → nucliadb-6.4.0.post4213.dist-info}/METADATA +6 -6
- {nucliadb-6.4.0.post4210.dist-info → nucliadb-6.4.0.post4213.dist-info}/RECORD +8 -8
- {nucliadb-6.4.0.post4210.dist-info → nucliadb-6.4.0.post4213.dist-info}/WHEEL +0 -0
- {nucliadb-6.4.0.post4210.dist-info → nucliadb-6.4.0.post4213.dist-info}/entry_points.txt +0 -0
- {nucliadb-6.4.0.post4210.dist-info → nucliadb-6.4.0.post4213.dist-info}/top_level.txt +0 -0
nucliadb/ingest/app.py
CHANGED
@@ -103,7 +103,7 @@ async def initialize_pull_workers() -> list[Callable[[], Awaitable[None]]]:
|
|
103
103
|
finalizers = await initialize_grpc()
|
104
104
|
pull_workers = await consumer_service.start_pull_workers(SERVICE_NAME)
|
105
105
|
|
106
|
-
return [pull_workers] + finalizers
|
106
|
+
return pull_workers + finalizers
|
107
107
|
|
108
108
|
|
109
109
|
async def main_consumer(): # pragma: no cover
|
@@ -117,7 +117,7 @@ async def main_consumer(): # pragma: no cover
|
|
117
117
|
ingest_consumers = await consumer_service.start_ingest_consumers(SERVICE_NAME)
|
118
118
|
|
119
119
|
await run_until_exit(
|
120
|
-
[grpc_health_finalizer, pull_workers, ingest_consumers, metrics_server.shutdown] + finalizers
|
120
|
+
[grpc_health_finalizer, ingest_consumers, metrics_server.shutdown] + pull_workers + finalizers
|
121
121
|
)
|
122
122
|
|
123
123
|
|
nucliadb/ingest/consumer/pull.py
CHANGED
@@ -19,11 +19,14 @@
|
|
19
19
|
#
|
20
20
|
import asyncio
|
21
21
|
import base64
|
22
|
+
from datetime import datetime, timezone
|
22
23
|
from typing import Optional
|
23
24
|
|
24
25
|
from aiohttp.client_exceptions import ClientConnectorError
|
25
26
|
|
26
27
|
from nucliadb.common import datamanagers
|
28
|
+
from nucliadb.common.back_pressure.materializer import BackPressureMaterializer
|
29
|
+
from nucliadb.common.back_pressure.utils import BackPressureException
|
27
30
|
from nucliadb.common.http_clients.processing import ProcessingHTTPClient, get_nua_api_id
|
28
31
|
from nucliadb.common.maindb.driver import Driver
|
29
32
|
from nucliadb.ingest import logger, logger_activity
|
@@ -57,6 +60,7 @@ class PullWorker:
|
|
57
60
|
local_subscriber: bool = False,
|
58
61
|
pull_time_empty_backoff: float = 5.0,
|
59
62
|
pull_api_timeout: int = 60,
|
63
|
+
back_pressure: Optional[BackPressureMaterializer] = None,
|
60
64
|
):
|
61
65
|
self.partition = partition
|
62
66
|
self.pull_time_error_backoff = pull_time_error_backoff
|
@@ -65,6 +69,7 @@ class PullWorker:
|
|
65
69
|
self.local_subscriber = local_subscriber
|
66
70
|
|
67
71
|
self.processor = Processor(driver, storage, pubsub, partition)
|
72
|
+
self.back_pressure = back_pressure
|
68
73
|
|
69
74
|
def __str__(self) -> str:
|
70
75
|
return f"PullWorker(partition={self.partition})"
|
@@ -112,11 +117,29 @@ class PullWorker:
|
|
112
117
|
transaction_check=False,
|
113
118
|
)
|
114
119
|
|
120
|
+
async def back_pressure_check(self) -> None:
    """Wait until the back pressure materializer reports no back pressure.

    Returns immediately when the worker was built without a materializer
    (``self.back_pressure is None``). Otherwise the indexing and ingest
    checks are run; while either raises ``BackPressureException`` the worker
    sleeps until the ``try_after`` instant carried by the exception and then
    checks again.

    Any other error raised by the checks is reported and logged, and the
    method returns so that pulling is never blocked by a failing check.
    """
    if self.back_pressure is None:
        return
    while True:
        try:
            self.back_pressure.check_indexing()
            self.back_pressure.check_ingest()
            break
        except BackPressureException as exc:
            # Time left until a retry is allowed is `try_after - now`. The
            # reversed subtraction is negative while back pressure is active,
            # which makes asyncio.sleep() return immediately and turns this
            # loop into a busy loop that logs a warning on every pass.
            # NOTE(review): assumes exc.data.try_after is a timezone-aware
            # datetime (the subtraction against an aware "now" requires it).
            remaining = exc.data.try_after - datetime.now(timezone.utc)
            # Clamp at zero: try_after may already be in the past.
            sleep_time = max(remaining.total_seconds(), 0.0)
            logger.warning(f"Back pressure active! Sleeping for {sleep_time} seconds", exc_info=True)
            await asyncio.sleep(sleep_time)
        except Exception as e:
            # Best effort: a broken check must not stop the pull loop.
            errors.capture_exception(e)
            logger.exception("Error while checking back pressure. Moving on")
            break
|
136
|
+
|
115
137
|
async def loop(self):
|
116
138
|
"""
|
117
139
|
Run this forever
|
118
140
|
"""
|
119
141
|
while True:
|
142
|
+
await self.back_pressure_check()
|
120
143
|
try:
|
121
144
|
await self._loop()
|
122
145
|
except ReallyStopPulling:
|
nucliadb/ingest/consumer/service.py
CHANGED
@@ -22,18 +22,22 @@ import sys
|
|
22
22
|
from functools import partial
|
23
23
|
from typing import Awaitable, Callable, Optional
|
24
24
|
|
25
|
+
from nucliadb.common.back_pressure.materializer import BackPressureMaterializer
|
26
|
+
from nucliadb.common.back_pressure.settings import settings as back_pressure_settings
|
27
|
+
from nucliadb.common.back_pressure.utils import is_back_pressure_enabled
|
25
28
|
from nucliadb.common.maindb.utils import setup_driver
|
26
29
|
from nucliadb.ingest import SERVICE_NAME, logger
|
27
30
|
from nucliadb.ingest.consumer.consumer import IngestConsumer, IngestProcessedConsumer
|
28
31
|
from nucliadb.ingest.consumer.pull import PullWorker
|
29
32
|
from nucliadb.ingest.settings import settings
|
30
33
|
from nucliadb_utils.exceptions import ConfigurationError
|
31
|
-
from nucliadb_utils.settings import transaction_settings
|
34
|
+
from nucliadb_utils.settings import indexing_settings, transaction_settings
|
32
35
|
from nucliadb_utils.utilities import (
|
33
36
|
get_audit,
|
34
37
|
get_nats_manager,
|
35
38
|
get_pubsub,
|
36
39
|
get_storage,
|
40
|
+
start_nats_manager,
|
37
41
|
)
|
38
42
|
|
39
43
|
from .auditing import IndexAuditHandler, ResourceWritesAuditHandler
|
@@ -54,12 +58,38 @@ async def _exit_tasks(tasks: list[asyncio.Task]) -> None:
|
|
54
58
|
await asyncio.gather(*tasks, return_exceptions=True)
|
55
59
|
|
56
60
|
|
61
|
+
async def start_back_pressure() -> BackPressureMaterializer:
    """Create and start the back pressure materializer.

    A NATS manager is started for this service using the index JetStream
    servers and auth from ``indexing_settings``. The materializer is built on
    top of it with the check intervals from the back pressure settings and
    started before being handed back.

    Returns:
        The started ``BackPressureMaterializer``.
    """
    manager = await start_nats_manager(
        SERVICE_NAME,
        indexing_settings.index_jetstream_servers,
        indexing_settings.index_jetstream_auth,
    )
    check_intervals = {
        "indexing_check_interval": back_pressure_settings.indexing_check_interval,
        "ingest_check_interval": back_pressure_settings.ingest_check_interval,
    }
    materializer = BackPressureMaterializer(manager, **check_intervals)
    await materializer.start()
    return materializer
|
74
|
+
|
75
|
+
|
76
|
+
async def stop_back_pressure(materializer: BackPressureMaterializer) -> None:
    """Stop a back pressure materializer and finalize its NATS manager.

    Args:
        materializer: The materializer to shut down; its ``nats_manager``
            attribute is finalized as part of the shutdown.

    Raises:
        Exception: Whatever ``materializer.stop()`` or
            ``nats_manager.finalize()`` raises is propagated to the caller.
    """
    try:
        await materializer.stop()
    finally:
        # Finalize the NATS manager even if stop() fails, so a failing stop
        # does not leave the manager unreleased at shutdown.
        await materializer.nats_manager.finalize()
|
79
|
+
|
80
|
+
|
57
81
|
async def start_pull_workers(
|
58
82
|
service_name: Optional[str] = None,
|
59
|
-
) -> Callable[[], Awaitable[None]]:
|
83
|
+
) -> list[Callable[[], Awaitable[None]]]:
|
84
|
+
finalizers: list[Callable[[], Awaitable[None]]] = []
|
85
|
+
|
60
86
|
driver = await setup_driver()
|
61
87
|
pubsub = await get_pubsub()
|
62
88
|
storage = await get_storage(service_name=service_name or SERVICE_NAME)
|
89
|
+
back_pressure = None
|
90
|
+
if is_back_pressure_enabled():
|
91
|
+
back_pressure = await start_back_pressure()
|
92
|
+
finalizers.append(partial(stop_back_pressure, back_pressure))
|
63
93
|
tasks = []
|
64
94
|
for partition in settings.partitions:
|
65
95
|
worker = PullWorker(
|
@@ -70,12 +100,14 @@ async def start_pull_workers(
|
|
70
100
|
pubsub=pubsub,
|
71
101
|
local_subscriber=transaction_settings.transaction_local,
|
72
102
|
pull_api_timeout=settings.pull_api_timeout,
|
103
|
+
back_pressure=back_pressure,
|
73
104
|
)
|
74
105
|
task = asyncio.create_task(worker.loop())
|
75
106
|
task.add_done_callback(_handle_task_result)
|
76
107
|
tasks.append(task)
|
77
|
-
|
78
|
-
|
108
|
+
if len(tasks):
|
109
|
+
finalizers.append(partial(_exit_tasks, tasks))
|
110
|
+
return finalizers
|
79
111
|
|
80
112
|
|
81
113
|
async def start_ingest_consumers(
|
{nucliadb-6.4.0.post4210.dist-info → nucliadb-6.4.0.post4213.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: nucliadb
|
3
|
-
Version: 6.4.0.post4210
|
3
|
+
Version: 6.4.0.post4213
|
4
4
|
Summary: NucliaDB
|
5
5
|
Author-email: Nuclia <nucliadb@nuclia.com>
|
6
6
|
License: AGPL
|
@@ -20,11 +20,11 @@ Classifier: Programming Language :: Python :: 3.12
|
|
20
20
|
Classifier: Programming Language :: Python :: 3 :: Only
|
21
21
|
Requires-Python: <4,>=3.9
|
22
22
|
Description-Content-Type: text/markdown
|
23
|
-
Requires-Dist: nucliadb-telemetry[all]>=6.4.0.post4210
|
24
|
-
Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.4.0.post4210
|
25
|
-
Requires-Dist: nucliadb-protos>=6.4.0.post4210
|
26
|
-
Requires-Dist: nucliadb-models>=6.4.0.post4210
|
27
|
-
Requires-Dist: nidx-protos>=6.4.0.post4210
|
23
|
+
Requires-Dist: nucliadb-telemetry[all]>=6.4.0.post4213
|
24
|
+
Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.4.0.post4213
|
25
|
+
Requires-Dist: nucliadb-protos>=6.4.0.post4213
|
26
|
+
Requires-Dist: nucliadb-models>=6.4.0.post4213
|
27
|
+
Requires-Dist: nidx-protos>=6.4.0.post4213
|
28
28
|
Requires-Dist: nucliadb-admin-assets>=1.0.0.post1224
|
29
29
|
Requires-Dist: nuclia-models>=0.24.2
|
30
30
|
Requires-Dist: uvicorn[standard]
|
{nucliadb-6.4.0.post4210.dist-info → nucliadb-6.4.0.post4213.dist-info}/RECORD
CHANGED
@@ -122,7 +122,7 @@ nucliadb/export_import/models.py,sha256=dbjScNkiMRv4X3Ktudy1JRliD25bfoDTy3JmEZgQ
|
|
122
122
|
nucliadb/export_import/tasks.py,sha256=DWbdqY97ffoyfipelGXz3Jqz1iam6JCjQSh367Fc3NA,2947
|
123
123
|
nucliadb/export_import/utils.py,sha256=8XOVMYXXw8b4ikojG7RjQ4tKN3Xu7nfu2yCUOqD50sk,23216
|
124
124
|
nucliadb/ingest/__init__.py,sha256=fsw3C38VP50km3R-nHL775LNGPpJ4JxqXJ2Ib1f5SqE,1011
|
125
|
-
nucliadb/ingest/app.py,sha256=
|
125
|
+
nucliadb/ingest/app.py,sha256=BKmjpdBEskHcRIHwOnI_jG4gFGs6dV0KKVH9MLJeA48,7546
|
126
126
|
nucliadb/ingest/partitions.py,sha256=2NIhMYbNT0TNBL6bX1UMSi7vxFGICstCKEqsB0TXHOE,2410
|
127
127
|
nucliadb/ingest/processing.py,sha256=QmkHq-BU4vub7JRWe9VHvQ2DcAmT6-CzgFXuZxXhcBU,20953
|
128
128
|
nucliadb/ingest/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
@@ -134,8 +134,8 @@ nucliadb/ingest/consumer/auditing.py,sha256=xK21DIa_ZAiOJVVbnkmT4jgCRGshNGyPyxsq
|
|
134
134
|
nucliadb/ingest/consumer/consumer.py,sha256=OgS1fr5Yo55u-XbC6zypTH1aJ562Y1vZHnPDlJJpCXQ,13703
|
135
135
|
nucliadb/ingest/consumer/materializer.py,sha256=tgD_rDI2twQzcz8kKNiW_L4YIth16IGh9mUfD5wiSD4,3858
|
136
136
|
nucliadb/ingest/consumer/metrics.py,sha256=ji1l_4cKiHJthQd8YNem1ft4iMbw9KThmVvJmLcv3Xg,1075
|
137
|
-
nucliadb/ingest/consumer/pull.py,sha256=
|
138
|
-
nucliadb/ingest/consumer/service.py,sha256=
|
137
|
+
nucliadb/ingest/consumer/pull.py,sha256=vv1AyN0EhVgbgnZyT0D_1_IB4hWy7jPd4lAWPAOHGNc,10374
|
138
|
+
nucliadb/ingest/consumer/service.py,sha256=mWzMQS1QkWmJNrkIahEZsn7jb8NbY9FRvPz89NeTT-4,7842
|
139
139
|
nucliadb/ingest/consumer/shard_creator.py,sha256=w0smEu01FU_2cjZnsfBRNqT_Ntho11X17zTMST-vKbc,4359
|
140
140
|
nucliadb/ingest/consumer/utils.py,sha256=jpX8D4lKzuPCpArQLZeX_Zczq3pfen_zAf8sPJfOEZU,2642
|
141
141
|
nucliadb/ingest/fields/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
|
@@ -369,8 +369,8 @@ nucliadb/writer/tus/local.py,sha256=7jYa_w9b-N90jWgN2sQKkNcomqn6JMVBOVeDOVYJHto,
|
|
369
369
|
nucliadb/writer/tus/s3.py,sha256=vF0NkFTXiXhXq3bCVXXVV-ED38ECVoUeeYViP8uMqcU,8357
|
370
370
|
nucliadb/writer/tus/storage.py,sha256=ToqwjoYnjI4oIcwzkhha_MPxi-k4Jk3Lt55zRwaC1SM,2903
|
371
371
|
nucliadb/writer/tus/utils.py,sha256=MSdVbRsRSZVdkaum69_0wku7X3p5wlZf4nr6E0GMKbw,2556
|
372
|
-
nucliadb-6.4.0.
|
373
|
-
nucliadb-6.4.0.
|
374
|
-
nucliadb-6.4.0.
|
375
|
-
nucliadb-6.4.0.
|
376
|
-
nucliadb-6.4.0.
|
372
|
+
nucliadb-6.4.0.post4213.dist-info/METADATA,sha256=PAI_c9PMh-wJWIS4SmAAltmQcXStRUi6tKINdrNKJRM,4223
|
373
|
+
nucliadb-6.4.0.post4213.dist-info/WHEEL,sha256=0CuiUZ_p9E4cD6NyLD6UG80LBXYyiSYZOKDm5lp32xk,91
|
374
|
+
nucliadb-6.4.0.post4213.dist-info/entry_points.txt,sha256=XqGfgFDuY3zXQc8ewXM2TRVjTModIq851zOsgrmaXx4,1268
|
375
|
+
nucliadb-6.4.0.post4213.dist-info/top_level.txt,sha256=hwYhTVnX7jkQ9gJCkVrbqEG1M4lT2F_iPQND1fCzF80,20
|
376
|
+
nucliadb-6.4.0.post4213.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|