nucliadb 6.4.0.post4276__py3-none-any.whl → 6.4.0.post4283__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nucliadb/common/http_clients/processing.py +80 -1
- nucliadb/ingest/app.py +22 -4
- nucliadb/ingest/consumer/consumer.py +2 -2
- nucliadb/ingest/consumer/pull.py +180 -3
- nucliadb/ingest/consumer/service.py +27 -1
- nucliadb/ingest/settings.py +9 -0
- nucliadb/tasks/consumer.py +2 -2
- {nucliadb-6.4.0.post4276.dist-info → nucliadb-6.4.0.post4283.dist-info}/METADATA +6 -6
- {nucliadb-6.4.0.post4276.dist-info → nucliadb-6.4.0.post4283.dist-info}/RECORD +12 -12
- {nucliadb-6.4.0.post4276.dist-info → nucliadb-6.4.0.post4283.dist-info}/WHEEL +0 -0
- {nucliadb-6.4.0.post4276.dist-info → nucliadb-6.4.0.post4283.dist-info}/entry_points.txt +0 -0
- {nucliadb-6.4.0.post4276.dist-info → nucliadb-6.4.0.post4283.dist-info}/top_level.txt +0 -0
nucliadb/common/http_clients/processing.py CHANGED

@@ -25,6 +25,7 @@ import aiohttp
 import jwt
 import pydantic

+from nucliadb_utils.helpers import MessageProgressUpdater
 from nucliadb_utils.settings import nuclia_settings

 from .utils import check_status
@@ -51,6 +52,16 @@ def get_processing_api_url() -> str:
     return nuclia_settings.nuclia_processing_cluster_url + "/api/v1/internal/processing"


+def get_processing_api_v2_url() -> str:
+    if nuclia_settings.nuclia_service_account:
+        return (
+            nuclia_settings.nuclia_public_url.format(zone=nuclia_settings.nuclia_zone)
+            + "/api/v2/processing"
+        )
+    else:
+        return nuclia_settings.nuclia_processing_cluster_url + "/api/v2/internal/processing"
+
+
 class PullResponse(pydantic.BaseModel):
     status: str
     payload: Optional[str] = None
@@ -150,11 +161,38 @@ class StatsResponse(pydantic.BaseModel):
     scheduled: int


+class PullRequestV2(pydantic.BaseModel):
+    timeout: float = 5
+    limit: int = 1
+    ack: list[str] = []
+
+
+class InProgressRequest(pydantic.BaseModel):
+    ack: list[str] = []
+
+
+class PulledMessage(pydantic.BaseModel):
+    payload: bytes
+    headers: dict[str, str]
+    ack_token: str
+    seq: int
+
+
+class PullResponseV2(pydantic.BaseModel):
+    messages: list[PulledMessage]
+    ttl: float
+    pending: int
+
+
+JSON_HEADERS = {"Content-Type": "application/json"}
+
+
 class ProcessingHTTPClient:
     def __init__(self):
         self.session = aiohttp.ClientSession()
         self.base_url = get_processing_api_url()
-        self.headers = {}
+        self.base_url_v2 = get_processing_api_v2_url()
+        self.headers: dict[str, str] = {}
         if nuclia_settings.nuclia_service_account is not None:
             self.headers["X-STF-NUAKEY"] = f"Bearer {nuclia_settings.nuclia_service_account}"

@@ -193,6 +231,31 @@ class ProcessingHTTPClient:
         data = PullPosition.model_validate_json(resp_text)
         return data.cursor

+    async def in_progress(self, ack_token: str):
+        url = self.base_url_v2 + "/pull/in_progress"
+        request = InProgressRequest(ack=[ack_token])
+        async with self.session.post(
+            url, headers=self.headers | JSON_HEADERS, data=request.model_dump_json()
+        ) as resp:
+            resp_text = await resp.text()
+            check_status(resp, resp_text)
+
+    async def pull_v2(
+        self, ack_tokens: list[str], limit: int = 1, timeout: float = 5
+    ) -> Optional[PullResponseV2]:
+        url = self.base_url_v2 + "/pull"
+        request = PullRequestV2(limit=limit, timeout=timeout, ack=ack_tokens)
+        async with self.session.post(
+            url, headers=self.headers | JSON_HEADERS, data=request.model_dump_json()
+        ) as resp:
+            resp_text = await resp.text()
+            check_status(resp, resp_text)
+
+            if resp.status == 204:
+                return None
+            else:
+                return PullResponseV2.model_validate_json(resp_text)
+
     async def requests(
         self,
         cursor: Optional[str] = None,
@@ -225,3 +288,19 @@ class ProcessingHTTPClient:
         resp_text = await resp.text()
         check_status(resp, resp_text)
         return StatsResponse.model_validate_json(resp_text)
+
+
+class ProcessingPullMessageProgressUpdater(MessageProgressUpdater):
+    """
+    Context manager to send progress updates to NATS.
+
+    This should allow lower ack_wait time settings without causing
+    messages to be redelivered.
+    """
+
+    def __init__(self, client: ProcessingHTTPClient, msg: PulledMessage, timeout: float):
+        async def update_msg() -> bool:
+            await client.in_progress(msg.ack_token)
+            return False
+
+        super().__init__(str(msg.seq), update_msg, timeout)
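
The new v2 surface in this file is compact: pull_v2 fetches a batch and, in the same call, acks the tokens from the previous one; in_progress refreshes a message's TTL while it is still being handled; ProcessingPullMessageProgressUpdater automates that refresh as a context manager. A minimal caller might look like the sketch below, which assumes only what the diff shows except for the hypothetical handle() callback.

# Illustrative only: a bare-bones consumer of the v2 pull API added above.
import asyncio

from nucliadb.common.http_clients.processing import ProcessingHTTPClient


def handle(payload: bytes) -> None:  # hypothetical application callback
    ...


async def drain_once() -> None:
    async with ProcessingHTTPClient() as client:
        ack_tokens: list[str] = []
        pull = await client.pull_v2(ack_tokens=ack_tokens, limit=1, timeout=5)
        if pull is None:  # HTTP 204: nothing pending
            return
        for message in pull.messages:
            # A slow handler should call client.in_progress(message.ack_token)
            # before pull.ttl expires, or the message will be redelivered.
            handle(message.payload)
            ack_tokens.append(message.ack_token)
        # Tokens are only acked by a subsequent pull; limit=0 just flushes them.
        await client.pull_v2(ack_tokens=ack_tokens, limit=0)


asyncio.run(drain_once())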
nucliadb/ingest/app.py CHANGED

@@ -32,7 +32,7 @@ from nucliadb.ingest.consumer import service as consumer_service
 from nucliadb.ingest.partitions import assign_partitions
 from nucliadb.ingest.processing import start_processing_engine, stop_processing_engine
 from nucliadb.ingest.service import start_grpc
-from nucliadb.ingest.settings import settings
+from nucliadb.ingest.settings import ProcessingPullMode, settings
 from nucliadb.ingest.utils import start_ingest as start_ingest_utility
 from nucliadb.ingest.utils import stop_ingest as stop_ingest_utility
 from nucliadb_telemetry import errors
@@ -101,7 +101,12 @@ async def initialize_grpc():  # pragma: no cover

 async def initialize_pull_workers() -> list[Callable[[], Awaitable[None]]]:
     finalizers = await initialize_grpc()
-    pull_workers = await consumer_service.start_pull_workers(SERVICE_NAME)
+    if settings.processing_pull_mode == ProcessingPullMode.V1:
+        pull_workers = await consumer_service.start_pull_workers(SERVICE_NAME)
+    elif settings.processing_pull_mode == ProcessingPullMode.V2:
+        pull_workers = [await consumer_service.start_ingest_processed_consumer_v2(SERVICE_NAME)]
+    else:
+        raise Exception("Processing pull workers not enabled and it is required")

     return pull_workers + finalizers

@@ -113,7 +118,11 @@ async def main_consumer():  # pragma: no cover
     grpc_health_finalizer = await health.start_grpc_health_service(settings.grpc_port)

     # pull workers could be pulled out into it's own deployment
-    pull_workers = await consumer_service.start_pull_workers(SERVICE_NAME)
+    if settings.processing_pull_mode == ProcessingPullMode.V1:
+        pull_workers = await consumer_service.start_pull_workers(SERVICE_NAME)
+    else:
+        # In v2, pull workers run inside the ingest consumer
+        pull_workers = []
     ingest_consumers = await consumer_service.start_ingest_consumers(SERVICE_NAME)

     await run_until_exit(
@@ -134,7 +143,16 @@ async def main_ingest_processed_consumer():  # pragma: no cover
     await start_processing_engine()
     metrics_server = await serve_metrics()
     grpc_health_finalizer = await health.start_grpc_health_service(settings.grpc_port)
-    consumer = await consumer_service.start_ingest_processed_consumer(SERVICE_NAME)
+
+    if settings.processing_pull_mode == ProcessingPullMode.V1:
+        consumer = await consumer_service.start_ingest_processed_consumer(SERVICE_NAME)
+    elif settings.processing_pull_mode == ProcessingPullMode.V2:
+        consumer = await consumer_service.start_ingest_processed_consumer_v2(SERVICE_NAME)
+    else:
+        # Off
+        async def fake_consumer(): ...
+
+        consumer = fake_consumer

     await run_until_exit(
         [grpc_health_finalizer, consumer, metrics_server.shutdown, stop_processing_engine] + finalizers
nucliadb/ingest/consumer/consumer.py CHANGED

@@ -39,7 +39,7 @@ from nucliadb_protos.writer_pb2 import BrokerMessage, BrokerMessageBlobReference
 from nucliadb_telemetry import context, errors, metrics
 from nucliadb_utils import const
 from nucliadb_utils.cache.pubsub import PubSubDriver
-from nucliadb_utils.nats import MessageProgressUpdater, NatsConnectionManager
+from nucliadb_utils.nats import NatsConnectionManager, NatsMessageProgressUpdater
 from nucliadb_utils.settings import nats_consumer_settings
 from nucliadb_utils.storages.storage import Storage

@@ -181,7 +181,7 @@ class IngestConsumer:
         start = time.monotonic()

         async with (
-            MessageProgressUpdater(msg, nats_consumer_settings.nats_ack_wait * 0.66),
+            NatsMessageProgressUpdater(msg, nats_consumer_settings.nats_ack_wait * 0.66),
             self.lock,
         ):
             try:
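
Both here and in nucliadb/tasks/consumer.py below, message handling is wrapped in NatsMessageProgressUpdater with a refresh interval of nats_ack_wait * 0.66, so the in-progress signal always lands before the ack window expires (a 30s ack_wait is refreshed roughly every 20s). The renamed class itself is not shown in this diff; the sketch below only illustrates the contract implied by its call sites, assuming the (reference, async callback, interval) signature visible in ProcessingPullMessageProgressUpdater above.

# Illustrative sketch of the progress-updater contract; not the library code.
import asyncio
from typing import Awaitable, Callable


class ProgressUpdaterSketch:
    """Async context manager firing `cb` every `timeout` seconds until exit."""

    def __init__(self, ref: str, cb: Callable[[], Awaitable[bool]], timeout: float):
        self.ref = ref
        self.cb = cb
        self.timeout = timeout

    async def __aenter__(self) -> "ProgressUpdaterSketch":
        self._task = asyncio.create_task(self._tick())
        return self

    async def __aexit__(self, *exc) -> None:
        self._task.cancel()

    async def _tick(self) -> None:
        while True:
            await asyncio.sleep(self.timeout)
            if await self.cb():  # callback returns True once the message is done
                return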
nucliadb/ingest/consumer/pull.py CHANGED

@@ -19,27 +19,44 @@
 #
 import asyncio
 import base64
+import time
+from contextlib import contextmanager
 from datetime import datetime, timezone
 from typing import Optional

 from aiohttp.client_exceptions import ClientConnectorError
+from opentelemetry import trace
+from opentelemetry.context import Context
+from opentelemetry.propagate import extract
+from opentelemetry.trace import (
+    Link,
+)

 from nucliadb.common import datamanagers
 from nucliadb.common.back_pressure.materializer import BackPressureMaterializer
 from nucliadb.common.back_pressure.utils import BackPressureException
-from nucliadb.common.http_clients.processing import ProcessingHTTPClient, get_nua_api_id
+from nucliadb.common.http_clients.processing import (
+    ProcessingHTTPClient,
+    ProcessingPullMessageProgressUpdater,
+    get_nua_api_id,
+)
 from nucliadb.common.maindb.driver import Driver
-from nucliadb.ingest import logger, logger_activity
+from nucliadb.ingest import SERVICE_NAME, logger, logger_activity
+from nucliadb.ingest.consumer.consumer import consumer_observer
 from nucliadb.ingest.orm.exceptions import ReallyStopPulling
 from nucliadb.ingest.orm.processor import Processor
 from nucliadb_protos.writer_pb2 import BrokerMessage, BrokerMessageBlobReference
 from nucliadb_telemetry import errors
+from nucliadb_telemetry.metrics import Gauge
+from nucliadb_telemetry.utils import get_telemetry
 from nucliadb_utils import const
 from nucliadb_utils.cache.pubsub import PubSubDriver
 from nucliadb_utils.settings import nuclia_settings
 from nucliadb_utils.storages.storage import Storage
 from nucliadb_utils.transaction import MaxTransactionSizeExceededError
-from nucliadb_utils.utilities import get_storage, get_transaction_utility
+from nucliadb_utils.utilities import get_storage, get_transaction_utility, pull_subscriber_utilization
+
+processing_pending_messages = Gauge("nucliadb_processing_pending_messages")


 class PullWorker:
@@ -234,3 +251,163 @@ class PullWorker:
         except Exception:
             logger.exception("Unhandled error pulling messages from processing")
             await asyncio.sleep(self.pull_time_error_backoff)
+
+
+@contextmanager
+def run_in_span(headers: dict[str, str]):
+    # Create a span for handling this message
+    tracer_provider = get_telemetry(SERVICE_NAME)
+    if tracer_provider is None:
+        yield
+        return
+
+    tracer = tracer_provider.get_tracer(__name__)
+    our_span = tracer.start_span("handle_processing_pull")
+
+    # Try to retrieve processing context to link to it
+    witness = Context()
+    processor_context = extract(headers, context=witness)
+    if processor_context != witness:
+        # We successfully extracted a context, we link from the processor span to ours for ease of navigation
+        with tracer.start_as_current_span(
+            f"Pulled from proxy", links=[Link(our_span.get_span_context())], context=processor_context
+        ):
+            # And link from our span back to the processor span
+            our_span.add_link(trace.get_current_span().get_span_context())
+
+    # Go back to our context
+    trace.set_span_in_context(our_span)
+    with trace.use_span(our_span, end_on_exit=True):
+        yield
+
+
+class PullV2Worker:
+    """
+    The pull worker is responsible for pulling messages from the pull processing
+    http endpoint and processing them
+
+    The processing pull endpoint is also described as the "processing proxy" at times.
+    """
+
+    def __init__(
+        self,
+        driver: Driver,
+        storage: Storage,
+        pull_time_error_backoff: int,
+        pubsub: Optional[PubSubDriver] = None,
+        pull_time_empty_backoff: float = 5.0,
+        pull_api_timeout: int = 60,
+    ):
+        self.pull_time_error_backoff = pull_time_error_backoff
+        self.pull_time_empty_backoff = pull_time_empty_backoff
+        self.pull_api_timeout = pull_api_timeout
+
+        self.processor = Processor(driver, storage, pubsub, "-1")
+
+    async def handle_message(self, seq: int, payload: bytes) -> None:
+        pb = BrokerMessage()
+        data = base64.b64decode(payload)
+        pb.ParseFromString(data)
+
+        logger.debug(f"Resource: {pb.uuid} KB: {pb.kbid} ProcessingID: {pb.processing_id}")
+
+        source = "writer" if pb.source == pb.MessageSource.WRITER else "processor"
+        with consumer_observer({"source": source, "partition": "-1"}):
+            await self.processor.process(
+                pb,
+                seq,
+                transaction_check=False,
+            )
+
+    async def loop(self):
+        """
+        Run this forever
+        """
+        while True:
+            try:
+                await self._loop()
+            except ReallyStopPulling:
+                logger.info("Exiting...")
+                break
+            except Exception as e:
+                errors.capture_exception(e)
+                logger.exception("Exception on worker", exc_info=e)
+                await asyncio.sleep(10)

+    async def _loop(self):
+        usage_metric = pull_subscriber_utilization
+        headers = {}
+        data = None
+        if nuclia_settings.nuclia_service_account is not None:
+            headers["X-STF-NUAKEY"] = f"Bearer {nuclia_settings.nuclia_service_account}"
+            # parse jwt sub to get pull type id
+            try:
+                get_nua_api_id()
+            except Exception as exc:
+                logger.exception("Could not read NUA API Key. Can not start pull worker")
+                raise ReallyStopPulling() from exc
+
+        ack_tokens = []
+        async with ProcessingHTTPClient() as processing_http_client:
+            while True:
+                try:
+                    start_time = time.monotonic()
+
+                    # The code is only really prepared to pull 1 message at a time. If changing this, review MessageProgressUpdate usage
+                    pull = await processing_http_client.pull_v2(ack_tokens=ack_tokens, limit=1)
+                    ack_tokens.clear()
+                    if pull is None:
+                        processing_pending_messages.set(0)
+                        logger_activity.debug(f"No messages waiting in processing pull")
+                        await asyncio.sleep(self.pull_time_empty_backoff)
+                        usage_metric.inc({"status": "waiting"}, time.monotonic() - start_time)
+                        continue
+
+                    received_time = time.monotonic()
+                    usage_metric.inc({"status": "waiting"}, received_time - start_time)
+                    processing_pending_messages.set(pull.pending)
+
+                    logger.info("Message received from proxy", extra={"seq": [pull.messages[0].seq]})
+                    try:
+                        for message in pull.messages:
+                            async with ProcessingPullMessageProgressUpdater(
+                                processing_http_client, message, pull.ttl * 0.66
+                            ):
+                                with run_in_span(message.headers):
+                                    await self.handle_message(message.seq, message.payload)
+                                    ack_tokens.append(message.ack_token)
+
+                        usage_metric.inc({"status": "processing"}, time.monotonic() - received_time)
+                    except Exception as e:
+                        errors.capture_exception(e)
+                        logger.exception("Error while pulling and processing message/s")
+                        raise e
+
+                except (
+                    asyncio.exceptions.CancelledError,
+                    RuntimeError,
+                    KeyboardInterrupt,
+                    SystemExit,
+                ):
+                    if ack_tokens:
+                        await processing_http_client.pull_v2(ack_tokens=ack_tokens, limit=0)
+                    logger.info(f"Pull task was canceled, exiting")
+                    raise ReallyStopPulling()
+
+                except ClientConnectorError:
+                    logger.error(
+                        f"Could not connect to processing engine, \
+                        {processing_http_client.base_url} verify your internet connection"
+                    )
+                    await asyncio.sleep(self.pull_time_error_backoff)
+
+                except MaxTransactionSizeExceededError as e:
+                    if data is not None:
+                        payload_length = 0
+                        if data.payload:
+                            payload_length = len(base64.b64decode(data.payload))
+                        logger.error(f"Message too big for transaction: {payload_length}")
+                    raise e
+                except Exception:
+                    logger.exception("Unhandled error pulling messages from processing")
+                    await asyncio.sleep(self.pull_time_error_backoff)
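
run_in_span() above stitches the consumer's trace to the producer's: it extracts W3C trace context from the pulled message headers and links the two spans in both directions. The witness trick is the subtle part: extract() returns its input context unchanged when the headers carry no trace data, so comparing against a fresh Context() detects whether propagation actually happened. Note also that ProcessingPullMessageProgressUpdater is applied per message with pull.ttl * 0.66 (a 30s TTL is refreshed about every 20s), mirroring the ack_wait * 0.66 heuristic of the NATS consumers. For the headers to be linkable, the producing side would inject its span context roughly as sketched below (standard OpenTelemetry propagation; this is not code from this release).

# Illustrative: injecting the current span context into message headers so
# that run_in_span() can extract and link to it on the consuming side.
from opentelemetry import trace
from opentelemetry.propagate import inject


def headers_for_message() -> dict[str, str]:
    headers: dict[str, str] = {}
    inject(headers)  # writes e.g. the W3C "traceparent" header
    return headers


tracer = trace.get_tracer(__name__)
with tracer.start_as_current_span("process_message"):
    headers = headers_for_message()
    # ship `headers` alongside the payload; the pull consumer extracts them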
nucliadb/ingest/consumer/service.py CHANGED

@@ -28,7 +28,7 @@ from nucliadb.common.back_pressure.utils import is_back_pressure_enabled
 from nucliadb.common.maindb.utils import setup_driver
 from nucliadb.ingest import SERVICE_NAME, logger
 from nucliadb.ingest.consumer.consumer import IngestConsumer, IngestProcessedConsumer
-from nucliadb.ingest.consumer.pull import PullWorker
+from nucliadb.ingest.consumer.pull import PullV2Worker, PullWorker
 from nucliadb.ingest.settings import settings
 from nucliadb_utils.exceptions import ConfigurationError
 from nucliadb_utils.settings import indexing_settings, transaction_settings
@@ -177,6 +177,32 @@ async def start_ingest_processed_consumer(
     return nats_connection_manager.finalize


+async def start_ingest_processed_consumer_v2(
+    service_name: Optional[str] = None,
+) -> Callable[[], Awaitable[None]]:
+    """
+    This is not meant to be deployed with a stateful set like the other consumers.
+
+    We are not maintaining transactionability based on the nats sequence id from this
+    consumer and we will start off by not separating writes by partition AND
+    allowing NATS to manage the queue group for us.
+    """
+    driver = await setup_driver()
+    pubsub = await get_pubsub()
+    storage = await get_storage(service_name=service_name or SERVICE_NAME)
+
+    consumer = PullV2Worker(
+        driver=driver,
+        storage=storage,
+        pubsub=pubsub,
+        pull_time_error_backoff=settings.pull_time_error_backoff,
+        pull_api_timeout=settings.pull_api_timeout,
+    )
+    task = asyncio.create_task(consumer.loop())
+    task.add_done_callback(_handle_task_result)
+    return partial(_exit_tasks, [task])
+
+
 async def start_auditor() -> Callable[[], Awaitable[None]]:
     audit = get_audit()
     assert audit is not None
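
start_ingest_processed_consumer_v2 returns a shutdown finalizer rather than a NATS subscription handle: the worker loop runs as a plain asyncio task. _handle_task_result and _exit_tasks are existing helpers in this module that the diff does not show; the stand-ins below only illustrate the assumed shape of that lifecycle.

# Illustrative sketch of the create_task / done-callback / finalizer pattern;
# the real _handle_task_result and _exit_tasks live elsewhere in service.py.
import asyncio
import logging
from functools import partial

logger = logging.getLogger(__name__)


async def worker_loop() -> None:  # stands in for PullV2Worker.loop
    while True:
        await asyncio.sleep(1)


def _handle_task_result(task: asyncio.Task) -> None:
    # A done-callback is the only place a fire-and-forget task reports a crash.
    if not task.cancelled() and task.exception() is not None:
        logger.error("consumer task failed", exc_info=task.exception())


async def _exit_tasks(tasks: list[asyncio.Task]) -> None:
    # Awaited at shutdown; PullV2Worker turns the cancellation into a clean
    # stop, flushing outstanding ack tokens first.
    for task in tasks:
        task.cancel()


async def main() -> None:
    task = asyncio.create_task(worker_loop())
    task.add_done_callback(_handle_task_result)
    finalizer = partial(_exit_tasks, [task])
    await asyncio.sleep(0.1)  # ... serve until shutdown ...
    await finalizer()


asyncio.run(main())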
nucliadb/ingest/settings.py CHANGED

@@ -63,6 +63,13 @@ class DriverSettings(BaseSettings):
     )


+# For use during migration from pull v1 to pull v2
+class ProcessingPullMode(Enum):
+    OFF = "off"
+    V1 = "v1"
+    V2 = "v2"
+
+
 class Settings(DriverSettings):
     grpc_port: int = 8030

@@ -85,5 +92,7 @@ class Settings(DriverSettings):

     max_concurrent_ingest_processing: int = 5

+    processing_pull_mode: ProcessingPullMode = ProcessingPullMode.V1
+

 settings = Settings()
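
Settings is a pydantic BaseSettings subclass, so the new field should be switchable per deployment through the environment. Assuming pydantic's default field-name-to-variable mapping (the diff shows no custom env prefix), flipping a process to the v2 pull path would look like:

# Illustrative: toggling the migration switch via the environment, assuming
# the default pydantic env-var mapping for the new field.
import os

os.environ["PROCESSING_PULL_MODE"] = "v2"

from nucliadb.ingest.settings import ProcessingPullMode, Settings

settings = Settings()
assert settings.processing_pull_mode is ProcessingPullMode.V2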
nucliadb/tasks/consumer.py CHANGED

@@ -30,7 +30,7 @@ from nucliadb.tasks.logger import logger
 from nucliadb.tasks.models import Callback, MsgType
 from nucliadb.tasks.utils import NatsConsumer, NatsStream, create_nats_stream_if_not_exists
 from nucliadb_telemetry import errors
-from nucliadb_utils.nats import MessageProgressUpdater
+from nucliadb_utils.nats import NatsMessageProgressUpdater
 from nucliadb_utils.settings import nats_consumer_settings

 BEFORE_NAK_SLEEP_SECONDS = 2
@@ -124,7 +124,7 @@ class NatsTaskConsumer(Generic[MsgType]):
             f"Message received: subject:{subject}, seqid: {seqid}, reply: {reply}",
             extra={"consumer_name": self.name},
         )
-        async with MessageProgressUpdater(msg, nats_consumer_settings.nats_ack_wait * 0.66):
+        async with NatsMessageProgressUpdater(msg, nats_consumer_settings.nats_ack_wait * 0.66):
             try:
                 task_msg = self.msg_type.model_validate_json(msg.data)
             except pydantic.ValidationError as e:
{nucliadb-6.4.0.post4276.dist-info → nucliadb-6.4.0.post4283.dist-info}/METADATA CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: nucliadb
-Version: 6.4.0.post4276
+Version: 6.4.0.post4283
 Summary: NucliaDB
 Author-email: Nuclia <nucliadb@nuclia.com>
 License: AGPL
@@ -20,11 +20,11 @@ Classifier: Programming Language :: Python :: 3.12
 Classifier: Programming Language :: Python :: 3 :: Only
 Requires-Python: <4,>=3.9
 Description-Content-Type: text/markdown
-Requires-Dist: nucliadb-telemetry[all]>=6.4.0.post4276
-Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.4.0.post4276
-Requires-Dist: nucliadb-protos>=6.4.0.post4276
-Requires-Dist: nucliadb-models>=6.4.0.post4276
-Requires-Dist: nidx-protos>=6.4.0.post4276
+Requires-Dist: nucliadb-telemetry[all]>=6.4.0.post4283
+Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.4.0.post4283
+Requires-Dist: nucliadb-protos>=6.4.0.post4283
+Requires-Dist: nucliadb-models>=6.4.0.post4283
+Requires-Dist: nidx-protos>=6.4.0.post4283
 Requires-Dist: nucliadb-admin-assets>=1.0.0.post1224
 Requires-Dist: nuclia-models>=0.24.2
 Requires-Dist: uvicorn[standard]
{nucliadb-6.4.0.post4276.dist-info → nucliadb-6.4.0.post4283.dist-info}/RECORD CHANGED

@@ -101,7 +101,7 @@ nucliadb/common/external_index_providers/settings.py,sha256=EGHnIkwxqe6aypwKegXT
 nucliadb/common/http_clients/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
 nucliadb/common/http_clients/auth.py,sha256=srfpgAbs2wmqA9u_l-HxsV4YoO77Tse4y3gm3q2YvYM,2112
 nucliadb/common/http_clients/exceptions.py,sha256=47Y8OjkaGV_F18G07FpJhOzgWKUIexhlILyuVtICz8s,1100
-nucliadb/common/http_clients/processing.py,sha256=
+nucliadb/common/http_clients/processing.py,sha256=VzxzFArNsHWGmFoX0c5OrQB3vFW841aeyuP5NgzPQGo,9581
 nucliadb/common/http_clients/pypi.py,sha256=VHIUjwJEJVntVUo_FRoXIo8sLmluy7sa9-iXSITcrMY,1540
 nucliadb/common/http_clients/utils.py,sha256=yGUkHNS41abHiBoHqo_Mg3QSqGsS7rUtbfGftbEC57U,1529
 nucliadb/common/maindb/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
@@ -122,20 +122,20 @@ nucliadb/export_import/models.py,sha256=dbjScNkiMRv4X3Ktudy1JRliD25bfoDTy3JmEZgQ
 nucliadb/export_import/tasks.py,sha256=DWbdqY97ffoyfipelGXz3Jqz1iam6JCjQSh367Fc3NA,2947
 nucliadb/export_import/utils.py,sha256=8XOVMYXXw8b4ikojG7RjQ4tKN3Xu7nfu2yCUOqD50sk,23216
 nucliadb/ingest/__init__.py,sha256=fsw3C38VP50km3R-nHL775LNGPpJ4JxqXJ2Ib1f5SqE,1011
-nucliadb/ingest/app.py,sha256=
+nucliadb/ingest/app.py,sha256=Eympy8nbz09VDNPF28MuIeKMb7wgB9cTSOObS8uvL0o,8372
 nucliadb/ingest/partitions.py,sha256=2NIhMYbNT0TNBL6bX1UMSi7vxFGICstCKEqsB0TXHOE,2410
 nucliadb/ingest/processing.py,sha256=QmkHq-BU4vub7JRWe9VHvQ2DcAmT6-CzgFXuZxXhcBU,20953
 nucliadb/ingest/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 nucliadb/ingest/serialize.py,sha256=-TIjibJTbMqAowzRvyrG3R209vKqBZqXpdrQL9Dq4lo,16135
-nucliadb/ingest/settings.py,sha256=
+nucliadb/ingest/settings.py,sha256=inB5SpkSI6sRd-ftlJIHFH6XlbuiSaRdL-F2WGyseUw,3249
 nucliadb/ingest/utils.py,sha256=l1myURu3r8oA11dx3GpHw-gNTUc1AFX8xdPm9Lgl2rA,2275
 nucliadb/ingest/consumer/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
 nucliadb/ingest/consumer/auditing.py,sha256=xK21DIa_ZAiOJVVbnkmT4jgCRGshNGyPyxsqhE6kROE,7204
-nucliadb/ingest/consumer/consumer.py,sha256=
+nucliadb/ingest/consumer/consumer.py,sha256=GfdlrNlnt7PWYyk75xtyzn2SHZse7475U4U9q_9jKr0,13711
 nucliadb/ingest/consumer/materializer.py,sha256=tgD_rDI2twQzcz8kKNiW_L4YIth16IGh9mUfD5wiSD4,3858
 nucliadb/ingest/consumer/metrics.py,sha256=ji1l_4cKiHJthQd8YNem1ft4iMbw9KThmVvJmLcv3Xg,1075
-nucliadb/ingest/consumer/pull.py,sha256=
-nucliadb/ingest/consumer/service.py,sha256=
+nucliadb/ingest/consumer/pull.py,sha256=x39G6AcNXSnw_GRPxJfafmD5pehZzMBd6v_f_yrNbUI,17594
+nucliadb/ingest/consumer/service.py,sha256=WXBN8dY7MlmYWxqQHIbIO7w_SdVJRY1RuHAWlQUXf8o,8852
 nucliadb/ingest/consumer/shard_creator.py,sha256=w0smEu01FU_2cjZnsfBRNqT_Ntho11X17zTMST-vKbc,4359
 nucliadb/ingest/consumer/utils.py,sha256=jpX8D4lKzuPCpArQLZeX_Zczq3pfen_zAf8sPJfOEZU,2642
 nucliadb/ingest/fields/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
@@ -290,7 +290,7 @@ nucliadb/standalone/static/favicon.ico,sha256=96pKGp6Sx457JkTfjy1dtApMhkitixfU6i
 nucliadb/standalone/static/index.html,sha256=PEZfuEQFYnYACAL1ceN8xC0im8lBrUx838RkE8tbvgA,3833
 nucliadb/standalone/static/logo.svg,sha256=-wQqSvPGTdlKjUP6pHE6kiq005pgYjDzp9nPl0X71Mk,2639
 nucliadb/tasks/__init__.py,sha256=oFJ3A8HD7w11mBu-IixYE_KxA7juMGlYQb7YD_y6WPM,975
-nucliadb/tasks/consumer.py,sha256=
+nucliadb/tasks/consumer.py,sha256=E7_9bY5o7BVlioWX9yO9yimDJaKeuj-P-tiNCJcaRz8,6964
 nucliadb/tasks/logger.py,sha256=C7keOEO_mjLVp5VbqAZ2QXfqVB2Hot7NgBlUP_SDSMw,924
 nucliadb/tasks/models.py,sha256=qrZKi5DNDQ07waMsp5L4_Fi7WRs57YiO-kmXlrBzEAA,1168
 nucliadb/tasks/producer.py,sha256=UnpJAzhj_GElsCoO5G6T4m6MshsgOaqR2tVzJmEta64,2625
@@ -368,8 +368,8 @@ nucliadb/writer/tus/local.py,sha256=7jYa_w9b-N90jWgN2sQKkNcomqn6JMVBOVeDOVYJHto,
 nucliadb/writer/tus/s3.py,sha256=vF0NkFTXiXhXq3bCVXXVV-ED38ECVoUeeYViP8uMqcU,8357
 nucliadb/writer/tus/storage.py,sha256=ToqwjoYnjI4oIcwzkhha_MPxi-k4Jk3Lt55zRwaC1SM,2903
 nucliadb/writer/tus/utils.py,sha256=MSdVbRsRSZVdkaum69_0wku7X3p5wlZf4nr6E0GMKbw,2556
-nucliadb-6.4.0.post4276.dist-info/METADATA,sha256=
-nucliadb-6.4.0.post4276.dist-info/WHEEL,sha256=
-nucliadb-6.4.0.post4276.dist-info/entry_points.txt,sha256=
-nucliadb-6.4.0.post4276.dist-info/top_level.txt,sha256=
-nucliadb-6.4.0.post4276.dist-info/RECORD,,
+nucliadb-6.4.0.post4283.dist-info/METADATA,sha256=qNXC_Jl6XRiEVh5KfaC09owsizR7ozLE3OJ2FG_Ygsg,4223
+nucliadb-6.4.0.post4283.dist-info/WHEEL,sha256=DnLRTWE75wApRYVsjgc6wsVswC54sMSJhAEd4xhDpBk,91
+nucliadb-6.4.0.post4283.dist-info/entry_points.txt,sha256=XqGfgFDuY3zXQc8ewXM2TRVjTModIq851zOsgrmaXx4,1268
+nucliadb-6.4.0.post4283.dist-info/top_level.txt,sha256=hwYhTVnX7jkQ9gJCkVrbqEG1M4lT2F_iPQND1fCzF80,20
+nucliadb-6.4.0.post4283.dist-info/RECORD,,
{nucliadb-6.4.0.post4276.dist-info → nucliadb-6.4.0.post4283.dist-info}/WHEEL
File without changes

{nucliadb-6.4.0.post4276.dist-info → nucliadb-6.4.0.post4283.dist-info}/entry_points.txt
File without changes

{nucliadb-6.4.0.post4276.dist-info → nucliadb-6.4.0.post4283.dist-info}/top_level.txt
File without changes