nucliadb 6.4.1.post4337__py3-none-any.whl → 6.4.1.post4342__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
nucliadb/common/back_pressure/materializer.py CHANGED
@@ -37,7 +37,6 @@ from nucliadb.common.back_pressure.utils import (
 from nucliadb.common.context import ApplicationContext
 from nucliadb.common.http_clients.processing import ProcessingHTTPClient
 from nucliadb_telemetry import metrics
-from nucliadb_utils import const
 from nucliadb_utils.nats import NatsConnectionManager
 from nucliadb_utils.settings import is_onprem_nucliadb
 
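The hunk below switches the ingest-backlog measurement from querying NATS consumer state to a single call against the processing API. As a minimal sketch of the resulting polling pattern, assuming only what this diff introduces (an async pull_status() returning an object with an integer pending field); the class name and interval here are illustrative, not part of the package:

    import asyncio

    class PendingIngestPoller:
        """Keeps an ingest-backlog counter fresh by polling the processing API."""

        def __init__(self, client, interval_seconds: float = 30.0):
            self.client = client  # assumed to expose pull_status(), per this diff
            self.interval_seconds = interval_seconds  # illustrative value
            self.ingest_pending: int = 0

        async def run(self) -> None:
            while True:
                try:
                    # One HTTP round-trip replaces the old NATS consumer-info lookup.
                    status = await self.client.pull_status()
                    self.ingest_pending = status.pending
                except Exception:
                    # On transient errors, keep the last known value and retry later.
                    pass
                await asyncio.sleep(self.interval_seconds)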
@@ -162,11 +161,8 @@ class BackPressureMaterializer:
         while True:
             try:
                 with back_pressure_observer({"type": "get_ingest_pending"}):
-                    self.ingest_pending = await get_nats_consumer_pending_messages(
-                        self.nats_manager,
-                        stream=const.Streams.INGEST_PROCESSED.name,
-                        consumer=const.Streams.INGEST_PROCESSED.group,
-                    )
+                    status = await self.processing_http_client.pull_status()
+                    self.ingest_pending = status.pending
             except Exception:  # pragma: no cover
                 logger.exception(
                     "Error getting pending messages to ingest",
nucliadb/common/datamanagers/__init__.py CHANGED
@@ -36,7 +36,6 @@ from . import (
     fields,
     kb,
     labels,
-    processing,
     resources,
     rollover,
     search_configurations,
@@ -53,7 +52,6 @@ __all__ = (
    "fields",
    "kb",
    "labels",
-    "processing",
    "resources",
    "rollover",
    "search_configurations",
nucliadb/common/http_clients/processing.py CHANGED
@@ -184,6 +184,10 @@ class PullResponseV2(pydantic.BaseModel):
     pending: int
 
 
+class PullStatusResponse(pydantic.BaseModel):
+    pending: int
+
+
 JSON_HEADERS = {"Content-Type": "application/json"}
 
 
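PullStatusResponse has a single integer field, so the endpoint's entire contract is the JSON body {"pending": <int>}. An illustrative round trip (the count is made up), parsing a body the same way the client method added further below does:

    import pydantic

    class PullStatusResponse(pydantic.BaseModel):
        pending: int

    status = PullStatusResponse.model_validate_json('{"pending": 12}')
    assert status.pending == 12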
@@ -205,32 +209,6 @@ class ProcessingHTTPClient:
     async def close(self):
         await self.session.close()
 
-    async def pull(
-        self,
-        partition: str,
-        cursor: Optional[int] = None,
-        limit: int = 3,
-        timeout: int = 1,
-    ) -> PullResponse:
-        url = self.base_url + "/pull"
-        params = {"partition": partition, "limit": limit, "timeout": timeout}
-        if cursor is not None:
-            params["from_cursor"] = cursor
-
-        async with self.session.get(url, headers=self.headers, params=params) as resp:
-            resp_text = await resp.text()
-            check_status(resp, resp_text)
-            return PullResponse.model_validate_json(resp_text)
-
-    async def pull_position(self, partition: str) -> int:
-        url = self.base_url + "/pull/position"
-        params = {"partition": partition}
-        async with self.session.get(url, headers=self.headers, params=params) as resp:
-            resp_text = await resp.text()
-            check_status(resp, resp_text)
-            data = PullPosition.model_validate_json(resp_text)
-            return data.cursor
-
     async def in_progress(self, ack_token: str):
         url = self.base_url_v2 + "/pull/in_progress"
         request = InProgressRequest(ack=[ack_token])
@@ -256,6 +234,14 @@ class ProcessingHTTPClient:
         else:
             return PullResponseV2.model_validate_json(resp_text)
 
+    async def pull_status(self) -> PullStatusResponse:
+        url = self.base_url_v2 + "/pull/status"
+        async with self.session.get(url, headers=self.headers) as resp:
+            resp_text = await resp.text()
+            check_status(resp, resp_text)
+
+            return PullStatusResponse.model_validate_json(resp_text)
+
     async def requests(
         self,
         cursor: Optional[str] = None,
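Putting the new model and method together, probing the v2 pull backlog looks roughly like this; a sketch assuming a default-constructed client and a reachable endpoint (the client is used as an async context manager, as the removed pull-worker code in this diff did):

    import asyncio

    from nucliadb.common.http_clients.processing import ProcessingHTTPClient

    async def main() -> None:
        async with ProcessingHTTPClient() as client:
            status = await client.pull_status()
            print(f"messages pending to pull: {status.pending}")

    asyncio.run(main())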
nucliadb/export_import/utils.py CHANGED
@@ -40,8 +40,6 @@ from nucliadb_models.export_import import Status
 from nucliadb_protos import knowledgebox_pb2 as kb_pb2
 from nucliadb_protos import resources_pb2, writer_pb2
 from nucliadb_protos.writer_pb2_grpc import WriterStub
-from nucliadb_utils.const import Streams
-from nucliadb_utils.transaction import MaxTransactionSizeExceededError
 from nucliadb_utils.utilities import get_ingest
 
 BinaryStream = AsyncIterator[bytes]
@@ -130,37 +128,6 @@ async def process_bm_grpc(context: ApplicationContext, bm: writer_pb2.BrokerMessage
     assert response.status == writer_pb2.OpStatusWriter.Status.OK, "Failed to process broker message"
 
 
-async def transaction_commit(
-    context: ApplicationContext, bm: writer_pb2.BrokerMessage, partition: int
-) -> None:
-    """
-    Try to send the broker message over nats. If it's too big, upload
-    it to blob storage and over nats only send a reference to it.
-    """
-    try:
-        await context.transaction.commit(
-            bm,
-            partition,
-            wait=False,
-            target_subject=Streams.INGEST_PROCESSED.subject,
-        )
-    except MaxTransactionSizeExceededError:
-        stored_key = await context.blob_storage.set_stream_message(
-            kbid=bm.kbid, rid=bm.uuid, data=bm.SerializeToString()
-        )
-        referenced_bm = writer_pb2.BrokerMessageBlobReference(
-            uuid=bm.uuid, kbid=bm.kbid, storage_key=stored_key
-        )
-        await context.transaction.commit(
-            writer=referenced_bm,
-            partition=partition,
-            target_subject=Streams.INGEST_PROCESSED.subject,
-            # This header is needed as it's the way we flag the transaction
-            # consumer to download from storage
-            headers={"X-MESSAGE-TYPE": "PROXY"},
-        )
-
-
 def get_writer_bm(bm: writer_pb2.BrokerMessage) -> writer_pb2.BrokerMessage:
     wbm = writer_pb2.BrokerMessage()
     wbm.CopyFrom(bm)
nucliadb/ingest/app.py CHANGED
@@ -32,7 +32,7 @@ from nucliadb.ingest.consumer import service as consumer_service
 from nucliadb.ingest.partitions import assign_partitions
 from nucliadb.ingest.processing import start_processing_engine, stop_processing_engine
 from nucliadb.ingest.service import start_grpc
-from nucliadb.ingest.settings import ProcessingPullMode, settings
+from nucliadb.ingest.settings import settings
 from nucliadb.ingest.utils import start_ingest as start_ingest_utility
 from nucliadb.ingest.utils import stop_ingest as stop_ingest_utility
 from nucliadb_telemetry import errors
@@ -101,12 +101,7 @@ async def initialize_grpc():  # pragma: no cover
 
 async def initialize_pull_workers() -> list[Callable[[], Awaitable[None]]]:
     finalizers = await initialize_grpc()
-    if settings.processing_pull_mode == ProcessingPullMode.V1:
-        pull_workers = await consumer_service.start_pull_workers(SERVICE_NAME)
-    elif settings.processing_pull_mode == ProcessingPullMode.V2:
-        pull_workers = [await consumer_service.start_ingest_processed_consumer_v2(SERVICE_NAME)]
-    else:
-        raise Exception("Processing pull workers not enabled and it is required")
+    pull_workers = [await consumer_service.start_ingest_processed_consumer_v2(SERVICE_NAME)]
 
     return pull_workers + finalizers
 
@@ -117,17 +112,9 @@ async def main_consumer():  # pragma: no cover
 
     grpc_health_finalizer = await health.start_grpc_health_service(settings.grpc_port)
 
-    # pull workers could be pulled out into it's own deployment
-    if settings.processing_pull_mode == ProcessingPullMode.V1:
-        pull_workers = await consumer_service.start_pull_workers(SERVICE_NAME)
-    else:
-        # In v2, pull workers run inside the ingest consumer
-        pull_workers = []
     ingest_consumers = await consumer_service.start_ingest_consumers(SERVICE_NAME)
 
-    await run_until_exit(
-        [grpc_health_finalizer, ingest_consumers, metrics_server.shutdown] + pull_workers + finalizers
-    )
+    await run_until_exit([grpc_health_finalizer, ingest_consumers, metrics_server.shutdown] + finalizers)
 
 
 async def main_orm_grpc():  # pragma: no cover
@@ -144,15 +131,7 @@ async def main_ingest_processed_consumer():  # pragma: no cover
     metrics_server = await serve_metrics()
     grpc_health_finalizer = await health.start_grpc_health_service(settings.grpc_port)
 
-    if settings.processing_pull_mode == ProcessingPullMode.V1:
-        consumer = await consumer_service.start_ingest_processed_consumer(SERVICE_NAME)
-    elif settings.processing_pull_mode == ProcessingPullMode.V2:
-        consumer = await consumer_service.start_ingest_processed_consumer_v2(SERVICE_NAME)
-    else:
-        # Off
-        async def fake_consumer(): ...
-
-        consumer = fake_consumer
+    consumer = await consumer_service.start_ingest_processed_consumer_v2(SERVICE_NAME)
 
     await run_until_exit(
         [grpc_health_finalizer, consumer, metrics_server.shutdown, stop_processing_engine] + finalizers
nucliadb/ingest/consumer/consumer.py CHANGED
@@ -270,47 +270,3 @@ class IngestConsumer:
             await self.ack_message(msg, kbid)
             logger.info("Message acked because of success", extra={"seqid": seqid})
             await self.clean_broker_message(msg)
-
-
-class IngestProcessedConsumer(IngestConsumer):
-    """
-    Consumer designed to write processed resources to the database.
-
-    This is so that we can have a single consumer for both the regular writer and writes
-    coming from processor.
-
-    This is important because writes coming from processor can be very large and slow and
-    other writes are going to be coming from user actions and we don't want to slow them down.
-    """
-
-    async def setup_nats_subscription(self):
-        subject = const.Streams.INGEST_PROCESSED.subject
-        durable_name = const.Streams.INGEST_PROCESSED.group
-        self.subscription = await self.nats_connection_manager.pull_subscribe(
-            stream=const.Streams.INGEST_PROCESSED.name,
-            subject=subject,
-            durable=durable_name,
-            cb=self.subscription_worker,
-            subscription_lost_cb=self.setup_nats_subscription,
-            config=nats.js.api.ConsumerConfig(
-                durable_name=durable_name,
-                ack_policy=nats.js.api.AckPolicy.EXPLICIT,
-                deliver_policy=nats.js.api.DeliverPolicy.ALL,
-                # We set it to 20 because we don't care about order here and we want to be able to HPA based
-                # on the number of pending messages in the queue.
-                max_ack_pending=20,
-                max_deliver=nats_consumer_settings.nats_max_deliver,
-                ack_wait=nats_consumer_settings.nats_ack_wait,
-            ),
-        )
-        logger.info(
-            f"Subscribed pull consumer to {subject} on stream {const.Streams.INGEST_PROCESSED.name}"
-        )
-
-    @backoff.on_exception(backoff.expo, (ConflictError,), jitter=backoff.random_jitter, max_tries=4)
-    async def _process(self, pb: BrokerMessage, seqid: int):
-        """
-        We are setting `transaction_check` to False here because we can not mix
-        transaction ids from regular ingest writes and writes coming from processor.
-        """
-        await self.processor.process(pb, seqid, self.partition, transaction_check=False)
nucliadb/ingest/consumer/pull.py CHANGED
@@ -21,7 +21,6 @@ import asyncio
 import base64
 import time
 from contextlib import contextmanager
-from datetime import datetime, timezone
 from typing import Optional
 
 from aiohttp.client_exceptions import ClientConnectorError
@@ -32,9 +31,6 @@ from opentelemetry.trace import (
     Link,
 )
 
-from nucliadb.common import datamanagers
-from nucliadb.common.back_pressure.materializer import BackPressureMaterializer
-from nucliadb.common.back_pressure.utils import BackPressureException
 from nucliadb.common.http_clients.processing import (
     ProcessingHTTPClient,
     ProcessingPullMessageProgressUpdater,
@@ -45,214 +41,19 @@ from nucliadb.ingest import SERVICE_NAME, logger, logger_activity
 from nucliadb.ingest.consumer.consumer import consumer_observer
 from nucliadb.ingest.orm.exceptions import ReallyStopPulling
 from nucliadb.ingest.orm.processor import Processor
-from nucliadb_protos.writer_pb2 import BrokerMessage, BrokerMessageBlobReference
+from nucliadb_protos.writer_pb2 import BrokerMessage
 from nucliadb_telemetry import errors
 from nucliadb_telemetry.metrics import Gauge
 from nucliadb_telemetry.utils import get_telemetry
-from nucliadb_utils import const
 from nucliadb_utils.cache.pubsub import PubSubDriver
 from nucliadb_utils.settings import nuclia_settings
 from nucliadb_utils.storages.storage import Storage
 from nucliadb_utils.transaction import MaxTransactionSizeExceededError
-from nucliadb_utils.utilities import get_storage, get_transaction_utility, pull_subscriber_utilization
+from nucliadb_utils.utilities import pull_subscriber_utilization
 
 processing_pending_messages = Gauge("nucliadb_processing_pending_messages")
 
 
-class PullWorker:
-    """
-    The pull worker is responsible for pulling messages from the pull processing
-    http endpoint and injecting them into the processing write queue.
-
-    The processing pull endpoint is also described as the "processing proxy" at times.
-    """
-
-    def __init__(
-        self,
-        driver: Driver,
-        partition: str,
-        storage: Storage,
-        pull_time_error_backoff: int,
-        pubsub: Optional[PubSubDriver] = None,
-        local_subscriber: bool = False,
-        pull_time_empty_backoff: float = 5.0,
-        pull_api_timeout: int = 60,
-        back_pressure: Optional[BackPressureMaterializer] = None,
-    ):
-        self.partition = partition
-        self.pull_time_error_backoff = pull_time_error_backoff
-        self.pull_time_empty_backoff = pull_time_empty_backoff
-        self.pull_api_timeout = pull_api_timeout
-        self.local_subscriber = local_subscriber
-
-        self.processor = Processor(driver, storage, pubsub, partition)
-        self.back_pressure = back_pressure
-
-    def __str__(self) -> str:
-        return f"PullWorker(partition={self.partition})"
-
-    def __repr__(self) -> str:
-        return str(self)
-
-    async def handle_message(self, payload: str) -> None:
-        pb = BrokerMessage()
-        data = base64.b64decode(payload)
-        pb.ParseFromString(data)
-
-        logger.debug(f"Resource: {pb.uuid} KB: {pb.kbid} ProcessingID: {pb.processing_id}")
-
-        if not self.local_subscriber:
-            transaction_utility = get_transaction_utility()
-            if transaction_utility is None:
-                raise Exception("No transaction utility defined")
-            try:
-                await transaction_utility.commit(
-                    writer=pb,
-                    partition=int(self.partition),
-                    # send to separate processor
-                    target_subject=const.Streams.INGEST_PROCESSED.subject,
-                )
-            except MaxTransactionSizeExceededError:
-                storage = await get_storage()
-                stored_key = await storage.set_stream_message(kbid=pb.kbid, rid=pb.uuid, data=data)
-                referenced_pb = BrokerMessageBlobReference(
-                    uuid=pb.uuid, kbid=pb.kbid, storage_key=stored_key
-                )
-                await transaction_utility.commit(
-                    writer=referenced_pb,
-                    partition=int(self.partition),
-                    # send to separate processor
-                    target_subject=const.Streams.INGEST_PROCESSED.subject,
-                    headers={"X-MESSAGE-TYPE": "PROXY"},
-                )
-        else:
-            # No nats defined == monolitic nucliadb
-            await self.processor.process(
-                pb,
-                0,  # Fake sequence id as in local mode there's no transactions
-                partition=self.partition,
-                transaction_check=False,
-            )
-
-    async def back_pressure_check(self) -> None:
-        if self.back_pressure is None:
-            return
-        while True:
-            try:
-                self.back_pressure.check_indexing()
-                self.back_pressure.check_ingest()
-                break
-            except BackPressureException as exc:
-                sleep_time = (datetime.now(timezone.utc) - exc.data.try_after).total_seconds()
-                logger.warning(f"Back pressure active! Sleeping for {sleep_time} seconds", exc_info=True)
-                await asyncio.sleep(sleep_time)
-            except Exception as e:
-                errors.capture_exception(e)
-                logger.exception("Error while checking back pressure. Moving on")
-                break
-
-    async def loop(self):
-        """
-        Run this forever
-        """
-        while True:
-            await self.back_pressure_check()
-            try:
-                await self._loop()
-            except ReallyStopPulling:
-                logger.info("Exiting...")
-                break
-            except Exception as e:
-                errors.capture_exception(e)
-                logger.exception("Exception on worker", exc_info=e)
-                await asyncio.sleep(10)
-
-    async def _loop(self):
-        headers = {}
-        data = None
-        if nuclia_settings.nuclia_service_account is not None:
-            headers["X-STF-NUAKEY"] = f"Bearer {nuclia_settings.nuclia_service_account}"
-            # parse jwt sub to get pull type id
-            try:
-                pull_type_id = get_nua_api_id()
-            except Exception as exc:
-                logger.exception("Could not read NUA API Key. Can not start pull worker")
-                raise ReallyStopPulling() from exc
-        else:
-            pull_type_id = "main"
-
-        async with ProcessingHTTPClient() as processing_http_client:
-            logger.info(f"Collecting from NucliaDB Cloud {self.partition} partition")
-            while True:
-                try:
-                    async with datamanagers.with_ro_transaction() as txn:
-                        cursor = await datamanagers.processing.get_pull_offset(
-                            txn, pull_type_id=pull_type_id, partition=self.partition
-                        )
-
-                    data = await processing_http_client.pull(
-                        self.partition,
-                        cursor=cursor,
-                        timeout=self.pull_api_timeout,
-                    )
-                    if data.status == "ok":
-                        logger.info(
-                            "Message received from proxy",
-                            extra={"partition": self.partition, "cursor": data.cursor},
-                        )
-                        try:
-                            if data.payload is not None:
-                                await self.handle_message(data.payload)
-                            for payload in data.payloads:
-                                # If using cursors and multiple messages are returned, it will be in the
-                                # `payloads` property
-                                await self.handle_message(payload)
-                        except Exception as e:
-                            errors.capture_exception(e)
-                            logger.exception("Error while pulling and processing message/s")
-                            raise e
-                        async with datamanagers.with_transaction() as txn:
-                            await datamanagers.processing.set_pull_offset(
-                                txn,
-                                pull_type_id=pull_type_id,
-                                partition=self.partition,
-                                offset=data.cursor,
-                            )
-                            await txn.commit()
-                    elif data.status == "empty":
-                        logger_activity.debug(f"No messages waiting in partition #{self.partition}")
-                        await asyncio.sleep(self.pull_time_empty_backoff)
-                    else:
-                        logger.info(f"Proxy pull answered with error: {data}")
-                        await asyncio.sleep(self.pull_time_error_backoff)
-                except (
-                    asyncio.exceptions.CancelledError,
-                    RuntimeError,
-                    KeyboardInterrupt,
-                    SystemExit,
-                ):
-                    logger.info(f"Pull task for partition #{self.partition} was canceled, exiting")
-                    raise ReallyStopPulling()
-
-                except ClientConnectorError:
-                    logger.error(
-                        f"Could not connect to processing engine, \
-                        {processing_http_client.base_url} verify your internet connection"
-                    )
-                    await asyncio.sleep(self.pull_time_error_backoff)
-
-                except MaxTransactionSizeExceededError as e:
-                    if data is not None:
-                        payload_length = 0
-                        if data.payload:
-                            payload_length = len(base64.b64decode(data.payload))
-                        logger.error(f"Message too big for transaction: {payload_length}")
-                    raise e
-                except Exception:
-                    logger.exception("Unhandled error pulling messages from processing")
-                    await asyncio.sleep(self.pull_time_error_backoff)
-
-
 @contextmanager
 def run_in_span(headers: dict[str, str]):
     # Create a span for handling this message
nucliadb/ingest/consumer/service.py CHANGED
@@ -24,11 +24,10 @@ from typing import Awaitable, Callable, Optional
 
 from nucliadb.common.back_pressure.materializer import BackPressureMaterializer
 from nucliadb.common.back_pressure.settings import settings as back_pressure_settings
-from nucliadb.common.back_pressure.utils import is_back_pressure_enabled
 from nucliadb.common.maindb.utils import setup_driver
 from nucliadb.ingest import SERVICE_NAME, logger
-from nucliadb.ingest.consumer.consumer import IngestConsumer, IngestProcessedConsumer
-from nucliadb.ingest.consumer.pull import PullV2Worker, PullWorker
+from nucliadb.ingest.consumer.consumer import IngestConsumer
+from nucliadb.ingest.consumer.pull import PullV2Worker
 from nucliadb.ingest.settings import settings
 from nucliadb_utils.exceptions import ConfigurationError
 from nucliadb_utils.settings import indexing_settings, transaction_settings
@@ -79,38 +78,6 @@ async def stop_back_pressure(materializer: BackPressureMaterializer) -> None:
     await materializer.nats_manager.finalize()
 
 
-async def start_pull_workers(
-    service_name: Optional[str] = None,
-) -> list[Callable[[], Awaitable[None]]]:
-    finalizers: list[Callable[[], Awaitable[None]]] = []
-
-    driver = await setup_driver()
-    pubsub = await get_pubsub()
-    storage = await get_storage(service_name=service_name or SERVICE_NAME)
-    back_pressure = None
-    if is_back_pressure_enabled():
-        back_pressure = await start_back_pressure()
-        finalizers.append(partial(stop_back_pressure, back_pressure))
-    tasks = []
-    for partition in settings.partitions:
-        worker = PullWorker(
-            driver=driver,
-            partition=partition,
-            storage=storage,
-            pull_time_error_backoff=settings.pull_time_error_backoff,
-            pubsub=pubsub,
-            local_subscriber=transaction_settings.transaction_local,
-            pull_api_timeout=settings.pull_api_timeout,
-            back_pressure=back_pressure,
-        )
-        task = asyncio.create_task(worker.loop())
-        task.add_done_callback(_handle_task_result)
-        tasks.append(task)
-    if len(tasks):
-        finalizers.append(partial(_exit_tasks, tasks))
-    return finalizers
-
-
 async def start_ingest_consumers(
     service_name: Optional[str] = None,
 ) -> Callable[[], Awaitable[None]]:
@@ -147,36 +114,6 @@
     return _finalize
 
 
-async def start_ingest_processed_consumer(
-    service_name: Optional[str] = None,
-) -> Callable[[], Awaitable[None]]:
-    """
-    This is not meant to be deployed with a stateful set like the other consumers.
-
-    We are not maintaining transactionability based on the nats sequence id from this
-    consumer and we will start off by not separating writes by partition AND
-    allowing NATS to manage the queue group for us.
-    """
-    if transaction_settings.transaction_local:
-        raise ConfigurationError("Can not start ingest consumers in local mode")
-
-    driver = await setup_driver()
-    pubsub = await get_pubsub()
-    storage = await get_storage(service_name=service_name or SERVICE_NAME)
-    nats_connection_manager = get_nats_manager()
-
-    consumer = IngestProcessedConsumer(
-        driver=driver,
-        partition="-1",
-        storage=storage,
-        pubsub=pubsub,
-        nats_connection_manager=nats_connection_manager,
-    )
-    await consumer.initialize()
-
-    return nats_connection_manager.finalize
-
-
 async def start_ingest_processed_consumer_v2(
     service_name: Optional[str] = None,
 ) -> Callable[[], Awaitable[None]]:
nucliadb/ingest/settings.py CHANGED
@@ -92,7 +92,5 @@ class Settings(DriverSettings):
 
     max_concurrent_ingest_processing: int = 5
 
-    processing_pull_mode: ProcessingPullMode = ProcessingPullMode.V1
-
 
 settings = Settings()
nucliadb/standalone/api_router.py CHANGED
@@ -21,15 +21,12 @@ import logging
 import time
 
 import orjson
-import pydantic
 from fastapi import Request
 from fastapi.responses import JSONResponse
 from fastapi.routing import APIRouter
 from fastapi_versioning import version
 from jwcrypto import jwe, jwk  # type: ignore
 
-from nucliadb.common import datamanagers
-from nucliadb.common.http_clients import processing
 from nucliadb.common.http_clients.auth import NucliaAuthHTTPClient
 from nucliadb.standalone import versions
 from nucliadb_models.resource import NucliaDBRoles
@@ -123,35 +120,3 @@ async def versions_endpoint(request: Request) -> JSONResponse:
             for package in versions.WatchedPackages
         }
     )
-
-
-@standalone_api_router.get("/pull/position")
-async def pull_status(request: Request) -> JSONResponse:
-    async with datamanagers.with_ro_transaction() as txn:
-        # standalone assumes 1 partition
-        current_offset = await datamanagers.processing.get_pull_offset(
-            txn, pull_type_id=processing.get_nua_api_id(), partition="1"
-        )
-
-    async with processing.ProcessingHTTPClient() as client:
-        end_offset = await client.pull_position(partition="1")
-
-    return JSONResponse({"current_offset": current_offset, "end_offset": end_offset})
-
-
-class UpdatePullPosition(pydantic.BaseModel):
-    cursor: int
-
-
-@standalone_api_router.patch("/pull/position")
-async def update_pull_position(request: Request, item: UpdatePullPosition) -> JSONResponse:
-    async with datamanagers.with_transaction() as txn:
-        # standalone assumes 1 partition
-        await datamanagers.processing.set_pull_offset(
-            txn,
-            pull_type_id=processing.get_nua_api_id(),
-            partition="1",
-            offset=item.cursor,
-        )
-        await txn.commit()
-    return JSONResponse({})
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: nucliadb
-Version: 6.4.1.post4337
+Version: 6.4.1.post4342
 Summary: NucliaDB
 Author-email: Nuclia <nucliadb@nuclia.com>
 License-Expression: AGPL-3.0-or-later
@@ -19,11 +19,11 @@ Classifier: Programming Language :: Python :: 3.12
 Classifier: Programming Language :: Python :: 3 :: Only
 Requires-Python: <4,>=3.9
 Description-Content-Type: text/markdown
-Requires-Dist: nucliadb-telemetry[all]>=6.4.1.post4337
-Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.4.1.post4337
-Requires-Dist: nucliadb-protos>=6.4.1.post4337
-Requires-Dist: nucliadb-models>=6.4.1.post4337
-Requires-Dist: nidx-protos>=6.4.1.post4337
+Requires-Dist: nucliadb-telemetry[all]>=6.4.1.post4342
+Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.4.1.post4342
+Requires-Dist: nucliadb-protos>=6.4.1.post4342
+Requires-Dist: nucliadb-models>=6.4.1.post4342
+Requires-Dist: nidx-protos>=6.4.1.post4342
 Requires-Dist: nucliadb-admin-assets>=1.0.0.post1224
 Requires-Dist: nuclia-models>=0.24.2
 Requires-Dist: uvicorn[standard]
@@ -62,7 +62,7 @@ nucliadb/common/nidx.py,sha256=3EeQGjM_gxK0l_Rb54fspFWVNnzUiKF-_GMxTiiDC8Q,9116
 nucliadb/common/vector_index_config.py,sha256=LqGwhrDCp1q1vBow3scd1Chhr4GLYjYnGL72FKvOYYc,1552
 nucliadb/common/back_pressure/__init__.py,sha256=paAcAZcfGRTyURF9lnn3vX0vcwakTEVswG_xcdGBH-U,928
 nucliadb/common/back_pressure/cache.py,sha256=ANvXglWzI5naAD6N4E_fNi17qS6KNyAhjLeh6WlZZ84,2931
-nucliadb/common/back_pressure/materializer.py,sha256=YzYfN7xI5nlmSowbdLktWIkrJJb3Q2vEmoyz9O3eb2s,11667
+nucliadb/common/back_pressure/materializer.py,sha256=bXUalaaTMdrltm23ezkoymcRPJl7Ha8RVTj7xdVfHgQ,11468
 nucliadb/common/back_pressure/settings.py,sha256=3qNOzbI0KC6LMy-wMilXRSBfZu6CCpGHod26MTgAZ2o,3082
 nucliadb/common/back_pressure/utils.py,sha256=aZeP1XSkdgaRgZC76yR9Kje3511ZUCp7KB-XzcvhMYY,2018
 nucliadb/common/cluster/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
@@ -77,7 +77,7 @@ nucliadb/common/cluster/standalone/__init__.py,sha256=itSI7dtTwFP55YMX4iK7JzdMHS
 nucliadb/common/cluster/standalone/utils.py,sha256=af3r-x_GF7A6dwIAhZLR-r-SZQEVxsFrDKeMfUTA6G0,1908
 nucliadb/common/context/__init__.py,sha256=IKAHuiCjbOEsqfLozWwJ6mRFzFncsZMyxNC5E_XZ5EM,6016
 nucliadb/common/context/fastapi.py,sha256=mH_8n5t7quNSPivNM2JS5EQf2sTVJsdzXW6LaY7EHAA,1629
-nucliadb/common/datamanagers/__init__.py,sha256=jksw4pXyXb05SG3EN-BPBrhc1u1Ge_m21PYqD7NYQEs,2118
+nucliadb/common/datamanagers/__init__.py,sha256=xKc6ZMqKUs20R90jJT4xkQ8TFMNwQnhhuWnBBqVnKdM,2084
 nucliadb/common/datamanagers/atomic.py,sha256=WihdtBWQIAuElZQjh1xQ--q5dJowwlkovqsW-OB_t2k,3230
 nucliadb/common/datamanagers/cluster.py,sha256=iU0b7AESm1Yi8Wp3pIKgqixZGNMjeBrxSpvEKsaZKgY,1831
 nucliadb/common/datamanagers/entities.py,sha256=gI-0mbMlqrr9FiyhexEh6czhgYcMxE2s9m4o866EK9o,5340
@@ -85,7 +85,6 @@ nucliadb/common/datamanagers/exceptions.py,sha256=Atz_PP_GGq4jgJaWcAkcRbHBoBaGcC
 nucliadb/common/datamanagers/fields.py,sha256=9KqBzTssAT68FR5hd17Xu_CSwAYdKFuYic1ITnrfFNc,3971
 nucliadb/common/datamanagers/kb.py,sha256=P7EhF4tApIUG2jw_HH1oMufTKG9__kuOLKnrCNGbDM4,6156
 nucliadb/common/datamanagers/labels.py,sha256=Zm0GQpSPoGXEEysUY7VsDIcyKSIIQsMVphj23IyM9_c,4502
-nucliadb/common/datamanagers/processing.py,sha256=ByxdZzdbAfJGqC6__mY-zryjk040TyQfcUq3rxujeoY,1587
 nucliadb/common/datamanagers/resources.py,sha256=VwFdCyHSnzMU3ASYRhC-wuCjCQEjOKEF7tIob4lTcPg,10793
 nucliadb/common/datamanagers/rollover.py,sha256=GKdGv5goJVi3B3ZjawnMuQkgYeZjpCqxRYFz0VIswrE,7813
 nucliadb/common/datamanagers/search_configurations.py,sha256=O-8eW43CE46GcxO6TB5hpi27NBguv4BL4SI1vLlN8os,2463
@@ -101,7 +100,7 @@ nucliadb/common/external_index_providers/settings.py,sha256=EGHnIkwxqe6aypwKegXT
 nucliadb/common/http_clients/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
 nucliadb/common/http_clients/auth.py,sha256=srfpgAbs2wmqA9u_l-HxsV4YoO77Tse4y3gm3q2YvYM,2112
 nucliadb/common/http_clients/exceptions.py,sha256=47Y8OjkaGV_F18G07FpJhOzgWKUIexhlILyuVtICz8s,1100
-nucliadb/common/http_clients/processing.py,sha256=crLfKo_2RJr9Uo2vuq11MWFa9tV2njA_v7ZBd95tjNU,9589
+nucliadb/common/http_clients/processing.py,sha256=mKd9vRK-Wb71UG2LCoGu47wmnN5krqA0D1Z8vitsBPE,8976
 nucliadb/common/http_clients/pypi.py,sha256=VHIUjwJEJVntVUo_FRoXIo8sLmluy7sa9-iXSITcrMY,1540
 nucliadb/common/http_clients/utils.py,sha256=yGUkHNS41abHiBoHqo_Mg3QSqGsS7rUtbfGftbEC57U,1529
 nucliadb/common/maindb/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
@@ -120,22 +119,22 @@ nucliadb/export_import/exporter.py,sha256=k2QVx1EjqFlDYiggriWiEJzwtMXzHbldsqWdpG
 nucliadb/export_import/importer.py,sha256=GNDMt4hdjbcLWdydVq8XFQKefzNJkQ1eTzhshUX64rk,4231
 nucliadb/export_import/models.py,sha256=dbjScNkiMRv4X3Ktudy1JRliD25bfoDTy3JmEZgQSCc,2121
 nucliadb/export_import/tasks.py,sha256=DWbdqY97ffoyfipelGXz3Jqz1iam6JCjQSh367Fc3NA,2947
-nucliadb/export_import/utils.py,sha256=iutS86YblS8aLQ9PCZUyTJMN6lDV4DjcjaptQVBfBNA,22874
+nucliadb/export_import/utils.py,sha256=XV3tJJdhgnVJRSj8AxZjgeipONtB107M185HVJmHp2Q,21626
 nucliadb/ingest/__init__.py,sha256=fsw3C38VP50km3R-nHL775LNGPpJ4JxqXJ2Ib1f5SqE,1011
-nucliadb/ingest/app.py,sha256=Eympy8nbz09VDNPF28MuIeKMb7wgB9cTSOObS8uvL0o,8372
+nucliadb/ingest/app.py,sha256=Heyd5TubnM6HOo4eQdjg-laedALu1vq96B0XJ5T5QUc,7400
 nucliadb/ingest/partitions.py,sha256=2NIhMYbNT0TNBL6bX1UMSi7vxFGICstCKEqsB0TXHOE,2410
 nucliadb/ingest/processing.py,sha256=QmkHq-BU4vub7JRWe9VHvQ2DcAmT6-CzgFXuZxXhcBU,20953
 nucliadb/ingest/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 nucliadb/ingest/serialize.py,sha256=-TIjibJTbMqAowzRvyrG3R209vKqBZqXpdrQL9Dq4lo,16135
-nucliadb/ingest/settings.py,sha256=inB5SpkSI6sRd-ftlJIHFH6XlbuiSaRdL-F2WGyseUw,3249
+nucliadb/ingest/settings.py,sha256=5qJICxwYb028a2iAhVbxOJB5X-hWtDLtiya-YhWostw,3179
 nucliadb/ingest/utils.py,sha256=l1myURu3r8oA11dx3GpHw-gNTUc1AFX8xdPm9Lgl2rA,2275
 nucliadb/ingest/consumer/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
 nucliadb/ingest/consumer/auditing.py,sha256=xK21DIa_ZAiOJVVbnkmT4jgCRGshNGyPyxsqhE6kROE,7204
-nucliadb/ingest/consumer/consumer.py,sha256=GfdlrNlnt7PWYyk75xtyzn2SHZse7475U4U9q_9jKr0,13711
+nucliadb/ingest/consumer/consumer.py,sha256=1OetpJXp6glaAe4kKqUA_L46BS-ZyEccTkwt7TGf0Zw,11658
 nucliadb/ingest/consumer/materializer.py,sha256=tgD_rDI2twQzcz8kKNiW_L4YIth16IGh9mUfD5wiSD4,3858
 nucliadb/ingest/consumer/metrics.py,sha256=ji1l_4cKiHJthQd8YNem1ft4iMbw9KThmVvJmLcv3Xg,1075
-nucliadb/ingest/consumer/pull.py,sha256=x39G6AcNXSnw_GRPxJfafmD5pehZzMBd6v_f_yrNbUI,17594
-nucliadb/ingest/consumer/service.py,sha256=WXBN8dY7MlmYWxqQHIbIO7w_SdVJRY1RuHAWlQUXf8o,8852
+nucliadb/ingest/consumer/pull.py,sha256=vAOu2Zum-1e4RipoHvzzIha5PoNV28_C0nciQ2UFphc,8831
+nucliadb/ingest/consumer/service.py,sha256=8AD41mMN7EUeUtk4ZNy14zfvxzwmVjIX6Mwe05-bomA,6543
 nucliadb/ingest/consumer/shard_creator.py,sha256=w0smEu01FU_2cjZnsfBRNqT_Ntho11X17zTMST-vKbc,4359
 nucliadb/ingest/consumer/utils.py,sha256=jpX8D4lKzuPCpArQLZeX_Zczq3pfen_zAf8sPJfOEZU,2642
 nucliadb/ingest/fields/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
@@ -275,7 +274,7 @@ nucliadb/search/search/query_parser/parsers/graph.py,sha256=lDRJO_JvOe7yytNgXZyM
 nucliadb/search/search/query_parser/parsers/search.py,sha256=yEebeMOXJza7HMK3TdIPO6UGQbe79maSDg-GgohQIMk,10517
 nucliadb/search/search/query_parser/parsers/unit_retrieval.py,sha256=rW3YHDWLkI2Hhznl_1oOMhC01bwZMAjv-Wu3iHPIaiU,11475
 nucliadb/standalone/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
-nucliadb/standalone/api_router.py,sha256=hgq9FXpihzgjHkwcVGfGCSwyXy67fqXTfLFHuINzIi0,5567
+nucliadb/standalone/api_router.py,sha256=zRSMlaRVHUDGTYA3zC03UV_aLLn-ch-kaeWn1tEjTXw,4338
 nucliadb/standalone/app.py,sha256=mAApNK_iVsQgJyd-mtwCeZq5csSimwnXmlQGH9a70pE,5586
 nucliadb/standalone/auth.py,sha256=UwMv-TywhMZabvVg3anQLeCRdoHDnWf2o3luvnoNBjs,7670
 nucliadb/standalone/config.py,sha256=hJ3p4dBRSsj5FOmIgAiEX9ZsAGUYd1W-_UJIol5LCCg,4967
@@ -368,8 +367,8 @@ nucliadb/writer/tus/local.py,sha256=7jYa_w9b-N90jWgN2sQKkNcomqn6JMVBOVeDOVYJHto,
 nucliadb/writer/tus/s3.py,sha256=vF0NkFTXiXhXq3bCVXXVV-ED38ECVoUeeYViP8uMqcU,8357
 nucliadb/writer/tus/storage.py,sha256=ToqwjoYnjI4oIcwzkhha_MPxi-k4Jk3Lt55zRwaC1SM,2903
 nucliadb/writer/tus/utils.py,sha256=MSdVbRsRSZVdkaum69_0wku7X3p5wlZf4nr6E0GMKbw,2556
-nucliadb-6.4.1.post4337.dist-info/METADATA,sha256=TsjrmGAiWsREU2sPMDsUTQbvxvMIf5Y90dMtVAlgTpA,4152
-nucliadb-6.4.1.post4337.dist-info/WHEEL,sha256=zaaOINJESkSfm_4HQVc5ssNzHCPXhJm0kEUakpsEHaU,91
-nucliadb-6.4.1.post4337.dist-info/entry_points.txt,sha256=XqGfgFDuY3zXQc8ewXM2TRVjTModIq851zOsgrmaXx4,1268
-nucliadb-6.4.1.post4337.dist-info/top_level.txt,sha256=hwYhTVnX7jkQ9gJCkVrbqEG1M4lT2F_iPQND1fCzF80,20
-nucliadb-6.4.1.post4337.dist-info/RECORD,,
+nucliadb-6.4.1.post4342.dist-info/METADATA,sha256=kWRXbTuecuTl1JD_PVyLTq-dEd6yt7z5ps8U2o5hioM,4152
+nucliadb-6.4.1.post4342.dist-info/WHEEL,sha256=zaaOINJESkSfm_4HQVc5ssNzHCPXhJm0kEUakpsEHaU,91
+nucliadb-6.4.1.post4342.dist-info/entry_points.txt,sha256=XqGfgFDuY3zXQc8ewXM2TRVjTModIq851zOsgrmaXx4,1268
+nucliadb-6.4.1.post4342.dist-info/top_level.txt,sha256=hwYhTVnX7jkQ9gJCkVrbqEG1M4lT2F_iPQND1fCzF80,20
+nucliadb-6.4.1.post4342.dist-info/RECORD,,
nucliadb/common/datamanagers/processing.py DELETED
@@ -1,41 +0,0 @@
-# Copyright (C) 2021 Bosutech XXI S.L.
-#
-# nucliadb is offered under the AGPL v3.0 and as commercial software.
-# For commercial licensing, contact us at info@nuclia.com.
-#
-# AGPL:
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU Affero General Public License as
-# published by the Free Software Foundation, either version 3 of the
-# License, or (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU Affero General Public License for more details.
-#
-# You should have received a copy of the GNU Affero General Public License
-# along with this program. If not, see <http://www.gnu.org/licenses/>.
-#
-import logging
-from typing import Optional
-
-from nucliadb.common.maindb.driver import Transaction
-
-logger = logging.getLogger(__name__)
-
-
-PULL_PARTITION_OFFSET = "/processing/pull-offset/{pull_type_id}/{partition}"
-
-
-async def get_pull_offset(txn: Transaction, *, pull_type_id: str, partition: str) -> Optional[int]:
-    key = PULL_PARTITION_OFFSET.format(pull_type_id=pull_type_id, partition=partition)
-    val: Optional[bytes] = await txn.get(key)
-    if val is not None:
-        return int(val)
-    return None
-
-
-async def set_pull_offset(txn: Transaction, *, pull_type_id: str, partition: str, offset: int) -> None:
-    key = PULL_PARTITION_OFFSET.format(pull_type_id=pull_type_id, partition=partition)
-    await txn.set(key, str(offset).encode())