nucliadb 6.4.1.post4337__py3-none-any.whl → 6.4.1.post4342__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
nucliadb/common/back_pressure/materializer.py CHANGED
@@ -37,7 +37,6 @@ from nucliadb.common.back_pressure.utils import (
 from nucliadb.common.context import ApplicationContext
 from nucliadb.common.http_clients.processing import ProcessingHTTPClient
 from nucliadb_telemetry import metrics
-from nucliadb_utils import const
 from nucliadb_utils.nats import NatsConnectionManager
 from nucliadb_utils.settings import is_onprem_nucliadb
 
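The hunk below switches the ingest-backlog measurement from querying NATS consumer state to a single call against the processing API. As a minimal sketch of the resulting polling pattern, assuming only what this diff introduces (an async pull_status() returning an object with an integer pending field); the class name and interval here are illustrative, not part of the package:

    import asyncio

    class PendingIngestPoller:
        """Keeps an ingest-backlog counter fresh by polling the processing API."""

        def __init__(self, client, interval_seconds: float = 30.0):
            self.client = client  # assumed to expose pull_status(), per this diff
            self.interval_seconds = interval_seconds  # illustrative value
            self.ingest_pending: int = 0

        async def run(self) -> None:
            while True:
                try:
                    # One HTTP round-trip replaces the old NATS consumer-info lookup.
                    status = await self.client.pull_status()
                    self.ingest_pending = status.pending
                except Exception:
                    # On transient errors, keep the last known value and retry later.
                    pass
                await asyncio.sleep(self.interval_seconds)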
@@ -162,11 +161,8 @@ class BackPressureMaterializer:
         while True:
             try:
                 with back_pressure_observer({"type": "get_ingest_pending"}):
-                    self.ingest_pending = await get_nats_consumer_pending_messages(
-                        self.nats_manager,
-                        stream=const.Streams.INGEST_PROCESSED.name,
-                        consumer=const.Streams.INGEST_PROCESSED.group,
-                    )
+                    status = await self.processing_http_client.pull_status()
+                    self.ingest_pending = status.pending
             except Exception:  # pragma: no cover
                 logger.exception(
                     "Error getting pending messages to ingest",
nucliadb/common/datamanagers/__init__.py CHANGED
@@ -36,7 +36,6 @@ from . import (
     fields,
     kb,
     labels,
-    processing,
     resources,
     rollover,
     search_configurations,
@@ -53,7 +52,6 @@ __all__ = (
    "fields",
    "kb",
    "labels",
-    "processing",
    "resources",
    "rollover",
    "search_configurations",
nucliadb/common/http_clients/processing.py CHANGED
@@ -184,6 +184,10 @@ class PullResponseV2(pydantic.BaseModel):
     pending: int
 
 
+class PullStatusResponse(pydantic.BaseModel):
+    pending: int
+
+
 JSON_HEADERS = {"Content-Type": "application/json"}
 
 
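PullStatusResponse has a single integer field, so the endpoint's entire contract is the JSON body {"pending": <int>}. An illustrative round trip (the count is made up), parsing a body the same way the client method added further below does:

    import pydantic

    class PullStatusResponse(pydantic.BaseModel):
        pending: int

    status = PullStatusResponse.model_validate_json('{"pending": 12}')
    assert status.pending == 12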
@@ -205,32 +209,6 @@ class ProcessingHTTPClient:
     async def close(self):
         await self.session.close()
 
-    async def pull(
-        self,
-        partition: str,
-        cursor: Optional[int] = None,
-        limit: int = 3,
-        timeout: int = 1,
-    ) -> PullResponse:
-        url = self.base_url + "/pull"
-        params = {"partition": partition, "limit": limit, "timeout": timeout}
-        if cursor is not None:
-            params["from_cursor"] = cursor
-
-        async with self.session.get(url, headers=self.headers, params=params) as resp:
-            resp_text = await resp.text()
-            check_status(resp, resp_text)
-            return PullResponse.model_validate_json(resp_text)
-
-    async def pull_position(self, partition: str) -> int:
-        url = self.base_url + "/pull/position"
-        params = {"partition": partition}
-        async with self.session.get(url, headers=self.headers, params=params) as resp:
-            resp_text = await resp.text()
-            check_status(resp, resp_text)
-            data = PullPosition.model_validate_json(resp_text)
-            return data.cursor
-
     async def in_progress(self, ack_token: str):
         url = self.base_url_v2 + "/pull/in_progress"
         request = InProgressRequest(ack=[ack_token])
@@ -256,6 +234,14 @@ class ProcessingHTTPClient:
         else:
             return PullResponseV2.model_validate_json(resp_text)
 
+    async def pull_status(self) -> PullStatusResponse:
+        url = self.base_url_v2 + "/pull/status"
+        async with self.session.get(url, headers=self.headers) as resp:
+            resp_text = await resp.text()
+            check_status(resp, resp_text)
+
+            return PullStatusResponse.model_validate_json(resp_text)
+
     async def requests(
         self,
         cursor: Optional[str] = None,
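Putting the new model and method together, probing the v2 pull backlog looks roughly like this; a sketch assuming a default-constructed client and a reachable endpoint (the client is used as an async context manager, as the removed pull-worker code in this diff did):

    import asyncio

    from nucliadb.common.http_clients.processing import ProcessingHTTPClient

    async def main() -> None:
        async with ProcessingHTTPClient() as client:
            status = await client.pull_status()
            print(f"messages pending to pull: {status.pending}")

    asyncio.run(main())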
nucliadb/export_import/utils.py CHANGED
@@ -40,8 +40,6 @@ from nucliadb_models.export_import import Status
 from nucliadb_protos import knowledgebox_pb2 as kb_pb2
 from nucliadb_protos import resources_pb2, writer_pb2
 from nucliadb_protos.writer_pb2_grpc import WriterStub
-from nucliadb_utils.const import Streams
-from nucliadb_utils.transaction import MaxTransactionSizeExceededError
 from nucliadb_utils.utilities import get_ingest
 
 BinaryStream = AsyncIterator[bytes]
@@ -130,37 +128,6 @@ async def process_bm_grpc(context: ApplicationContext, bm: writer_pb2.BrokerMessage
     assert response.status == writer_pb2.OpStatusWriter.Status.OK, "Failed to process broker message"
 
 
-async def transaction_commit(
-    context: ApplicationContext, bm: writer_pb2.BrokerMessage, partition: int
-) -> None:
-    """
-    Try to send the broker message over nats. If it's too big, upload
-    it to blob storage and over nats only send a reference to it.
-    """
-    try:
-        await context.transaction.commit(
-            bm,
-            partition,
-            wait=False,
-            target_subject=Streams.INGEST_PROCESSED.subject,
-        )
-    except MaxTransactionSizeExceededError:
-        stored_key = await context.blob_storage.set_stream_message(
-            kbid=bm.kbid, rid=bm.uuid, data=bm.SerializeToString()
-        )
-        referenced_bm = writer_pb2.BrokerMessageBlobReference(
-            uuid=bm.uuid, kbid=bm.kbid, storage_key=stored_key
-        )
-        await context.transaction.commit(
-            writer=referenced_bm,
-            partition=partition,
-            target_subject=Streams.INGEST_PROCESSED.subject,
-            # This header is needed as it's the way we flag the transaction
-            # consumer to download from storage
-            headers={"X-MESSAGE-TYPE": "PROXY"},
-        )
-
-
 def get_writer_bm(bm: writer_pb2.BrokerMessage) -> writer_pb2.BrokerMessage:
     wbm = writer_pb2.BrokerMessage()
     wbm.CopyFrom(bm)
nucliadb/ingest/app.py CHANGED
@@ -32,7 +32,7 @@ from nucliadb.ingest.consumer import service as consumer_service
 from nucliadb.ingest.partitions import assign_partitions
 from nucliadb.ingest.processing import start_processing_engine, stop_processing_engine
 from nucliadb.ingest.service import start_grpc
-from nucliadb.ingest.settings import ProcessingPullMode, settings
+from nucliadb.ingest.settings import settings
 from nucliadb.ingest.utils import start_ingest as start_ingest_utility
 from nucliadb.ingest.utils import stop_ingest as stop_ingest_utility
 from nucliadb_telemetry import errors
@@ -101,12 +101,7 @@ async def initialize_grpc():  # pragma: no cover
 
 async def initialize_pull_workers() -> list[Callable[[], Awaitable[None]]]:
     finalizers = await initialize_grpc()
-    if settings.processing_pull_mode == ProcessingPullMode.V1:
-        pull_workers = await consumer_service.start_pull_workers(SERVICE_NAME)
-    elif settings.processing_pull_mode == ProcessingPullMode.V2:
-        pull_workers = [await consumer_service.start_ingest_processed_consumer_v2(SERVICE_NAME)]
-    else:
-        raise Exception("Processing pull workers not enabled and it is required")
+    pull_workers = [await consumer_service.start_ingest_processed_consumer_v2(SERVICE_NAME)]
 
     return pull_workers + finalizers
 
@@ -117,17 +112,9 @@ async def main_consumer():  # pragma: no cover
 
     grpc_health_finalizer = await health.start_grpc_health_service(settings.grpc_port)
 
-    # pull workers could be pulled out into it's own deployment
-    if settings.processing_pull_mode == ProcessingPullMode.V1:
-        pull_workers = await consumer_service.start_pull_workers(SERVICE_NAME)
-    else:
-        # In v2, pull workers run inside the ingest consumer
-        pull_workers = []
     ingest_consumers = await consumer_service.start_ingest_consumers(SERVICE_NAME)
 
-    await run_until_exit(
-        [grpc_health_finalizer, ingest_consumers, metrics_server.shutdown] + pull_workers + finalizers
-    )
+    await run_until_exit([grpc_health_finalizer, ingest_consumers, metrics_server.shutdown] + finalizers)
 
 
 async def main_orm_grpc():  # pragma: no cover
@@ -144,15 +131,7 @@ async def main_ingest_processed_consumer():  # pragma: no cover
     metrics_server = await serve_metrics()
     grpc_health_finalizer = await health.start_grpc_health_service(settings.grpc_port)
 
-    if settings.processing_pull_mode == ProcessingPullMode.V1:
-        consumer = await consumer_service.start_ingest_processed_consumer(SERVICE_NAME)
-    elif settings.processing_pull_mode == ProcessingPullMode.V2:
-        consumer = await consumer_service.start_ingest_processed_consumer_v2(SERVICE_NAME)
-    else:
-        # Off
-        async def fake_consumer(): ...
-
-        consumer = fake_consumer
+    consumer = await consumer_service.start_ingest_processed_consumer_v2(SERVICE_NAME)
 
     await run_until_exit(
         [grpc_health_finalizer, consumer, metrics_server.shutdown, stop_processing_engine] + finalizers
nucliadb/ingest/consumer/consumer.py CHANGED
@@ -270,47 +270,3 @@ class IngestConsumer:
             await self.ack_message(msg, kbid)
             logger.info("Message acked because of success", extra={"seqid": seqid})
             await self.clean_broker_message(msg)
-
-
-class IngestProcessedConsumer(IngestConsumer):
-    """
-    Consumer designed to write processed resources to the database.
-
-    This is so that we can have a single consumer for both the regular writer and writes
-    coming from processor.
-
-    This is important because writes coming from processor can be very large and slow and
-    other writes are going to be coming from user actions and we don't want to slow them down.
-    """
-
-    async def setup_nats_subscription(self):
-        subject = const.Streams.INGEST_PROCESSED.subject
-        durable_name = const.Streams.INGEST_PROCESSED.group
-        self.subscription = await self.nats_connection_manager.pull_subscribe(
-            stream=const.Streams.INGEST_PROCESSED.name,
-            subject=subject,
-            durable=durable_name,
-            cb=self.subscription_worker,
-            subscription_lost_cb=self.setup_nats_subscription,
-            config=nats.js.api.ConsumerConfig(
-                durable_name=durable_name,
-                ack_policy=nats.js.api.AckPolicy.EXPLICIT,
-                deliver_policy=nats.js.api.DeliverPolicy.ALL,
-                # We set it to 20 because we don't care about order here and we want to be able to HPA based
-                # on the number of pending messages in the queue.
-                max_ack_pending=20,
-                max_deliver=nats_consumer_settings.nats_max_deliver,
-                ack_wait=nats_consumer_settings.nats_ack_wait,
-            ),
-        )
-        logger.info(
-            f"Subscribed pull consumer to {subject} on stream {const.Streams.INGEST_PROCESSED.name}"
-        )
-
-    @backoff.on_exception(backoff.expo, (ConflictError,), jitter=backoff.random_jitter, max_tries=4)
-    async def _process(self, pb: BrokerMessage, seqid: int):
-        """
-        We are setting `transaction_check` to False here because we can not mix
-        transaction ids from regular ingest writes and writes coming from processor.
-        """
-        await self.processor.process(pb, seqid, self.partition, transaction_check=False)
nucliadb/ingest/consumer/pull.py CHANGED
@@ -21,7 +21,6 @@ import asyncio
 import base64
 import time
 from contextlib import contextmanager
-from datetime import datetime, timezone
 from typing import Optional
 
 from aiohttp.client_exceptions import ClientConnectorError
@@ -32,9 +31,6 @@ from opentelemetry.trace import (
     Link,
 )
 
-from nucliadb.common import datamanagers
-from nucliadb.common.back_pressure.materializer import BackPressureMaterializer
-from nucliadb.common.back_pressure.utils import BackPressureException
 from nucliadb.common.http_clients.processing import (
     ProcessingHTTPClient,
     ProcessingPullMessageProgressUpdater,
@@ -45,214 +41,19 @@ from nucliadb.ingest import SERVICE_NAME, logger, logger_activity
 from nucliadb.ingest.consumer.consumer import consumer_observer
 from nucliadb.ingest.orm.exceptions import ReallyStopPulling
 from nucliadb.ingest.orm.processor import Processor
-from nucliadb_protos.writer_pb2 import BrokerMessage, BrokerMessageBlobReference
+from nucliadb_protos.writer_pb2 import BrokerMessage
 from nucliadb_telemetry import errors
 from nucliadb_telemetry.metrics import Gauge
 from nucliadb_telemetry.utils import get_telemetry
-from nucliadb_utils import const
 from nucliadb_utils.cache.pubsub import PubSubDriver
 from nucliadb_utils.settings import nuclia_settings
 from nucliadb_utils.storages.storage import Storage
 from nucliadb_utils.transaction import MaxTransactionSizeExceededError
-from nucliadb_utils.utilities import get_storage, get_transaction_utility, pull_subscriber_utilization
+from nucliadb_utils.utilities import pull_subscriber_utilization
 
 processing_pending_messages = Gauge("nucliadb_processing_pending_messages")
 
 
-class PullWorker:
-    """
-    The pull worker is responsible for pulling messages from the pull processing
-    http endpoint and injecting them into the processing write queue.
-
-    The processing pull endpoint is also described as the "processing proxy" at times.
-    """
-
-    def __init__(
-        self,
-        driver: Driver,
-        partition: str,
-        storage: Storage,
-        pull_time_error_backoff: int,
-        pubsub: Optional[PubSubDriver] = None,
-        local_subscriber: bool = False,
-        pull_time_empty_backoff: float = 5.0,
-        pull_api_timeout: int = 60,
-        back_pressure: Optional[BackPressureMaterializer] = None,
-    ):
-        self.partition = partition
-        self.pull_time_error_backoff = pull_time_error_backoff
-        self.pull_time_empty_backoff = pull_time_empty_backoff
-        self.pull_api_timeout = pull_api_timeout
-        self.local_subscriber = local_subscriber
-
-        self.processor = Processor(driver, storage, pubsub, partition)
-        self.back_pressure = back_pressure
-
-    def __str__(self) -> str:
-        return f"PullWorker(partition={self.partition})"
-
-    def __repr__(self) -> str:
-        return str(self)
-
-    async def handle_message(self, payload: str) -> None:
-        pb = BrokerMessage()
-        data = base64.b64decode(payload)
-        pb.ParseFromString(data)
-
-        logger.debug(f"Resource: {pb.uuid} KB: {pb.kbid} ProcessingID: {pb.processing_id}")
-
-        if not self.local_subscriber:
-            transaction_utility = get_transaction_utility()
-            if transaction_utility is None:
-                raise Exception("No transaction utility defined")
-            try:
-                await transaction_utility.commit(
-                    writer=pb,
-                    partition=int(self.partition),
-                    # send to separate processor
-                    target_subject=const.Streams.INGEST_PROCESSED.subject,
-                )
-            except MaxTransactionSizeExceededError:
-                storage = await get_storage()
-                stored_key = await storage.set_stream_message(kbid=pb.kbid, rid=pb.uuid, data=data)
-                referenced_pb = BrokerMessageBlobReference(
-                    uuid=pb.uuid, kbid=pb.kbid, storage_key=stored_key
-                )
-                await transaction_utility.commit(
-                    writer=referenced_pb,
-                    partition=int(self.partition),
-                    # send to separate processor
-                    target_subject=const.Streams.INGEST_PROCESSED.subject,
-                    headers={"X-MESSAGE-TYPE": "PROXY"},
-                )
-        else:
-            # No nats defined == monolitic nucliadb
-            await self.processor.process(
-                pb,
-                0,  # Fake sequence id as in local mode there's no transactions
-                partition=self.partition,
-                transaction_check=False,
-            )
-
-    async def back_pressure_check(self) -> None:
-        if self.back_pressure is None:
-            return
-        while True:
-            try:
-                self.back_pressure.check_indexing()
-                self.back_pressure.check_ingest()
-                break
-            except BackPressureException as exc:
-                sleep_time = (datetime.now(timezone.utc) - exc.data.try_after).total_seconds()
-                logger.warning(f"Back pressure active! Sleeping for {sleep_time} seconds", exc_info=True)
-                await asyncio.sleep(sleep_time)
-            except Exception as e:
-                errors.capture_exception(e)
-                logger.exception("Error while checking back pressure. Moving on")
-                break
-
-    async def loop(self):
-        """
-        Run this forever
-        """
-        while True:
-            await self.back_pressure_check()
-            try:
-                await self._loop()
-            except ReallyStopPulling:
-                logger.info("Exiting...")
-                break
-            except Exception as e:
-                errors.capture_exception(e)
-                logger.exception("Exception on worker", exc_info=e)
-                await asyncio.sleep(10)
-
-    async def _loop(self):
-        headers = {}
-        data = None
-        if nuclia_settings.nuclia_service_account is not None:
-            headers["X-STF-NUAKEY"] = f"Bearer {nuclia_settings.nuclia_service_account}"
-            # parse jwt sub to get pull type id
-            try:
-                pull_type_id = get_nua_api_id()
-            except Exception as exc:
-                logger.exception("Could not read NUA API Key. Can not start pull worker")
-                raise ReallyStopPulling() from exc
-        else:
-            pull_type_id = "main"
-
-        async with ProcessingHTTPClient() as processing_http_client:
-            logger.info(f"Collecting from NucliaDB Cloud {self.partition} partition")
-            while True:
-                try:
-                    async with datamanagers.with_ro_transaction() as txn:
-                        cursor = await datamanagers.processing.get_pull_offset(
-                            txn, pull_type_id=pull_type_id, partition=self.partition
-                        )
-
-                    data = await processing_http_client.pull(
-                        self.partition,
-                        cursor=cursor,
-                        timeout=self.pull_api_timeout,
-                    )
-                    if data.status == "ok":
-                        logger.info(
-                            "Message received from proxy",
-                            extra={"partition": self.partition, "cursor": data.cursor},
-                        )
-                        try:
-                            if data.payload is not None:
-                                await self.handle_message(data.payload)
-                            for payload in data.payloads:
-                                # If using cursors and multiple messages are returned, it will be in the
-                                # `payloads` property
-                                await self.handle_message(payload)
-                        except Exception as e:
-                            errors.capture_exception(e)
-                            logger.exception("Error while pulling and processing message/s")
-                            raise e
-                        async with datamanagers.with_transaction() as txn:
-                            await datamanagers.processing.set_pull_offset(
-                                txn,
-                                pull_type_id=pull_type_id,
-                                partition=self.partition,
-                                offset=data.cursor,
-                            )
-                            await txn.commit()
-                    elif data.status == "empty":
-                        logger_activity.debug(f"No messages waiting in partition #{self.partition}")
-                        await asyncio.sleep(self.pull_time_empty_backoff)
-                    else:
-                        logger.info(f"Proxy pull answered with error: {data}")
-                        await asyncio.sleep(self.pull_time_error_backoff)
-                except (
-                    asyncio.exceptions.CancelledError,
-                    RuntimeError,
-                    KeyboardInterrupt,
-                    SystemExit,
-                ):
-                    logger.info(f"Pull task for partition #{self.partition} was canceled, exiting")
-                    raise ReallyStopPulling()
-
-                except ClientConnectorError:
-                    logger.error(
-                        f"Could not connect to processing engine, \
-                        {processing_http_client.base_url} verify your internet connection"
-                    )
-                    await asyncio.sleep(self.pull_time_error_backoff)
-
-                except MaxTransactionSizeExceededError as e:
-                    if data is not None:
-                        payload_length = 0
-                        if data.payload:
-                            payload_length = len(base64.b64decode(data.payload))
-                        logger.error(f"Message too big for transaction: {payload_length}")
-                    raise e
-                except Exception:
-                    logger.exception("Unhandled error pulling messages from processing")
-                    await asyncio.sleep(self.pull_time_error_backoff)
-
-
 @contextmanager
 def run_in_span(headers: dict[str, str]):
     # Create a span for handling this message
nucliadb/ingest/consumer/service.py CHANGED
@@ -24,11 +24,10 @@ from typing import Awaitable, Callable, Optional
 
 from nucliadb.common.back_pressure.materializer import BackPressureMaterializer
 from nucliadb.common.back_pressure.settings import settings as back_pressure_settings
-from nucliadb.common.back_pressure.utils import is_back_pressure_enabled
 from nucliadb.common.maindb.utils import setup_driver
 from nucliadb.ingest import SERVICE_NAME, logger
-from nucliadb.ingest.consumer.consumer import IngestConsumer, IngestProcessedConsumer
-from nucliadb.ingest.consumer.pull import PullV2Worker, PullWorker
+from nucliadb.ingest.consumer.consumer import IngestConsumer
+from nucliadb.ingest.consumer.pull import PullV2Worker
 from nucliadb.ingest.settings import settings
 from nucliadb_utils.exceptions import ConfigurationError
 from nucliadb_utils.settings import indexing_settings, transaction_settings
@@ -79,38 +78,6 @@ async def stop_back_pressure(materializer: BackPressureMaterializer) -> None:
     await materializer.nats_manager.finalize()
 
 
-async def start_pull_workers(
-    service_name: Optional[str] = None,
-) -> list[Callable[[], Awaitable[None]]]:
-    finalizers: list[Callable[[], Awaitable[None]]] = []
-
-    driver = await setup_driver()
-    pubsub = await get_pubsub()
-    storage = await get_storage(service_name=service_name or SERVICE_NAME)
-    back_pressure = None
-    if is_back_pressure_enabled():
-        back_pressure = await start_back_pressure()
-        finalizers.append(partial(stop_back_pressure, back_pressure))
-    tasks = []
-    for partition in settings.partitions:
-        worker = PullWorker(
-            driver=driver,
-            partition=partition,
-            storage=storage,
-            pull_time_error_backoff=settings.pull_time_error_backoff,
-            pubsub=pubsub,
-            local_subscriber=transaction_settings.transaction_local,
-            pull_api_timeout=settings.pull_api_timeout,
-            back_pressure=back_pressure,
-        )
-        task = asyncio.create_task(worker.loop())
-        task.add_done_callback(_handle_task_result)
-        tasks.append(task)
-    if len(tasks):
-        finalizers.append(partial(_exit_tasks, tasks))
-    return finalizers
-
-
 async def start_ingest_consumers(
     service_name: Optional[str] = None,
 ) -> Callable[[], Awaitable[None]]:
@@ -147,36 +114,6 @@
     return _finalize
 
 
-async def start_ingest_processed_consumer(
-    service_name: Optional[str] = None,
-) -> Callable[[], Awaitable[None]]:
-    """
-    This is not meant to be deployed with a stateful set like the other consumers.
-
-    We are not maintaining transactionability based on the nats sequence id from this
-    consumer and we will start off by not separating writes by partition AND
-    allowing NATS to manage the queue group for us.
-    """
-    if transaction_settings.transaction_local:
-        raise ConfigurationError("Can not start ingest consumers in local mode")
-
-    driver = await setup_driver()
-    pubsub = await get_pubsub()
-    storage = await get_storage(service_name=service_name or SERVICE_NAME)
-    nats_connection_manager = get_nats_manager()
-
-    consumer = IngestProcessedConsumer(
-        driver=driver,
-        partition="-1",
-        storage=storage,
-        pubsub=pubsub,
-        nats_connection_manager=nats_connection_manager,
-    )
-    await consumer.initialize()
-
-    return nats_connection_manager.finalize
-
-
 async def start_ingest_processed_consumer_v2(
     service_name: Optional[str] = None,
 ) -> Callable[[], Awaitable[None]]:
nucliadb/ingest/settings.py CHANGED
@@ -92,7 +92,5 @@ class Settings(DriverSettings):
 
     max_concurrent_ingest_processing: int = 5
 
-    processing_pull_mode: ProcessingPullMode = ProcessingPullMode.V1
-
 
 settings = Settings()
nucliadb/standalone/api_router.py CHANGED
@@ -21,15 +21,12 @@ import logging
 import time
 
 import orjson
-import pydantic
 from fastapi import Request
 from fastapi.responses import JSONResponse
 from fastapi.routing import APIRouter
 from fastapi_versioning import version
 from jwcrypto import jwe, jwk  # type: ignore
 
-from nucliadb.common import datamanagers
-from nucliadb.common.http_clients import processing
 from nucliadb.common.http_clients.auth import NucliaAuthHTTPClient
 from nucliadb.standalone import versions
 from nucliadb_models.resource import NucliaDBRoles
@@ -123,35 +120,3 @@ async def versions_endpoint(request: Request) -> JSONResponse:
             for package in versions.WatchedPackages
         }
     )
-
-
-@standalone_api_router.get("/pull/position")
-async def pull_status(request: Request) -> JSONResponse:
-    async with datamanagers.with_ro_transaction() as txn:
-        # standalone assumes 1 partition
-        current_offset = await datamanagers.processing.get_pull_offset(
-            txn, pull_type_id=processing.get_nua_api_id(), partition="1"
-        )
-
-    async with processing.ProcessingHTTPClient() as client:
-        end_offset = await client.pull_position(partition="1")
-
-    return JSONResponse({"current_offset": current_offset, "end_offset": end_offset})
-
-
-class UpdatePullPosition(pydantic.BaseModel):
-    cursor: int
-
-
-@standalone_api_router.patch("/pull/position")
-async def update_pull_position(request: Request, item: UpdatePullPosition) -> JSONResponse:
-    async with datamanagers.with_transaction() as txn:
-        # standalone assumes 1 partition
-        await datamanagers.processing.set_pull_offset(
-            txn,
-            pull_type_id=processing.get_nua_api_id(),
-            partition="1",
-            offset=item.cursor,
-        )
-        await txn.commit()
-    return JSONResponse({})
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: nucliadb
-Version: 6.4.1.post4337
+Version: 6.4.1.post4342
 Summary: NucliaDB
 Author-email: Nuclia <nucliadb@nuclia.com>
 License-Expression: AGPL-3.0-or-later
@@ -19,11 +19,11 @@ Classifier: Programming Language :: Python :: 3.12
 Classifier: Programming Language :: Python :: 3 :: Only
 Requires-Python: <4,>=3.9
 Description-Content-Type: text/markdown
-Requires-Dist: nucliadb-telemetry[all]>=6.4.1.post4337
-Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.4.1.post4337
-Requires-Dist: nucliadb-protos>=6.4.1.post4337
-Requires-Dist: nucliadb-models>=6.4.1.post4337
-Requires-Dist: nidx-protos>=6.4.1.post4337
+Requires-Dist: nucliadb-telemetry[all]>=6.4.1.post4342
+Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.4.1.post4342
+Requires-Dist: nucliadb-protos>=6.4.1.post4342
+Requires-Dist: nucliadb-models>=6.4.1.post4342
+Requires-Dist: nidx-protos>=6.4.1.post4342
 Requires-Dist: nucliadb-admin-assets>=1.0.0.post1224
 Requires-Dist: nuclia-models>=0.24.2
 Requires-Dist: uvicorn[standard]
@@ -62,7 +62,7 @@ nucliadb/common/nidx.py,sha256=3EeQGjM_gxK0l_Rb54fspFWVNnzUiKF-_GMxTiiDC8Q,9116
 nucliadb/common/vector_index_config.py,sha256=LqGwhrDCp1q1vBow3scd1Chhr4GLYjYnGL72FKvOYYc,1552
 nucliadb/common/back_pressure/__init__.py,sha256=paAcAZcfGRTyURF9lnn3vX0vcwakTEVswG_xcdGBH-U,928
 nucliadb/common/back_pressure/cache.py,sha256=ANvXglWzI5naAD6N4E_fNi17qS6KNyAhjLeh6WlZZ84,2931
-nucliadb/common/back_pressure/materializer.py,sha256=YzYfN7xI5nlmSowbdLktWIkrJJb3Q2vEmoyz9O3eb2s,11667
+nucliadb/common/back_pressure/materializer.py,sha256=bXUalaaTMdrltm23ezkoymcRPJl7Ha8RVTj7xdVfHgQ,11468
 nucliadb/common/back_pressure/settings.py,sha256=3qNOzbI0KC6LMy-wMilXRSBfZu6CCpGHod26MTgAZ2o,3082
 nucliadb/common/back_pressure/utils.py,sha256=aZeP1XSkdgaRgZC76yR9Kje3511ZUCp7KB-XzcvhMYY,2018
 nucliadb/common/cluster/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
@@ -77,7 +77,7 @@ nucliadb/common/cluster/standalone/__init__.py,sha256=itSI7dtTwFP55YMX4iK7JzdMHS
 nucliadb/common/cluster/standalone/utils.py,sha256=af3r-x_GF7A6dwIAhZLR-r-SZQEVxsFrDKeMfUTA6G0,1908
 nucliadb/common/context/__init__.py,sha256=IKAHuiCjbOEsqfLozWwJ6mRFzFncsZMyxNC5E_XZ5EM,6016
 nucliadb/common/context/fastapi.py,sha256=mH_8n5t7quNSPivNM2JS5EQf2sTVJsdzXW6LaY7EHAA,1629
-nucliadb/common/datamanagers/__init__.py,sha256=jksw4pXyXb05SG3EN-BPBrhc1u1Ge_m21PYqD7NYQEs,2118
+nucliadb/common/datamanagers/__init__.py,sha256=xKc6ZMqKUs20R90jJT4xkQ8TFMNwQnhhuWnBBqVnKdM,2084
 nucliadb/common/datamanagers/atomic.py,sha256=WihdtBWQIAuElZQjh1xQ--q5dJowwlkovqsW-OB_t2k,3230
 nucliadb/common/datamanagers/cluster.py,sha256=iU0b7AESm1Yi8Wp3pIKgqixZGNMjeBrxSpvEKsaZKgY,1831
 nucliadb/common/datamanagers/entities.py,sha256=gI-0mbMlqrr9FiyhexEh6czhgYcMxE2s9m4o866EK9o,5340
@@ -85,7 +85,6 @@ nucliadb/common/datamanagers/exceptions.py,sha256=Atz_PP_GGq4jgJaWcAkcRbHBoBaGcC
 nucliadb/common/datamanagers/fields.py,sha256=9KqBzTssAT68FR5hd17Xu_CSwAYdKFuYic1ITnrfFNc,3971
 nucliadb/common/datamanagers/kb.py,sha256=P7EhF4tApIUG2jw_HH1oMufTKG9__kuOLKnrCNGbDM4,6156
 nucliadb/common/datamanagers/labels.py,sha256=Zm0GQpSPoGXEEysUY7VsDIcyKSIIQsMVphj23IyM9_c,4502
-nucliadb/common/datamanagers/processing.py,sha256=ByxdZzdbAfJGqC6__mY-zryjk040TyQfcUq3rxujeoY,1587
 nucliadb/common/datamanagers/resources.py,sha256=VwFdCyHSnzMU3ASYRhC-wuCjCQEjOKEF7tIob4lTcPg,10793
 nucliadb/common/datamanagers/rollover.py,sha256=GKdGv5goJVi3B3ZjawnMuQkgYeZjpCqxRYFz0VIswrE,7813
 nucliadb/common/datamanagers/search_configurations.py,sha256=O-8eW43CE46GcxO6TB5hpi27NBguv4BL4SI1vLlN8os,2463
@@ -101,7 +100,7 @@ nucliadb/common/external_index_providers/settings.py,sha256=EGHnIkwxqe6aypwKegXT
 nucliadb/common/http_clients/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
 nucliadb/common/http_clients/auth.py,sha256=srfpgAbs2wmqA9u_l-HxsV4YoO77Tse4y3gm3q2YvYM,2112
 nucliadb/common/http_clients/exceptions.py,sha256=47Y8OjkaGV_F18G07FpJhOzgWKUIexhlILyuVtICz8s,1100
-nucliadb/common/http_clients/processing.py,sha256=crLfKo_2RJr9Uo2vuq11MWFa9tV2njA_v7ZBd95tjNU,9589
+nucliadb/common/http_clients/processing.py,sha256=mKd9vRK-Wb71UG2LCoGu47wmnN5krqA0D1Z8vitsBPE,8976
 nucliadb/common/http_clients/pypi.py,sha256=VHIUjwJEJVntVUo_FRoXIo8sLmluy7sa9-iXSITcrMY,1540
 nucliadb/common/http_clients/utils.py,sha256=yGUkHNS41abHiBoHqo_Mg3QSqGsS7rUtbfGftbEC57U,1529
 nucliadb/common/maindb/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
@@ -120,22 +119,22 @@ nucliadb/export_import/exporter.py,sha256=k2QVx1EjqFlDYiggriWiEJzwtMXzHbldsqWdpG
 nucliadb/export_import/importer.py,sha256=GNDMt4hdjbcLWdydVq8XFQKefzNJkQ1eTzhshUX64rk,4231
 nucliadb/export_import/models.py,sha256=dbjScNkiMRv4X3Ktudy1JRliD25bfoDTy3JmEZgQSCc,2121
 nucliadb/export_import/tasks.py,sha256=DWbdqY97ffoyfipelGXz3Jqz1iam6JCjQSh367Fc3NA,2947
-nucliadb/export_import/utils.py,sha256=iutS86YblS8aLQ9PCZUyTJMN6lDV4DjcjaptQVBfBNA,22874
+nucliadb/export_import/utils.py,sha256=XV3tJJdhgnVJRSj8AxZjgeipONtB107M185HVJmHp2Q,21626
 nucliadb/ingest/__init__.py,sha256=fsw3C38VP50km3R-nHL775LNGPpJ4JxqXJ2Ib1f5SqE,1011
-nucliadb/ingest/app.py,sha256=Eympy8nbz09VDNPF28MuIeKMb7wgB9cTSOObS8uvL0o,8372
+nucliadb/ingest/app.py,sha256=Heyd5TubnM6HOo4eQdjg-laedALu1vq96B0XJ5T5QUc,7400
 nucliadb/ingest/partitions.py,sha256=2NIhMYbNT0TNBL6bX1UMSi7vxFGICstCKEqsB0TXHOE,2410
 nucliadb/ingest/processing.py,sha256=QmkHq-BU4vub7JRWe9VHvQ2DcAmT6-CzgFXuZxXhcBU,20953
 nucliadb/ingest/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 nucliadb/ingest/serialize.py,sha256=-TIjibJTbMqAowzRvyrG3R209vKqBZqXpdrQL9Dq4lo,16135
-nucliadb/ingest/settings.py,sha256=inB5SpkSI6sRd-ftlJIHFH6XlbuiSaRdL-F2WGyseUw,3249
+nucliadb/ingest/settings.py,sha256=5qJICxwYb028a2iAhVbxOJB5X-hWtDLtiya-YhWostw,3179
 nucliadb/ingest/utils.py,sha256=l1myURu3r8oA11dx3GpHw-gNTUc1AFX8xdPm9Lgl2rA,2275
 nucliadb/ingest/consumer/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
 nucliadb/ingest/consumer/auditing.py,sha256=xK21DIa_ZAiOJVVbnkmT4jgCRGshNGyPyxsqhE6kROE,7204
-nucliadb/ingest/consumer/consumer.py,sha256=GfdlrNlnt7PWYyk75xtyzn2SHZse7475U4U9q_9jKr0,13711
+nucliadb/ingest/consumer/consumer.py,sha256=1OetpJXp6glaAe4kKqUA_L46BS-ZyEccTkwt7TGf0Zw,11658
 nucliadb/ingest/consumer/materializer.py,sha256=tgD_rDI2twQzcz8kKNiW_L4YIth16IGh9mUfD5wiSD4,3858
 nucliadb/ingest/consumer/metrics.py,sha256=ji1l_4cKiHJthQd8YNem1ft4iMbw9KThmVvJmLcv3Xg,1075
-nucliadb/ingest/consumer/pull.py,sha256=x39G6AcNXSnw_GRPxJfafmD5pehZzMBd6v_f_yrNbUI,17594
-nucliadb/ingest/consumer/service.py,sha256=WXBN8dY7MlmYWxqQHIbIO7w_SdVJRY1RuHAWlQUXf8o,8852
+nucliadb/ingest/consumer/pull.py,sha256=vAOu2Zum-1e4RipoHvzzIha5PoNV28_C0nciQ2UFphc,8831
+nucliadb/ingest/consumer/service.py,sha256=8AD41mMN7EUeUtk4ZNy14zfvxzwmVjIX6Mwe05-bomA,6543
 nucliadb/ingest/consumer/shard_creator.py,sha256=w0smEu01FU_2cjZnsfBRNqT_Ntho11X17zTMST-vKbc,4359
 nucliadb/ingest/consumer/utils.py,sha256=jpX8D4lKzuPCpArQLZeX_Zczq3pfen_zAf8sPJfOEZU,2642
 nucliadb/ingest/fields/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
@@ -275,7 +274,7 @@ nucliadb/search/search/query_parser/parsers/graph.py,sha256=lDRJO_JvOe7yytNgXZyM
 nucliadb/search/search/query_parser/parsers/search.py,sha256=yEebeMOXJza7HMK3TdIPO6UGQbe79maSDg-GgohQIMk,10517
 nucliadb/search/search/query_parser/parsers/unit_retrieval.py,sha256=rW3YHDWLkI2Hhznl_1oOMhC01bwZMAjv-Wu3iHPIaiU,11475
 nucliadb/standalone/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
-nucliadb/standalone/api_router.py,sha256=hgq9FXpihzgjHkwcVGfGCSwyXy67fqXTfLFHuINzIi0,5567
+nucliadb/standalone/api_router.py,sha256=zRSMlaRVHUDGTYA3zC03UV_aLLn-ch-kaeWn1tEjTXw,4338
 nucliadb/standalone/app.py,sha256=mAApNK_iVsQgJyd-mtwCeZq5csSimwnXmlQGH9a70pE,5586
 nucliadb/standalone/auth.py,sha256=UwMv-TywhMZabvVg3anQLeCRdoHDnWf2o3luvnoNBjs,7670
 nucliadb/standalone/config.py,sha256=hJ3p4dBRSsj5FOmIgAiEX9ZsAGUYd1W-_UJIol5LCCg,4967
@@ -368,8 +367,8 @@ nucliadb/writer/tus/local.py,sha256=7jYa_w9b-N90jWgN2sQKkNcomqn6JMVBOVeDOVYJHto,
 nucliadb/writer/tus/s3.py,sha256=vF0NkFTXiXhXq3bCVXXVV-ED38ECVoUeeYViP8uMqcU,8357
 nucliadb/writer/tus/storage.py,sha256=ToqwjoYnjI4oIcwzkhha_MPxi-k4Jk3Lt55zRwaC1SM,2903
 nucliadb/writer/tus/utils.py,sha256=MSdVbRsRSZVdkaum69_0wku7X3p5wlZf4nr6E0GMKbw,2556
-nucliadb-6.4.1.post4337.dist-info/METADATA,sha256=TsjrmGAiWsREU2sPMDsUTQbvxvMIf5Y90dMtVAlgTpA,4152
-nucliadb-6.4.1.post4337.dist-info/WHEEL,sha256=zaaOINJESkSfm_4HQVc5ssNzHCPXhJm0kEUakpsEHaU,91
-nucliadb-6.4.1.post4337.dist-info/entry_points.txt,sha256=XqGfgFDuY3zXQc8ewXM2TRVjTModIq851zOsgrmaXx4,1268
-nucliadb-6.4.1.post4337.dist-info/top_level.txt,sha256=hwYhTVnX7jkQ9gJCkVrbqEG1M4lT2F_iPQND1fCzF80,20
-nucliadb-6.4.1.post4337.dist-info/RECORD,,
+nucliadb-6.4.1.post4342.dist-info/METADATA,sha256=kWRXbTuecuTl1JD_PVyLTq-dEd6yt7z5ps8U2o5hioM,4152
+nucliadb-6.4.1.post4342.dist-info/WHEEL,sha256=zaaOINJESkSfm_4HQVc5ssNzHCPXhJm0kEUakpsEHaU,91
+nucliadb-6.4.1.post4342.dist-info/entry_points.txt,sha256=XqGfgFDuY3zXQc8ewXM2TRVjTModIq851zOsgrmaXx4,1268
+nucliadb-6.4.1.post4342.dist-info/top_level.txt,sha256=hwYhTVnX7jkQ9gJCkVrbqEG1M4lT2F_iPQND1fCzF80,20
+nucliadb-6.4.1.post4342.dist-info/RECORD,,
nucliadb/common/datamanagers/processing.py DELETED
@@ -1,41 +0,0 @@
-# Copyright (C) 2021 Bosutech XXI S.L.
-#
-# nucliadb is offered under the AGPL v3.0 and as commercial software.
-# For commercial licensing, contact us at info@nuclia.com.
-#
-# AGPL:
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU Affero General Public License as
-# published by the Free Software Foundation, either version 3 of the
-# License, or (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU Affero General Public License for more details.
-#
-# You should have received a copy of the GNU Affero General Public License
-# along with this program. If not, see <http://www.gnu.org/licenses/>.
-#
-import logging
-from typing import Optional
-
-from nucliadb.common.maindb.driver import Transaction
-
-logger = logging.getLogger(__name__)
-
-
-PULL_PARTITION_OFFSET = "/processing/pull-offset/{pull_type_id}/{partition}"
-
-
-async def get_pull_offset(txn: Transaction, *, pull_type_id: str, partition: str) -> Optional[int]:
-    key = PULL_PARTITION_OFFSET.format(pull_type_id=pull_type_id, partition=partition)
-    val: Optional[bytes] = await txn.get(key)
-    if val is not None:
-        return int(val)
-    return None
-
-
-async def set_pull_offset(txn: Transaction, *, pull_type_id: str, partition: str, offset: int) -> None:
-    key = PULL_PARTITION_OFFSET.format(pull_type_id=pull_type_id, partition=partition)
-    await txn.set(key, str(offset).encode())