nucliadb 6.4.1.post4337__py3-none-any.whl → 6.4.1.post4344__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nucliadb/common/back_pressure/materializer.py +2 -6
- nucliadb/common/datamanagers/__init__.py +0 -2
- nucliadb/common/http_clients/processing.py +12 -26
- nucliadb/export_import/utils.py +0 -33
- nucliadb/ingest/app.py +4 -25
- nucliadb/ingest/consumer/consumer.py +0 -44
- nucliadb/ingest/consumer/pull.py +2 -201
- nucliadb/ingest/consumer/service.py +2 -65
- nucliadb/ingest/settings.py +0 -2
- nucliadb/standalone/api_router.py +0 -35
- {nucliadb-6.4.1.post4337.dist-info → nucliadb-6.4.1.post4344.dist-info}/METADATA +6 -6
- {nucliadb-6.4.1.post4337.dist-info → nucliadb-6.4.1.post4344.dist-info}/RECORD +15 -16
- nucliadb/common/datamanagers/processing.py +0 -41
- {nucliadb-6.4.1.post4337.dist-info → nucliadb-6.4.1.post4344.dist-info}/WHEEL +0 -0
- {nucliadb-6.4.1.post4337.dist-info → nucliadb-6.4.1.post4344.dist-info}/entry_points.txt +0 -0
- {nucliadb-6.4.1.post4337.dist-info → nucliadb-6.4.1.post4344.dist-info}/top_level.txt +0 -0
@@ -37,7 +37,6 @@ from nucliadb.common.back_pressure.utils import (
|
|
37
37
|
from nucliadb.common.context import ApplicationContext
|
38
38
|
from nucliadb.common.http_clients.processing import ProcessingHTTPClient
|
39
39
|
from nucliadb_telemetry import metrics
|
40
|
-
from nucliadb_utils import const
|
41
40
|
from nucliadb_utils.nats import NatsConnectionManager
|
42
41
|
from nucliadb_utils.settings import is_onprem_nucliadb
|
43
42
|
|
@@ -162,11 +161,8 @@ class BackPressureMaterializer:
|
|
162
161
|
while True:
|
163
162
|
try:
|
164
163
|
with back_pressure_observer({"type": "get_ingest_pending"}):
|
165
|
-
|
166
|
-
|
167
|
-
stream=const.Streams.INGEST_PROCESSED.name,
|
168
|
-
consumer=const.Streams.INGEST_PROCESSED.group,
|
169
|
-
)
|
164
|
+
status = await self.processing_http_client.pull_status()
|
165
|
+
self.ingest_pending = status.pending
|
170
166
|
except Exception: # pragma: no cover
|
171
167
|
logger.exception(
|
172
168
|
"Error getting pending messages to ingest",
|
@@ -36,7 +36,6 @@ from . import (
|
|
36
36
|
fields,
|
37
37
|
kb,
|
38
38
|
labels,
|
39
|
-
processing,
|
40
39
|
resources,
|
41
40
|
rollover,
|
42
41
|
search_configurations,
|
@@ -53,7 +52,6 @@ __all__ = (
|
|
53
52
|
"fields",
|
54
53
|
"kb",
|
55
54
|
"labels",
|
56
|
-
"processing",
|
57
55
|
"resources",
|
58
56
|
"rollover",
|
59
57
|
"search_configurations",
|
@@ -184,6 +184,10 @@ class PullResponseV2(pydantic.BaseModel):
|
|
184
184
|
pending: int
|
185
185
|
|
186
186
|
|
187
|
+
class PullStatusResponse(pydantic.BaseModel):
|
188
|
+
pending: int
|
189
|
+
|
190
|
+
|
187
191
|
JSON_HEADERS = {"Content-Type": "application/json"}
|
188
192
|
|
189
193
|
|
@@ -205,32 +209,6 @@ class ProcessingHTTPClient:
|
|
205
209
|
async def close(self):
|
206
210
|
await self.session.close()
|
207
211
|
|
208
|
-
async def pull(
|
209
|
-
self,
|
210
|
-
partition: str,
|
211
|
-
cursor: Optional[int] = None,
|
212
|
-
limit: int = 3,
|
213
|
-
timeout: int = 1,
|
214
|
-
) -> PullResponse:
|
215
|
-
url = self.base_url + "/pull"
|
216
|
-
params = {"partition": partition, "limit": limit, "timeout": timeout}
|
217
|
-
if cursor is not None:
|
218
|
-
params["from_cursor"] = cursor
|
219
|
-
|
220
|
-
async with self.session.get(url, headers=self.headers, params=params) as resp:
|
221
|
-
resp_text = await resp.text()
|
222
|
-
check_status(resp, resp_text)
|
223
|
-
return PullResponse.model_validate_json(resp_text)
|
224
|
-
|
225
|
-
async def pull_position(self, partition: str) -> int:
|
226
|
-
url = self.base_url + "/pull/position"
|
227
|
-
params = {"partition": partition}
|
228
|
-
async with self.session.get(url, headers=self.headers, params=params) as resp:
|
229
|
-
resp_text = await resp.text()
|
230
|
-
check_status(resp, resp_text)
|
231
|
-
data = PullPosition.model_validate_json(resp_text)
|
232
|
-
return data.cursor
|
233
|
-
|
234
212
|
async def in_progress(self, ack_token: str):
|
235
213
|
url = self.base_url_v2 + "/pull/in_progress"
|
236
214
|
request = InProgressRequest(ack=[ack_token])
|
@@ -256,6 +234,14 @@ class ProcessingHTTPClient:
|
|
256
234
|
else:
|
257
235
|
return PullResponseV2.model_validate_json(resp_text)
|
258
236
|
|
237
|
+
async def pull_status(self) -> PullStatusResponse:
|
238
|
+
url = self.base_url_v2 + "/pull/status"
|
239
|
+
async with self.session.get(url, headers=self.headers) as resp:
|
240
|
+
resp_text = await resp.text()
|
241
|
+
check_status(resp, resp_text)
|
242
|
+
|
243
|
+
return PullStatusResponse.model_validate_json(resp_text)
|
244
|
+
|
259
245
|
async def requests(
|
260
246
|
self,
|
261
247
|
cursor: Optional[str] = None,
|
nucliadb/export_import/utils.py
CHANGED
@@ -40,8 +40,6 @@ from nucliadb_models.export_import import Status
|
|
40
40
|
from nucliadb_protos import knowledgebox_pb2 as kb_pb2
|
41
41
|
from nucliadb_protos import resources_pb2, writer_pb2
|
42
42
|
from nucliadb_protos.writer_pb2_grpc import WriterStub
|
43
|
-
from nucliadb_utils.const import Streams
|
44
|
-
from nucliadb_utils.transaction import MaxTransactionSizeExceededError
|
45
43
|
from nucliadb_utils.utilities import get_ingest
|
46
44
|
|
47
45
|
BinaryStream = AsyncIterator[bytes]
|
@@ -130,37 +128,6 @@ async def process_bm_grpc(context: ApplicationContext, bm: writer_pb2.BrokerMess
|
|
130
128
|
assert response.status == writer_pb2.OpStatusWriter.Status.OK, "Failed to process broker message"
|
131
129
|
|
132
130
|
|
133
|
-
async def transaction_commit(
|
134
|
-
context: ApplicationContext, bm: writer_pb2.BrokerMessage, partition: int
|
135
|
-
) -> None:
|
136
|
-
"""
|
137
|
-
Try to send the broker message over nats. If it's too big, upload
|
138
|
-
it to blob storage and over nats only send a reference to it.
|
139
|
-
"""
|
140
|
-
try:
|
141
|
-
await context.transaction.commit(
|
142
|
-
bm,
|
143
|
-
partition,
|
144
|
-
wait=False,
|
145
|
-
target_subject=Streams.INGEST_PROCESSED.subject,
|
146
|
-
)
|
147
|
-
except MaxTransactionSizeExceededError:
|
148
|
-
stored_key = await context.blob_storage.set_stream_message(
|
149
|
-
kbid=bm.kbid, rid=bm.uuid, data=bm.SerializeToString()
|
150
|
-
)
|
151
|
-
referenced_bm = writer_pb2.BrokerMessageBlobReference(
|
152
|
-
uuid=bm.uuid, kbid=bm.kbid, storage_key=stored_key
|
153
|
-
)
|
154
|
-
await context.transaction.commit(
|
155
|
-
writer=referenced_bm,
|
156
|
-
partition=partition,
|
157
|
-
target_subject=Streams.INGEST_PROCESSED.subject,
|
158
|
-
# This header is needed as it's the way we flag the transaction
|
159
|
-
# consumer to download from storage
|
160
|
-
headers={"X-MESSAGE-TYPE": "PROXY"},
|
161
|
-
)
|
162
|
-
|
163
|
-
|
164
131
|
def get_writer_bm(bm: writer_pb2.BrokerMessage) -> writer_pb2.BrokerMessage:
|
165
132
|
wbm = writer_pb2.BrokerMessage()
|
166
133
|
wbm.CopyFrom(bm)
|
nucliadb/ingest/app.py
CHANGED
@@ -32,7 +32,7 @@ from nucliadb.ingest.consumer import service as consumer_service
|
|
32
32
|
from nucliadb.ingest.partitions import assign_partitions
|
33
33
|
from nucliadb.ingest.processing import start_processing_engine, stop_processing_engine
|
34
34
|
from nucliadb.ingest.service import start_grpc
|
35
|
-
from nucliadb.ingest.settings import ProcessingPullMode, settings
|
35
|
+
from nucliadb.ingest.settings import settings
|
36
36
|
from nucliadb.ingest.utils import start_ingest as start_ingest_utility
|
37
37
|
from nucliadb.ingest.utils import stop_ingest as stop_ingest_utility
|
38
38
|
from nucliadb_telemetry import errors
|
@@ -101,12 +101,7 @@ async def initialize_grpc(): # pragma: no cover
|
|
101
101
|
|
102
102
|
async def initialize_pull_workers() -> list[Callable[[], Awaitable[None]]]:
|
103
103
|
finalizers = await initialize_grpc()
|
104
|
-
|
105
|
-
pull_workers = await consumer_service.start_pull_workers(SERVICE_NAME)
|
106
|
-
elif settings.processing_pull_mode == ProcessingPullMode.V2:
|
107
|
-
pull_workers = [await consumer_service.start_ingest_processed_consumer_v2(SERVICE_NAME)]
|
108
|
-
else:
|
109
|
-
raise Exception("Processing pull workers not enabled and it is required")
|
104
|
+
pull_workers = [await consumer_service.start_ingest_processed_consumer_v2(SERVICE_NAME)]
|
110
105
|
|
111
106
|
return pull_workers + finalizers
|
112
107
|
|
@@ -117,17 +112,9 @@ async def main_consumer(): # pragma: no cover
|
|
117
112
|
|
118
113
|
grpc_health_finalizer = await health.start_grpc_health_service(settings.grpc_port)
|
119
114
|
|
120
|
-
# pull workers could be pulled out into it's own deployment
|
121
|
-
if settings.processing_pull_mode == ProcessingPullMode.V1:
|
122
|
-
pull_workers = await consumer_service.start_pull_workers(SERVICE_NAME)
|
123
|
-
else:
|
124
|
-
# In v2, pull workers run inside the ingest consumer
|
125
|
-
pull_workers = []
|
126
115
|
ingest_consumers = await consumer_service.start_ingest_consumers(SERVICE_NAME)
|
127
116
|
|
128
|
-
await run_until_exit(
|
129
|
-
[grpc_health_finalizer, ingest_consumers, metrics_server.shutdown] + pull_workers + finalizers
|
130
|
-
)
|
117
|
+
await run_until_exit([grpc_health_finalizer, ingest_consumers, metrics_server.shutdown] + finalizers)
|
131
118
|
|
132
119
|
|
133
120
|
async def main_orm_grpc(): # pragma: no cover
|
@@ -144,15 +131,7 @@ async def main_ingest_processed_consumer(): # pragma: no cover
|
|
144
131
|
metrics_server = await serve_metrics()
|
145
132
|
grpc_health_finalizer = await health.start_grpc_health_service(settings.grpc_port)
|
146
133
|
|
147
|
-
|
148
|
-
consumer = await consumer_service.start_ingest_processed_consumer(SERVICE_NAME)
|
149
|
-
elif settings.processing_pull_mode == ProcessingPullMode.V2:
|
150
|
-
consumer = await consumer_service.start_ingest_processed_consumer_v2(SERVICE_NAME)
|
151
|
-
else:
|
152
|
-
# Off
|
153
|
-
async def fake_consumer(): ...
|
154
|
-
|
155
|
-
consumer = fake_consumer
|
134
|
+
consumer = await consumer_service.start_ingest_processed_consumer_v2(SERVICE_NAME)
|
156
135
|
|
157
136
|
await run_until_exit(
|
158
137
|
[grpc_health_finalizer, consumer, metrics_server.shutdown, stop_processing_engine] + finalizers
|
@@ -270,47 +270,3 @@ class IngestConsumer:
|
|
270
270
|
await self.ack_message(msg, kbid)
|
271
271
|
logger.info("Message acked because of success", extra={"seqid": seqid})
|
272
272
|
await self.clean_broker_message(msg)
|
273
|
-
|
274
|
-
|
275
|
-
class IngestProcessedConsumer(IngestConsumer):
|
276
|
-
"""
|
277
|
-
Consumer designed to write processed resources to the database.
|
278
|
-
|
279
|
-
This is so that we can have a single consumer for both the regular writer and writes
|
280
|
-
coming from processor.
|
281
|
-
|
282
|
-
This is important because writes coming from processor can be very large and slow and
|
283
|
-
other writes are going to be coming from user actions and we don't want to slow them down.
|
284
|
-
"""
|
285
|
-
|
286
|
-
async def setup_nats_subscription(self):
|
287
|
-
subject = const.Streams.INGEST_PROCESSED.subject
|
288
|
-
durable_name = const.Streams.INGEST_PROCESSED.group
|
289
|
-
self.subscription = await self.nats_connection_manager.pull_subscribe(
|
290
|
-
stream=const.Streams.INGEST_PROCESSED.name,
|
291
|
-
subject=subject,
|
292
|
-
durable=durable_name,
|
293
|
-
cb=self.subscription_worker,
|
294
|
-
subscription_lost_cb=self.setup_nats_subscription,
|
295
|
-
config=nats.js.api.ConsumerConfig(
|
296
|
-
durable_name=durable_name,
|
297
|
-
ack_policy=nats.js.api.AckPolicy.EXPLICIT,
|
298
|
-
deliver_policy=nats.js.api.DeliverPolicy.ALL,
|
299
|
-
# We set it to 20 because we don't care about order here and we want to be able to HPA based
|
300
|
-
# on the number of pending messages in the queue.
|
301
|
-
max_ack_pending=20,
|
302
|
-
max_deliver=nats_consumer_settings.nats_max_deliver,
|
303
|
-
ack_wait=nats_consumer_settings.nats_ack_wait,
|
304
|
-
),
|
305
|
-
)
|
306
|
-
logger.info(
|
307
|
-
f"Subscribed pull consumer to {subject} on stream {const.Streams.INGEST_PROCESSED.name}"
|
308
|
-
)
|
309
|
-
|
310
|
-
@backoff.on_exception(backoff.expo, (ConflictError,), jitter=backoff.random_jitter, max_tries=4)
|
311
|
-
async def _process(self, pb: BrokerMessage, seqid: int):
|
312
|
-
"""
|
313
|
-
We are setting `transaction_check` to False here because we can not mix
|
314
|
-
transaction ids from regular ingest writes and writes coming from processor.
|
315
|
-
"""
|
316
|
-
await self.processor.process(pb, seqid, self.partition, transaction_check=False)
|
nucliadb/ingest/consumer/pull.py
CHANGED
@@ -21,7 +21,6 @@ import asyncio
|
|
21
21
|
import base64
|
22
22
|
import time
|
23
23
|
from contextlib import contextmanager
|
24
|
-
from datetime import datetime, timezone
|
25
24
|
from typing import Optional
|
26
25
|
|
27
26
|
from aiohttp.client_exceptions import ClientConnectorError
|
@@ -32,9 +31,6 @@ from opentelemetry.trace import (
|
|
32
31
|
Link,
|
33
32
|
)
|
34
33
|
|
35
|
-
from nucliadb.common import datamanagers
|
36
|
-
from nucliadb.common.back_pressure.materializer import BackPressureMaterializer
|
37
|
-
from nucliadb.common.back_pressure.utils import BackPressureException
|
38
34
|
from nucliadb.common.http_clients.processing import (
|
39
35
|
ProcessingHTTPClient,
|
40
36
|
ProcessingPullMessageProgressUpdater,
|
@@ -45,214 +41,19 @@ from nucliadb.ingest import SERVICE_NAME, logger, logger_activity
|
|
45
41
|
from nucliadb.ingest.consumer.consumer import consumer_observer
|
46
42
|
from nucliadb.ingest.orm.exceptions import ReallyStopPulling
|
47
43
|
from nucliadb.ingest.orm.processor import Processor
|
48
|
-
from nucliadb_protos.writer_pb2 import BrokerMessage
|
44
|
+
from nucliadb_protos.writer_pb2 import BrokerMessage
|
49
45
|
from nucliadb_telemetry import errors
|
50
46
|
from nucliadb_telemetry.metrics import Gauge
|
51
47
|
from nucliadb_telemetry.utils import get_telemetry
|
52
|
-
from nucliadb_utils import const
|
53
48
|
from nucliadb_utils.cache.pubsub import PubSubDriver
|
54
49
|
from nucliadb_utils.settings import nuclia_settings
|
55
50
|
from nucliadb_utils.storages.storage import Storage
|
56
51
|
from nucliadb_utils.transaction import MaxTransactionSizeExceededError
|
57
|
-
from nucliadb_utils.utilities import
|
52
|
+
from nucliadb_utils.utilities import pull_subscriber_utilization
|
58
53
|
|
59
54
|
processing_pending_messages = Gauge("nucliadb_processing_pending_messages")
|
60
55
|
|
61
56
|
|
62
|
-
class PullWorker:
|
63
|
-
"""
|
64
|
-
The pull worker is responsible for pulling messages from the pull processing
|
65
|
-
http endpoint and injecting them into the processing write queue.
|
66
|
-
|
67
|
-
The processing pull endpoint is also described as the "processing proxy" at times.
|
68
|
-
"""
|
69
|
-
|
70
|
-
def __init__(
|
71
|
-
self,
|
72
|
-
driver: Driver,
|
73
|
-
partition: str,
|
74
|
-
storage: Storage,
|
75
|
-
pull_time_error_backoff: int,
|
76
|
-
pubsub: Optional[PubSubDriver] = None,
|
77
|
-
local_subscriber: bool = False,
|
78
|
-
pull_time_empty_backoff: float = 5.0,
|
79
|
-
pull_api_timeout: int = 60,
|
80
|
-
back_pressure: Optional[BackPressureMaterializer] = None,
|
81
|
-
):
|
82
|
-
self.partition = partition
|
83
|
-
self.pull_time_error_backoff = pull_time_error_backoff
|
84
|
-
self.pull_time_empty_backoff = pull_time_empty_backoff
|
85
|
-
self.pull_api_timeout = pull_api_timeout
|
86
|
-
self.local_subscriber = local_subscriber
|
87
|
-
|
88
|
-
self.processor = Processor(driver, storage, pubsub, partition)
|
89
|
-
self.back_pressure = back_pressure
|
90
|
-
|
91
|
-
def __str__(self) -> str:
|
92
|
-
return f"PullWorker(partition={self.partition})"
|
93
|
-
|
94
|
-
def __repr__(self) -> str:
|
95
|
-
return str(self)
|
96
|
-
|
97
|
-
async def handle_message(self, payload: str) -> None:
|
98
|
-
pb = BrokerMessage()
|
99
|
-
data = base64.b64decode(payload)
|
100
|
-
pb.ParseFromString(data)
|
101
|
-
|
102
|
-
logger.debug(f"Resource: {pb.uuid} KB: {pb.kbid} ProcessingID: {pb.processing_id}")
|
103
|
-
|
104
|
-
if not self.local_subscriber:
|
105
|
-
transaction_utility = get_transaction_utility()
|
106
|
-
if transaction_utility is None:
|
107
|
-
raise Exception("No transaction utility defined")
|
108
|
-
try:
|
109
|
-
await transaction_utility.commit(
|
110
|
-
writer=pb,
|
111
|
-
partition=int(self.partition),
|
112
|
-
# send to separate processor
|
113
|
-
target_subject=const.Streams.INGEST_PROCESSED.subject,
|
114
|
-
)
|
115
|
-
except MaxTransactionSizeExceededError:
|
116
|
-
storage = await get_storage()
|
117
|
-
stored_key = await storage.set_stream_message(kbid=pb.kbid, rid=pb.uuid, data=data)
|
118
|
-
referenced_pb = BrokerMessageBlobReference(
|
119
|
-
uuid=pb.uuid, kbid=pb.kbid, storage_key=stored_key
|
120
|
-
)
|
121
|
-
await transaction_utility.commit(
|
122
|
-
writer=referenced_pb,
|
123
|
-
partition=int(self.partition),
|
124
|
-
# send to separate processor
|
125
|
-
target_subject=const.Streams.INGEST_PROCESSED.subject,
|
126
|
-
headers={"X-MESSAGE-TYPE": "PROXY"},
|
127
|
-
)
|
128
|
-
else:
|
129
|
-
# No nats defined == monolitic nucliadb
|
130
|
-
await self.processor.process(
|
131
|
-
pb,
|
132
|
-
0, # Fake sequence id as in local mode there's no transactions
|
133
|
-
partition=self.partition,
|
134
|
-
transaction_check=False,
|
135
|
-
)
|
136
|
-
|
137
|
-
async def back_pressure_check(self) -> None:
|
138
|
-
if self.back_pressure is None:
|
139
|
-
return
|
140
|
-
while True:
|
141
|
-
try:
|
142
|
-
self.back_pressure.check_indexing()
|
143
|
-
self.back_pressure.check_ingest()
|
144
|
-
break
|
145
|
-
except BackPressureException as exc:
|
146
|
-
sleep_time = (datetime.now(timezone.utc) - exc.data.try_after).total_seconds()
|
147
|
-
logger.warning(f"Back pressure active! Sleeping for {sleep_time} seconds", exc_info=True)
|
148
|
-
await asyncio.sleep(sleep_time)
|
149
|
-
except Exception as e:
|
150
|
-
errors.capture_exception(e)
|
151
|
-
logger.exception("Error while checking back pressure. Moving on")
|
152
|
-
break
|
153
|
-
|
154
|
-
async def loop(self):
|
155
|
-
"""
|
156
|
-
Run this forever
|
157
|
-
"""
|
158
|
-
while True:
|
159
|
-
await self.back_pressure_check()
|
160
|
-
try:
|
161
|
-
await self._loop()
|
162
|
-
except ReallyStopPulling:
|
163
|
-
logger.info("Exiting...")
|
164
|
-
break
|
165
|
-
except Exception as e:
|
166
|
-
errors.capture_exception(e)
|
167
|
-
logger.exception("Exception on worker", exc_info=e)
|
168
|
-
await asyncio.sleep(10)
|
169
|
-
|
170
|
-
async def _loop(self):
|
171
|
-
headers = {}
|
172
|
-
data = None
|
173
|
-
if nuclia_settings.nuclia_service_account is not None:
|
174
|
-
headers["X-STF-NUAKEY"] = f"Bearer {nuclia_settings.nuclia_service_account}"
|
175
|
-
# parse jwt sub to get pull type id
|
176
|
-
try:
|
177
|
-
pull_type_id = get_nua_api_id()
|
178
|
-
except Exception as exc:
|
179
|
-
logger.exception("Could not read NUA API Key. Can not start pull worker")
|
180
|
-
raise ReallyStopPulling() from exc
|
181
|
-
else:
|
182
|
-
pull_type_id = "main"
|
183
|
-
|
184
|
-
async with ProcessingHTTPClient() as processing_http_client:
|
185
|
-
logger.info(f"Collecting from NucliaDB Cloud {self.partition} partition")
|
186
|
-
while True:
|
187
|
-
try:
|
188
|
-
async with datamanagers.with_ro_transaction() as txn:
|
189
|
-
cursor = await datamanagers.processing.get_pull_offset(
|
190
|
-
txn, pull_type_id=pull_type_id, partition=self.partition
|
191
|
-
)
|
192
|
-
|
193
|
-
data = await processing_http_client.pull(
|
194
|
-
self.partition,
|
195
|
-
cursor=cursor,
|
196
|
-
timeout=self.pull_api_timeout,
|
197
|
-
)
|
198
|
-
if data.status == "ok":
|
199
|
-
logger.info(
|
200
|
-
"Message received from proxy",
|
201
|
-
extra={"partition": self.partition, "cursor": data.cursor},
|
202
|
-
)
|
203
|
-
try:
|
204
|
-
if data.payload is not None:
|
205
|
-
await self.handle_message(data.payload)
|
206
|
-
for payload in data.payloads:
|
207
|
-
# If using cursors and multiple messages are returned, it will be in the
|
208
|
-
# `payloads` property
|
209
|
-
await self.handle_message(payload)
|
210
|
-
except Exception as e:
|
211
|
-
errors.capture_exception(e)
|
212
|
-
logger.exception("Error while pulling and processing message/s")
|
213
|
-
raise e
|
214
|
-
async with datamanagers.with_transaction() as txn:
|
215
|
-
await datamanagers.processing.set_pull_offset(
|
216
|
-
txn,
|
217
|
-
pull_type_id=pull_type_id,
|
218
|
-
partition=self.partition,
|
219
|
-
offset=data.cursor,
|
220
|
-
)
|
221
|
-
await txn.commit()
|
222
|
-
elif data.status == "empty":
|
223
|
-
logger_activity.debug(f"No messages waiting in partition #{self.partition}")
|
224
|
-
await asyncio.sleep(self.pull_time_empty_backoff)
|
225
|
-
else:
|
226
|
-
logger.info(f"Proxy pull answered with error: {data}")
|
227
|
-
await asyncio.sleep(self.pull_time_error_backoff)
|
228
|
-
except (
|
229
|
-
asyncio.exceptions.CancelledError,
|
230
|
-
RuntimeError,
|
231
|
-
KeyboardInterrupt,
|
232
|
-
SystemExit,
|
233
|
-
):
|
234
|
-
logger.info(f"Pull task for partition #{self.partition} was canceled, exiting")
|
235
|
-
raise ReallyStopPulling()
|
236
|
-
|
237
|
-
except ClientConnectorError:
|
238
|
-
logger.error(
|
239
|
-
f"Could not connect to processing engine, \
|
240
|
-
{processing_http_client.base_url} verify your internet connection"
|
241
|
-
)
|
242
|
-
await asyncio.sleep(self.pull_time_error_backoff)
|
243
|
-
|
244
|
-
except MaxTransactionSizeExceededError as e:
|
245
|
-
if data is not None:
|
246
|
-
payload_length = 0
|
247
|
-
if data.payload:
|
248
|
-
payload_length = len(base64.b64decode(data.payload))
|
249
|
-
logger.error(f"Message too big for transaction: {payload_length}")
|
250
|
-
raise e
|
251
|
-
except Exception:
|
252
|
-
logger.exception("Unhandled error pulling messages from processing")
|
253
|
-
await asyncio.sleep(self.pull_time_error_backoff)
|
254
|
-
|
255
|
-
|
256
57
|
@contextmanager
|
257
58
|
def run_in_span(headers: dict[str, str]):
|
258
59
|
# Create a span for handling this message
|
@@ -24,11 +24,10 @@ from typing import Awaitable, Callable, Optional
|
|
24
24
|
|
25
25
|
from nucliadb.common.back_pressure.materializer import BackPressureMaterializer
|
26
26
|
from nucliadb.common.back_pressure.settings import settings as back_pressure_settings
|
27
|
-
from nucliadb.common.back_pressure.utils import is_back_pressure_enabled
|
28
27
|
from nucliadb.common.maindb.utils import setup_driver
|
29
28
|
from nucliadb.ingest import SERVICE_NAME, logger
|
30
|
-
from nucliadb.ingest.consumer.consumer import IngestConsumer
|
31
|
-
from nucliadb.ingest.consumer.pull import PullV2Worker
|
29
|
+
from nucliadb.ingest.consumer.consumer import IngestConsumer
|
30
|
+
from nucliadb.ingest.consumer.pull import PullV2Worker
|
32
31
|
from nucliadb.ingest.settings import settings
|
33
32
|
from nucliadb_utils.exceptions import ConfigurationError
|
34
33
|
from nucliadb_utils.settings import indexing_settings, transaction_settings
|
@@ -79,38 +78,6 @@ async def stop_back_pressure(materializer: BackPressureMaterializer) -> None:
|
|
79
78
|
await materializer.nats_manager.finalize()
|
80
79
|
|
81
80
|
|
82
|
-
async def start_pull_workers(
|
83
|
-
service_name: Optional[str] = None,
|
84
|
-
) -> list[Callable[[], Awaitable[None]]]:
|
85
|
-
finalizers: list[Callable[[], Awaitable[None]]] = []
|
86
|
-
|
87
|
-
driver = await setup_driver()
|
88
|
-
pubsub = await get_pubsub()
|
89
|
-
storage = await get_storage(service_name=service_name or SERVICE_NAME)
|
90
|
-
back_pressure = None
|
91
|
-
if is_back_pressure_enabled():
|
92
|
-
back_pressure = await start_back_pressure()
|
93
|
-
finalizers.append(partial(stop_back_pressure, back_pressure))
|
94
|
-
tasks = []
|
95
|
-
for partition in settings.partitions:
|
96
|
-
worker = PullWorker(
|
97
|
-
driver=driver,
|
98
|
-
partition=partition,
|
99
|
-
storage=storage,
|
100
|
-
pull_time_error_backoff=settings.pull_time_error_backoff,
|
101
|
-
pubsub=pubsub,
|
102
|
-
local_subscriber=transaction_settings.transaction_local,
|
103
|
-
pull_api_timeout=settings.pull_api_timeout,
|
104
|
-
back_pressure=back_pressure,
|
105
|
-
)
|
106
|
-
task = asyncio.create_task(worker.loop())
|
107
|
-
task.add_done_callback(_handle_task_result)
|
108
|
-
tasks.append(task)
|
109
|
-
if len(tasks):
|
110
|
-
finalizers.append(partial(_exit_tasks, tasks))
|
111
|
-
return finalizers
|
112
|
-
|
113
|
-
|
114
81
|
async def start_ingest_consumers(
|
115
82
|
service_name: Optional[str] = None,
|
116
83
|
) -> Callable[[], Awaitable[None]]:
|
@@ -147,36 +114,6 @@ async def start_ingest_consumers(
|
|
147
114
|
return _finalize
|
148
115
|
|
149
116
|
|
150
|
-
async def start_ingest_processed_consumer(
|
151
|
-
service_name: Optional[str] = None,
|
152
|
-
) -> Callable[[], Awaitable[None]]:
|
153
|
-
"""
|
154
|
-
This is not meant to be deployed with a stateful set like the other consumers.
|
155
|
-
|
156
|
-
We are not maintaining transactionability based on the nats sequence id from this
|
157
|
-
consumer and we will start off by not separating writes by partition AND
|
158
|
-
allowing NATS to manage the queue group for us.
|
159
|
-
"""
|
160
|
-
if transaction_settings.transaction_local:
|
161
|
-
raise ConfigurationError("Can not start ingest consumers in local mode")
|
162
|
-
|
163
|
-
driver = await setup_driver()
|
164
|
-
pubsub = await get_pubsub()
|
165
|
-
storage = await get_storage(service_name=service_name or SERVICE_NAME)
|
166
|
-
nats_connection_manager = get_nats_manager()
|
167
|
-
|
168
|
-
consumer = IngestProcessedConsumer(
|
169
|
-
driver=driver,
|
170
|
-
partition="-1",
|
171
|
-
storage=storage,
|
172
|
-
pubsub=pubsub,
|
173
|
-
nats_connection_manager=nats_connection_manager,
|
174
|
-
)
|
175
|
-
await consumer.initialize()
|
176
|
-
|
177
|
-
return nats_connection_manager.finalize
|
178
|
-
|
179
|
-
|
180
117
|
async def start_ingest_processed_consumer_v2(
|
181
118
|
service_name: Optional[str] = None,
|
182
119
|
) -> Callable[[], Awaitable[None]]:
|
nucliadb/ingest/settings.py
CHANGED
@@ -21,15 +21,12 @@ import logging
|
|
21
21
|
import time
|
22
22
|
|
23
23
|
import orjson
|
24
|
-
import pydantic
|
25
24
|
from fastapi import Request
|
26
25
|
from fastapi.responses import JSONResponse
|
27
26
|
from fastapi.routing import APIRouter
|
28
27
|
from fastapi_versioning import version
|
29
28
|
from jwcrypto import jwe, jwk # type: ignore
|
30
29
|
|
31
|
-
from nucliadb.common import datamanagers
|
32
|
-
from nucliadb.common.http_clients import processing
|
33
30
|
from nucliadb.common.http_clients.auth import NucliaAuthHTTPClient
|
34
31
|
from nucliadb.standalone import versions
|
35
32
|
from nucliadb_models.resource import NucliaDBRoles
|
@@ -123,35 +120,3 @@ async def versions_endpoint(request: Request) -> JSONResponse:
|
|
123
120
|
for package in versions.WatchedPackages
|
124
121
|
}
|
125
122
|
)
|
126
|
-
|
127
|
-
|
128
|
-
@standalone_api_router.get("/pull/position")
|
129
|
-
async def pull_status(request: Request) -> JSONResponse:
|
130
|
-
async with datamanagers.with_ro_transaction() as txn:
|
131
|
-
# standalone assumes 1 partition
|
132
|
-
current_offset = await datamanagers.processing.get_pull_offset(
|
133
|
-
txn, pull_type_id=processing.get_nua_api_id(), partition="1"
|
134
|
-
)
|
135
|
-
|
136
|
-
async with processing.ProcessingHTTPClient() as client:
|
137
|
-
end_offset = await client.pull_position(partition="1")
|
138
|
-
|
139
|
-
return JSONResponse({"current_offset": current_offset, "end_offset": end_offset})
|
140
|
-
|
141
|
-
|
142
|
-
class UpdatePullPosition(pydantic.BaseModel):
|
143
|
-
cursor: int
|
144
|
-
|
145
|
-
|
146
|
-
@standalone_api_router.patch("/pull/position")
|
147
|
-
async def update_pull_position(request: Request, item: UpdatePullPosition) -> JSONResponse:
|
148
|
-
async with datamanagers.with_transaction() as txn:
|
149
|
-
# standalone assumes 1 partition
|
150
|
-
await datamanagers.processing.set_pull_offset(
|
151
|
-
txn,
|
152
|
-
pull_type_id=processing.get_nua_api_id(),
|
153
|
-
partition="1",
|
154
|
-
offset=item.cursor,
|
155
|
-
)
|
156
|
-
await txn.commit()
|
157
|
-
return JSONResponse({})
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: nucliadb
|
3
|
-
Version: 6.4.1.post4337
|
3
|
+
Version: 6.4.1.post4344
|
4
4
|
Summary: NucliaDB
|
5
5
|
Author-email: Nuclia <nucliadb@nuclia.com>
|
6
6
|
License-Expression: AGPL-3.0-or-later
|
@@ -19,11 +19,11 @@ Classifier: Programming Language :: Python :: 3.12
|
|
19
19
|
Classifier: Programming Language :: Python :: 3 :: Only
|
20
20
|
Requires-Python: <4,>=3.9
|
21
21
|
Description-Content-Type: text/markdown
|
22
|
-
Requires-Dist: nucliadb-telemetry[all]>=6.4.1.post4337
|
23
|
-
Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.4.1.post4337
|
24
|
-
Requires-Dist: nucliadb-protos>=6.4.1.post4337
|
25
|
-
Requires-Dist: nucliadb-models>=6.4.1.post4337
|
26
|
-
Requires-Dist: nidx-protos>=6.4.1.post4337
|
22
|
+
Requires-Dist: nucliadb-telemetry[all]>=6.4.1.post4344
|
23
|
+
Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.4.1.post4344
|
24
|
+
Requires-Dist: nucliadb-protos>=6.4.1.post4344
|
25
|
+
Requires-Dist: nucliadb-models>=6.4.1.post4344
|
26
|
+
Requires-Dist: nidx-protos>=6.4.1.post4344
|
27
27
|
Requires-Dist: nucliadb-admin-assets>=1.0.0.post1224
|
28
28
|
Requires-Dist: nuclia-models>=0.24.2
|
29
29
|
Requires-Dist: uvicorn[standard]
|
@@ -62,7 +62,7 @@ nucliadb/common/nidx.py,sha256=3EeQGjM_gxK0l_Rb54fspFWVNnzUiKF-_GMxTiiDC8Q,9116
|
|
62
62
|
nucliadb/common/vector_index_config.py,sha256=LqGwhrDCp1q1vBow3scd1Chhr4GLYjYnGL72FKvOYYc,1552
|
63
63
|
nucliadb/common/back_pressure/__init__.py,sha256=paAcAZcfGRTyURF9lnn3vX0vcwakTEVswG_xcdGBH-U,928
|
64
64
|
nucliadb/common/back_pressure/cache.py,sha256=ANvXglWzI5naAD6N4E_fNi17qS6KNyAhjLeh6WlZZ84,2931
|
65
|
-
nucliadb/common/back_pressure/materializer.py,sha256=
|
65
|
+
nucliadb/common/back_pressure/materializer.py,sha256=bXUalaaTMdrltm23ezkoymcRPJl7Ha8RVTj7xdVfHgQ,11468
|
66
66
|
nucliadb/common/back_pressure/settings.py,sha256=3qNOzbI0KC6LMy-wMilXRSBfZu6CCpGHod26MTgAZ2o,3082
|
67
67
|
nucliadb/common/back_pressure/utils.py,sha256=aZeP1XSkdgaRgZC76yR9Kje3511ZUCp7KB-XzcvhMYY,2018
|
68
68
|
nucliadb/common/cluster/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
|
@@ -77,7 +77,7 @@ nucliadb/common/cluster/standalone/__init__.py,sha256=itSI7dtTwFP55YMX4iK7JzdMHS
|
|
77
77
|
nucliadb/common/cluster/standalone/utils.py,sha256=af3r-x_GF7A6dwIAhZLR-r-SZQEVxsFrDKeMfUTA6G0,1908
|
78
78
|
nucliadb/common/context/__init__.py,sha256=IKAHuiCjbOEsqfLozWwJ6mRFzFncsZMyxNC5E_XZ5EM,6016
|
79
79
|
nucliadb/common/context/fastapi.py,sha256=mH_8n5t7quNSPivNM2JS5EQf2sTVJsdzXW6LaY7EHAA,1629
|
80
|
-
nucliadb/common/datamanagers/__init__.py,sha256=
|
80
|
+
nucliadb/common/datamanagers/__init__.py,sha256=xKc6ZMqKUs20R90jJT4xkQ8TFMNwQnhhuWnBBqVnKdM,2084
|
81
81
|
nucliadb/common/datamanagers/atomic.py,sha256=WihdtBWQIAuElZQjh1xQ--q5dJowwlkovqsW-OB_t2k,3230
|
82
82
|
nucliadb/common/datamanagers/cluster.py,sha256=iU0b7AESm1Yi8Wp3pIKgqixZGNMjeBrxSpvEKsaZKgY,1831
|
83
83
|
nucliadb/common/datamanagers/entities.py,sha256=gI-0mbMlqrr9FiyhexEh6czhgYcMxE2s9m4o866EK9o,5340
|
@@ -85,7 +85,6 @@ nucliadb/common/datamanagers/exceptions.py,sha256=Atz_PP_GGq4jgJaWcAkcRbHBoBaGcC
|
|
85
85
|
nucliadb/common/datamanagers/fields.py,sha256=9KqBzTssAT68FR5hd17Xu_CSwAYdKFuYic1ITnrfFNc,3971
|
86
86
|
nucliadb/common/datamanagers/kb.py,sha256=P7EhF4tApIUG2jw_HH1oMufTKG9__kuOLKnrCNGbDM4,6156
|
87
87
|
nucliadb/common/datamanagers/labels.py,sha256=Zm0GQpSPoGXEEysUY7VsDIcyKSIIQsMVphj23IyM9_c,4502
|
88
|
-
nucliadb/common/datamanagers/processing.py,sha256=ByxdZzdbAfJGqC6__mY-zryjk040TyQfcUq3rxujeoY,1587
|
89
88
|
nucliadb/common/datamanagers/resources.py,sha256=VwFdCyHSnzMU3ASYRhC-wuCjCQEjOKEF7tIob4lTcPg,10793
|
90
89
|
nucliadb/common/datamanagers/rollover.py,sha256=GKdGv5goJVi3B3ZjawnMuQkgYeZjpCqxRYFz0VIswrE,7813
|
91
90
|
nucliadb/common/datamanagers/search_configurations.py,sha256=O-8eW43CE46GcxO6TB5hpi27NBguv4BL4SI1vLlN8os,2463
|
@@ -101,7 +100,7 @@ nucliadb/common/external_index_providers/settings.py,sha256=EGHnIkwxqe6aypwKegXT
|
|
101
100
|
nucliadb/common/http_clients/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
|
102
101
|
nucliadb/common/http_clients/auth.py,sha256=srfpgAbs2wmqA9u_l-HxsV4YoO77Tse4y3gm3q2YvYM,2112
|
103
102
|
nucliadb/common/http_clients/exceptions.py,sha256=47Y8OjkaGV_F18G07FpJhOzgWKUIexhlILyuVtICz8s,1100
|
104
|
-
nucliadb/common/http_clients/processing.py,sha256=
|
103
|
+
nucliadb/common/http_clients/processing.py,sha256=mKd9vRK-Wb71UG2LCoGu47wmnN5krqA0D1Z8vitsBPE,8976
|
105
104
|
nucliadb/common/http_clients/pypi.py,sha256=VHIUjwJEJVntVUo_FRoXIo8sLmluy7sa9-iXSITcrMY,1540
|
106
105
|
nucliadb/common/http_clients/utils.py,sha256=yGUkHNS41abHiBoHqo_Mg3QSqGsS7rUtbfGftbEC57U,1529
|
107
106
|
nucliadb/common/maindb/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
|
@@ -120,22 +119,22 @@ nucliadb/export_import/exporter.py,sha256=k2QVx1EjqFlDYiggriWiEJzwtMXzHbldsqWdpG
|
|
120
119
|
nucliadb/export_import/importer.py,sha256=GNDMt4hdjbcLWdydVq8XFQKefzNJkQ1eTzhshUX64rk,4231
|
121
120
|
nucliadb/export_import/models.py,sha256=dbjScNkiMRv4X3Ktudy1JRliD25bfoDTy3JmEZgQSCc,2121
|
122
121
|
nucliadb/export_import/tasks.py,sha256=DWbdqY97ffoyfipelGXz3Jqz1iam6JCjQSh367Fc3NA,2947
|
123
|
-
nucliadb/export_import/utils.py,sha256=
|
122
|
+
nucliadb/export_import/utils.py,sha256=XV3tJJdhgnVJRSj8AxZjgeipONtB107M185HVJmHp2Q,21626
|
124
123
|
nucliadb/ingest/__init__.py,sha256=fsw3C38VP50km3R-nHL775LNGPpJ4JxqXJ2Ib1f5SqE,1011
|
125
|
-
nucliadb/ingest/app.py,sha256=
|
124
|
+
nucliadb/ingest/app.py,sha256=Heyd5TubnM6HOo4eQdjg-laedALu1vq96B0XJ5T5QUc,7400
|
126
125
|
nucliadb/ingest/partitions.py,sha256=2NIhMYbNT0TNBL6bX1UMSi7vxFGICstCKEqsB0TXHOE,2410
|
127
126
|
nucliadb/ingest/processing.py,sha256=QmkHq-BU4vub7JRWe9VHvQ2DcAmT6-CzgFXuZxXhcBU,20953
|
128
127
|
nucliadb/ingest/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
129
128
|
nucliadb/ingest/serialize.py,sha256=-TIjibJTbMqAowzRvyrG3R209vKqBZqXpdrQL9Dq4lo,16135
|
130
|
-
nucliadb/ingest/settings.py,sha256=
|
129
|
+
nucliadb/ingest/settings.py,sha256=5qJICxwYb028a2iAhVbxOJB5X-hWtDLtiya-YhWostw,3179
|
131
130
|
nucliadb/ingest/utils.py,sha256=l1myURu3r8oA11dx3GpHw-gNTUc1AFX8xdPm9Lgl2rA,2275
|
132
131
|
nucliadb/ingest/consumer/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
|
133
132
|
nucliadb/ingest/consumer/auditing.py,sha256=xK21DIa_ZAiOJVVbnkmT4jgCRGshNGyPyxsqhE6kROE,7204
|
134
|
-
nucliadb/ingest/consumer/consumer.py,sha256=
|
133
|
+
nucliadb/ingest/consumer/consumer.py,sha256=1OetpJXp6glaAe4kKqUA_L46BS-ZyEccTkwt7TGf0Zw,11658
|
135
134
|
nucliadb/ingest/consumer/materializer.py,sha256=tgD_rDI2twQzcz8kKNiW_L4YIth16IGh9mUfD5wiSD4,3858
|
136
135
|
nucliadb/ingest/consumer/metrics.py,sha256=ji1l_4cKiHJthQd8YNem1ft4iMbw9KThmVvJmLcv3Xg,1075
|
137
|
-
nucliadb/ingest/consumer/pull.py,sha256=
|
138
|
-
nucliadb/ingest/consumer/service.py,sha256=
|
136
|
+
nucliadb/ingest/consumer/pull.py,sha256=vAOu2Zum-1e4RipoHvzzIha5PoNV28_C0nciQ2UFphc,8831
|
137
|
+
nucliadb/ingest/consumer/service.py,sha256=8AD41mMN7EUeUtk4ZNy14zfvxzwmVjIX6Mwe05-bomA,6543
|
139
138
|
nucliadb/ingest/consumer/shard_creator.py,sha256=w0smEu01FU_2cjZnsfBRNqT_Ntho11X17zTMST-vKbc,4359
|
140
139
|
nucliadb/ingest/consumer/utils.py,sha256=jpX8D4lKzuPCpArQLZeX_Zczq3pfen_zAf8sPJfOEZU,2642
|
141
140
|
nucliadb/ingest/fields/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
|
@@ -275,7 +274,7 @@ nucliadb/search/search/query_parser/parsers/graph.py,sha256=lDRJO_JvOe7yytNgXZyM
|
|
275
274
|
nucliadb/search/search/query_parser/parsers/search.py,sha256=yEebeMOXJza7HMK3TdIPO6UGQbe79maSDg-GgohQIMk,10517
|
276
275
|
nucliadb/search/search/query_parser/parsers/unit_retrieval.py,sha256=rW3YHDWLkI2Hhznl_1oOMhC01bwZMAjv-Wu3iHPIaiU,11475
|
277
276
|
nucliadb/standalone/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
|
278
|
-
nucliadb/standalone/api_router.py,sha256=
|
277
|
+
nucliadb/standalone/api_router.py,sha256=zRSMlaRVHUDGTYA3zC03UV_aLLn-ch-kaeWn1tEjTXw,4338
|
279
278
|
nucliadb/standalone/app.py,sha256=mAApNK_iVsQgJyd-mtwCeZq5csSimwnXmlQGH9a70pE,5586
|
280
279
|
nucliadb/standalone/auth.py,sha256=UwMv-TywhMZabvVg3anQLeCRdoHDnWf2o3luvnoNBjs,7670
|
281
280
|
nucliadb/standalone/config.py,sha256=hJ3p4dBRSsj5FOmIgAiEX9ZsAGUYd1W-_UJIol5LCCg,4967
|
@@ -368,8 +367,8 @@ nucliadb/writer/tus/local.py,sha256=7jYa_w9b-N90jWgN2sQKkNcomqn6JMVBOVeDOVYJHto,
|
|
368
367
|
nucliadb/writer/tus/s3.py,sha256=vF0NkFTXiXhXq3bCVXXVV-ED38ECVoUeeYViP8uMqcU,8357
|
369
368
|
nucliadb/writer/tus/storage.py,sha256=ToqwjoYnjI4oIcwzkhha_MPxi-k4Jk3Lt55zRwaC1SM,2903
|
370
369
|
nucliadb/writer/tus/utils.py,sha256=MSdVbRsRSZVdkaum69_0wku7X3p5wlZf4nr6E0GMKbw,2556
|
371
|
-
nucliadb-6.4.1.
|
372
|
-
nucliadb-6.4.1.
|
373
|
-
nucliadb-6.4.1.
|
374
|
-
nucliadb-6.4.1.
|
375
|
-
nucliadb-6.4.1.
|
370
|
+
nucliadb-6.4.1.post4344.dist-info/METADATA,sha256=7g8xzzmO1LVucQdu1NpEorQBr87zZGTeJyOZy-6g_rg,4152
|
371
|
+
nucliadb-6.4.1.post4344.dist-info/WHEEL,sha256=zaaOINJESkSfm_4HQVc5ssNzHCPXhJm0kEUakpsEHaU,91
|
372
|
+
nucliadb-6.4.1.post4344.dist-info/entry_points.txt,sha256=XqGfgFDuY3zXQc8ewXM2TRVjTModIq851zOsgrmaXx4,1268
|
373
|
+
nucliadb-6.4.1.post4344.dist-info/top_level.txt,sha256=hwYhTVnX7jkQ9gJCkVrbqEG1M4lT2F_iPQND1fCzF80,20
|
374
|
+
nucliadb-6.4.1.post4344.dist-info/RECORD,,
|
@@ -1,41 +0,0 @@
|
|
1
|
-
# Copyright (C) 2021 Bosutech XXI S.L.
|
2
|
-
#
|
3
|
-
# nucliadb is offered under the AGPL v3.0 and as commercial software.
|
4
|
-
# For commercial licensing, contact us at info@nuclia.com.
|
5
|
-
#
|
6
|
-
# AGPL:
|
7
|
-
# This program is free software: you can redistribute it and/or modify
|
8
|
-
# it under the terms of the GNU Affero General Public License as
|
9
|
-
# published by the Free Software Foundation, either version 3 of the
|
10
|
-
# License, or (at your option) any later version.
|
11
|
-
#
|
12
|
-
# This program is distributed in the hope that it will be useful,
|
13
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
15
|
-
# GNU Affero General Public License for more details.
|
16
|
-
#
|
17
|
-
# You should have received a copy of the GNU Affero General Public License
|
18
|
-
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
|
-
#
|
20
|
-
import logging
|
21
|
-
from typing import Optional
|
22
|
-
|
23
|
-
from nucliadb.common.maindb.driver import Transaction
|
24
|
-
|
25
|
-
logger = logging.getLogger(__name__)
|
26
|
-
|
27
|
-
|
28
|
-
PULL_PARTITION_OFFSET = "/processing/pull-offset/{pull_type_id}/{partition}"
|
29
|
-
|
30
|
-
|
31
|
-
async def get_pull_offset(txn: Transaction, *, pull_type_id: str, partition: str) -> Optional[int]:
|
32
|
-
key = PULL_PARTITION_OFFSET.format(pull_type_id=pull_type_id, partition=partition)
|
33
|
-
val: Optional[bytes] = await txn.get(key)
|
34
|
-
if val is not None:
|
35
|
-
return int(val)
|
36
|
-
return None
|
37
|
-
|
38
|
-
|
39
|
-
async def set_pull_offset(txn: Transaction, *, pull_type_id: str, partition: str, offset: int) -> None:
|
40
|
-
key = PULL_PARTITION_OFFSET.format(pull_type_id=pull_type_id, partition=partition)
|
41
|
-
await txn.set(key, str(offset).encode())
|
File without changes
|
File without changes
|
File without changes
|