nucliadb 6.4.0.post4271__py3-none-any.whl → 6.4.0.post4279__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nucliadb/common/http_clients/processing.py +80 -1
- nucliadb/ingest/app.py +22 -4
- nucliadb/ingest/consumer/consumer.py +2 -2
- nucliadb/ingest/consumer/pull.py +166 -2
- nucliadb/ingest/consumer/service.py +27 -1
- nucliadb/ingest/settings.py +9 -0
- nucliadb/search/search/chat/ask.py +55 -55
- nucliadb/search/search/chat/prompt.py +46 -6
- nucliadb/tasks/consumer.py +2 -2
- {nucliadb-6.4.0.post4271.dist-info → nucliadb-6.4.0.post4279.dist-info}/METADATA +6 -6
- {nucliadb-6.4.0.post4271.dist-info → nucliadb-6.4.0.post4279.dist-info}/RECORD +14 -14
- {nucliadb-6.4.0.post4271.dist-info → nucliadb-6.4.0.post4279.dist-info}/WHEEL +0 -0
- {nucliadb-6.4.0.post4271.dist-info → nucliadb-6.4.0.post4279.dist-info}/entry_points.txt +0 -0
- {nucliadb-6.4.0.post4271.dist-info → nucliadb-6.4.0.post4279.dist-info}/top_level.txt +0 -0
nucliadb/common/http_clients/processing.py
CHANGED
@@ -25,6 +25,7 @@ import aiohttp
 import jwt
 import pydantic

+from nucliadb_utils.helpers import MessageProgressUpdater
 from nucliadb_utils.settings import nuclia_settings

 from .utils import check_status
@@ -51,6 +52,16 @@ def get_processing_api_url() -> str:
     return nuclia_settings.nuclia_processing_cluster_url + "/api/v1/internal/processing"


+def get_processing_api_v2_url() -> str:
+    if nuclia_settings.nuclia_service_account:
+        return (
+            nuclia_settings.nuclia_public_url.format(zone=nuclia_settings.nuclia_zone)
+            + "/api/v2/processing"
+        )
+    else:
+        return nuclia_settings.nuclia_processing_cluster_url + "/api/v2/internal/processing"
+
+
 class PullResponse(pydantic.BaseModel):
     status: str
     payload: Optional[str] = None
@@ -150,11 +161,38 @@ class StatsResponse(pydantic.BaseModel):
     scheduled: int


+class PullRequestV2(pydantic.BaseModel):
+    timeout: float = 5
+    limit: int = 1
+    ack: list[str] = []
+
+
+class InProgressRequest(pydantic.BaseModel):
+    ack: list[str] = []
+
+
+class PulledMessage(pydantic.BaseModel):
+    payload: bytes
+    headers: dict[str, str]
+    ack_token: str
+    seq: int
+
+
+class PullResponseV2(pydantic.BaseModel):
+    messages: list[PulledMessage]
+    ttl: float
+    pending: int
+
+
+JSON_HEADERS = {"Content-Type": "application/json"}
+
+
 class ProcessingHTTPClient:
     def __init__(self):
         self.session = aiohttp.ClientSession()
         self.base_url = get_processing_api_url()
-        self.
+        self.base_url_v2 = get_processing_api_v2_url()
+        self.headers: dict[str, str] = {}
         if nuclia_settings.nuclia_service_account is not None:
             self.headers["X-STF-NUAKEY"] = f"Bearer {nuclia_settings.nuclia_service_account}"

@@ -193,6 +231,31 @@ class ProcessingHTTPClient:
         data = PullPosition.model_validate_json(resp_text)
         return data.cursor

+    async def in_progress(self, ack_token: str):
+        url = self.base_url_v2 + "/pull/in_progress"
+        request = InProgressRequest(ack=[ack_token])
+        async with self.session.post(
+            url, headers=self.headers | JSON_HEADERS, data=request.model_dump_json()
+        ) as resp:
+            resp_text = await resp.text()
+            check_status(resp, resp_text)
+
+    async def pull_v2(
+        self, ack_tokens: list[str], limit: int = 1, timeout: float = 5
+    ) -> Optional[PullResponseV2]:
+        url = self.base_url_v2 + "/pull"
+        request = PullRequestV2(limit=limit, timeout=timeout, ack=ack_tokens)
+        async with self.session.post(
+            url, headers=self.headers | JSON_HEADERS, data=request.model_dump_json()
+        ) as resp:
+            resp_text = await resp.text()
+            check_status(resp, resp_text)
+
+            if resp.status == 204:
+                return None
+            else:
+                return PullResponseV2.model_validate_json(resp_text)
+
     async def requests(
         self,
         cursor: Optional[str] = None,
@@ -225,3 +288,19 @@ class ProcessingHTTPClient:
         resp_text = await resp.text()
         check_status(resp, resp_text)
         return StatsResponse.model_validate_json(resp_text)
+
+
+class ProcessingPullMessageProgressUpdater(MessageProgressUpdater):
+    """
+    Context manager to send progress updates to NATS.
+
+    This should allow lower ack_wait time settings without causing
+    messages to be redelivered.
+    """
+
+    def __init__(self, client: ProcessingHTTPClient, msg: PulledMessage, timeout: float):
+        async def update_msg() -> bool:
+            await client.in_progress(msg.ack_token)
+            return False
+
+        super().__init__(str(msg.seq), update_msg, timeout)
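The new v2 surface gives the client explicit ack and keepalive semantics. Below is a minimal usage sketch of how the pieces added here compose, mirroring the `PullV2Worker` loop added in `nucliadb/ingest/consumer/pull.py` further down; the payload handling is elided.

```python
import asyncio

from nucliadb.common.http_clients.processing import (
    ProcessingHTTPClient,
    ProcessingPullMessageProgressUpdater,
)

async def drain_once() -> None:
    ack_tokens: list[str] = []
    async with ProcessingHTTPClient() as client:
        # Acks for previously handled messages ride along with the next pull.
        pull = await client.pull_v2(ack_tokens=ack_tokens, limit=1)
        if pull is None:
            return  # HTTP 204: nothing pending
        for msg in pull.messages:
            # Renew the in-progress marker well inside the server TTL so the
            # message is not redelivered while it is still being handled.
            async with ProcessingPullMessageProgressUpdater(client, msg, pull.ttl * 0.66):
                ...  # handle msg.payload (a base64-encoded BrokerMessage)
                ack_tokens.append(msg.ack_token)

asyncio.run(drain_once())
```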
nucliadb/ingest/app.py
CHANGED
@@ -32,7 +32,7 @@ from nucliadb.ingest.consumer import service as consumer_service
 from nucliadb.ingest.partitions import assign_partitions
 from nucliadb.ingest.processing import start_processing_engine, stop_processing_engine
 from nucliadb.ingest.service import start_grpc
-from nucliadb.ingest.settings import settings
+from nucliadb.ingest.settings import ProcessingPullMode, settings
 from nucliadb.ingest.utils import start_ingest as start_ingest_utility
 from nucliadb.ingest.utils import stop_ingest as stop_ingest_utility
 from nucliadb_telemetry import errors
@@ -101,7 +101,12 @@ async def initialize_grpc():  # pragma: no cover

 async def initialize_pull_workers() -> list[Callable[[], Awaitable[None]]]:
     finalizers = await initialize_grpc()
-
+    if settings.processing_pull_mode == ProcessingPullMode.V1:
+        pull_workers = await consumer_service.start_pull_workers(SERVICE_NAME)
+    elif settings.processing_pull_mode == ProcessingPullMode.V2:
+        pull_workers = [await consumer_service.start_ingest_processed_consumer_v2(SERVICE_NAME)]
+    else:
+        raise Exception("Processing pull workers not enabled and it is required")

     return pull_workers + finalizers

@@ -113,7 +118,11 @@ async def main_consumer():  # pragma: no cover
     grpc_health_finalizer = await health.start_grpc_health_service(settings.grpc_port)

     # pull workers could be pulled out into it's own deployment
-
+    if settings.processing_pull_mode == ProcessingPullMode.V1:
+        pull_workers = await consumer_service.start_pull_workers(SERVICE_NAME)
+    else:
+        # In v2, pull workers run inside the ingest consumer
+        pull_workers = []
     ingest_consumers = await consumer_service.start_ingest_consumers(SERVICE_NAME)

     await run_until_exit(
@@ -134,7 +143,16 @@ async def main_ingest_processed_consumer():  # pragma: no cover
     await start_processing_engine()
     metrics_server = await serve_metrics()
     grpc_health_finalizer = await health.start_grpc_health_service(settings.grpc_port)
-
+
+    if settings.processing_pull_mode == ProcessingPullMode.V1:
+        consumer = await consumer_service.start_ingest_processed_consumer(SERVICE_NAME)
+    elif settings.processing_pull_mode == ProcessingPullMode.V2:
+        consumer = await consumer_service.start_ingest_processed_consumer_v2(SERVICE_NAME)
+    else:
+        # Off
+        async def fake_consumer(): ...
+
+        consumer = fake_consumer

     await run_until_exit(
         [grpc_health_finalizer, consumer, metrics_server.shutdown, stop_processing_engine] + finalizers
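The `off` branch relies on every entry handed to `run_until_exit` being a zero-argument awaitable callable, so a bare `async def fake_consumer(): ...` satisfies the interface. A minimal sketch of that assumption (the `run_all` helper here is hypothetical, standing in for `run_until_exit`'s assumed semantics):

```python
import asyncio
from typing import Awaitable, Callable

async def fake_consumer() -> None:
    # No-op stand-in: awaiting it completes immediately.
    ...

async def run_all(finalizers: list[Callable[[], Awaitable[None]]]) -> None:
    # Assumed semantics: each finalizer entry is simply awaited in turn.
    for finalize in finalizers:
        await finalize()

asyncio.run(run_all([fake_consumer]))
```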
nucliadb/ingest/consumer/consumer.py
CHANGED
@@ -39,7 +39,7 @@ from nucliadb_protos.writer_pb2 import BrokerMessage, BrokerMessageBlobReference
 from nucliadb_telemetry import context, errors, metrics
 from nucliadb_utils import const
 from nucliadb_utils.cache.pubsub import PubSubDriver
-from nucliadb_utils.nats import
+from nucliadb_utils.nats import NatsConnectionManager, NatsMessageProgressUpdater
 from nucliadb_utils.settings import nats_consumer_settings
 from nucliadb_utils.storages.storage import Storage

@@ -181,7 +181,7 @@ class IngestConsumer:
         start = time.monotonic()

         async with (
-
+            NatsMessageProgressUpdater(msg, nats_consumer_settings.nats_ack_wait * 0.66),
             self.lock,
         ):
             try:
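`NatsMessageProgressUpdater` wraps message handling so the NATS server keeps seeing progress. The 0.66 factor leaves roughly a third of the ack window as slack: with `nats_ack_wait` at 60 seconds, progress is reported every ~40 seconds. A stand-in sketch of the pattern (not the actual `nucliadb_utils.nats` implementation):

```python
import asyncio
from typing import Awaitable, Callable

class KeepAlive:
    """Stand-in sketch: periodically invoke a renewal callback so a broker
    does not redeliver a message that is still being processed."""

    def __init__(self, renew: Callable[[], Awaitable[None]], interval: float):
        self._renew = renew
        self._interval = interval

    async def __aenter__(self) -> "KeepAlive":
        # Run renewals in the background while the body executes.
        self._task = asyncio.create_task(self._run())
        return self

    async def _run(self) -> None:
        while True:
            await asyncio.sleep(self._interval)
            await self._renew()  # e.g. in_progress() on the pulled message

    async def __aexit__(self, *exc) -> None:
        self._task.cancel()
```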
nucliadb/ingest/consumer/pull.py
CHANGED
@@ -19,21 +19,34 @@
 #
 import asyncio
 import base64
+from contextlib import contextmanager
 from datetime import datetime, timezone
 from typing import Optional

 from aiohttp.client_exceptions import ClientConnectorError
+from opentelemetry import trace
+from opentelemetry.context import Context
+from opentelemetry.propagate import extract
+from opentelemetry.trace import (
+    Link,
+)

 from nucliadb.common import datamanagers
 from nucliadb.common.back_pressure.materializer import BackPressureMaterializer
 from nucliadb.common.back_pressure.utils import BackPressureException
-from nucliadb.common.http_clients.processing import
+from nucliadb.common.http_clients.processing import (
+    ProcessingHTTPClient,
+    ProcessingPullMessageProgressUpdater,
+    get_nua_api_id,
+)
 from nucliadb.common.maindb.driver import Driver
-from nucliadb.ingest import logger, logger_activity
+from nucliadb.ingest import SERVICE_NAME, logger, logger_activity
 from nucliadb.ingest.orm.exceptions import ReallyStopPulling
 from nucliadb.ingest.orm.processor import Processor
 from nucliadb_protos.writer_pb2 import BrokerMessage, BrokerMessageBlobReference
 from nucliadb_telemetry import errors
+from nucliadb_telemetry.metrics import Gauge
+from nucliadb_telemetry.utils import get_telemetry
 from nucliadb_utils import const
 from nucliadb_utils.cache.pubsub import PubSubDriver
 from nucliadb_utils.settings import nuclia_settings
@@ -41,6 +54,8 @@ from nucliadb_utils.storages.storage import Storage
 from nucliadb_utils.transaction import MaxTransactionSizeExceededError
 from nucliadb_utils.utilities import get_storage, get_transaction_utility

+processing_pending_messages = Gauge("nucliadb_processing_pending_messages")
+

 class PullWorker:
     """
@@ -234,3 +249,152 @@ class PullWorker:
             except Exception:
                 logger.exception("Unhandled error pulling messages from processing")
                 await asyncio.sleep(self.pull_time_error_backoff)
+
+
+@contextmanager
+def run_in_span(headers: dict[str, str]):
+    # Create a span for handling this message
+    tracer_provider = get_telemetry(SERVICE_NAME)
+    if tracer_provider is None:
+        yield
+        return
+
+    tracer = tracer_provider.get_tracer(__name__)
+    our_span = tracer.start_span("handle_processing_pull")
+
+    # Try to retrieve processing context to link to it
+    witness = Context()
+    processor_context = extract(headers, context=witness)
+    if processor_context != witness:
+        # We successfully extracted a context, we link from the processor span to ours for ease of navigation
+        with tracer.start_as_current_span(
+            f"Pulled from proxy", links=[Link(our_span.get_span_context())], context=processor_context
+        ):
+            # And link from our span back to the processor span
+            our_span.add_link(trace.get_current_span().get_span_context())
+
+    # Go back to our context
+    trace.set_span_in_context(our_span)
+    with trace.use_span(our_span, end_on_exit=True):
+        yield
+
+
+class PullV2Worker:
+    """
+    The pull worker is responsible for pulling messages from the pull processing
+    http endpoint and processing them
+
+    The processing pull endpoint is also described as the "processing proxy" at times.
+    """
+
+    def __init__(
+        self,
+        driver: Driver,
+        storage: Storage,
+        pull_time_error_backoff: int,
+        pubsub: Optional[PubSubDriver] = None,
+        pull_time_empty_backoff: float = 5.0,
+        pull_api_timeout: int = 60,
+    ):
+        self.pull_time_error_backoff = pull_time_error_backoff
+        self.pull_time_empty_backoff = pull_time_empty_backoff
+        self.pull_api_timeout = pull_api_timeout
+
+        self.processor = Processor(driver, storage, pubsub, "-1")
+
+    async def handle_message(self, seq: int, payload: bytes) -> None:
+        pb = BrokerMessage()
+        data = base64.b64decode(payload)
+        pb.ParseFromString(data)
+
+        logger.debug(f"Resource: {pb.uuid} KB: {pb.kbid} ProcessingID: {pb.processing_id}")
+
+        await self.processor.process(
+            pb,
+            seq,
+            transaction_check=False,
+        )
+
+    async def loop(self):
+        """
+        Run this forever
+        """
+        while True:
+            try:
+                await self._loop()
+            except ReallyStopPulling:
+                logger.info("Exiting...")
+                break
+            except Exception as e:
+                errors.capture_exception(e)
+                logger.exception("Exception on worker", exc_info=e)
+                await asyncio.sleep(10)

+    async def _loop(self):
+        headers = {}
+        data = None
+        if nuclia_settings.nuclia_service_account is not None:
+            headers["X-STF-NUAKEY"] = f"Bearer {nuclia_settings.nuclia_service_account}"
+            # parse jwt sub to get pull type id
+            try:
+                get_nua_api_id()
+            except Exception as exc:
+                logger.exception("Could not read NUA API Key. Can not start pull worker")
+                raise ReallyStopPulling() from exc
+
+        ack_tokens = []
+        async with ProcessingHTTPClient() as processing_http_client:
+            while True:
+                try:
+                    # The code is only really prepared to pull 1 message at a time. If changing this, review MessageProgressUpdate usage
+                    pull = await processing_http_client.pull_v2(ack_tokens=ack_tokens, limit=1)
+                    ack_tokens.clear()
+                    if pull is None:
+                        processing_pending_messages.set(0)
+                        logger_activity.debug(f"No messages waiting in processing pull")
+                        await asyncio.sleep(self.pull_time_empty_backoff)
+                        continue
+
+                    logger.info("Message received from proxy", extra={"seq": [pull.messages[0].seq]})
+                    processing_pending_messages.set(pull.pending)
+                    try:
+                        for message in pull.messages:
+                            async with ProcessingPullMessageProgressUpdater(
+                                processing_http_client, message, pull.ttl * 0.66
+                            ):
+                                with run_in_span(message.headers):
+                                    await self.handle_message(message.seq, message.payload)
+                                    ack_tokens.append(message.ack_token)
+                    except Exception as e:
+                        errors.capture_exception(e)
+                        logger.exception("Error while pulling and processing message/s")
+                        raise e
+
+                except (
+                    asyncio.exceptions.CancelledError,
+                    RuntimeError,
+                    KeyboardInterrupt,
+                    SystemExit,
+                ):
+                    if ack_tokens:
+                        await processing_http_client.pull_v2(ack_tokens=ack_tokens, limit=0)
+                    logger.info(f"Pull task was canceled, exiting")
+                    raise ReallyStopPulling()
+
+                except ClientConnectorError:
+                    logger.error(
+                        f"Could not connect to processing engine, \
+                        {processing_http_client.base_url} verify your internet connection"
+                    )
+                    await asyncio.sleep(self.pull_time_error_backoff)
+
+                except MaxTransactionSizeExceededError as e:
+                    if data is not None:
+                        payload_length = 0
+                        if data.payload:
+                            payload_length = len(base64.b64decode(data.payload))
+                        logger.error(f"Message too big for transaction: {payload_length}")
+                    raise e
+                except Exception:
+                    logger.exception("Unhandled error pulling messages from processing")
+                    await asyncio.sleep(self.pull_time_error_backoff)
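`run_in_span` decides whether the proxy forwarded a trace context purely by comparing the extraction result against a fresh `Context`: `opentelemetry.propagate.extract` returns the context it was given when the carrier holds nothing, and a derived one when it does. A small sketch of that check, with a hypothetical `traceparent` header:

```python
from opentelemetry.context import Context
from opentelemetry.propagate import extract

# Hypothetical headers as the processing proxy might forward them.
headers = {"traceparent": "00-0af7651916cd43dd8448eb211c80319c-b7ad6b7169203331-01"}

witness = Context()
extracted = extract(headers, context=witness)
# Same comparison as run_in_span: unequal means a remote context was found,
# so the handler span can be cross-linked with the processor's span.
print(extracted != witness)  # True when a valid trace context was present
```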
nucliadb/ingest/consumer/service.py
CHANGED
@@ -28,7 +28,7 @@ from nucliadb.common.back_pressure.utils import is_back_pressure_enabled
 from nucliadb.common.maindb.utils import setup_driver
 from nucliadb.ingest import SERVICE_NAME, logger
 from nucliadb.ingest.consumer.consumer import IngestConsumer, IngestProcessedConsumer
-from nucliadb.ingest.consumer.pull import PullWorker
+from nucliadb.ingest.consumer.pull import PullV2Worker, PullWorker
 from nucliadb.ingest.settings import settings
 from nucliadb_utils.exceptions import ConfigurationError
 from nucliadb_utils.settings import indexing_settings, transaction_settings
@@ -177,6 +177,32 @@ async def start_ingest_processed_consumer(
     return nats_connection_manager.finalize


+async def start_ingest_processed_consumer_v2(
+    service_name: Optional[str] = None,
+) -> Callable[[], Awaitable[None]]:
+    """
+    This is not meant to be deployed with a stateful set like the other consumers.
+
+    We are not maintaining transactionability based on the nats sequence id from this
+    consumer and we will start off by not separating writes by partition AND
+    allowing NATS to manage the queue group for us.
+    """
+    driver = await setup_driver()
+    pubsub = await get_pubsub()
+    storage = await get_storage(service_name=service_name or SERVICE_NAME)
+
+    consumer = PullV2Worker(
+        driver=driver,
+        storage=storage,
+        pubsub=pubsub,
+        pull_time_error_backoff=settings.pull_time_error_backoff,
+        pull_api_timeout=settings.pull_api_timeout,
+    )
+    task = asyncio.create_task(consumer.loop())
+    task.add_done_callback(_handle_task_result)
+    return partial(_exit_tasks, [task])
+
+
 async def start_auditor() -> Callable[[], Awaitable[None]]:
     audit = get_audit()
     assert audit is not None
nucliadb/ingest/settings.py
CHANGED
@@ -63,6 +63,13 @@ class DriverSettings(BaseSettings):
     )


+# For use during migration from pull v1 to pull v2
+class ProcessingPullMode(Enum):
+    OFF = "off"
+    V1 = "v1"
+    V2 = "v2"
+
+
 class Settings(DriverSettings):
     grpc_port: int = 8030

@@ -85,5 +92,7 @@ class Settings(DriverSettings):

     max_concurrent_ingest_processing: int = 5

+    processing_pull_mode: ProcessingPullMode = ProcessingPullMode.V1
+

 settings = Settings()
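`Settings` is a pydantic settings class, so the new field should be switchable per deployment through the environment. Assuming the default field-name-to-variable mapping (no env prefix), toggling pull v2 looks like:

```python
import os

from nucliadb.ingest.settings import ProcessingPullMode, Settings

os.environ["PROCESSING_PULL_MODE"] = "v2"  # assumed env var name

settings = Settings()
assert settings.processing_pull_mode is ProcessingPullMode.V2
```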
nucliadb/search/search/chat/ask.py
CHANGED
@@ -507,17 +507,18 @@ async def ask(
         logger.info("Failed to rephrase ask query, using original")

     try:
-
-
-
-
-
-
-
-
-
-
-
+        with metrics.time("retrieval"):
+            retrieval_results = await retrieval_step(
+                kbid=kbid,
+                # Prefer the rephrased query for retrieval if available
+                main_query=rephrased_query or user_query,
+                ask_request=ask_request,
+                client_type=client_type,
+                user_id=user_id,
+                origin=origin,
+                metrics=metrics,
+                resource=resource,
+            )
     except NoRetrievalResultsError as err:
         maybe_audit_chat(
             kbid=kbid,
@@ -562,6 +563,7 @@ async def ask(
         image_strategies=ask_request.rag_images_strategies,
         max_context_characters=tokens_to_chars(generation.max_context_tokens),
         visual_llm=generation.use_visual_llm,
+        metrics=metrics.child_span("context_building"),
     )
     (
         prompt_context,
@@ -747,45 +749,44 @@ async def retrieval_in_kb(
 ) -> RetrievalResults:
     prequeries = parse_prequeries(ask_request)
     graph_strategy = parse_graph_strategy(ask_request)
-
-
+    main_results, prequeries_results, parsed_query = await get_find_results(
+        kbid=kbid,
+        query=main_query,
+        item=ask_request,
+        ndb_client=client_type,
+        user=user_id,
+        origin=origin,
+        metrics=metrics.child_span("hybrid_retrieval"),
+        prequeries_strategy=prequeries,
+    )
+
+    if graph_strategy is not None:
+        assert parsed_query.retrieval.reranker is not None, (
+            "find parser must provide a reranking algorithm"
+        )
+        reranker = get_reranker(parsed_query.retrieval.reranker)
+        graph_results, graph_request = await get_graph_results(
             kbid=kbid,
             query=main_query,
             item=ask_request,
             ndb_client=client_type,
             user=user_id,
             origin=origin,
-
-
+            graph_strategy=graph_strategy,
+            metrics=metrics.child_span("graph_retrieval"),
+            text_block_reranker=reranker,
         )

-    if
-
-            "find parser must provide a reranking algorithm"
-        )
-        reranker = get_reranker(parsed_query.retrieval.reranker)
-        graph_results, graph_request = await get_graph_results(
-            kbid=kbid,
-            query=main_query,
-            item=ask_request,
-            ndb_client=client_type,
-            user=user_id,
-            origin=origin,
-            graph_strategy=graph_strategy,
-            metrics=metrics,
-            text_block_reranker=reranker,
-        )
-
-        if prequeries_results is None:
-            prequeries_results = []
+        if prequeries_results is None:
+            prequeries_results = []

-
-
+        prequery = PreQuery(id="graph", request=graph_request, weight=graph_strategy.weight)
+        prequeries_results.append((prequery, graph_results))

-
-
-
-
+    if len(main_results.resources) == 0 and all(
+        len(prequery_result.resources) == 0 for (_, prequery_result) in prequeries_results or []
+    ):
+        raise NoRetrievalResultsError(main_results, prequeries_results)

     main_query_weight = prequeries.main_query_weight if prequeries is not None else 1.0
     best_matches = compute_best_matches(
@@ -836,21 +837,20 @@ async def retrieval_in_resource(
     )
     add_resource_filter(prequery.request, [resource])

-
-
-
-
-
-
-
-
-
-
-
-
-
-    )
-        raise NoRetrievalResultsError(main_results, prequeries_results)
+    main_results, prequeries_results, parsed_query = await get_find_results(
+        kbid=kbid,
+        query=main_query,
+        item=ask_request,
+        ndb_client=client_type,
+        user=user_id,
+        origin=origin,
+        metrics=metrics.child_span("hybrid_retrieval"),
+        prequeries_strategy=prequeries,
+    )
+    if len(main_results.resources) == 0 and all(
+        len(prequery_result.resources) == 0 for (_, prequery_result) in prequeries_results or []
+    ):
+        raise NoRetrievalResultsError(main_results, prequeries_results)
     main_query_weight = prequeries.main_query_weight if prequeries is not None else 1.0
     best_matches = compute_best_matches(
         main_results=main_results,
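Both retrieval paths now hang their timings off named child spans instead of threading the root `metrics` object everywhere. Going by the calls visible in this diff (`time()` as a context manager, `child_span()` returning a nested `Metrics`, `set()` recording a counter), the pattern is roughly:

```python
from nucliadb.search.search.metrics import Metrics

metrics = Metrics("ask")
with metrics.time("retrieval"):
    # Hybrid and graph retrieval each get their own named child span,
    # as retrieval_in_kb does above.
    retrieval_metrics = metrics.child_span("hybrid_retrieval")
    retrieval_metrics.set("ops", 1)  # counters land on the child
```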
nucliadb/search/search/chat/prompt.py
CHANGED
@@ -41,6 +41,7 @@ from nucliadb.search.search.chat.images import (
     get_paragraph_image,
 )
 from nucliadb.search.search.hydrator import hydrate_field_text, hydrate_resource_text
+from nucliadb.search.search.metrics import Metrics
 from nucliadb.search.search.paragraphs import get_paragraph_text
 from nucliadb_models.labels import translate_alias_to_system_label
 from nucliadb_models.metadata import Extra, Origin
@@ -244,6 +245,7 @@ async def full_resource_prompt_context(
     ordered_paragraphs: list[FindParagraph],
     resource: Optional[str],
     strategy: FullResourceStrategy,
+    metrics: Metrics,
 ) -> None:
     """
     Algorithm steps:
@@ -298,6 +300,8 @@ async def full_resource_prompt_context(
             context[field.full()] = extracted_text
             added_fields.add(field.full())

+    metrics.set("full_resource_ops", len(added_fields))
+
     if strategy.include_remaining_text_blocks:
         for paragraph in ordered_paragraphs:
             pid = cast(ParagraphId, parse_text_block_id(paragraph.id))
@@ -309,6 +313,7 @@ async def extend_prompt_context_with_metadata(
     context: CappedPromptContext,
     kbid: str,
     strategy: MetadataExtensionStrategy,
+    metrics: Metrics,
 ) -> None:
     text_block_ids: list[TextBlockId] = []
     for text_block_id in context.text_block_ids():
@@ -321,18 +326,25 @@ async def extend_prompt_context_with_metadata(
     if len(text_block_ids) == 0:  # pragma: no cover
         return

+    ops = 0
     if MetadataExtensionType.ORIGIN in strategy.types:
+        ops += 1
         await extend_prompt_context_with_origin_metadata(context, kbid, text_block_ids)

     if MetadataExtensionType.CLASSIFICATION_LABELS in strategy.types:
+        ops += 1
         await extend_prompt_context_with_classification_labels(context, kbid, text_block_ids)

     if MetadataExtensionType.NERS in strategy.types:
+        ops += 1
         await extend_prompt_context_with_ner(context, kbid, text_block_ids)

     if MetadataExtensionType.EXTRA_METADATA in strategy.types:
+        ops += 1
         await extend_prompt_context_with_extra_metadata(context, kbid, text_block_ids)

+    metrics.set("metadata_extension_ops", ops * len(text_block_ids))
+

 def parse_text_block_id(text_block_id: str) -> TextBlockId:
     try:
@@ -464,6 +476,7 @@ async def field_extension_prompt_context(
     kbid: str,
     ordered_paragraphs: list[FindParagraph],
     strategy: FieldExtensionStrategy,
+    metrics: Metrics,
 ) -> None:
     """
     Algorithm steps:
@@ -493,6 +506,8 @@ async def field_extension_prompt_context(
     tasks = [hydrate_field_text(kbid, fid) for fid in extend_field_ids]
     field_extracted_texts = await run_concurrently(tasks)

+    metrics.set("field_extension_ops", len(field_extracted_texts))
+
     for result in field_extracted_texts:
         if result is None:  # pragma: no cover
             continue
@@ -619,6 +634,7 @@ async def neighbouring_paragraphs_prompt_context(
     kbid: str,
     ordered_text_blocks: list[FindParagraph],
     strategy: NeighbouringParagraphsStrategy,
+    metrics: Metrics,
 ) -> None:
     """
     This function will get the paragraph texts and then craft a context with the neighbouring paragraphs of the
@@ -658,6 +674,9 @@ async def neighbouring_paragraphs_prompt_context(
         return

     results: list[tuple[ParagraphId, str]] = await asyncio.gather(*paragraph_ops)
+
+    metrics.set("neighbouring_paragraphs_ops", len(results))
+
     # Add the paragraph texts to the context
     for pid, text in results:
         if text != "":
@@ -670,8 +689,10 @@ async def conversation_prompt_context(
     ordered_paragraphs: list[FindParagraph],
     conversational_strategy: ConversationalStrategy,
     visual_llm: bool,
+    metrics: Metrics,
 ):
     analyzed_fields: List[str] = []
+    ops = 0
     async with get_driver().transaction(read_only=True) as txn:
         storage = await get_storage()
         kb = KnowledgeBoxORM(txn, storage, kbid)
@@ -701,6 +722,7 @@ async def conversation_prompt_context(

             attachments: List[resources_pb2.FieldRef] = []
             if conversational_strategy.full:
+                ops += 5
                 extracted_text = await field_obj.get_extracted_text()
                 for current_page in range(1, cmetadata.pages + 1):
                     conv = await field_obj.db_get_value(current_page)
@@ -749,6 +771,7 @@ async def conversation_prompt_context(
                         break

                 for message in messages:
+                    ops += 1
                     text = message.content.text.strip()
                     pid = f"{rid}/{field_type}/{field_id}/{message.ident}/0-{len(message.content.text) + 1}"
                     context[pid] = text
@@ -757,6 +780,7 @@ async def conversation_prompt_context(
             if conversational_strategy.attachments_text:
                 # add on the context the images if vlm enabled
                 for attachment in attachments:
+                    ops += 1
                     field: File = await resource.get_field(
                         attachment.field_id, attachment.field_type, load=True
                     )  # type: ignore
@@ -767,6 +791,7 @@ async def conversation_prompt_context(

             if conversational_strategy.attachments_images and visual_llm:
                 for attachment in attachments:
+                    ops += 1
                     file_field: File = await resource.get_field(
                         attachment.field_id, attachment.field_type, load=True
                     )  # type: ignore
@@ -776,6 +801,7 @@ async def conversation_prompt_context(
                     context.images[pid] = image

             analyzed_fields.append(field_unique_id)
+    metrics.set("conversation_ops", ops)


 async def hierarchy_prompt_context(
@@ -783,6 +809,7 @@ async def hierarchy_prompt_context(
     kbid: str,
     ordered_paragraphs: list[FindParagraph],
     strategy: HierarchyResourceStrategy,
+    metrics: Metrics,
 ) -> None:
     """
     This function will get the paragraph texts (possibly with extra characters, if extra_characters > 0) and then
@@ -842,6 +869,8 @@ async def hierarchy_prompt_context(
         else:
             resources[rid].paragraphs.append((paragraph, extended_paragraph_text))

+    metrics.set("hierarchy_ops", len(resources))
+
     # Modify the first paragraph of each resource to include the title and summary of the resource, as well as the
     # extended paragraph text of all the paragraphs in the resource.
     for values in resources.values():
@@ -886,6 +915,7 @@ class PromptContextBuilder:
         image_strategies: Optional[Sequence[ImageRagStrategy]] = None,
         max_context_characters: Optional[int] = None,
         visual_llm: bool = False,
+        metrics: Metrics = Metrics("prompt_context_builder"),
     ):
         self.kbid = kbid
         self.ordered_paragraphs = ordered_paragraphs
@@ -896,6 +926,7 @@ class PromptContextBuilder:
         self.image_strategies = image_strategies
         self.max_context_characters = max_context_characters
         self.visual_llm = visual_llm
+        self.metrics = metrics

     def prepend_user_context(self, context: CappedPromptContext):
         # Chat extra context passed by the user is the most important, therefore
@@ -920,6 +951,7 @@ class PromptContextBuilder:
         return context, context_order, context_images

     async def _build_context_images(self, context: CappedPromptContext) -> None:
+        ops = 0
         if self.image_strategies is None or len(self.image_strategies) == 0:
             # Nothing to do
             return
@@ -958,6 +990,7 @@ class PromptContextBuilder:
                     if page_image_id not in context.images:
                         image = await get_page_image(self.kbid, pid, paragraph_page_number)
                         if image is not None:
+                            ops += 1
                             context.images[page_image_id] = image
                             page_images_added += 1
                         else:
@@ -977,6 +1010,7 @@ class PromptContextBuilder:
                 ):
                     pimage = await get_paragraph_image(self.kbid, pid, paragraph.reference)
                     if pimage is not None:
+                        ops += 1
                         context.images[paragraph.id] = pimage
                     else:
                         logger.warning(
@@ -987,6 +1021,7 @@ class PromptContextBuilder:
                                 "reference": paragraph.reference,
                             },
                         )
+        self.metrics.set("image_ops", ops)

     async def _build_context(self, context: CappedPromptContext) -> None:
         if self.strategies is None or len(self.strategies) == 0:
@@ -1038,17 +1073,17 @@ class PromptContextBuilder:
                 self.ordered_paragraphs,
                 self.resource,
                 full_resource,
+                self.metrics,
             )
             if metadata_extension:
-                await extend_prompt_context_with_metadata(
+                await extend_prompt_context_with_metadata(
+                    context, self.kbid, metadata_extension, self.metrics
+                )
             return

         if hierarchy:
             await hierarchy_prompt_context(
-                context,
-                self.kbid,
-                self.ordered_paragraphs,
-                hierarchy,
+                context, self.kbid, self.ordered_paragraphs, hierarchy, self.metrics
             )
         if neighbouring_paragraphs:
             await neighbouring_paragraphs_prompt_context(
@@ -1056,6 +1091,7 @@ class PromptContextBuilder:
                 self.kbid,
                 self.ordered_paragraphs,
                 neighbouring_paragraphs,
+                self.metrics,
             )
         if field_extension:
             await field_extension_prompt_context(
@@ -1063,6 +1099,7 @@ class PromptContextBuilder:
                 self.kbid,
                 self.ordered_paragraphs,
                 field_extension,
+                self.metrics,
             )
         if conversational_strategy:
             await conversation_prompt_context(
@@ -1071,9 +1108,12 @@ class PromptContextBuilder:
                 self.ordered_paragraphs,
                 conversational_strategy,
                 self.visual_llm,
+                self.metrics,
             )
         if metadata_extension:
-            await extend_prompt_context_with_metadata(
+            await extend_prompt_context_with_metadata(
+                context, self.kbid, metadata_extension, self.metrics
+            )


 def get_paragraph_page_number(paragraph: FindParagraph) -> Optional[int]:
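Each strategy helper now receives the shared `Metrics` object and records one `*_ops` counter for the work it performed (fields hydrated, paragraphs fetched, images added, and so on). Callers that want those counters aggregated under the request-level metrics pass a child span rather than relying on the `Metrics("prompt_context_builder")` default, exactly as `ask.py` does above:

```python
from nucliadb.search.search.metrics import Metrics

parent = Metrics("ask")
# As in ask.py: the builder's work is recorded under a "context_building"
# child span instead of a detached default Metrics instance.
builder_metrics = parent.child_span("context_building")
builder_metrics.set("image_ops", 0)  # strategies record counters like this
```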
nucliadb/tasks/consumer.py
CHANGED
@@ -30,7 +30,7 @@ from nucliadb.tasks.logger import logger
 from nucliadb.tasks.models import Callback, MsgType
 from nucliadb.tasks.utils import NatsConsumer, NatsStream, create_nats_stream_if_not_exists
 from nucliadb_telemetry import errors
-from nucliadb_utils.nats import
+from nucliadb_utils.nats import NatsMessageProgressUpdater
 from nucliadb_utils.settings import nats_consumer_settings

 BEFORE_NAK_SLEEP_SECONDS = 2
@@ -124,7 +124,7 @@ class NatsTaskConsumer(Generic[MsgType]):
             f"Message received: subject:{subject}, seqid: {seqid}, reply: {reply}",
             extra={"consumer_name": self.name},
         )
-        async with
+        async with NatsMessageProgressUpdater(msg, nats_consumer_settings.nats_ack_wait * 0.66):
             try:
                 task_msg = self.msg_type.model_validate_json(msg.data)
             except pydantic.ValidationError as e:
{nucliadb-6.4.0.post4271.dist-info → nucliadb-6.4.0.post4279.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: nucliadb
-Version: 6.4.0.post4271
+Version: 6.4.0.post4279
 Summary: NucliaDB
 Author-email: Nuclia <nucliadb@nuclia.com>
 License: AGPL
@@ -20,11 +20,11 @@ Classifier: Programming Language :: Python :: 3.12
 Classifier: Programming Language :: Python :: 3 :: Only
 Requires-Python: <4,>=3.9
 Description-Content-Type: text/markdown
-Requires-Dist: nucliadb-telemetry[all]>=6.4.0.post4271
-Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.4.0.post4271
-Requires-Dist: nucliadb-protos>=6.4.0.post4271
-Requires-Dist: nucliadb-models>=6.4.0.post4271
-Requires-Dist: nidx-protos>=6.4.0.post4271
+Requires-Dist: nucliadb-telemetry[all]>=6.4.0.post4279
+Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.4.0.post4279
+Requires-Dist: nucliadb-protos>=6.4.0.post4279
+Requires-Dist: nucliadb-models>=6.4.0.post4279
+Requires-Dist: nidx-protos>=6.4.0.post4279
 Requires-Dist: nucliadb-admin-assets>=1.0.0.post1224
 Requires-Dist: nuclia-models>=0.24.2
 Requires-Dist: uvicorn[standard]
{nucliadb-6.4.0.post4271.dist-info → nucliadb-6.4.0.post4279.dist-info}/RECORD
CHANGED
@@ -101,7 +101,7 @@ nucliadb/common/external_index_providers/settings.py,sha256=EGHnIkwxqe6aypwKegXT
 nucliadb/common/http_clients/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
 nucliadb/common/http_clients/auth.py,sha256=srfpgAbs2wmqA9u_l-HxsV4YoO77Tse4y3gm3q2YvYM,2112
 nucliadb/common/http_clients/exceptions.py,sha256=47Y8OjkaGV_F18G07FpJhOzgWKUIexhlILyuVtICz8s,1100
-nucliadb/common/http_clients/processing.py,sha256=
+nucliadb/common/http_clients/processing.py,sha256=VzxzFArNsHWGmFoX0c5OrQB3vFW841aeyuP5NgzPQGo,9581
 nucliadb/common/http_clients/pypi.py,sha256=VHIUjwJEJVntVUo_FRoXIo8sLmluy7sa9-iXSITcrMY,1540
 nucliadb/common/http_clients/utils.py,sha256=yGUkHNS41abHiBoHqo_Mg3QSqGsS7rUtbfGftbEC57U,1529
 nucliadb/common/maindb/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
@@ -122,20 +122,20 @@ nucliadb/export_import/models.py,sha256=dbjScNkiMRv4X3Ktudy1JRliD25bfoDTy3JmEZgQ
 nucliadb/export_import/tasks.py,sha256=DWbdqY97ffoyfipelGXz3Jqz1iam6JCjQSh367Fc3NA,2947
 nucliadb/export_import/utils.py,sha256=8XOVMYXXw8b4ikojG7RjQ4tKN3Xu7nfu2yCUOqD50sk,23216
 nucliadb/ingest/__init__.py,sha256=fsw3C38VP50km3R-nHL775LNGPpJ4JxqXJ2Ib1f5SqE,1011
-nucliadb/ingest/app.py,sha256=
+nucliadb/ingest/app.py,sha256=Eympy8nbz09VDNPF28MuIeKMb7wgB9cTSOObS8uvL0o,8372
 nucliadb/ingest/partitions.py,sha256=2NIhMYbNT0TNBL6bX1UMSi7vxFGICstCKEqsB0TXHOE,2410
 nucliadb/ingest/processing.py,sha256=QmkHq-BU4vub7JRWe9VHvQ2DcAmT6-CzgFXuZxXhcBU,20953
 nucliadb/ingest/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 nucliadb/ingest/serialize.py,sha256=-TIjibJTbMqAowzRvyrG3R209vKqBZqXpdrQL9Dq4lo,16135
-nucliadb/ingest/settings.py,sha256=
+nucliadb/ingest/settings.py,sha256=inB5SpkSI6sRd-ftlJIHFH6XlbuiSaRdL-F2WGyseUw,3249
 nucliadb/ingest/utils.py,sha256=l1myURu3r8oA11dx3GpHw-gNTUc1AFX8xdPm9Lgl2rA,2275
 nucliadb/ingest/consumer/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
 nucliadb/ingest/consumer/auditing.py,sha256=xK21DIa_ZAiOJVVbnkmT4jgCRGshNGyPyxsqhE6kROE,7204
-nucliadb/ingest/consumer/consumer.py,sha256=
+nucliadb/ingest/consumer/consumer.py,sha256=GfdlrNlnt7PWYyk75xtyzn2SHZse7475U4U9q_9jKr0,13711
 nucliadb/ingest/consumer/materializer.py,sha256=tgD_rDI2twQzcz8kKNiW_L4YIth16IGh9mUfD5wiSD4,3858
 nucliadb/ingest/consumer/metrics.py,sha256=ji1l_4cKiHJthQd8YNem1ft4iMbw9KThmVvJmLcv3Xg,1075
-nucliadb/ingest/consumer/pull.py,sha256=
-nucliadb/ingest/consumer/service.py,sha256=
+nucliadb/ingest/consumer/pull.py,sha256=gfdyQ8IMFA_bpGnEpmRB9qmOJywBEwxn7pGYaueZizU,16874
+nucliadb/ingest/consumer/service.py,sha256=WXBN8dY7MlmYWxqQHIbIO7w_SdVJRY1RuHAWlQUXf8o,8852
 nucliadb/ingest/consumer/shard_creator.py,sha256=w0smEu01FU_2cjZnsfBRNqT_Ntho11X17zTMST-vKbc,4359
 nucliadb/ingest/consumer/utils.py,sha256=jpX8D4lKzuPCpArQLZeX_Zczq3pfen_zAf8sPJfOEZU,2642
 nucliadb/ingest/fields/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
@@ -255,10 +255,10 @@ nucliadb/search/search/shards.py,sha256=mc5DK-MoCv9AFhlXlOFHbPvetcyNDzTFOJ5rimK8
 nucliadb/search/search/summarize.py,sha256=ksmYPubEQvAQgfPdZHfzB_rR19B2ci4IYZ6jLdHxZo8,4996
 nucliadb/search/search/utils.py,sha256=ajRIXfdTF67dBVahQCXW-rSv6gJpUMPt3QhJrWqArTQ,2175
 nucliadb/search/search/chat/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
-nucliadb/search/search/chat/ask.py,sha256=
+nucliadb/search/search/chat/ask.py,sha256=aaNj0MeAbx9dyeKpQJdm3VsHMq9OmcCESxahbgSxvCk,37805
 nucliadb/search/search/chat/exceptions.py,sha256=Siy4GXW2L7oPhIR86H3WHBhE9lkV4A4YaAszuGGUf54,1356
 nucliadb/search/search/chat/images.py,sha256=PA8VWxT5_HUGfW1ULhKTK46UBsVyINtWWqEM1ulzX1E,3095
-nucliadb/search/search/chat/prompt.py,sha256=
+nucliadb/search/search/chat/prompt.py,sha256=e8C7_MPr6Cn3nJHA4hWpeW3629KVI1ZUQA_wZf9Kiu4,48503
 nucliadb/search/search/chat/query.py,sha256=6v6twBUTWfUUzklVV6xqJSYPkAshnIrBH9wbTcjQvkI,17063
 nucliadb/search/search/query_parser/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
 nucliadb/search/search/query_parser/exceptions.py,sha256=szAOXUZ27oNY-OSa9t2hQ5HHkQQC0EX1FZz_LluJHJE,1224
@@ -290,7 +290,7 @@ nucliadb/standalone/static/favicon.ico,sha256=96pKGp6Sx457JkTfjy1dtApMhkitixfU6i
 nucliadb/standalone/static/index.html,sha256=PEZfuEQFYnYACAL1ceN8xC0im8lBrUx838RkE8tbvgA,3833
 nucliadb/standalone/static/logo.svg,sha256=-wQqSvPGTdlKjUP6pHE6kiq005pgYjDzp9nPl0X71Mk,2639
 nucliadb/tasks/__init__.py,sha256=oFJ3A8HD7w11mBu-IixYE_KxA7juMGlYQb7YD_y6WPM,975
-nucliadb/tasks/consumer.py,sha256=
+nucliadb/tasks/consumer.py,sha256=E7_9bY5o7BVlioWX9yO9yimDJaKeuj-P-tiNCJcaRz8,6964
 nucliadb/tasks/logger.py,sha256=C7keOEO_mjLVp5VbqAZ2QXfqVB2Hot7NgBlUP_SDSMw,924
 nucliadb/tasks/models.py,sha256=qrZKi5DNDQ07waMsp5L4_Fi7WRs57YiO-kmXlrBzEAA,1168
 nucliadb/tasks/producer.py,sha256=UnpJAzhj_GElsCoO5G6T4m6MshsgOaqR2tVzJmEta64,2625
@@ -368,8 +368,8 @@ nucliadb/writer/tus/local.py,sha256=7jYa_w9b-N90jWgN2sQKkNcomqn6JMVBOVeDOVYJHto,
 nucliadb/writer/tus/s3.py,sha256=vF0NkFTXiXhXq3bCVXXVV-ED38ECVoUeeYViP8uMqcU,8357
 nucliadb/writer/tus/storage.py,sha256=ToqwjoYnjI4oIcwzkhha_MPxi-k4Jk3Lt55zRwaC1SM,2903
 nucliadb/writer/tus/utils.py,sha256=MSdVbRsRSZVdkaum69_0wku7X3p5wlZf4nr6E0GMKbw,2556
-nucliadb-6.4.0.
-nucliadb-6.4.0.
-nucliadb-6.4.0.
-nucliadb-6.4.0.
-nucliadb-6.4.0.
+nucliadb-6.4.0.post4279.dist-info/METADATA,sha256=ISm2mlidMfyHGlEXRXBJcbnPe52rP58sBjj5NLRFf68,4223
+nucliadb-6.4.0.post4279.dist-info/WHEEL,sha256=DnLRTWE75wApRYVsjgc6wsVswC54sMSJhAEd4xhDpBk,91
+nucliadb-6.4.0.post4279.dist-info/entry_points.txt,sha256=XqGfgFDuY3zXQc8ewXM2TRVjTModIq851zOsgrmaXx4,1268
+nucliadb-6.4.0.post4279.dist-info/top_level.txt,sha256=hwYhTVnX7jkQ9gJCkVrbqEG1M4lT2F_iPQND1fCzF80,20
+nucliadb-6.4.0.post4279.dist-info/RECORD,,
|
File without changes
|
File without changes
|