nucliadb 6.1.0.post2504__py3-none-any.whl → 6.1.0.post2517__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nucliadb/ingest/consumer/consumer.py +1 -51
- nucliadb/search/search/chat/images.py +14 -7
- nucliadb/search/search/chat/prompt.py +5 -9
- {nucliadb-6.1.0.post2504.dist-info → nucliadb-6.1.0.post2517.dist-info}/METADATA +5 -5
- {nucliadb-6.1.0.post2504.dist-info → nucliadb-6.1.0.post2517.dist-info}/RECORD +9 -9
- {nucliadb-6.1.0.post2504.dist-info → nucliadb-6.1.0.post2517.dist-info}/WHEEL +0 -0
- {nucliadb-6.1.0.post2504.dist-info → nucliadb-6.1.0.post2517.dist-info}/entry_points.txt +0 -0
- {nucliadb-6.1.0.post2504.dist-info → nucliadb-6.1.0.post2517.dist-info}/top_level.txt +0 -0
- {nucliadb-6.1.0.post2504.dist-info → nucliadb-6.1.0.post2517.dist-info}/zip-safe +0 -0
@@ -291,56 +291,7 @@ class IngestProcessedConsumer(IngestConsumer):
|
|
291
291
|
other writes are going to be coming from user actions and we don't want to slow them down.
|
292
292
|
"""
|
293
293
|
|
294
|
-
async def get_stream_last_seqid(self) -> Optional[int]:
|
295
|
-
"""
|
296
|
-
XXX NOTE: Getting the last sequence id of the stream for the processed pull consumer is only needed when the new pull consumer is created.
|
297
|
-
|
298
|
-
For environments where we had the previous push consumer, we need to get the last sequence id from the old consumer info so that
|
299
|
-
we can start from there.
|
300
|
-
|
301
|
-
This code must be deleted once the new pull consumers are deployed on all environments and all the old push consumers have been deleted.
|
302
|
-
From then on, we will rely on the nats server to keep track of the last sequence id.
|
303
|
-
"""
|
304
|
-
if has_feature(const.Features.PULL_PROCESSED_CONSUMERS_DEPLOYED):
|
305
|
-
logger.warning(
|
306
|
-
f"Feature flag {const.Features.PULL_PROCESSED_CONSUMERS_DEPLOYED} is enabled. Relying on nats to keep track of the last sequence id."
|
307
|
-
)
|
308
|
-
return None
|
309
|
-
|
310
|
-
try:
|
311
|
-
push_consumer_info: nats.js.api.ConsumerInfo = (
|
312
|
-
await self.nats_connection_manager.js.consumer_info(
|
313
|
-
stream=const.Streams.INGEST_PROCESSED.name,
|
314
|
-
# This is the old consumer name
|
315
|
-
consumer="nucliadb-processed",
|
316
|
-
timeout=2,
|
317
|
-
)
|
318
|
-
)
|
319
|
-
# Start from the last non-acked message
|
320
|
-
if push_consumer_info.ack_floor is None:
|
321
|
-
logger.warning(
|
322
|
-
f"Nats consumer {push_consumer_info.name} has no ack floor. Starting from scratch."
|
323
|
-
)
|
324
|
-
return 1
|
325
|
-
last_seqid = push_consumer_info.ack_floor.stream_seq + 1
|
326
|
-
logger.info(
|
327
|
-
f"Starting from last sequence id {last_seqid} for {const.Streams.INGEST_PROCESSED.name}"
|
328
|
-
)
|
329
|
-
return last_seqid
|
330
|
-
except (nats.js.errors.NotFoundError, nats.errors.TimeoutError):
|
331
|
-
logger.warning(
|
332
|
-
f"Could not get last sequence id for {const.Streams.INGEST_PROCESSED.name}. Starting from scratch."
|
333
|
-
)
|
334
|
-
# Start from scratch
|
335
|
-
return 1
|
336
|
-
|
337
294
|
async def setup_nats_subscription(self):
|
338
|
-
last_sequence_id = await self.get_stream_last_seqid()
|
339
|
-
if last_sequence_id is None:
|
340
|
-
delivery_policy = nats.js.api.DeliverPolicy.ALL
|
341
|
-
else:
|
342
|
-
delivery_policy = nats.js.api.DeliverPolicy.BY_START_SEQUENCE
|
343
|
-
|
344
295
|
subject = const.Streams.INGEST_PROCESSED.subject
|
345
296
|
durable_name = const.Streams.INGEST_PROCESSED.group
|
346
297
|
self.subscription = await self.nats_connection_manager.pull_subscribe(
|
@@ -352,8 +303,7 @@ class IngestProcessedConsumer(IngestConsumer):
|
|
352
303
|
config=nats.js.api.ConsumerConfig(
|
353
304
|
durable_name=durable_name,
|
354
305
|
ack_policy=nats.js.api.AckPolicy.EXPLICIT,
|
355
|
-
deliver_policy=delivery_policy,
|
356
|
-
opt_start_seq=last_sequence_id,
|
306
|
+
deliver_policy=nats.js.api.DeliverPolicy.ALL,
|
357
307
|
max_ack_pending=1,
|
358
308
|
max_deliver=nats_consumer_settings.nats_max_deliver,
|
359
309
|
ack_wait=nats_consumer_settings.nats_ack_wait,
|
@@ -18,11 +18,14 @@
|
|
18
18
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
19
|
|
20
20
|
import base64
|
21
|
+
from io import BytesIO
|
21
22
|
from typing import Optional
|
22
23
|
|
23
24
|
from nucliadb.common.ids import ParagraphId
|
25
|
+
from nucliadb.ingest.fields.file import File
|
24
26
|
from nucliadb.search import SERVICE_NAME
|
25
27
|
from nucliadb_models.search import Image
|
28
|
+
from nucliadb_utils.storages.storage import Storage
|
26
29
|
from nucliadb_utils.utilities import get_storage
|
27
30
|
|
28
31
|
|
@@ -64,14 +67,18 @@ async def get_paragraph_image(kbid: str, paragraph_id: ParagraphId, reference: s
|
|
64
67
|
return image
|
65
68
|
|
66
69
|
|
67
|
-
async def
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
70
|
+
async def get_file_thumbnail_image(file: File) -> Optional[Image]:
|
71
|
+
fed = await file.get_file_extracted_data()
|
72
|
+
if fed is None or not fed.HasField("file_thumbnail"):
|
73
|
+
return None
|
74
|
+
storage: Storage = await get_storage(service_name=SERVICE_NAME)
|
75
|
+
image_bytes: BytesIO = await storage.downloadbytescf(fed.file_thumbnail)
|
76
|
+
value = image_bytes.getvalue()
|
77
|
+
if len(value) == 0:
|
72
78
|
return None
|
73
79
|
image = Image(
|
74
|
-
b64encoded=base64.b64encode(
|
75
|
-
|
80
|
+
b64encoded=base64.b64encode(value).decode(),
|
81
|
+
# We assume the thumbnail is always generated as jpeg by Nuclia processing
|
82
|
+
content_type="image/jpeg",
|
76
83
|
)
|
77
84
|
return image
|
@@ -35,7 +35,7 @@ from nucliadb.ingest.orm.knowledgebox import KnowledgeBox as KnowledgeBoxORM
|
|
35
35
|
from nucliadb.search import logger
|
36
36
|
from nucliadb.search.search import cache
|
37
37
|
from nucliadb.search.search.chat.images import (
|
38
|
-
|
38
|
+
get_file_thumbnail_image,
|
39
39
|
get_page_image,
|
40
40
|
get_paragraph_image,
|
41
41
|
)
|
@@ -753,14 +753,10 @@ async def conversation_prompt_context(
|
|
753
753
|
file_field: File = await resource.get_field(
|
754
754
|
attachment.field_id, attachment.field_type, load=True
|
755
755
|
) # type: ignore
|
756
|
-
|
757
|
-
if
|
758
|
-
"
|
759
|
-
|
760
|
-
image = await get_file_image(kbid, rid, attachment.field_id)
|
761
|
-
if image is not None:
|
762
|
-
pid = f"{rid}/f/{attachment.field_id}/0-0"
|
763
|
-
context.images[pid] = image
|
756
|
+
image = await get_file_thumbnail_image(file_field)
|
757
|
+
if image is not None:
|
758
|
+
pid = f"{rid}/f/{attachment.field_id}/0-0"
|
759
|
+
context.images[pid] = image
|
764
760
|
|
765
761
|
analyzed_fields.append(field_unique_id)
|
766
762
|
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: nucliadb
|
3
|
-
Version: 6.1.0.post2504
|
3
|
+
Version: 6.1.0.post2517
|
4
4
|
Home-page: https://docs.nuclia.dev/docs/management/nucliadb/intro
|
5
5
|
Author: NucliaDB Community
|
6
6
|
Author-email: nucliadb@nuclia.com
|
@@ -22,10 +22,10 @@ Classifier: Programming Language :: Python :: 3.12
|
|
22
22
|
Classifier: Programming Language :: Python :: 3 :: Only
|
23
23
|
Requires-Python: >=3.9, <4
|
24
24
|
Description-Content-Type: text/markdown
|
25
|
-
Requires-Dist: nucliadb-telemetry[all]>=6.1.0.post2504
|
26
|
-
Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.1.0.post2504
|
27
|
-
Requires-Dist: nucliadb-protos>=6.1.0.post2504
|
28
|
-
Requires-Dist: nucliadb-models>=6.1.0.post2504
|
25
|
+
Requires-Dist: nucliadb-telemetry[all]>=6.1.0.post2517
|
26
|
+
Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.1.0.post2517
|
27
|
+
Requires-Dist: nucliadb-protos>=6.1.0.post2517
|
28
|
+
Requires-Dist: nucliadb-models>=6.1.0.post2517
|
29
29
|
Requires-Dist: nucliadb-admin-assets>=1.0.0.post1224
|
30
30
|
Requires-Dist: nucliadb-node-binding>=2.26.0
|
31
31
|
Requires-Dist: uvicorn
|
@@ -115,7 +115,7 @@ nucliadb/ingest/settings.py,sha256=SDQpMRsTsNyi6IDxCJy6BZVUSKUzwAMuxf6ktp31VMM,3
|
|
115
115
|
nucliadb/ingest/utils.py,sha256=l1myURu3r8oA11dx3GpHw-gNTUc1AFX8xdPm9Lgl2rA,2275
|
116
116
|
nucliadb/ingest/consumer/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
|
117
117
|
nucliadb/ingest/consumer/auditing.py,sha256=EJoqRRr4dk2eUMK0GOY6b9xHO0YLQ0LjoP_xZBLACZo,7280
|
118
|
-
nucliadb/ingest/consumer/consumer.py,sha256=
|
118
|
+
nucliadb/ingest/consumer/consumer.py,sha256=y9XF4U3cZN5JFzC-ZnBCn7z5viJDMmjoeEprxm7VghE,13941
|
119
119
|
nucliadb/ingest/consumer/materializer.py,sha256=7ofLbwjldJA8TWXDRZRM4U5EviZt3qNSQ8oadmkzS0Y,3840
|
120
120
|
nucliadb/ingest/consumer/metrics.py,sha256=ji1l_4cKiHJthQd8YNem1ft4iMbw9KThmVvJmLcv3Xg,1075
|
121
121
|
nucliadb/ingest/consumer/pull.py,sha256=EYT0ImngMQgatStG68p2GSrPQBbJxeuq8nFm8DdAbwk,9280
|
@@ -229,8 +229,8 @@ nucliadb/search/search/utils.py,sha256=iF2tbBA56gRMJH1TlE2hMrqeXqjoeOPt4KgRdp2m9
|
|
229
229
|
nucliadb/search/search/chat/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
|
230
230
|
nucliadb/search/search/chat/ask.py,sha256=2ZIz3TbCG18X0eJZSfLc8nHyKToyrlv_SPFzOOcc5wU,33865
|
231
231
|
nucliadb/search/search/chat/exceptions.py,sha256=Siy4GXW2L7oPhIR86H3WHBhE9lkV4A4YaAszuGGUf54,1356
|
232
|
-
nucliadb/search/search/chat/images.py,sha256=
|
233
|
-
nucliadb/search/search/chat/prompt.py,sha256=
|
232
|
+
nucliadb/search/search/chat/images.py,sha256=PA8VWxT5_HUGfW1ULhKTK46UBsVyINtWWqEM1ulzX1E,3095
|
233
|
+
nucliadb/search/search/chat/prompt.py,sha256=mreHIjhIWCIGDlJ2CA7kv7KLgh8d09meoMX3-bvWje8,46152
|
234
234
|
nucliadb/search/search/chat/query.py,sha256=gKtlj2ms81m417Id29-DtHFxE3M4TtJvYNB03gAgpYo,14402
|
235
235
|
nucliadb/search/search/query_parser/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
|
236
236
|
nucliadb/search/search/query_parser/exceptions.py,sha256=tuzl7ZyvVsRz6u0_3zMe60vx39nd3pi641prs-5nC0E,872
|
@@ -331,9 +331,9 @@ nucliadb/writer/tus/local.py,sha256=7jYa_w9b-N90jWgN2sQKkNcomqn6JMVBOVeDOVYJHto,
|
|
331
331
|
nucliadb/writer/tus/s3.py,sha256=vF0NkFTXiXhXq3bCVXXVV-ED38ECVoUeeYViP8uMqcU,8357
|
332
332
|
nucliadb/writer/tus/storage.py,sha256=ToqwjoYnjI4oIcwzkhha_MPxi-k4Jk3Lt55zRwaC1SM,2903
|
333
333
|
nucliadb/writer/tus/utils.py,sha256=MSdVbRsRSZVdkaum69_0wku7X3p5wlZf4nr6E0GMKbw,2556
|
334
|
-
nucliadb-6.1.0.
|
335
|
-
nucliadb-6.1.0.post2504.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
|
336
|
-
nucliadb-6.1.0.post2504.dist-info/entry_points.txt,sha256=XqGfgFDuY3zXQc8ewXM2TRVjTModIq851zOsgrmaXx4,1268
|
337
|
-
nucliadb-6.1.0.post2504.dist-info/top_level.txt,sha256=hwYhTVnX7jkQ9gJCkVrbqEG1M4lT2F_iPQND1fCzF80,20
|
338
|
-
nucliadb-6.1.0.post2504.dist-info/zip-safe,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
|
339
|
-
nucliadb-6.1.0.post2504.dist-info/RECORD,,
|
334
|
+
nucliadb-6.1.0.post2517.dist-info/METADATA,sha256=UFOaIFgplfz5-oLinOoDftmNMSzWFqagyIpXH3eCuwo,4390
|
335
|
+
nucliadb-6.1.0.post2517.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
|
336
|
+
nucliadb-6.1.0.post2517.dist-info/entry_points.txt,sha256=XqGfgFDuY3zXQc8ewXM2TRVjTModIq851zOsgrmaXx4,1268
|
337
|
+
nucliadb-6.1.0.post2517.dist-info/top_level.txt,sha256=hwYhTVnX7jkQ9gJCkVrbqEG1M4lT2F_iPQND1fCzF80,20
|
338
|
+
nucliadb-6.1.0.post2517.dist-info/zip-safe,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
|
339
|
+
nucliadb-6.1.0.post2517.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|