nucliadb 6.1.0.post2504__py3-none-any.whl → 6.1.0.post2517__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -291,56 +291,7 @@ class IngestProcessedConsumer(IngestConsumer):
291
291
  other writes are going to be coming from user actions and we don't want to slow them down.
292
292
  """
293
293
 
294
- async def get_stream_last_seqid(self) -> Optional[int]:
295
- """
296
- XXX NOTE: Getting the last sequence id of the stream for the processed pull consumer is only needed when the new pull consumer is created.
297
-
298
- For environments where we had the previous push consumer, we need to get the last sequence id from the old consumer info so that
299
- we can start from there.
300
-
301
- This code must be deleted once the new pull consumers are deployed on all environments and all the old push consumers have been deleted.
302
- From then on, we will rely on the nats server to keep track of the last sequence id.
303
- """
304
- if has_feature(const.Features.PULL_PROCESSED_CONSUMERS_DEPLOYED):
305
- logger.warning(
306
- f"Feature flag {const.Features.PULL_PROCESSED_CONSUMERS_DEPLOYED} is enabled. Relying on nats to keep track of the last sequence id."
307
- )
308
- return None
309
-
310
- try:
311
- push_consumer_info: nats.js.api.ConsumerInfo = (
312
- await self.nats_connection_manager.js.consumer_info(
313
- stream=const.Streams.INGEST_PROCESSED.name,
314
- # This is the old consumer name
315
- consumer="nucliadb-processed",
316
- timeout=2,
317
- )
318
- )
319
- # Start from the last non-acked message
320
- if push_consumer_info.ack_floor is None:
321
- logger.warning(
322
- f"Nats consumer {push_consumer_info.name} has no ack floor. Starting from scratch."
323
- )
324
- return 1
325
- last_seqid = push_consumer_info.ack_floor.stream_seq + 1
326
- logger.info(
327
- f"Starting from last sequence id {last_seqid} for {const.Streams.INGEST_PROCESSED.name}"
328
- )
329
- return last_seqid
330
- except (nats.js.errors.NotFoundError, nats.errors.TimeoutError):
331
- logger.warning(
332
- f"Could not get last sequence id for {const.Streams.INGEST_PROCESSED.name}. Starting from scratch."
333
- )
334
- # Start from scratch
335
- return 1
336
-
337
294
  async def setup_nats_subscription(self):
338
- last_sequence_id = await self.get_stream_last_seqid()
339
- if last_sequence_id is None:
340
- delivery_policy = nats.js.api.DeliverPolicy.ALL
341
- else:
342
- delivery_policy = nats.js.api.DeliverPolicy.BY_START_SEQUENCE
343
-
344
295
  subject = const.Streams.INGEST_PROCESSED.subject
345
296
  durable_name = const.Streams.INGEST_PROCESSED.group
346
297
  self.subscription = await self.nats_connection_manager.pull_subscribe(
@@ -352,8 +303,7 @@ class IngestProcessedConsumer(IngestConsumer):
352
303
  config=nats.js.api.ConsumerConfig(
353
304
  durable_name=durable_name,
354
305
  ack_policy=nats.js.api.AckPolicy.EXPLICIT,
355
- deliver_policy=delivery_policy,
356
- opt_start_seq=last_sequence_id,
306
+ deliver_policy=nats.js.api.DeliverPolicy.ALL,
357
307
  max_ack_pending=1,
358
308
  max_deliver=nats_consumer_settings.nats_max_deliver,
359
309
  ack_wait=nats_consumer_settings.nats_ack_wait,
@@ -18,11 +18,14 @@
18
18
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
19
 
20
20
  import base64
21
+ from io import BytesIO
21
22
  from typing import Optional
22
23
 
23
24
  from nucliadb.common.ids import ParagraphId
25
+ from nucliadb.ingest.fields.file import File
24
26
  from nucliadb.search import SERVICE_NAME
25
27
  from nucliadb_models.search import Image
28
+ from nucliadb_utils.storages.storage import Storage
26
29
  from nucliadb_utils.utilities import get_storage
27
30
 
28
31
 
@@ -64,14 +67,18 @@ async def get_paragraph_image(kbid: str, paragraph_id: ParagraphId, reference: s
64
67
  return image
65
68
 
66
69
 
67
- async def get_file_image(kbid: str, rid: str, field_id: str) -> Optional[Image]:
68
- storage = await get_storage(service_name=SERVICE_NAME)
69
- sf = storage.file_field(kbid, rid, field_id)
70
- image_bytes = (await sf.storage.downloadbytes(sf.bucket, sf.key)).read()
71
- if not image_bytes:
70
+ async def get_file_thumbnail_image(file: File) -> Optional[Image]:
71
+ fed = await file.get_file_extracted_data()
72
+ if fed is None or not fed.HasField("file_thumbnail"):
73
+ return None
74
+ storage: Storage = await get_storage(service_name=SERVICE_NAME)
75
+ image_bytes: BytesIO = await storage.downloadbytescf(fed.file_thumbnail)
76
+ value = image_bytes.getvalue()
77
+ if len(value) == 0:
72
78
  return None
73
79
  image = Image(
74
- b64encoded=base64.b64encode(image_bytes).decode(),
75
- content_type="image/png",
80
+ b64encoded=base64.b64encode(value).decode(),
81
+ # We assume the thumbnail is always generated as jpeg by Nuclia processing
82
+ content_type="image/jpeg",
76
83
  )
77
84
  return image
@@ -35,7 +35,7 @@ from nucliadb.ingest.orm.knowledgebox import KnowledgeBox as KnowledgeBoxORM
35
35
  from nucliadb.search import logger
36
36
  from nucliadb.search.search import cache
37
37
  from nucliadb.search.search.chat.images import (
38
- get_file_image,
38
+ get_file_thumbnail_image,
39
39
  get_page_image,
40
40
  get_paragraph_image,
41
41
  )
@@ -753,14 +753,10 @@ async def conversation_prompt_context(
753
753
  file_field: File = await resource.get_field(
754
754
  attachment.field_id, attachment.field_type, load=True
755
755
  ) # type: ignore
756
- field_metadata = await file_field.get_field_metadata()
757
- if field_metadata is not None and field_metadata.metadata.mime_type.startswith(
758
- "image"
759
- ):
760
- image = await get_file_image(kbid, rid, attachment.field_id)
761
- if image is not None:
762
- pid = f"{rid}/f/{attachment.field_id}/0-0"
763
- context.images[pid] = image
756
+ image = await get_file_thumbnail_image(file_field)
757
+ if image is not None:
758
+ pid = f"{rid}/f/{attachment.field_id}/0-0"
759
+ context.images[pid] = image
764
760
 
765
761
  analyzed_fields.append(field_unique_id)
766
762
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: nucliadb
3
- Version: 6.1.0.post2504
3
+ Version: 6.1.0.post2517
4
4
  Home-page: https://docs.nuclia.dev/docs/management/nucliadb/intro
5
5
  Author: NucliaDB Community
6
6
  Author-email: nucliadb@nuclia.com
@@ -22,10 +22,10 @@ Classifier: Programming Language :: Python :: 3.12
22
22
  Classifier: Programming Language :: Python :: 3 :: Only
23
23
  Requires-Python: >=3.9, <4
24
24
  Description-Content-Type: text/markdown
25
- Requires-Dist: nucliadb-telemetry[all]>=6.1.0.post2504
26
- Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.1.0.post2504
27
- Requires-Dist: nucliadb-protos>=6.1.0.post2504
28
- Requires-Dist: nucliadb-models>=6.1.0.post2504
25
+ Requires-Dist: nucliadb-telemetry[all]>=6.1.0.post2517
26
+ Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.1.0.post2517
27
+ Requires-Dist: nucliadb-protos>=6.1.0.post2517
28
+ Requires-Dist: nucliadb-models>=6.1.0.post2517
29
29
  Requires-Dist: nucliadb-admin-assets>=1.0.0.post1224
30
30
  Requires-Dist: nucliadb-node-binding>=2.26.0
31
31
  Requires-Dist: uvicorn
@@ -115,7 +115,7 @@ nucliadb/ingest/settings.py,sha256=SDQpMRsTsNyi6IDxCJy6BZVUSKUzwAMuxf6ktp31VMM,3
115
115
  nucliadb/ingest/utils.py,sha256=l1myURu3r8oA11dx3GpHw-gNTUc1AFX8xdPm9Lgl2rA,2275
116
116
  nucliadb/ingest/consumer/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
117
117
  nucliadb/ingest/consumer/auditing.py,sha256=EJoqRRr4dk2eUMK0GOY6b9xHO0YLQ0LjoP_xZBLACZo,7280
118
- nucliadb/ingest/consumer/consumer.py,sha256=G4YIDSTqbZALMf9COAeitPrFmDbxNUvmBMIC0h_bUvo,16341
118
+ nucliadb/ingest/consumer/consumer.py,sha256=y9XF4U3cZN5JFzC-ZnBCn7z5viJDMmjoeEprxm7VghE,13941
119
119
  nucliadb/ingest/consumer/materializer.py,sha256=7ofLbwjldJA8TWXDRZRM4U5EviZt3qNSQ8oadmkzS0Y,3840
120
120
  nucliadb/ingest/consumer/metrics.py,sha256=ji1l_4cKiHJthQd8YNem1ft4iMbw9KThmVvJmLcv3Xg,1075
121
121
  nucliadb/ingest/consumer/pull.py,sha256=EYT0ImngMQgatStG68p2GSrPQBbJxeuq8nFm8DdAbwk,9280
@@ -229,8 +229,8 @@ nucliadb/search/search/utils.py,sha256=iF2tbBA56gRMJH1TlE2hMrqeXqjoeOPt4KgRdp2m9
229
229
  nucliadb/search/search/chat/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
230
230
  nucliadb/search/search/chat/ask.py,sha256=2ZIz3TbCG18X0eJZSfLc8nHyKToyrlv_SPFzOOcc5wU,33865
231
231
  nucliadb/search/search/chat/exceptions.py,sha256=Siy4GXW2L7oPhIR86H3WHBhE9lkV4A4YaAszuGGUf54,1356
232
- nucliadb/search/search/chat/images.py,sha256=2o5zimZF_YMa8Kkbugc5BxCBdmkCSILQuHHEEt49ilo,2791
233
- nucliadb/search/search/chat/prompt.py,sha256=ZQZZcrp911KKHmBOWfMRb4klbAHxn_bQl7nmVu74uws,46414
232
+ nucliadb/search/search/chat/images.py,sha256=PA8VWxT5_HUGfW1ULhKTK46UBsVyINtWWqEM1ulzX1E,3095
233
+ nucliadb/search/search/chat/prompt.py,sha256=mreHIjhIWCIGDlJ2CA7kv7KLgh8d09meoMX3-bvWje8,46152
234
234
  nucliadb/search/search/chat/query.py,sha256=gKtlj2ms81m417Id29-DtHFxE3M4TtJvYNB03gAgpYo,14402
235
235
  nucliadb/search/search/query_parser/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
236
236
  nucliadb/search/search/query_parser/exceptions.py,sha256=tuzl7ZyvVsRz6u0_3zMe60vx39nd3pi641prs-5nC0E,872
@@ -331,9 +331,9 @@ nucliadb/writer/tus/local.py,sha256=7jYa_w9b-N90jWgN2sQKkNcomqn6JMVBOVeDOVYJHto,
331
331
  nucliadb/writer/tus/s3.py,sha256=vF0NkFTXiXhXq3bCVXXVV-ED38ECVoUeeYViP8uMqcU,8357
332
332
  nucliadb/writer/tus/storage.py,sha256=ToqwjoYnjI4oIcwzkhha_MPxi-k4Jk3Lt55zRwaC1SM,2903
333
333
  nucliadb/writer/tus/utils.py,sha256=MSdVbRsRSZVdkaum69_0wku7X3p5wlZf4nr6E0GMKbw,2556
334
- nucliadb-6.1.0.post2504.dist-info/METADATA,sha256=vXm2SKesdaakWovns1MAt3f7inEItknaZKsAyBlkXTY,4390
335
- nucliadb-6.1.0.post2504.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
336
- nucliadb-6.1.0.post2504.dist-info/entry_points.txt,sha256=XqGfgFDuY3zXQc8ewXM2TRVjTModIq851zOsgrmaXx4,1268
337
- nucliadb-6.1.0.post2504.dist-info/top_level.txt,sha256=hwYhTVnX7jkQ9gJCkVrbqEG1M4lT2F_iPQND1fCzF80,20
338
- nucliadb-6.1.0.post2504.dist-info/zip-safe,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
339
- nucliadb-6.1.0.post2504.dist-info/RECORD,,
334
+ nucliadb-6.1.0.post2517.dist-info/METADATA,sha256=UFOaIFgplfz5-oLinOoDftmNMSzWFqagyIpXH3eCuwo,4390
335
+ nucliadb-6.1.0.post2517.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
336
+ nucliadb-6.1.0.post2517.dist-info/entry_points.txt,sha256=XqGfgFDuY3zXQc8ewXM2TRVjTModIq851zOsgrmaXx4,1268
337
+ nucliadb-6.1.0.post2517.dist-info/top_level.txt,sha256=hwYhTVnX7jkQ9gJCkVrbqEG1M4lT2F_iPQND1fCzF80,20
338
+ nucliadb-6.1.0.post2517.dist-info/zip-safe,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
339
+ nucliadb-6.1.0.post2517.dist-info/RECORD,,