nucliadb 6.1.0.post2533__py3-none-any.whl → 6.1.0.post2542__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -304,7 +304,9 @@ class IngestProcessedConsumer(IngestConsumer):
304
304
  durable_name=durable_name,
305
305
  ack_policy=nats.js.api.AckPolicy.EXPLICIT,
306
306
  deliver_policy=nats.js.api.DeliverPolicy.ALL,
307
- max_ack_pending=1,
307
+ # We set it to 20 because we don't care about order here and we want to be able to HPA based
308
+ # on the number of pending messages in the queue.
309
+ max_ack_pending=20,
308
310
  max_deliver=nats_consumer_settings.nats_max_deliver,
309
311
  ack_wait=nats_consumer_settings.nats_ack_wait,
310
312
  ),
@@ -680,6 +680,7 @@ async def conversation_prompt_context(
680
680
  if field_type == "c" and paragraph.score_type in (
681
681
  SCORE_TYPE.VECTOR,
682
682
  SCORE_TYPE.BOTH,
683
+ SCORE_TYPE.BM25,
683
684
  ):
684
685
  field_unique_id = "-".join([rid, field_type, field_id])
685
686
  if field_unique_id in analyzed_fields:
@@ -695,20 +696,31 @@ async def conversation_prompt_context(
695
696
 
696
697
  attachments: List[resources_pb2.FieldRef] = []
697
698
  if conversational_strategy.full:
699
+ extracted_text = await field_obj.get_extracted_text()
698
700
  for current_page in range(1, cmetadata.pages + 1):
699
701
  conv = await field_obj.db_get_value(current_page)
702
+
700
703
  for message in conv.messages:
701
- text = message.content.text.strip()
702
- pid = f"{rid}/{field_type}/{field_id}/{message.ident}/0-{len(message.content.text) + 1}"
704
+ ident = message.ident
705
+ if extracted_text is not None:
706
+ text = extracted_text.split_text.get(ident, message.content.text.strip())
707
+ else:
708
+ text = message.content.text.strip()
709
+ pid = f"{rid}/{field_type}/{field_id}/{ident}/0-{len(text) + 1}"
703
710
  context[pid] = text
704
711
  attachments.extend(message.content.attachments_fields)
705
712
  else:
706
713
  # Add first message
714
+ extracted_text = await field_obj.get_extracted_text()
707
715
  first_page = await field_obj.db_get_value()
708
716
  if len(first_page.messages) > 0:
709
717
  message = first_page.messages[0]
710
- text = message.content.text.strip()
711
- pid = f"{rid}/{field_type}/{field_id}/{message.ident}/0-{len(message.content.text) + 1}"
718
+ ident = message.ident
719
+ if extracted_text is not None:
720
+ text = extracted_text.split_text.get(ident, message.content.text.strip())
721
+ else:
722
+ text = message.content.text.strip()
723
+ pid = f"{rid}/{field_type}/{field_id}/{ident}/0-{len(text) + 1}"
712
724
  context[pid] = text
713
725
  attachments.extend(message.content.attachments_fields)
714
726
 
@@ -746,7 +758,7 @@ async def conversation_prompt_context(
746
758
  extracted_text = await field.get_extracted_text()
747
759
  if extracted_text is not None:
748
760
  pid = f"{rid}/{field_type}/{attachment.field_id}/0-{len(extracted_text.text) + 1}"
749
- context[pid] = extracted_text.text
761
+ context[pid] = f"Attachment {attachment.field_id}: {extracted_text.text}\n\n"
750
762
 
751
763
  if conversational_strategy.attachments_images and visual_llm:
752
764
  for attachment in attachments:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: nucliadb
3
- Version: 6.1.0.post2533
3
+ Version: 6.1.0.post2542
4
4
  Home-page: https://docs.nuclia.dev/docs/management/nucliadb/intro
5
5
  Author: NucliaDB Community
6
6
  Author-email: nucliadb@nuclia.com
@@ -22,10 +22,10 @@ Classifier: Programming Language :: Python :: 3.12
22
22
  Classifier: Programming Language :: Python :: 3 :: Only
23
23
  Requires-Python: >=3.9, <4
24
24
  Description-Content-Type: text/markdown
25
- Requires-Dist: nucliadb-telemetry[all]>=6.1.0.post2533
26
- Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.1.0.post2533
27
- Requires-Dist: nucliadb-protos>=6.1.0.post2533
28
- Requires-Dist: nucliadb-models>=6.1.0.post2533
25
+ Requires-Dist: nucliadb-telemetry[all]>=6.1.0.post2542
26
+ Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.1.0.post2542
27
+ Requires-Dist: nucliadb-protos>=6.1.0.post2542
28
+ Requires-Dist: nucliadb-models>=6.1.0.post2542
29
29
  Requires-Dist: nucliadb-admin-assets>=1.0.0.post1224
30
30
  Requires-Dist: nucliadb-node-binding>=2.26.0
31
31
  Requires-Dist: uvicorn
@@ -115,7 +115,7 @@ nucliadb/ingest/settings.py,sha256=SDQpMRsTsNyi6IDxCJy6BZVUSKUzwAMuxf6ktp31VMM,3
115
115
  nucliadb/ingest/utils.py,sha256=l1myURu3r8oA11dx3GpHw-gNTUc1AFX8xdPm9Lgl2rA,2275
116
116
  nucliadb/ingest/consumer/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
117
117
  nucliadb/ingest/consumer/auditing.py,sha256=EJoqRRr4dk2eUMK0GOY6b9xHO0YLQ0LjoP_xZBLACZo,7280
118
- nucliadb/ingest/consumer/consumer.py,sha256=y9XF4U3cZN5JFzC-ZnBCn7z5viJDMmjoeEprxm7VghE,13941
118
+ nucliadb/ingest/consumer/consumer.py,sha256=Lej1d6jqmaeR3vjzD0mnfKcVzZTT4TQ3lb1DOfyNWM4,14117
119
119
  nucliadb/ingest/consumer/materializer.py,sha256=7ofLbwjldJA8TWXDRZRM4U5EviZt3qNSQ8oadmkzS0Y,3840
120
120
  nucliadb/ingest/consumer/metrics.py,sha256=ji1l_4cKiHJthQd8YNem1ft4iMbw9KThmVvJmLcv3Xg,1075
121
121
  nucliadb/ingest/consumer/pull.py,sha256=EYT0ImngMQgatStG68p2GSrPQBbJxeuq8nFm8DdAbwk,9280
@@ -230,7 +230,7 @@ nucliadb/search/search/chat/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn
230
230
  nucliadb/search/search/chat/ask.py,sha256=2ZIz3TbCG18X0eJZSfLc8nHyKToyrlv_SPFzOOcc5wU,33865
231
231
  nucliadb/search/search/chat/exceptions.py,sha256=Siy4GXW2L7oPhIR86H3WHBhE9lkV4A4YaAszuGGUf54,1356
232
232
  nucliadb/search/search/chat/images.py,sha256=PA8VWxT5_HUGfW1ULhKTK46UBsVyINtWWqEM1ulzX1E,3095
233
- nucliadb/search/search/chat/prompt.py,sha256=mreHIjhIWCIGDlJ2CA7kv7KLgh8d09meoMX3-bvWje8,46152
233
+ nucliadb/search/search/chat/prompt.py,sha256=TIzjI_882hJ--KLKCY8rJomtJ_CMJ-MHYtHqivgG8Lk,46819
234
234
  nucliadb/search/search/chat/query.py,sha256=gKtlj2ms81m417Id29-DtHFxE3M4TtJvYNB03gAgpYo,14402
235
235
  nucliadb/search/search/query_parser/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
236
236
  nucliadb/search/search/query_parser/exceptions.py,sha256=tuzl7ZyvVsRz6u0_3zMe60vx39nd3pi641prs-5nC0E,872
@@ -331,9 +331,9 @@ nucliadb/writer/tus/local.py,sha256=7jYa_w9b-N90jWgN2sQKkNcomqn6JMVBOVeDOVYJHto,
331
331
  nucliadb/writer/tus/s3.py,sha256=vF0NkFTXiXhXq3bCVXXVV-ED38ECVoUeeYViP8uMqcU,8357
332
332
  nucliadb/writer/tus/storage.py,sha256=ToqwjoYnjI4oIcwzkhha_MPxi-k4Jk3Lt55zRwaC1SM,2903
333
333
  nucliadb/writer/tus/utils.py,sha256=MSdVbRsRSZVdkaum69_0wku7X3p5wlZf4nr6E0GMKbw,2556
334
- nucliadb-6.1.0.post2533.dist-info/METADATA,sha256=92uKuzSMwFjVzxVwrt2ogNuoKMquNvZY64BtiIz1rEY,4390
335
- nucliadb-6.1.0.post2533.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
336
- nucliadb-6.1.0.post2533.dist-info/entry_points.txt,sha256=XqGfgFDuY3zXQc8ewXM2TRVjTModIq851zOsgrmaXx4,1268
337
- nucliadb-6.1.0.post2533.dist-info/top_level.txt,sha256=hwYhTVnX7jkQ9gJCkVrbqEG1M4lT2F_iPQND1fCzF80,20
338
- nucliadb-6.1.0.post2533.dist-info/zip-safe,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
339
- nucliadb-6.1.0.post2533.dist-info/RECORD,,
334
+ nucliadb-6.1.0.post2542.dist-info/METADATA,sha256=l-8JkGkVM1-9bJSTHqyLh_q0ddGCqrD5GYXpKzHR4ww,4390
335
+ nucliadb-6.1.0.post2542.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
336
+ nucliadb-6.1.0.post2542.dist-info/entry_points.txt,sha256=XqGfgFDuY3zXQc8ewXM2TRVjTModIq851zOsgrmaXx4,1268
337
+ nucliadb-6.1.0.post2542.dist-info/top_level.txt,sha256=hwYhTVnX7jkQ9gJCkVrbqEG1M4lT2F_iPQND1fCzF80,20
338
+ nucliadb-6.1.0.post2542.dist-info/zip-safe,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
339
+ nucliadb-6.1.0.post2542.dist-info/RECORD,,