nucliadb 6.6.1.post4590__py3-none-any.whl → 6.6.1.post4596__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -67,8 +67,10 @@ async def pgcatalog_update(txn: Transaction, kbid: str, resource: Resource, inde
67
67
  async with _pg_transaction(txn).connection.cursor() as cur:
68
68
  # Labels from the resource and classification labels from each field
69
69
  labels = [label for label in index_message.labels]
70
- for field in index_message.texts.values():
71
- labels += [label for label in field.labels if label.startswith("/l/")]
70
+ for field in (await resource.get_fields()).values():
71
+ meta = await field.get_field_metadata()
72
+ if meta:
73
+ labels += [f"/l/{c.labelset}/{c.label}" for c in meta.metadata.classifications]
72
74
 
73
75
  await cur.execute(
74
76
  """
@@ -69,6 +69,7 @@ from nucliadb_models.search import (
69
69
  RagStrategyName,
70
70
  TableImageStrategy,
71
71
  TextBlockAugmentationType,
72
+ TextPosition,
72
73
  )
73
74
  from nucliadb_protos import resources_pb2
74
75
  from nucliadb_protos.resources_pb2 import ExtractedText, FieldComputedMetadata
@@ -727,6 +728,7 @@ async def neighbouring_paragraphs_prompt_context(
727
728
  augmented_context.paragraphs[npid.full()] = AugmentedTextBlock(
728
729
  id=npid.full(),
729
730
  text=ptext,
731
+ position=get_text_position(npid, neighbour_index, field_extracted_metadata),
730
732
  parent=pid.full(),
731
733
  augmentation_type=TextBlockAugmentationType.NEIGHBOURING_PARAGRAPHS,
732
734
  )
@@ -734,6 +736,30 @@ async def neighbouring_paragraphs_prompt_context(
734
736
  metrics.set("neighbouring_paragraphs_ops", len(augmented_context.paragraphs))
735
737
 
736
738
 
739
+ def get_text_position(
740
+ paragraph_id: ParagraphId, index: int, field_metadata: FieldComputedMetadata
741
+ ) -> Optional[TextPosition]:
742
+ if paragraph_id.field_id.subfield_id:
743
+ metadata = field_metadata.split_metadata[paragraph_id.field_id.subfield_id]
744
+ else:
745
+ metadata = field_metadata.metadata
746
+ try:
747
+ pmetadata = metadata.paragraphs[index]
748
+ except IndexError:
749
+ return None
750
+ page_number = None
751
+ if pmetadata.HasField("page"):
752
+ page_number = pmetadata.page.page
753
+ return TextPosition(
754
+ page_number=page_number,
755
+ index=index,
756
+ start=pmetadata.start,
757
+ end=pmetadata.end,
758
+ start_seconds=list(pmetadata.start_seconds),
759
+ end_seconds=list(pmetadata.end_seconds),
760
+ )
761
+
762
+
737
763
  def get_neighbouring_indices(
738
764
  index: int, before: int, after: int, field_pids: list[ParagraphId]
739
765
  ) -> list[int]:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nucliadb
3
- Version: 6.6.1.post4590
3
+ Version: 6.6.1.post4596
4
4
  Summary: NucliaDB
5
5
  Author-email: Nuclia <nucliadb@nuclia.com>
6
6
  License-Expression: AGPL-3.0-or-later
@@ -19,11 +19,11 @@ Classifier: Programming Language :: Python :: 3.12
19
19
  Classifier: Programming Language :: Python :: 3 :: Only
20
20
  Requires-Python: <4,>=3.9
21
21
  Description-Content-Type: text/markdown
22
- Requires-Dist: nucliadb-telemetry[all]>=6.6.1.post4590
23
- Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.6.1.post4590
24
- Requires-Dist: nucliadb-protos>=6.6.1.post4590
25
- Requires-Dist: nucliadb-models>=6.6.1.post4590
26
- Requires-Dist: nidx-protos>=6.6.1.post4590
22
+ Requires-Dist: nucliadb-telemetry[all]>=6.6.1.post4596
23
+ Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.6.1.post4596
24
+ Requires-Dist: nucliadb-protos>=6.6.1.post4596
25
+ Requires-Dist: nucliadb-models>=6.6.1.post4596
26
+ Requires-Dist: nidx-protos>=6.6.1.post4596
27
27
  Requires-Dist: nucliadb-admin-assets>=1.0.0.post1224
28
28
  Requires-Dist: nuclia-models>=0.43.0
29
29
  Requires-Dist: uvicorn[standard]
@@ -167,7 +167,7 @@ nucliadb/ingest/orm/utils.py,sha256=fCQRuyecgqhaY7mcBG93oaXMkzkKb9BFjOcy4-ZiSNw,
167
167
  nucliadb/ingest/orm/processor/__init__.py,sha256=Aqd9wCNTvggkMkCY3WvoI8spdr94Jnqk-0iq9XpLs18,922
168
168
  nucliadb/ingest/orm/processor/auditing.py,sha256=TeYhXGJRyQ7ROytbb2u8R0fIh_FYi3HgTu3S1ribY3U,4623
169
169
  nucliadb/ingest/orm/processor/data_augmentation.py,sha256=v-pj4GbBWSuO8dQyahs5UDr5ghsyfhCZDS0ftKd6ZYc,5179
170
- nucliadb/ingest/orm/processor/pgcatalog.py,sha256=jyUn9D7tPES1f8FQqiL8kjzS4puWbgv-bqorP_PEt_A,4251
170
+ nucliadb/ingest/orm/processor/pgcatalog.py,sha256=f0_bV_5qMGXMG7iLK1DnVwi_pXwFvezfdt3aJ0fZfaQ,4347
171
171
  nucliadb/ingest/orm/processor/processor.py,sha256=jaEBwbv--WyoC8zcdxWAyF0dAzVA5crVDJl56Bqv1eI,31444
172
172
  nucliadb/ingest/orm/processor/sequence_manager.py,sha256=uqEphtI1Ir_yk9jRl2gPf7BlzzXWovbARY5MNZSBI_8,1704
173
173
  nucliadb/ingest/service/__init__.py,sha256=LHQFUkdmNBOWqBG0Md9sMMI7g5TQZ-hLAnhw6ZblrJg,2002
@@ -266,7 +266,7 @@ nucliadb/search/search/chat/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn
266
266
  nucliadb/search/search/chat/ask.py,sha256=vJ3TSdr-cT_xh43UnoYugqxnHv_-LFSCYoU7o0NnI1M,39368
267
267
  nucliadb/search/search/chat/exceptions.py,sha256=Siy4GXW2L7oPhIR86H3WHBhE9lkV4A4YaAszuGGUf54,1356
268
268
  nucliadb/search/search/chat/images.py,sha256=PA8VWxT5_HUGfW1ULhKTK46UBsVyINtWWqEM1ulzX1E,3095
269
- nucliadb/search/search/chat/prompt.py,sha256=QwHULUDqe_pS2HZvQH1vzqpYEHQG_-UagXCNtLLtJEI,52997
269
+ nucliadb/search/search/chat/prompt.py,sha256=gmYRC3aK03vrDoBElJP5H5Z7OEeu79k5yTxv3FEkN0I,53866
270
270
  nucliadb/search/search/chat/query.py,sha256=3jMPNbiFEOoS0ydMOPYkSx1qVlvAv51npzadWXDwkMs,16650
271
271
  nucliadb/search/search/query_parser/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
272
272
  nucliadb/search/search/query_parser/exceptions.py,sha256=sVl9gRNzhE-s480LBBVkiXzNRbKhYRQN5F3it5tNNp8,939
@@ -375,8 +375,8 @@ nucliadb/writer/tus/local.py,sha256=7jYa_w9b-N90jWgN2sQKkNcomqn6JMVBOVeDOVYJHto,
375
375
  nucliadb/writer/tus/s3.py,sha256=vF0NkFTXiXhXq3bCVXXVV-ED38ECVoUeeYViP8uMqcU,8357
376
376
  nucliadb/writer/tus/storage.py,sha256=ToqwjoYnjI4oIcwzkhha_MPxi-k4Jk3Lt55zRwaC1SM,2903
377
377
  nucliadb/writer/tus/utils.py,sha256=MSdVbRsRSZVdkaum69_0wku7X3p5wlZf4nr6E0GMKbw,2556
378
- nucliadb-6.6.1.post4590.dist-info/METADATA,sha256=Qlvlg1L0sRe2pMcr6yvh9Wq27fSWSereEoun2EcPs2U,4158
379
- nucliadb-6.6.1.post4590.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
380
- nucliadb-6.6.1.post4590.dist-info/entry_points.txt,sha256=XqGfgFDuY3zXQc8ewXM2TRVjTModIq851zOsgrmaXx4,1268
381
- nucliadb-6.6.1.post4590.dist-info/top_level.txt,sha256=hwYhTVnX7jkQ9gJCkVrbqEG1M4lT2F_iPQND1fCzF80,20
382
- nucliadb-6.6.1.post4590.dist-info/RECORD,,
378
+ nucliadb-6.6.1.post4596.dist-info/METADATA,sha256=BGQonZKHKd6s_8MEWZ2NMWLh1cw9lLxHES6QWnoVQww,4158
379
+ nucliadb-6.6.1.post4596.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
380
+ nucliadb-6.6.1.post4596.dist-info/entry_points.txt,sha256=XqGfgFDuY3zXQc8ewXM2TRVjTModIq851zOsgrmaXx4,1268
381
+ nucliadb-6.6.1.post4596.dist-info/top_level.txt,sha256=hwYhTVnX7jkQ9gJCkVrbqEG1M4lT2F_iPQND1fCzF80,20
382
+ nucliadb-6.6.1.post4596.dist-info/RECORD,,