nucliadb 6.9.0.post5008__py3-none-any.whl → 6.9.0.post5009__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of nucliadb might be problematic. Click here for more details.

@@ -30,12 +30,16 @@ from nucliadb_protos import resources_pb2
30
30
  from nucliadb_utils.utilities import get_storage
31
31
 
32
32
 
33
- async def paragraph_source_image(kbid: str, paragraph: resources_pb2.Paragraph) -> Optional[Image]:
33
+ async def paragraph_source_image(
34
+ kbid: str, paragraph_id: ParagraphId, paragraph: resources_pb2.Paragraph
35
+ ) -> Optional[Image]:
34
36
  """Certain paragraphs are extracted from images using techniques like OCR or
35
37
  inception. If that's the case, return the original image for this paragraph.
36
38
 
37
39
  """
38
40
  source_image = paragraph.representation.reference_file
41
+ if not source_image:
42
+ return None
39
43
 
40
44
  if paragraph.kind not in (
41
45
  resources_pb2.Paragraph.TypeParagraph.OCR,
@@ -43,7 +47,7 @@ async def paragraph_source_image(kbid: str, paragraph: resources_pb2.Paragraph)
43
47
  ):
44
48
  return None
45
49
 
46
- field_id = ParagraphId.from_string(paragraph.key).field_id
50
+ field_id = paragraph_id.field_id
47
51
 
48
52
  # Paragraphs extracted from an image store its original image representation
49
53
  # in the reference file. The path is incomplete though, as it's stored in
@@ -219,7 +219,9 @@ async def hydrate_paragraph(
219
219
  hydrated.image = hydration_models.HydratedParagraphImage()
220
220
 
221
221
  if config.image.source_image:
222
- hydrated.image.source_image = await paragraph_source_image(kbid, paragraph)
222
+ hydrated.image.source_image = await paragraph_source_image(
223
+ kbid, paragraph_id, paragraph
224
+ )
223
225
 
224
226
  if config.page:
225
227
  if hydrated.page is None:
@@ -116,10 +116,7 @@ async def get_sentences(kbid: str, result: str) -> list[str]:
116
116
  if split is not None:
117
117
  text = extracted_text.split_text[split]
118
118
  for paragraph in field_metadata.split_metadata[split].paragraphs:
119
- if paragraph.key == "":
120
- key = f"{rid}/{field_type}/{field}/{paragraph.start}-{paragraph.end}"
121
- else:
122
- key = paragraph.key
119
+ key = f"{rid}/{field_type}/{field}/{paragraph.start}-{paragraph.end}"
123
120
  if key == result:
124
121
  for sentence in paragraph.sentences:
125
122
  splitted_text = text[sentence.start : sentence.end]
@@ -127,10 +124,7 @@ async def get_sentences(kbid: str, result: str) -> list[str]:
127
124
  else:
128
125
  text = extracted_text.text
129
126
  for paragraph in field_metadata.metadata.paragraphs:
130
- if paragraph.key == "":
131
- key = f"{rid}/{field_type}/{field}/{paragraph.start}-{paragraph.end}"
132
- else:
133
- key = paragraph.key
127
+ key = f"{rid}/{field_type}/{field}/{paragraph.start}-{paragraph.end}"
134
128
  if key == result:
135
129
  for sentence in paragraph.sentences:
136
130
  splitted_text = text[sentence.start : sentence.end]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nucliadb
3
- Version: 6.9.0.post5008
3
+ Version: 6.9.0.post5009
4
4
  Summary: NucliaDB
5
5
  Author-email: Nuclia <nucliadb@nuclia.com>
6
6
  License-Expression: AGPL-3.0-or-later
@@ -19,11 +19,11 @@ Classifier: Programming Language :: Python :: 3.12
19
19
  Classifier: Programming Language :: Python :: 3 :: Only
20
20
  Requires-Python: <4,>=3.9
21
21
  Description-Content-Type: text/markdown
22
- Requires-Dist: nucliadb-telemetry[all]>=6.9.0.post5008
23
- Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.9.0.post5008
24
- Requires-Dist: nucliadb-protos>=6.9.0.post5008
25
- Requires-Dist: nucliadb-models>=6.9.0.post5008
26
- Requires-Dist: nidx-protos>=6.9.0.post5008
22
+ Requires-Dist: nucliadb-telemetry[all]>=6.9.0.post5009
23
+ Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.9.0.post5009
24
+ Requires-Dist: nucliadb-protos>=6.9.0.post5009
25
+ Requires-Dist: nucliadb-models>=6.9.0.post5009
26
+ Requires-Dist: nidx-protos>=6.9.0.post5009
27
27
  Requires-Dist: nucliadb-admin-assets>=1.0.0.post1224
28
28
  Requires-Dist: nuclia-models>=0.50.0
29
29
  Requires-Dist: uvicorn[standard]
@@ -274,8 +274,8 @@ nucliadb/search/search/chat/prompt.py,sha256=Bk69WVki5XIzXFYO6o1uQw1feHtuMwfEx_A
274
274
  nucliadb/search/search/chat/query.py,sha256=qWrwVEX_GrDV7LFRyC21BURtl-WsRt8BkIocY8njkKM,17147
275
275
  nucliadb/search/search/hydrator/__init__.py,sha256=3Pc-rcax4TI174qcrllnReE728DoJTaA8tpvBUFf98g,7005
276
276
  nucliadb/search/search/hydrator/fields.py,sha256=LhKw-aNU5eJqfZADtq3iB7AGXm0l_QabAAoSHJTk8Is,5962
277
- nucliadb/search/search/hydrator/images.py,sha256=qqjQbn82DgMUrNG21r8CNnpnOK4RnqHtnJ0jdQsvfyU,4550
278
- nucliadb/search/search/hydrator/paragraphs.py,sha256=yZXsTFAzH_c9Yf8sClLHsr2OwjUxoc2KQtUcV1RT7ug,12616
277
+ nucliadb/search/search/hydrator/images.py,sha256=gS7-dr1e_DpRQ6XaGxwMW1AMEV9a-u73h_jTVy7XY38,4602
278
+ nucliadb/search/search/hydrator/paragraphs.py,sha256=7JjifwhvY7V2TPSnXoIM6aBmh-i6WwTT29D5CvF2Nu0,12676
279
279
  nucliadb/search/search/hydrator/resources.py,sha256=1pNyUac8xWRnQVXU6FkDwsXHzmOZwqKRgKhGPCUdEhE,2004
280
280
  nucliadb/search/search/query_parser/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
281
281
  nucliadb/search/search/query_parser/exceptions.py,sha256=sVl9gRNzhE-s480LBBVkiXzNRbKhYRQN5F3it5tNNp8,939
@@ -343,7 +343,7 @@ nucliadb/train/generators/image_classifier.py,sha256=46YShcl7nf1_iLXZklWTAFvUNII
343
343
  nucliadb/train/generators/paragraph_classifier.py,sha256=Jk3B8a2zkAf-2-59RECiNmNP1Nz7f0-hfnS4rIE69xA,2817
344
344
  nucliadb/train/generators/paragraph_streaming.py,sha256=axGNYjOTgxGsOcAAvCz_rTYzZCdZf0f1s-Hzn-VvIX0,3646
345
345
  nucliadb/train/generators/question_answer_streaming.py,sha256=YXjWQc_SJ_TyXXJRg8tGkC9IeIzbRDcn0spQw9AlRNY,5740
346
- nucliadb/train/generators/sentence_classifier.py,sha256=4JBGtInnWUQrM9wBd-P7Z2lTK1Dka6U3PPDqnLbGnFM,5177
346
+ nucliadb/train/generators/sentence_classifier.py,sha256=izCAbjMgfH6isn48lj6z09gwCunouClqct8MuaYPwVc,4989
347
347
  nucliadb/train/generators/token_classifier.py,sha256=T8JOVR1vv5g7rn7HtcQcIZ3O5TFqh15uv5rOselLBVo,9617
348
348
  nucliadb/train/generators/utils.py,sha256=88cCuHUlOkn9UDP2NR4ru7A-5hbcLGd73sv9j0fMhmY,3590
349
349
  nucliadb/writer/__init__.py,sha256=S298mrZL3vr62OrBqi97mdLxgR5cReMlRJgnaQHZV7s,1304
@@ -384,8 +384,8 @@ nucliadb/writer/tus/local.py,sha256=7jYa_w9b-N90jWgN2sQKkNcomqn6JMVBOVeDOVYJHto,
384
384
  nucliadb/writer/tus/s3.py,sha256=vu1BGg4VqJ_x2P1u2BxqPKlSfw5orT_a3R-Ln5oPUpU,8483
385
385
  nucliadb/writer/tus/storage.py,sha256=ToqwjoYnjI4oIcwzkhha_MPxi-k4Jk3Lt55zRwaC1SM,2903
386
386
  nucliadb/writer/tus/utils.py,sha256=MSdVbRsRSZVdkaum69_0wku7X3p5wlZf4nr6E0GMKbw,2556
387
- nucliadb-6.9.0.post5008.dist-info/METADATA,sha256=lWKVwRZU7H-IiUnwX8V_02JAfoHhQ0Namxoqzqw0MDU,4158
388
- nucliadb-6.9.0.post5008.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
389
- nucliadb-6.9.0.post5008.dist-info/entry_points.txt,sha256=XqGfgFDuY3zXQc8ewXM2TRVjTModIq851zOsgrmaXx4,1268
390
- nucliadb-6.9.0.post5008.dist-info/top_level.txt,sha256=hwYhTVnX7jkQ9gJCkVrbqEG1M4lT2F_iPQND1fCzF80,20
391
- nucliadb-6.9.0.post5008.dist-info/RECORD,,
387
+ nucliadb-6.9.0.post5009.dist-info/METADATA,sha256=hOXZOJfSkjAHZvN5oOM2KVltbSm_QmfnVzcB-jQIFRA,4158
388
+ nucliadb-6.9.0.post5009.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
389
+ nucliadb-6.9.0.post5009.dist-info/entry_points.txt,sha256=XqGfgFDuY3zXQc8ewXM2TRVjTModIq851zOsgrmaXx4,1268
390
+ nucliadb-6.9.0.post5009.dist-info/top_level.txt,sha256=hwYhTVnX7jkQ9gJCkVrbqEG1M4lT2F_iPQND1fCzF80,20
391
+ nucliadb-6.9.0.post5009.dist-info/RECORD,,