nucliadb 6.9.0.post5008__py3-none-any.whl → 6.9.0.post5009__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of nucliadb might be problematic. Click here for more details.
- nucliadb/search/search/hydrator/images.py +6 -2
- nucliadb/search/search/hydrator/paragraphs.py +3 -1
- nucliadb/train/generators/sentence_classifier.py +2 -8
- {nucliadb-6.9.0.post5008.dist-info → nucliadb-6.9.0.post5009.dist-info}/METADATA +6 -6
- {nucliadb-6.9.0.post5008.dist-info → nucliadb-6.9.0.post5009.dist-info}/RECORD +8 -8
- {nucliadb-6.9.0.post5008.dist-info → nucliadb-6.9.0.post5009.dist-info}/WHEEL +0 -0
- {nucliadb-6.9.0.post5008.dist-info → nucliadb-6.9.0.post5009.dist-info}/entry_points.txt +0 -0
- {nucliadb-6.9.0.post5008.dist-info → nucliadb-6.9.0.post5009.dist-info}/top_level.txt +0 -0
|
@@ -30,12 +30,16 @@ from nucliadb_protos import resources_pb2
|
|
|
30
30
|
from nucliadb_utils.utilities import get_storage
|
|
31
31
|
|
|
32
32
|
|
|
33
|
-
async def paragraph_source_image(kbid: str, paragraph: resources_pb2.Paragraph) -> Optional[Image]:
|
|
33
|
+
async def paragraph_source_image(
|
|
34
|
+
kbid: str, paragraph_id: ParagraphId, paragraph: resources_pb2.Paragraph
|
|
35
|
+
) -> Optional[Image]:
|
|
34
36
|
"""Certain paragraphs are extracted from images using techniques like OCR or
|
|
35
37
|
inception. If that's the case, return the original image for this paragraph.
|
|
36
38
|
|
|
37
39
|
"""
|
|
38
40
|
source_image = paragraph.representation.reference_file
|
|
41
|
+
if not source_image:
|
|
42
|
+
return None
|
|
39
43
|
|
|
40
44
|
if paragraph.kind not in (
|
|
41
45
|
resources_pb2.Paragraph.TypeParagraph.OCR,
|
|
@@ -43,7 +47,7 @@ async def paragraph_source_image(kbid: str, paragraph: resources_pb2.Paragraph)
|
|
|
43
47
|
):
|
|
44
48
|
return None
|
|
45
49
|
|
|
46
|
-
field_id =
|
|
50
|
+
field_id = paragraph_id.field_id
|
|
47
51
|
|
|
48
52
|
# Paragraphs extracted from an image store its original image representation
|
|
49
53
|
# in the reference file. The path is incomplete though, as it's stored in
|
|
@@ -219,7 +219,9 @@ async def hydrate_paragraph(
|
|
|
219
219
|
hydrated.image = hydration_models.HydratedParagraphImage()
|
|
220
220
|
|
|
221
221
|
if config.image.source_image:
|
|
222
|
-
hydrated.image.source_image = await paragraph_source_image(kbid, paragraph)
|
|
222
|
+
hydrated.image.source_image = await paragraph_source_image(
|
|
223
|
+
kbid, paragraph_id, paragraph
|
|
224
|
+
)
|
|
223
225
|
|
|
224
226
|
if config.page:
|
|
225
227
|
if hydrated.page is None:
|
|
@@ -116,10 +116,7 @@ async def get_sentences(kbid: str, result: str) -> list[str]:
|
|
|
116
116
|
if split is not None:
|
|
117
117
|
text = extracted_text.split_text[split]
|
|
118
118
|
for paragraph in field_metadata.split_metadata[split].paragraphs:
|
|
119
|
-
|
|
120
|
-
key = f"{rid}/{field_type}/{field}/{paragraph.start}-{paragraph.end}"
|
|
121
|
-
else:
|
|
122
|
-
key = paragraph.key
|
|
119
|
+
key = f"{rid}/{field_type}/{field}/{paragraph.start}-{paragraph.end}"
|
|
123
120
|
if key == result:
|
|
124
121
|
for sentence in paragraph.sentences:
|
|
125
122
|
splitted_text = text[sentence.start : sentence.end]
|
|
@@ -127,10 +124,7 @@ async def get_sentences(kbid: str, result: str) -> list[str]:
|
|
|
127
124
|
else:
|
|
128
125
|
text = extracted_text.text
|
|
129
126
|
for paragraph in field_metadata.metadata.paragraphs:
|
|
130
|
-
|
|
131
|
-
key = f"{rid}/{field_type}/{field}/{paragraph.start}-{paragraph.end}"
|
|
132
|
-
else:
|
|
133
|
-
key = paragraph.key
|
|
127
|
+
key = f"{rid}/{field_type}/{field}/{paragraph.start}-{paragraph.end}"
|
|
134
128
|
if key == result:
|
|
135
129
|
for sentence in paragraph.sentences:
|
|
136
130
|
splitted_text = text[sentence.start : sentence.end]
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: nucliadb
|
|
3
|
-
Version: 6.9.0.post5008
|
|
3
|
+
Version: 6.9.0.post5009
|
|
4
4
|
Summary: NucliaDB
|
|
5
5
|
Author-email: Nuclia <nucliadb@nuclia.com>
|
|
6
6
|
License-Expression: AGPL-3.0-or-later
|
|
@@ -19,11 +19,11 @@ Classifier: Programming Language :: Python :: 3.12
|
|
|
19
19
|
Classifier: Programming Language :: Python :: 3 :: Only
|
|
20
20
|
Requires-Python: <4,>=3.9
|
|
21
21
|
Description-Content-Type: text/markdown
|
|
22
|
-
Requires-Dist: nucliadb-telemetry[all]>=6.9.0.post5008
|
|
23
|
-
Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.9.0.post5008
|
|
24
|
-
Requires-Dist: nucliadb-protos>=6.9.0.post5008
|
|
25
|
-
Requires-Dist: nucliadb-models>=6.9.0.post5008
|
|
26
|
-
Requires-Dist: nidx-protos>=6.9.0.post5008
|
|
22
|
+
Requires-Dist: nucliadb-telemetry[all]>=6.9.0.post5009
|
|
23
|
+
Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.9.0.post5009
|
|
24
|
+
Requires-Dist: nucliadb-protos>=6.9.0.post5009
|
|
25
|
+
Requires-Dist: nucliadb-models>=6.9.0.post5009
|
|
26
|
+
Requires-Dist: nidx-protos>=6.9.0.post5009
|
|
27
27
|
Requires-Dist: nucliadb-admin-assets>=1.0.0.post1224
|
|
28
28
|
Requires-Dist: nuclia-models>=0.50.0
|
|
29
29
|
Requires-Dist: uvicorn[standard]
|
|
@@ -274,8 +274,8 @@ nucliadb/search/search/chat/prompt.py,sha256=Bk69WVki5XIzXFYO6o1uQw1feHtuMwfEx_A
|
|
|
274
274
|
nucliadb/search/search/chat/query.py,sha256=qWrwVEX_GrDV7LFRyC21BURtl-WsRt8BkIocY8njkKM,17147
|
|
275
275
|
nucliadb/search/search/hydrator/__init__.py,sha256=3Pc-rcax4TI174qcrllnReE728DoJTaA8tpvBUFf98g,7005
|
|
276
276
|
nucliadb/search/search/hydrator/fields.py,sha256=LhKw-aNU5eJqfZADtq3iB7AGXm0l_QabAAoSHJTk8Is,5962
|
|
277
|
-
nucliadb/search/search/hydrator/images.py,sha256=
|
|
278
|
-
nucliadb/search/search/hydrator/paragraphs.py,sha256=
|
|
277
|
+
nucliadb/search/search/hydrator/images.py,sha256=gS7-dr1e_DpRQ6XaGxwMW1AMEV9a-u73h_jTVy7XY38,4602
|
|
278
|
+
nucliadb/search/search/hydrator/paragraphs.py,sha256=7JjifwhvY7V2TPSnXoIM6aBmh-i6WwTT29D5CvF2Nu0,12676
|
|
279
279
|
nucliadb/search/search/hydrator/resources.py,sha256=1pNyUac8xWRnQVXU6FkDwsXHzmOZwqKRgKhGPCUdEhE,2004
|
|
280
280
|
nucliadb/search/search/query_parser/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
|
|
281
281
|
nucliadb/search/search/query_parser/exceptions.py,sha256=sVl9gRNzhE-s480LBBVkiXzNRbKhYRQN5F3it5tNNp8,939
|
|
@@ -343,7 +343,7 @@ nucliadb/train/generators/image_classifier.py,sha256=46YShcl7nf1_iLXZklWTAFvUNII
|
|
|
343
343
|
nucliadb/train/generators/paragraph_classifier.py,sha256=Jk3B8a2zkAf-2-59RECiNmNP1Nz7f0-hfnS4rIE69xA,2817
|
|
344
344
|
nucliadb/train/generators/paragraph_streaming.py,sha256=axGNYjOTgxGsOcAAvCz_rTYzZCdZf0f1s-Hzn-VvIX0,3646
|
|
345
345
|
nucliadb/train/generators/question_answer_streaming.py,sha256=YXjWQc_SJ_TyXXJRg8tGkC9IeIzbRDcn0spQw9AlRNY,5740
|
|
346
|
-
nucliadb/train/generators/sentence_classifier.py,sha256=
|
|
346
|
+
nucliadb/train/generators/sentence_classifier.py,sha256=izCAbjMgfH6isn48lj6z09gwCunouClqct8MuaYPwVc,4989
|
|
347
347
|
nucliadb/train/generators/token_classifier.py,sha256=T8JOVR1vv5g7rn7HtcQcIZ3O5TFqh15uv5rOselLBVo,9617
|
|
348
348
|
nucliadb/train/generators/utils.py,sha256=88cCuHUlOkn9UDP2NR4ru7A-5hbcLGd73sv9j0fMhmY,3590
|
|
349
349
|
nucliadb/writer/__init__.py,sha256=S298mrZL3vr62OrBqi97mdLxgR5cReMlRJgnaQHZV7s,1304
|
|
@@ -384,8 +384,8 @@ nucliadb/writer/tus/local.py,sha256=7jYa_w9b-N90jWgN2sQKkNcomqn6JMVBOVeDOVYJHto,
|
|
|
384
384
|
nucliadb/writer/tus/s3.py,sha256=vu1BGg4VqJ_x2P1u2BxqPKlSfw5orT_a3R-Ln5oPUpU,8483
|
|
385
385
|
nucliadb/writer/tus/storage.py,sha256=ToqwjoYnjI4oIcwzkhha_MPxi-k4Jk3Lt55zRwaC1SM,2903
|
|
386
386
|
nucliadb/writer/tus/utils.py,sha256=MSdVbRsRSZVdkaum69_0wku7X3p5wlZf4nr6E0GMKbw,2556
|
|
387
|
-
nucliadb-6.9.0.
|
|
388
|
-
nucliadb-6.9.0.
|
|
389
|
-
nucliadb-6.9.0.
|
|
390
|
-
nucliadb-6.9.0.
|
|
391
|
-
nucliadb-6.9.0.
|
|
387
|
+
nucliadb-6.9.0.post5009.dist-info/METADATA,sha256=hOXZOJfSkjAHZvN5oOM2KVltbSm_QmfnVzcB-jQIFRA,4158
|
|
388
|
+
nucliadb-6.9.0.post5009.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
389
|
+
nucliadb-6.9.0.post5009.dist-info/entry_points.txt,sha256=XqGfgFDuY3zXQc8ewXM2TRVjTModIq851zOsgrmaXx4,1268
|
|
390
|
+
nucliadb-6.9.0.post5009.dist-info/top_level.txt,sha256=hwYhTVnX7jkQ9gJCkVrbqEG1M4lT2F_iPQND1fCzF80,20
|
|
391
|
+
nucliadb-6.9.0.post5009.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|