nucliadb 6.2.1.post2835__py3-none-any.whl → 6.2.1.post2842__py3-none-any.whl
This diff shows the changes between two publicly available versions of a package that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- nucliadb/common/external_index_providers/base.py +2 -1
- nucliadb/common/ids.py +18 -4
- nucliadb/search/api/v1/suggest.py +0 -2
- nucliadb/search/search/chat/ask.py +35 -10
- nucliadb/search/search/chat/prompt.py +4 -2
- nucliadb/search/search/chat/query.py +56 -28
- nucliadb/search/search/graph_strategy.py +913 -0
- nucliadb/search/search/hydrator.py +6 -0
- nucliadb/search/search/merge.py +54 -22
- {nucliadb-6.2.1.post2835.dist-info → nucliadb-6.2.1.post2842.dist-info}/METADATA +5 -5
- {nucliadb-6.2.1.post2835.dist-info → nucliadb-6.2.1.post2842.dist-info}/RECORD +15 -14
- {nucliadb-6.2.1.post2835.dist-info → nucliadb-6.2.1.post2842.dist-info}/WHEEL +0 -0
- {nucliadb-6.2.1.post2835.dist-info → nucliadb-6.2.1.post2842.dist-info}/entry_points.txt +0 -0
- {nucliadb-6.2.1.post2835.dist-info → nucliadb-6.2.1.post2842.dist-info}/top_level.txt +0 -0
- {nucliadb-6.2.1.post2835.dist-info → nucliadb-6.2.1.post2842.dist-info}/zip-safe +0 -0
@@ -66,6 +66,9 @@ class TextBlockHydrationOptions(BaseModel):
|
|
66
66
|
# list of exact matches to highlight
|
67
67
|
ematches: Optional[list[str]] = None
|
68
68
|
|
69
|
+
# If true, only hydrate the text block if its text is not already populated
|
70
|
+
only_hydrate_empty: bool = False
|
71
|
+
|
69
72
|
|
70
73
|
@hydrator_observer.wrap({"type": "resource_text"})
|
71
74
|
async def hydrate_resource_text(
|
@@ -161,6 +164,8 @@ async def hydrate_text_block(
|
|
161
164
|
`text_block` object.
|
162
165
|
|
163
166
|
"""
|
167
|
+
if options.only_hydrate_empty and text_block.text:
|
168
|
+
return text_block
|
164
169
|
async with AsyncExitStack() as stack:
|
165
170
|
if concurrency_control is not None:
|
166
171
|
await stack.enter_async_context(concurrency_control)
|
@@ -188,4 +193,5 @@ def text_block_to_find_paragraph(text_block: TextBlockMatch) -> FindParagraph:
|
|
188
193
|
reference=text_block.representation_file,
|
189
194
|
page_with_visual=text_block.page_with_visual,
|
190
195
|
position=text_block.position,
|
196
|
+
relevant_relations=text_block.relevant_relations,
|
191
197
|
)
|
nucliadb/search/search/merge.py
CHANGED
@@ -23,6 +23,7 @@ import math
|
|
23
23
|
from typing import Any, Optional, Set, Union
|
24
24
|
|
25
25
|
from nucliadb.common.ids import FieldId, ParagraphId
|
26
|
+
from nucliadb.common.models_utils import from_proto
|
26
27
|
from nucliadb.common.models_utils.from_proto import RelationTypePbMap
|
27
28
|
from nucliadb.search.search import cache
|
28
29
|
from nucliadb.search.search.cut import cut_page
|
@@ -442,15 +443,38 @@ async def merge_paragraph_results(
|
|
442
443
|
async def merge_relations_results(
|
443
444
|
relations_responses: list[RelationSearchResponse],
|
444
445
|
query: EntitiesSubgraphRequest,
|
446
|
+
only_with_metadata: bool = False,
|
447
|
+
only_agentic: bool = False,
|
445
448
|
) -> Relations:
|
446
449
|
loop = asyncio.get_event_loop()
|
447
|
-
return await loop.run_in_executor(
|
450
|
+
return await loop.run_in_executor(
|
451
|
+
None,
|
452
|
+
_merge_relations_results,
|
453
|
+
relations_responses,
|
454
|
+
query,
|
455
|
+
only_with_metadata,
|
456
|
+
only_agentic,
|
457
|
+
)
|
448
458
|
|
449
459
|
|
450
460
|
def _merge_relations_results(
|
451
461
|
relations_responses: list[RelationSearchResponse],
|
452
462
|
query: EntitiesSubgraphRequest,
|
463
|
+
only_with_metadata: bool,
|
464
|
+
only_agentic: bool,
|
453
465
|
) -> Relations:
|
466
|
+
"""
|
467
|
+
Merge relation search responses into a single Relations object while applying filters.
|
468
|
+
|
469
|
+
Args:
|
470
|
+
relations_responses: List of relation search responses
|
471
|
+
query: EntitiesSubgraphRequest object
|
472
|
+
only_with_metadata: If True, only include relations with metadata. This metadata includes paragraph_id and entity positions among other things.
|
473
|
+
only_agentic: If True, only include relations extracted by a Graph Extraction Agent.
|
474
|
+
|
475
|
+
Returns:
|
476
|
+
Relations
|
477
|
+
"""
|
454
478
|
relations = Relations(entities={})
|
455
479
|
|
456
480
|
for entry_point in query.entry_points:
|
@@ -462,27 +486,37 @@ def _merge_relations_results(
|
|
462
486
|
destination = relation.to
|
463
487
|
relation_type = RelationTypePbMap[relation.relation]
|
464
488
|
relation_label = relation.relation_label
|
465
|
-
|
466
|
-
|
467
|
-
|
468
|
-
|
469
|
-
|
470
|
-
|
471
|
-
|
472
|
-
|
473
|
-
|
489
|
+
metadata = relation.metadata if relation.HasField("metadata") else None
|
490
|
+
# If only_with_metadata is True, we check that metadata for the relation is not None
|
491
|
+
# If only_agentic is True, we check that metadata for the relation is not None and that it has a data_augmentation_task_id
|
492
|
+
# TODO: This is suboptimal, we should be able to filter this in the query to the index,
|
493
|
+
if (not only_with_metadata or metadata) and (
|
494
|
+
not only_agentic or (metadata and metadata.data_augmentation_task_id)
|
495
|
+
):
|
496
|
+
if origin.value in relations.entities:
|
497
|
+
relations.entities[origin.value].related_to.append(
|
498
|
+
DirectionalRelation(
|
499
|
+
entity=destination.value,
|
500
|
+
entity_type=relation_node_type_to_entity_type(destination.ntype),
|
501
|
+
entity_subtype=destination.subtype,
|
502
|
+
relation=relation_type,
|
503
|
+
relation_label=relation_label,
|
504
|
+
direction=RelationDirection.OUT,
|
505
|
+
metadata=from_proto.relation_metadata(metadata) if metadata else None,
|
506
|
+
)
|
474
507
|
)
|
475
|
-
|
476
|
-
|
477
|
-
|
478
|
-
|
479
|
-
|
480
|
-
|
481
|
-
|
482
|
-
|
483
|
-
|
508
|
+
elif destination.value in relations.entities:
|
509
|
+
relations.entities[destination.value].related_to.append(
|
510
|
+
DirectionalRelation(
|
511
|
+
entity=origin.value,
|
512
|
+
entity_type=relation_node_type_to_entity_type(origin.ntype),
|
513
|
+
entity_subtype=origin.subtype,
|
514
|
+
relation=relation_type,
|
515
|
+
relation_label=relation_label,
|
516
|
+
direction=RelationDirection.IN,
|
517
|
+
metadata=from_proto.relation_metadata(metadata) if metadata else None,
|
518
|
+
)
|
484
519
|
)
|
485
|
-
)
|
486
520
|
|
487
521
|
return relations
|
488
522
|
|
@@ -584,8 +618,6 @@ async def merge_suggest_entities_results(
|
|
584
618
|
async def merge_suggest_results(
|
585
619
|
suggest_responses: list[SuggestResponse],
|
586
620
|
kbid: str,
|
587
|
-
show: list[ResourceProperties],
|
588
|
-
field_type_filter: list[FieldTypeName],
|
589
621
|
highlight: bool = False,
|
590
622
|
) -> KnowledgeboxSuggestResults:
|
591
623
|
api_results = KnowledgeboxSuggestResults()
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.2
|
2
2
|
Name: nucliadb
|
3
|
-
Version: 6.2.1.post2835
|
3
|
+
Version: 6.2.1.post2842
|
4
4
|
Home-page: https://docs.nuclia.dev/docs/management/nucliadb/intro
|
5
5
|
Author: NucliaDB Community
|
6
6
|
Author-email: nucliadb@nuclia.com
|
@@ -22,10 +22,10 @@ Classifier: Programming Language :: Python :: 3.12
|
|
22
22
|
Classifier: Programming Language :: Python :: 3 :: Only
|
23
23
|
Requires-Python: >=3.9, <4
|
24
24
|
Description-Content-Type: text/markdown
|
25
|
-
Requires-Dist: nucliadb-telemetry[all]>=6.2.1.post2835
|
26
|
-
Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.2.1.post2835
|
27
|
-
Requires-Dist: nucliadb-protos>=6.2.1.post2835
|
28
|
-
Requires-Dist: nucliadb-models>=6.2.1.post2835
|
25
|
+
Requires-Dist: nucliadb-telemetry[all]>=6.2.1.post2842
|
26
|
+
Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.2.1.post2842
|
27
|
+
Requires-Dist: nucliadb-protos>=6.2.1.post2842
|
28
|
+
Requires-Dist: nucliadb-models>=6.2.1.post2842
|
29
29
|
Requires-Dist: nucliadb-admin-assets>=1.0.0.post1224
|
30
30
|
Requires-Dist: nucliadb-node-binding>=2.26.0
|
31
31
|
Requires-Dist: nuclia-models>=0.24.2
|
@@ -38,7 +38,7 @@ nucliadb/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
38
38
|
nucliadb/common/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
|
39
39
|
nucliadb/common/constants.py,sha256=QpigxJh_CtD85Evy0PtV5cVq6x0U_f9xfIcXz1ymkUg,869
|
40
40
|
nucliadb/common/counters.py,sha256=yhJEmmrglTSrDmB8OjaFLkZ__TwhTxayyQrtacnB55I,957
|
41
|
-
nucliadb/common/ids.py,sha256=
|
41
|
+
nucliadb/common/ids.py,sha256=HMb213Kz9HaY4IsBwaQJFhUErntKWV-29s0UHaGcf1E,8004
|
42
42
|
nucliadb/common/locking.py,sha256=RL0CabZVPzxHZyUjYeUyLvsJTm7W3J9o4fEgsY_ufNc,5896
|
43
43
|
nucliadb/common/nidx.py,sha256=D74oNdniWjbc6gBBDwZP74NH-egTIORHhbfzgIto8DE,8667
|
44
44
|
nucliadb/common/cluster/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
|
@@ -80,7 +80,7 @@ nucliadb/common/datamanagers/synonyms.py,sha256=zk3GEH38KF5vV_VcuL6DCg-2JwgXJfQl
|
|
80
80
|
nucliadb/common/datamanagers/utils.py,sha256=McHlXvE4P3x-bBY3pr0n8djbTDQvI1G5WusJrnRdhLA,1827
|
81
81
|
nucliadb/common/datamanagers/vectorsets.py,sha256=XgHNQRw13GpWWymE6qu_ymdzuwL6hDiBKq50fN_sEMM,4007
|
82
82
|
nucliadb/common/external_index_providers/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
|
83
|
-
nucliadb/common/external_index_providers/base.py,sha256=
|
83
|
+
nucliadb/common/external_index_providers/base.py,sha256=yfPkCigT4unXFvAyzy1tXSy2UgWC481GcZAS9bdE4NI,8871
|
84
84
|
nucliadb/common/external_index_providers/exceptions.py,sha256=nDhhOIkb66hjCrBk4Spvl2vN1SuW5gbwrMCDmrdjHHE,1209
|
85
85
|
nucliadb/common/external_index_providers/manager.py,sha256=aFSrrKKYG1ydpTSyq4zYD0LOxFS7P-CO6rcKC0hiF4I,4267
|
86
86
|
nucliadb/common/external_index_providers/pinecone.py,sha256=afglJq6FfifFNTONGrFBCe5yuEL2h3pDFwZkKiA4_6o,39802
|
@@ -203,7 +203,7 @@ nucliadb/search/api/v1/knowledgebox.py,sha256=uPaMF5c1168Owd6fy_x7IUlcTBzDp2Qm6i
|
|
203
203
|
nucliadb/search/api/v1/predict_proxy.py,sha256=QrGzo0hKjtmyGZ6pjlJHYAh4hxwVUIOTcVcerRCw7eE,3047
|
204
204
|
nucliadb/search/api/v1/router.py,sha256=mtT07rBZcVfpa49doaw9b1tj3sdi3qLH0gn9Io6NYM0,988
|
205
205
|
nucliadb/search/api/v1/search.py,sha256=vCj5V9kozoti0JrgU_XJhTcBucWzI4SY1B0yCSj9EQw,13638
|
206
|
-
nucliadb/search/api/v1/suggest.py,sha256=
|
206
|
+
nucliadb/search/api/v1/suggest.py,sha256=S0YUTAWukzZSYZJzN3T5MUgPM3599HQvG76GOCBuAbQ,5907
|
207
207
|
nucliadb/search/api/v1/summarize.py,sha256=VAHJvE6V3xUgEBfqNKhgoxmDqCvh30RnrEIBVhMcNLU,2499
|
208
208
|
nucliadb/search/api/v1/utils.py,sha256=5Ve-frn7LAE2jqAgB85F8RSeqxDlyA08--gS-AdOLS4,1434
|
209
209
|
nucliadb/search/api/v1/resource/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
|
@@ -219,8 +219,9 @@ nucliadb/search/search/fetch.py,sha256=XJHIFnZmXM_8Kb37lb4lg1GYG7cZ1plT-qAIb_Qzi
|
|
219
219
|
nucliadb/search/search/filters.py,sha256=1MkHlJjAQqoRCj7e5cEzK2HvBxGLE17I_omsjiklbtw,6476
|
220
220
|
nucliadb/search/search/find.py,sha256=yQbttt85wQFc4NEaj2RNGgozP7IQx_bjAOhHke3fXY0,9890
|
221
221
|
nucliadb/search/search/find_merge.py,sha256=_R_YpHAZv5BHh3XABQ8MRd1Ci0seclGYf26yJHJ7H0I,17178
|
222
|
-
nucliadb/search/search/
|
223
|
-
nucliadb/search/search/
|
222
|
+
nucliadb/search/search/graph_strategy.py,sha256=Yw6gPiIcKSj6HtbBcIT_5Poypc7AHkqKhcf8RRzJMnI,33513
|
223
|
+
nucliadb/search/search/hydrator.py,sha256=-R37gCrGxkyaiHQalnTWHNG_FCx11Zucd7qA1vQCxuw,6985
|
224
|
+
nucliadb/search/search/merge.py,sha256=g0PxUejWtYIYWG-VroArMCgwB6AOp3lZMkzoHAPYBKE,22183
|
224
225
|
nucliadb/search/search/metrics.py,sha256=81X-tahGW4n2CLvUzCPdNxNClmZqUWZjcVOGCUHoiUM,2872
|
225
226
|
nucliadb/search/search/paragraphs.py,sha256=pNAEiYqJGGUVcEf7xf-PFMVqz0PX4Qb-WNG-_zPGN2o,7799
|
226
227
|
nucliadb/search/search/pgcatalog.py,sha256=IaNK4dAxdXs38PoIkTdgqMDuZDjeiOtcXn3LeaT-OMw,8855
|
@@ -232,11 +233,11 @@ nucliadb/search/search/shards.py,sha256=mM2aCHWhl_gwkCENXDShPukS-_qnB5tFS3UAJuzM
|
|
232
233
|
nucliadb/search/search/summarize.py,sha256=ksmYPubEQvAQgfPdZHfzB_rR19B2ci4IYZ6jLdHxZo8,4996
|
233
234
|
nucliadb/search/search/utils.py,sha256=iF2tbBA56gRMJH1TlE2hMrqeXqjoeOPt4KgRdp2m9Ek,3313
|
234
235
|
nucliadb/search/search/chat/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
|
235
|
-
nucliadb/search/search/chat/ask.py,sha256=
|
236
|
+
nucliadb/search/search/chat/ask.py,sha256=4PHueXt4dMF6NHuBYII--reCslbAVCzAE4LgWFAiKdY,35143
|
236
237
|
nucliadb/search/search/chat/exceptions.py,sha256=Siy4GXW2L7oPhIR86H3WHBhE9lkV4A4YaAszuGGUf54,1356
|
237
238
|
nucliadb/search/search/chat/images.py,sha256=PA8VWxT5_HUGfW1ULhKTK46UBsVyINtWWqEM1ulzX1E,3095
|
238
|
-
nucliadb/search/search/chat/prompt.py,sha256=
|
239
|
-
nucliadb/search/search/chat/query.py,sha256=
|
239
|
+
nucliadb/search/search/chat/prompt.py,sha256=HHzBOMUDta7We_zQqpI-eBLNCFdB0BfYDhijvTmw1k0,46952
|
240
|
+
nucliadb/search/search/chat/query.py,sha256=4cmTxnqnvQGPDKTdaNJL4Au8aop1reHLXzkGNhf4NWg,15345
|
240
241
|
nucliadb/search/search/query_parser/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
|
241
242
|
nucliadb/search/search/query_parser/exceptions.py,sha256=tuzl7ZyvVsRz6u0_3zMe60vx39nd3pi641prs-5nC0E,872
|
242
243
|
nucliadb/search/search/query_parser/models.py,sha256=-VlCDXUCgOroAZw1Leqhj2VMgRv_CD2w40PXXOBLaUM,2332
|
@@ -336,9 +337,9 @@ nucliadb/writer/tus/local.py,sha256=7jYa_w9b-N90jWgN2sQKkNcomqn6JMVBOVeDOVYJHto,
|
|
336
337
|
nucliadb/writer/tus/s3.py,sha256=vF0NkFTXiXhXq3bCVXXVV-ED38ECVoUeeYViP8uMqcU,8357
|
337
338
|
nucliadb/writer/tus/storage.py,sha256=ToqwjoYnjI4oIcwzkhha_MPxi-k4Jk3Lt55zRwaC1SM,2903
|
338
339
|
nucliadb/writer/tus/utils.py,sha256=MSdVbRsRSZVdkaum69_0wku7X3p5wlZf4nr6E0GMKbw,2556
|
339
|
-
nucliadb-6.2.1.
|
340
|
-
nucliadb-6.2.1.
|
341
|
-
nucliadb-6.2.1.
|
342
|
-
nucliadb-6.2.1.
|
343
|
-
nucliadb-6.2.1.
|
344
|
-
nucliadb-6.2.1.
|
340
|
+
nucliadb-6.2.1.post2842.dist-info/METADATA,sha256=CmYjCtJ5NGjNjopTPvSGiJNDrhzypADVZGncMsY834k,4689
|
341
|
+
nucliadb-6.2.1.post2842.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
342
|
+
nucliadb-6.2.1.post2842.dist-info/entry_points.txt,sha256=XqGfgFDuY3zXQc8ewXM2TRVjTModIq851zOsgrmaXx4,1268
|
343
|
+
nucliadb-6.2.1.post2842.dist-info/top_level.txt,sha256=hwYhTVnX7jkQ9gJCkVrbqEG1M4lT2F_iPQND1fCzF80,20
|
344
|
+
nucliadb-6.2.1.post2842.dist-info/zip-safe,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
|
345
|
+
nucliadb-6.2.1.post2842.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|