nucliadb 6.2.1.post3063__py3-none-any.whl → 6.2.1.post3067__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nucliadb/search/search/chat/ask.py +19 -14
- nucliadb/search/search/graph_strategy.py +25 -49
- nucliadb/search/search/merge.py +13 -0
- {nucliadb-6.2.1.post3063.dist-info → nucliadb-6.2.1.post3067.dist-info}/METADATA +5 -5
- {nucliadb-6.2.1.post3063.dist-info → nucliadb-6.2.1.post3067.dist-info}/RECORD +9 -9
- {nucliadb-6.2.1.post3063.dist-info → nucliadb-6.2.1.post3067.dist-info}/WHEEL +0 -0
- {nucliadb-6.2.1.post3063.dist-info → nucliadb-6.2.1.post3067.dist-info}/entry_points.txt +0 -0
- {nucliadb-6.2.1.post3063.dist-info → nucliadb-6.2.1.post3067.dist-info}/top_level.txt +0 -0
- {nucliadb-6.2.1.post3063.dist-info → nucliadb-6.2.1.post3067.dist-info}/zip-safe +0 -0
@@ -719,9 +719,19 @@ async def retrieval_in_kb(
|
|
719
719
|
prequeries = parse_prequeries(ask_request)
|
720
720
|
graph_strategy = parse_graph_strategy(ask_request)
|
721
721
|
with metrics.time("retrieval"):
|
722
|
-
prequeries_results =
|
722
|
+
main_results, prequeries_results, query_parser = await get_find_results(
|
723
|
+
kbid=kbid,
|
724
|
+
query=main_query,
|
725
|
+
item=ask_request,
|
726
|
+
ndb_client=client_type,
|
727
|
+
user=user_id,
|
728
|
+
origin=origin,
|
729
|
+
metrics=metrics,
|
730
|
+
prequeries_strategy=prequeries,
|
731
|
+
)
|
732
|
+
|
723
733
|
if graph_strategy is not None:
|
724
|
-
|
734
|
+
graph_results, graph_request = await get_graph_results(
|
725
735
|
kbid=kbid,
|
726
736
|
query=main_query,
|
727
737
|
item=ask_request,
|
@@ -732,18 +742,13 @@ async def retrieval_in_kb(
|
|
732
742
|
metrics=metrics,
|
733
743
|
shards=ask_request.shards,
|
734
744
|
)
|
735
|
-
|
736
|
-
|
737
|
-
|
738
|
-
|
739
|
-
|
740
|
-
|
741
|
-
|
742
|
-
user=user_id,
|
743
|
-
origin=origin,
|
744
|
-
metrics=metrics,
|
745
|
-
prequeries_strategy=prequeries,
|
746
|
-
)
|
745
|
+
|
746
|
+
if prequeries_results is None:
|
747
|
+
prequeries_results = []
|
748
|
+
|
749
|
+
prequery = PreQuery(id="graph", request=graph_request, weight=graph_strategy.weight)
|
750
|
+
prequeries_results.append((prequery, graph_results))
|
751
|
+
|
747
752
|
if len(main_results.resources) == 0 and all(
|
748
753
|
len(prequery_result.resources) == 0 for (_, prequery_result) in prequeries_results or []
|
749
754
|
):
|
@@ -17,11 +17,9 @@
|
|
17
17
|
# You should have received a copy of the GNU Affero General Public License
|
18
18
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
19
|
|
20
|
-
import asyncio
|
21
20
|
import heapq
|
22
21
|
import json
|
23
22
|
from collections import defaultdict
|
24
|
-
from datetime import datetime
|
25
23
|
from typing import Any, Collection, Iterable, Optional, Union
|
26
24
|
|
27
25
|
from nuclia_models.predict.generative_responses import (
|
@@ -46,9 +44,8 @@ from nucliadb.search.search.find_merge import (
|
|
46
44
|
hydrate_and_rerank,
|
47
45
|
)
|
48
46
|
from nucliadb.search.search.hydrator import ResourceHydrationOptions, TextBlockHydrationOptions
|
49
|
-
from nucliadb.search.search.merge import
|
47
|
+
from nucliadb.search.search.merge import merge_relation_prefix_results
|
50
48
|
from nucliadb.search.search.metrics import RAGMetrics
|
51
|
-
from nucliadb.search.search.query import QueryParser
|
52
49
|
from nucliadb.search.search.rerankers import Reranker, RerankingOptions
|
53
50
|
from nucliadb.search.utilities import get_predict
|
54
51
|
from nucliadb_models.common import FieldTypeName
|
@@ -62,11 +59,12 @@ from nucliadb_models.search import (
|
|
62
59
|
ChatModel,
|
63
60
|
DirectionalRelation,
|
64
61
|
EntitySubgraph,
|
62
|
+
FindRequest,
|
65
63
|
GraphStrategy,
|
66
64
|
KnowledgeboxFindResults,
|
67
|
-
KnowledgeboxSuggestResults,
|
68
65
|
NucliaDBClientType,
|
69
66
|
QueryEntityDetection,
|
67
|
+
RelatedEntities,
|
70
68
|
RelationDirection,
|
71
69
|
RelationRanking,
|
72
70
|
Relations,
|
@@ -308,7 +306,7 @@ async def get_graph_results(
|
|
308
306
|
generative_model: Optional[str] = None,
|
309
307
|
metrics: RAGMetrics = RAGMetrics(),
|
310
308
|
shards: Optional[list[str]] = None,
|
311
|
-
) -> tuple[KnowledgeboxFindResults,
|
309
|
+
) -> tuple[KnowledgeboxFindResults, FindRequest]:
|
312
310
|
relations = Relations(entities={})
|
313
311
|
explored_entities: set[str] = set()
|
314
312
|
scores: dict[str, list[float]] = {}
|
@@ -321,23 +319,18 @@ async def get_graph_results(
|
|
321
319
|
# Get the entities from the query
|
322
320
|
with metrics.time("graph_strat_query_entities"):
|
323
321
|
if graph_strategy.query_entity_detection == QueryEntityDetection.SUGGEST:
|
324
|
-
|
322
|
+
relation_result = await fuzzy_search_entities(
|
325
323
|
kbid=kbid,
|
326
324
|
query=query,
|
327
|
-
range_creation_start=item.range_creation_start,
|
328
|
-
range_creation_end=item.range_creation_end,
|
329
|
-
range_modification_start=item.range_modification_start,
|
330
|
-
range_modification_end=item.range_modification_end,
|
331
|
-
target_shard_replicas=shards,
|
332
325
|
)
|
333
|
-
if
|
326
|
+
if relation_result is not None:
|
334
327
|
entities_to_explore = (
|
335
328
|
RelationNode(
|
336
329
|
ntype=RelationNode.NodeType.ENTITY,
|
337
330
|
value=result.value,
|
338
331
|
subtype=result.family,
|
339
332
|
)
|
340
|
-
for result in
|
333
|
+
for result in relation_result.entities
|
341
334
|
)
|
342
335
|
elif (
|
343
336
|
not entities_to_explore
|
@@ -361,6 +354,7 @@ async def get_graph_results(
|
|
361
354
|
for relation in subgraph.related_to
|
362
355
|
if relation.entity not in explored_entities
|
363
356
|
)
|
357
|
+
|
364
358
|
# Get the relations for the new entities
|
365
359
|
with metrics.time("graph_strat_neighbor_relations"):
|
366
360
|
try:
|
@@ -437,54 +431,36 @@ async def get_graph_results(
|
|
437
431
|
field_type_filter=find_request.field_type_filter,
|
438
432
|
relation_text_as_paragraphs=graph_strategy.relation_text_as_paragraphs,
|
439
433
|
)
|
440
|
-
return find_results,
|
434
|
+
return find_results, find_request
|
441
435
|
|
442
436
|
|
443
437
|
async def fuzzy_search_entities(
|
444
438
|
kbid: str,
|
445
439
|
query: str,
|
446
|
-
|
447
|
-
range_creation_end: Optional[datetime] = None,
|
448
|
-
range_modification_start: Optional[datetime] = None,
|
449
|
-
range_modification_end: Optional[datetime] = None,
|
450
|
-
target_shard_replicas: Optional[list[str]] = None,
|
451
|
-
) -> KnowledgeboxSuggestResults:
|
440
|
+
) -> Optional[RelatedEntities]:
|
452
441
|
"""Fuzzy find entities in KB given a query using the same methodology as /suggest, but split by words."""
|
453
442
|
|
454
|
-
|
455
|
-
|
456
|
-
)
|
457
|
-
if range_creation_start is not None:
|
458
|
-
base_request.timestamps.from_created.FromDatetime(range_creation_start)
|
459
|
-
if range_creation_end is not None:
|
460
|
-
base_request.timestamps.to_created.FromDatetime(range_creation_end)
|
461
|
-
if range_modification_start is not None:
|
462
|
-
base_request.timestamps.from_modified.FromDatetime(range_modification_start)
|
463
|
-
if range_modification_end is not None:
|
464
|
-
base_request.timestamps.to_modified.FromDatetime(range_modification_end)
|
465
|
-
|
466
|
-
tasks = []
|
467
|
-
# XXX: Splitting by words is not ideal, in the future, modify suggest to better handle this
|
468
|
-
for word in query.split():
|
469
|
-
if len(word) < 3:
|
470
|
-
continue
|
471
|
-
request = nodereader_pb2.SuggestRequest()
|
472
|
-
request.CopyFrom(base_request)
|
473
|
-
request.body = word
|
474
|
-
tasks.append(
|
475
|
-
node_query(kbid, Method.SUGGEST, request, target_shard_replicas=target_shard_replicas)
|
476
|
-
)
|
443
|
+
request = nodereader_pb2.SearchRequest()
|
444
|
+
request.relation_prefix.query = query
|
477
445
|
|
446
|
+
results: list[nodereader_pb2.SearchResponse]
|
478
447
|
try:
|
479
|
-
|
480
|
-
|
481
|
-
|
482
|
-
|
448
|
+
(
|
449
|
+
results,
|
450
|
+
_,
|
451
|
+
_,
|
452
|
+
) = await node_query(
|
453
|
+
kbid,
|
454
|
+
Method.SEARCH,
|
455
|
+
request,
|
456
|
+
use_read_replica_nodes=True,
|
457
|
+
retry_on_primary=False,
|
483
458
|
)
|
459
|
+
return merge_relation_prefix_results(results)
|
484
460
|
except Exception as e:
|
485
461
|
capture_exception(e)
|
486
462
|
logger.exception("Error in finding entities in query for graph strategy")
|
487
|
-
return
|
463
|
+
return None
|
488
464
|
|
489
465
|
|
490
466
|
async def rank_relations_reranker(
|
nucliadb/search/search/merge.py
CHANGED
@@ -615,6 +615,19 @@ async def merge_suggest_entities_results(
|
|
615
615
|
return RelatedEntities(entities=list(unique_entities), total=len(unique_entities))
|
616
616
|
|
617
617
|
|
618
|
+
def merge_relation_prefix_results(
|
619
|
+
responses: list[SearchResponse],
|
620
|
+
) -> RelatedEntities:
|
621
|
+
unique_entities: Set[RelatedEntity] = set()
|
622
|
+
for response in responses:
|
623
|
+
response_entities = (
|
624
|
+
RelatedEntity(family=e.subtype, value=e.value) for e in response.relation.prefix.nodes
|
625
|
+
)
|
626
|
+
unique_entities.update(response_entities)
|
627
|
+
|
628
|
+
return RelatedEntities(entities=list(unique_entities), total=len(unique_entities))
|
629
|
+
|
630
|
+
|
618
631
|
async def merge_suggest_results(
|
619
632
|
suggest_responses: list[SuggestResponse],
|
620
633
|
kbid: str,
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.2
|
2
2
|
Name: nucliadb
|
3
|
-
Version: 6.2.1.post3063
|
3
|
+
Version: 6.2.1.post3067
|
4
4
|
Home-page: https://docs.nuclia.dev/docs/management/nucliadb/intro
|
5
5
|
Author: NucliaDB Community
|
6
6
|
Author-email: nucliadb@nuclia.com
|
@@ -22,10 +22,10 @@ Classifier: Programming Language :: Python :: 3.12
|
|
22
22
|
Classifier: Programming Language :: Python :: 3 :: Only
|
23
23
|
Requires-Python: >=3.9, <4
|
24
24
|
Description-Content-Type: text/markdown
|
25
|
-
Requires-Dist: nucliadb-telemetry[all]>=6.2.1.
|
26
|
-
Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.2.1.
|
27
|
-
Requires-Dist: nucliadb-protos>=6.2.1.
|
28
|
-
Requires-Dist: nucliadb-models>=6.2.1.
|
25
|
+
Requires-Dist: nucliadb-telemetry[all]>=6.2.1.post3067
|
26
|
+
Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.2.1.post3067
|
27
|
+
Requires-Dist: nucliadb-protos>=6.2.1.post3067
|
28
|
+
Requires-Dist: nucliadb-models>=6.2.1.post3067
|
29
29
|
Requires-Dist: nucliadb-admin-assets>=1.0.0.post1224
|
30
30
|
Requires-Dist: nuclia-models>=0.24.2
|
31
31
|
Requires-Dist: uvicorn
|
@@ -210,9 +210,9 @@ nucliadb/search/search/fetch.py,sha256=XJHIFnZmXM_8Kb37lb4lg1GYG7cZ1plT-qAIb_Qzi
|
|
210
210
|
nucliadb/search/search/filters.py,sha256=1MkHlJjAQqoRCj7e5cEzK2HvBxGLE17I_omsjiklbtw,6476
|
211
211
|
nucliadb/search/search/find.py,sha256=yQbttt85wQFc4NEaj2RNGgozP7IQx_bjAOhHke3fXY0,9890
|
212
212
|
nucliadb/search/search/find_merge.py,sha256=_R_YpHAZv5BHh3XABQ8MRd1Ci0seclGYf26yJHJ7H0I,17178
|
213
|
-
nucliadb/search/search/graph_strategy.py,sha256=
|
213
|
+
nucliadb/search/search/graph_strategy.py,sha256=cFcu6nrOqHfobsbUu1pOwfBndrN4ppEvv3-4jV4_4bg,31977
|
214
214
|
nucliadb/search/search/hydrator.py,sha256=-R37gCrGxkyaiHQalnTWHNG_FCx11Zucd7qA1vQCxuw,6985
|
215
|
-
nucliadb/search/search/merge.py,sha256=
|
215
|
+
nucliadb/search/search/merge.py,sha256=i_PTBFRqC5iTTziOMEltxLIlmokIou5hjjgR4BnoLBE,22635
|
216
216
|
nucliadb/search/search/metrics.py,sha256=81X-tahGW4n2CLvUzCPdNxNClmZqUWZjcVOGCUHoiUM,2872
|
217
217
|
nucliadb/search/search/paragraphs.py,sha256=pNAEiYqJGGUVcEf7xf-PFMVqz0PX4Qb-WNG-_zPGN2o,7799
|
218
218
|
nucliadb/search/search/pgcatalog.py,sha256=IaNK4dAxdXs38PoIkTdgqMDuZDjeiOtcXn3LeaT-OMw,8855
|
@@ -224,7 +224,7 @@ nucliadb/search/search/shards.py,sha256=JSRSrHgHcF4sXyuZZoJdMfK0v_LHpoSRf1lCr5-K
|
|
224
224
|
nucliadb/search/search/summarize.py,sha256=ksmYPubEQvAQgfPdZHfzB_rR19B2ci4IYZ6jLdHxZo8,4996
|
225
225
|
nucliadb/search/search/utils.py,sha256=iF2tbBA56gRMJH1TlE2hMrqeXqjoeOPt4KgRdp2m9Ek,3313
|
226
226
|
nucliadb/search/search/chat/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
|
227
|
-
nucliadb/search/search/chat/ask.py,sha256=
|
227
|
+
nucliadb/search/search/chat/ask.py,sha256=K85Size6WAb-q4sCn0u1drrPnqIvqCy6YbfCxQmsEt8,36348
|
228
228
|
nucliadb/search/search/chat/exceptions.py,sha256=Siy4GXW2L7oPhIR86H3WHBhE9lkV4A4YaAszuGGUf54,1356
|
229
229
|
nucliadb/search/search/chat/images.py,sha256=PA8VWxT5_HUGfW1ULhKTK46UBsVyINtWWqEM1ulzX1E,3095
|
230
230
|
nucliadb/search/search/chat/prompt.py,sha256=r2JTiRWH3YHPdeRAG5w6gD0g0fWVxdTjYIR86qAVa7k,47106
|
@@ -329,9 +329,9 @@ nucliadb/writer/tus/local.py,sha256=7jYa_w9b-N90jWgN2sQKkNcomqn6JMVBOVeDOVYJHto,
|
|
329
329
|
nucliadb/writer/tus/s3.py,sha256=vF0NkFTXiXhXq3bCVXXVV-ED38ECVoUeeYViP8uMqcU,8357
|
330
330
|
nucliadb/writer/tus/storage.py,sha256=ToqwjoYnjI4oIcwzkhha_MPxi-k4Jk3Lt55zRwaC1SM,2903
|
331
331
|
nucliadb/writer/tus/utils.py,sha256=MSdVbRsRSZVdkaum69_0wku7X3p5wlZf4nr6E0GMKbw,2556
|
332
|
-
nucliadb-6.2.1.
|
333
|
-
nucliadb-6.2.1.
|
334
|
-
nucliadb-6.2.1.
|
335
|
-
nucliadb-6.2.1.
|
336
|
-
nucliadb-6.2.1.
|
337
|
-
nucliadb-6.2.1.
|
332
|
+
nucliadb-6.2.1.post3067.dist-info/METADATA,sha256=V33nguAqDHoZDoT_3Vfc2w3HTnobJN1C_uB-tlKWGMU,4603
|
333
|
+
nucliadb-6.2.1.post3067.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
334
|
+
nucliadb-6.2.1.post3067.dist-info/entry_points.txt,sha256=XqGfgFDuY3zXQc8ewXM2TRVjTModIq851zOsgrmaXx4,1268
|
335
|
+
nucliadb-6.2.1.post3067.dist-info/top_level.txt,sha256=hwYhTVnX7jkQ9gJCkVrbqEG1M4lT2F_iPQND1fCzF80,20
|
336
|
+
nucliadb-6.2.1.post3067.dist-info/zip-safe,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
|
337
|
+
nucliadb-6.2.1.post3067.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|