nucliadb 6.2.1.post3059__py3-none-any.whl → 6.2.1.post3067__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -719,9 +719,19 @@ async def retrieval_in_kb(
719
719
  prequeries = parse_prequeries(ask_request)
720
720
  graph_strategy = parse_graph_strategy(ask_request)
721
721
  with metrics.time("retrieval"):
722
- prequeries_results = None
722
+ main_results, prequeries_results, query_parser = await get_find_results(
723
+ kbid=kbid,
724
+ query=main_query,
725
+ item=ask_request,
726
+ ndb_client=client_type,
727
+ user=user_id,
728
+ origin=origin,
729
+ metrics=metrics,
730
+ prequeries_strategy=prequeries,
731
+ )
732
+
723
733
  if graph_strategy is not None:
724
- main_results, query_parser = await get_graph_results(
734
+ graph_results, graph_request = await get_graph_results(
725
735
  kbid=kbid,
726
736
  query=main_query,
727
737
  item=ask_request,
@@ -732,18 +742,13 @@ async def retrieval_in_kb(
732
742
  metrics=metrics,
733
743
  shards=ask_request.shards,
734
744
  )
735
- # TODO (oni): Fallback to normal retrieval if no graph results are found
736
- else:
737
- main_results, prequeries_results, query_parser = await get_find_results(
738
- kbid=kbid,
739
- query=main_query,
740
- item=ask_request,
741
- ndb_client=client_type,
742
- user=user_id,
743
- origin=origin,
744
- metrics=metrics,
745
- prequeries_strategy=prequeries,
746
- )
745
+
746
+ if prequeries_results is None:
747
+ prequeries_results = []
748
+
749
+ prequery = PreQuery(id="graph", request=graph_request, weight=graph_strategy.weight)
750
+ prequeries_results.append((prequery, graph_results))
751
+
747
752
  if len(main_results.resources) == 0 and all(
748
753
  len(prequery_result.resources) == 0 for (_, prequery_result) in prequeries_results or []
749
754
  ):
@@ -17,11 +17,9 @@
17
17
  # You should have received a copy of the GNU Affero General Public License
18
18
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
19
 
20
- import asyncio
21
20
  import heapq
22
21
  import json
23
22
  from collections import defaultdict
24
- from datetime import datetime
25
23
  from typing import Any, Collection, Iterable, Optional, Union
26
24
 
27
25
  from nuclia_models.predict.generative_responses import (
@@ -46,9 +44,8 @@ from nucliadb.search.search.find_merge import (
46
44
  hydrate_and_rerank,
47
45
  )
48
46
  from nucliadb.search.search.hydrator import ResourceHydrationOptions, TextBlockHydrationOptions
49
- from nucliadb.search.search.merge import merge_suggest_results
47
+ from nucliadb.search.search.merge import merge_relation_prefix_results
50
48
  from nucliadb.search.search.metrics import RAGMetrics
51
- from nucliadb.search.search.query import QueryParser
52
49
  from nucliadb.search.search.rerankers import Reranker, RerankingOptions
53
50
  from nucliadb.search.utilities import get_predict
54
51
  from nucliadb_models.common import FieldTypeName
@@ -62,11 +59,12 @@ from nucliadb_models.search import (
62
59
  ChatModel,
63
60
  DirectionalRelation,
64
61
  EntitySubgraph,
62
+ FindRequest,
65
63
  GraphStrategy,
66
64
  KnowledgeboxFindResults,
67
- KnowledgeboxSuggestResults,
68
65
  NucliaDBClientType,
69
66
  QueryEntityDetection,
67
+ RelatedEntities,
70
68
  RelationDirection,
71
69
  RelationRanking,
72
70
  Relations,
@@ -308,7 +306,7 @@ async def get_graph_results(
308
306
  generative_model: Optional[str] = None,
309
307
  metrics: RAGMetrics = RAGMetrics(),
310
308
  shards: Optional[list[str]] = None,
311
- ) -> tuple[KnowledgeboxFindResults, QueryParser]:
309
+ ) -> tuple[KnowledgeboxFindResults, FindRequest]:
312
310
  relations = Relations(entities={})
313
311
  explored_entities: set[str] = set()
314
312
  scores: dict[str, list[float]] = {}
@@ -321,23 +319,18 @@ async def get_graph_results(
321
319
  # Get the entities from the query
322
320
  with metrics.time("graph_strat_query_entities"):
323
321
  if graph_strategy.query_entity_detection == QueryEntityDetection.SUGGEST:
324
- suggest_result = await fuzzy_search_entities(
322
+ relation_result = await fuzzy_search_entities(
325
323
  kbid=kbid,
326
324
  query=query,
327
- range_creation_start=item.range_creation_start,
328
- range_creation_end=item.range_creation_end,
329
- range_modification_start=item.range_modification_start,
330
- range_modification_end=item.range_modification_end,
331
- target_shard_replicas=shards,
332
325
  )
333
- if suggest_result.entities is not None:
326
+ if relation_result is not None:
334
327
  entities_to_explore = (
335
328
  RelationNode(
336
329
  ntype=RelationNode.NodeType.ENTITY,
337
330
  value=result.value,
338
331
  subtype=result.family,
339
332
  )
340
- for result in suggest_result.entities.entities
333
+ for result in relation_result.entities
341
334
  )
342
335
  elif (
343
336
  not entities_to_explore
@@ -361,6 +354,7 @@ async def get_graph_results(
361
354
  for relation in subgraph.related_to
362
355
  if relation.entity not in explored_entities
363
356
  )
357
+
364
358
  # Get the relations for the new entities
365
359
  with metrics.time("graph_strat_neighbor_relations"):
366
360
  try:
@@ -437,54 +431,36 @@ async def get_graph_results(
437
431
  field_type_filter=find_request.field_type_filter,
438
432
  relation_text_as_paragraphs=graph_strategy.relation_text_as_paragraphs,
439
433
  )
440
- return find_results, query_parser
434
+ return find_results, find_request
441
435
 
442
436
 
443
437
  async def fuzzy_search_entities(
444
438
  kbid: str,
445
439
  query: str,
446
- range_creation_start: Optional[datetime] = None,
447
- range_creation_end: Optional[datetime] = None,
448
- range_modification_start: Optional[datetime] = None,
449
- range_modification_end: Optional[datetime] = None,
450
- target_shard_replicas: Optional[list[str]] = None,
451
- ) -> KnowledgeboxSuggestResults:
440
+ ) -> Optional[RelatedEntities]:
452
441
  """Fuzzy find entities in KB given a query using the same methodology as /suggest, but split by words."""
453
442
 
454
- base_request = nodereader_pb2.SuggestRequest(
455
- body="", features=[nodereader_pb2.SuggestFeatures.ENTITIES]
456
- )
457
- if range_creation_start is not None:
458
- base_request.timestamps.from_created.FromDatetime(range_creation_start)
459
- if range_creation_end is not None:
460
- base_request.timestamps.to_created.FromDatetime(range_creation_end)
461
- if range_modification_start is not None:
462
- base_request.timestamps.from_modified.FromDatetime(range_modification_start)
463
- if range_modification_end is not None:
464
- base_request.timestamps.to_modified.FromDatetime(range_modification_end)
465
-
466
- tasks = []
467
- # XXX: Splitting by words is not ideal, in the future, modify suggest to better handle this
468
- for word in query.split():
469
- if len(word) < 3:
470
- continue
471
- request = nodereader_pb2.SuggestRequest()
472
- request.CopyFrom(base_request)
473
- request.body = word
474
- tasks.append(
475
- node_query(kbid, Method.SUGGEST, request, target_shard_replicas=target_shard_replicas)
476
- )
443
+ request = nodereader_pb2.SearchRequest()
444
+ request.relation_prefix.query = query
477
445
 
446
+ results: list[nodereader_pb2.SearchResponse]
478
447
  try:
479
- results_raw = await asyncio.gather(*tasks)
480
- return await merge_suggest_results(
481
- [item for r in results_raw for item in r[0]],
482
- kbid=kbid,
448
+ (
449
+ results,
450
+ _,
451
+ _,
452
+ ) = await node_query(
453
+ kbid,
454
+ Method.SEARCH,
455
+ request,
456
+ use_read_replica_nodes=True,
457
+ retry_on_primary=False,
483
458
  )
459
+ return merge_relation_prefix_results(results)
484
460
  except Exception as e:
485
461
  capture_exception(e)
486
462
  logger.exception("Error in finding entities in query for graph strategy")
487
- return KnowledgeboxSuggestResults(entities=None)
463
+ return None
488
464
 
489
465
 
490
466
  async def rank_relations_reranker(
@@ -615,6 +615,19 @@ async def merge_suggest_entities_results(
615
615
  return RelatedEntities(entities=list(unique_entities), total=len(unique_entities))
616
616
 
617
617
 
618
+ def merge_relation_prefix_results(
619
+ responses: list[SearchResponse],
620
+ ) -> RelatedEntities:
621
+ unique_entities: Set[RelatedEntity] = set()
622
+ for response in responses:
623
+ response_entities = (
624
+ RelatedEntity(family=e.subtype, value=e.value) for e in response.relation.prefix.nodes
625
+ )
626
+ unique_entities.update(response_entities)
627
+
628
+ return RelatedEntities(entities=list(unique_entities), total=len(unique_entities))
629
+
630
+
618
631
  async def merge_suggest_results(
619
632
  suggest_responses: list[SuggestResponse],
620
633
  kbid: str,
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: nucliadb
3
- Version: 6.2.1.post3059
3
+ Version: 6.2.1.post3067
4
4
  Home-page: https://docs.nuclia.dev/docs/management/nucliadb/intro
5
5
  Author: NucliaDB Community
6
6
  Author-email: nucliadb@nuclia.com
@@ -22,10 +22,10 @@ Classifier: Programming Language :: Python :: 3.12
22
22
  Classifier: Programming Language :: Python :: 3 :: Only
23
23
  Requires-Python: >=3.9, <4
24
24
  Description-Content-Type: text/markdown
25
- Requires-Dist: nucliadb-telemetry[all]>=6.2.1.post3059
26
- Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.2.1.post3059
27
- Requires-Dist: nucliadb-protos>=6.2.1.post3059
28
- Requires-Dist: nucliadb-models>=6.2.1.post3059
25
+ Requires-Dist: nucliadb-telemetry[all]>=6.2.1.post3067
26
+ Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.2.1.post3067
27
+ Requires-Dist: nucliadb-protos>=6.2.1.post3067
28
+ Requires-Dist: nucliadb-models>=6.2.1.post3067
29
29
  Requires-Dist: nucliadb-admin-assets>=1.0.0.post1224
30
30
  Requires-Dist: nuclia-models>=0.24.2
31
31
  Requires-Dist: uvicorn
@@ -210,9 +210,9 @@ nucliadb/search/search/fetch.py,sha256=XJHIFnZmXM_8Kb37lb4lg1GYG7cZ1plT-qAIb_Qzi
210
210
  nucliadb/search/search/filters.py,sha256=1MkHlJjAQqoRCj7e5cEzK2HvBxGLE17I_omsjiklbtw,6476
211
211
  nucliadb/search/search/find.py,sha256=yQbttt85wQFc4NEaj2RNGgozP7IQx_bjAOhHke3fXY0,9890
212
212
  nucliadb/search/search/find_merge.py,sha256=_R_YpHAZv5BHh3XABQ8MRd1Ci0seclGYf26yJHJ7H0I,17178
213
- nucliadb/search/search/graph_strategy.py,sha256=6d-KjGDbOnaXQzEwyBpA-iQM0rkveVTiK3A3m2UJq8Q,33538
213
+ nucliadb/search/search/graph_strategy.py,sha256=cFcu6nrOqHfobsbUu1pOwfBndrN4ppEvv3-4jV4_4bg,31977
214
214
  nucliadb/search/search/hydrator.py,sha256=-R37gCrGxkyaiHQalnTWHNG_FCx11Zucd7qA1vQCxuw,6985
215
- nucliadb/search/search/merge.py,sha256=g0PxUejWtYIYWG-VroArMCgwB6AOp3lZMkzoHAPYBKE,22183
215
+ nucliadb/search/search/merge.py,sha256=i_PTBFRqC5iTTziOMEltxLIlmokIou5hjjgR4BnoLBE,22635
216
216
  nucliadb/search/search/metrics.py,sha256=81X-tahGW4n2CLvUzCPdNxNClmZqUWZjcVOGCUHoiUM,2872
217
217
  nucliadb/search/search/paragraphs.py,sha256=pNAEiYqJGGUVcEf7xf-PFMVqz0PX4Qb-WNG-_zPGN2o,7799
218
218
  nucliadb/search/search/pgcatalog.py,sha256=IaNK4dAxdXs38PoIkTdgqMDuZDjeiOtcXn3LeaT-OMw,8855
@@ -224,7 +224,7 @@ nucliadb/search/search/shards.py,sha256=JSRSrHgHcF4sXyuZZoJdMfK0v_LHpoSRf1lCr5-K
224
224
  nucliadb/search/search/summarize.py,sha256=ksmYPubEQvAQgfPdZHfzB_rR19B2ci4IYZ6jLdHxZo8,4996
225
225
  nucliadb/search/search/utils.py,sha256=iF2tbBA56gRMJH1TlE2hMrqeXqjoeOPt4KgRdp2m9Ek,3313
226
226
  nucliadb/search/search/chat/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
227
- nucliadb/search/search/chat/ask.py,sha256=tE1Q5V58oLMCo-T9s0N6Kko-1RWn1e4kHfbbPBsD2uU,36266
227
+ nucliadb/search/search/chat/ask.py,sha256=K85Size6WAb-q4sCn0u1drrPnqIvqCy6YbfCxQmsEt8,36348
228
228
  nucliadb/search/search/chat/exceptions.py,sha256=Siy4GXW2L7oPhIR86H3WHBhE9lkV4A4YaAszuGGUf54,1356
229
229
  nucliadb/search/search/chat/images.py,sha256=PA8VWxT5_HUGfW1ULhKTK46UBsVyINtWWqEM1ulzX1E,3095
230
230
  nucliadb/search/search/chat/prompt.py,sha256=r2JTiRWH3YHPdeRAG5w6gD0g0fWVxdTjYIR86qAVa7k,47106
@@ -329,9 +329,9 @@ nucliadb/writer/tus/local.py,sha256=7jYa_w9b-N90jWgN2sQKkNcomqn6JMVBOVeDOVYJHto,
329
329
  nucliadb/writer/tus/s3.py,sha256=vF0NkFTXiXhXq3bCVXXVV-ED38ECVoUeeYViP8uMqcU,8357
330
330
  nucliadb/writer/tus/storage.py,sha256=ToqwjoYnjI4oIcwzkhha_MPxi-k4Jk3Lt55zRwaC1SM,2903
331
331
  nucliadb/writer/tus/utils.py,sha256=MSdVbRsRSZVdkaum69_0wku7X3p5wlZf4nr6E0GMKbw,2556
332
- nucliadb-6.2.1.post3059.dist-info/METADATA,sha256=dGsG9jFB0KwE5eGxUB1DhoaqJmIaUzyiTDKhOJ0eSdg,4603
333
- nucliadb-6.2.1.post3059.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
334
- nucliadb-6.2.1.post3059.dist-info/entry_points.txt,sha256=XqGfgFDuY3zXQc8ewXM2TRVjTModIq851zOsgrmaXx4,1268
335
- nucliadb-6.2.1.post3059.dist-info/top_level.txt,sha256=hwYhTVnX7jkQ9gJCkVrbqEG1M4lT2F_iPQND1fCzF80,20
336
- nucliadb-6.2.1.post3059.dist-info/zip-safe,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
337
- nucliadb-6.2.1.post3059.dist-info/RECORD,,
332
+ nucliadb-6.2.1.post3067.dist-info/METADATA,sha256=V33nguAqDHoZDoT_3Vfc2w3HTnobJN1C_uB-tlKWGMU,4603
333
+ nucliadb-6.2.1.post3067.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
334
+ nucliadb-6.2.1.post3067.dist-info/entry_points.txt,sha256=XqGfgFDuY3zXQc8ewXM2TRVjTModIq851zOsgrmaXx4,1268
335
+ nucliadb-6.2.1.post3067.dist-info/top_level.txt,sha256=hwYhTVnX7jkQ9gJCkVrbqEG1M4lT2F_iPQND1fCzF80,20
336
+ nucliadb-6.2.1.post3067.dist-info/zip-safe,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
337
+ nucliadb-6.2.1.post3067.dist-info/RECORD,,