nucliadb 6.2.1.post3063__py3-none-any.whl → 6.2.1.post3071__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -104,6 +104,14 @@ class KBShardManager:
104
104
  errors.capture_exception(exc)
105
105
  raise NodeError("Node unavailable for operation") from exc
106
106
 
107
+ for result in results:
108
+ if isinstance(result, Exception):
109
+ errors.capture_exception(result)
110
+ raise NodeError(
111
+ f"Error while applying {aw.__name__} for all shards. Other similar errors may have been shadowed.\n"
112
+ f"{type(result).__name__}: {result}"
113
+ ) from result
114
+
107
115
  return results
108
116
 
109
117
  # TODO: move to data manager
@@ -26,7 +26,6 @@ from nucliadb.common.cluster.base import AbstractIndexNode
26
26
  from nucliadb.common.cluster.exceptions import (
27
27
  AlreadyExists,
28
28
  EntitiesGroupNotFound,
29
- NodeError,
30
29
  )
31
30
  from nucliadb.common.cluster.utils import get_shard_manager
32
31
  from nucliadb.common.datamanagers.entities import (
@@ -54,7 +53,6 @@ from nucliadb_protos.nodereader_pb2 import (
54
53
  )
55
54
  from nucliadb_protos.utils_pb2 import RelationNode
56
55
  from nucliadb_protos.writer_pb2 import GetEntitiesResponse
57
- from nucliadb_telemetry import errors
58
56
 
59
57
  from .exceptions import EntityManagementException
60
58
 
@@ -226,10 +224,6 @@ class EntitiesManager:
226
224
  settings.relation_search_timeout,
227
225
  use_read_replica_nodes=self.use_read_replica_nodes,
228
226
  )
229
- for result in results:
230
- if isinstance(result, Exception):
231
- errors.capture_exception(result)
232
- raise NodeError("Error while querying relation index")
233
227
 
234
228
  entities = {}
235
229
  for result in results:
@@ -305,6 +299,7 @@ class EntitiesManager:
305
299
  shard_manager = get_shard_manager()
306
300
 
307
301
  async def query_indexed_entities_group_names(node: AbstractIndexNode, shard_id: str) -> set[str]:
302
+ """Search all relation types"""
308
303
  request = SearchRequest(
309
304
  shard=shard_id,
310
305
  result_per_page=0,
@@ -316,10 +311,11 @@ class EntitiesManager:
316
311
  response: SearchResponse = await query_shard(node, shard_id, request)
317
312
  try:
318
313
  facetresults = response.document.facets["/e"].facetresults
319
- return {facet.tag.split("/")[-1] for facet in facetresults}
320
314
  except KeyError:
321
315
  # No entities found
322
316
  return set()
317
+ else:
318
+ return {facet.tag.split("/")[-1] for facet in facetresults}
323
319
 
324
320
  results = await shard_manager.apply_for_all_shards(
325
321
  self.kbid,
@@ -327,10 +323,6 @@ class EntitiesManager:
327
323
  settings.relation_types_timeout,
328
324
  use_read_replica_nodes=self.use_read_replica_nodes,
329
325
  )
330
- for result in results:
331
- if isinstance(result, Exception):
332
- errors.capture_exception(result)
333
- raise NodeError("Error while looking for relations types")
334
326
 
335
327
  if not results:
336
328
  return set()
@@ -719,9 +719,19 @@ async def retrieval_in_kb(
719
719
  prequeries = parse_prequeries(ask_request)
720
720
  graph_strategy = parse_graph_strategy(ask_request)
721
721
  with metrics.time("retrieval"):
722
- prequeries_results = None
722
+ main_results, prequeries_results, query_parser = await get_find_results(
723
+ kbid=kbid,
724
+ query=main_query,
725
+ item=ask_request,
726
+ ndb_client=client_type,
727
+ user=user_id,
728
+ origin=origin,
729
+ metrics=metrics,
730
+ prequeries_strategy=prequeries,
731
+ )
732
+
723
733
  if graph_strategy is not None:
724
- main_results, query_parser = await get_graph_results(
734
+ graph_results, graph_request = await get_graph_results(
725
735
  kbid=kbid,
726
736
  query=main_query,
727
737
  item=ask_request,
@@ -732,18 +742,13 @@ async def retrieval_in_kb(
732
742
  metrics=metrics,
733
743
  shards=ask_request.shards,
734
744
  )
735
- # TODO (oni): Fallback to normal retrieval if no graph results are found
736
- else:
737
- main_results, prequeries_results, query_parser = await get_find_results(
738
- kbid=kbid,
739
- query=main_query,
740
- item=ask_request,
741
- ndb_client=client_type,
742
- user=user_id,
743
- origin=origin,
744
- metrics=metrics,
745
- prequeries_strategy=prequeries,
746
- )
745
+
746
+ if prequeries_results is None:
747
+ prequeries_results = []
748
+
749
+ prequery = PreQuery(id="graph", request=graph_request, weight=graph_strategy.weight)
750
+ prequeries_results.append((prequery, graph_results))
751
+
747
752
  if len(main_results.resources) == 0 and all(
748
753
  len(prequery_result.resources) == 0 for (_, prequery_result) in prequeries_results or []
749
754
  ):
@@ -17,11 +17,9 @@
17
17
  # You should have received a copy of the GNU Affero General Public License
18
18
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
19
 
20
- import asyncio
21
20
  import heapq
22
21
  import json
23
22
  from collections import defaultdict
24
- from datetime import datetime
25
23
  from typing import Any, Collection, Iterable, Optional, Union
26
24
 
27
25
  from nuclia_models.predict.generative_responses import (
@@ -46,9 +44,8 @@ from nucliadb.search.search.find_merge import (
46
44
  hydrate_and_rerank,
47
45
  )
48
46
  from nucliadb.search.search.hydrator import ResourceHydrationOptions, TextBlockHydrationOptions
49
- from nucliadb.search.search.merge import merge_suggest_results
47
+ from nucliadb.search.search.merge import merge_relation_prefix_results
50
48
  from nucliadb.search.search.metrics import RAGMetrics
51
- from nucliadb.search.search.query import QueryParser
52
49
  from nucliadb.search.search.rerankers import Reranker, RerankingOptions
53
50
  from nucliadb.search.utilities import get_predict
54
51
  from nucliadb_models.common import FieldTypeName
@@ -62,11 +59,12 @@ from nucliadb_models.search import (
62
59
  ChatModel,
63
60
  DirectionalRelation,
64
61
  EntitySubgraph,
62
+ FindRequest,
65
63
  GraphStrategy,
66
64
  KnowledgeboxFindResults,
67
- KnowledgeboxSuggestResults,
68
65
  NucliaDBClientType,
69
66
  QueryEntityDetection,
67
+ RelatedEntities,
70
68
  RelationDirection,
71
69
  RelationRanking,
72
70
  Relations,
@@ -308,7 +306,7 @@ async def get_graph_results(
308
306
  generative_model: Optional[str] = None,
309
307
  metrics: RAGMetrics = RAGMetrics(),
310
308
  shards: Optional[list[str]] = None,
311
- ) -> tuple[KnowledgeboxFindResults, QueryParser]:
309
+ ) -> tuple[KnowledgeboxFindResults, FindRequest]:
312
310
  relations = Relations(entities={})
313
311
  explored_entities: set[str] = set()
314
312
  scores: dict[str, list[float]] = {}
@@ -321,23 +319,18 @@ async def get_graph_results(
321
319
  # Get the entities from the query
322
320
  with metrics.time("graph_strat_query_entities"):
323
321
  if graph_strategy.query_entity_detection == QueryEntityDetection.SUGGEST:
324
- suggest_result = await fuzzy_search_entities(
322
+ relation_result = await fuzzy_search_entities(
325
323
  kbid=kbid,
326
324
  query=query,
327
- range_creation_start=item.range_creation_start,
328
- range_creation_end=item.range_creation_end,
329
- range_modification_start=item.range_modification_start,
330
- range_modification_end=item.range_modification_end,
331
- target_shard_replicas=shards,
332
325
  )
333
- if suggest_result.entities is not None:
326
+ if relation_result is not None:
334
327
  entities_to_explore = (
335
328
  RelationNode(
336
329
  ntype=RelationNode.NodeType.ENTITY,
337
330
  value=result.value,
338
331
  subtype=result.family,
339
332
  )
340
- for result in suggest_result.entities.entities
333
+ for result in relation_result.entities
341
334
  )
342
335
  elif (
343
336
  not entities_to_explore
@@ -361,6 +354,7 @@ async def get_graph_results(
361
354
  for relation in subgraph.related_to
362
355
  if relation.entity not in explored_entities
363
356
  )
357
+
364
358
  # Get the relations for the new entities
365
359
  with metrics.time("graph_strat_neighbor_relations"):
366
360
  try:
@@ -437,54 +431,36 @@ async def get_graph_results(
437
431
  field_type_filter=find_request.field_type_filter,
438
432
  relation_text_as_paragraphs=graph_strategy.relation_text_as_paragraphs,
439
433
  )
440
- return find_results, query_parser
434
+ return find_results, find_request
441
435
 
442
436
 
443
437
  async def fuzzy_search_entities(
444
438
  kbid: str,
445
439
  query: str,
446
- range_creation_start: Optional[datetime] = None,
447
- range_creation_end: Optional[datetime] = None,
448
- range_modification_start: Optional[datetime] = None,
449
- range_modification_end: Optional[datetime] = None,
450
- target_shard_replicas: Optional[list[str]] = None,
451
- ) -> KnowledgeboxSuggestResults:
440
+ ) -> Optional[RelatedEntities]:
452
441
  """Fuzzy find entities in KB given a query using the same methodology as /suggest, but split by words."""
453
442
 
454
- base_request = nodereader_pb2.SuggestRequest(
455
- body="", features=[nodereader_pb2.SuggestFeatures.ENTITIES]
456
- )
457
- if range_creation_start is not None:
458
- base_request.timestamps.from_created.FromDatetime(range_creation_start)
459
- if range_creation_end is not None:
460
- base_request.timestamps.to_created.FromDatetime(range_creation_end)
461
- if range_modification_start is not None:
462
- base_request.timestamps.from_modified.FromDatetime(range_modification_start)
463
- if range_modification_end is not None:
464
- base_request.timestamps.to_modified.FromDatetime(range_modification_end)
465
-
466
- tasks = []
467
- # XXX: Splitting by words is not ideal, in the future, modify suggest to better handle this
468
- for word in query.split():
469
- if len(word) < 3:
470
- continue
471
- request = nodereader_pb2.SuggestRequest()
472
- request.CopyFrom(base_request)
473
- request.body = word
474
- tasks.append(
475
- node_query(kbid, Method.SUGGEST, request, target_shard_replicas=target_shard_replicas)
476
- )
443
+ request = nodereader_pb2.SearchRequest()
444
+ request.relation_prefix.query = query
477
445
 
446
+ results: list[nodereader_pb2.SearchResponse]
478
447
  try:
479
- results_raw = await asyncio.gather(*tasks)
480
- return await merge_suggest_results(
481
- [item for r in results_raw for item in r[0]],
482
- kbid=kbid,
448
+ (
449
+ results,
450
+ _,
451
+ _,
452
+ ) = await node_query(
453
+ kbid,
454
+ Method.SEARCH,
455
+ request,
456
+ use_read_replica_nodes=True,
457
+ retry_on_primary=False,
483
458
  )
459
+ return merge_relation_prefix_results(results)
484
460
  except Exception as e:
485
461
  capture_exception(e)
486
462
  logger.exception("Error in finding entities in query for graph strategy")
487
- return KnowledgeboxSuggestResults(entities=None)
463
+ return None
488
464
 
489
465
 
490
466
  async def rank_relations_reranker(
@@ -615,6 +615,19 @@ async def merge_suggest_entities_results(
615
615
  return RelatedEntities(entities=list(unique_entities), total=len(unique_entities))
616
616
 
617
617
 
618
+ def merge_relation_prefix_results(
619
+ responses: list[SearchResponse],
620
+ ) -> RelatedEntities:
621
+ unique_entities: Set[RelatedEntity] = set()
622
+ for response in responses:
623
+ response_entities = (
624
+ RelatedEntity(family=e.subtype, value=e.value) for e in response.relation.prefix.nodes
625
+ )
626
+ unique_entities.update(response_entities)
627
+
628
+ return RelatedEntities(entities=list(unique_entities), total=len(unique_entities))
629
+
630
+
618
631
  async def merge_suggest_results(
619
632
  suggest_responses: list[SuggestResponse],
620
633
  kbid: str,
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: nucliadb
3
- Version: 6.2.1.post3063
3
+ Version: 6.2.1.post3071
4
4
  Home-page: https://docs.nuclia.dev/docs/management/nucliadb/intro
5
5
  Author: NucliaDB Community
6
6
  Author-email: nucliadb@nuclia.com
@@ -22,10 +22,10 @@ Classifier: Programming Language :: Python :: 3.12
22
22
  Classifier: Programming Language :: Python :: 3 :: Only
23
23
  Requires-Python: >=3.9, <4
24
24
  Description-Content-Type: text/markdown
25
- Requires-Dist: nucliadb-telemetry[all]>=6.2.1.post3063
26
- Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.2.1.post3063
27
- Requires-Dist: nucliadb-protos>=6.2.1.post3063
28
- Requires-Dist: nucliadb-models>=6.2.1.post3063
25
+ Requires-Dist: nucliadb-telemetry[all]>=6.2.1.post3071
26
+ Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.2.1.post3071
27
+ Requires-Dist: nucliadb-protos>=6.2.1.post3071
28
+ Requires-Dist: nucliadb-models>=6.2.1.post3071
29
29
  Requires-Dist: nucliadb-admin-assets>=1.0.0.post1224
30
30
  Requires-Dist: nuclia-models>=0.24.2
31
31
  Requires-Dist: uvicorn
@@ -47,7 +47,7 @@ nucliadb/common/cluster/base.py,sha256=kklDqyvsubNX0W494ttl9f3E58lGaX6AXqAd8XX8Z
47
47
  nucliadb/common/cluster/exceptions.py,sha256=V3c_fgH00GyJ-a5CaGLhwTuhwhUNR9YAGvS5jaRuc_Y,1495
48
48
  nucliadb/common/cluster/grpc_node_dummy.py,sha256=L85wBnfab7Rev0CfsfUjPxQC6DiHPsETKrZAOLx9XHg,3510
49
49
  nucliadb/common/cluster/index_node.py,sha256=g38H1kiAliF3Y6et_CWYInpn_xPxf7THAFJ7RtgLNZo,3246
50
- nucliadb/common/cluster/manager.py,sha256=3UnYwVb-ZykYfLndxM7TLw7-2T_vxqoFXMu0Pzxh5-A,15327
50
+ nucliadb/common/cluster/manager.py,sha256=cj8yNIspsmdyGUq3vbyW--r-LcxAUqA4S9tocz6MsCM,15695
51
51
  nucliadb/common/cluster/rebalance.py,sha256=jSEYsPgs_Dobv3FOaKl5arBko4s8JlWkahm8LOzgNnE,9135
52
52
  nucliadb/common/cluster/rollover.py,sha256=dx6AF9ywKP10iBNlcoJgRV40921fOPpVWaCUU54hztE,25823
53
53
  nucliadb/common/cluster/settings.py,sha256=TMoym-cZsQ2soWfLAce0moSa2XncttQyhahL43LrWTo,3384
@@ -128,7 +128,7 @@ nucliadb/ingest/fields/text.py,sha256=tFvSQJAe0W7ePpp2_WDfLiE2yglR1OTU0Zht9acvOF
128
128
  nucliadb/ingest/orm/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
129
129
  nucliadb/ingest/orm/brain.py,sha256=UND5EsNUdd7XdjScYqRqg4r_xCx3l-My8alGw5M9CWg,28398
130
130
  nucliadb/ingest/orm/broker_message.py,sha256=ZEMueoGuuRKO4tHgzc0P0AM1Ls1TTYey_4UvRQf0BpY,6915
131
- nucliadb/ingest/orm/entities.py,sha256=5d6Gfo-Yz-rns_mNJeRqiGaPeWpUMgSKZnmWIGMLCKo,15537
131
+ nucliadb/ingest/orm/entities.py,sha256=sBhg8eahsWVwO34KoAJV2YRix4Uw5GINx3srJWxRC9k,15148
132
132
  nucliadb/ingest/orm/exceptions.py,sha256=k4Esv4NtL4TrGTcsQpwrSfDhPQpiYcRbB1SpYmBX5MY,1432
133
133
  nucliadb/ingest/orm/knowledgebox.py,sha256=IGOPvBR1qXqDxE5DeiOdYCLdPgjzOVVpsASJ2zYvWwQ,23651
134
134
  nucliadb/ingest/orm/metrics.py,sha256=OkwMSPKLZcKba0ZTwtTiIxwBgaLMX5ydhGieKvi2y7E,1096
@@ -210,9 +210,9 @@ nucliadb/search/search/fetch.py,sha256=XJHIFnZmXM_8Kb37lb4lg1GYG7cZ1plT-qAIb_Qzi
210
210
  nucliadb/search/search/filters.py,sha256=1MkHlJjAQqoRCj7e5cEzK2HvBxGLE17I_omsjiklbtw,6476
211
211
  nucliadb/search/search/find.py,sha256=yQbttt85wQFc4NEaj2RNGgozP7IQx_bjAOhHke3fXY0,9890
212
212
  nucliadb/search/search/find_merge.py,sha256=_R_YpHAZv5BHh3XABQ8MRd1Ci0seclGYf26yJHJ7H0I,17178
213
- nucliadb/search/search/graph_strategy.py,sha256=6d-KjGDbOnaXQzEwyBpA-iQM0rkveVTiK3A3m2UJq8Q,33538
213
+ nucliadb/search/search/graph_strategy.py,sha256=cFcu6nrOqHfobsbUu1pOwfBndrN4ppEvv3-4jV4_4bg,31977
214
214
  nucliadb/search/search/hydrator.py,sha256=-R37gCrGxkyaiHQalnTWHNG_FCx11Zucd7qA1vQCxuw,6985
215
- nucliadb/search/search/merge.py,sha256=g0PxUejWtYIYWG-VroArMCgwB6AOp3lZMkzoHAPYBKE,22183
215
+ nucliadb/search/search/merge.py,sha256=i_PTBFRqC5iTTziOMEltxLIlmokIou5hjjgR4BnoLBE,22635
216
216
  nucliadb/search/search/metrics.py,sha256=81X-tahGW4n2CLvUzCPdNxNClmZqUWZjcVOGCUHoiUM,2872
217
217
  nucliadb/search/search/paragraphs.py,sha256=pNAEiYqJGGUVcEf7xf-PFMVqz0PX4Qb-WNG-_zPGN2o,7799
218
218
  nucliadb/search/search/pgcatalog.py,sha256=IaNK4dAxdXs38PoIkTdgqMDuZDjeiOtcXn3LeaT-OMw,8855
@@ -224,7 +224,7 @@ nucliadb/search/search/shards.py,sha256=JSRSrHgHcF4sXyuZZoJdMfK0v_LHpoSRf1lCr5-K
224
224
  nucliadb/search/search/summarize.py,sha256=ksmYPubEQvAQgfPdZHfzB_rR19B2ci4IYZ6jLdHxZo8,4996
225
225
  nucliadb/search/search/utils.py,sha256=iF2tbBA56gRMJH1TlE2hMrqeXqjoeOPt4KgRdp2m9Ek,3313
226
226
  nucliadb/search/search/chat/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
227
- nucliadb/search/search/chat/ask.py,sha256=tE1Q5V58oLMCo-T9s0N6Kko-1RWn1e4kHfbbPBsD2uU,36266
227
+ nucliadb/search/search/chat/ask.py,sha256=K85Size6WAb-q4sCn0u1drrPnqIvqCy6YbfCxQmsEt8,36348
228
228
  nucliadb/search/search/chat/exceptions.py,sha256=Siy4GXW2L7oPhIR86H3WHBhE9lkV4A4YaAszuGGUf54,1356
229
229
  nucliadb/search/search/chat/images.py,sha256=PA8VWxT5_HUGfW1ULhKTK46UBsVyINtWWqEM1ulzX1E,3095
230
230
  nucliadb/search/search/chat/prompt.py,sha256=r2JTiRWH3YHPdeRAG5w6gD0g0fWVxdTjYIR86qAVa7k,47106
@@ -329,9 +329,9 @@ nucliadb/writer/tus/local.py,sha256=7jYa_w9b-N90jWgN2sQKkNcomqn6JMVBOVeDOVYJHto,
329
329
  nucliadb/writer/tus/s3.py,sha256=vF0NkFTXiXhXq3bCVXXVV-ED38ECVoUeeYViP8uMqcU,8357
330
330
  nucliadb/writer/tus/storage.py,sha256=ToqwjoYnjI4oIcwzkhha_MPxi-k4Jk3Lt55zRwaC1SM,2903
331
331
  nucliadb/writer/tus/utils.py,sha256=MSdVbRsRSZVdkaum69_0wku7X3p5wlZf4nr6E0GMKbw,2556
332
- nucliadb-6.2.1.post3063.dist-info/METADATA,sha256=LB2WO-kk-GSCoSk1ef5S7xI1WmB7BTeo_6WvD2RdtlM,4603
333
- nucliadb-6.2.1.post3063.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
334
- nucliadb-6.2.1.post3063.dist-info/entry_points.txt,sha256=XqGfgFDuY3zXQc8ewXM2TRVjTModIq851zOsgrmaXx4,1268
335
- nucliadb-6.2.1.post3063.dist-info/top_level.txt,sha256=hwYhTVnX7jkQ9gJCkVrbqEG1M4lT2F_iPQND1fCzF80,20
336
- nucliadb-6.2.1.post3063.dist-info/zip-safe,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
337
- nucliadb-6.2.1.post3063.dist-info/RECORD,,
332
+ nucliadb-6.2.1.post3071.dist-info/METADATA,sha256=mbRwoP8OS4lUygXdPQgnAcU7Q3o7AunTre_KXI5E66o,4603
333
+ nucliadb-6.2.1.post3071.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
334
+ nucliadb-6.2.1.post3071.dist-info/entry_points.txt,sha256=XqGfgFDuY3zXQc8ewXM2TRVjTModIq851zOsgrmaXx4,1268
335
+ nucliadb-6.2.1.post3071.dist-info/top_level.txt,sha256=hwYhTVnX7jkQ9gJCkVrbqEG1M4lT2F_iPQND1fCzF80,20
336
+ nucliadb-6.2.1.post3071.dist-info/zip-safe,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
337
+ nucliadb-6.2.1.post3071.dist-info/RECORD,,