nucliadb 6.4.0.post4293__py3-none-any.whl → 6.4.0.post4313__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -232,26 +232,39 @@ async def purge_kb_vectorsets(driver: Driver, storage: Storage):
232
232
  async for resource in kb.iterate_resources():
233
233
  fields.extend((await resource.get_fields(force=True)).values())
234
234
 
235
- # we don't need the maindb transaction anymore to remove vectors from storage
236
- for field in fields:
237
- if purge_payload.storage_key_kind == VectorSetConfig.StorageKeyKind.UNSET:
238
- # Bw/c for purge before adding purge payload. We assume
239
- # there's only 2 kinds of KBs: with one or with more than
240
- # one vectorset. KBs with one vectorset are not allowed to
241
- # delete their vectorset, so we wouldn't be here. It has to
242
- # be a KB with multiple, so the storage key kind has to be
243
- # this:
244
- await field.delete_vectors(
245
- vectorset, VectorSetConfig.StorageKeyKind.VECTORSET_PREFIX
246
- )
247
- else:
248
- await field.delete_vectors(vectorset, purge_payload.storage_key_kind)
235
+ logger.info(f"Purging {len(fields)} fields for vectorset {vectorset}", extra={"kbid": kbid})
236
+ for fields_batch in batchify(fields, 20):
237
+ tasks = []
238
+ for field in fields_batch:
239
+ if purge_payload.storage_key_kind == VectorSetConfig.StorageKeyKind.UNSET:
240
+ # Bw/c for purge before adding purge payload. We assume
241
+ # there's only 2 kinds of KBs: with one or with more than
242
+ # one vectorset. KBs with one vectorset are not allowed to
243
+ # delete their vectorset, so we wouldn't be here. It has to
244
+ # be a KB with multiple, so the storage key kind has to be
245
+ # this:
246
+ tasks.append(
247
+ asyncio.create_task(
248
+ field.delete_vectors(
249
+ vectorset, VectorSetConfig.StorageKeyKind.VECTORSET_PREFIX
250
+ )
251
+ )
252
+ )
253
+ else:
254
+ tasks.append(
255
+ asyncio.create_task(
256
+ field.delete_vectors(vectorset, purge_payload.storage_key_kind)
257
+ )
258
+ )
259
+ await asyncio.gather(*tasks)
249
260
 
250
261
  # Finally, delete the key
251
262
  async with driver.transaction() as txn:
252
263
  await txn.delete(key)
253
264
  await txn.commit()
254
265
 
266
+ logger.info(f"Finished purging vectorset {vectorset} for KB", extra={"kbid": kbid})
267
+
255
268
  except Exception as exc:
256
269
  errors.capture_exception(exc)
257
270
  logger.error(
@@ -304,3 +317,9 @@ def run() -> int: # pragma: no cover
304
317
  setup_logging()
305
318
  errors.setup_error_handling(importlib.metadata.distribution("nucliadb").version)
306
319
  return asyncio.run(main())
320
+
321
+
def batchify(iterable, n=1):
    """Yield successive chunks of at most ``n`` items from ``iterable``.

    Inputs that support ``len()`` are sliced, so every chunk has the same
    type as the input (a list yields lists, a tuple yields tuples). Inputs
    without a length, such as generators, are consumed lazily and yielded
    as lists.

    Args:
        iterable: Sized, sliceable sequence or any other iterable to split.
        n: Maximum chunk size. Must be at least 1.

    Yields:
        Consecutive chunks in input order; only the last chunk may contain
        fewer than ``n`` items.

    Raises:
        ValueError: If ``n`` is smaller than 1. The check runs when the
            generator is first advanced, not when ``batchify`` is called.
    """
    # A zero step makes range() fail with an unrelated-looking message and a
    # negative step silently produces no batches at all, which would make the
    # caller skip its work without noticing. Reject both explicitly.
    if n < 1:
        raise ValueError(f"batch size must be a positive integer, got {n!r}")

    try:
        total = len(iterable)
    except TypeError:
        # No length available (e.g. a generator): pull n items at a time.
        from itertools import islice

        iterator = iter(iterable)
        while True:
            chunk = list(islice(iterator, n))
            if not chunk:
                return
            yield chunk

    for start in range(0, total, n):
        yield iterable[start : start + n]
@@ -21,7 +21,7 @@ from fastapi import Header, Request, Response
21
21
  from fastapi_versioning import version
22
22
 
23
23
  from nucliadb.search.api.v1.router import KB_PREFIX, api
24
- from nucliadb.search.requesters.utils import Method, node_query
24
+ from nucliadb.search.requesters.utils import Method, nidx_query
25
25
  from nucliadb.search.search.graph_merge import (
26
26
  build_graph_nodes_response,
27
27
  build_graph_relations_response,
@@ -71,7 +71,7 @@ async def graph_search_knowledgebox(
71
71
  ) -> GraphSearchResponse:
72
72
  pb_query = await parse_graph_search(kbid, item)
73
73
 
74
- results, _, _ = await node_query(kbid, Method.GRAPH, pb_query)
74
+ results, _ = await nidx_query(kbid, Method.GRAPH, pb_query)
75
75
 
76
76
  return build_graph_response(results)
77
77
 
@@ -98,7 +98,7 @@ async def graph_nodes_search_knowledgebox(
98
98
  ) -> GraphNodesSearchResponse:
99
99
  pb_query = await parse_graph_node_search(kbid, item)
100
100
 
101
- results, _, _ = await node_query(kbid, Method.GRAPH, pb_query)
101
+ results, _ = await nidx_query(kbid, Method.GRAPH, pb_query)
102
102
 
103
103
  return build_graph_nodes_response(results)
104
104
 
@@ -125,6 +125,6 @@ async def graph_relations_search_knowledgebox(
125
125
  ) -> GraphRelationsSearchResponse:
126
126
  pb_query = await parse_graph_relation_search(kbid, item)
127
127
 
128
- results, _, _ = await node_query(kbid, Method.GRAPH, pb_query)
128
+ results, _ = await nidx_query(kbid, Method.GRAPH, pb_query)
129
129
 
130
130
  return build_graph_relations_response(results)
@@ -27,7 +27,7 @@ from pydantic import ValidationError
27
27
  from nucliadb.models.responses import HTTPClientError
28
28
  from nucliadb.search.api.v1.router import KB_PREFIX, RESOURCE_PREFIX, api
29
29
  from nucliadb.search.api.v1.utils import fastapi_query
30
- from nucliadb.search.requesters.utils import Method, node_query
30
+ from nucliadb.search.requesters.utils import Method, nidx_query
31
31
  from nucliadb.search.search import cache
32
32
  from nucliadb.search.search.exceptions import InvalidQueryError
33
33
  from nucliadb.search.search.merge import merge_paragraphs_results
@@ -110,7 +110,7 @@ async def resource_search(
110
110
  detail = json.loads(exc.json())
111
111
  return HTTPClientError(status_code=422, detail=detail)
112
112
 
113
- results, incomplete_results, queried_shards = await node_query(kbid, Method.SEARCH, pb_query)
113
+ results, queried_shards = await nidx_query(kbid, Method.SEARCH, pb_query)
114
114
 
115
115
  # We need to merge
116
116
  search_results = await merge_paragraphs_results(
@@ -120,8 +120,5 @@ async def resource_search(
120
120
  highlight_split=highlight,
121
121
  min_score=0.0,
122
122
  )
123
-
124
- response.status_code = 206 if incomplete_results else 200
125
-
126
123
  search_results.shards = queried_shards
127
124
  return search_results
@@ -32,7 +32,7 @@ from nucliadb.models.responses import HTTPClientError
32
32
  from nucliadb.search import predict
33
33
  from nucliadb.search.api.v1.router import KB_PREFIX, api
34
34
  from nucliadb.search.api.v1.utils import fastapi_query
35
- from nucliadb.search.requesters.utils import Method, node_query
35
+ from nucliadb.search.requesters.utils import Method, nidx_query
36
36
  from nucliadb.search.search import cache
37
37
  from nucliadb.search.search.exceptions import InvalidQueryError
38
38
  from nucliadb.search.search.merge import merge_results
@@ -265,8 +265,7 @@ async def search(
265
265
  pb_query, incomplete_results, autofilters, _ = await legacy_convert_retrieval_to_proto(parsed)
266
266
 
267
267
  # We need to query all nodes
268
- results, query_incomplete_results, queried_shards = await node_query(kbid, Method.SEARCH, pb_query)
269
- incomplete_results = incomplete_results or query_incomplete_results
268
+ results, queried_shards = await nidx_query(kbid, Method.SEARCH, pb_query)
270
269
 
271
270
  # We need to merge
272
271
  search_results = await merge_results(
@@ -28,7 +28,7 @@ from pydantic import ValidationError
28
28
  from nucliadb.models.responses import HTTPClientError
29
29
  from nucliadb.search.api.v1.router import KB_PREFIX, api
30
30
  from nucliadb.search.api.v1.utils import fastapi_query
31
- from nucliadb.search.requesters.utils import Method, node_query
31
+ from nucliadb.search.requesters.utils import Method, nidx_query
32
32
  from nucliadb.search.search import cache
33
33
  from nucliadb.search.search.exceptions import InvalidQueryError
34
34
  from nucliadb.search.search.merge import merge_suggest_results
@@ -160,7 +160,7 @@ async def suggest(
160
160
  range_modification_end,
161
161
  hidden,
162
162
  )
163
- results, incomplete_results, queried_shards = await node_query(kbid, Method.SUGGEST, pb_query)
163
+ results, queried_shards = await nidx_query(kbid, Method.SUGGEST, pb_query)
164
164
 
165
165
  # We need to merge
166
166
  search_results = await merge_suggest_results(
@@ -168,9 +168,6 @@ async def suggest(
168
168
  kbid=kbid,
169
169
  highlight=highlight,
170
170
  )
171
-
172
- response.status_code = 206 if incomplete_results else 200
173
-
174
171
  if debug and queried_shards:
175
172
  search_results.shards = queried_shards
176
173
 
@@ -71,38 +71,38 @@ T = TypeVar(
71
71
 
72
72
 
73
73
  @overload
74
- async def node_query(
74
+ async def nidx_query(
75
75
  kbid: str,
76
76
  method: Method,
77
77
  pb_query: SuggestRequest,
78
78
  timeout: Optional[float] = None,
79
- ) -> tuple[list[SuggestResponse], bool, list[str]]: ...
79
+ ) -> tuple[list[SuggestResponse], list[str]]: ...
80
80
 
81
81
 
82
82
  @overload
83
- async def node_query(
83
+ async def nidx_query(
84
84
  kbid: str,
85
85
  method: Method,
86
86
  pb_query: SearchRequest,
87
87
  timeout: Optional[float] = None,
88
- ) -> tuple[list[SearchResponse], bool, list[str]]: ...
88
+ ) -> tuple[list[SearchResponse], list[str]]: ...
89
89
 
90
90
 
91
91
  @overload
92
- async def node_query(
92
+ async def nidx_query(
93
93
  kbid: str,
94
94
  method: Method,
95
95
  pb_query: GraphSearchRequest,
96
96
  timeout: Optional[float] = None,
97
- ) -> tuple[list[GraphSearchResponse], bool, list[str]]: ...
97
+ ) -> tuple[list[GraphSearchResponse], list[str]]: ...
98
98
 
99
99
 
100
- async def node_query(
100
+ async def nidx_query(
101
101
  kbid: str,
102
102
  method: Method,
103
103
  pb_query: REQUEST_TYPE,
104
104
  timeout: Optional[float] = None,
105
- ) -> tuple[Sequence[Union[T, BaseException]], bool, list[str]]:
105
+ ) -> tuple[Sequence[Union[T, BaseException]], list[str]]:
106
106
  timeout = timeout or settings.search_timeout
107
107
  shard_manager = get_shard_manager()
108
108
  try:
@@ -115,7 +115,6 @@ async def node_query(
115
115
 
116
116
  ops = []
117
117
  queried_shards = []
118
- incomplete_results = False
119
118
 
120
119
  for shard_obj in shard_groups:
121
120
  shard_id = shard_obj.nidx_shard_id
@@ -127,10 +126,10 @@ async def node_query(
127
126
  queried_shards.append(shard_id)
128
127
 
129
128
  if not ops:
130
- logger.warning(f"No node found for any of this resources shards {kbid}")
129
+ logger.warning(f"No shards found for kb", extra={"kbid": kbid})
131
130
  raise HTTPException(
132
131
  status_code=512,
133
- detail=f"No node found for any of this resources shards {kbid}",
132
+ detail=f"No shards found for kb",
134
133
  )
135
134
 
136
135
  try:
@@ -144,12 +143,12 @@ async def node_query(
144
143
  )
145
144
  results = [exc]
146
145
 
147
- error = validate_node_query_results(results or [])
146
+ error = validate_nidx_query_results(results or [])
148
147
  if error is not None:
149
148
  query_dict = MessageToDict(pb_query)
150
149
  query_dict.pop("vector", None)
151
150
  logger.error(
152
- "Error while querying nodes",
151
+ "Error while querying nidx",
153
152
  extra={
154
153
  "kbid": kbid,
155
154
  "query": json.dumps(query_dict),
@@ -157,12 +156,12 @@ async def node_query(
157
156
  )
158
157
  raise error
159
158
 
160
- return results, incomplete_results, queried_shards
159
+ return results, queried_shards
161
160
 
162
161
 
163
- def validate_node_query_results(results: list[Any]) -> Optional[HTTPException]:
162
+ def validate_nidx_query_results(results: list[Any]) -> Optional[HTTPException]:
164
163
  """
165
- Validate the results of a node query and return an exception if any error is found
164
+ Validate the results of a nidx query and return an exception if any error is found
166
165
 
167
166
  Handling of exception is responsibility of caller.
168
167
  """
@@ -175,14 +174,14 @@ def validate_node_query_results(results: list[Any]) -> Optional[HTTPException]:
175
174
  reason = "Error while querying shard data."
176
175
  if isinstance(result, AioRpcError):
177
176
  if result.code() is GrpcStatusCode.INTERNAL:
178
- # handle node response errors
177
+ # handle nidx response errors
179
178
  details = result.details() or "gRPC error without details"
180
179
  if "AllButQueryForbidden" in details:
181
180
  status_code = 412
182
181
  reason = details.split(":")[-1].strip().strip("'")
183
182
  else:
184
183
  reason = details
185
- logger.exception(f"Unhandled node error", exc_info=result)
184
+ logger.exception(f"Unhandled nidx error", exc_info=result)
186
185
  else:
187
186
  logger.error(
188
187
  f"Unhandled GRPC error while querying shard data: {result.debug_error_string()}"
@@ -28,7 +28,7 @@ from nidx_protos.nodereader_pb2 import (
28
28
  from nucliadb.common.models_utils import to_proto
29
29
  from nucliadb.search import logger
30
30
  from nucliadb.search.predict import AnswerStatusCode, RephraseResponse
31
- from nucliadb.search.requesters.utils import Method, node_query
31
+ from nucliadb.search.requesters.utils import Method, nidx_query
32
32
  from nucliadb.search.search.chat.exceptions import NoRetrievalResultsError
33
33
  from nucliadb.search.search.exceptions import IncompleteFindResultsError
34
34
  from nucliadb.search.search.find import find
@@ -246,8 +246,6 @@ async def get_relations_results(
246
246
  kbid: str,
247
247
  text_answer: str,
248
248
  timeout: Optional[float] = None,
249
- only_with_metadata: bool = False,
250
- only_agentic_relations: bool = False,
251
249
  ) -> Relations:
252
250
  try:
253
251
  predict = get_predict()
@@ -257,8 +255,6 @@ async def get_relations_results(
257
255
  kbid=kbid,
258
256
  entities=detected_entities,
259
257
  timeout=timeout,
260
- only_with_metadata=only_with_metadata,
261
- only_agentic_relations=only_agentic_relations,
262
258
  )
263
259
  except Exception as exc:
264
260
  capture_exception(exc)
@@ -271,9 +267,6 @@ async def get_relations_results_from_entities(
271
267
  kbid: str,
272
268
  entities: Iterable[RelationNode],
273
269
  timeout: Optional[float] = None,
274
- only_with_metadata: bool = False,
275
- only_agentic_relations: bool = False,
276
- only_entity_to_entity: bool = False,
277
270
  deleted_entities: set[str] = set(),
278
271
  ) -> Relations:
279
272
  entry_points = list(entities)
@@ -293,8 +286,7 @@ async def get_relations_results_from_entities(
293
286
  (
294
287
  results,
295
288
  _,
296
- _,
297
- ) = await node_query(
289
+ ) = await nidx_query(
298
290
  kbid,
299
291
  Method.SEARCH,
300
292
  request,
@@ -304,9 +296,6 @@ async def get_relations_results_from_entities(
304
296
  return await merge_relations_results(
305
297
  relations_results,
306
298
  entry_points,
307
- only_with_metadata,
308
- only_agentic_relations,
309
- only_entity_to_entity,
310
299
  )
311
300
 
312
301
 
@@ -23,7 +23,7 @@ from time import time
23
23
  from nucliadb.common.external_index_providers.base import ExternalIndexManager
24
24
  from nucliadb.common.external_index_providers.manager import get_external_index_manager
25
25
  from nucliadb.common.models_utils import to_proto
26
- from nucliadb.search.requesters.utils import Method, node_query
26
+ from nucliadb.search.requesters.utils import Method, nidx_query
27
27
  from nucliadb.search.search.find_merge import (
28
28
  build_find_response,
29
29
  compose_find_resources,
@@ -105,10 +105,7 @@ async def _index_node_retrieval(
105
105
  ) = await legacy_convert_retrieval_to_proto(parsed)
106
106
 
107
107
  with metrics.time("index_search"):
108
- results, query_incomplete_results, queried_shards = await node_query(
109
- kbid, Method.SEARCH, pb_query
110
- )
111
- incomplete_results = incomplete_results or query_incomplete_results
108
+ results, queried_shards = await nidx_query(kbid, Method.SEARCH, pb_query)
112
109
 
113
110
  # Rank fusion merge, cut, hydrate and rerank
114
111
  with metrics.time("results_merge"):
@@ -19,6 +19,7 @@
19
19
  import heapq
20
20
  import json
21
21
  from collections import defaultdict
22
+ from dataclasses import dataclass
22
23
  from typing import Any, Collection, Iterable, Optional, Union
23
24
 
24
25
  from nidx_protos import nodereader_pb2
@@ -33,16 +34,16 @@ from sentry_sdk import capture_exception
33
34
  from nucliadb.common.external_index_providers.base import TextBlockMatch
34
35
  from nucliadb.common.ids import FieldId, ParagraphId
35
36
  from nucliadb.search import logger
36
- from nucliadb.search.requesters.utils import Method, node_query
37
+ from nucliadb.search.requesters.utils import Method, nidx_query
37
38
  from nucliadb.search.search.chat.query import (
38
39
  find_request_from_ask_request,
39
- get_relations_results_from_entities,
40
40
  )
41
41
  from nucliadb.search.search.find_merge import (
42
42
  compose_find_resources,
43
43
  hydrate_and_rerank,
44
44
  )
45
45
  from nucliadb.search.search.hydrator import ResourceHydrationOptions, TextBlockHydrationOptions
46
+ from nucliadb.search.search.merge import entity_type_to_relation_node_type, merge_relations_results
46
47
  from nucliadb.search.search.metrics import Metrics
47
48
  from nucliadb.search.search.rerankers import (
48
49
  Reranker,
@@ -74,7 +75,7 @@ from nucliadb_models.search import (
74
75
  TextPosition,
75
76
  UserPrompt,
76
77
  )
77
- from nucliadb_protos.utils_pb2 import RelationNode
78
+ from nucliadb_protos.utils_pb2 import Relation, RelationNode
78
79
 
79
80
  SCHEMA = {
80
81
  "title": "score_triplets",
@@ -289,6 +290,17 @@ Now, let's get started! Here are the triplets you need to score:
289
290
  """
290
291
 
291
292
 
@dataclass(frozen=True)
class FrozenRelationNode:
    """Immutable copy of the three fields read from a ``RelationNode``.

    Being a frozen dataclass, instances compare by value and are hashable,
    which is what allows them to be collected in a ``set``.
    """

    # Node type value (one of the ``RelationNode.NodeType`` enum values)
    ntype: RelationNode.NodeType.ValueType
    # Node subtype
    subtype: str
    # Node value
    value: str
298
+
299
+
def freeze_node(r: RelationNode) -> FrozenRelationNode:
    """Build a ``FrozenRelationNode`` from the type, subtype and value of ``r``."""
    return FrozenRelationNode(
        ntype=r.ntype,
        subtype=r.subtype,
        value=r.value,
    )
302
+
303
+
292
304
  class RelationsParagraphMatch(BaseModel):
293
305
  paragraph_id: ParagraphId
294
306
  score: float
@@ -310,13 +322,12 @@ async def get_graph_results(
310
322
  shards: Optional[list[str]] = None,
311
323
  ) -> tuple[KnowledgeboxFindResults, FindRequest]:
312
324
  relations = Relations(entities={})
313
- explored_entities: set[str] = set()
325
+ explored_entities: set[FrozenRelationNode] = set()
314
326
  scores: dict[str, list[float]] = {}
315
327
  predict = get_predict()
328
+ entities_to_explore: list[RelationNode] = []
316
329
 
317
330
  for hop in range(graph_strategy.hops):
318
- entities_to_explore: Iterable[RelationNode] = []
319
-
320
331
  if hop == 0:
321
332
  # Get the entities from the query
322
333
  with metrics.time("graph_strat_query_entities"):
@@ -326,14 +337,14 @@ async def get_graph_results(
326
337
  query=query,
327
338
  )
328
339
  if relation_result is not None:
329
- entities_to_explore = (
340
+ entities_to_explore = [
330
341
  RelationNode(
331
342
  ntype=RelationNode.NodeType.ENTITY,
332
343
  value=result.value,
333
344
  subtype=result.family,
334
345
  )
335
346
  for result in relation_result.entities
336
- )
347
+ ]
337
348
  elif (
338
349
  not entities_to_explore
339
350
  or graph_strategy.query_entity_detection == QueryEntityDetection.PREDICT
@@ -353,7 +364,7 @@ async def get_graph_results(
353
364
  entities_to_explore = []
354
365
  else:
355
366
  # Find neighbors of the current relations and remove the ones already explored
356
- entities_to_explore = (
367
+ entities_to_explore = [
357
368
  RelationNode(
358
369
  ntype=RelationNode.NodeType.ENTITY,
359
370
  value=relation.entity,
@@ -361,35 +372,50 @@ async def get_graph_results(
361
372
  )
362
373
  for subgraph in relations.entities.values()
363
374
  for relation in subgraph.related_to
364
- if relation.entity not in explored_entities
365
- )
375
+ if FrozenRelationNode(
376
+ ntype=entity_type_to_relation_node_type(relation.entity_type),
377
+ subtype=relation.entity_subtype,
378
+ value=relation.entity,
379
+ )
380
+ not in explored_entities
381
+ ]
382
+
383
+ if not entities_to_explore:
384
+ break
366
385
 
367
386
  # Get the relations for the new entities
387
+ relations_results = []
368
388
  with metrics.time("graph_strat_neighbor_relations"):
369
389
  try:
370
- new_relations = await get_relations_results_from_entities(
371
- kbid=kbid,
372
- entities=entities_to_explore,
373
- timeout=5.0,
390
+ relations_results = await find_graph_neighbours(
391
+ kbid,
392
+ entities_to_explore,
393
+ explored_entities,
394
+ exclude_processor_relations=graph_strategy.exclude_processor_relations,
395
+ )
396
+ new_relations = await merge_relations_results(
397
+ relations_results,
398
+ entities_to_explore,
374
399
  only_with_metadata=not graph_strategy.relation_text_as_paragraphs,
375
- only_agentic_relations=graph_strategy.agentic_graph_only,
376
- # We only want entity to entity relations (skip resource/labels/collaborators/etc.)
377
- only_entity_to_entity=True,
378
- deleted_entities=explored_entities,
379
400
  )
380
401
  except Exception as e:
381
402
  capture_exception(e)
382
403
  logger.exception("Error in getting query relations for graph strategy")
383
404
  new_relations = Relations(entities={})
384
405
 
385
- new_subgraphs = new_relations.entities
406
+ relations.entities.update(new_relations.entities)
407
+ discovered_entities = []
386
408
 
387
- explored_entities.update(new_subgraphs.keys())
409
+ for shard in relations_results:
410
+ for node in shard.nodes:
411
+ if node not in entities_to_explore and freeze_node(node) not in explored_entities:
412
+ discovered_entities.append(node)
388
413
 
389
- if not new_subgraphs or all(not subgraph.related_to for subgraph in new_subgraphs.values()):
414
+ if not discovered_entities:
390
415
  break
391
416
 
392
- relations.entities.update(new_subgraphs)
417
+ explored_entities.update([freeze_node(n) for n in entities_to_explore])
418
+ entities_to_explore = discovered_entities
393
419
 
394
420
  # Rank the relevance of the relations
395
421
  with metrics.time("graph_strat_rank_relations"):
@@ -458,7 +484,7 @@ async def fuzzy_search_entities(
458
484
  request.query.path.bool_or.operands.append(subquery)
459
485
 
460
486
  try:
461
- results, _, _ = await node_query(kbid, Method.GRAPH, request)
487
+ results, _ = await nidx_query(kbid, Method.GRAPH, request)
462
488
  except Exception as exc:
463
489
  capture_exception(exc)
464
490
  logger.exception("Error in finding entities in query for graph strategy")
@@ -898,3 +924,51 @@ def relations_matches_to_text_block_matches(
898
924
  paragraph_matches: Collection[RelationsParagraphMatch],
899
925
  ) -> list[TextBlockMatch]:
900
926
  return [relations_match_to_text_block_match(match) for match in paragraph_matches]
927
+
928
+
async def find_graph_neighbours(
    kbid: str,
    entities_to_explore: list[RelationNode],
    explored_entities: set[FrozenRelationNode],
    exclude_processor_relations: bool,
) -> list[nodereader_pb2.GraphSearchResponse]:
    """Run a graph path query for the neighbourhood of ``entities_to_explore``.

    The request is a PATH query (``top_k=100``) whose conditions are AND-ed:

    1. the path touches, in either direction, at least one of
       ``entities_to_explore``;
    2. if ``explored_entities`` is not empty, the path touches none of them;
    3. the relation type is ``Relation.RelationType.ENTITY``;
    4. if ``exclude_processor_relations`` is set, the path matches the ``/g``
       facet.

    Args:
        kbid: Knowledge box to query.
        entities_to_explore: Nodes used as starting points.
        explored_entities: Nodes whose paths must be left out of the results.
        exclude_processor_relations: Whether to add the ``/g`` facet condition.

    Returns:
        The responses returned by ``nidx_query`` for the graph request.
    """
    PathQuery = nodereader_pb2.GraphQuery.PathQuery

    def undirected_path_from(node):
        # Works for RelationNode and FrozenRelationNode alike: both expose
        # ntype / subtype / value.
        node_query = PathQuery()
        node_query.path.source.node_type = node.ntype
        node_query.path.source.node_subtype = node.subtype
        node_query.path.source.value = node.value
        node_query.path.undirected = True
        return node_query

    request = nodereader_pb2.GraphSearchRequest(
        kind=nodereader_pb2.GraphSearchRequest.QueryKind.PATH, top_k=100
    )
    conditions = request.query.path.bool_and.operands

    # 1. Start the exploration from any of the given entities
    starting_points = PathQuery()
    for node in entities_to_explore:
        starting_points.bool_or.operands.append(undirected_path_from(node))
    conditions.append(starting_points)

    # 2. Leave out paths touching entities that are already known
    if explored_entities:
        already_known = PathQuery()
        for node in explored_entities:
            already_known.bool_not.bool_or.operands.append(undirected_path_from(node))
        conditions.append(already_known)

    # 3. Keep only relations between entities
    entity_relations = PathQuery()
    entity_relations.path.relation.relation_type = Relation.RelationType.ENTITY
    conditions.append(entity_relations)

    # 4. Drop processor relations by requiring the "/g" facet
    # NOTE(review): presumably "/g" marks non-processor relations; confirm.
    if exclude_processor_relations:
        facet_filter = PathQuery()
        facet_filter.facet.facet = "/g"
        conditions.append(facet_filter)

    responses, _ = await nidx_query(kbid, Method.GRAPH, request, timeout=5.0)
    return responses
@@ -48,7 +48,6 @@ from nucliadb.search.search.fetch import (
48
48
  from nucliadb.search.search.query_parser.models import FulltextQuery, UnitRetrieval
49
49
  from nucliadb_models.common import FieldTypeName
50
50
  from nucliadb_models.labels import translate_system_to_alias_label
51
- from nucliadb_models.metadata import RelationType
52
51
  from nucliadb_models.resource import ExtractedDataTypeName
53
52
  from nucliadb_models.search import (
54
53
  DirectionalRelation,
@@ -93,6 +92,15 @@ def relation_node_type_to_entity_type(node_type: RelationNode.NodeType.ValueType
93
92
  }[node_type]
94
93
 
95
94
 
def entity_type_to_relation_node_type(node_type: EntityType) -> RelationNode.NodeType.ValueType:
    """Translate an ``EntityType`` into the matching ``RelationNode.NodeType`` value.

    Only ENTITY, LABEL, RESOURCE and USER are mapped; any other input raises
    ``KeyError``.
    """
    node_type_by_entity_type = {
        EntityType.ENTITY: RelationNode.NodeType.ENTITY,
        EntityType.LABEL: RelationNode.NodeType.LABEL,
        EntityType.RESOURCE: RelationNode.NodeType.RESOURCE,
        EntityType.USER: RelationNode.NodeType.USER,
    }
    return node_type_by_entity_type[node_type]
102
+
103
+
96
104
  def sort_results_by_score(results: Union[list[ParagraphResult], list[DocumentResult]]):
97
105
  results.sort(key=lambda x: (x.score.bm25, x.score.booster), reverse=True)
98
106
 
@@ -442,18 +450,10 @@ async def merge_relations_results(
442
450
  graph_responses: list[GraphSearchResponse],
443
451
  query_entry_points: Iterable[RelationNode],
444
452
  only_with_metadata: bool = False,
445
- only_agentic: bool = False,
446
- only_entity_to_entity: bool = False,
447
453
  ) -> Relations:
448
454
  loop = asyncio.get_event_loop()
449
455
  return await loop.run_in_executor(
450
- None,
451
- _merge_relations_results,
452
- graph_responses,
453
- query_entry_points,
454
- only_with_metadata,
455
- only_agentic,
456
- only_entity_to_entity,
456
+ None, _merge_relations_results, graph_responses, query_entry_points, only_with_metadata
457
457
  )
458
458
 
459
459
 
@@ -461,21 +461,8 @@ def _merge_relations_results(
461
461
  graph_responses: list[GraphSearchResponse],
462
462
  query_entry_points: Iterable[RelationNode],
463
463
  only_with_metadata: bool,
464
- only_agentic: bool,
465
- only_entity_to_entity: bool,
466
464
  ) -> Relations:
467
- """Merge relation search responses into a single Relations object while applying filters.
468
-
469
- - When `only_with_metadata` is enabled, only include paths with metadata
470
- (this can include paragraph_id and entity positions among other things)
471
-
472
- - When `only_agentic` is enabled, ony include relations extracted by a Graph
473
- Extraction Agent
474
-
475
- - When `only_entity_to_entity` is enabled, only include relations between
476
- nodes with type ENTITY
477
-
478
- """
465
+ """Merge relation search responses into a single Relations object while applying filters."""
479
466
  relations = Relations(entities={})
480
467
 
481
468
  for entry_point in query_entry_points:
@@ -492,18 +479,9 @@ def _merge_relations_results(
492
479
  if path.resource_field_id is not None:
493
480
  resource_id = path.resource_field_id.split("/")[0]
494
481
 
495
- # If only_with_metadata is True, we check that metadata for the relation is not None
496
- # If only_agentic is True, we check that metadata for the relation is not None and that it has a data_augmentation_task_id
497
- # TODO: This is suboptimal, we should be able to filter this in the query to the index,
498
482
  if only_with_metadata and not metadata:
499
483
  continue
500
484
 
501
- if only_agentic and (not metadata or not metadata.data_augmentation_task_id):
502
- continue
503
-
504
- if only_entity_to_entity and relation_type != RelationType.ENTITY:
505
- continue
506
-
507
485
  if origin.value in relations.entities:
508
486
  relations.entities[origin.value].related_to.append(
509
487
  DirectionalRelation(
@@ -273,6 +273,10 @@ class _Converter:
273
273
 
274
274
 
275
275
  def is_incomplete(retrieval: UnitRetrieval) -> bool:
276
+ """
277
+ Return true if the retrieval had the semantic feature on but the query endpoint
278
+ did not return the vector in the response.
279
+ """
276
280
  if retrieval.query.semantic is None:
277
281
  return False
278
282
  incomplete = retrieval.query.semantic.query is None or len(retrieval.query.semantic.query) == 0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nucliadb
3
- Version: 6.4.0.post4293
3
+ Version: 6.4.0.post4313
4
4
  Summary: NucliaDB
5
5
  Author-email: Nuclia <nucliadb@nuclia.com>
6
6
  License: AGPL
@@ -20,11 +20,11 @@ Classifier: Programming Language :: Python :: 3.12
20
20
  Classifier: Programming Language :: Python :: 3 :: Only
21
21
  Requires-Python: <4,>=3.9
22
22
  Description-Content-Type: text/markdown
23
- Requires-Dist: nucliadb-telemetry[all]>=6.4.0.post4293
24
- Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.4.0.post4293
25
- Requires-Dist: nucliadb-protos>=6.4.0.post4293
26
- Requires-Dist: nucliadb-models>=6.4.0.post4293
27
- Requires-Dist: nidx-protos>=6.4.0.post4293
23
+ Requires-Dist: nucliadb-telemetry[all]>=6.4.0.post4313
24
+ Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.4.0.post4313
25
+ Requires-Dist: nucliadb-protos>=6.4.0.post4313
26
+ Requires-Dist: nucliadb-models>=6.4.0.post4313
27
+ Requires-Dist: nidx-protos>=6.4.0.post4313
28
28
  Requires-Dist: nucliadb-admin-assets>=1.0.0.post1224
29
29
  Requires-Dist: nuclia-models>=0.24.2
30
30
  Requires-Dist: uvicorn[standard]
@@ -179,7 +179,7 @@ nucliadb/models/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,8
179
179
  nucliadb/models/responses.py,sha256=qnuOoc7TrVSUnpikfTwHLKez47_DE4mSFzpxrwtqijA,1599
180
180
  nucliadb/models/internal/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
181
181
  nucliadb/models/internal/processing.py,sha256=bzPr-hXliY81zMUgG-PDyDiFKP7Xbs71s2d0SIAu4Do,4090
182
- nucliadb/purge/__init__.py,sha256=UXbto56EWYLwZj6uEc-flQVe3gDDNFtM6EV-aIkryPU,12353
182
+ nucliadb/purge/__init__.py,sha256=lZE7_FQMVz2rWiwRYrtKpAjVoO6tbnzTYofQbsGUqos,13118
183
183
  nucliadb/purge/orphan_shards.py,sha256=fcP37QoFNjS6q2XozLQImY1swC_EmHeNhAJwLvEkOww,7769
184
184
  nucliadb/reader/__init__.py,sha256=C5Efic7WlGm2U2C5WOyquMFbIj2Pojwe_8mwzVYnOzE,1304
185
185
  nucliadb/reader/app.py,sha256=Se-BFTE6d1v1msLzQn4q5XIhjnSxa2ckDSHdvm7NRf8,3096
@@ -216,34 +216,34 @@ nucliadb/search/api/v1/ask.py,sha256=b4tz33HNsfT5DXv_2DMc_jirnFsHuobreWkbAKkzj5o
216
216
  nucliadb/search/api/v1/catalog.py,sha256=W0cPWuC27Y4bO7Ifl1VQp8OPYfF5gv5yeWZBsuJMxUU,7721
217
217
  nucliadb/search/api/v1/feedback.py,sha256=kNLc4dHz2SXHzV0PwC1WiRAwY88fDptPcP-kO0q-FrQ,2620
218
218
  nucliadb/search/api/v1/find.py,sha256=C4sTGFRS9tQFF8v1zhnHQvnExJoGDYi78bZTRfwhGrc,10831
219
- nucliadb/search/api/v1/graph.py,sha256=ItVpzJbqfDLjoIo2fTb2mKGCM1Z34sx7CBb3gNmj6IQ,4274
219
+ nucliadb/search/api/v1/graph.py,sha256=Km_ysePnhaEahdYp0gaF-234FHliB8LdUpfGOnqZ0rc,4265
220
220
  nucliadb/search/api/v1/knowledgebox.py,sha256=e9xeLPUqnQTx33i4A8xuV93ENvtJGrpjPlLRbGJtAI8,8415
221
221
  nucliadb/search/api/v1/predict_proxy.py,sha256=Q03ZTvWp7Sq0x71t5Br4LHxTiYsRd6-GCb4YuKqhynM,3131
222
222
  nucliadb/search/api/v1/router.py,sha256=mtT07rBZcVfpa49doaw9b1tj3sdi3qLH0gn9Io6NYM0,988
223
- nucliadb/search/api/v1/search.py,sha256=Or-mUvmBAyh0Y55NqTYNXe_BWR0lLLaTSL2ChjJaE2M,12402
224
- nucliadb/search/api/v1/suggest.py,sha256=Em7ApddZNHMHjL_ZfXmUIVUk504f58J96JlxJXnIxaM,6438
223
+ nucliadb/search/api/v1/search.py,sha256=bp2JfBO_wiPl7vG3-MXJfqdFfIGwJM3L25UqqGWj4V4,12304
224
+ nucliadb/search/api/v1/suggest.py,sha256=GJ7DveD6c9_h0m6NbI7IAvfO2j82TtrGuLg6UF3GBh4,6350
225
225
  nucliadb/search/api/v1/summarize.py,sha256=VAHJvE6V3xUgEBfqNKhgoxmDqCvh30RnrEIBVhMcNLU,2499
226
226
  nucliadb/search/api/v1/utils.py,sha256=5Ve-frn7LAE2jqAgB85F8RSeqxDlyA08--gS-AdOLS4,1434
227
227
  nucliadb/search/api/v1/resource/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
228
228
  nucliadb/search/api/v1/resource/ask.py,sha256=nsVzBSanSSlf0Ody6LSTjdEy75Vg283_YhbkAtWEjh8,3637
229
229
  nucliadb/search/api/v1/resource/ingestion_agents.py,sha256=fqqRCd8Wc9GciS5P98lcnihvTKStsZYYtOU-T1bc-6E,4771
230
- nucliadb/search/api/v1/resource/search.py,sha256=sz8-aAg_ucc6dfJddWH_C0Om4PrKJhvvDmgKHBczyBI,5018
230
+ nucliadb/search/api/v1/resource/search.py,sha256=Gnn4CY5NO4AK5ZWwrSIRJqBDm16u8k0XtpUwDXEBeYY,4930
231
231
  nucliadb/search/api/v1/resource/utils.py,sha256=-NjZqAQtFEXKpIh8ui5S26ItnJ5rzmmG0BHxGSS9QPw,1141
232
232
  nucliadb/search/requesters/__init__.py,sha256=itSI7dtTwFP55YMX4iK7JzdMHS5CQVUiB1XzQu4UBh8,833
233
- nucliadb/search/requesters/utils.py,sha256=o5JtXX5KrqMtUJo3u6rw9EOOKXPiw-GaF0oGuZu7PPc,6225
233
+ nucliadb/search/requesters/utils.py,sha256=Ne5fweSWk9hettQKyUZAMZrw_MTjPE5W_EVqj4p5XiI,6109
234
234
  nucliadb/search/search/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
235
235
  nucliadb/search/search/cache.py,sha256=-6l3i2Qi8ig2SM_FCgOLIaQ48XVj7L5ctd5PdQRY5mY,4458
236
236
  nucliadb/search/search/cut.py,sha256=ytY0_GY7ocNjfxTb4aosxEp4ZfhQNDP--JkhEMGD298,1153
237
237
  nucliadb/search/search/exceptions.py,sha256=klGLgAGGrXcSGix_W6418ZBMqDchAIGjN77ofkOScEI,1039
238
238
  nucliadb/search/search/fetch.py,sha256=eiljOKim-4OOEZn-3fyVZSYxztCH156BXYdqlIwVdN4,6181
239
239
  nucliadb/search/search/filters.py,sha256=1MkHlJjAQqoRCj7e5cEzK2HvBxGLE17I_omsjiklbtw,6476
240
- nucliadb/search/search/find.py,sha256=i1auc8visRakBwbbZGhyQgXNAmsaAVheisYi2xGjdKY,7925
240
+ nucliadb/search/search/find.py,sha256=ZocoQNN28OHOmMaroGVFCnce3YHPZbFb1-9jxLNHSFM,7805
241
241
  nucliadb/search/search/find_merge.py,sha256=c-7IlfjfdmWAvQOyM7IO3bKS1EQpnR4oi6pN6mwrQKw,19815
242
242
  nucliadb/search/search/graph_merge.py,sha256=y5V7X-BhjHsKDXE69tzQLIIKGm4XuaFrZXw0odcHVNM,3402
243
- nucliadb/search/search/graph_strategy.py,sha256=zYfi1df982ZYOFtYSksnHEJvQn-ZZsCIFSruVZP_934,32891
243
+ nucliadb/search/search/graph_strategy.py,sha256=LtPWGVL0RzxUgDLjrYgoQdZFmPBbln1fUsmXM1z5krs,35941
244
244
  nucliadb/search/search/hydrator.py,sha256=-R37gCrGxkyaiHQalnTWHNG_FCx11Zucd7qA1vQCxuw,6985
245
245
  nucliadb/search/search/ingestion_agents.py,sha256=NeJr4EEX-bvFFMGvXOOwLv8uU7NuQ-ntJnnrhnKfMzY,3174
246
- nucliadb/search/search/merge.py,sha256=Abg9YblQJvH2jDvXVT45MNxaIpNa7TTpsiUSJqb3NDc,23307
246
+ nucliadb/search/search/merge.py,sha256=XiRBsxhYPshPV7lZXD-9E259KZOPIf4I2tKosY0lPo4,22470
247
247
  nucliadb/search/search/metrics.py,sha256=3I6IN0qDSmqIvUaWJmT3rt-Jyjs6LcvnKI8ZqCiuJPY,3501
248
248
  nucliadb/search/search/paragraphs.py,sha256=pNAEiYqJGGUVcEf7xf-PFMVqz0PX4Qb-WNG-_zPGN2o,7799
249
249
  nucliadb/search/search/pgcatalog.py,sha256=s_J98fsX_RuFXwpejpkGqG-tD9ELuzz4YQ6U3ew5h2g,9313
@@ -259,7 +259,7 @@ nucliadb/search/search/chat/ask.py,sha256=aaNj0MeAbx9dyeKpQJdm3VsHMq9OmcCESxahbg
259
259
  nucliadb/search/search/chat/exceptions.py,sha256=Siy4GXW2L7oPhIR86H3WHBhE9lkV4A4YaAszuGGUf54,1356
260
260
  nucliadb/search/search/chat/images.py,sha256=PA8VWxT5_HUGfW1ULhKTK46UBsVyINtWWqEM1ulzX1E,3095
261
261
  nucliadb/search/search/chat/prompt.py,sha256=e8C7_MPr6Cn3nJHA4hWpeW3629KVI1ZUQA_wZf9Kiu4,48503
262
- nucliadb/search/search/chat/query.py,sha256=6v6twBUTWfUUzklVV6xqJSYPkAshnIrBH9wbTcjQvkI,17063
262
+ nucliadb/search/search/chat/query.py,sha256=3jMPNbiFEOoS0ydMOPYkSx1qVlvAv51npzadWXDwkMs,16650
263
263
  nucliadb/search/search/query_parser/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
264
264
  nucliadb/search/search/query_parser/exceptions.py,sha256=szAOXUZ27oNY-OSa9t2hQ5HHkQQC0EX1FZz_LluJHJE,1224
265
265
  nucliadb/search/search/query_parser/fetcher.py,sha256=SkvBRDfSKmuz-QygNKLAU4AhZhhDo1dnOZmt1zA28RA,16851
@@ -273,7 +273,7 @@ nucliadb/search/search/query_parser/parsers/common.py,sha256=o3028wUnK78lOmFK0jt
273
273
  nucliadb/search/search/query_parser/parsers/find.py,sha256=Fo4lXOnCbP0AKEc1mKLNINJBv63B4DPlix0vlhyesck,12717
274
274
  nucliadb/search/search/query_parser/parsers/graph.py,sha256=lDRJO_JvOe7yytNgXZyMogyPMgB5xc8obNY2kqz3yGU,9405
275
275
  nucliadb/search/search/query_parser/parsers/search.py,sha256=yEebeMOXJza7HMK3TdIPO6UGQbe79maSDg-GgohQIMk,10517
276
- nucliadb/search/search/query_parser/parsers/unit_retrieval.py,sha256=bcSvF2mW6IHFAs7_yA6TePw0zVtk9CmEA3j6xkhkDO8,11328
276
+ nucliadb/search/search/query_parser/parsers/unit_retrieval.py,sha256=rW3YHDWLkI2Hhznl_1oOMhC01bwZMAjv-Wu3iHPIaiU,11475
277
277
  nucliadb/standalone/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
278
278
  nucliadb/standalone/api_router.py,sha256=hgq9FXpihzgjHkwcVGfGCSwyXy67fqXTfLFHuINzIi0,5567
279
279
  nucliadb/standalone/app.py,sha256=mAApNK_iVsQgJyd-mtwCeZq5csSimwnXmlQGH9a70pE,5586
@@ -368,8 +368,8 @@ nucliadb/writer/tus/local.py,sha256=7jYa_w9b-N90jWgN2sQKkNcomqn6JMVBOVeDOVYJHto,
368
368
  nucliadb/writer/tus/s3.py,sha256=vF0NkFTXiXhXq3bCVXXVV-ED38ECVoUeeYViP8uMqcU,8357
369
369
  nucliadb/writer/tus/storage.py,sha256=ToqwjoYnjI4oIcwzkhha_MPxi-k4Jk3Lt55zRwaC1SM,2903
370
370
  nucliadb/writer/tus/utils.py,sha256=MSdVbRsRSZVdkaum69_0wku7X3p5wlZf4nr6E0GMKbw,2556
371
- nucliadb-6.4.0.post4293.dist-info/METADATA,sha256=-Dsk2JpP3WFTtfqDx92QjQqjbRhNV-wdkpcn14t1ttg,4223
372
- nucliadb-6.4.0.post4293.dist-info/WHEEL,sha256=DnLRTWE75wApRYVsjgc6wsVswC54sMSJhAEd4xhDpBk,91
373
- nucliadb-6.4.0.post4293.dist-info/entry_points.txt,sha256=XqGfgFDuY3zXQc8ewXM2TRVjTModIq851zOsgrmaXx4,1268
374
- nucliadb-6.4.0.post4293.dist-info/top_level.txt,sha256=hwYhTVnX7jkQ9gJCkVrbqEG1M4lT2F_iPQND1fCzF80,20
375
- nucliadb-6.4.0.post4293.dist-info/RECORD,,
371
+ nucliadb-6.4.0.post4313.dist-info/METADATA,sha256=-Mp65qW_udL5EUZsp6CwEysmjbvuTY3u6Qbn0Bc3epI,4223
372
+ nucliadb-6.4.0.post4313.dist-info/WHEEL,sha256=Nw36Djuh_5VDukK0H78QzOX-_FQEo6V37m3nkm96gtU,91
373
+ nucliadb-6.4.0.post4313.dist-info/entry_points.txt,sha256=XqGfgFDuY3zXQc8ewXM2TRVjTModIq851zOsgrmaXx4,1268
374
+ nucliadb-6.4.0.post4313.dist-info/top_level.txt,sha256=hwYhTVnX7jkQ9gJCkVrbqEG1M4lT2F_iPQND1fCzF80,20
375
+ nucliadb-6.4.0.post4313.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (80.4.0)
2
+ Generator: setuptools (80.7.1)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5