nucliadb 6.3.4.post3812__py3-none-any.whl → 6.3.4.post3821__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -103,7 +103,8 @@ async def modify_slug(txn: Transaction, *, kbid: str, rid: str, new_slug: str) -
103
103
  async def get_resource_shard_id(
104
104
  txn: Transaction, *, kbid: str, rid: str, for_update: bool = False
105
105
  ) -> Optional[str]:
106
- shard = await txn.get(KB_RESOURCE_SHARD.format(kbid=kbid, uuid=rid, for_update=for_update))
106
+ key = KB_RESOURCE_SHARD.format(kbid=kbid, uuid=rid)
107
+ shard = await txn.get(key, for_update=for_update)
107
108
  if shard is not None:
108
109
  return shard.decode()
109
110
  else:
@@ -36,7 +36,7 @@ from nucliadb.common.datamanagers.entities import (
36
36
  from nucliadb.common.maindb.driver import Transaction
37
37
  from nucliadb.ingest.orm.knowledgebox import KnowledgeBox
38
38
  from nucliadb.ingest.settings import settings
39
- from nucliadb.search.search.shards import query_shard
39
+ from nucliadb.search.search.shards import graph_search_shard, query_shard
40
40
  from nucliadb_protos.knowledgebox_pb2 import (
41
41
  DeletedEntitiesGroups,
42
42
  EntitiesGroup,
@@ -45,9 +45,8 @@ from nucliadb_protos.knowledgebox_pb2 import (
45
45
  )
46
46
  from nucliadb_protos.nodereader_pb2 import (
47
47
  Faceted,
48
- RelationNodeFilter,
49
- RelationPrefixSearchRequest,
50
- RelationSearchResponse,
48
+ GraphSearchRequest,
49
+ GraphSearchResponse,
51
50
  SearchRequest,
52
51
  SearchResponse,
53
52
  )
@@ -203,18 +202,16 @@ class EntitiesManager:
203
202
  async def get_indexed_entities_group(self, group: str) -> Optional[EntitiesGroup]:
204
203
  shard_manager = get_shard_manager()
205
204
 
206
- async def do_entities_search(node: AbstractIndexNode, shard_id: str) -> RelationSearchResponse:
207
- request = SearchRequest(
208
- shard=shard_id,
209
- relation_prefix=RelationPrefixSearchRequest(
210
- prefix="",
211
- node_filters=[
212
- RelationNodeFilter(node_type=RelationNode.NodeType.ENTITY, node_subtype=group)
213
- ],
214
- ),
215
- )
216
- response = await query_shard(node, shard_id, request)
217
- return response.relation
205
+ async def do_entities_search(node: AbstractIndexNode, shard_id: str) -> GraphSearchResponse:
206
+ request = GraphSearchRequest()
207
+ # XXX: this is a wild guess. Are those enough or too many?
208
+ request.top_k = 500
209
+ request.kind = GraphSearchRequest.QueryKind.NODES
210
+ request.query.path.path.source.node_type = RelationNode.NodeType.ENTITY
211
+ request.query.path.path.source.node_subtype = group
212
+ request.query.path.path.undirected = True
213
+ response = await graph_search_shard(node, shard_id, request)
214
+ return response
218
215
 
219
216
  results = await shard_manager.apply_for_all_shards(
220
217
  self.kbid,
@@ -224,7 +221,7 @@ class EntitiesManager:
224
221
 
225
222
  entities = {}
226
223
  for result in results:
227
- entities.update({node.value: Entity(value=node.value) for node in result.prefix.nodes})
224
+ entities.update({node.value: Entity(value=node.value) for node in result.nodes})
228
225
 
229
226
  if not entities:
230
227
  return None
@@ -44,7 +44,6 @@ from nucliadb.search.search.find_merge import (
44
44
  hydrate_and_rerank,
45
45
  )
46
46
  from nucliadb.search.search.hydrator import ResourceHydrationOptions, TextBlockHydrationOptions
47
- from nucliadb.search.search.merge import merge_relation_prefix_results
48
47
  from nucliadb.search.search.metrics import RAGMetrics
49
48
  from nucliadb.search.search.rerankers import Reranker, RerankingOptions
50
49
  from nucliadb.search.utilities import get_predict
@@ -65,6 +64,7 @@ from nucliadb_models.search import (
65
64
  NucliaDBClientType,
66
65
  QueryEntityDetection,
67
66
  RelatedEntities,
67
+ RelatedEntity,
68
68
  RelationDirection,
69
69
  RelationRanking,
70
70
  Relations,
@@ -443,26 +443,35 @@ async def fuzzy_search_entities(
443
443
  ) -> Optional[RelatedEntities]:
444
444
  """Fuzzy find entities in KB given a query using the same methodology as /suggest, but split by words."""
445
445
 
446
- request = nodereader_pb2.SearchRequest()
447
- request.relation_prefix.query = query
446
+ # Build an OR for each word in the query matching with fuzzy any word in any
447
+ # node in any position. I.e., for the query "Rose Hamiltn", it'll match
448
+ # "Rosa Parks" and "Margaret Hamilton"
449
+ request = nodereader_pb2.GraphSearchRequest()
450
+ # XXX Are those enough results? Too many?
451
+ request.top_k = 50
452
+ request.kind = nodereader_pb2.GraphSearchRequest.QueryKind.NODES
453
+ for word in query.split():
454
+ subquery = nodereader_pb2.GraphQuery.PathQuery()
455
+ subquery.path.source.value = word
456
+ subquery.path.source.fuzzy.kind = nodereader_pb2.GraphQuery.Node.MatchLocation.WORDS
457
+ subquery.path.source.fuzzy.distance = 1
458
+ subquery.path.undirected = True
459
+ request.query.path.bool_or.operands.append(subquery)
448
460
 
449
- results: list[nodereader_pb2.SearchResponse]
450
461
  try:
451
- (
452
- results,
453
- _,
454
- _,
455
- ) = await node_query(
456
- kbid,
457
- Method.SEARCH,
458
- request,
459
- )
460
- return merge_relation_prefix_results(results)
461
- except Exception as e:
462
- capture_exception(e)
462
+ results, _, _ = await node_query(kbid, Method.GRAPH, request)
463
+ except Exception as exc:
464
+ capture_exception(exc)
463
465
  logger.exception("Error in finding entities in query for graph strategy")
464
466
  return None
465
467
 
468
+ # merge shard results while deduplicating repeated entities across shards
469
+ unique_entities: set[RelatedEntity] = set()
470
+ for response in results:
471
+ unique_entities.update((RelatedEntity(family=e.subtype, value=e.value) for e in response.nodes))
472
+
473
+ return RelatedEntities(entities=list(unique_entities), total=len(unique_entities))
474
+
466
475
 
467
476
  async def rank_relations_reranker(
468
477
  relations: Relations,
@@ -632,19 +632,6 @@ async def merge_suggest_entities_results(
632
632
  return RelatedEntities(entities=list(unique_entities), total=len(unique_entities))
633
633
 
634
634
 
635
- def merge_relation_prefix_results(
636
- responses: list[SearchResponse],
637
- ) -> RelatedEntities:
638
- unique_entities: Set[RelatedEntity] = set()
639
- for response in responses:
640
- response_entities = (
641
- RelatedEntity(family=e.subtype, value=e.value) for e in response.relation.prefix.nodes
642
- )
643
- unique_entities.update(response_entities)
644
-
645
- return RelatedEntities(entities=list(unique_entities), total=len(unique_entities))
646
-
647
-
648
635
  async def merge_suggest_results(
649
636
  suggest_responses: list[SuggestResponse],
650
637
  kbid: str,
@@ -221,11 +221,12 @@ def _parse_relation_query(
221
221
  def _set_node_to_pb(node: graph_requests.GraphNode, pb: nodereader_pb2.GraphQuery.Node):
222
222
  if node.value is not None:
223
223
  pb.value = node.value
224
- if node.match == graph_requests.NodeMatchKind.EXACT:
225
- pb.match_kind = nodereader_pb2.GraphQuery.Node.MatchKind.DEPRECATED_EXACT
224
+ if node.match == graph_requests.NodeMatchKindName.EXACT:
225
+ pb.exact.kind = nodereader_pb2.GraphQuery.Node.MatchLocation.FULL
226
226
 
227
- elif node.match == graph_requests.NodeMatchKind.FUZZY:
228
- pb.match_kind = nodereader_pb2.GraphQuery.Node.MatchKind.DEPRECATED_FUZZY
227
+ elif node.match == graph_requests.NodeMatchKindName.FUZZY:
228
+ pb.fuzzy.kind = nodereader_pb2.GraphQuery.Node.MatchLocation.PREFIX
229
+ pb.fuzzy.distance = 1
229
230
 
230
231
  else: # pragma: nocover
231
232
  # This is a trick so mypy generates an error if this branch can be reached,
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nucliadb
3
- Version: 6.3.4.post3812
3
+ Version: 6.3.4.post3821
4
4
  Summary: NucliaDB
5
5
  Author-email: Nuclia <nucliadb@nuclia.com>
6
6
  License: AGPL
@@ -20,11 +20,11 @@ Classifier: Programming Language :: Python :: 3.12
20
20
  Classifier: Programming Language :: Python :: 3 :: Only
21
21
  Requires-Python: <4,>=3.9
22
22
  Description-Content-Type: text/markdown
23
- Requires-Dist: nucliadb-telemetry[all]>=6.3.4.post3812
24
- Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.3.4.post3812
25
- Requires-Dist: nucliadb-protos>=6.3.4.post3812
26
- Requires-Dist: nucliadb-models>=6.3.4.post3812
27
- Requires-Dist: nidx-protos>=6.3.4.post3812
23
+ Requires-Dist: nucliadb-telemetry[all]>=6.3.4.post3821
24
+ Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.3.4.post3821
25
+ Requires-Dist: nucliadb-protos>=6.3.4.post3821
26
+ Requires-Dist: nucliadb-models>=6.3.4.post3821
27
+ Requires-Dist: nidx-protos>=6.3.4.post3821
28
28
  Requires-Dist: nucliadb-admin-assets>=1.0.0.post1224
29
29
  Requires-Dist: nuclia-models>=0.24.2
30
30
  Requires-Dist: uvicorn
@@ -78,7 +78,7 @@ nucliadb/common/datamanagers/fields.py,sha256=9KqBzTssAT68FR5hd17Xu_CSwAYdKFuYic
78
78
  nucliadb/common/datamanagers/kb.py,sha256=P7EhF4tApIUG2jw_HH1oMufTKG9__kuOLKnrCNGbDM4,6156
79
79
  nucliadb/common/datamanagers/labels.py,sha256=Zm0GQpSPoGXEEysUY7VsDIcyKSIIQsMVphj23IyM9_c,4502
80
80
  nucliadb/common/datamanagers/processing.py,sha256=ByxdZzdbAfJGqC6__mY-zryjk040TyQfcUq3rxujeoY,1587
81
- nucliadb/common/datamanagers/resources.py,sha256=cuwcVL-GEjS1VHigJtz5SG_dRhl09UADQ9MSYOOqs70,10779
81
+ nucliadb/common/datamanagers/resources.py,sha256=VwFdCyHSnzMU3ASYRhC-wuCjCQEjOKEF7tIob4lTcPg,10793
82
82
  nucliadb/common/datamanagers/rollover.py,sha256=BM1hJ2cEU91xekM5PtmnA0SN3i3w0WmodiyTpO8YZZs,7865
83
83
  nucliadb/common/datamanagers/search_configurations.py,sha256=O-8eW43CE46GcxO6TB5hpi27NBguv4BL4SI1vLlN8os,2463
84
84
  nucliadb/common/datamanagers/synonyms.py,sha256=zk3GEH38KF5vV_VcuL6DCg-2JwgXJfQl7Io6VPqv2cw,1566
@@ -141,7 +141,7 @@ nucliadb/ingest/fields/text.py,sha256=tFvSQJAe0W7ePpp2_WDfLiE2yglR1OTU0Zht9acvOF
141
141
  nucliadb/ingest/orm/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
142
142
  nucliadb/ingest/orm/brain.py,sha256=A8H1J7Bo95sNzDgYr0_UNoemQhWOFEFz9UlYfs6ug-8,29407
143
143
  nucliadb/ingest/orm/broker_message.py,sha256=XWaiZgDOz94NPOPT-hqbRr5ZkpVimUw6PjUJNftfoVw,7514
144
- nucliadb/ingest/orm/entities.py,sha256=3_n6lKhBy2GsdmNmkh0_mvxP8md20OZsbtTNEmfJ8Hg,14888
144
+ nucliadb/ingest/orm/entities.py,sha256=a-aYuKBUQhxDKFtXOzTAkLlY_t2JiTfaptw2vt3AQDQ,14915
145
145
  nucliadb/ingest/orm/exceptions.py,sha256=k4Esv4NtL4TrGTcsQpwrSfDhPQpiYcRbB1SpYmBX5MY,1432
146
146
  nucliadb/ingest/orm/knowledgebox.py,sha256=Bfb4-MIQWlaJrQAUDbgs_iIsXCYjS7s5YiiGl_Jb4jo,23887
147
147
  nucliadb/ingest/orm/metrics.py,sha256=OkwMSPKLZcKba0ZTwtTiIxwBgaLMX5ydhGieKvi2y7E,1096
@@ -229,10 +229,10 @@ nucliadb/search/search/filters.py,sha256=1MkHlJjAQqoRCj7e5cEzK2HvBxGLE17I_omsjik
229
229
  nucliadb/search/search/find.py,sha256=pjc-i55j_p_F1H0La4qc9slZSl_gICoFzl3fy6FLYTM,10314
230
230
  nucliadb/search/search/find_merge.py,sha256=3FnzKFEnVemg6FO_6zveulbAU7klvsiPEBvLrpBBMg8,17450
231
231
  nucliadb/search/search/graph_merge.py,sha256=OiUNiXOWwrUVKqStuRcoUJwvDbDYamqIgiAy_FwPdMI,3405
232
- nucliadb/search/search/graph_strategy.py,sha256=gisL2GpbSIa_SucyOwEt7TWdqURyAQqxvD_-PkXQct8,32339
232
+ nucliadb/search/search/graph_strategy.py,sha256=SPJdDHQcTFsNb1IEWdWzklC5j1Vv9igibo0dYQAgcy0,33113
233
233
  nucliadb/search/search/hydrator.py,sha256=-R37gCrGxkyaiHQalnTWHNG_FCx11Zucd7qA1vQCxuw,6985
234
234
  nucliadb/search/search/ingestion_agents.py,sha256=NeJr4EEX-bvFFMGvXOOwLv8uU7NuQ-ntJnnrhnKfMzY,3174
235
- nucliadb/search/search/merge.py,sha256=bPO51Kc3Ec69UuCbF5ulokd02gWZhp7zlJSuMjGDo9Y,23141
235
+ nucliadb/search/search/merge.py,sha256=fh5WnA_xnXp-Iiq5Cud9hIGole7_0OW2b3Oymk32D6Y,22689
236
236
  nucliadb/search/search/metrics.py,sha256=GGGtXHLhK79_ESV277xkBVjcaMURXHCxYG0EdGamUd8,2886
237
237
  nucliadb/search/search/paragraphs.py,sha256=pNAEiYqJGGUVcEf7xf-PFMVqz0PX4Qb-WNG-_zPGN2o,7799
238
238
  nucliadb/search/search/pgcatalog.py,sha256=s_J98fsX_RuFXwpejpkGqG-tD9ELuzz4YQ6U3ew5h2g,9313
@@ -258,7 +258,7 @@ nucliadb/search/search/query_parser/old_filters.py,sha256=-zbfN-RsXoj_DRjh3Lfp-w
258
258
  nucliadb/search/search/query_parser/parsers/__init__.py,sha256=ySCNSdbesLXGZyR88919njulA6UE10_3PhqMG_Yj1o4,1034
259
259
  nucliadb/search/search/query_parser/parsers/catalog.py,sha256=XdBiTweGTQkj8m_V_i2xbwp7P5pPO8K1Tud692XKhMw,7149
260
260
  nucliadb/search/search/query_parser/parsers/find.py,sha256=q3wH_i0DGceeKckYEH3c5MqM5EvRiMCL7r-6nCAId9Q,4666
261
- nucliadb/search/search/query_parser/parsers/graph.py,sha256=43S8iSg4j9I_XD8zpW1VggDspQD-NMyS26B5Mw6Dktw,9358
261
+ nucliadb/search/search/query_parser/parsers/graph.py,sha256=QJs-pybNXPsMSEkIHctb0Q0xQG-aArks8BtUxbJL5rU,9386
262
262
  nucliadb/standalone/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
263
263
  nucliadb/standalone/api_router.py,sha256=hgq9FXpihzgjHkwcVGfGCSwyXy67fqXTfLFHuINzIi0,5567
264
264
  nucliadb/standalone/app.py,sha256=mAApNK_iVsQgJyd-mtwCeZq5csSimwnXmlQGH9a70pE,5586
@@ -354,8 +354,8 @@ nucliadb/writer/tus/local.py,sha256=7jYa_w9b-N90jWgN2sQKkNcomqn6JMVBOVeDOVYJHto,
354
354
  nucliadb/writer/tus/s3.py,sha256=vF0NkFTXiXhXq3bCVXXVV-ED38ECVoUeeYViP8uMqcU,8357
355
355
  nucliadb/writer/tus/storage.py,sha256=ToqwjoYnjI4oIcwzkhha_MPxi-k4Jk3Lt55zRwaC1SM,2903
356
356
  nucliadb/writer/tus/utils.py,sha256=MSdVbRsRSZVdkaum69_0wku7X3p5wlZf4nr6E0GMKbw,2556
357
- nucliadb-6.3.4.post3812.dist-info/METADATA,sha256=yjF1rSCHEhQiCv7vEdwAPyTtjrWrsKJgae-L39Y_zE8,4291
358
- nucliadb-6.3.4.post3812.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
359
- nucliadb-6.3.4.post3812.dist-info/entry_points.txt,sha256=XqGfgFDuY3zXQc8ewXM2TRVjTModIq851zOsgrmaXx4,1268
360
- nucliadb-6.3.4.post3812.dist-info/top_level.txt,sha256=hwYhTVnX7jkQ9gJCkVrbqEG1M4lT2F_iPQND1fCzF80,20
361
- nucliadb-6.3.4.post3812.dist-info/RECORD,,
357
+ nucliadb-6.3.4.post3821.dist-info/METADATA,sha256=I_IqkABVw9DZGcLJyK5KVtV7Tr4d8x1Ut3GX-Oncydo,4291
358
+ nucliadb-6.3.4.post3821.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
359
+ nucliadb-6.3.4.post3821.dist-info/entry_points.txt,sha256=XqGfgFDuY3zXQc8ewXM2TRVjTModIq851zOsgrmaXx4,1268
360
+ nucliadb-6.3.4.post3821.dist-info/top_level.txt,sha256=hwYhTVnX7jkQ9gJCkVrbqEG1M4lT2F_iPQND1fCzF80,20
361
+ nucliadb-6.3.4.post3821.dist-info/RECORD,,