nucliadb 6.3.4.post3812__py3-none-any.whl → 6.3.4.post3821__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nucliadb/common/datamanagers/resources.py +2 -1
- nucliadb/ingest/orm/entities.py +14 -17
- nucliadb/search/search/graph_strategy.py +25 -16
- nucliadb/search/search/merge.py +0 -13
- nucliadb/search/search/query_parser/parsers/graph.py +5 -4
- {nucliadb-6.3.4.post3812.dist-info → nucliadb-6.3.4.post3821.dist-info}/METADATA +6 -6
- {nucliadb-6.3.4.post3812.dist-info → nucliadb-6.3.4.post3821.dist-info}/RECORD +10 -10
- {nucliadb-6.3.4.post3812.dist-info → nucliadb-6.3.4.post3821.dist-info}/WHEEL +0 -0
- {nucliadb-6.3.4.post3812.dist-info → nucliadb-6.3.4.post3821.dist-info}/entry_points.txt +0 -0
- {nucliadb-6.3.4.post3812.dist-info → nucliadb-6.3.4.post3821.dist-info}/top_level.txt +0 -0
@@ -103,7 +103,8 @@ async def modify_slug(txn: Transaction, *, kbid: str, rid: str, new_slug: str) -
|
|
103
103
|
async def get_resource_shard_id(
|
104
104
|
txn: Transaction, *, kbid: str, rid: str, for_update: bool = False
|
105
105
|
) -> Optional[str]:
|
106
|
-
|
106
|
+
key = KB_RESOURCE_SHARD.format(kbid=kbid, uuid=rid)
|
107
|
+
shard = await txn.get(key, for_update=for_update)
|
107
108
|
if shard is not None:
|
108
109
|
return shard.decode()
|
109
110
|
else:
|
nucliadb/ingest/orm/entities.py
CHANGED
@@ -36,7 +36,7 @@ from nucliadb.common.datamanagers.entities import (
|
|
36
36
|
from nucliadb.common.maindb.driver import Transaction
|
37
37
|
from nucliadb.ingest.orm.knowledgebox import KnowledgeBox
|
38
38
|
from nucliadb.ingest.settings import settings
|
39
|
-
from nucliadb.search.search.shards import query_shard
|
39
|
+
from nucliadb.search.search.shards import graph_search_shard, query_shard
|
40
40
|
from nucliadb_protos.knowledgebox_pb2 import (
|
41
41
|
DeletedEntitiesGroups,
|
42
42
|
EntitiesGroup,
|
@@ -45,9 +45,8 @@ from nucliadb_protos.knowledgebox_pb2 import (
|
|
45
45
|
)
|
46
46
|
from nucliadb_protos.nodereader_pb2 import (
|
47
47
|
Faceted,
|
48
|
-
|
49
|
-
|
50
|
-
RelationSearchResponse,
|
48
|
+
GraphSearchRequest,
|
49
|
+
GraphSearchResponse,
|
51
50
|
SearchRequest,
|
52
51
|
SearchResponse,
|
53
52
|
)
|
@@ -203,18 +202,16 @@ class EntitiesManager:
|
|
203
202
|
async def get_indexed_entities_group(self, group: str) -> Optional[EntitiesGroup]:
|
204
203
|
shard_manager = get_shard_manager()
|
205
204
|
|
206
|
-
async def do_entities_search(node: AbstractIndexNode, shard_id: str) -> RelationSearchResponse:
|
207
|
-
request =
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
response = await query_shard(node, shard_id, request)
|
217
|
-
return response.relation
|
205
|
+
async def do_entities_search(node: AbstractIndexNode, shard_id: str) -> GraphSearchResponse:
|
206
|
+
request = GraphSearchRequest()
|
207
|
+
# XXX: this is a wild guess. Are those enough or too many?
|
208
|
+
request.top_k = 500
|
209
|
+
request.kind = GraphSearchRequest.QueryKind.NODES
|
210
|
+
request.query.path.path.source.node_type = RelationNode.NodeType.ENTITY
|
211
|
+
request.query.path.path.source.node_subtype = group
|
212
|
+
request.query.path.path.undirected = True
|
213
|
+
response = await graph_search_shard(node, shard_id, request)
|
214
|
+
return response
|
218
215
|
|
219
216
|
results = await shard_manager.apply_for_all_shards(
|
220
217
|
self.kbid,
|
@@ -224,7 +221,7 @@ class EntitiesManager:
|
|
224
221
|
|
225
222
|
entities = {}
|
226
223
|
for result in results:
|
227
|
-
entities.update({node.value: Entity(value=node.value) for node in result.prefix.nodes})
|
224
|
+
entities.update({node.value: Entity(value=node.value) for node in result.nodes})
|
228
225
|
|
229
226
|
if not entities:
|
230
227
|
return None
|
@@ -44,7 +44,6 @@ from nucliadb.search.search.find_merge import (
|
|
44
44
|
hydrate_and_rerank,
|
45
45
|
)
|
46
46
|
from nucliadb.search.search.hydrator import ResourceHydrationOptions, TextBlockHydrationOptions
|
47
|
-
from nucliadb.search.search.merge import merge_relation_prefix_results
|
48
47
|
from nucliadb.search.search.metrics import RAGMetrics
|
49
48
|
from nucliadb.search.search.rerankers import Reranker, RerankingOptions
|
50
49
|
from nucliadb.search.utilities import get_predict
|
@@ -65,6 +64,7 @@ from nucliadb_models.search import (
|
|
65
64
|
NucliaDBClientType,
|
66
65
|
QueryEntityDetection,
|
67
66
|
RelatedEntities,
|
67
|
+
RelatedEntity,
|
68
68
|
RelationDirection,
|
69
69
|
RelationRanking,
|
70
70
|
Relations,
|
@@ -443,26 +443,35 @@ async def fuzzy_search_entities(
|
|
443
443
|
) -> Optional[RelatedEntities]:
|
444
444
|
"""Fuzzy find entities in KB given a query using the same methodology as /suggest, but split by words."""
|
445
445
|
|
446
|
-
|
447
|
-
|
446
|
+
# Build an OR for each word in the query matching with fuzzy any word in any
|
447
|
+
# node in any position. I.e., for the query "Rose Hamiltn", it'll match
|
448
|
+
# "Rosa Parks" and "Margaret Hamilton"
|
449
|
+
request = nodereader_pb2.GraphSearchRequest()
|
450
|
+
# XXX Are those enough results? Too many?
|
451
|
+
request.top_k = 50
|
452
|
+
request.kind = nodereader_pb2.GraphSearchRequest.QueryKind.NODES
|
453
|
+
for word in query.split():
|
454
|
+
subquery = nodereader_pb2.GraphQuery.PathQuery()
|
455
|
+
subquery.path.source.value = word
|
456
|
+
subquery.path.source.fuzzy.kind = nodereader_pb2.GraphQuery.Node.MatchLocation.WORDS
|
457
|
+
subquery.path.source.fuzzy.distance = 1
|
458
|
+
subquery.path.undirected = True
|
459
|
+
request.query.path.bool_or.operands.append(subquery)
|
448
460
|
|
449
|
-
results: list[nodereader_pb2.SearchResponse]
|
450
461
|
try:
|
451
|
-
(
|
452
|
-
|
453
|
-
|
454
|
-
_,
|
455
|
-
) = await node_query(
|
456
|
-
kbid,
|
457
|
-
Method.SEARCH,
|
458
|
-
request,
|
459
|
-
)
|
460
|
-
return merge_relation_prefix_results(results)
|
461
|
-
except Exception as e:
|
462
|
-
capture_exception(e)
|
462
|
+
results, _, _ = await node_query(kbid, Method.GRAPH, request)
|
463
|
+
except Exception as exc:
|
464
|
+
capture_exception(exc)
|
463
465
|
logger.exception("Error in finding entities in query for graph strategy")
|
464
466
|
return None
|
465
467
|
|
468
|
+
# merge shard results while deduplicating repeated entities across shards
|
469
|
+
unique_entities: set[RelatedEntity] = set()
|
470
|
+
for response in results:
|
471
|
+
unique_entities.update((RelatedEntity(family=e.subtype, value=e.value) for e in response.nodes))
|
472
|
+
|
473
|
+
return RelatedEntities(entities=list(unique_entities), total=len(unique_entities))
|
474
|
+
|
466
475
|
|
467
476
|
async def rank_relations_reranker(
|
468
477
|
relations: Relations,
|
nucliadb/search/search/merge.py
CHANGED
@@ -632,19 +632,6 @@ async def merge_suggest_entities_results(
|
|
632
632
|
return RelatedEntities(entities=list(unique_entities), total=len(unique_entities))
|
633
633
|
|
634
634
|
|
635
|
-
def merge_relation_prefix_results(
|
636
|
-
responses: list[SearchResponse],
|
637
|
-
) -> RelatedEntities:
|
638
|
-
unique_entities: Set[RelatedEntity] = set()
|
639
|
-
for response in responses:
|
640
|
-
response_entities = (
|
641
|
-
RelatedEntity(family=e.subtype, value=e.value) for e in response.relation.prefix.nodes
|
642
|
-
)
|
643
|
-
unique_entities.update(response_entities)
|
644
|
-
|
645
|
-
return RelatedEntities(entities=list(unique_entities), total=len(unique_entities))
|
646
|
-
|
647
|
-
|
648
635
|
async def merge_suggest_results(
|
649
636
|
suggest_responses: list[SuggestResponse],
|
650
637
|
kbid: str,
|
@@ -221,11 +221,12 @@ def _parse_relation_query(
|
|
221
221
|
def _set_node_to_pb(node: graph_requests.GraphNode, pb: nodereader_pb2.GraphQuery.Node):
|
222
222
|
if node.value is not None:
|
223
223
|
pb.value = node.value
|
224
|
-
if node.match == graph_requests.
|
225
|
-
pb.
|
224
|
+
if node.match == graph_requests.NodeMatchKindName.EXACT:
|
225
|
+
pb.exact.kind = nodereader_pb2.GraphQuery.Node.MatchLocation.FULL
|
226
226
|
|
227
|
-
elif node.match == graph_requests.
|
228
|
-
pb.
|
227
|
+
elif node.match == graph_requests.NodeMatchKindName.FUZZY:
|
228
|
+
pb.fuzzy.kind = nodereader_pb2.GraphQuery.Node.MatchLocation.PREFIX
|
229
|
+
pb.fuzzy.distance = 1
|
229
230
|
|
230
231
|
else: # pragma: nocover
|
231
232
|
# This is a trick so mypy generates an error if this branch can be reached,
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: nucliadb
|
3
|
-
Version: 6.3.4.post3812
|
3
|
+
Version: 6.3.4.post3821
|
4
4
|
Summary: NucliaDB
|
5
5
|
Author-email: Nuclia <nucliadb@nuclia.com>
|
6
6
|
License: AGPL
|
@@ -20,11 +20,11 @@ Classifier: Programming Language :: Python :: 3.12
|
|
20
20
|
Classifier: Programming Language :: Python :: 3 :: Only
|
21
21
|
Requires-Python: <4,>=3.9
|
22
22
|
Description-Content-Type: text/markdown
|
23
|
-
Requires-Dist: nucliadb-telemetry[all]>=6.3.4.
|
24
|
-
Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.3.4.
|
25
|
-
Requires-Dist: nucliadb-protos>=6.3.4.
|
26
|
-
Requires-Dist: nucliadb-models>=6.3.4.
|
27
|
-
Requires-Dist: nidx-protos>=6.3.4.
|
23
|
+
Requires-Dist: nucliadb-telemetry[all]>=6.3.4.post3821
|
24
|
+
Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.3.4.post3821
|
25
|
+
Requires-Dist: nucliadb-protos>=6.3.4.post3821
|
26
|
+
Requires-Dist: nucliadb-models>=6.3.4.post3821
|
27
|
+
Requires-Dist: nidx-protos>=6.3.4.post3821
|
28
28
|
Requires-Dist: nucliadb-admin-assets>=1.0.0.post1224
|
29
29
|
Requires-Dist: nuclia-models>=0.24.2
|
30
30
|
Requires-Dist: uvicorn
|
@@ -78,7 +78,7 @@ nucliadb/common/datamanagers/fields.py,sha256=9KqBzTssAT68FR5hd17Xu_CSwAYdKFuYic
|
|
78
78
|
nucliadb/common/datamanagers/kb.py,sha256=P7EhF4tApIUG2jw_HH1oMufTKG9__kuOLKnrCNGbDM4,6156
|
79
79
|
nucliadb/common/datamanagers/labels.py,sha256=Zm0GQpSPoGXEEysUY7VsDIcyKSIIQsMVphj23IyM9_c,4502
|
80
80
|
nucliadb/common/datamanagers/processing.py,sha256=ByxdZzdbAfJGqC6__mY-zryjk040TyQfcUq3rxujeoY,1587
|
81
|
-
nucliadb/common/datamanagers/resources.py,sha256=
|
81
|
+
nucliadb/common/datamanagers/resources.py,sha256=VwFdCyHSnzMU3ASYRhC-wuCjCQEjOKEF7tIob4lTcPg,10793
|
82
82
|
nucliadb/common/datamanagers/rollover.py,sha256=BM1hJ2cEU91xekM5PtmnA0SN3i3w0WmodiyTpO8YZZs,7865
|
83
83
|
nucliadb/common/datamanagers/search_configurations.py,sha256=O-8eW43CE46GcxO6TB5hpi27NBguv4BL4SI1vLlN8os,2463
|
84
84
|
nucliadb/common/datamanagers/synonyms.py,sha256=zk3GEH38KF5vV_VcuL6DCg-2JwgXJfQl7Io6VPqv2cw,1566
|
@@ -141,7 +141,7 @@ nucliadb/ingest/fields/text.py,sha256=tFvSQJAe0W7ePpp2_WDfLiE2yglR1OTU0Zht9acvOF
|
|
141
141
|
nucliadb/ingest/orm/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
|
142
142
|
nucliadb/ingest/orm/brain.py,sha256=A8H1J7Bo95sNzDgYr0_UNoemQhWOFEFz9UlYfs6ug-8,29407
|
143
143
|
nucliadb/ingest/orm/broker_message.py,sha256=XWaiZgDOz94NPOPT-hqbRr5ZkpVimUw6PjUJNftfoVw,7514
|
144
|
-
nucliadb/ingest/orm/entities.py,sha256=
|
144
|
+
nucliadb/ingest/orm/entities.py,sha256=a-aYuKBUQhxDKFtXOzTAkLlY_t2JiTfaptw2vt3AQDQ,14915
|
145
145
|
nucliadb/ingest/orm/exceptions.py,sha256=k4Esv4NtL4TrGTcsQpwrSfDhPQpiYcRbB1SpYmBX5MY,1432
|
146
146
|
nucliadb/ingest/orm/knowledgebox.py,sha256=Bfb4-MIQWlaJrQAUDbgs_iIsXCYjS7s5YiiGl_Jb4jo,23887
|
147
147
|
nucliadb/ingest/orm/metrics.py,sha256=OkwMSPKLZcKba0ZTwtTiIxwBgaLMX5ydhGieKvi2y7E,1096
|
@@ -229,10 +229,10 @@ nucliadb/search/search/filters.py,sha256=1MkHlJjAQqoRCj7e5cEzK2HvBxGLE17I_omsjik
|
|
229
229
|
nucliadb/search/search/find.py,sha256=pjc-i55j_p_F1H0La4qc9slZSl_gICoFzl3fy6FLYTM,10314
|
230
230
|
nucliadb/search/search/find_merge.py,sha256=3FnzKFEnVemg6FO_6zveulbAU7klvsiPEBvLrpBBMg8,17450
|
231
231
|
nucliadb/search/search/graph_merge.py,sha256=OiUNiXOWwrUVKqStuRcoUJwvDbDYamqIgiAy_FwPdMI,3405
|
232
|
-
nucliadb/search/search/graph_strategy.py,sha256=
|
232
|
+
nucliadb/search/search/graph_strategy.py,sha256=SPJdDHQcTFsNb1IEWdWzklC5j1Vv9igibo0dYQAgcy0,33113
|
233
233
|
nucliadb/search/search/hydrator.py,sha256=-R37gCrGxkyaiHQalnTWHNG_FCx11Zucd7qA1vQCxuw,6985
|
234
234
|
nucliadb/search/search/ingestion_agents.py,sha256=NeJr4EEX-bvFFMGvXOOwLv8uU7NuQ-ntJnnrhnKfMzY,3174
|
235
|
-
nucliadb/search/search/merge.py,sha256=
|
235
|
+
nucliadb/search/search/merge.py,sha256=fh5WnA_xnXp-Iiq5Cud9hIGole7_0OW2b3Oymk32D6Y,22689
|
236
236
|
nucliadb/search/search/metrics.py,sha256=GGGtXHLhK79_ESV277xkBVjcaMURXHCxYG0EdGamUd8,2886
|
237
237
|
nucliadb/search/search/paragraphs.py,sha256=pNAEiYqJGGUVcEf7xf-PFMVqz0PX4Qb-WNG-_zPGN2o,7799
|
238
238
|
nucliadb/search/search/pgcatalog.py,sha256=s_J98fsX_RuFXwpejpkGqG-tD9ELuzz4YQ6U3ew5h2g,9313
|
@@ -258,7 +258,7 @@ nucliadb/search/search/query_parser/old_filters.py,sha256=-zbfN-RsXoj_DRjh3Lfp-w
|
|
258
258
|
nucliadb/search/search/query_parser/parsers/__init__.py,sha256=ySCNSdbesLXGZyR88919njulA6UE10_3PhqMG_Yj1o4,1034
|
259
259
|
nucliadb/search/search/query_parser/parsers/catalog.py,sha256=XdBiTweGTQkj8m_V_i2xbwp7P5pPO8K1Tud692XKhMw,7149
|
260
260
|
nucliadb/search/search/query_parser/parsers/find.py,sha256=q3wH_i0DGceeKckYEH3c5MqM5EvRiMCL7r-6nCAId9Q,4666
|
261
|
-
nucliadb/search/search/query_parser/parsers/graph.py,sha256=
|
261
|
+
nucliadb/search/search/query_parser/parsers/graph.py,sha256=QJs-pybNXPsMSEkIHctb0Q0xQG-aArks8BtUxbJL5rU,9386
|
262
262
|
nucliadb/standalone/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
|
263
263
|
nucliadb/standalone/api_router.py,sha256=hgq9FXpihzgjHkwcVGfGCSwyXy67fqXTfLFHuINzIi0,5567
|
264
264
|
nucliadb/standalone/app.py,sha256=mAApNK_iVsQgJyd-mtwCeZq5csSimwnXmlQGH9a70pE,5586
|
@@ -354,8 +354,8 @@ nucliadb/writer/tus/local.py,sha256=7jYa_w9b-N90jWgN2sQKkNcomqn6JMVBOVeDOVYJHto,
|
|
354
354
|
nucliadb/writer/tus/s3.py,sha256=vF0NkFTXiXhXq3bCVXXVV-ED38ECVoUeeYViP8uMqcU,8357
|
355
355
|
nucliadb/writer/tus/storage.py,sha256=ToqwjoYnjI4oIcwzkhha_MPxi-k4Jk3Lt55zRwaC1SM,2903
|
356
356
|
nucliadb/writer/tus/utils.py,sha256=MSdVbRsRSZVdkaum69_0wku7X3p5wlZf4nr6E0GMKbw,2556
|
357
|
-
nucliadb-6.3.4.
|
358
|
-
nucliadb-6.3.4.
|
359
|
-
nucliadb-6.3.4.
|
360
|
-
nucliadb-6.3.4.
|
361
|
-
nucliadb-6.3.4.
|
357
|
+
nucliadb-6.3.4.post3821.dist-info/METADATA,sha256=I_IqkABVw9DZGcLJyK5KVtV7Tr4d8x1Ut3GX-Oncydo,4291
|
358
|
+
nucliadb-6.3.4.post3821.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
|
359
|
+
nucliadb-6.3.4.post3821.dist-info/entry_points.txt,sha256=XqGfgFDuY3zXQc8ewXM2TRVjTModIq851zOsgrmaXx4,1268
|
360
|
+
nucliadb-6.3.4.post3821.dist-info/top_level.txt,sha256=hwYhTVnX7jkQ9gJCkVrbqEG1M4lT2F_iPQND1fCzF80,20
|
361
|
+
nucliadb-6.3.4.post3821.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|