graphiti-core 0.3.5__py3-none-any.whl → 0.3.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of graphiti-core might be problematic. Click here for more details.
- graphiti_core/edges.py +7 -10
- graphiti_core/embedder/__init__.py +4 -0
- graphiti_core/embedder/client.py +34 -0
- graphiti_core/embedder/openai.py +48 -0
- graphiti_core/embedder/voyage.py +47 -0
- graphiti_core/errors.py +8 -0
- graphiti_core/graphiti.py +24 -18
- graphiti_core/helpers.py +30 -0
- graphiti_core/llm_client/anthropic_client.py +0 -5
- graphiti_core/llm_client/client.py +0 -4
- graphiti_core/llm_client/config.py +0 -1
- graphiti_core/llm_client/groq_client.py +0 -5
- graphiti_core/llm_client/openai_client.py +0 -6
- graphiti_core/llm_client/utils.py +3 -7
- graphiti_core/nodes.py +7 -9
- graphiti_core/prompts/eval.py +90 -0
- graphiti_core/prompts/lib.py +6 -0
- graphiti_core/search/search.py +54 -49
- graphiti_core/search/search_utils.py +40 -146
- graphiti_core/utils/maintenance/community_operations.py +2 -1
- graphiti_core/utils/maintenance/graph_data_operations.py +17 -31
- {graphiti_core-0.3.5.dist-info → graphiti_core-0.3.7.dist-info}/METADATA +1 -1
- {graphiti_core-0.3.5.dist-info → graphiti_core-0.3.7.dist-info}/RECORD +25 -20
- {graphiti_core-0.3.5.dist-info → graphiti_core-0.3.7.dist-info}/LICENSE +0 -0
- {graphiti_core-0.3.5.dist-info → graphiti_core-0.3.7.dist-info}/WHEEL +0 -0
graphiti_core/search/search.py
CHANGED
|
@@ -14,6 +14,7 @@ See the License for the specific language governing permissions and
|
|
|
14
14
|
limitations under the License.
|
|
15
15
|
"""
|
|
16
16
|
|
|
17
|
+
import asyncio
|
|
17
18
|
import logging
|
|
18
19
|
from collections import defaultdict
|
|
19
20
|
from time import time
|
|
@@ -21,8 +22,8 @@ from time import time
|
|
|
21
22
|
from neo4j import AsyncDriver
|
|
22
23
|
|
|
23
24
|
from graphiti_core.edges import EntityEdge
|
|
25
|
+
from graphiti_core.embedder import EmbedderClient
|
|
24
26
|
from graphiti_core.errors import SearchRerankerError
|
|
25
|
-
from graphiti_core.llm_client.config import EMBEDDING_DIM
|
|
26
27
|
from graphiti_core.nodes import CommunityNode, EntityNode
|
|
27
28
|
from graphiti_core.search.search_config import (
|
|
28
29
|
DEFAULT_SEARCH_LIMIT,
|
|
@@ -55,7 +56,7 @@ logger = logging.getLogger(__name__)
|
|
|
55
56
|
|
|
56
57
|
async def search(
|
|
57
58
|
driver: AsyncDriver,
|
|
58
|
-
embedder,
|
|
59
|
+
embedder: EmbedderClient,
|
|
59
60
|
query: str,
|
|
60
61
|
group_ids: list[str] | None,
|
|
61
62
|
config: SearchConfig,
|
|
@@ -65,32 +66,39 @@ async def search(
|
|
|
65
66
|
query = query.replace('\n', ' ')
|
|
66
67
|
# if group_ids is empty, set it to None
|
|
67
68
|
group_ids = group_ids if group_ids else None
|
|
68
|
-
edges = (
|
|
69
|
-
|
|
70
|
-
driver,
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
69
|
+
edges, nodes, communities = await asyncio.gather(
|
|
70
|
+
edge_search(
|
|
71
|
+
driver,
|
|
72
|
+
embedder,
|
|
73
|
+
query,
|
|
74
|
+
group_ids,
|
|
75
|
+
config.edge_config,
|
|
76
|
+
center_node_uuid,
|
|
77
|
+
config.limit,
|
|
78
|
+
),
|
|
79
|
+
node_search(
|
|
80
|
+
driver,
|
|
81
|
+
embedder,
|
|
82
|
+
query,
|
|
83
|
+
group_ids,
|
|
84
|
+
config.node_config,
|
|
85
|
+
center_node_uuid,
|
|
86
|
+
config.limit,
|
|
87
|
+
),
|
|
88
|
+
community_search(
|
|
89
|
+
driver,
|
|
90
|
+
embedder,
|
|
91
|
+
query,
|
|
92
|
+
group_ids,
|
|
93
|
+
config.community_config,
|
|
94
|
+
config.limit,
|
|
95
|
+
),
|
|
88
96
|
)
|
|
89
97
|
|
|
90
98
|
results = SearchResults(
|
|
91
|
-
edges=edges
|
|
92
|
-
nodes=nodes
|
|
93
|
-
communities=communities
|
|
99
|
+
edges=edges,
|
|
100
|
+
nodes=nodes,
|
|
101
|
+
communities=communities,
|
|
94
102
|
)
|
|
95
103
|
|
|
96
104
|
end = time()
|
|
@@ -102,13 +110,16 @@ async def search(
|
|
|
102
110
|
|
|
103
111
|
async def edge_search(
|
|
104
112
|
driver: AsyncDriver,
|
|
105
|
-
embedder,
|
|
113
|
+
embedder: EmbedderClient,
|
|
106
114
|
query: str,
|
|
107
115
|
group_ids: list[str] | None,
|
|
108
|
-
config: EdgeSearchConfig,
|
|
116
|
+
config: EdgeSearchConfig | None,
|
|
109
117
|
center_node_uuid: str | None = None,
|
|
110
118
|
limit=DEFAULT_SEARCH_LIMIT,
|
|
111
119
|
) -> list[EntityEdge]:
|
|
120
|
+
if config is None:
|
|
121
|
+
return []
|
|
122
|
+
|
|
112
123
|
search_results: list[list[EntityEdge]] = []
|
|
113
124
|
|
|
114
125
|
if EdgeSearchMethod.bm25 in config.search_methods:
|
|
@@ -116,11 +127,7 @@ async def edge_search(
|
|
|
116
127
|
search_results.append(text_search)
|
|
117
128
|
|
|
118
129
|
if EdgeSearchMethod.cosine_similarity in config.search_methods:
|
|
119
|
-
search_vector = (
|
|
120
|
-
(await embedder.create(input=[query], model='text-embedding-3-small'))
|
|
121
|
-
.data[0]
|
|
122
|
-
.embedding[:EMBEDDING_DIM]
|
|
123
|
-
)
|
|
130
|
+
search_vector = await embedder.create(input=[query])
|
|
124
131
|
|
|
125
132
|
similarity_search = await edge_similarity_search(
|
|
126
133
|
driver, search_vector, None, None, group_ids, 2 * limit
|
|
@@ -162,18 +169,21 @@ async def edge_search(
|
|
|
162
169
|
if config.reranker == EdgeReranker.episode_mentions:
|
|
163
170
|
reranked_edges.sort(reverse=True, key=lambda edge: len(edge.episodes))
|
|
164
171
|
|
|
165
|
-
return reranked_edges
|
|
172
|
+
return reranked_edges[:limit]
|
|
166
173
|
|
|
167
174
|
|
|
168
175
|
async def node_search(
|
|
169
176
|
driver: AsyncDriver,
|
|
170
|
-
embedder,
|
|
177
|
+
embedder: EmbedderClient,
|
|
171
178
|
query: str,
|
|
172
179
|
group_ids: list[str] | None,
|
|
173
|
-
config: NodeSearchConfig,
|
|
180
|
+
config: NodeSearchConfig | None,
|
|
174
181
|
center_node_uuid: str | None = None,
|
|
175
182
|
limit=DEFAULT_SEARCH_LIMIT,
|
|
176
183
|
) -> list[EntityNode]:
|
|
184
|
+
if config is None:
|
|
185
|
+
return []
|
|
186
|
+
|
|
177
187
|
search_results: list[list[EntityNode]] = []
|
|
178
188
|
|
|
179
189
|
if NodeSearchMethod.bm25 in config.search_methods:
|
|
@@ -181,11 +191,7 @@ async def node_search(
|
|
|
181
191
|
search_results.append(text_search)
|
|
182
192
|
|
|
183
193
|
if NodeSearchMethod.cosine_similarity in config.search_methods:
|
|
184
|
-
search_vector = (
|
|
185
|
-
(await embedder.create(input=[query], model='text-embedding-3-small'))
|
|
186
|
-
.data[0]
|
|
187
|
-
.embedding[:EMBEDDING_DIM]
|
|
188
|
-
)
|
|
194
|
+
search_vector = await embedder.create(input=[query])
|
|
189
195
|
|
|
190
196
|
similarity_search = await node_similarity_search(
|
|
191
197
|
driver, search_vector, group_ids, 2 * limit
|
|
@@ -212,17 +218,20 @@ async def node_search(
|
|
|
212
218
|
|
|
213
219
|
reranked_nodes = [node_uuid_map[uuid] for uuid in reranked_uuids]
|
|
214
220
|
|
|
215
|
-
return reranked_nodes
|
|
221
|
+
return reranked_nodes[:limit]
|
|
216
222
|
|
|
217
223
|
|
|
218
224
|
async def community_search(
|
|
219
225
|
driver: AsyncDriver,
|
|
220
|
-
embedder,
|
|
226
|
+
embedder: EmbedderClient,
|
|
221
227
|
query: str,
|
|
222
228
|
group_ids: list[str] | None,
|
|
223
|
-
config: CommunitySearchConfig,
|
|
229
|
+
config: CommunitySearchConfig | None,
|
|
224
230
|
limit=DEFAULT_SEARCH_LIMIT,
|
|
225
231
|
) -> list[CommunityNode]:
|
|
232
|
+
if config is None:
|
|
233
|
+
return []
|
|
234
|
+
|
|
226
235
|
search_results: list[list[CommunityNode]] = []
|
|
227
236
|
|
|
228
237
|
if CommunitySearchMethod.bm25 in config.search_methods:
|
|
@@ -230,11 +239,7 @@ async def community_search(
|
|
|
230
239
|
search_results.append(text_search)
|
|
231
240
|
|
|
232
241
|
if CommunitySearchMethod.cosine_similarity in config.search_methods:
|
|
233
|
-
search_vector = (
|
|
234
|
-
(await embedder.create(input=[query], model='text-embedding-3-small'))
|
|
235
|
-
.data[0]
|
|
236
|
-
.embedding[:EMBEDDING_DIM]
|
|
237
|
-
)
|
|
242
|
+
search_vector = await embedder.create(input=[query])
|
|
238
243
|
|
|
239
244
|
similarity_search = await community_similarity_search(
|
|
240
245
|
driver, search_vector, group_ids, 2 * limit
|
|
@@ -255,4 +260,4 @@ async def community_search(
|
|
|
255
260
|
|
|
256
261
|
reranked_communities = [community_uuid_map[uuid] for uuid in reranked_uuids]
|
|
257
262
|
|
|
258
|
-
return reranked_communities
|
|
263
|
+
return reranked_communities[:limit]
|
|
graphiti_core/search/search_utils.py
CHANGED
|
@@ -16,13 +16,13 @@ limitations under the License.
|
|
|
16
16
|
|
|
17
17
|
import asyncio
|
|
18
18
|
import logging
|
|
19
|
-
import re
|
|
20
19
|
from collections import defaultdict
|
|
21
20
|
from time import time
|
|
22
21
|
|
|
23
22
|
from neo4j import AsyncDriver, Query
|
|
24
23
|
|
|
25
24
|
from graphiti_core.edges import EntityEdge, get_entity_edge_from_record
|
|
25
|
+
from graphiti_core.helpers import lucene_sanitize
|
|
26
26
|
from graphiti_core.nodes import (
|
|
27
27
|
CommunityNode,
|
|
28
28
|
EntityNode,
|
|
@@ -36,6 +36,22 @@ logger = logging.getLogger(__name__)
|
|
|
36
36
|
RELEVANT_SCHEMA_LIMIT = 3
|
|
37
37
|
|
|
38
38
|
|
|
39
|
+
def fulltext_query(query: str, group_ids: list[str] | None = None):
|
|
40
|
+
group_ids_filter_list = (
|
|
41
|
+
[f'group_id:"{lucene_sanitize(g)}"' for g in group_ids] if group_ids is not None else []
|
|
42
|
+
)
|
|
43
|
+
group_ids_filter = ''
|
|
44
|
+
for f in group_ids_filter_list:
|
|
45
|
+
group_ids_filter += f if not group_ids_filter else f'OR {f}'
|
|
46
|
+
|
|
47
|
+
group_ids_filter += ' AND ' if group_ids_filter else ''
|
|
48
|
+
|
|
49
|
+
fuzzy_query = lucene_sanitize(query) + '~'
|
|
50
|
+
full_query = group_ids_filter + fuzzy_query
|
|
51
|
+
|
|
52
|
+
return full_query
|
|
53
|
+
|
|
54
|
+
|
|
39
55
|
async def get_mentioned_nodes(
|
|
40
56
|
driver: AsyncDriver, episodes: list[EpisodicNode]
|
|
41
57
|
) -> list[EntityNode]:
|
|
@@ -91,11 +107,15 @@ async def edge_fulltext_search(
|
|
|
91
107
|
limit=RELEVANT_SCHEMA_LIMIT,
|
|
92
108
|
) -> list[EntityEdge]:
|
|
93
109
|
# fulltext search over facts
|
|
110
|
+
fuzzy_query = fulltext_query(query, group_ids)
|
|
111
|
+
|
|
94
112
|
cypher_query = Query("""
|
|
95
|
-
CALL db.index.fulltext.queryRelationships("
|
|
113
|
+
CALL db.index.fulltext.queryRelationships("edge_name_and_fact", $query)
|
|
96
114
|
YIELD relationship AS rel, score
|
|
97
|
-
MATCH (n:Entity
|
|
98
|
-
WHERE $
|
|
115
|
+
MATCH (n:Entity)-[r {uuid: rel.uuid}]-(m:Entity)
|
|
116
|
+
WHERE ($source_uuid IS NULL OR n.uuid = $source_uuid)
|
|
117
|
+
AND ($target_uuid IS NULL OR m.uuid = $target_uuid)
|
|
118
|
+
AND ($group_ids IS NULL OR n.group_id IN $group_ids)
|
|
99
119
|
RETURN
|
|
100
120
|
r.uuid AS uuid,
|
|
101
121
|
r.group_id AS group_id,
|
|
@@ -112,72 +132,6 @@ async def edge_fulltext_search(
|
|
|
112
132
|
ORDER BY score DESC LIMIT $limit
|
|
113
133
|
""")
|
|
114
134
|
|
|
115
|
-
if source_node_uuid is None and target_node_uuid is None:
|
|
116
|
-
cypher_query = Query("""
|
|
117
|
-
CALL db.index.fulltext.queryRelationships("name_and_fact", $query)
|
|
118
|
-
YIELD relationship AS rel, score
|
|
119
|
-
MATCH (n:Entity)-[r {uuid: rel.uuid}]-(m:Entity)
|
|
120
|
-
WHERE $group_ids IS NULL OR r.group_id IN $group_ids
|
|
121
|
-
RETURN
|
|
122
|
-
r.uuid AS uuid,
|
|
123
|
-
r.group_id AS group_id,
|
|
124
|
-
n.uuid AS source_node_uuid,
|
|
125
|
-
m.uuid AS target_node_uuid,
|
|
126
|
-
r.created_at AS created_at,
|
|
127
|
-
r.name AS name,
|
|
128
|
-
r.fact AS fact,
|
|
129
|
-
r.fact_embedding AS fact_embedding,
|
|
130
|
-
r.episodes AS episodes,
|
|
131
|
-
r.expired_at AS expired_at,
|
|
132
|
-
r.valid_at AS valid_at,
|
|
133
|
-
r.invalid_at AS invalid_at
|
|
134
|
-
ORDER BY score DESC LIMIT $limit
|
|
135
|
-
""")
|
|
136
|
-
elif source_node_uuid is None:
|
|
137
|
-
cypher_query = Query("""
|
|
138
|
-
CALL db.index.fulltext.queryRelationships("name_and_fact", $query)
|
|
139
|
-
YIELD relationship AS rel, score
|
|
140
|
-
MATCH (n:Entity)-[r {uuid: rel.uuid}]-(m:Entity {uuid: $target_uuid})
|
|
141
|
-
WHERE $group_ids IS NULL OR r.group_id IN $group_ids
|
|
142
|
-
RETURN
|
|
143
|
-
r.uuid AS uuid,
|
|
144
|
-
r.group_id AS group_id,
|
|
145
|
-
n.uuid AS source_node_uuid,
|
|
146
|
-
m.uuid AS target_node_uuid,
|
|
147
|
-
r.created_at AS created_at,
|
|
148
|
-
r.name AS name,
|
|
149
|
-
r.fact AS fact,
|
|
150
|
-
r.fact_embedding AS fact_embedding,
|
|
151
|
-
r.episodes AS episodes,
|
|
152
|
-
r.expired_at AS expired_at,
|
|
153
|
-
r.valid_at AS valid_at,
|
|
154
|
-
r.invalid_at AS invalid_at
|
|
155
|
-
ORDER BY score DESC LIMIT $limit
|
|
156
|
-
""")
|
|
157
|
-
elif target_node_uuid is None:
|
|
158
|
-
cypher_query = Query("""
|
|
159
|
-
CALL db.index.fulltext.queryRelationships("name_and_fact", $query)
|
|
160
|
-
YIELD relationship AS rel, score
|
|
161
|
-
MATCH (n:Entity {uuid: $source_uuid})-[r {uuid: rel.uuid}]-(m:Entity)
|
|
162
|
-
WHERE $group_ids IS NULL OR r.group_id IN $group_ids
|
|
163
|
-
RETURN
|
|
164
|
-
r.uuid AS uuid,
|
|
165
|
-
r.group_id AS group_id,
|
|
166
|
-
n.uuid AS source_node_uuid,
|
|
167
|
-
m.uuid AS target_node_uuid,
|
|
168
|
-
r.created_at AS created_at,
|
|
169
|
-
r.name AS name,
|
|
170
|
-
r.fact AS fact,
|
|
171
|
-
r.fact_embedding AS fact_embedding,
|
|
172
|
-
r.episodes AS episodes,
|
|
173
|
-
r.expired_at AS expired_at,
|
|
174
|
-
r.valid_at AS valid_at,
|
|
175
|
-
r.invalid_at AS invalid_at
|
|
176
|
-
ORDER BY score DESC LIMIT $limit
|
|
177
|
-
""")
|
|
178
|
-
|
|
179
|
-
fuzzy_query = re.sub(r'[^\w\s]', '', query) + '~'
|
|
180
|
-
|
|
181
135
|
records, _, _ = await driver.execute_query(
|
|
182
136
|
cypher_query,
|
|
183
137
|
query=fuzzy_query,
|
|
@@ -202,11 +156,12 @@ async def edge_similarity_search(
|
|
|
202
156
|
) -> list[EntityEdge]:
|
|
203
157
|
# vector similarity search over embedded facts
|
|
204
158
|
query = Query("""
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
159
|
+
MATCH (n:Entity)-[r:RELATES_TO]-(m:Entity)
|
|
160
|
+
WHERE ($group_ids IS NULL OR r.group_id IN $group_ids)
|
|
161
|
+
AND ($source_uuid IS NULL OR n.uuid = $source_uuid)
|
|
162
|
+
AND ($target_uuid IS NULL OR m.uuid = $target_uuid)
|
|
209
163
|
RETURN
|
|
164
|
+
vector.similarity.cosine(r.fact_embedding, $search_vector) AS score,
|
|
210
165
|
r.uuid AS uuid,
|
|
211
166
|
r.group_id AS group_id,
|
|
212
167
|
n.uuid AS source_node_uuid,
|
|
@@ -220,72 +175,9 @@ async def edge_similarity_search(
|
|
|
220
175
|
r.valid_at AS valid_at,
|
|
221
176
|
r.invalid_at AS invalid_at
|
|
222
177
|
ORDER BY score DESC
|
|
178
|
+
LIMIT $limit
|
|
223
179
|
""")
|
|
224
180
|
|
|
225
|
-
if source_node_uuid is None and target_node_uuid is None:
|
|
226
|
-
query = Query("""
|
|
227
|
-
CALL db.index.vector.queryRelationships("fact_embedding", $limit, $search_vector)
|
|
228
|
-
YIELD relationship AS rel, score
|
|
229
|
-
MATCH (n:Entity)-[r {uuid: rel.uuid}]-(m:Entity)
|
|
230
|
-
WHERE $group_ids IS NULL OR r.group_id IN $group_ids
|
|
231
|
-
RETURN
|
|
232
|
-
r.uuid AS uuid,
|
|
233
|
-
r.group_id AS group_id,
|
|
234
|
-
n.uuid AS source_node_uuid,
|
|
235
|
-
m.uuid AS target_node_uuid,
|
|
236
|
-
r.created_at AS created_at,
|
|
237
|
-
r.name AS name,
|
|
238
|
-
r.fact AS fact,
|
|
239
|
-
r.fact_embedding AS fact_embedding,
|
|
240
|
-
r.episodes AS episodes,
|
|
241
|
-
r.expired_at AS expired_at,
|
|
242
|
-
r.valid_at AS valid_at,
|
|
243
|
-
r.invalid_at AS invalid_at
|
|
244
|
-
ORDER BY score DESC
|
|
245
|
-
""")
|
|
246
|
-
elif source_node_uuid is None:
|
|
247
|
-
query = Query("""
|
|
248
|
-
CALL db.index.vector.queryRelationships("fact_embedding", $limit, $search_vector)
|
|
249
|
-
YIELD relationship AS rel, score
|
|
250
|
-
MATCH (n:Entity)-[r {uuid: rel.uuid}]-(m:Entity {uuid: $target_uuid})
|
|
251
|
-
WHERE $group_ids IS NULL OR r.group_id IN $group_ids
|
|
252
|
-
RETURN
|
|
253
|
-
r.uuid AS uuid,
|
|
254
|
-
r.group_id AS group_id,
|
|
255
|
-
n.uuid AS source_node_uuid,
|
|
256
|
-
m.uuid AS target_node_uuid,
|
|
257
|
-
r.created_at AS created_at,
|
|
258
|
-
r.name AS name,
|
|
259
|
-
r.fact AS fact,
|
|
260
|
-
r.fact_embedding AS fact_embedding,
|
|
261
|
-
r.episodes AS episodes,
|
|
262
|
-
r.expired_at AS expired_at,
|
|
263
|
-
r.valid_at AS valid_at,
|
|
264
|
-
r.invalid_at AS invalid_at
|
|
265
|
-
ORDER BY score DESC
|
|
266
|
-
""")
|
|
267
|
-
elif target_node_uuid is None:
|
|
268
|
-
query = Query("""
|
|
269
|
-
CALL db.index.vector.queryRelationships("fact_embedding", $limit, $search_vector)
|
|
270
|
-
YIELD relationship AS rel, score
|
|
271
|
-
MATCH (n:Entity {uuid: $source_uuid})-[r {uuid: rel.uuid}]-(m:Entity)
|
|
272
|
-
WHERE $group_ids IS NULL OR r.group_id IN $group_ids
|
|
273
|
-
RETURN
|
|
274
|
-
r.uuid AS uuid,
|
|
275
|
-
r.group_id AS group_id,
|
|
276
|
-
n.uuid AS source_node_uuid,
|
|
277
|
-
m.uuid AS target_node_uuid,
|
|
278
|
-
r.created_at AS created_at,
|
|
279
|
-
r.name AS name,
|
|
280
|
-
r.fact AS fact,
|
|
281
|
-
r.fact_embedding AS fact_embedding,
|
|
282
|
-
r.episodes AS episodes,
|
|
283
|
-
r.expired_at AS expired_at,
|
|
284
|
-
r.valid_at AS valid_at,
|
|
285
|
-
r.invalid_at AS invalid_at
|
|
286
|
-
ORDER BY score DESC
|
|
287
|
-
""")
|
|
288
|
-
|
|
289
181
|
records, _, _ = await driver.execute_query(
|
|
290
182
|
query,
|
|
291
183
|
search_vector=search_vector,
|
|
@@ -307,10 +199,11 @@ async def node_fulltext_search(
|
|
|
307
199
|
limit=RELEVANT_SCHEMA_LIMIT,
|
|
308
200
|
) -> list[EntityNode]:
|
|
309
201
|
# BM25 search to get top nodes
|
|
310
|
-
fuzzy_query =
|
|
202
|
+
fuzzy_query = fulltext_query(query, group_ids)
|
|
203
|
+
|
|
311
204
|
records, _, _ = await driver.execute_query(
|
|
312
205
|
"""
|
|
313
|
-
CALL db.index.fulltext.queryNodes("
|
|
206
|
+
CALL db.index.fulltext.queryNodes("node_name_and_summary", $query)
|
|
314
207
|
YIELD node AS n, score
|
|
315
208
|
WHERE $group_ids IS NULL OR n.group_id IN $group_ids
|
|
316
209
|
RETURN
|
|
@@ -341,11 +234,10 @@ async def node_similarity_search(
|
|
|
341
234
|
# vector similarity search over entity names
|
|
342
235
|
records, _, _ = await driver.execute_query(
|
|
343
236
|
"""
|
|
344
|
-
CALL db.index.vector.queryNodes("name_embedding", $limit, $search_vector)
|
|
345
|
-
YIELD node AS n, score
|
|
346
237
|
MATCH (n:Entity)
|
|
347
238
|
WHERE $group_ids IS NULL OR n.group_id IN $group_ids
|
|
348
239
|
RETURN
|
|
240
|
+
vector.similarity.cosine(n.name_embedding, $search_vector) AS score,
|
|
349
241
|
n.uuid As uuid,
|
|
350
242
|
n.group_id AS group_id,
|
|
351
243
|
n.name AS name,
|
|
@@ -353,6 +245,7 @@ async def node_similarity_search(
|
|
|
353
245
|
n.created_at AS created_at,
|
|
354
246
|
n.summary AS summary
|
|
355
247
|
ORDER BY score DESC
|
|
248
|
+
LIMIT $limit
|
|
356
249
|
""",
|
|
357
250
|
search_vector=search_vector,
|
|
358
251
|
group_ids=group_ids,
|
|
@@ -370,7 +263,8 @@ async def community_fulltext_search(
|
|
|
370
263
|
limit=RELEVANT_SCHEMA_LIMIT,
|
|
371
264
|
) -> list[CommunityNode]:
|
|
372
265
|
# BM25 search to get top communities
|
|
373
|
-
fuzzy_query =
|
|
266
|
+
fuzzy_query = fulltext_query(query, group_ids)
|
|
267
|
+
|
|
374
268
|
records, _, _ = await driver.execute_query(
|
|
375
269
|
"""
|
|
376
270
|
CALL db.index.fulltext.queryNodes("community_name", $query)
|
|
@@ -405,11 +299,10 @@ async def community_similarity_search(
|
|
|
405
299
|
# vector similarity search over entity names
|
|
406
300
|
records, _, _ = await driver.execute_query(
|
|
407
301
|
"""
|
|
408
|
-
CALL db.index.vector.queryNodes("community_name_embedding", $limit, $search_vector)
|
|
409
|
-
YIELD node AS comm, score
|
|
410
302
|
MATCH (comm:Community)
|
|
411
|
-
WHERE $group_ids IS NULL OR comm.group_id IN $group_ids
|
|
303
|
+
WHERE ($group_ids IS NULL OR comm.group_id IN $group_ids)
|
|
412
304
|
RETURN
|
|
305
|
+
vector.similarity.cosine(comm.name_embedding, $search_vector) AS score,
|
|
413
306
|
comm.uuid As uuid,
|
|
414
307
|
comm.group_id AS group_id,
|
|
415
308
|
comm.name AS name,
|
|
@@ -417,6 +310,7 @@ async def community_similarity_search(
|
|
|
417
310
|
comm.created_at AS created_at,
|
|
418
311
|
comm.summary AS summary
|
|
419
312
|
ORDER BY score DESC
|
|
313
|
+
LIMIT $limit
|
|
420
314
|
""",
|
|
421
315
|
search_vector=search_vector,
|
|
422
316
|
group_ids=group_ids,
|
|
graphiti_core/utils/maintenance/community_operations.py
CHANGED
|
@@ -7,6 +7,7 @@ from neo4j import AsyncDriver
|
|
|
7
7
|
from pydantic import BaseModel
|
|
8
8
|
|
|
9
9
|
from graphiti_core.edges import CommunityEdge
|
|
10
|
+
from graphiti_core.embedder import EmbedderClient
|
|
10
11
|
from graphiti_core.llm_client import LLMClient
|
|
11
12
|
from graphiti_core.nodes import CommunityNode, EntityNode, get_community_node_from_record
|
|
12
13
|
from graphiti_core.prompts import prompt_library
|
|
@@ -288,7 +289,7 @@ async def determine_entity_community(
|
|
|
288
289
|
|
|
289
290
|
|
|
290
291
|
async def update_community(
|
|
291
|
-
driver: AsyncDriver, llm_client: LLMClient, embedder, entity: EntityNode
|
|
292
|
+
driver: AsyncDriver, llm_client: LLMClient, embedder: EmbedderClient, entity: EntityNode
|
|
292
293
|
):
|
|
293
294
|
community, is_new = await determine_entity_community(driver, entity)
|
|
294
295
|
|
|
graphiti_core/utils/maintenance/graph_data_operations.py
CHANGED
|
@@ -28,7 +28,16 @@ EPISODE_WINDOW_LEN = 3
|
|
|
28
28
|
logger = logging.getLogger(__name__)
|
|
29
29
|
|
|
30
30
|
|
|
31
|
-
async def build_indices_and_constraints(driver: AsyncDriver):
|
|
31
|
+
async def build_indices_and_constraints(driver: AsyncDriver, delete_existing: bool = False):
|
|
32
|
+
if delete_existing:
|
|
33
|
+
records, _, _ = await driver.execute_query("""
|
|
34
|
+
SHOW INDEXES YIELD name
|
|
35
|
+
""")
|
|
36
|
+
index_names = [record['name'] for record in records]
|
|
37
|
+
await asyncio.gather(
|
|
38
|
+
*[driver.execute_query("""DROP INDEX $name""", name=name) for name in index_names]
|
|
39
|
+
)
|
|
40
|
+
|
|
32
41
|
range_indices: list[LiteralString] = [
|
|
33
42
|
'CREATE INDEX entity_uuid IF NOT EXISTS FOR (n:Entity) ON (n.uuid)',
|
|
34
43
|
'CREATE INDEX episode_uuid IF NOT EXISTS FOR (n:Episodic) ON (n.uuid)',
|
|
@@ -52,38 +61,15 @@ async def build_indices_and_constraints(driver: AsyncDriver):
|
|
|
52
61
|
]
|
|
53
62
|
|
|
54
63
|
fulltext_indices: list[LiteralString] = [
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
64
|
+
"""CREATE FULLTEXT INDEX node_name_and_summary IF NOT EXISTS
|
|
65
|
+
FOR (n:Entity) ON EACH [n.name, n.summary, n.group_id]""",
|
|
66
|
+
"""CREATE FULLTEXT INDEX community_name IF NOT EXISTS
|
|
67
|
+
FOR (n:Community) ON EACH [n.name, n.group_id]""",
|
|
68
|
+
"""CREATE FULLTEXT INDEX edge_name_and_fact IF NOT EXISTS
|
|
69
|
+
FOR ()-[e:RELATES_TO]-() ON EACH [e.name, e.fact, e.group_id]""",
|
|
58
70
|
]
|
|
59
71
|
|
|
60
|
-
|
|
61
|
-
"""
|
|
62
|
-
CREATE VECTOR INDEX fact_embedding IF NOT EXISTS
|
|
63
|
-
FOR ()-[r:RELATES_TO]-() ON (r.fact_embedding)
|
|
64
|
-
OPTIONS {indexConfig: {
|
|
65
|
-
`vector.dimensions`: 1024,
|
|
66
|
-
`vector.similarity_function`: 'cosine'
|
|
67
|
-
}}
|
|
68
|
-
""",
|
|
69
|
-
"""
|
|
70
|
-
CREATE VECTOR INDEX name_embedding IF NOT EXISTS
|
|
71
|
-
FOR (n:Entity) ON (n.name_embedding)
|
|
72
|
-
OPTIONS {indexConfig: {
|
|
73
|
-
`vector.dimensions`: 1024,
|
|
74
|
-
`vector.similarity_function`: 'cosine'
|
|
75
|
-
}}
|
|
76
|
-
""",
|
|
77
|
-
"""
|
|
78
|
-
CREATE VECTOR INDEX community_name_embedding IF NOT EXISTS
|
|
79
|
-
FOR (n:Community) ON (n.name_embedding)
|
|
80
|
-
OPTIONS {indexConfig: {
|
|
81
|
-
`vector.dimensions`: 1024,
|
|
82
|
-
`vector.similarity_function`: 'cosine'
|
|
83
|
-
}}
|
|
84
|
-
""",
|
|
85
|
-
]
|
|
86
|
-
index_queries: list[LiteralString] = range_indices + fulltext_indices + vector_indices
|
|
72
|
+
index_queries: list[LiteralString] = range_indices + fulltext_indices
|
|
87
73
|
|
|
88
74
|
await asyncio.gather(*[driver.execute_query(query) for query in index_queries])
|
|
89
75
|
|
|
{graphiti_core-0.3.5.dist-info → graphiti_core-0.3.7.dist-info}/RECORD
CHANGED
|
@@ -1,43 +1,48 @@
|
|
|
1
1
|
graphiti_core/__init__.py,sha256=e5SWFkRiaUwfprYIeIgVIh7JDedNiloZvd3roU-0aDY,55
|
|
2
|
-
graphiti_core/edges.py,sha256=
|
|
3
|
-
graphiti_core/
|
|
4
|
-
graphiti_core/
|
|
5
|
-
graphiti_core/
|
|
2
|
+
graphiti_core/edges.py,sha256=lLuRKjSHTk1GvTS06OUw2lSMiDAB4TQSXgnLq1fU3n8,13378
|
|
3
|
+
graphiti_core/embedder/__init__.py,sha256=eWd-0sPxflnYXLoWNT9sxwCIFun5JNO9Fk4E-ZXXf8Y,164
|
|
4
|
+
graphiti_core/embedder/client.py,sha256=Sd9CyYXaqRazdOH8opKackrTx-y9y-T54M78XTVMzxs,1006
|
|
5
|
+
graphiti_core/embedder/openai.py,sha256=28cl4qQCQeu6EGxVVPw3lPesA-Z_Cpvuhozyc1jdqVg,1586
|
|
6
|
+
graphiti_core/embedder/voyage.py,sha256=pGrSquGnSiYl4nXGnutbdWchtYgZb0Fi_yW3c90dPlI,1497
|
|
7
|
+
graphiti_core/errors.py,sha256=iJrkk5sTgc2z16ABS6TziPylEabdBJcpk0x9KyBUmxs,1527
|
|
8
|
+
graphiti_core/graphiti.py,sha256=5E2UbYlbl65D3MZyagEUPgoPrb_kVYDIqIw7KVlU_NM,26162
|
|
9
|
+
graphiti_core/helpers.py,sha256=_wTSDcYmeXT3u0AwX15iSLuTRa_SR4jJdT10rxfl1_E,1484
|
|
6
10
|
graphiti_core/llm_client/__init__.py,sha256=PA80TSMeX-sUXITXEAxMDEt3gtfZgcJrGJUcyds1mSo,207
|
|
7
|
-
graphiti_core/llm_client/anthropic_client.py,sha256=
|
|
8
|
-
graphiti_core/llm_client/client.py,sha256=
|
|
9
|
-
graphiti_core/llm_client/config.py,sha256=
|
|
11
|
+
graphiti_core/llm_client/anthropic_client.py,sha256=4l2PbCjIoeRr7UJ2DUh2grYLTtE2vNaWlo72IIRQDeI,2405
|
|
12
|
+
graphiti_core/llm_client/client.py,sha256=WAnX0e4EuCFHXdFHeq_O1HZsW1STSByvDCFUHMAHEFU,3394
|
|
13
|
+
graphiti_core/llm_client/config.py,sha256=VwtvD0B7TNqE6Cl-rvH5v-bAfmjMLhEUuFmHSPt10EI,2339
|
|
10
14
|
graphiti_core/llm_client/errors.py,sha256=-qlWwv1X-UjfsFIiNl-7yJIYvPwi7z8srVRfX4-s6uk,814
|
|
11
|
-
graphiti_core/llm_client/groq_client.py,sha256=
|
|
12
|
-
graphiti_core/llm_client/openai_client.py,sha256=
|
|
13
|
-
graphiti_core/llm_client/utils.py,sha256=
|
|
14
|
-
graphiti_core/nodes.py,sha256=
|
|
15
|
+
graphiti_core/llm_client/groq_client.py,sha256=5uGWeQ903EuNxuRiaeH-_J1U2Le_b7Q1UGV_K8bQAiw,2329
|
|
16
|
+
graphiti_core/llm_client/openai_client.py,sha256=xLkbpusRVFRK0zPr3kOqY31HK_XCXrpO5rqUSpcEqEU,3825
|
|
17
|
+
graphiti_core/llm_client/utils.py,sha256=Ms-QhA5X9rps7NBdJeQZUgQLD3vaZRWPiTlhJa6BjXM,995
|
|
18
|
+
graphiti_core/nodes.py,sha256=wIYeRspoRErcX0vvesk_fxhdXKCYn4rpgjgm3PdwSkI,13669
|
|
15
19
|
graphiti_core/prompts/__init__.py,sha256=EA-x9xUki9l8wnu2l8ek_oNf75-do5tq5hVq7Zbv8Kw,101
|
|
16
20
|
graphiti_core/prompts/dedupe_edges.py,sha256=DUNHdIudj50FAjkla4nc68tSFSD2yjmYHBw-Bb7ph20,6529
|
|
17
21
|
graphiti_core/prompts/dedupe_nodes.py,sha256=BZ9S-PB9SSGjc5Oo8ivdgA6rZx3OGOFhKtwrBlQ0bm0,7269
|
|
22
|
+
graphiti_core/prompts/eval.py,sha256=fYLY2nKwgE9dB7mtYMNKyn1tQXM8B-tOeYmSzB5Bxk8,2844
|
|
18
23
|
graphiti_core/prompts/extract_edge_dates.py,sha256=oOCR8mC_3gI1bumrmIjUbkNO-WTuLTXXAalPDYnDXeM,3655
|
|
19
24
|
graphiti_core/prompts/extract_edges.py,sha256=AQ8xYbAv_RKXAT6WMwXs1_GvUdLtM_lhLNbt3SkOAmk,5348
|
|
20
25
|
graphiti_core/prompts/extract_nodes.py,sha256=VIr0Nh0mSiodI3iGOQFszh7DOni4mufOKJDuGkMysl8,6889
|
|
21
26
|
graphiti_core/prompts/invalidate_edges.py,sha256=8SHt3iPTdmqk8A52LxgdMtI39w4USKqVDMOS2i6lRQ4,4342
|
|
22
|
-
graphiti_core/prompts/lib.py,sha256=
|
|
27
|
+
graphiti_core/prompts/lib.py,sha256=ZOE6nNoI_wQ12Sufx7rQkQtkIm_eTAL7pCiYGU2hcMI,4054
|
|
23
28
|
graphiti_core/prompts/models.py,sha256=cvx_Bv5RMFUD_5IUawYrbpOKLPHogai7_bm7YXrSz84,867
|
|
24
29
|
graphiti_core/prompts/summarize_nodes.py,sha256=FLuZpGTABgcxuIDkx_IKH115nHEw0rIaFhcGlWveAMc,2357
|
|
25
30
|
graphiti_core/py.typed,sha256=vlmmzQOt7bmeQl9L3XJP4W6Ry0iiELepnOrinKz5KQg,79
|
|
26
31
|
graphiti_core/search/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
27
|
-
graphiti_core/search/search.py,sha256=
|
|
32
|
+
graphiti_core/search/search.py,sha256=odxpm6MJw5ihEDjbBQ2Icvtr5Mf2oG8Yj6LpNqO3gFw,8620
|
|
28
33
|
graphiti_core/search/search_config.py,sha256=d8w9RDO55G2bwbjYQBaD6gXqEWK1-NsDANrNibYB6t8,2165
|
|
29
34
|
graphiti_core/search/search_config_recipes.py,sha256=_VJqvYB70e8Jke3hsbeQF3Bdogn2MubpYeAQe15M2Jo,3450
|
|
30
|
-
graphiti_core/search/search_utils.py,sha256=
|
|
35
|
+
graphiti_core/search/search_utils.py,sha256=WE-iVPI92AWR13aM3JQxtHaYoiPzDMtOOo8rEob8QEI,17844
|
|
31
36
|
graphiti_core/utils/__init__.py,sha256=cJAcMnBZdHBQmWrZdU1PQ1YmaL75bhVUkyVpIPuOyns,260
|
|
32
37
|
graphiti_core/utils/bulk_utils.py,sha256=JtoYTZPCigPa3n2E43Oe7QhFZRTA_QKNGy1jVgklHag,12614
|
|
33
38
|
graphiti_core/utils/maintenance/__init__.py,sha256=4b9sfxqyFZMLwxxS2lnQ6_wBr3xrJRIqfAWOidK8EK0,388
|
|
34
|
-
graphiti_core/utils/maintenance/community_operations.py,sha256=
|
|
39
|
+
graphiti_core/utils/maintenance/community_operations.py,sha256=Z2lVrTmUh42sEPqSDZq4fXbcj507BuZrHZKV1vJk6tU,9875
|
|
35
40
|
graphiti_core/utils/maintenance/edge_operations.py,sha256=lSeesSnWQ3vpeD2dIY0tSiHEHRMK6fiirEhNNT-s5os,11438
|
|
36
|
-
graphiti_core/utils/maintenance/graph_data_operations.py,sha256=
|
|
41
|
+
graphiti_core/utils/maintenance/graph_data_operations.py,sha256=RgdqYSau9Mr-f7IUSD1sSPztxlyO0C80C3MPPmPBRi0,6100
|
|
37
42
|
graphiti_core/utils/maintenance/node_operations.py,sha256=QAg4KQkSAOXx9QRaUp7t6DCaztZlzeOBC3__57FCs_o,9025
|
|
38
43
|
graphiti_core/utils/maintenance/temporal_operations.py,sha256=BzfGDm96w4HcUEsaWTHUBt5S8dNmDQL1eX6AuBL-XFM,8135
|
|
39
44
|
graphiti_core/utils/maintenance/utils.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
40
|
-
graphiti_core-0.3.
|
|
41
|
-
graphiti_core-0.3.
|
|
42
|
-
graphiti_core-0.3.
|
|
43
|
-
graphiti_core-0.3.
|
|
45
|
+
graphiti_core-0.3.7.dist-info/LICENSE,sha256=KCUwCyDXuVEgmDWkozHyniRyWjnWUWjkuDHfU6o3JlA,11325
|
|
46
|
+
graphiti_core-0.3.7.dist-info/METADATA,sha256=6NPJcK3qV8rcVDjopZyTqyc8WlcFXKYDVEOdOzco1KI,9395
|
|
47
|
+
graphiti_core-0.3.7.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
|
|
48
|
+
graphiti_core-0.3.7.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|