graphiti-core 0.12.0rc5__py3-none-any.whl → 0.12.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of graphiti-core might be problematic. Click here for more details.
- graphiti_core/cross_encoder/openai_reranker_client.py +1 -1
- graphiti_core/driver/__init__.py +17 -0
- graphiti_core/driver/driver.py +66 -0
- graphiti_core/driver/falkordb_driver.py +131 -0
- graphiti_core/driver/neo4j_driver.py +61 -0
- graphiti_core/edges.py +26 -26
- graphiti_core/embedder/azure_openai.py +64 -0
- graphiti_core/graph_queries.py +149 -0
- graphiti_core/graphiti.py +21 -8
- graphiti_core/graphiti_types.py +2 -2
- graphiti_core/helpers.py +9 -3
- graphiti_core/llm_client/__init__.py +16 -0
- graphiti_core/llm_client/azure_openai_client.py +73 -0
- graphiti_core/nodes.py +31 -31
- graphiti_core/prompts/dedupe_nodes.py +5 -1
- graphiti_core/prompts/extract_edges.py +2 -0
- graphiti_core/prompts/extract_nodes.py +2 -0
- graphiti_core/search/search.py +6 -10
- graphiti_core/search/search_utils.py +243 -187
- graphiti_core/utils/bulk_utils.py +21 -11
- graphiti_core/utils/maintenance/community_operations.py +6 -7
- graphiti_core/utils/maintenance/edge_operations.py +68 -3
- graphiti_core/utils/maintenance/graph_data_operations.py +13 -42
- graphiti_core/utils/maintenance/node_operations.py +19 -5
- {graphiti_core-0.12.0rc5.dist-info → graphiti_core-0.12.2.dist-info}/METADATA +4 -3
- {graphiti_core-0.12.0rc5.dist-info → graphiti_core-0.12.2.dist-info}/RECORD +28 -21
- {graphiti_core-0.12.0rc5.dist-info → graphiti_core-0.12.2.dist-info}/LICENSE +0 -0
- {graphiti_core-0.12.0rc5.dist-info → graphiti_core-0.12.2.dist-info}/WHEEL +0 -0
|
@@ -20,22 +20,24 @@ from collections import defaultdict
|
|
|
20
20
|
from datetime import datetime
|
|
21
21
|
from math import ceil
|
|
22
22
|
|
|
23
|
-
from neo4j import AsyncDriver, AsyncManagedTransaction
|
|
24
23
|
from numpy import dot, sqrt
|
|
25
24
|
from pydantic import BaseModel
|
|
26
25
|
from typing_extensions import Any
|
|
27
26
|
|
|
27
|
+
from graphiti_core.driver.driver import GraphDriver, GraphDriverSession
|
|
28
28
|
from graphiti_core.edges import Edge, EntityEdge, EpisodicEdge
|
|
29
29
|
from graphiti_core.embedder import EmbedderClient
|
|
30
|
+
from graphiti_core.graph_queries import (
|
|
31
|
+
get_entity_edge_save_bulk_query,
|
|
32
|
+
get_entity_node_save_bulk_query,
|
|
33
|
+
)
|
|
30
34
|
from graphiti_core.graphiti_types import GraphitiClients
|
|
31
35
|
from graphiti_core.helpers import DEFAULT_DATABASE, semaphore_gather
|
|
32
36
|
from graphiti_core.llm_client import LLMClient
|
|
33
37
|
from graphiti_core.models.edges.edge_db_queries import (
|
|
34
|
-
ENTITY_EDGE_SAVE_BULK,
|
|
35
38
|
EPISODIC_EDGE_SAVE_BULK,
|
|
36
39
|
)
|
|
37
40
|
from graphiti_core.models.nodes.node_db_queries import (
|
|
38
|
-
ENTITY_NODE_SAVE_BULK,
|
|
39
41
|
EPISODIC_NODE_SAVE_BULK,
|
|
40
42
|
)
|
|
41
43
|
from graphiti_core.nodes import EntityNode, EpisodeType, EpisodicNode
|
|
@@ -73,7 +75,7 @@ class RawEpisode(BaseModel):
|
|
|
73
75
|
|
|
74
76
|
|
|
75
77
|
async def retrieve_previous_episodes_bulk(
|
|
76
|
-
driver:
|
|
78
|
+
driver: GraphDriver, episodes: list[EpisodicNode]
|
|
77
79
|
) -> list[tuple[EpisodicNode, list[EpisodicNode]]]:
|
|
78
80
|
previous_episodes_list = await semaphore_gather(
|
|
79
81
|
*[
|
|
@@ -91,14 +93,15 @@ async def retrieve_previous_episodes_bulk(
|
|
|
91
93
|
|
|
92
94
|
|
|
93
95
|
async def add_nodes_and_edges_bulk(
|
|
94
|
-
driver: AsyncDriver,
|
|
96
|
+
driver: GraphDriver,
|
|
95
97
|
episodic_nodes: list[EpisodicNode],
|
|
96
98
|
episodic_edges: list[EpisodicEdge],
|
|
97
99
|
entity_nodes: list[EntityNode],
|
|
98
100
|
entity_edges: list[EntityEdge],
|
|
99
101
|
embedder: EmbedderClient,
|
|
100
102
|
):
|
|
101
|
-
|
|
103
|
+
session = driver.session(database=DEFAULT_DATABASE)
|
|
104
|
+
try:
|
|
102
105
|
await session.execute_write(
|
|
103
106
|
add_nodes_and_edges_bulk_tx,
|
|
104
107
|
episodic_nodes,
|
|
@@ -106,16 +109,20 @@ async def add_nodes_and_edges_bulk(
|
|
|
106
109
|
entity_nodes,
|
|
107
110
|
entity_edges,
|
|
108
111
|
embedder,
|
|
112
|
+
driver=driver,
|
|
109
113
|
)
|
|
114
|
+
finally:
|
|
115
|
+
await session.close()
|
|
110
116
|
|
|
111
117
|
|
|
112
118
|
async def add_nodes_and_edges_bulk_tx(
|
|
113
|
-
tx: AsyncManagedTransaction,
|
|
119
|
+
tx: GraphDriverSession,
|
|
114
120
|
episodic_nodes: list[EpisodicNode],
|
|
115
121
|
episodic_edges: list[EpisodicEdge],
|
|
116
122
|
entity_nodes: list[EntityNode],
|
|
117
123
|
entity_edges: list[EntityEdge],
|
|
118
124
|
embedder: EmbedderClient,
|
|
125
|
+
driver: GraphDriver,
|
|
119
126
|
):
|
|
120
127
|
episodes = [dict(episode) for episode in episodic_nodes]
|
|
121
128
|
for episode in episodes:
|
|
@@ -160,11 +167,13 @@ async def add_nodes_and_edges_bulk_tx(
|
|
|
160
167
|
edges.append(edge_data)
|
|
161
168
|
|
|
162
169
|
await tx.run(EPISODIC_NODE_SAVE_BULK, episodes=episodes)
|
|
163
|
-
|
|
170
|
+
entity_node_save_bulk = get_entity_node_save_bulk_query(nodes, driver.provider)
|
|
171
|
+
await tx.run(entity_node_save_bulk, nodes=nodes)
|
|
164
172
|
await tx.run(
|
|
165
173
|
EPISODIC_EDGE_SAVE_BULK, episodic_edges=[edge.model_dump() for edge in episodic_edges]
|
|
166
174
|
)
|
|
167
|
-
|
|
175
|
+
entity_edge_save_bulk = get_entity_edge_save_bulk_query(driver.provider)
|
|
176
|
+
await tx.run(entity_edge_save_bulk, entity_edges=edges)
|
|
168
177
|
|
|
169
178
|
|
|
170
179
|
async def extract_nodes_and_edges_bulk(
|
|
@@ -189,6 +198,7 @@ async def extract_nodes_and_edges_bulk(
|
|
|
189
198
|
episode,
|
|
190
199
|
extracted_nodes_bulk[i],
|
|
191
200
|
previous_episodes_list[i],
|
|
201
|
+
{},
|
|
192
202
|
episode.group_id,
|
|
193
203
|
)
|
|
194
204
|
for i, episode in enumerate(episodes)
|
|
@@ -211,7 +221,7 @@ async def extract_nodes_and_edges_bulk(
|
|
|
211
221
|
|
|
212
222
|
|
|
213
223
|
async def dedupe_nodes_bulk(
|
|
214
|
-
driver:
|
|
224
|
+
driver: GraphDriver,
|
|
215
225
|
llm_client: LLMClient,
|
|
216
226
|
extracted_nodes: list[EntityNode],
|
|
217
227
|
) -> tuple[list[EntityNode], dict[str, str]]:
|
|
@@ -247,7 +257,7 @@ async def dedupe_nodes_bulk(
|
|
|
247
257
|
|
|
248
258
|
|
|
249
259
|
async def dedupe_edges_bulk(
|
|
250
|
-
driver:
|
|
260
|
+
driver: GraphDriver, llm_client: LLMClient, extracted_edges: list[EntityEdge]
|
|
251
261
|
) -> list[EntityEdge]:
|
|
252
262
|
# First compress edges
|
|
253
263
|
compressed_edges = await compress_edges(llm_client, extracted_edges)
|
|
@@ -2,9 +2,9 @@ import asyncio
|
|
|
2
2
|
import logging
|
|
3
3
|
from collections import defaultdict
|
|
4
4
|
|
|
5
|
-
from neo4j import AsyncDriver
|
|
6
5
|
from pydantic import BaseModel
|
|
7
6
|
|
|
7
|
+
from graphiti_core.driver.driver import GraphDriver
|
|
8
8
|
from graphiti_core.edges import CommunityEdge
|
|
9
9
|
from graphiti_core.embedder import EmbedderClient
|
|
10
10
|
from graphiti_core.helpers import DEFAULT_DATABASE, semaphore_gather
|
|
@@ -26,7 +26,7 @@ class Neighbor(BaseModel):
|
|
|
26
26
|
|
|
27
27
|
|
|
28
28
|
async def get_community_clusters(
|
|
29
|
-
driver:
|
|
29
|
+
driver: GraphDriver, group_ids: list[str] | None
|
|
30
30
|
) -> list[list[EntityNode]]:
|
|
31
31
|
community_clusters: list[list[EntityNode]] = []
|
|
32
32
|
|
|
@@ -95,7 +95,6 @@ def label_propagation(projection: dict[str, list[Neighbor]]) -> list[list[str]]:
|
|
|
95
95
|
community_candidates: dict[int, int] = defaultdict(int)
|
|
96
96
|
for neighbor in neighbors:
|
|
97
97
|
community_candidates[community_map[neighbor.node_uuid]] += neighbor.edge_count
|
|
98
|
-
|
|
99
98
|
community_lst = [
|
|
100
99
|
(count, community) for community, count in community_candidates.items()
|
|
101
100
|
]
|
|
@@ -194,7 +193,7 @@ async def build_community(
|
|
|
194
193
|
|
|
195
194
|
|
|
196
195
|
async def build_communities(
|
|
197
|
-
driver:
|
|
196
|
+
driver: GraphDriver, llm_client: LLMClient, group_ids: list[str] | None
|
|
198
197
|
) -> tuple[list[CommunityNode], list[CommunityEdge]]:
|
|
199
198
|
community_clusters = await get_community_clusters(driver, group_ids)
|
|
200
199
|
|
|
@@ -219,7 +218,7 @@ async def build_communities(
|
|
|
219
218
|
return community_nodes, community_edges
|
|
220
219
|
|
|
221
220
|
|
|
222
|
-
async def remove_communities(driver: AsyncDriver):
|
|
221
|
+
async def remove_communities(driver: GraphDriver):
|
|
223
222
|
await driver.execute_query(
|
|
224
223
|
"""
|
|
225
224
|
MATCH (c:Community)
|
|
@@ -230,7 +229,7 @@ async def remove_communities(driver: AsyncDriver):
|
|
|
230
229
|
|
|
231
230
|
|
|
232
231
|
async def determine_entity_community(
|
|
233
|
-
driver:
|
|
232
|
+
driver: GraphDriver, entity: EntityNode
|
|
234
233
|
) -> tuple[CommunityNode | None, bool]:
|
|
235
234
|
# Check if the node is already part of a community
|
|
236
235
|
records, _, _ = await driver.execute_query(
|
|
@@ -291,7 +290,7 @@ async def determine_entity_community(
|
|
|
291
290
|
|
|
292
291
|
|
|
293
292
|
async def update_community(
|
|
294
|
-
driver:
|
|
293
|
+
driver: GraphDriver, llm_client: LLMClient, embedder: EmbedderClient, entity: EntityNode
|
|
295
294
|
):
|
|
296
295
|
community, is_new = await determine_entity_community(driver, entity)
|
|
297
296
|
|
|
@@ -19,7 +19,9 @@ from datetime import datetime
|
|
|
19
19
|
from time import time
|
|
20
20
|
|
|
21
21
|
from pydantic import BaseModel
|
|
22
|
+
from typing_extensions import LiteralString
|
|
22
23
|
|
|
24
|
+
from graphiti_core.driver.driver import GraphDriver
|
|
23
25
|
from graphiti_core.edges import (
|
|
24
26
|
CommunityEdge,
|
|
25
27
|
EntityEdge,
|
|
@@ -27,7 +29,7 @@ from graphiti_core.edges import (
|
|
|
27
29
|
create_entity_edge_embeddings,
|
|
28
30
|
)
|
|
29
31
|
from graphiti_core.graphiti_types import GraphitiClients
|
|
30
|
-
from graphiti_core.helpers import MAX_REFLEXION_ITERATIONS, semaphore_gather
|
|
32
|
+
from graphiti_core.helpers import DEFAULT_DATABASE, MAX_REFLEXION_ITERATIONS, semaphore_gather
|
|
31
33
|
from graphiti_core.llm_client import LLMClient
|
|
32
34
|
from graphiti_core.llm_client.config import ModelSize
|
|
33
35
|
from graphiti_core.nodes import CommunityNode, EntityNode, EpisodicNode
|
|
@@ -61,6 +63,28 @@ def build_episodic_edges(
|
|
|
61
63
|
return episodic_edges
|
|
62
64
|
|
|
63
65
|
|
|
66
|
+
def build_duplicate_of_edges(
|
|
67
|
+
episode: EpisodicNode,
|
|
68
|
+
created_at: datetime,
|
|
69
|
+
duplicate_nodes: list[tuple[EntityNode, EntityNode]],
|
|
70
|
+
) -> list[EntityEdge]:
|
|
71
|
+
is_duplicate_of_edges: list[EntityEdge] = [
|
|
72
|
+
EntityEdge(
|
|
73
|
+
source_node_uuid=source_node.uuid,
|
|
74
|
+
target_node_uuid=target_node.uuid,
|
|
75
|
+
name='IS_DUPLICATE_OF',
|
|
76
|
+
group_id=episode.group_id,
|
|
77
|
+
fact=f'{source_node.name} is a duplicate of {target_node.name}',
|
|
78
|
+
episodes=[episode.uuid],
|
|
79
|
+
created_at=created_at,
|
|
80
|
+
valid_at=created_at,
|
|
81
|
+
)
|
|
82
|
+
for source_node, target_node in duplicate_nodes
|
|
83
|
+
]
|
|
84
|
+
|
|
85
|
+
return is_duplicate_of_edges
|
|
86
|
+
|
|
87
|
+
|
|
64
88
|
def build_community_edges(
|
|
65
89
|
entity_nodes: list[EntityNode],
|
|
66
90
|
community_node: CommunityNode,
|
|
@@ -84,6 +108,7 @@ async def extract_edges(
|
|
|
84
108
|
episode: EpisodicNode,
|
|
85
109
|
nodes: list[EntityNode],
|
|
86
110
|
previous_episodes: list[EpisodicNode],
|
|
111
|
+
edge_type_map: dict[tuple[str, str], list[str]],
|
|
87
112
|
group_id: str = '',
|
|
88
113
|
edge_types: dict[str, BaseModel] | None = None,
|
|
89
114
|
) -> list[EntityEdge]:
|
|
@@ -92,10 +117,17 @@ async def extract_edges(
|
|
|
92
117
|
extract_edges_max_tokens = 16384
|
|
93
118
|
llm_client = clients.llm_client
|
|
94
119
|
|
|
120
|
+
edge_type_signature_map: dict[str, tuple[str, str]] = {
|
|
121
|
+
edge_type: signature
|
|
122
|
+
for signature, edge_types in edge_type_map.items()
|
|
123
|
+
for edge_type in edge_types
|
|
124
|
+
}
|
|
125
|
+
|
|
95
126
|
edge_types_context = (
|
|
96
127
|
[
|
|
97
128
|
{
|
|
98
129
|
'fact_type_name': type_name,
|
|
130
|
+
'fact_type_signature': edge_type_signature_map.get(type_name, ('Entity', 'Entity')),
|
|
99
131
|
'fact_type_description': type_model.__doc__,
|
|
100
132
|
}
|
|
101
133
|
for type_name, type_model in edge_types.items()
|
|
@@ -107,7 +139,10 @@ async def extract_edges(
|
|
|
107
139
|
# Prepare context for LLM
|
|
108
140
|
context = {
|
|
109
141
|
'episode_content': episode.content,
|
|
110
|
-
'nodes': [
|
|
142
|
+
'nodes': [
|
|
143
|
+
{'id': idx, 'name': node.name, 'entity_types': node.labels}
|
|
144
|
+
for idx, node in enumerate(nodes)
|
|
145
|
+
],
|
|
111
146
|
'previous_episodes': [ep.content for ep in previous_episodes],
|
|
112
147
|
'reference_time': episode.valid_at,
|
|
113
148
|
'edge_types': edge_types_context,
|
|
@@ -260,7 +295,6 @@ async def resolve_extracted_edges(
|
|
|
260
295
|
driver = clients.driver
|
|
261
296
|
llm_client = clients.llm_client
|
|
262
297
|
embedder = clients.embedder
|
|
263
|
-
|
|
264
298
|
await create_entity_edge_embeddings(embedder, extracted_edges)
|
|
265
299
|
|
|
266
300
|
search_results: tuple[list[list[EntityEdge]], list[list[EntityEdge]]] = await semaphore_gather(
|
|
@@ -571,3 +605,34 @@ async def dedupe_edge_list(
|
|
|
571
605
|
unique_edges.append(edge)
|
|
572
606
|
|
|
573
607
|
return unique_edges
|
|
608
|
+
|
|
609
|
+
|
|
610
|
+
async def filter_existing_duplicate_of_edges(
|
|
611
|
+
driver: GraphDriver, duplicates_node_tuples: list[tuple[EntityNode, EntityNode]]
|
|
612
|
+
) -> list[tuple[EntityNode, EntityNode]]:
|
|
613
|
+
query: LiteralString = """
|
|
614
|
+
UNWIND $duplicate_node_uuids AS duplicate_tuple
|
|
615
|
+
MATCH (n:Entity {uuid: duplicate_tuple[0]})-[r:RELATES_TO {name: 'IS_DUPLICATE_OF'}]->(m:Entity {uuid: duplicate_tuple[1]})
|
|
616
|
+
RETURN DISTINCT
|
|
617
|
+
n.uuid AS source_uuid,
|
|
618
|
+
m.uuid AS target_uuid
|
|
619
|
+
"""
|
|
620
|
+
|
|
621
|
+
duplicate_nodes_map = {
|
|
622
|
+
(source.uuid, target.uuid): (source, target) for source, target in duplicates_node_tuples
|
|
623
|
+
}
|
|
624
|
+
|
|
625
|
+
records, _, _ = await driver.execute_query(
|
|
626
|
+
query,
|
|
627
|
+
duplicate_node_uuids=list(duplicate_nodes_map.keys()),
|
|
628
|
+
database_=DEFAULT_DATABASE,
|
|
629
|
+
routing_='r',
|
|
630
|
+
)
|
|
631
|
+
|
|
632
|
+
# Remove duplicates that already have the IS_DUPLICATE_OF edge
|
|
633
|
+
for record in records:
|
|
634
|
+
duplicate_tuple = (record.get('source_uuid'), record.get('target_uuid'))
|
|
635
|
+
if duplicate_nodes_map.get(duplicate_tuple):
|
|
636
|
+
duplicate_nodes_map.pop(duplicate_tuple)
|
|
637
|
+
|
|
638
|
+
return list(duplicate_nodes_map.values())
|
|
@@ -17,9 +17,10 @@ limitations under the License.
|
|
|
17
17
|
import logging
|
|
18
18
|
from datetime import datetime, timezone
|
|
19
19
|
|
|
20
|
-
from neo4j import AsyncDriver
|
|
21
20
|
from typing_extensions import LiteralString
|
|
22
21
|
|
|
22
|
+
from graphiti_core.driver.driver import GraphDriver
|
|
23
|
+
from graphiti_core.graph_queries import get_fulltext_indices, get_range_indices
|
|
23
24
|
from graphiti_core.helpers import DEFAULT_DATABASE, semaphore_gather
|
|
24
25
|
from graphiti_core.nodes import EpisodeType, EpisodicNode
|
|
25
26
|
|
|
@@ -28,7 +29,7 @@ EPISODE_WINDOW_LEN = 3
|
|
|
28
29
|
logger = logging.getLogger(__name__)
|
|
29
30
|
|
|
30
31
|
|
|
31
|
-
async def build_indices_and_constraints(driver: AsyncDriver, delete_existing: bool = False):
|
|
32
|
+
async def build_indices_and_constraints(driver: GraphDriver, delete_existing: bool = False):
|
|
32
33
|
if delete_existing:
|
|
33
34
|
records, _, _ = await driver.execute_query(
|
|
34
35
|
"""
|
|
@@ -47,39 +48,9 @@ async def build_indices_and_constraints(driver: AsyncDriver, delete_existing: bo
|
|
|
47
48
|
for name in index_names
|
|
48
49
|
]
|
|
49
50
|
)
|
|
51
|
+
range_indices: list[LiteralString] = get_range_indices(driver.provider)
|
|
50
52
|
|
|
51
|
-
|
|
52
|
-
'CREATE INDEX entity_uuid IF NOT EXISTS FOR (n:Entity) ON (n.uuid)',
|
|
53
|
-
'CREATE INDEX episode_uuid IF NOT EXISTS FOR (n:Episodic) ON (n.uuid)',
|
|
54
|
-
'CREATE INDEX community_uuid IF NOT EXISTS FOR (n:Community) ON (n.uuid)',
|
|
55
|
-
'CREATE INDEX relation_uuid IF NOT EXISTS FOR ()-[e:RELATES_TO]-() ON (e.uuid)',
|
|
56
|
-
'CREATE INDEX mention_uuid IF NOT EXISTS FOR ()-[e:MENTIONS]-() ON (e.uuid)',
|
|
57
|
-
'CREATE INDEX has_member_uuid IF NOT EXISTS FOR ()-[e:HAS_MEMBER]-() ON (e.uuid)',
|
|
58
|
-
'CREATE INDEX entity_group_id IF NOT EXISTS FOR (n:Entity) ON (n.group_id)',
|
|
59
|
-
'CREATE INDEX episode_group_id IF NOT EXISTS FOR (n:Episodic) ON (n.group_id)',
|
|
60
|
-
'CREATE INDEX relation_group_id IF NOT EXISTS FOR ()-[e:RELATES_TO]-() ON (e.group_id)',
|
|
61
|
-
'CREATE INDEX mention_group_id IF NOT EXISTS FOR ()-[e:MENTIONS]-() ON (e.group_id)',
|
|
62
|
-
'CREATE INDEX name_entity_index IF NOT EXISTS FOR (n:Entity) ON (n.name)',
|
|
63
|
-
'CREATE INDEX created_at_entity_index IF NOT EXISTS FOR (n:Entity) ON (n.created_at)',
|
|
64
|
-
'CREATE INDEX created_at_episodic_index IF NOT EXISTS FOR (n:Episodic) ON (n.created_at)',
|
|
65
|
-
'CREATE INDEX valid_at_episodic_index IF NOT EXISTS FOR (n:Episodic) ON (n.valid_at)',
|
|
66
|
-
'CREATE INDEX name_edge_index IF NOT EXISTS FOR ()-[e:RELATES_TO]-() ON (e.name)',
|
|
67
|
-
'CREATE INDEX created_at_edge_index IF NOT EXISTS FOR ()-[e:RELATES_TO]-() ON (e.created_at)',
|
|
68
|
-
'CREATE INDEX expired_at_edge_index IF NOT EXISTS FOR ()-[e:RELATES_TO]-() ON (e.expired_at)',
|
|
69
|
-
'CREATE INDEX valid_at_edge_index IF NOT EXISTS FOR ()-[e:RELATES_TO]-() ON (e.valid_at)',
|
|
70
|
-
'CREATE INDEX invalid_at_edge_index IF NOT EXISTS FOR ()-[e:RELATES_TO]-() ON (e.invalid_at)',
|
|
71
|
-
]
|
|
72
|
-
|
|
73
|
-
fulltext_indices: list[LiteralString] = [
|
|
74
|
-
"""CREATE FULLTEXT INDEX episode_content IF NOT EXISTS
|
|
75
|
-
FOR (e:Episodic) ON EACH [e.content, e.source, e.source_description, e.group_id]""",
|
|
76
|
-
"""CREATE FULLTEXT INDEX node_name_and_summary IF NOT EXISTS
|
|
77
|
-
FOR (n:Entity) ON EACH [n.name, n.summary, n.group_id]""",
|
|
78
|
-
"""CREATE FULLTEXT INDEX community_name IF NOT EXISTS
|
|
79
|
-
FOR (n:Community) ON EACH [n.name, n.group_id]""",
|
|
80
|
-
"""CREATE FULLTEXT INDEX edge_name_and_fact IF NOT EXISTS
|
|
81
|
-
FOR ()-[e:RELATES_TO]-() ON EACH [e.name, e.fact, e.group_id]""",
|
|
82
|
-
]
|
|
53
|
+
fulltext_indices: list[LiteralString] = get_fulltext_indices(driver.provider)
|
|
83
54
|
|
|
84
55
|
index_queries: list[LiteralString] = range_indices + fulltext_indices
|
|
85
56
|
|
|
@@ -94,7 +65,7 @@ async def build_indices_and_constraints(driver: AsyncDriver, delete_existing: bo
|
|
|
94
65
|
)
|
|
95
66
|
|
|
96
67
|
|
|
97
|
-
async def clear_data(driver: AsyncDriver, group_ids: list[str] | None = None):
|
|
68
|
+
async def clear_data(driver: GraphDriver, group_ids: list[str] | None = None):
|
|
98
69
|
async with driver.session(database=DEFAULT_DATABASE) as session:
|
|
99
70
|
|
|
100
71
|
async def delete_all(tx):
|
|
@@ -113,7 +84,7 @@ async def clear_data(driver: AsyncDriver, group_ids: list[str] | None = None):
|
|
|
113
84
|
|
|
114
85
|
|
|
115
86
|
async def retrieve_episodes(
|
|
116
|
-
driver:
|
|
87
|
+
driver: GraphDriver,
|
|
117
88
|
reference_time: datetime,
|
|
118
89
|
last_n: int = EPISODE_WINDOW_LEN,
|
|
119
90
|
group_ids: list[str] | None = None,
|
|
@@ -123,7 +94,7 @@ async def retrieve_episodes(
|
|
|
123
94
|
Retrieve the last n episodic nodes from the graph.
|
|
124
95
|
|
|
125
96
|
Args:
|
|
126
|
-
driver (
|
|
97
|
+
driver (Driver): The Neo4j driver instance.
|
|
127
98
|
reference_time (datetime): The reference time to filter episodes. Only episodes with a valid_at timestamp
|
|
128
99
|
less than or equal to this reference_time will be retrieved. This allows for
|
|
129
100
|
querying the graph's state at a specific point in time.
|
|
@@ -140,8 +111,8 @@ async def retrieve_episodes(
|
|
|
140
111
|
|
|
141
112
|
query: LiteralString = (
|
|
142
113
|
"""
|
|
143
|
-
|
|
144
|
-
|
|
114
|
+
MATCH (e:Episodic) WHERE e.valid_at <= $reference_time
|
|
115
|
+
"""
|
|
145
116
|
+ group_id_filter
|
|
146
117
|
+ source_filter
|
|
147
118
|
+ """
|
|
@@ -157,8 +128,7 @@ async def retrieve_episodes(
|
|
|
157
128
|
LIMIT $num_episodes
|
|
158
129
|
"""
|
|
159
130
|
)
|
|
160
|
-
|
|
161
|
-
result = await driver.execute_query(
|
|
131
|
+
result, _, _ = await driver.execute_query(
|
|
162
132
|
query,
|
|
163
133
|
reference_time=reference_time,
|
|
164
134
|
source=source.name if source is not None else None,
|
|
@@ -166,6 +136,7 @@ async def retrieve_episodes(
|
|
|
166
136
|
group_ids=group_ids,
|
|
167
137
|
database_=DEFAULT_DATABASE,
|
|
168
138
|
)
|
|
139
|
+
|
|
169
140
|
episodes = [
|
|
170
141
|
EpisodicNode(
|
|
171
142
|
content=record['content'],
|
|
@@ -179,6 +150,6 @@ async def retrieve_episodes(
|
|
|
179
150
|
name=record['name'],
|
|
180
151
|
source_description=record['source_description'],
|
|
181
152
|
)
|
|
182
|
-
for record in result
|
|
153
|
+
for record in result
|
|
183
154
|
]
|
|
184
155
|
return list(reversed(episodes)) # Return in chronological order
|
|
@@ -40,6 +40,7 @@ from graphiti_core.search.search_config import SearchResults
|
|
|
40
40
|
from graphiti_core.search.search_config_recipes import NODE_HYBRID_SEARCH_RRF
|
|
41
41
|
from graphiti_core.search.search_filters import SearchFilters
|
|
42
42
|
from graphiti_core.utils.datetime_utils import utc_now
|
|
43
|
+
from graphiti_core.utils.maintenance.edge_operations import filter_existing_duplicate_of_edges
|
|
43
44
|
|
|
44
45
|
logger = logging.getLogger(__name__)
|
|
45
46
|
|
|
@@ -225,8 +226,9 @@ async def resolve_extracted_nodes(
|
|
|
225
226
|
episode: EpisodicNode | None = None,
|
|
226
227
|
previous_episodes: list[EpisodicNode] | None = None,
|
|
227
228
|
entity_types: dict[str, BaseModel] | None = None,
|
|
228
|
-
) -> tuple[list[EntityNode], dict[str, str]]:
|
|
229
|
+
) -> tuple[list[EntityNode], dict[str, str], list[tuple[EntityNode, EntityNode]]]:
|
|
229
230
|
llm_client = clients.llm_client
|
|
231
|
+
driver = clients.driver
|
|
230
232
|
|
|
231
233
|
search_results: list[SearchResults] = await semaphore_gather(
|
|
232
234
|
*[
|
|
@@ -295,9 +297,10 @@ async def resolve_extracted_nodes(
|
|
|
295
297
|
|
|
296
298
|
resolved_nodes: list[EntityNode] = []
|
|
297
299
|
uuid_map: dict[str, str] = {}
|
|
300
|
+
node_duplicates: list[tuple[EntityNode, EntityNode]] = []
|
|
298
301
|
for resolution in node_resolutions:
|
|
299
|
-
resolution_id = resolution.get('id', -1)
|
|
300
|
-
duplicate_idx = resolution.get('duplicate_idx', -1)
|
|
302
|
+
resolution_id: int = resolution.get('id', -1)
|
|
303
|
+
duplicate_idx: int = resolution.get('duplicate_idx', -1)
|
|
301
304
|
|
|
302
305
|
extracted_node = extracted_nodes[resolution_id]
|
|
303
306
|
|
|
@@ -312,9 +315,21 @@ async def resolve_extracted_nodes(
|
|
|
312
315
|
resolved_nodes.append(resolved_node)
|
|
313
316
|
uuid_map[extracted_node.uuid] = resolved_node.uuid
|
|
314
317
|
|
|
318
|
+
additional_duplicates: list[int] = resolution.get('additional_duplicates', [])
|
|
319
|
+
for idx in additional_duplicates:
|
|
320
|
+
existing_node = existing_nodes[idx] if idx < len(existing_nodes) else resolved_node
|
|
321
|
+
if existing_node == resolved_node:
|
|
322
|
+
continue
|
|
323
|
+
|
|
324
|
+
node_duplicates.append((resolved_node, existing_nodes[idx]))
|
|
325
|
+
|
|
315
326
|
logger.debug(f'Resolved nodes: {[(n.name, n.uuid) for n in resolved_nodes]}')
|
|
316
327
|
|
|
317
|
-
|
|
328
|
+
new_node_duplicates: list[
|
|
329
|
+
tuple[EntityNode, EntityNode]
|
|
330
|
+
] = await filter_existing_duplicate_of_edges(driver, node_duplicates)
|
|
331
|
+
|
|
332
|
+
return resolved_nodes, uuid_map, new_node_duplicates
|
|
318
333
|
|
|
319
334
|
|
|
320
335
|
async def extract_attributes_from_nodes(
|
|
@@ -326,7 +341,6 @@ async def extract_attributes_from_nodes(
|
|
|
326
341
|
) -> list[EntityNode]:
|
|
327
342
|
llm_client = clients.llm_client
|
|
328
343
|
embedder = clients.embedder
|
|
329
|
-
|
|
330
344
|
updated_nodes: list[EntityNode] = await semaphore_gather(
|
|
331
345
|
*[
|
|
332
346
|
extract_attributes_from_node(
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: graphiti-core
|
|
3
|
-
Version: 0.12.0rc5
|
|
3
|
+
Version: 0.12.2
|
|
4
4
|
Summary: A temporal graph building library
|
|
5
5
|
License: Apache-2.0
|
|
6
6
|
Author: Paul Paliychuk
|
|
@@ -17,9 +17,10 @@ Provides-Extra: google-genai
|
|
|
17
17
|
Provides-Extra: groq
|
|
18
18
|
Requires-Dist: anthropic (>=0.49.0) ; extra == "anthropic"
|
|
19
19
|
Requires-Dist: diskcache (>=5.6.3)
|
|
20
|
+
Requires-Dist: falkordb (>=1.1.2,<2.0.0)
|
|
20
21
|
Requires-Dist: google-genai (>=1.8.0) ; extra == "google-genai"
|
|
21
22
|
Requires-Dist: groq (>=0.2.0) ; extra == "groq"
|
|
22
|
-
Requires-Dist: neo4j (>=5.
|
|
23
|
+
Requires-Dist: neo4j (>=5.26.0)
|
|
23
24
|
Requires-Dist: numpy (>=1.0.0)
|
|
24
25
|
Requires-Dist: openai (>=1.53.0)
|
|
25
26
|
Requires-Dist: pydantic (>=2.11.5)
|
|
@@ -136,7 +137,7 @@ Graphiti is specifically designed to address the challenges of dynamic and frequ
|
|
|
136
137
|
Requirements:
|
|
137
138
|
|
|
138
139
|
- Python 3.10 or higher
|
|
139
|
-
- Neo4j 5.26 or higher (serves as the embeddings storage backend)
|
|
140
|
+
- Neo4j 5.26 / FalkorDB 1.1.2 or higher (serves as the embeddings storage backend)
|
|
140
141
|
- OpenAI API key (for LLM inference and embedding)
|
|
141
142
|
|
|
142
143
|
> [!IMPORTANT]
|
|
@@ -2,19 +2,26 @@ graphiti_core/__init__.py,sha256=e5SWFkRiaUwfprYIeIgVIh7JDedNiloZvd3roU-0aDY,55
|
|
|
2
2
|
graphiti_core/cross_encoder/__init__.py,sha256=hry59vz21x-AtGZ0MJ7ugw0HTwJkXiddpp_Yqnwsen0,723
|
|
3
3
|
graphiti_core/cross_encoder/bge_reranker_client.py,sha256=sY7RKsCp90vTjYxv6vmIHT4p3oCsFCRYWH-H0Ia0vN0,1449
|
|
4
4
|
graphiti_core/cross_encoder/client.py,sha256=KLsbfWKOEaAV3adFe3XZlAeb-gje9_sVKCVZTaJP3ac,1441
|
|
5
|
-
graphiti_core/cross_encoder/openai_reranker_client.py,sha256=
|
|
6
|
-
graphiti_core/
|
|
5
|
+
graphiti_core/cross_encoder/openai_reranker_client.py,sha256=_Hftiz250HbEkY_26z6A1oxg4pzM8Sbr8CwnbJEsggc,4522
|
|
6
|
+
graphiti_core/driver/__init__.py,sha256=DumfxIEY3z_nkz5YGaYH1GM50HgeAdEowNK189jcdAg,626
|
|
7
|
+
graphiti_core/driver/driver.py,sha256=-FHAA2gM8FA0re-q6udmjQ6pNFdFGRQrMRuAiqX_1A4,1829
|
|
8
|
+
graphiti_core/driver/falkordb_driver.py,sha256=Iz3wnfoJIO7EslqZvG6mduyZ5C-DWxFDPM5Q4QJRCuo,4686
|
|
9
|
+
graphiti_core/driver/neo4j_driver.py,sha256=D8CV5GbhKoHIQ78BA9ozlwdvXPLUbBmFSfT2lww8PJk,1910
|
|
10
|
+
graphiti_core/edges.py,sha256=h67vyXYhZYqlwaOmaqjHiGns6nEjuBVSIAFBMveNVo8,16257
|
|
7
11
|
graphiti_core/embedder/__init__.py,sha256=EL564ZuE-DZjcuKNUK_exMn_XHXm2LdO9fzdXePVKL4,179
|
|
12
|
+
graphiti_core/embedder/azure_openai.py,sha256=OyomPwC1fIsddI-3n6g00kQFdQznZorBhHwkQKCLUok,2384
|
|
8
13
|
graphiti_core/embedder/client.py,sha256=qEpSHceL_Gc4QQPJWIOnuNLemNuR_TYA4r28t2Vldbg,1115
|
|
9
14
|
graphiti_core/embedder/gemini.py,sha256=7En-W46YxqC5qL3vYB5Ed-Xm0hqLxi7-LgZ95c4M7ME,3263
|
|
10
15
|
graphiti_core/embedder/openai.py,sha256=bIThUoLMeGlHG2-3VikzK6JZfOHKn4PKvUMx5sHxJy8,2192
|
|
11
16
|
graphiti_core/embedder/voyage.py,sha256=gQhdcz2IYPSyOcDn3w8aHToVS3KQhyZrUBm4vqr3WcE,2224
|
|
12
17
|
graphiti_core/errors.py,sha256=Nib1uQx2cO_VOizupmRjpFfmuRg-hFAVqTtZAuBehR8,2405
|
|
13
|
-
graphiti_core/
|
|
14
|
-
graphiti_core/
|
|
15
|
-
graphiti_core/
|
|
16
|
-
graphiti_core/
|
|
18
|
+
graphiti_core/graph_queries.py,sha256=KfWDp8xDnPa9bcHskw8NeMpeeHBtZWBCosVdu1Iwv34,7076
|
|
19
|
+
graphiti_core/graphiti.py,sha256=cCUYxwNoQiEtzbPNvTEwfYOrK9UePFpRCUeFxl1-vg0,28335
|
|
20
|
+
graphiti_core/graphiti_types.py,sha256=rL-9bvnLobunJfXU4hkD6mAj14pofKp_wq8QsFDZwDU,1035
|
|
21
|
+
graphiti_core/helpers.py,sha256=sfC1M6KefKaZll6FQcpcNvWnCN6iCvBSJksAkGTItT4,3059
|
|
22
|
+
graphiti_core/llm_client/__init__.py,sha256=QgBWUiCeBp6YiA_xqyrDvJ9jIyy1hngH8g7FWahN3nw,776
|
|
17
23
|
graphiti_core/llm_client/anthropic_client.py,sha256=392rtkH_I7yOJUlQvjoOnS8Lz14WBP8egQ3OfRH0nFs,12481
|
|
24
|
+
graphiti_core/llm_client/azure_openai_client.py,sha256=B6EbNIktP9FBqiFrGunVQlego2e3C5zBAbcHI55Y-OY,2680
|
|
18
25
|
graphiti_core/llm_client/client.py,sha256=v_w5TBbDJYYADCXSs2r287g5Ami2Urma-GGEbHSI_Jg,5826
|
|
19
26
|
graphiti_core/llm_client/config.py,sha256=90IgSBxZE_3nWdaEONVLUznI8lytPA7ZyexQz-_c55U,2560
|
|
20
27
|
graphiti_core/llm_client/errors.py,sha256=pn6brRiLW60DAUIXJYKBT6MInrS4ueuH1hNLbn_JbQo,1243
|
|
@@ -28,14 +35,14 @@ graphiti_core/models/edges/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJ
|
|
|
28
35
|
graphiti_core/models/edges/edge_db_queries.py,sha256=W2-ljKnZOt5MlD9_M4f_823GdyTMRzW2tJX0CezaixY,2284
|
|
29
36
|
graphiti_core/models/nodes/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
30
37
|
graphiti_core/models/nodes/node_db_queries.py,sha256=AQgRGVO-GgFWfLq1G6k8s86WItwpXruy3Mj4DBli-vM,2145
|
|
31
|
-
graphiti_core/nodes.py,sha256=
|
|
38
|
+
graphiti_core/nodes.py,sha256=kdJY-Ugyk6J2x70w4EF_EoFNgy7D3TMOMVSUfEth6rE,18665
|
|
32
39
|
graphiti_core/prompts/__init__.py,sha256=EA-x9xUki9l8wnu2l8ek_oNf75-do5tq5hVq7Zbv8Kw,101
|
|
33
40
|
graphiti_core/prompts/dedupe_edges.py,sha256=AFVC1EQ0TvNkSp0G7QZmIh3YpGg9FVXo1_sT3TlRqA8,5473
|
|
34
|
-
graphiti_core/prompts/dedupe_nodes.py,sha256=
|
|
41
|
+
graphiti_core/prompts/dedupe_nodes.py,sha256=F_TNygUfAf0R9Md5aHnD-VQb58_0MTMN9UQjopcWJvY,7615
|
|
35
42
|
graphiti_core/prompts/eval.py,sha256=gnBQTmwsCl3Qvwpcm7aieVszzo6y1sMCUT8jQiKTvvE,5317
|
|
36
43
|
graphiti_core/prompts/extract_edge_dates.py,sha256=3Drs3CmvP0gJN5BidWSxrNvLet3HPoTybU3BUIAoc0Y,4218
|
|
37
|
-
graphiti_core/prompts/extract_edges.py,sha256=
|
|
38
|
-
graphiti_core/prompts/extract_nodes.py,sha256=
|
|
44
|
+
graphiti_core/prompts/extract_edges.py,sha256=1zP-Xo70UbHm1uSJIcVHhHrXIpH9ILU6ieunLPaRdnA,6761
|
|
45
|
+
graphiti_core/prompts/extract_nodes.py,sha256=bJ3Inp4tMDsXgTPHKOyAtasVHEeLjbY7wfV3LwdvCsc,9747
|
|
39
46
|
graphiti_core/prompts/invalidate_edges.py,sha256=yfpcs_pyctnoM77ULPZXEtKW0oHr1MeLsJzC5yrE-o4,3547
|
|
40
47
|
graphiti_core/prompts/lib.py,sha256=DCyHePM4_q-CptTpEXGO_dBv9k7xDtclEaB1dGu7EcI,4092
|
|
41
48
|
graphiti_core/prompts/models.py,sha256=NgxdbPHJpBEcpbXovKyScgpBc73Q-GIW-CBDlBtDjto,894
|
|
@@ -43,24 +50,24 @@ graphiti_core/prompts/prompt_helpers.py,sha256=-9TABwIcIQUVHcNANx6wIZd-FT2DgYKyG
|
|
|
43
50
|
graphiti_core/prompts/summarize_nodes.py,sha256=tbg-AgWlzgFBeImKkZ28h2SpmqfPPqvN2Ol1Q71VF9Y,4146
|
|
44
51
|
graphiti_core/py.typed,sha256=vlmmzQOt7bmeQl9L3XJP4W6Ry0iiELepnOrinKz5KQg,79
|
|
45
52
|
graphiti_core/search/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
46
|
-
graphiti_core/search/search.py,sha256=
|
|
53
|
+
graphiti_core/search/search.py,sha256=bJCFaNApu5396pXTa-xciu8ORDdRFJqfE3j2ieRVd7Y,15162
|
|
47
54
|
graphiti_core/search/search_config.py,sha256=VvKg6AB_RPhoe56DBBXHRBXHThAVJ_OLFCyq_yKof-A,3765
|
|
48
55
|
graphiti_core/search/search_config_recipes.py,sha256=4GquRphHhJlpXQhAZOySYnCzBWYoTwxlJj44eTOavZQ,7443
|
|
49
56
|
graphiti_core/search/search_filters.py,sha256=jG30nMWX03xoT9ohgyHNu_Xes8GwjIF2eTv6QaiWMqw,6466
|
|
50
57
|
graphiti_core/search/search_helpers.py,sha256=G5Ceaq5Pfgx0Weelqgeylp_pUHwiBnINaUYsDbURJbE,2636
|
|
51
|
-
graphiti_core/search/search_utils.py,sha256=
|
|
58
|
+
graphiti_core/search/search_utils.py,sha256=k9KKN4sYde0Hqw9BKb5T-8q-3hInIPwq9aYoGfheq6E,34877
|
|
52
59
|
graphiti_core/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
53
|
-
graphiti_core/utils/bulk_utils.py,sha256=
|
|
60
|
+
graphiti_core/utils/bulk_utils.py,sha256=RPPTAqBRg6iR1T6g6TDpfUGvkYmTEyVNrVPz_y91f-s,16196
|
|
54
61
|
graphiti_core/utils/datetime_utils.py,sha256=Ti-2tnrDFRzBsbfblzsHybsM3jaDLP4-VT2t0VhpIzU,1357
|
|
55
62
|
graphiti_core/utils/maintenance/__init__.py,sha256=vW4H1KyapTl-OOz578uZABYcpND4wPx3Vt6aAPaXh78,301
|
|
56
|
-
graphiti_core/utils/maintenance/community_operations.py,sha256=
|
|
57
|
-
graphiti_core/utils/maintenance/edge_operations.py,sha256=
|
|
58
|
-
graphiti_core/utils/maintenance/graph_data_operations.py,sha256=
|
|
59
|
-
graphiti_core/utils/maintenance/node_operations.py,sha256=
|
|
63
|
+
graphiti_core/utils/maintenance/community_operations.py,sha256=2rhRqtL9gDbjXKO4-S0nGpaWvS4ck5rFiazZiogIJao,10088
|
|
64
|
+
graphiti_core/utils/maintenance/edge_operations.py,sha256=Fwu2TLmQF_9EVcA-uUlt1ZiGC6RILIfKDr9W7R4gAno,21633
|
|
65
|
+
graphiti_core/utils/maintenance/graph_data_operations.py,sha256=OHuiAyP1Z7dfR90dWVQ87TJQO83P0sQihJyr4WIhOhk,5362
|
|
66
|
+
graphiti_core/utils/maintenance/node_operations.py,sha256=cqSZ9CmyaH-WeZlzZEmR8Q-GlaQFR31sjw_qcObCyrw,15564
|
|
60
67
|
graphiti_core/utils/maintenance/temporal_operations.py,sha256=mJkw9xLB4W2BsLfC5POr0r-PHWL9SIfNj_l_xu0B5ug,3410
|
|
61
68
|
graphiti_core/utils/maintenance/utils.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
62
69
|
graphiti_core/utils/ontology_utils/entity_types_utils.py,sha256=QJX5cG0GSSNF_Mm_yrldr69wjVAbN_MxLhOSznz85Hk,1279
|
|
63
|
-
graphiti_core-0.12.
|
|
64
|
-
graphiti_core-0.12.
|
|
65
|
-
graphiti_core-0.12.
|
|
66
|
-
graphiti_core-0.12.
|
|
70
|
+
graphiti_core-0.12.2.dist-info/LICENSE,sha256=KCUwCyDXuVEgmDWkozHyniRyWjnWUWjkuDHfU6o3JlA,11325
|
|
71
|
+
graphiti_core-0.12.2.dist-info/METADATA,sha256=qAj41UAuvLM-5XTugbsFSRr_DQElAzoHJ6JX3pyU9Lo,15590
|
|
72
|
+
graphiti_core-0.12.2.dist-info/WHEEL,sha256=fGIA9gx4Qxk2KDKeNJCbOEwSrmLtjWCwzBz351GyrPQ,88
|
|
73
|
+
graphiti_core-0.12.2.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|