graphiti-core 0.3.2__tar.gz → 0.3.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of graphiti-core might be problematic. Click here for more details.
- {graphiti_core-0.3.2 → graphiti_core-0.3.4}/PKG-INFO +58 -38
- {graphiti_core-0.3.2 → graphiti_core-0.3.4}/README.md +57 -37
- {graphiti_core-0.3.2 → graphiti_core-0.3.4}/graphiti_core/edges.py +147 -7
- {graphiti_core-0.3.2 → graphiti_core-0.3.4}/graphiti_core/graphiti.py +47 -6
- {graphiti_core-0.3.2 → graphiti_core-0.3.4}/graphiti_core/llm_client/anthropic_client.py +5 -1
- {graphiti_core-0.3.2 → graphiti_core-0.3.4}/graphiti_core/llm_client/client.py +1 -1
- {graphiti_core-0.3.2 → graphiti_core-0.3.4}/graphiti_core/llm_client/config.py +1 -1
- {graphiti_core-0.3.2 → graphiti_core-0.3.4}/graphiti_core/llm_client/groq_client.py +5 -1
- {graphiti_core-0.3.2 → graphiti_core-0.3.4}/graphiti_core/llm_client/openai_client.py +39 -2
- {graphiti_core-0.3.2 → graphiti_core-0.3.4}/graphiti_core/nodes.py +64 -13
- {graphiti_core-0.3.2 → graphiti_core-0.3.4}/graphiti_core/search/search.py +7 -1
- {graphiti_core-0.3.2 → graphiti_core-0.3.4}/graphiti_core/search/search_config.py +2 -0
- {graphiti_core-0.3.2 → graphiti_core-0.3.4}/graphiti_core/search/search_config_recipes.py +16 -0
- {graphiti_core-0.3.2 → graphiti_core-0.3.4}/graphiti_core/search/search_utils.py +57 -1
- graphiti_core-0.3.4/graphiti_core/utils/maintenance/community_operations.py +310 -0
- {graphiti_core-0.3.2 → graphiti_core-0.3.4}/graphiti_core/utils/maintenance/edge_operations.py +2 -0
- {graphiti_core-0.3.2 → graphiti_core-0.3.4}/graphiti_core/utils/maintenance/node_operations.py +6 -3
- {graphiti_core-0.3.2 → graphiti_core-0.3.4}/pyproject.toml +3 -3
- graphiti_core-0.3.2/graphiti_core/utils/maintenance/community_operations.py +0 -155
- {graphiti_core-0.3.2 → graphiti_core-0.3.4}/LICENSE +0 -0
- {graphiti_core-0.3.2 → graphiti_core-0.3.4}/graphiti_core/__init__.py +0 -0
- {graphiti_core-0.3.2 → graphiti_core-0.3.4}/graphiti_core/errors.py +0 -0
- {graphiti_core-0.3.2 → graphiti_core-0.3.4}/graphiti_core/helpers.py +0 -0
- {graphiti_core-0.3.2 → graphiti_core-0.3.4}/graphiti_core/llm_client/__init__.py +0 -0
- {graphiti_core-0.3.2 → graphiti_core-0.3.4}/graphiti_core/llm_client/errors.py +0 -0
- {graphiti_core-0.3.2 → graphiti_core-0.3.4}/graphiti_core/llm_client/utils.py +0 -0
- {graphiti_core-0.3.2 → graphiti_core-0.3.4}/graphiti_core/prompts/__init__.py +0 -0
- {graphiti_core-0.3.2 → graphiti_core-0.3.4}/graphiti_core/prompts/dedupe_edges.py +0 -0
- {graphiti_core-0.3.2 → graphiti_core-0.3.4}/graphiti_core/prompts/dedupe_nodes.py +0 -0
- {graphiti_core-0.3.2 → graphiti_core-0.3.4}/graphiti_core/prompts/extract_edge_dates.py +0 -0
- {graphiti_core-0.3.2 → graphiti_core-0.3.4}/graphiti_core/prompts/extract_edges.py +0 -0
- {graphiti_core-0.3.2 → graphiti_core-0.3.4}/graphiti_core/prompts/extract_nodes.py +0 -0
- {graphiti_core-0.3.2 → graphiti_core-0.3.4}/graphiti_core/prompts/invalidate_edges.py +0 -0
- {graphiti_core-0.3.2 → graphiti_core-0.3.4}/graphiti_core/prompts/lib.py +0 -0
- {graphiti_core-0.3.2 → graphiti_core-0.3.4}/graphiti_core/prompts/models.py +0 -0
- {graphiti_core-0.3.2 → graphiti_core-0.3.4}/graphiti_core/prompts/summarize_nodes.py +0 -0
- {graphiti_core-0.3.2 → graphiti_core-0.3.4}/graphiti_core/py.typed +0 -0
- {graphiti_core-0.3.2 → graphiti_core-0.3.4}/graphiti_core/search/__init__.py +0 -0
- {graphiti_core-0.3.2 → graphiti_core-0.3.4}/graphiti_core/utils/__init__.py +0 -0
- {graphiti_core-0.3.2 → graphiti_core-0.3.4}/graphiti_core/utils/bulk_utils.py +0 -0
- {graphiti_core-0.3.2 → graphiti_core-0.3.4}/graphiti_core/utils/maintenance/__init__.py +0 -0
- {graphiti_core-0.3.2 → graphiti_core-0.3.4}/graphiti_core/utils/maintenance/graph_data_operations.py +0 -0
- {graphiti_core-0.3.2 → graphiti_core-0.3.4}/graphiti_core/utils/maintenance/temporal_operations.py +0 -0
- {graphiti_core-0.3.2 → graphiti_core-0.3.4}/graphiti_core/utils/maintenance/utils.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: graphiti-core
|
|
3
|
-
Version: 0.3.2
|
|
3
|
+
Version: 0.3.4
|
|
4
4
|
Summary: A temporal graph building library
|
|
5
5
|
License: Apache-2.0
|
|
6
6
|
Author: Paul Paliychuk
|
|
@@ -21,7 +21,7 @@ Description-Content-Type: text/markdown
|
|
|
21
21
|
|
|
22
22
|
<div align="center">
|
|
23
23
|
|
|
24
|
-
|
|
24
|
+
<img width="350" alt="Graphiti-ts-small" src="https://github.com/user-attachments/assets/bbd02947-e435-4a05-b25a-bbbac36d52c8">
|
|
25
25
|
|
|
26
26
|
## Temporal Knowledge Graphs for Agentic Applications
|
|
27
27
|
|
|
@@ -37,7 +37,9 @@ Description-Content-Type: text/markdown
|
|
|
37
37
|
|
|
38
38
|
</div>
|
|
39
39
|
|
|
40
|
-
Graphiti builds dynamic, temporally aware Knowledge Graphs that represent complex, evolving relationships between
|
|
40
|
+
Graphiti builds dynamic, temporally aware Knowledge Graphs that represent complex, evolving relationships between
|
|
41
|
+
entities over time. Graphiti ingests both unstructured and structured data, and the resulting graph may be queried using
|
|
42
|
+
a fusion of time, full-text, semantic, and graph algorithm approaches.
|
|
41
43
|
|
|
42
44
|
<br />
|
|
43
45
|
|
|
@@ -47,25 +49,39 @@ Graphiti builds dynamic, temporally aware Knowledge Graphs that represent comple
|
|
|
47
49
|
|
|
48
50
|
<br />
|
|
49
51
|
|
|
50
|
-
Graphiti helps you create and query Knowledge Graphs that evolve over time. A knowledge graph is a network of
|
|
52
|
+
Graphiti helps you create and query Knowledge Graphs that evolve over time. A knowledge graph is a network of
|
|
53
|
+
interconnected facts, such as _“Kendra loves Adidas shoes.”_ Each fact is a “triplet” represented by two entities, or
|
|
54
|
+
nodes (_“Kendra”_, _“Adidas shoes”_), and their relationship, or edge (_“loves”_). Knowledge Graphs have been explored
|
|
55
|
+
extensively for information retrieval. What makes Graphiti unique is its ability to autonomously build a knowledge graph
|
|
56
|
+
while handling changing relationships and maintaining historical context.
|
|
51
57
|
|
|
52
58
|
With Graphiti, you can build LLM applications such as:
|
|
53
59
|
|
|
54
|
-
- Assistants that learn from user interactions, fusing personal knowledge with dynamic data from business systems like
|
|
60
|
+
- Assistants that learn from user interactions, fusing personal knowledge with dynamic data from business systems like
|
|
61
|
+
CRMs and billing platforms.
|
|
55
62
|
- Agents that autonomously execute complex tasks, reasoning with state changes from multiple dynamic sources.
|
|
56
63
|
|
|
57
|
-
Graphiti supports a wide range of applications in sales, customer service, health, finance, and more, enabling long-term
|
|
64
|
+
Graphiti supports a wide range of applications in sales, customer service, health, finance, and more, enabling long-term
|
|
65
|
+
recall and state-based reasoning for both assistants and agents.
|
|
58
66
|
|
|
59
67
|
## Why Graphiti?
|
|
60
68
|
|
|
61
|
-
We were intrigued by Microsoft’s GraphRAG, which expanded on RAG text chunking by using a graph to better model a
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
+
We were intrigued by Microsoft’s GraphRAG, which expanded on RAG text chunking by using a graph to better model a
|
|
70
|
+
document corpus and making this representation available via semantic and graph search techniques. However, GraphRAG did
|
|
71
|
+
not address our core problem: It's primarily designed for static documents and doesn't inherently handle temporal
|
|
72
|
+
aspects of data.
|
|
73
|
+
|
|
74
|
+
Graphiti is designed from the ground up to handle constantly changing information, hybrid semantic and graph search, and
|
|
75
|
+
scale:
|
|
76
|
+
|
|
77
|
+
- **Temporal Awareness:** Tracks changes in facts and relationships over time, enabling point-in-time queries. Graph
|
|
78
|
+
edges include temporal metadata to record relationship lifecycles.
|
|
79
|
+
- **Episodic Processing:** Ingests data as discrete episodes, maintaining data provenance and allowing incremental
|
|
80
|
+
entity and relationship extraction.
|
|
81
|
+
- **Hybrid Search:** Combines semantic and BM25 full-text search, with the ability to rerank results by distance from a
|
|
82
|
+
central node e.g. “Kendra”.
|
|
83
|
+
- **Scalable:** Designed for processing large datasets, with parallelization of LLM calls for bulk processing while
|
|
84
|
+
preserving the chronology of events.
|
|
69
85
|
- **Supports Varied Sources:** Can ingest both unstructured text and structured JSON data.
|
|
70
86
|
|
|
71
87
|
<p align="center">
|
|
@@ -91,7 +107,8 @@ Optional:
|
|
|
91
107
|
- Anthropic or Groq API key (for alternative LLM providers)
|
|
92
108
|
|
|
93
109
|
> [!TIP]
|
|
94
|
-
> The simplest way to install Neo4j is via [Neo4j Desktop](https://neo4j.com/download/). It provides a user-friendly
|
|
110
|
+
> The simplest way to install Neo4j is via [Neo4j Desktop](https://neo4j.com/download/). It provides a user-friendly
|
|
111
|
+
> interface to manage Neo4j instances and databases.
|
|
95
112
|
|
|
96
113
|
```bash
|
|
97
114
|
pip install graphiti-core
|
|
@@ -106,7 +123,8 @@ poetry add graphiti-core
|
|
|
106
123
|
## Quick Start
|
|
107
124
|
|
|
108
125
|
> [!IMPORTANT]
|
|
109
|
-
> Graphiti uses OpenAI for LLM inference and embedding. Ensure that an `OPENAI_API_KEY` is set in your environment.
|
|
126
|
+
> Graphiti uses OpenAI for LLM inference and embedding. Ensure that an `OPENAI_API_KEY` is set in your environment.
|
|
127
|
+
> Support for Anthropic and Groq LLM inferences is available, too.
|
|
110
128
|
|
|
111
129
|
```python
|
|
112
130
|
from graphiti_core import Graphiti
|
|
@@ -140,25 +158,25 @@ for i, episode in enumerate(episodes):
|
|
|
140
158
|
results = await graphiti.search('Who was the California Attorney General?')
|
|
141
159
|
[
|
|
142
160
|
EntityEdge(
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
161
|
+
│ uuid = '3133258f738e487383f07b04e15d4ac0',
|
|
162
|
+
│ source_node_uuid = '2a85789b318d4e418050506879906e62',
|
|
163
|
+
│ target_node_uuid = 'baf7781f445945989d6e4f927f881556',
|
|
164
|
+
│ created_at = datetime.datetime(2024, 8, 26, 13, 13, 24, 861097),
|
|
165
|
+
│ name = 'HELD_POSITION',
|
|
166
|
+
# the fact reflects the updated state that Harris is
|
|
167
|
+
# no longer the AG of California
|
|
168
|
+
│ fact = 'Kamala Harris was the Attorney General of California',
|
|
169
|
+
│ fact_embedding = [
|
|
170
|
+
│ │ -0.009955154731869698,
|
|
171
|
+
│ ...
|
|
172
|
+
│ │ 0.00784289836883545
|
|
173
|
+
│],
|
|
174
|
+
│ episodes = ['b43e98ad0a904088a76c67985caecc22'],
|
|
175
|
+
│ expired_at = datetime.datetime(2024, 8, 26, 20, 18, 1, 53812),
|
|
176
|
+
# These dates represent the date this edge was true.
|
|
177
|
+
│ valid_at = datetime.datetime(2011, 1, 3, 0, 0, tzinfo= < UTC >),
|
|
178
|
+
│ invalid_at = datetime.datetime(2017, 1, 3, 0, 0, tzinfo= < UTC >)
|
|
179
|
+
)
|
|
162
180
|
]
|
|
163
181
|
|
|
164
182
|
# Rerank search results based on graph distance
|
|
@@ -191,14 +209,16 @@ Graphiti is under active development. We aim to maintain API stability while wor
|
|
|
191
209
|
- [ ] Achieving good performance with different LLM and embedding models
|
|
192
210
|
- [ ] Creating a dedicated embedder interface
|
|
193
211
|
- [ ] Supporting custom graph schemas:
|
|
194
|
-
|
|
195
|
-
|
|
212
|
+
- Allow developers to provide their own defined node and edge classes when ingesting episodes
|
|
213
|
+
- Enable more flexible knowledge representation tailored to specific use cases
|
|
196
214
|
- [ ] Enhancing retrieval capabilities with more robust and configurable options
|
|
197
215
|
- [ ] Expanding test coverage to ensure reliability and catch edge cases
|
|
198
216
|
|
|
199
217
|
## Contributing
|
|
200
218
|
|
|
201
|
-
We encourage and appreciate all forms of contributions, whether it's code, documentation, addressing GitHub Issues, or
|
|
219
|
+
We encourage and appreciate all forms of contributions, whether it's code, documentation, addressing GitHub Issues, or
|
|
220
|
+
answering questions in the Graphiti Discord channel. For detailed guidelines on code contributions, please refer
|
|
221
|
+
to [CONTRIBUTING](CONTRIBUTING.md).
|
|
202
222
|
|
|
203
223
|
## Support
|
|
204
224
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
<div align="center">
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
<img width="350" alt="Graphiti-ts-small" src="https://github.com/user-attachments/assets/bbd02947-e435-4a05-b25a-bbbac36d52c8">
|
|
4
4
|
|
|
5
5
|
## Temporal Knowledge Graphs for Agentic Applications
|
|
6
6
|
|
|
@@ -16,7 +16,9 @@
|
|
|
16
16
|
|
|
17
17
|
</div>
|
|
18
18
|
|
|
19
|
-
Graphiti builds dynamic, temporally aware Knowledge Graphs that represent complex, evolving relationships between
|
|
19
|
+
Graphiti builds dynamic, temporally aware Knowledge Graphs that represent complex, evolving relationships between
|
|
20
|
+
entities over time. Graphiti ingests both unstructured and structured data, and the resulting graph may be queried using
|
|
21
|
+
a fusion of time, full-text, semantic, and graph algorithm approaches.
|
|
20
22
|
|
|
21
23
|
<br />
|
|
22
24
|
|
|
@@ -26,25 +28,39 @@ Graphiti builds dynamic, temporally aware Knowledge Graphs that represent comple
|
|
|
26
28
|
|
|
27
29
|
<br />
|
|
28
30
|
|
|
29
|
-
Graphiti helps you create and query Knowledge Graphs that evolve over time. A knowledge graph is a network of
|
|
31
|
+
Graphiti helps you create and query Knowledge Graphs that evolve over time. A knowledge graph is a network of
|
|
32
|
+
interconnected facts, such as _“Kendra loves Adidas shoes.”_ Each fact is a “triplet” represented by two entities, or
|
|
33
|
+
nodes (_“Kendra”_, _“Adidas shoes”_), and their relationship, or edge (_“loves”_). Knowledge Graphs have been explored
|
|
34
|
+
extensively for information retrieval. What makes Graphiti unique is its ability to autonomously build a knowledge graph
|
|
35
|
+
while handling changing relationships and maintaining historical context.
|
|
30
36
|
|
|
31
37
|
With Graphiti, you can build LLM applications such as:
|
|
32
38
|
|
|
33
|
-
- Assistants that learn from user interactions, fusing personal knowledge with dynamic data from business systems like
|
|
39
|
+
- Assistants that learn from user interactions, fusing personal knowledge with dynamic data from business systems like
|
|
40
|
+
CRMs and billing platforms.
|
|
34
41
|
- Agents that autonomously execute complex tasks, reasoning with state changes from multiple dynamic sources.
|
|
35
42
|
|
|
36
|
-
Graphiti supports a wide range of applications in sales, customer service, health, finance, and more, enabling long-term
|
|
43
|
+
Graphiti supports a wide range of applications in sales, customer service, health, finance, and more, enabling long-term
|
|
44
|
+
recall and state-based reasoning for both assistants and agents.
|
|
37
45
|
|
|
38
46
|
## Why Graphiti?
|
|
39
47
|
|
|
40
|
-
We were intrigued by Microsoft’s GraphRAG, which expanded on RAG text chunking by using a graph to better model a
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
+
We were intrigued by Microsoft’s GraphRAG, which expanded on RAG text chunking by using a graph to better model a
|
|
49
|
+
document corpus and making this representation available via semantic and graph search techniques. However, GraphRAG did
|
|
50
|
+
not address our core problem: It's primarily designed for static documents and doesn't inherently handle temporal
|
|
51
|
+
aspects of data.
|
|
52
|
+
|
|
53
|
+
Graphiti is designed from the ground up to handle constantly changing information, hybrid semantic and graph search, and
|
|
54
|
+
scale:
|
|
55
|
+
|
|
56
|
+
- **Temporal Awareness:** Tracks changes in facts and relationships over time, enabling point-in-time queries. Graph
|
|
57
|
+
edges include temporal metadata to record relationship lifecycles.
|
|
58
|
+
- **Episodic Processing:** Ingests data as discrete episodes, maintaining data provenance and allowing incremental
|
|
59
|
+
entity and relationship extraction.
|
|
60
|
+
- **Hybrid Search:** Combines semantic and BM25 full-text search, with the ability to rerank results by distance from a
|
|
61
|
+
central node e.g. “Kendra”.
|
|
62
|
+
- **Scalable:** Designed for processing large datasets, with parallelization of LLM calls for bulk processing while
|
|
63
|
+
preserving the chronology of events.
|
|
48
64
|
- **Supports Varied Sources:** Can ingest both unstructured text and structured JSON data.
|
|
49
65
|
|
|
50
66
|
<p align="center">
|
|
@@ -70,7 +86,8 @@ Optional:
|
|
|
70
86
|
- Anthropic or Groq API key (for alternative LLM providers)
|
|
71
87
|
|
|
72
88
|
> [!TIP]
|
|
73
|
-
> The simplest way to install Neo4j is via [Neo4j Desktop](https://neo4j.com/download/). It provides a user-friendly
|
|
89
|
+
> The simplest way to install Neo4j is via [Neo4j Desktop](https://neo4j.com/download/). It provides a user-friendly
|
|
90
|
+
> interface to manage Neo4j instances and databases.
|
|
74
91
|
|
|
75
92
|
```bash
|
|
76
93
|
pip install graphiti-core
|
|
@@ -85,7 +102,8 @@ poetry add graphiti-core
|
|
|
85
102
|
## Quick Start
|
|
86
103
|
|
|
87
104
|
> [!IMPORTANT]
|
|
88
|
-
> Graphiti uses OpenAI for LLM inference and embedding. Ensure that an `OPENAI_API_KEY` is set in your environment.
|
|
105
|
+
> Graphiti uses OpenAI for LLM inference and embedding. Ensure that an `OPENAI_API_KEY` is set in your environment.
|
|
106
|
+
> Support for Anthropic and Groq LLM inferences is available, too.
|
|
89
107
|
|
|
90
108
|
```python
|
|
91
109
|
from graphiti_core import Graphiti
|
|
@@ -119,25 +137,25 @@ for i, episode in enumerate(episodes):
|
|
|
119
137
|
results = await graphiti.search('Who was the California Attorney General?')
|
|
120
138
|
[
|
|
121
139
|
EntityEdge(
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
140
|
+
│ uuid = '3133258f738e487383f07b04e15d4ac0',
|
|
141
|
+
│ source_node_uuid = '2a85789b318d4e418050506879906e62',
|
|
142
|
+
│ target_node_uuid = 'baf7781f445945989d6e4f927f881556',
|
|
143
|
+
│ created_at = datetime.datetime(2024, 8, 26, 13, 13, 24, 861097),
|
|
144
|
+
│ name = 'HELD_POSITION',
|
|
145
|
+
# the fact reflects the updated state that Harris is
|
|
146
|
+
# no longer the AG of California
|
|
147
|
+
│ fact = 'Kamala Harris was the Attorney General of California',
|
|
148
|
+
│ fact_embedding = [
|
|
149
|
+
│ │ -0.009955154731869698,
|
|
150
|
+
│ ...
|
|
151
|
+
│ │ 0.00784289836883545
|
|
152
|
+
│],
|
|
153
|
+
│ episodes = ['b43e98ad0a904088a76c67985caecc22'],
|
|
154
|
+
│ expired_at = datetime.datetime(2024, 8, 26, 20, 18, 1, 53812),
|
|
155
|
+
# These dates represent the date this edge was true.
|
|
156
|
+
│ valid_at = datetime.datetime(2011, 1, 3, 0, 0, tzinfo= < UTC >),
|
|
157
|
+
│ invalid_at = datetime.datetime(2017, 1, 3, 0, 0, tzinfo= < UTC >)
|
|
158
|
+
)
|
|
141
159
|
]
|
|
142
160
|
|
|
143
161
|
# Rerank search results based on graph distance
|
|
@@ -170,14 +188,16 @@ Graphiti is under active development. We aim to maintain API stability while wor
|
|
|
170
188
|
- [ ] Achieving good performance with different LLM and embedding models
|
|
171
189
|
- [ ] Creating a dedicated embedder interface
|
|
172
190
|
- [ ] Supporting custom graph schemas:
|
|
173
|
-
|
|
174
|
-
|
|
191
|
+
- Allow developers to provide their own defined node and edge classes when ingesting episodes
|
|
192
|
+
- Enable more flexible knowledge representation tailored to specific use cases
|
|
175
193
|
- [ ] Enhancing retrieval capabilities with more robust and configurable options
|
|
176
194
|
- [ ] Expanding test coverage to ensure reliability and catch edge cases
|
|
177
195
|
|
|
178
196
|
## Contributing
|
|
179
197
|
|
|
180
|
-
We encourage and appreciate all forms of contributions, whether it's code, documentation, addressing GitHub Issues, or
|
|
198
|
+
We encourage and appreciate all forms of contributions, whether it's code, documentation, addressing GitHub Issues, or
|
|
199
|
+
answering questions in the Graphiti Discord channel. For detailed guidelines on code contributions, please refer
|
|
200
|
+
to [CONTRIBUTING](CONTRIBUTING.md).
|
|
181
201
|
|
|
182
202
|
## Support
|
|
183
203
|
|
|
@@ -33,7 +33,7 @@ logger = logging.getLogger(__name__)
|
|
|
33
33
|
|
|
34
34
|
|
|
35
35
|
class Edge(BaseModel, ABC):
|
|
36
|
-
uuid: str = Field(default_factory=lambda: uuid4().hex)
|
|
36
|
+
uuid: str = Field(default_factory=lambda: str(uuid4()))
|
|
37
37
|
group_id: str | None = Field(description='partition of the graph')
|
|
38
38
|
source_node_uuid: str
|
|
39
39
|
target_node_uuid: str
|
|
@@ -104,18 +104,62 @@ class EpisodicEdge(Edge):
|
|
|
104
104
|
|
|
105
105
|
edges = [get_episodic_edge_from_record(record) for record in records]
|
|
106
106
|
|
|
107
|
-
logger.info(f'Found Edge: {uuid}')
|
|
108
107
|
if len(edges) == 0:
|
|
109
108
|
raise EdgeNotFoundError(uuid)
|
|
110
109
|
return edges[0]
|
|
111
110
|
|
|
111
|
+
@classmethod
|
|
112
|
+
async def get_by_uuids(cls, driver: AsyncDriver, uuids: list[str]):
|
|
113
|
+
records, _, _ = await driver.execute_query(
|
|
114
|
+
"""
|
|
115
|
+
MATCH (n:Episodic)-[e:MENTIONS]->(m:Entity)
|
|
116
|
+
WHERE e.uuid IN $uuids
|
|
117
|
+
RETURN
|
|
118
|
+
e.uuid As uuid,
|
|
119
|
+
e.group_id AS group_id,
|
|
120
|
+
n.uuid AS source_node_uuid,
|
|
121
|
+
m.uuid AS target_node_uuid,
|
|
122
|
+
e.created_at AS created_at
|
|
123
|
+
""",
|
|
124
|
+
uuids=uuids,
|
|
125
|
+
)
|
|
126
|
+
|
|
127
|
+
edges = [get_episodic_edge_from_record(record) for record in records]
|
|
128
|
+
|
|
129
|
+
if len(edges) == 0:
|
|
130
|
+
raise EdgeNotFoundError(uuids[0])
|
|
131
|
+
return edges
|
|
132
|
+
|
|
133
|
+
@classmethod
|
|
134
|
+
async def get_by_group_ids(cls, driver: AsyncDriver, group_ids: list[str | None]):
|
|
135
|
+
records, _, _ = await driver.execute_query(
|
|
136
|
+
"""
|
|
137
|
+
MATCH (n:Episodic)-[e:MENTIONS]->(m:Entity)
|
|
138
|
+
WHERE e.group_id IN $group_ids
|
|
139
|
+
RETURN
|
|
140
|
+
e.uuid As uuid,
|
|
141
|
+
e.group_id AS group_id,
|
|
142
|
+
n.uuid AS source_node_uuid,
|
|
143
|
+
m.uuid AS target_node_uuid,
|
|
144
|
+
e.created_at AS created_at
|
|
145
|
+
""",
|
|
146
|
+
group_ids=group_ids,
|
|
147
|
+
)
|
|
148
|
+
|
|
149
|
+
edges = [get_episodic_edge_from_record(record) for record in records]
|
|
150
|
+
uuids = [edge.uuid for edge in edges]
|
|
151
|
+
|
|
152
|
+
if len(edges) == 0:
|
|
153
|
+
raise EdgeNotFoundError(uuids[0])
|
|
154
|
+
return edges
|
|
155
|
+
|
|
112
156
|
|
|
113
157
|
class EntityEdge(Edge):
|
|
114
158
|
name: str = Field(description='name of the edge, relation name')
|
|
115
159
|
fact: str = Field(description='fact representing the edge and nodes that it connects')
|
|
116
160
|
fact_embedding: list[float] | None = Field(default=None, description='embedding of the fact')
|
|
117
|
-
episodes: list[str]
|
|
118
|
-
default=
|
|
161
|
+
episodes: list[str] = Field(
|
|
162
|
+
default=[],
|
|
119
163
|
description='list of episode ids that reference these entity edges',
|
|
120
164
|
)
|
|
121
165
|
expired_at: datetime | None = Field(
|
|
@@ -192,11 +236,69 @@ class EntityEdge(Edge):
|
|
|
192
236
|
|
|
193
237
|
edges = [get_entity_edge_from_record(record) for record in records]
|
|
194
238
|
|
|
195
|
-
logger.info(f'Found Edge: {uuid}')
|
|
196
239
|
if len(edges) == 0:
|
|
197
240
|
raise EdgeNotFoundError(uuid)
|
|
198
241
|
return edges[0]
|
|
199
242
|
|
|
243
|
+
@classmethod
|
|
244
|
+
async def get_by_uuids(cls, driver: AsyncDriver, uuids: list[str]):
|
|
245
|
+
records, _, _ = await driver.execute_query(
|
|
246
|
+
"""
|
|
247
|
+
MATCH (n:Entity)-[e:RELATES_TO]->(m:Entity)
|
|
248
|
+
WHERE e.uuid IN $uuids
|
|
249
|
+
RETURN
|
|
250
|
+
e.uuid AS uuid,
|
|
251
|
+
n.uuid AS source_node_uuid,
|
|
252
|
+
m.uuid AS target_node_uuid,
|
|
253
|
+
e.created_at AS created_at,
|
|
254
|
+
e.name AS name,
|
|
255
|
+
e.group_id AS group_id,
|
|
256
|
+
e.fact AS fact,
|
|
257
|
+
e.fact_embedding AS fact_embedding,
|
|
258
|
+
e.episodes AS episodes,
|
|
259
|
+
e.expired_at AS expired_at,
|
|
260
|
+
e.valid_at AS valid_at,
|
|
261
|
+
e.invalid_at AS invalid_at
|
|
262
|
+
""",
|
|
263
|
+
uuids=uuids,
|
|
264
|
+
)
|
|
265
|
+
|
|
266
|
+
edges = [get_entity_edge_from_record(record) for record in records]
|
|
267
|
+
|
|
268
|
+
if len(edges) == 0:
|
|
269
|
+
raise EdgeNotFoundError(uuids[0])
|
|
270
|
+
return edges
|
|
271
|
+
|
|
272
|
+
@classmethod
|
|
273
|
+
async def get_by_group_ids(cls, driver: AsyncDriver, group_ids: list[str | None]):
|
|
274
|
+
records, _, _ = await driver.execute_query(
|
|
275
|
+
"""
|
|
276
|
+
MATCH (n:Entity)-[e:RELATES_TO]->(m:Entity)
|
|
277
|
+
WHERE e.group_id IN $group_ids
|
|
278
|
+
RETURN
|
|
279
|
+
e.uuid AS uuid,
|
|
280
|
+
n.uuid AS source_node_uuid,
|
|
281
|
+
m.uuid AS target_node_uuid,
|
|
282
|
+
e.created_at AS created_at,
|
|
283
|
+
e.name AS name,
|
|
284
|
+
e.group_id AS group_id,
|
|
285
|
+
e.fact AS fact,
|
|
286
|
+
e.fact_embedding AS fact_embedding,
|
|
287
|
+
e.episodes AS episodes,
|
|
288
|
+
e.expired_at AS expired_at,
|
|
289
|
+
e.valid_at AS valid_at,
|
|
290
|
+
e.invalid_at AS invalid_at
|
|
291
|
+
""",
|
|
292
|
+
group_ids=group_ids,
|
|
293
|
+
)
|
|
294
|
+
|
|
295
|
+
edges = [get_entity_edge_from_record(record) for record in records]
|
|
296
|
+
uuids = [edge.uuid for edge in edges]
|
|
297
|
+
|
|
298
|
+
if len(edges) == 0:
|
|
299
|
+
raise EdgeNotFoundError(uuids[0])
|
|
300
|
+
return edges
|
|
301
|
+
|
|
200
302
|
|
|
201
303
|
class CommunityEdge(Edge):
|
|
202
304
|
async def save(self, driver: AsyncDriver):
|
|
@@ -235,10 +337,48 @@ class CommunityEdge(Edge):
|
|
|
235
337
|
|
|
236
338
|
edges = [get_community_edge_from_record(record) for record in records]
|
|
237
339
|
|
|
238
|
-
logger.info(f'Found Edge: {uuid}')
|
|
239
|
-
|
|
240
340
|
return edges[0]
|
|
241
341
|
|
|
342
|
+
@classmethod
|
|
343
|
+
async def get_by_uuids(cls, driver: AsyncDriver, uuids: list[str]):
|
|
344
|
+
records, _, _ = await driver.execute_query(
|
|
345
|
+
"""
|
|
346
|
+
MATCH (n:Community)-[e:HAS_MEMBER]->(m:Entity | Community)
|
|
347
|
+
WHERE e.uuid IN $uuids
|
|
348
|
+
RETURN
|
|
349
|
+
e.uuid As uuid,
|
|
350
|
+
e.group_id AS group_id,
|
|
351
|
+
n.uuid AS source_node_uuid,
|
|
352
|
+
m.uuid AS target_node_uuid,
|
|
353
|
+
e.created_at AS created_at
|
|
354
|
+
""",
|
|
355
|
+
uuids=uuids,
|
|
356
|
+
)
|
|
357
|
+
|
|
358
|
+
edges = [get_community_edge_from_record(record) for record in records]
|
|
359
|
+
|
|
360
|
+
return edges
|
|
361
|
+
|
|
362
|
+
@classmethod
|
|
363
|
+
async def get_by_group_ids(cls, driver: AsyncDriver, group_ids: list[str | None]):
|
|
364
|
+
records, _, _ = await driver.execute_query(
|
|
365
|
+
"""
|
|
366
|
+
MATCH (n:Community)-[e:HAS_MEMBER]->(m:Entity | Community)
|
|
367
|
+
WHERE e.group_id IN $group_ids
|
|
368
|
+
RETURN
|
|
369
|
+
e.uuid As uuid,
|
|
370
|
+
e.group_id AS group_id,
|
|
371
|
+
n.uuid AS source_node_uuid,
|
|
372
|
+
m.uuid AS target_node_uuid,
|
|
373
|
+
e.created_at AS created_at
|
|
374
|
+
""",
|
|
375
|
+
group_ids=group_ids,
|
|
376
|
+
)
|
|
377
|
+
|
|
378
|
+
edges = [get_community_edge_from_record(record) for record in records]
|
|
379
|
+
|
|
380
|
+
return edges
|
|
381
|
+
|
|
242
382
|
|
|
243
383
|
# Edge helpers
|
|
244
384
|
def get_episodic_edge_from_record(record: Any) -> EpisodicEdge:
|
|
@@ -35,6 +35,8 @@ from graphiti_core.search.search_config_recipes import (
|
|
|
35
35
|
)
|
|
36
36
|
from graphiti_core.search.search_utils import (
|
|
37
37
|
RELEVANT_SCHEMA_LIMIT,
|
|
38
|
+
get_communities_by_nodes,
|
|
39
|
+
get_mentioned_nodes,
|
|
38
40
|
get_relevant_edges,
|
|
39
41
|
get_relevant_nodes,
|
|
40
42
|
)
|
|
@@ -54,6 +56,7 @@ from graphiti_core.utils.bulk_utils import (
|
|
|
54
56
|
from graphiti_core.utils.maintenance.community_operations import (
|
|
55
57
|
build_communities,
|
|
56
58
|
remove_communities,
|
|
59
|
+
update_community,
|
|
57
60
|
)
|
|
58
61
|
from graphiti_core.utils.maintenance.edge_operations import (
|
|
59
62
|
extract_edges,
|
|
@@ -74,7 +77,14 @@ load_dotenv()
|
|
|
74
77
|
|
|
75
78
|
|
|
76
79
|
class Graphiti:
|
|
77
|
-
def __init__(
|
|
80
|
+
def __init__(
|
|
81
|
+
self,
|
|
82
|
+
uri: str,
|
|
83
|
+
user: str,
|
|
84
|
+
password: str,
|
|
85
|
+
llm_client: LLMClient | None = None,
|
|
86
|
+
store_raw_episode_content: bool = True,
|
|
87
|
+
):
|
|
78
88
|
"""
|
|
79
89
|
Initialize a Graphiti instance.
|
|
80
90
|
|
|
@@ -113,6 +123,7 @@ class Graphiti:
|
|
|
113
123
|
"""
|
|
114
124
|
self.driver = AsyncGraphDatabase.driver(uri, auth=(user, password))
|
|
115
125
|
self.database = 'neo4j'
|
|
126
|
+
self.store_raw_episode_content = store_raw_episode_content
|
|
116
127
|
if llm_client:
|
|
117
128
|
self.llm_client = llm_client
|
|
118
129
|
else:
|
|
@@ -147,8 +158,8 @@ class Graphiti:
|
|
|
147
158
|
# Use graphiti...
|
|
148
159
|
finally:
|
|
149
160
|
graphiti.close()
|
|
150
|
-
self.driver.close()
|
|
151
161
|
"""
|
|
162
|
+
self.driver.close()
|
|
152
163
|
|
|
153
164
|
async def build_indices_and_constraints(self):
|
|
154
165
|
"""
|
|
@@ -224,6 +235,7 @@ class Graphiti:
|
|
|
224
235
|
source: EpisodeType = EpisodeType.message,
|
|
225
236
|
group_id: str | None = None,
|
|
226
237
|
uuid: str | None = None,
|
|
238
|
+
update_communities: bool = False,
|
|
227
239
|
):
|
|
228
240
|
"""
|
|
229
241
|
Process an episode and update the graph.
|
|
@@ -247,6 +259,8 @@ class Graphiti:
|
|
|
247
259
|
An id for the graph partition the episode is a part of.
|
|
248
260
|
uuid : str | None
|
|
249
261
|
Optional uuid of the episode.
|
|
262
|
+
update_communities : bool
|
|
263
|
+
Optional. Whether to update communities with new node information
|
|
250
264
|
|
|
251
265
|
Returns
|
|
252
266
|
-------
|
|
@@ -272,7 +286,6 @@ class Graphiti:
|
|
|
272
286
|
try:
|
|
273
287
|
start = time()
|
|
274
288
|
|
|
275
|
-
nodes: list[EntityNode] = []
|
|
276
289
|
entity_edges: list[EntityEdge] = []
|
|
277
290
|
embedder = self.llm_client.get_embedder()
|
|
278
291
|
now = datetime.now()
|
|
@@ -291,6 +304,8 @@ class Graphiti:
|
|
|
291
304
|
valid_at=reference_time,
|
|
292
305
|
)
|
|
293
306
|
episode.uuid = uuid if uuid is not None else episode.uuid
|
|
307
|
+
if not self.store_raw_episode_content:
|
|
308
|
+
episode.content = ''
|
|
294
309
|
|
|
295
310
|
# Extract entities as nodes
|
|
296
311
|
|
|
@@ -319,7 +334,7 @@ class Graphiti:
|
|
|
319
334
|
),
|
|
320
335
|
)
|
|
321
336
|
logger.info(f'Adjusted mentioned nodes: {[(n.name, n.uuid) for n in mentioned_nodes]}')
|
|
322
|
-
nodes.extend(mentioned_nodes)
|
|
337
|
+
nodes = mentioned_nodes
|
|
323
338
|
|
|
324
339
|
extracted_edges_with_resolved_pointers = resolve_edge_pointers(
|
|
325
340
|
extracted_edges, uuid_map
|
|
@@ -409,12 +424,22 @@ class Graphiti:
|
|
|
409
424
|
|
|
410
425
|
logger.info(f'Built episodic edges: {episodic_edges}')
|
|
411
426
|
|
|
427
|
+
episode.entity_edges = [edge.uuid for edge in entity_edges]
|
|
428
|
+
|
|
412
429
|
# Future optimization would be using batch operations to save nodes and edges
|
|
413
430
|
await episode.save(self.driver)
|
|
414
431
|
await asyncio.gather(*[node.save(self.driver) for node in nodes])
|
|
415
432
|
await asyncio.gather(*[edge.save(self.driver) for edge in episodic_edges])
|
|
416
433
|
await asyncio.gather(*[edge.save(self.driver) for edge in entity_edges])
|
|
417
434
|
|
|
435
|
+
# Update any communities
|
|
436
|
+
if update_communities:
|
|
437
|
+
await asyncio.gather(
|
|
438
|
+
*[
|
|
439
|
+
update_community(self.driver, self.llm_client, embedder, node)
|
|
440
|
+
for node in nodes
|
|
441
|
+
]
|
|
442
|
+
)
|
|
418
443
|
end = time()
|
|
419
444
|
logger.info(f'Completed add_episode in {(end - start) * 1000} ms')
|
|
420
445
|
|
|
@@ -554,7 +579,7 @@ class Graphiti:
|
|
|
554
579
|
center_node_uuid: str | None = None,
|
|
555
580
|
group_ids: list[str | None] | None = None,
|
|
556
581
|
num_results=DEFAULT_SEARCH_LIMIT,
|
|
557
|
-
):
|
|
582
|
+
) -> list[EntityEdge]:
|
|
558
583
|
"""
|
|
559
584
|
Perform a hybrid search on the knowledge graph.
|
|
560
585
|
|
|
@@ -569,7 +594,7 @@ class Graphiti:
|
|
|
569
594
|
Facts will be reranked based on proximity to this node
|
|
570
595
|
group_ids : list[str | None] | None, optional
|
|
571
596
|
The graph partitions to return data from.
|
|
572
|
-
|
|
597
|
+
num_results : int, optional
|
|
573
598
|
The maximum number of results to return. Defaults to 10.
|
|
574
599
|
|
|
575
600
|
Returns
|
|
@@ -668,3 +693,19 @@ class Graphiti:
|
|
|
668
693
|
await search(self.driver, embedder, query, group_ids, search_config, center_node_uuid)
|
|
669
694
|
).nodes
|
|
670
695
|
return nodes
|
|
696
|
+
|
|
697
|
+
|
|
698
|
+
async def get_episode_mentions(self, episode_uuids: list[str]) -> SearchResults:
|
|
699
|
+
episodes = await EpisodicNode.get_by_uuids(self.driver, episode_uuids)
|
|
700
|
+
|
|
701
|
+
edges_list = await asyncio.gather(
|
|
702
|
+
*[EntityEdge.get_by_uuids(self.driver, episode.entity_edges) for episode in episodes]
|
|
703
|
+
)
|
|
704
|
+
|
|
705
|
+
edges: list[EntityEdge] = [edge for lst in edges_list for edge in lst]
|
|
706
|
+
|
|
707
|
+
nodes = await get_mentioned_nodes(self.driver, episodes)
|
|
708
|
+
|
|
709
|
+
communities = await get_communities_by_nodes(self.driver, nodes)
|
|
710
|
+
|
|
711
|
+
return SearchResults(edges=edges, nodes=nodes, communities=communities)
|