graphiti-core 0.3.2__tar.gz → 0.3.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of graphiti-core might be problematic. (The original page linked to more details here; the link target is not preserved in this text.)

Files changed (44):
  1. {graphiti_core-0.3.2 → graphiti_core-0.3.4}/PKG-INFO +58 -38
  2. {graphiti_core-0.3.2 → graphiti_core-0.3.4}/README.md +57 -37
  3. {graphiti_core-0.3.2 → graphiti_core-0.3.4}/graphiti_core/edges.py +147 -7
  4. {graphiti_core-0.3.2 → graphiti_core-0.3.4}/graphiti_core/graphiti.py +47 -6
  5. {graphiti_core-0.3.2 → graphiti_core-0.3.4}/graphiti_core/llm_client/anthropic_client.py +5 -1
  6. {graphiti_core-0.3.2 → graphiti_core-0.3.4}/graphiti_core/llm_client/client.py +1 -1
  7. {graphiti_core-0.3.2 → graphiti_core-0.3.4}/graphiti_core/llm_client/config.py +1 -1
  8. {graphiti_core-0.3.2 → graphiti_core-0.3.4}/graphiti_core/llm_client/groq_client.py +5 -1
  9. {graphiti_core-0.3.2 → graphiti_core-0.3.4}/graphiti_core/llm_client/openai_client.py +39 -2
  10. {graphiti_core-0.3.2 → graphiti_core-0.3.4}/graphiti_core/nodes.py +64 -13
  11. {graphiti_core-0.3.2 → graphiti_core-0.3.4}/graphiti_core/search/search.py +7 -1
  12. {graphiti_core-0.3.2 → graphiti_core-0.3.4}/graphiti_core/search/search_config.py +2 -0
  13. {graphiti_core-0.3.2 → graphiti_core-0.3.4}/graphiti_core/search/search_config_recipes.py +16 -0
  14. {graphiti_core-0.3.2 → graphiti_core-0.3.4}/graphiti_core/search/search_utils.py +57 -1
  15. graphiti_core-0.3.4/graphiti_core/utils/maintenance/community_operations.py +310 -0
  16. {graphiti_core-0.3.2 → graphiti_core-0.3.4}/graphiti_core/utils/maintenance/edge_operations.py +2 -0
  17. {graphiti_core-0.3.2 → graphiti_core-0.3.4}/graphiti_core/utils/maintenance/node_operations.py +6 -3
  18. {graphiti_core-0.3.2 → graphiti_core-0.3.4}/pyproject.toml +3 -3
  19. graphiti_core-0.3.2/graphiti_core/utils/maintenance/community_operations.py +0 -155
  20. {graphiti_core-0.3.2 → graphiti_core-0.3.4}/LICENSE +0 -0
  21. {graphiti_core-0.3.2 → graphiti_core-0.3.4}/graphiti_core/__init__.py +0 -0
  22. {graphiti_core-0.3.2 → graphiti_core-0.3.4}/graphiti_core/errors.py +0 -0
  23. {graphiti_core-0.3.2 → graphiti_core-0.3.4}/graphiti_core/helpers.py +0 -0
  24. {graphiti_core-0.3.2 → graphiti_core-0.3.4}/graphiti_core/llm_client/__init__.py +0 -0
  25. {graphiti_core-0.3.2 → graphiti_core-0.3.4}/graphiti_core/llm_client/errors.py +0 -0
  26. {graphiti_core-0.3.2 → graphiti_core-0.3.4}/graphiti_core/llm_client/utils.py +0 -0
  27. {graphiti_core-0.3.2 → graphiti_core-0.3.4}/graphiti_core/prompts/__init__.py +0 -0
  28. {graphiti_core-0.3.2 → graphiti_core-0.3.4}/graphiti_core/prompts/dedupe_edges.py +0 -0
  29. {graphiti_core-0.3.2 → graphiti_core-0.3.4}/graphiti_core/prompts/dedupe_nodes.py +0 -0
  30. {graphiti_core-0.3.2 → graphiti_core-0.3.4}/graphiti_core/prompts/extract_edge_dates.py +0 -0
  31. {graphiti_core-0.3.2 → graphiti_core-0.3.4}/graphiti_core/prompts/extract_edges.py +0 -0
  32. {graphiti_core-0.3.2 → graphiti_core-0.3.4}/graphiti_core/prompts/extract_nodes.py +0 -0
  33. {graphiti_core-0.3.2 → graphiti_core-0.3.4}/graphiti_core/prompts/invalidate_edges.py +0 -0
  34. {graphiti_core-0.3.2 → graphiti_core-0.3.4}/graphiti_core/prompts/lib.py +0 -0
  35. {graphiti_core-0.3.2 → graphiti_core-0.3.4}/graphiti_core/prompts/models.py +0 -0
  36. {graphiti_core-0.3.2 → graphiti_core-0.3.4}/graphiti_core/prompts/summarize_nodes.py +0 -0
  37. {graphiti_core-0.3.2 → graphiti_core-0.3.4}/graphiti_core/py.typed +0 -0
  38. {graphiti_core-0.3.2 → graphiti_core-0.3.4}/graphiti_core/search/__init__.py +0 -0
  39. {graphiti_core-0.3.2 → graphiti_core-0.3.4}/graphiti_core/utils/__init__.py +0 -0
  40. {graphiti_core-0.3.2 → graphiti_core-0.3.4}/graphiti_core/utils/bulk_utils.py +0 -0
  41. {graphiti_core-0.3.2 → graphiti_core-0.3.4}/graphiti_core/utils/maintenance/__init__.py +0 -0
  42. {graphiti_core-0.3.2 → graphiti_core-0.3.4}/graphiti_core/utils/maintenance/graph_data_operations.py +0 -0
  43. {graphiti_core-0.3.2 → graphiti_core-0.3.4}/graphiti_core/utils/maintenance/temporal_operations.py +0 -0
  44. {graphiti_core-0.3.2 → graphiti_core-0.3.4}/graphiti_core/utils/maintenance/utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: graphiti-core
3
- Version: 0.3.2
3
+ Version: 0.3.4
4
4
  Summary: A temporal graph building library
5
5
  License: Apache-2.0
6
6
  Author: Paul Paliychuk
@@ -21,7 +21,7 @@ Description-Content-Type: text/markdown
21
21
 
22
22
  <div align="center">
23
23
 
24
- # Graphiti
24
+ <img width="350" alt="Graphiti-ts-small" src="https://github.com/user-attachments/assets/bbd02947-e435-4a05-b25a-bbbac36d52c8">
25
25
 
26
26
  ## Temporal Knowledge Graphs for Agentic Applications
27
27
 
@@ -37,7 +37,9 @@ Description-Content-Type: text/markdown
37
37
 
38
38
  </div>
39
39
 
40
- Graphiti builds dynamic, temporally aware Knowledge Graphs that represent complex, evolving relationships between entities over time. Graphiti ingests both unstructured and structured data, and the resulting graph may be queried using a fusion of time, full-text, semantic, and graph algorithm approaches.
40
+ Graphiti builds dynamic, temporally aware Knowledge Graphs that represent complex, evolving relationships between
41
+ entities over time. Graphiti ingests both unstructured and structured data, and the resulting graph may be queried using
42
+ a fusion of time, full-text, semantic, and graph algorithm approaches.
41
43
 
42
44
  <br />
43
45
 
@@ -47,25 +49,39 @@ Graphiti builds dynamic, temporally aware Knowledge Graphs that represent comple
47
49
 
48
50
  <br />
49
51
 
50
- Graphiti helps you create and query Knowledge Graphs that evolve over time. A knowledge graph is a network of interconnected facts, such as _“Kendra loves Adidas shoes.”_ Each fact is a “triplet” represented by two entities, or nodes (_”Kendra”_, _“Adidas shoes”_), and their relationship, or edge (_”loves”_). Knowledge Graphs have been explored extensively for information retrieval. What makes Graphiti unique is its ability to autonomously build a knowledge graph while handling changing relationships and maintaining historical context.
52
+ Graphiti helps you create and query Knowledge Graphs that evolve over time. A knowledge graph is a network of
53
+ interconnected facts, such as _“Kendra loves Adidas shoes.”_ Each fact is a “triplet” represented by two entities, or
54
+ nodes (_”Kendra”_, _“Adidas shoes”_), and their relationship, or edge (_”loves”_). Knowledge Graphs have been explored
55
+ extensively for information retrieval. What makes Graphiti unique is its ability to autonomously build a knowledge graph
56
+ while handling changing relationships and maintaining historical context.
51
57
 
52
58
  With Graphiti, you can build LLM applications such as:
53
59
 
54
- - Assistants that learn from user interactions, fusing personal knowledge with dynamic data from business systems like CRMs and billing platforms.
60
+ - Assistants that learn from user interactions, fusing personal knowledge with dynamic data from business systems like
61
+ CRMs and billing platforms.
55
62
  - Agents that autonomously execute complex tasks, reasoning with state changes from multiple dynamic sources.
56
63
 
57
- Graphiti supports a wide range of applications in sales, customer service, health, finance, and more, enabling long-term recall and state-based reasoning for both assistants and agents.
64
+ Graphiti supports a wide range of applications in sales, customer service, health, finance, and more, enabling long-term
65
+ recall and state-based reasoning for both assistants and agents.
58
66
 
59
67
  ## Why Graphiti?
60
68
 
61
- We were intrigued by Microsoft’s GraphRAG, which expanded on RAG text chunking by using a graph to better model a document corpus and making this representation available via semantic and graph search techniques. However, GraphRAG did not address our core problem: It's primarily designed for static documents and doesn't inherently handle temporal aspects of data.
62
-
63
- Graphiti is designed from the ground up to handle constantly changing information, hybrid semantic and graph search, and scale:
64
-
65
- - **Temporal Awareness:** Tracks changes in facts and relationships over time, enabling point-in-time queries. Graph edges include temporal metadata to record relationship lifecycles.
66
- - **Episodic Processing:** Ingests data as discrete episodes, maintaining data provenance and allowing incremental entity and relationship extraction.
67
- - **Hybrid Search:** Combines semantic and BM25 full-text search, with the ability to rerank results by distance from a central node e.g. “Kendra”.
68
- - **Scalable:** Designed for processing large datasets, with parallelization of LLM calls for bulk processing while preserving the chronology of events.
69
+ We were intrigued by Microsoft’s GraphRAG, which expanded on RAG text chunking by using a graph to better model a
70
+ document corpus and making this representation available via semantic and graph search techniques. However, GraphRAG did
71
+ not address our core problem: It's primarily designed for static documents and doesn't inherently handle temporal
72
+ aspects of data.
73
+
74
+ Graphiti is designed from the ground up to handle constantly changing information, hybrid semantic and graph search, and
75
+ scale:
76
+
77
+ - **Temporal Awareness:** Tracks changes in facts and relationships over time, enabling point-in-time queries. Graph
78
+ edges include temporal metadata to record relationship lifecycles.
79
+ - **Episodic Processing:** Ingests data as discrete episodes, maintaining data provenance and allowing incremental
80
+ entity and relationship extraction.
81
+ - **Hybrid Search:** Combines semantic and BM25 full-text search, with the ability to rerank results by distance from a
82
+ central node e.g. “Kendra”.
83
+ - **Scalable:** Designed for processing large datasets, with parallelization of LLM calls for bulk processing while
84
+ preserving the chronology of events.
69
85
  - **Supports Varied Sources:** Can ingest both unstructured text and structured JSON data.
70
86
 
71
87
  <p align="center">
@@ -91,7 +107,8 @@ Optional:
91
107
  - Anthropic or Groq API key (for alternative LLM providers)
92
108
 
93
109
  > [!TIP]
94
- > The simplest way to install Neo4j is via [Neo4j Desktop](https://neo4j.com/download/). It provides a user-friendly interface to manage Neo4j instances and databases.
110
+ > The simplest way to install Neo4j is via [Neo4j Desktop](https://neo4j.com/download/). It provides a user-friendly
111
+ > interface to manage Neo4j instances and databases.
95
112
 
96
113
  ```bash
97
114
  pip install graphiti-core
@@ -106,7 +123,8 @@ poetry add graphiti-core
106
123
  ## Quick Start
107
124
 
108
125
  > [!IMPORTANT]
109
- > Graphiti uses OpenAI for LLM inference and embedding. Ensure that an `OPENAI_API_KEY` is set in your environment. Support for Anthropic and Groq LLM inferences is available, too.
126
+ > Graphiti uses OpenAI for LLM inference and embedding. Ensure that an `OPENAI_API_KEY` is set in your environment.
127
+ > Support for Anthropic and Groq LLM inferences is available, too.
110
128
 
111
129
  ```python
112
130
  from graphiti_core import Graphiti
@@ -140,25 +158,25 @@ for i, episode in enumerate(episodes):
140
158
  results = await graphiti.search('Who was the California Attorney General?')
141
159
  [
142
160
  EntityEdge(
143
- │ uuid='3133258f738e487383f07b04e15d4ac0',
144
- │ source_node_uuid='2a85789b318d4e418050506879906e62',
145
- │ target_node_uuid='baf7781f445945989d6e4f927f881556',
146
- │ created_at=datetime.datetime(2024, 8, 26, 13, 13, 24, 861097),
147
- │ name='HELD_POSITION',
148
- # the fact reflects the updated state that Harris is
149
- # no longer the AG of California
150
- │ fact='Kamala Harris was the Attorney General of California',
151
- │ fact_embedding=[
152
- │ │ -0.009955154731869698,
153
- │ ...
154
- │ │ 0.00784289836883545
155
- ],
156
- │ episodes=['b43e98ad0a904088a76c67985caecc22'],
157
- │ expired_at=datetime.datetime(2024, 8, 26, 20, 18, 1, 53812),
158
- # These dates represent the date this edge was true.
159
- │ valid_at=datetime.datetime(2011, 1, 3, 0, 0, tzinfo=<UTC>),
160
- │ invalid_at=datetime.datetime(2017, 1, 3, 0, 0, tzinfo=<UTC>)
161
- )
161
+ │ uuid = '3133258f738e487383f07b04e15d4ac0',
162
+ │ source_node_uuid = '2a85789b318d4e418050506879906e62',
163
+ │ target_node_uuid = 'baf7781f445945989d6e4f927f881556',
164
+ │ created_at = datetime.datetime(2024, 8, 26, 13, 13, 24, 861097),
165
+ │ name = 'HELD_POSITION',
166
+ # the fact reflects the updated state that Harris is
167
+ # no longer the AG of California
168
+ │ fact = 'Kamala Harris was the Attorney General of California',
169
+ │ fact_embedding = [
170
+ │ │ -0.009955154731869698,
171
+ │ ...
172
+ │ │ 0.00784289836883545
173
+ │],
174
+ │ episodes = ['b43e98ad0a904088a76c67985caecc22'],
175
+ │ expired_at = datetime.datetime(2024, 8, 26, 20, 18, 1, 53812),
176
+ # These dates represent the date this edge was true.
177
+ │ valid_at = datetime.datetime(2011, 1, 3, 0, 0, tzinfo= < UTC >),
178
+ │ invalid_at = datetime.datetime(2017, 1, 3, 0, 0, tzinfo= < UTC >)
179
+ )
162
180
  ]
163
181
 
164
182
  # Rerank search results based on graph distance
@@ -191,14 +209,16 @@ Graphiti is under active development. We aim to maintain API stability while wor
191
209
  - [ ] Achieving good performance with different LLM and embedding models
192
210
  - [ ] Creating a dedicated embedder interface
193
211
  - [ ] Supporting custom graph schemas:
194
- - Allow developers to provide their own defined node and edge classes when ingesting episodes
195
- - Enable more flexible knowledge representation tailored to specific use cases
212
+ - Allow developers to provide their own defined node and edge classes when ingesting episodes
213
+ - Enable more flexible knowledge representation tailored to specific use cases
196
214
  - [ ] Enhancing retrieval capabilities with more robust and configurable options
197
215
  - [ ] Expanding test coverage to ensure reliability and catch edge cases
198
216
 
199
217
  ## Contributing
200
218
 
201
- We encourage and appreciate all forms of contributions, whether it's code, documentation, addressing GitHub Issues, or answering questions in the Graphiti Discord channel. For detailed guidelines on code contributions, please refer to [CONTRIBUTING](CONTRIBUTING.md).
219
+ We encourage and appreciate all forms of contributions, whether it's code, documentation, addressing GitHub Issues, or
220
+ answering questions in the Graphiti Discord channel. For detailed guidelines on code contributions, please refer
221
+ to [CONTRIBUTING](CONTRIBUTING.md).
202
222
 
203
223
  ## Support
204
224
 
@@ -1,6 +1,6 @@
1
1
  <div align="center">
2
2
 
3
- # Graphiti
3
+ <img width="350" alt="Graphiti-ts-small" src="https://github.com/user-attachments/assets/bbd02947-e435-4a05-b25a-bbbac36d52c8">
4
4
 
5
5
  ## Temporal Knowledge Graphs for Agentic Applications
6
6
 
@@ -16,7 +16,9 @@
16
16
 
17
17
  </div>
18
18
 
19
- Graphiti builds dynamic, temporally aware Knowledge Graphs that represent complex, evolving relationships between entities over time. Graphiti ingests both unstructured and structured data, and the resulting graph may be queried using a fusion of time, full-text, semantic, and graph algorithm approaches.
19
+ Graphiti builds dynamic, temporally aware Knowledge Graphs that represent complex, evolving relationships between
20
+ entities over time. Graphiti ingests both unstructured and structured data, and the resulting graph may be queried using
21
+ a fusion of time, full-text, semantic, and graph algorithm approaches.
20
22
 
21
23
  <br />
22
24
 
@@ -26,25 +28,39 @@ Graphiti builds dynamic, temporally aware Knowledge Graphs that represent comple
26
28
 
27
29
  <br />
28
30
 
29
- Graphiti helps you create and query Knowledge Graphs that evolve over time. A knowledge graph is a network of interconnected facts, such as _“Kendra loves Adidas shoes.”_ Each fact is a “triplet” represented by two entities, or nodes (_”Kendra”_, _“Adidas shoes”_), and their relationship, or edge (_”loves”_). Knowledge Graphs have been explored extensively for information retrieval. What makes Graphiti unique is its ability to autonomously build a knowledge graph while handling changing relationships and maintaining historical context.
31
+ Graphiti helps you create and query Knowledge Graphs that evolve over time. A knowledge graph is a network of
32
+ interconnected facts, such as _“Kendra loves Adidas shoes.”_ Each fact is a “triplet” represented by two entities, or
33
+ nodes (_”Kendra”_, _“Adidas shoes”_), and their relationship, or edge (_”loves”_). Knowledge Graphs have been explored
34
+ extensively for information retrieval. What makes Graphiti unique is its ability to autonomously build a knowledge graph
35
+ while handling changing relationships and maintaining historical context.
30
36
 
31
37
  With Graphiti, you can build LLM applications such as:
32
38
 
33
- - Assistants that learn from user interactions, fusing personal knowledge with dynamic data from business systems like CRMs and billing platforms.
39
+ - Assistants that learn from user interactions, fusing personal knowledge with dynamic data from business systems like
40
+ CRMs and billing platforms.
34
41
  - Agents that autonomously execute complex tasks, reasoning with state changes from multiple dynamic sources.
35
42
 
36
- Graphiti supports a wide range of applications in sales, customer service, health, finance, and more, enabling long-term recall and state-based reasoning for both assistants and agents.
43
+ Graphiti supports a wide range of applications in sales, customer service, health, finance, and more, enabling long-term
44
+ recall and state-based reasoning for both assistants and agents.
37
45
 
38
46
  ## Why Graphiti?
39
47
 
40
- We were intrigued by Microsoft’s GraphRAG, which expanded on RAG text chunking by using a graph to better model a document corpus and making this representation available via semantic and graph search techniques. However, GraphRAG did not address our core problem: It's primarily designed for static documents and doesn't inherently handle temporal aspects of data.
41
-
42
- Graphiti is designed from the ground up to handle constantly changing information, hybrid semantic and graph search, and scale:
43
-
44
- - **Temporal Awareness:** Tracks changes in facts and relationships over time, enabling point-in-time queries. Graph edges include temporal metadata to record relationship lifecycles.
45
- - **Episodic Processing:** Ingests data as discrete episodes, maintaining data provenance and allowing incremental entity and relationship extraction.
46
- - **Hybrid Search:** Combines semantic and BM25 full-text search, with the ability to rerank results by distance from a central node e.g. “Kendra”.
47
- - **Scalable:** Designed for processing large datasets, with parallelization of LLM calls for bulk processing while preserving the chronology of events.
48
+ We were intrigued by Microsoft’s GraphRAG, which expanded on RAG text chunking by using a graph to better model a
49
+ document corpus and making this representation available via semantic and graph search techniques. However, GraphRAG did
50
+ not address our core problem: It's primarily designed for static documents and doesn't inherently handle temporal
51
+ aspects of data.
52
+
53
+ Graphiti is designed from the ground up to handle constantly changing information, hybrid semantic and graph search, and
54
+ scale:
55
+
56
+ - **Temporal Awareness:** Tracks changes in facts and relationships over time, enabling point-in-time queries. Graph
57
+ edges include temporal metadata to record relationship lifecycles.
58
+ - **Episodic Processing:** Ingests data as discrete episodes, maintaining data provenance and allowing incremental
59
+ entity and relationship extraction.
60
+ - **Hybrid Search:** Combines semantic and BM25 full-text search, with the ability to rerank results by distance from a
61
+ central node e.g. “Kendra”.
62
+ - **Scalable:** Designed for processing large datasets, with parallelization of LLM calls for bulk processing while
63
+ preserving the chronology of events.
48
64
  - **Supports Varied Sources:** Can ingest both unstructured text and structured JSON data.
49
65
 
50
66
  <p align="center">
@@ -70,7 +86,8 @@ Optional:
70
86
  - Anthropic or Groq API key (for alternative LLM providers)
71
87
 
72
88
  > [!TIP]
73
- > The simplest way to install Neo4j is via [Neo4j Desktop](https://neo4j.com/download/). It provides a user-friendly interface to manage Neo4j instances and databases.
89
+ > The simplest way to install Neo4j is via [Neo4j Desktop](https://neo4j.com/download/). It provides a user-friendly
90
+ > interface to manage Neo4j instances and databases.
74
91
 
75
92
  ```bash
76
93
  pip install graphiti-core
@@ -85,7 +102,8 @@ poetry add graphiti-core
85
102
  ## Quick Start
86
103
 
87
104
  > [!IMPORTANT]
88
- > Graphiti uses OpenAI for LLM inference and embedding. Ensure that an `OPENAI_API_KEY` is set in your environment. Support for Anthropic and Groq LLM inferences is available, too.
105
+ > Graphiti uses OpenAI for LLM inference and embedding. Ensure that an `OPENAI_API_KEY` is set in your environment.
106
+ > Support for Anthropic and Groq LLM inferences is available, too.
89
107
 
90
108
  ```python
91
109
  from graphiti_core import Graphiti
@@ -119,25 +137,25 @@ for i, episode in enumerate(episodes):
119
137
  results = await graphiti.search('Who was the California Attorney General?')
120
138
  [
121
139
  EntityEdge(
122
- │ uuid='3133258f738e487383f07b04e15d4ac0',
123
- │ source_node_uuid='2a85789b318d4e418050506879906e62',
124
- │ target_node_uuid='baf7781f445945989d6e4f927f881556',
125
- │ created_at=datetime.datetime(2024, 8, 26, 13, 13, 24, 861097),
126
- │ name='HELD_POSITION',
127
- # the fact reflects the updated state that Harris is
128
- # no longer the AG of California
129
- │ fact='Kamala Harris was the Attorney General of California',
130
- │ fact_embedding=[
131
- │ │ -0.009955154731869698,
132
- │ ...
133
- │ │ 0.00784289836883545
134
- ],
135
- │ episodes=['b43e98ad0a904088a76c67985caecc22'],
136
- │ expired_at=datetime.datetime(2024, 8, 26, 20, 18, 1, 53812),
137
- # These dates represent the date this edge was true.
138
- │ valid_at=datetime.datetime(2011, 1, 3, 0, 0, tzinfo=<UTC>),
139
- │ invalid_at=datetime.datetime(2017, 1, 3, 0, 0, tzinfo=<UTC>)
140
- )
140
+ │ uuid = '3133258f738e487383f07b04e15d4ac0',
141
+ │ source_node_uuid = '2a85789b318d4e418050506879906e62',
142
+ │ target_node_uuid = 'baf7781f445945989d6e4f927f881556',
143
+ │ created_at = datetime.datetime(2024, 8, 26, 13, 13, 24, 861097),
144
+ │ name = 'HELD_POSITION',
145
+ # the fact reflects the updated state that Harris is
146
+ # no longer the AG of California
147
+ │ fact = 'Kamala Harris was the Attorney General of California',
148
+ │ fact_embedding = [
149
+ │ │ -0.009955154731869698,
150
+ │ ...
151
+ │ │ 0.00784289836883545
152
+ │],
153
+ │ episodes = ['b43e98ad0a904088a76c67985caecc22'],
154
+ │ expired_at = datetime.datetime(2024, 8, 26, 20, 18, 1, 53812),
155
+ # These dates represent the date this edge was true.
156
+ │ valid_at = datetime.datetime(2011, 1, 3, 0, 0, tzinfo= < UTC >),
157
+ │ invalid_at = datetime.datetime(2017, 1, 3, 0, 0, tzinfo= < UTC >)
158
+ )
141
159
  ]
142
160
 
143
161
  # Rerank search results based on graph distance
@@ -170,14 +188,16 @@ Graphiti is under active development. We aim to maintain API stability while wor
170
188
  - [ ] Achieving good performance with different LLM and embedding models
171
189
  - [ ] Creating a dedicated embedder interface
172
190
  - [ ] Supporting custom graph schemas:
173
- - Allow developers to provide their own defined node and edge classes when ingesting episodes
174
- - Enable more flexible knowledge representation tailored to specific use cases
191
+ - Allow developers to provide their own defined node and edge classes when ingesting episodes
192
+ - Enable more flexible knowledge representation tailored to specific use cases
175
193
  - [ ] Enhancing retrieval capabilities with more robust and configurable options
176
194
  - [ ] Expanding test coverage to ensure reliability and catch edge cases
177
195
 
178
196
  ## Contributing
179
197
 
180
- We encourage and appreciate all forms of contributions, whether it's code, documentation, addressing GitHub Issues, or answering questions in the Graphiti Discord channel. For detailed guidelines on code contributions, please refer to [CONTRIBUTING](CONTRIBUTING.md).
198
+ We encourage and appreciate all forms of contributions, whether it's code, documentation, addressing GitHub Issues, or
199
+ answering questions in the Graphiti Discord channel. For detailed guidelines on code contributions, please refer
200
+ to [CONTRIBUTING](CONTRIBUTING.md).
181
201
 
182
202
  ## Support
183
203
 
@@ -33,7 +33,7 @@ logger = logging.getLogger(__name__)
33
33
 
34
34
 
35
35
  class Edge(BaseModel, ABC):
36
- uuid: str = Field(default_factory=lambda: uuid4().hex)
36
+ uuid: str = Field(default_factory=lambda: str(uuid4()))
37
37
  group_id: str | None = Field(description='partition of the graph')
38
38
  source_node_uuid: str
39
39
  target_node_uuid: str
@@ -104,18 +104,62 @@ class EpisodicEdge(Edge):
104
104
 
105
105
  edges = [get_episodic_edge_from_record(record) for record in records]
106
106
 
107
- logger.info(f'Found Edge: {uuid}')
108
107
  if len(edges) == 0:
109
108
  raise EdgeNotFoundError(uuid)
110
109
  return edges[0]
111
110
 
111
+ @classmethod
112
+ async def get_by_uuids(cls, driver: AsyncDriver, uuids: list[str]):
113
+ records, _, _ = await driver.execute_query(
114
+ """
115
+ MATCH (n:Episodic)-[e:MENTIONS]->(m:Entity)
116
+ WHERE e.uuid IN $uuids
117
+ RETURN
118
+ e.uuid As uuid,
119
+ e.group_id AS group_id,
120
+ n.uuid AS source_node_uuid,
121
+ m.uuid AS target_node_uuid,
122
+ e.created_at AS created_at
123
+ """,
124
+ uuids=uuids,
125
+ )
126
+
127
+ edges = [get_episodic_edge_from_record(record) for record in records]
128
+
129
+ if len(edges) == 0:
130
+ raise EdgeNotFoundError(uuids[0])
131
+ return edges
132
+
133
+ @classmethod
134
+ async def get_by_group_ids(cls, driver: AsyncDriver, group_ids: list[str | None]):
135
+ records, _, _ = await driver.execute_query(
136
+ """
137
+ MATCH (n:Episodic)-[e:MENTIONS]->(m:Entity)
138
+ WHERE e.group_id IN $group_ids
139
+ RETURN
140
+ e.uuid As uuid,
141
+ e.group_id AS group_id,
142
+ n.uuid AS source_node_uuid,
143
+ m.uuid AS target_node_uuid,
144
+ e.created_at AS created_at
145
+ """,
146
+ group_ids=group_ids,
147
+ )
148
+
149
+ edges = [get_episodic_edge_from_record(record) for record in records]
150
+ uuids = [edge.uuid for edge in edges]
151
+
152
+ if len(edges) == 0:
153
+ raise EdgeNotFoundError(uuids[0])
154
+ return edges
155
+
112
156
 
113
157
  class EntityEdge(Edge):
114
158
  name: str = Field(description='name of the edge, relation name')
115
159
  fact: str = Field(description='fact representing the edge and nodes that it connects')
116
160
  fact_embedding: list[float] | None = Field(default=None, description='embedding of the fact')
117
- episodes: list[str] | None = Field(
118
- default=None,
161
+ episodes: list[str] = Field(
162
+ default=[],
119
163
  description='list of episode ids that reference these entity edges',
120
164
  )
121
165
  expired_at: datetime | None = Field(
@@ -192,11 +236,69 @@ class EntityEdge(Edge):
192
236
 
193
237
  edges = [get_entity_edge_from_record(record) for record in records]
194
238
 
195
- logger.info(f'Found Edge: {uuid}')
196
239
  if len(edges) == 0:
197
240
  raise EdgeNotFoundError(uuid)
198
241
  return edges[0]
199
242
 
243
+ @classmethod
244
+ async def get_by_uuids(cls, driver: AsyncDriver, uuids: list[str]):
245
+ records, _, _ = await driver.execute_query(
246
+ """
247
+ MATCH (n:Entity)-[e:RELATES_TO]->(m:Entity)
248
+ WHERE e.uuid IN $uuids
249
+ RETURN
250
+ e.uuid AS uuid,
251
+ n.uuid AS source_node_uuid,
252
+ m.uuid AS target_node_uuid,
253
+ e.created_at AS created_at,
254
+ e.name AS name,
255
+ e.group_id AS group_id,
256
+ e.fact AS fact,
257
+ e.fact_embedding AS fact_embedding,
258
+ e.episodes AS episodes,
259
+ e.expired_at AS expired_at,
260
+ e.valid_at AS valid_at,
261
+ e.invalid_at AS invalid_at
262
+ """,
263
+ uuids=uuids,
264
+ )
265
+
266
+ edges = [get_entity_edge_from_record(record) for record in records]
267
+
268
+ if len(edges) == 0:
269
+ raise EdgeNotFoundError(uuids[0])
270
+ return edges
271
+
272
+ @classmethod
273
+ async def get_by_group_ids(cls, driver: AsyncDriver, group_ids: list[str | None]):
274
+ records, _, _ = await driver.execute_query(
275
+ """
276
+ MATCH (n:Entity)-[e:RELATES_TO]->(m:Entity)
277
+ WHERE e.group_id IN $group_ids
278
+ RETURN
279
+ e.uuid AS uuid,
280
+ n.uuid AS source_node_uuid,
281
+ m.uuid AS target_node_uuid,
282
+ e.created_at AS created_at,
283
+ e.name AS name,
284
+ e.group_id AS group_id,
285
+ e.fact AS fact,
286
+ e.fact_embedding AS fact_embedding,
287
+ e.episodes AS episodes,
288
+ e.expired_at AS expired_at,
289
+ e.valid_at AS valid_at,
290
+ e.invalid_at AS invalid_at
291
+ """,
292
+ group_ids=group_ids,
293
+ )
294
+
295
+ edges = [get_entity_edge_from_record(record) for record in records]
296
+ uuids = [edge.uuid for edge in edges]
297
+
298
+ if len(edges) == 0:
299
+ raise EdgeNotFoundError(uuids[0])
300
+ return edges
301
+
200
302
 
201
303
  class CommunityEdge(Edge):
202
304
  async def save(self, driver: AsyncDriver):
@@ -235,10 +337,48 @@ class CommunityEdge(Edge):
235
337
 
236
338
  edges = [get_community_edge_from_record(record) for record in records]
237
339
 
238
- logger.info(f'Found Edge: {uuid}')
239
-
240
340
  return edges[0]
241
341
 
342
+ @classmethod
343
+ async def get_by_uuids(cls, driver: AsyncDriver, uuids: list[str]):
344
+ records, _, _ = await driver.execute_query(
345
+ """
346
+ MATCH (n:Community)-[e:HAS_MEMBER]->(m:Entity | Community)
347
+ WHERE e.uuid IN $uuids
348
+ RETURN
349
+ e.uuid As uuid,
350
+ e.group_id AS group_id,
351
+ n.uuid AS source_node_uuid,
352
+ m.uuid AS target_node_uuid,
353
+ e.created_at AS created_at
354
+ """,
355
+ uuids=uuids,
356
+ )
357
+
358
+ edges = [get_community_edge_from_record(record) for record in records]
359
+
360
+ return edges
361
+
362
+ @classmethod
363
+ async def get_by_group_ids(cls, driver: AsyncDriver, group_ids: list[str | None]):
364
+ records, _, _ = await driver.execute_query(
365
+ """
366
+ MATCH (n:Community)-[e:HAS_MEMBER]->(m:Entity | Community)
367
+ WHERE e.group_id IN $group_ids
368
+ RETURN
369
+ e.uuid As uuid,
370
+ e.group_id AS group_id,
371
+ n.uuid AS source_node_uuid,
372
+ m.uuid AS target_node_uuid,
373
+ e.created_at AS created_at
374
+ """,
375
+ group_ids=group_ids,
376
+ )
377
+
378
+ edges = [get_community_edge_from_record(record) for record in records]
379
+
380
+ return edges
381
+
242
382
 
243
383
  # Edge helpers
244
384
  def get_episodic_edge_from_record(record: Any) -> EpisodicEdge:
@@ -35,6 +35,8 @@ from graphiti_core.search.search_config_recipes import (
35
35
  )
36
36
  from graphiti_core.search.search_utils import (
37
37
  RELEVANT_SCHEMA_LIMIT,
38
+ get_communities_by_nodes,
39
+ get_mentioned_nodes,
38
40
  get_relevant_edges,
39
41
  get_relevant_nodes,
40
42
  )
@@ -54,6 +56,7 @@ from graphiti_core.utils.bulk_utils import (
54
56
  from graphiti_core.utils.maintenance.community_operations import (
55
57
  build_communities,
56
58
  remove_communities,
59
+ update_community,
57
60
  )
58
61
  from graphiti_core.utils.maintenance.edge_operations import (
59
62
  extract_edges,
@@ -74,7 +77,14 @@ load_dotenv()
74
77
 
75
78
 
76
79
  class Graphiti:
77
- def __init__(self, uri: str, user: str, password: str, llm_client: LLMClient | None = None):
80
+ def __init__(
81
+ self,
82
+ uri: str,
83
+ user: str,
84
+ password: str,
85
+ llm_client: LLMClient | None = None,
86
+ store_raw_episode_content: bool = True,
87
+ ):
78
88
  """
79
89
  Initialize a Graphiti instance.
80
90
 
@@ -113,6 +123,7 @@ class Graphiti:
113
123
  """
114
124
  self.driver = AsyncGraphDatabase.driver(uri, auth=(user, password))
115
125
  self.database = 'neo4j'
126
+ self.store_raw_episode_content = store_raw_episode_content
116
127
  if llm_client:
117
128
  self.llm_client = llm_client
118
129
  else:
@@ -147,8 +158,8 @@ class Graphiti:
147
158
  # Use graphiti...
148
159
  finally:
149
160
  graphiti.close()
150
- self.driver.close()
151
161
  """
162
+ self.driver.close()
152
163
 
153
164
  async def build_indices_and_constraints(self):
154
165
  """
@@ -224,6 +235,7 @@ class Graphiti:
224
235
  source: EpisodeType = EpisodeType.message,
225
236
  group_id: str | None = None,
226
237
  uuid: str | None = None,
238
+ update_communities: bool = False,
227
239
  ):
228
240
  """
229
241
  Process an episode and update the graph.
@@ -247,6 +259,8 @@ class Graphiti:
247
259
  An id for the graph partition the episode is a part of.
248
260
  uuid : str | None
249
261
  Optional uuid of the episode.
262
+ update_communities : bool
263
+ Optional. Whether to update communities with new node information
250
264
 
251
265
  Returns
252
266
  -------
@@ -272,7 +286,6 @@ class Graphiti:
272
286
  try:
273
287
  start = time()
274
288
 
275
- nodes: list[EntityNode] = []
276
289
  entity_edges: list[EntityEdge] = []
277
290
  embedder = self.llm_client.get_embedder()
278
291
  now = datetime.now()
@@ -291,6 +304,8 @@ class Graphiti:
291
304
  valid_at=reference_time,
292
305
  )
293
306
  episode.uuid = uuid if uuid is not None else episode.uuid
307
+ if not self.store_raw_episode_content:
308
+ episode.content = ''
294
309
 
295
310
  # Extract entities as nodes
296
311
 
@@ -319,7 +334,7 @@ class Graphiti:
319
334
  ),
320
335
  )
321
336
  logger.info(f'Adjusted mentioned nodes: {[(n.name, n.uuid) for n in mentioned_nodes]}')
322
- nodes.extend(mentioned_nodes)
337
+ nodes = mentioned_nodes
323
338
 
324
339
  extracted_edges_with_resolved_pointers = resolve_edge_pointers(
325
340
  extracted_edges, uuid_map
@@ -409,12 +424,22 @@ class Graphiti:
409
424
 
410
425
  logger.info(f'Built episodic edges: {episodic_edges}')
411
426
 
427
+ episode.entity_edges = [edge.uuid for edge in entity_edges]
428
+
412
429
  # Future optimization would be using batch operations to save nodes and edges
413
430
  await episode.save(self.driver)
414
431
  await asyncio.gather(*[node.save(self.driver) for node in nodes])
415
432
  await asyncio.gather(*[edge.save(self.driver) for edge in episodic_edges])
416
433
  await asyncio.gather(*[edge.save(self.driver) for edge in entity_edges])
417
434
 
435
+ # Update any communities
436
+ if update_communities:
437
+ await asyncio.gather(
438
+ *[
439
+ update_community(self.driver, self.llm_client, embedder, node)
440
+ for node in nodes
441
+ ]
442
+ )
418
443
  end = time()
419
444
  logger.info(f'Completed add_episode in {(end - start) * 1000} ms')
420
445
 
@@ -554,7 +579,7 @@ class Graphiti:
554
579
  center_node_uuid: str | None = None,
555
580
  group_ids: list[str | None] | None = None,
556
581
  num_results=DEFAULT_SEARCH_LIMIT,
557
- ):
582
+ ) -> list[EntityEdge]:
558
583
  """
559
584
  Perform a hybrid search on the knowledge graph.
560
585
 
@@ -569,7 +594,7 @@ class Graphiti:
569
594
  Facts will be reranked based on proximity to this node
570
595
  group_ids : list[str | None] | None, optional
571
596
  The graph partitions to return data from.
572
- limit : int, optional
597
+ num_results : int, optional
573
598
  The maximum number of results to return. Defaults to 10.
574
599
 
575
600
  Returns
@@ -668,3 +693,19 @@ class Graphiti:
668
693
  await search(self.driver, embedder, query, group_ids, search_config, center_node_uuid)
669
694
  ).nodes
670
695
  return nodes
696
+
697
+
698
+ async def get_episode_mentions(self, episode_uuids: list[str]) -> SearchResults:
699
+ episodes = await EpisodicNode.get_by_uuids(self.driver, episode_uuids)
700
+
701
+ edges_list = await asyncio.gather(
702
+ *[EntityEdge.get_by_uuids(self.driver, episode.entity_edges) for episode in episodes]
703
+ )
704
+
705
+ edges: list[EntityEdge] = [edge for lst in edges_list for edge in lst]
706
+
707
+ nodes = await get_mentioned_nodes(self.driver, episodes)
708
+
709
+ communities = await get_communities_by_nodes(self.driver, nodes)
710
+
711
+ return SearchResults(edges=edges, nodes=nodes, communities=communities)