graphiti-core 0.3.3__tar.gz → 0.3.5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of graphiti-core might be problematic. Click here for more details.

Files changed (43) hide show
  1. {graphiti_core-0.3.3 → graphiti_core-0.3.5}/PKG-INFO +58 -38
  2. {graphiti_core-0.3.3 → graphiti_core-0.3.5}/README.md +57 -37
  3. {graphiti_core-0.3.3 → graphiti_core-0.3.5}/graphiti_core/edges.py +73 -8
  4. {graphiti_core-0.3.3 → graphiti_core-0.3.5}/graphiti_core/graphiti.py +23 -12
  5. {graphiti_core-0.3.3 → graphiti_core-0.3.5}/graphiti_core/llm_client/anthropic_client.py +5 -1
  6. {graphiti_core-0.3.3 → graphiti_core-0.3.5}/graphiti_core/llm_client/client.py +1 -1
  7. {graphiti_core-0.3.3 → graphiti_core-0.3.5}/graphiti_core/llm_client/config.py +1 -1
  8. {graphiti_core-0.3.3 → graphiti_core-0.3.5}/graphiti_core/llm_client/groq_client.py +5 -1
  9. {graphiti_core-0.3.3 → graphiti_core-0.3.5}/graphiti_core/llm_client/openai_client.py +40 -3
  10. {graphiti_core-0.3.3 → graphiti_core-0.3.5}/graphiti_core/nodes.py +62 -12
  11. {graphiti_core-0.3.3 → graphiti_core-0.3.5}/graphiti_core/search/search.py +20 -10
  12. {graphiti_core-0.3.3 → graphiti_core-0.3.5}/graphiti_core/search/search_utils.py +33 -62
  13. {graphiti_core-0.3.3 → graphiti_core-0.3.5}/graphiti_core/utils/maintenance/community_operations.py +99 -28
  14. {graphiti_core-0.3.3 → graphiti_core-0.3.5}/graphiti_core/utils/maintenance/edge_operations.py +1 -1
  15. {graphiti_core-0.3.3 → graphiti_core-0.3.5}/graphiti_core/utils/maintenance/graph_data_operations.py +3 -2
  16. {graphiti_core-0.3.3 → graphiti_core-0.3.5}/graphiti_core/utils/maintenance/node_operations.py +6 -3
  17. {graphiti_core-0.3.3 → graphiti_core-0.3.5}/pyproject.toml +2 -2
  18. {graphiti_core-0.3.3 → graphiti_core-0.3.5}/LICENSE +0 -0
  19. {graphiti_core-0.3.3 → graphiti_core-0.3.5}/graphiti_core/__init__.py +0 -0
  20. {graphiti_core-0.3.3 → graphiti_core-0.3.5}/graphiti_core/errors.py +0 -0
  21. {graphiti_core-0.3.3 → graphiti_core-0.3.5}/graphiti_core/helpers.py +0 -0
  22. {graphiti_core-0.3.3 → graphiti_core-0.3.5}/graphiti_core/llm_client/__init__.py +0 -0
  23. {graphiti_core-0.3.3 → graphiti_core-0.3.5}/graphiti_core/llm_client/errors.py +0 -0
  24. {graphiti_core-0.3.3 → graphiti_core-0.3.5}/graphiti_core/llm_client/utils.py +0 -0
  25. {graphiti_core-0.3.3 → graphiti_core-0.3.5}/graphiti_core/prompts/__init__.py +0 -0
  26. {graphiti_core-0.3.3 → graphiti_core-0.3.5}/graphiti_core/prompts/dedupe_edges.py +0 -0
  27. {graphiti_core-0.3.3 → graphiti_core-0.3.5}/graphiti_core/prompts/dedupe_nodes.py +0 -0
  28. {graphiti_core-0.3.3 → graphiti_core-0.3.5}/graphiti_core/prompts/extract_edge_dates.py +0 -0
  29. {graphiti_core-0.3.3 → graphiti_core-0.3.5}/graphiti_core/prompts/extract_edges.py +0 -0
  30. {graphiti_core-0.3.3 → graphiti_core-0.3.5}/graphiti_core/prompts/extract_nodes.py +0 -0
  31. {graphiti_core-0.3.3 → graphiti_core-0.3.5}/graphiti_core/prompts/invalidate_edges.py +0 -0
  32. {graphiti_core-0.3.3 → graphiti_core-0.3.5}/graphiti_core/prompts/lib.py +0 -0
  33. {graphiti_core-0.3.3 → graphiti_core-0.3.5}/graphiti_core/prompts/models.py +0 -0
  34. {graphiti_core-0.3.3 → graphiti_core-0.3.5}/graphiti_core/prompts/summarize_nodes.py +0 -0
  35. {graphiti_core-0.3.3 → graphiti_core-0.3.5}/graphiti_core/py.typed +0 -0
  36. {graphiti_core-0.3.3 → graphiti_core-0.3.5}/graphiti_core/search/__init__.py +0 -0
  37. {graphiti_core-0.3.3 → graphiti_core-0.3.5}/graphiti_core/search/search_config.py +0 -0
  38. {graphiti_core-0.3.3 → graphiti_core-0.3.5}/graphiti_core/search/search_config_recipes.py +0 -0
  39. {graphiti_core-0.3.3 → graphiti_core-0.3.5}/graphiti_core/utils/__init__.py +0 -0
  40. {graphiti_core-0.3.3 → graphiti_core-0.3.5}/graphiti_core/utils/bulk_utils.py +0 -0
  41. {graphiti_core-0.3.3 → graphiti_core-0.3.5}/graphiti_core/utils/maintenance/__init__.py +0 -0
  42. {graphiti_core-0.3.3 → graphiti_core-0.3.5}/graphiti_core/utils/maintenance/temporal_operations.py +0 -0
  43. {graphiti_core-0.3.3 → graphiti_core-0.3.5}/graphiti_core/utils/maintenance/utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: graphiti-core
3
- Version: 0.3.3
3
+ Version: 0.3.5
4
4
  Summary: A temporal graph building library
5
5
  License: Apache-2.0
6
6
  Author: Paul Paliychuk
@@ -21,7 +21,7 @@ Description-Content-Type: text/markdown
21
21
 
22
22
  <div align="center">
23
23
 
24
- # Graphiti
24
+ <img width="350" alt="Graphiti-ts-small" src="https://github.com/user-attachments/assets/bbd02947-e435-4a05-b25a-bbbac36d52c8">
25
25
 
26
26
  ## Temporal Knowledge Graphs for Agentic Applications
27
27
 
@@ -37,7 +37,9 @@ Description-Content-Type: text/markdown
37
37
 
38
38
  </div>
39
39
 
40
- Graphiti builds dynamic, temporally aware Knowledge Graphs that represent complex, evolving relationships between entities over time. Graphiti ingests both unstructured and structured data, and the resulting graph may be queried using a fusion of time, full-text, semantic, and graph algorithm approaches.
40
+ Graphiti builds dynamic, temporally aware Knowledge Graphs that represent complex, evolving relationships between
41
+ entities over time. Graphiti ingests both unstructured and structured data, and the resulting graph may be queried using
42
+ a fusion of time, full-text, semantic, and graph algorithm approaches.
41
43
 
42
44
  <br />
43
45
 
@@ -47,25 +49,39 @@ Graphiti builds dynamic, temporally aware Knowledge Graphs that represent comple
47
49
 
48
50
  <br />
49
51
 
50
- Graphiti helps you create and query Knowledge Graphs that evolve over time. A knowledge graph is a network of interconnected facts, such as _“Kendra loves Adidas shoes.”_ Each fact is a “triplet” represented by two entities, or nodes (_”Kendra”_, _“Adidas shoes”_), and their relationship, or edge (_”loves”_). Knowledge Graphs have been explored extensively for information retrieval. What makes Graphiti unique is its ability to autonomously build a knowledge graph while handling changing relationships and maintaining historical context.
52
+ Graphiti helps you create and query Knowledge Graphs that evolve over time. A knowledge graph is a network of
53
+ interconnected facts, such as _“Kendra loves Adidas shoes.”_ Each fact is a “triplet” represented by two entities, or
54
+ nodes (_“Kendra”_, _“Adidas shoes”_), and their relationship, or edge (_“loves”_). Knowledge Graphs have been explored
55
+ extensively for information retrieval. What makes Graphiti unique is its ability to autonomously build a knowledge graph
56
+ while handling changing relationships and maintaining historical context.
51
57
 
52
58
  With Graphiti, you can build LLM applications such as:
53
59
 
54
- - Assistants that learn from user interactions, fusing personal knowledge with dynamic data from business systems like CRMs and billing platforms.
60
+ - Assistants that learn from user interactions, fusing personal knowledge with dynamic data from business systems like
61
+ CRMs and billing platforms.
55
62
  - Agents that autonomously execute complex tasks, reasoning with state changes from multiple dynamic sources.
56
63
 
57
- Graphiti supports a wide range of applications in sales, customer service, health, finance, and more, enabling long-term recall and state-based reasoning for both assistants and agents.
64
+ Graphiti supports a wide range of applications in sales, customer service, health, finance, and more, enabling long-term
65
+ recall and state-based reasoning for both assistants and agents.
58
66
 
59
67
  ## Why Graphiti?
60
68
 
61
- We were intrigued by Microsoft’s GraphRAG, which expanded on RAG text chunking by using a graph to better model a document corpus and making this representation available via semantic and graph search techniques. However, GraphRAG did not address our core problem: It's primarily designed for static documents and doesn't inherently handle temporal aspects of data.
62
-
63
- Graphiti is designed from the ground up to handle constantly changing information, hybrid semantic and graph search, and scale:
64
-
65
- - **Temporal Awareness:** Tracks changes in facts and relationships over time, enabling point-in-time queries. Graph edges include temporal metadata to record relationship lifecycles.
66
- - **Episodic Processing:** Ingests data as discrete episodes, maintaining data provenance and allowing incremental entity and relationship extraction.
67
- - **Hybrid Search:** Combines semantic and BM25 full-text search, with the ability to rerank results by distance from a central node e.g. “Kendra”.
68
- - **Scalable:** Designed for processing large datasets, with parallelization of LLM calls for bulk processing while preserving the chronology of events.
69
+ We were intrigued by Microsoft’s GraphRAG, which expanded on RAG text chunking by using a graph to better model a
70
+ document corpus and making this representation available via semantic and graph search techniques. However, GraphRAG did
71
+ not address our core problem: It's primarily designed for static documents and doesn't inherently handle temporal
72
+ aspects of data.
73
+
74
+ Graphiti is designed from the ground up to handle constantly changing information, hybrid semantic and graph search, and
75
+ scale:
76
+
77
+ - **Temporal Awareness:** Tracks changes in facts and relationships over time, enabling point-in-time queries. Graph
78
+ edges include temporal metadata to record relationship lifecycles.
79
+ - **Episodic Processing:** Ingests data as discrete episodes, maintaining data provenance and allowing incremental
80
+ entity and relationship extraction.
81
+ - **Hybrid Search:** Combines semantic and BM25 full-text search, with the ability to rerank results by distance from a
82
+ central node e.g. “Kendra”.
83
+ - **Scalable:** Designed for processing large datasets, with parallelization of LLM calls for bulk processing while
84
+ preserving the chronology of events.
69
85
  - **Supports Varied Sources:** Can ingest both unstructured text and structured JSON data.
70
86
 
71
87
  <p align="center">
@@ -91,7 +107,8 @@ Optional:
91
107
  - Anthropic or Groq API key (for alternative LLM providers)
92
108
 
93
109
  > [!TIP]
94
- > The simplest way to install Neo4j is via [Neo4j Desktop](https://neo4j.com/download/). It provides a user-friendly interface to manage Neo4j instances and databases.
110
+ > The simplest way to install Neo4j is via [Neo4j Desktop](https://neo4j.com/download/). It provides a user-friendly
111
+ > interface to manage Neo4j instances and databases.
95
112
 
96
113
  ```bash
97
114
  pip install graphiti-core
@@ -106,7 +123,8 @@ poetry add graphiti-core
106
123
  ## Quick Start
107
124
 
108
125
  > [!IMPORTANT]
109
- > Graphiti uses OpenAI for LLM inference and embedding. Ensure that an `OPENAI_API_KEY` is set in your environment. Support for Anthropic and Groq LLM inferences is available, too.
126
+ > Graphiti uses OpenAI for LLM inference and embedding. Ensure that an `OPENAI_API_KEY` is set in your environment.
127
+ > Support for Anthropic and Groq LLM inferences is available, too.
110
128
 
111
129
  ```python
112
130
  from graphiti_core import Graphiti
@@ -140,25 +158,25 @@ for i, episode in enumerate(episodes):
140
158
  results = await graphiti.search('Who was the California Attorney General?')
141
159
  [
142
160
  EntityEdge(
143
- │ uuid='3133258f738e487383f07b04e15d4ac0',
144
- │ source_node_uuid='2a85789b318d4e418050506879906e62',
145
- │ target_node_uuid='baf7781f445945989d6e4f927f881556',
146
- │ created_at=datetime.datetime(2024, 8, 26, 13, 13, 24, 861097),
147
- │ name='HELD_POSITION',
148
- # the fact reflects the updated state that Harris is
149
- # no longer the AG of California
150
- │ fact='Kamala Harris was the Attorney General of California',
151
- │ fact_embedding=[
152
- │ │ -0.009955154731869698,
153
- │ ...
154
- │ │ 0.00784289836883545
155
- ],
156
- │ episodes=['b43e98ad0a904088a76c67985caecc22'],
157
- │ expired_at=datetime.datetime(2024, 8, 26, 20, 18, 1, 53812),
158
- # These dates represent the date this edge was true.
159
- │ valid_at=datetime.datetime(2011, 1, 3, 0, 0, tzinfo=<UTC>),
160
- │ invalid_at=datetime.datetime(2017, 1, 3, 0, 0, tzinfo=<UTC>)
161
- )
161
+ │ uuid = '3133258f738e487383f07b04e15d4ac0',
162
+ │ source_node_uuid = '2a85789b318d4e418050506879906e62',
163
+ │ target_node_uuid = 'baf7781f445945989d6e4f927f881556',
164
+ │ created_at = datetime.datetime(2024, 8, 26, 13, 13, 24, 861097),
165
+ │ name = 'HELD_POSITION',
166
+ # the fact reflects the updated state that Harris is
167
+ # no longer the AG of California
168
+ │ fact = 'Kamala Harris was the Attorney General of California',
169
+ │ fact_embedding = [
170
+ │ │ -0.009955154731869698,
171
+ │ ...
172
+ │ │ 0.00784289836883545
173
+ │],
174
+ │ episodes = ['b43e98ad0a904088a76c67985caecc22'],
175
+ │ expired_at = datetime.datetime(2024, 8, 26, 20, 18, 1, 53812),
176
+ # These dates represent the date this edge was true.
177
+ │ valid_at = datetime.datetime(2011, 1, 3, 0, 0, tzinfo= < UTC >),
178
+ │ invalid_at = datetime.datetime(2017, 1, 3, 0, 0, tzinfo= < UTC >)
179
+ )
162
180
  ]
163
181
 
164
182
  # Rerank search results based on graph distance
@@ -191,14 +209,16 @@ Graphiti is under active development. We aim to maintain API stability while wor
191
209
  - [ ] Achieving good performance with different LLM and embedding models
192
210
  - [ ] Creating a dedicated embedder interface
193
211
  - [ ] Supporting custom graph schemas:
194
- - Allow developers to provide their own defined node and edge classes when ingesting episodes
195
- - Enable more flexible knowledge representation tailored to specific use cases
212
+ - Allow developers to provide their own defined node and edge classes when ingesting episodes
213
+ - Enable more flexible knowledge representation tailored to specific use cases
196
214
  - [ ] Enhancing retrieval capabilities with more robust and configurable options
197
215
  - [ ] Expanding test coverage to ensure reliability and catch edge cases
198
216
 
199
217
  ## Contributing
200
218
 
201
- We encourage and appreciate all forms of contributions, whether it's code, documentation, addressing GitHub Issues, or answering questions in the Graphiti Discord channel. For detailed guidelines on code contributions, please refer to [CONTRIBUTING](CONTRIBUTING.md).
219
+ We encourage and appreciate all forms of contributions, whether it's code, documentation, addressing GitHub Issues, or
220
+ answering questions in the Graphiti Discord channel. For detailed guidelines on code contributions, please refer
221
+ to [CONTRIBUTING](CONTRIBUTING.md).
202
222
 
203
223
  ## Support
204
224
 
@@ -1,6 +1,6 @@
1
1
  <div align="center">
2
2
 
3
- # Graphiti
3
+ <img width="350" alt="Graphiti-ts-small" src="https://github.com/user-attachments/assets/bbd02947-e435-4a05-b25a-bbbac36d52c8">
4
4
 
5
5
  ## Temporal Knowledge Graphs for Agentic Applications
6
6
 
@@ -16,7 +16,9 @@
16
16
 
17
17
  </div>
18
18
 
19
- Graphiti builds dynamic, temporally aware Knowledge Graphs that represent complex, evolving relationships between entities over time. Graphiti ingests both unstructured and structured data, and the resulting graph may be queried using a fusion of time, full-text, semantic, and graph algorithm approaches.
19
+ Graphiti builds dynamic, temporally aware Knowledge Graphs that represent complex, evolving relationships between
20
+ entities over time. Graphiti ingests both unstructured and structured data, and the resulting graph may be queried using
21
+ a fusion of time, full-text, semantic, and graph algorithm approaches.
20
22
 
21
23
  <br />
22
24
 
@@ -26,25 +28,39 @@ Graphiti builds dynamic, temporally aware Knowledge Graphs that represent comple
26
28
 
27
29
  <br />
28
30
 
29
- Graphiti helps you create and query Knowledge Graphs that evolve over time. A knowledge graph is a network of interconnected facts, such as _“Kendra loves Adidas shoes.”_ Each fact is a “triplet” represented by two entities, or nodes (_”Kendra”_, _“Adidas shoes”_), and their relationship, or edge (_”loves”_). Knowledge Graphs have been explored extensively for information retrieval. What makes Graphiti unique is its ability to autonomously build a knowledge graph while handling changing relationships and maintaining historical context.
31
+ Graphiti helps you create and query Knowledge Graphs that evolve over time. A knowledge graph is a network of
32
+ interconnected facts, such as _“Kendra loves Adidas shoes.”_ Each fact is a “triplet” represented by two entities, or
33
+ nodes (_“Kendra”_, _“Adidas shoes”_), and their relationship, or edge (_“loves”_). Knowledge Graphs have been explored
34
+ extensively for information retrieval. What makes Graphiti unique is its ability to autonomously build a knowledge graph
35
+ while handling changing relationships and maintaining historical context.
30
36
 
31
37
  With Graphiti, you can build LLM applications such as:
32
38
 
33
- - Assistants that learn from user interactions, fusing personal knowledge with dynamic data from business systems like CRMs and billing platforms.
39
+ - Assistants that learn from user interactions, fusing personal knowledge with dynamic data from business systems like
40
+ CRMs and billing platforms.
34
41
  - Agents that autonomously execute complex tasks, reasoning with state changes from multiple dynamic sources.
35
42
 
36
- Graphiti supports a wide range of applications in sales, customer service, health, finance, and more, enabling long-term recall and state-based reasoning for both assistants and agents.
43
+ Graphiti supports a wide range of applications in sales, customer service, health, finance, and more, enabling long-term
44
+ recall and state-based reasoning for both assistants and agents.
37
45
 
38
46
  ## Why Graphiti?
39
47
 
40
- We were intrigued by Microsoft’s GraphRAG, which expanded on RAG text chunking by using a graph to better model a document corpus and making this representation available via semantic and graph search techniques. However, GraphRAG did not address our core problem: It's primarily designed for static documents and doesn't inherently handle temporal aspects of data.
41
-
42
- Graphiti is designed from the ground up to handle constantly changing information, hybrid semantic and graph search, and scale:
43
-
44
- - **Temporal Awareness:** Tracks changes in facts and relationships over time, enabling point-in-time queries. Graph edges include temporal metadata to record relationship lifecycles.
45
- - **Episodic Processing:** Ingests data as discrete episodes, maintaining data provenance and allowing incremental entity and relationship extraction.
46
- - **Hybrid Search:** Combines semantic and BM25 full-text search, with the ability to rerank results by distance from a central node e.g. “Kendra”.
47
- - **Scalable:** Designed for processing large datasets, with parallelization of LLM calls for bulk processing while preserving the chronology of events.
48
+ We were intrigued by Microsoft’s GraphRAG, which expanded on RAG text chunking by using a graph to better model a
49
+ document corpus and making this representation available via semantic and graph search techniques. However, GraphRAG did
50
+ not address our core problem: It's primarily designed for static documents and doesn't inherently handle temporal
51
+ aspects of data.
52
+
53
+ Graphiti is designed from the ground up to handle constantly changing information, hybrid semantic and graph search, and
54
+ scale:
55
+
56
+ - **Temporal Awareness:** Tracks changes in facts and relationships over time, enabling point-in-time queries. Graph
57
+ edges include temporal metadata to record relationship lifecycles.
58
+ - **Episodic Processing:** Ingests data as discrete episodes, maintaining data provenance and allowing incremental
59
+ entity and relationship extraction.
60
+ - **Hybrid Search:** Combines semantic and BM25 full-text search, with the ability to rerank results by distance from a
61
+ central node e.g. “Kendra”.
62
+ - **Scalable:** Designed for processing large datasets, with parallelization of LLM calls for bulk processing while
63
+ preserving the chronology of events.
48
64
  - **Supports Varied Sources:** Can ingest both unstructured text and structured JSON data.
49
65
 
50
66
  <p align="center">
@@ -70,7 +86,8 @@ Optional:
70
86
  - Anthropic or Groq API key (for alternative LLM providers)
71
87
 
72
88
  > [!TIP]
73
- > The simplest way to install Neo4j is via [Neo4j Desktop](https://neo4j.com/download/). It provides a user-friendly interface to manage Neo4j instances and databases.
89
+ > The simplest way to install Neo4j is via [Neo4j Desktop](https://neo4j.com/download/). It provides a user-friendly
90
+ > interface to manage Neo4j instances and databases.
74
91
 
75
92
  ```bash
76
93
  pip install graphiti-core
@@ -85,7 +102,8 @@ poetry add graphiti-core
85
102
  ## Quick Start
86
103
 
87
104
  > [!IMPORTANT]
88
- > Graphiti uses OpenAI for LLM inference and embedding. Ensure that an `OPENAI_API_KEY` is set in your environment. Support for Anthropic and Groq LLM inferences is available, too.
105
+ > Graphiti uses OpenAI for LLM inference and embedding. Ensure that an `OPENAI_API_KEY` is set in your environment.
106
+ > Support for Anthropic and Groq LLM inferences is available, too.
89
107
 
90
108
  ```python
91
109
  from graphiti_core import Graphiti
@@ -119,25 +137,25 @@ for i, episode in enumerate(episodes):
119
137
  results = await graphiti.search('Who was the California Attorney General?')
120
138
  [
121
139
  EntityEdge(
122
- │ uuid='3133258f738e487383f07b04e15d4ac0',
123
- │ source_node_uuid='2a85789b318d4e418050506879906e62',
124
- │ target_node_uuid='baf7781f445945989d6e4f927f881556',
125
- │ created_at=datetime.datetime(2024, 8, 26, 13, 13, 24, 861097),
126
- │ name='HELD_POSITION',
127
- # the fact reflects the updated state that Harris is
128
- # no longer the AG of California
129
- │ fact='Kamala Harris was the Attorney General of California',
130
- │ fact_embedding=[
131
- │ │ -0.009955154731869698,
132
- │ ...
133
- │ │ 0.00784289836883545
134
- ],
135
- │ episodes=['b43e98ad0a904088a76c67985caecc22'],
136
- │ expired_at=datetime.datetime(2024, 8, 26, 20, 18, 1, 53812),
137
- # These dates represent the date this edge was true.
138
- │ valid_at=datetime.datetime(2011, 1, 3, 0, 0, tzinfo=<UTC>),
139
- │ invalid_at=datetime.datetime(2017, 1, 3, 0, 0, tzinfo=<UTC>)
140
- )
140
+ │ uuid = '3133258f738e487383f07b04e15d4ac0',
141
+ │ source_node_uuid = '2a85789b318d4e418050506879906e62',
142
+ │ target_node_uuid = 'baf7781f445945989d6e4f927f881556',
143
+ │ created_at = datetime.datetime(2024, 8, 26, 13, 13, 24, 861097),
144
+ │ name = 'HELD_POSITION',
145
+ # the fact reflects the updated state that Harris is
146
+ # no longer the AG of California
147
+ │ fact = 'Kamala Harris was the Attorney General of California',
148
+ │ fact_embedding = [
149
+ │ │ -0.009955154731869698,
150
+ │ ...
151
+ │ │ 0.00784289836883545
152
+ │],
153
+ │ episodes = ['b43e98ad0a904088a76c67985caecc22'],
154
+ │ expired_at = datetime.datetime(2024, 8, 26, 20, 18, 1, 53812),
155
+ # These dates represent the date this edge was true.
156
+ │ valid_at = datetime.datetime(2011, 1, 3, 0, 0, tzinfo= < UTC >),
157
+ │ invalid_at = datetime.datetime(2017, 1, 3, 0, 0, tzinfo= < UTC >)
158
+ )
141
159
  ]
142
160
 
143
161
  # Rerank search results based on graph distance
@@ -170,14 +188,16 @@ Graphiti is under active development. We aim to maintain API stability while wor
170
188
  - [ ] Achieving good performance with different LLM and embedding models
171
189
  - [ ] Creating a dedicated embedder interface
172
190
  - [ ] Supporting custom graph schemas:
173
- - Allow developers to provide their own defined node and edge classes when ingesting episodes
174
- - Enable more flexible knowledge representation tailored to specific use cases
191
+ - Allow developers to provide their own defined node and edge classes when ingesting episodes
192
+ - Enable more flexible knowledge representation tailored to specific use cases
175
193
  - [ ] Enhancing retrieval capabilities with more robust and configurable options
176
194
  - [ ] Expanding test coverage to ensure reliability and catch edge cases
177
195
 
178
196
  ## Contributing
179
197
 
180
- We encourage and appreciate all forms of contributions, whether it's code, documentation, addressing GitHub Issues, or answering questions in the Graphiti Discord channel. For detailed guidelines on code contributions, please refer to [CONTRIBUTING](CONTRIBUTING.md).
198
+ We encourage and appreciate all forms of contributions, whether it's code, documentation, addressing GitHub Issues, or
199
+ answering questions in the Graphiti Discord channel. For detailed guidelines on code contributions, please refer
200
+ to [CONTRIBUTING](CONTRIBUTING.md).
181
201
 
182
202
  ## Support
183
203
 
@@ -34,7 +34,7 @@ logger = logging.getLogger(__name__)
34
34
 
35
35
  class Edge(BaseModel, ABC):
36
36
  uuid: str = Field(default_factory=lambda: str(uuid4()))
37
- group_id: str | None = Field(description='partition of the graph')
37
+ group_id: str = Field(description='partition of the graph')
38
38
  source_node_uuid: str
39
39
  target_node_uuid: str
40
40
  created_at: datetime
@@ -104,7 +104,6 @@ class EpisodicEdge(Edge):
104
104
 
105
105
  edges = [get_episodic_edge_from_record(record) for record in records]
106
106
 
107
- logger.info(f'Found Edge: {uuid}')
108
107
  if len(edges) == 0:
109
108
  raise EdgeNotFoundError(uuid)
110
109
  return edges[0]
@@ -127,7 +126,29 @@ class EpisodicEdge(Edge):
127
126
 
128
127
  edges = [get_episodic_edge_from_record(record) for record in records]
129
128
 
130
- logger.info(f'Found Edges: {uuids}')
129
+ if len(edges) == 0:
130
+ raise EdgeNotFoundError(uuids[0])
131
+ return edges
132
+
133
+ @classmethod
134
+ async def get_by_group_ids(cls, driver: AsyncDriver, group_ids: list[str]):
135
+ records, _, _ = await driver.execute_query(
136
+ """
137
+ MATCH (n:Episodic)-[e:MENTIONS]->(m:Entity)
138
+ WHERE e.group_id IN $group_ids
139
+ RETURN
140
+ e.uuid As uuid,
141
+ e.group_id AS group_id,
142
+ n.uuid AS source_node_uuid,
143
+ m.uuid AS target_node_uuid,
144
+ e.created_at AS created_at
145
+ """,
146
+ group_ids=group_ids,
147
+ )
148
+
149
+ edges = [get_episodic_edge_from_record(record) for record in records]
150
+ uuids = [edge.uuid for edge in edges]
151
+
131
152
  if len(edges) == 0:
132
153
  raise EdgeNotFoundError(uuids[0])
133
154
  return edges
@@ -215,7 +236,6 @@ class EntityEdge(Edge):
215
236
 
216
237
  edges = [get_entity_edge_from_record(record) for record in records]
217
238
 
218
- logger.info(f'Found Edge: {uuid}')
219
239
  if len(edges) == 0:
220
240
  raise EdgeNotFoundError(uuid)
221
241
  return edges[0]
@@ -245,7 +265,36 @@ class EntityEdge(Edge):
245
265
 
246
266
  edges = [get_entity_edge_from_record(record) for record in records]
247
267
 
248
- logger.info(f'Found Edges: {uuids}')
268
+ if len(edges) == 0:
269
+ raise EdgeNotFoundError(uuids[0])
270
+ return edges
271
+
272
+ @classmethod
273
+ async def get_by_group_ids(cls, driver: AsyncDriver, group_ids: list[str]):
274
+ records, _, _ = await driver.execute_query(
275
+ """
276
+ MATCH (n:Entity)-[e:RELATES_TO]->(m:Entity)
277
+ WHERE e.group_id IN $group_ids
278
+ RETURN
279
+ e.uuid AS uuid,
280
+ n.uuid AS source_node_uuid,
281
+ m.uuid AS target_node_uuid,
282
+ e.created_at AS created_at,
283
+ e.name AS name,
284
+ e.group_id AS group_id,
285
+ e.fact AS fact,
286
+ e.fact_embedding AS fact_embedding,
287
+ e.episodes AS episodes,
288
+ e.expired_at AS expired_at,
289
+ e.valid_at AS valid_at,
290
+ e.invalid_at AS invalid_at
291
+ """,
292
+ group_ids=group_ids,
293
+ )
294
+
295
+ edges = [get_entity_edge_from_record(record) for record in records]
296
+ uuids = [edge.uuid for edge in edges]
297
+
249
298
  if len(edges) == 0:
250
299
  raise EdgeNotFoundError(uuids[0])
251
300
  return edges
@@ -288,8 +337,6 @@ class CommunityEdge(Edge):
288
337
 
289
338
  edges = [get_community_edge_from_record(record) for record in records]
290
339
 
291
- logger.info(f'Found Edge: {uuid}')
292
-
293
340
  return edges[0]
294
341
 
295
342
  @classmethod
@@ -310,7 +357,25 @@ class CommunityEdge(Edge):
310
357
 
311
358
  edges = [get_community_edge_from_record(record) for record in records]
312
359
 
313
- logger.info(f'Found Edges: {uuids}')
360
+ return edges
361
+
362
+ @classmethod
363
+ async def get_by_group_ids(cls, driver: AsyncDriver, group_ids: list[str]):
364
+ records, _, _ = await driver.execute_query(
365
+ """
366
+ MATCH (n:Community)-[e:HAS_MEMBER]->(m:Entity | Community)
367
+ WHERE e.group_id IN $group_ids
368
+ RETURN
369
+ e.uuid As uuid,
370
+ e.group_id AS group_id,
371
+ n.uuid AS source_node_uuid,
372
+ m.uuid AS target_node_uuid,
373
+ e.created_at AS created_at
374
+ """,
375
+ group_ids=group_ids,
376
+ )
377
+
378
+ edges = [get_community_edge_from_record(record) for record in records]
314
379
 
315
380
  return edges
316
381
 
@@ -77,7 +77,14 @@ load_dotenv()
77
77
 
78
78
 
79
79
  class Graphiti:
80
- def __init__(self, uri: str, user: str, password: str, llm_client: LLMClient | None = None):
80
+ def __init__(
81
+ self,
82
+ uri: str,
83
+ user: str,
84
+ password: str,
85
+ llm_client: LLMClient | None = None,
86
+ store_raw_episode_content: bool = True,
87
+ ):
81
88
  """
82
89
  Initialize a Graphiti instance.
83
90
 
@@ -116,12 +123,13 @@ class Graphiti:
116
123
  """
117
124
  self.driver = AsyncGraphDatabase.driver(uri, auth=(user, password))
118
125
  self.database = 'neo4j'
126
+ self.store_raw_episode_content = store_raw_episode_content
119
127
  if llm_client:
120
128
  self.llm_client = llm_client
121
129
  else:
122
130
  self.llm_client = OpenAIClient()
123
131
 
124
- def close(self):
132
+ async def close(self):
125
133
  """
126
134
  Close the connection to the Neo4j database.
127
135
 
@@ -150,8 +158,8 @@ class Graphiti:
150
158
  # Use graphiti...
151
159
  finally:
152
160
  await graphiti.close()
153
- self.driver.close()
154
161
  """
162
+ await self.driver.close()
155
163
 
156
164
  async def build_indices_and_constraints(self):
157
165
  """
@@ -189,7 +197,7 @@ class Graphiti:
189
197
  self,
190
198
  reference_time: datetime,
191
199
  last_n: int = EPISODE_WINDOW_LEN,
192
- group_ids: list[str | None] | None = None,
200
+ group_ids: list[str] | None = None,
193
201
  ) -> list[EpisodicNode]:
194
202
  """
195
203
  Retrieve the last n episodic nodes from the graph.
@@ -225,7 +233,7 @@ class Graphiti:
225
233
  source_description: str,
226
234
  reference_time: datetime,
227
235
  source: EpisodeType = EpisodeType.message,
228
- group_id: str | None = None,
236
+ group_id: str = '',
229
237
  uuid: str | None = None,
230
238
  update_communities: bool = False,
231
239
  ):
@@ -251,6 +259,8 @@ class Graphiti:
251
259
  An id for the graph partition the episode is a part of.
252
260
  uuid : str | None
253
261
  Optional uuid of the episode.
262
+ update_communities : bool
263
+ Optional. Whether to update communities with new node information
254
264
 
255
265
  Returns
256
266
  -------
@@ -276,7 +286,6 @@ class Graphiti:
276
286
  try:
277
287
  start = time()
278
288
 
279
- nodes: list[EntityNode] = []
280
289
  entity_edges: list[EntityEdge] = []
281
290
  embedder = self.llm_client.get_embedder()
282
291
  now = datetime.now()
@@ -295,6 +304,8 @@ class Graphiti:
295
304
  valid_at=reference_time,
296
305
  )
297
306
  episode.uuid = uuid if uuid is not None else episode.uuid
307
+ if not self.store_raw_episode_content:
308
+ episode.content = ''
298
309
 
299
310
  # Extract entities as nodes
300
311
 
@@ -323,7 +334,7 @@ class Graphiti:
323
334
  ),
324
335
  )
325
336
  logger.info(f'Adjusted mentioned nodes: {[(n.name, n.uuid) for n in mentioned_nodes]}')
326
- nodes.extend(mentioned_nodes)
337
+ nodes = mentioned_nodes
327
338
 
328
339
  extracted_edges_with_resolved_pointers = resolve_edge_pointers(
329
340
  extracted_edges, uuid_map
@@ -435,7 +446,7 @@ class Graphiti:
435
446
  except Exception as e:
436
447
  raise e
437
448
 
438
- async def add_episode_bulk(self, bulk_episodes: list[RawEpisode], group_id: str | None = None):
449
+ async def add_episode_bulk(self, bulk_episodes: list[RawEpisode], group_id: str = ''):
439
450
  """
440
451
  Process multiple episodes in bulk and update the graph.
441
452
 
@@ -566,9 +577,9 @@ class Graphiti:
566
577
  self,
567
578
  query: str,
568
579
  center_node_uuid: str | None = None,
569
- group_ids: list[str | None] | None = None,
580
+ group_ids: list[str] | None = None,
570
581
  num_results=DEFAULT_SEARCH_LIMIT,
571
- ):
582
+ ) -> list[EntityEdge]:
572
583
  """
573
584
  Perform a hybrid search on the knowledge graph.
574
585
 
@@ -622,7 +633,7 @@ class Graphiti:
622
633
  self,
623
634
  query: str,
624
635
  config: SearchConfig,
625
- group_ids: list[str | None] | None = None,
636
+ group_ids: list[str] | None = None,
626
637
  center_node_uuid: str | None = None,
627
638
  ) -> SearchResults:
628
639
  return await search(
@@ -633,7 +644,7 @@ class Graphiti:
633
644
  self,
634
645
  query: str,
635
646
  center_node_uuid: str | None = None,
636
- group_ids: list[str | None] | None = None,
647
+ group_ids: list[str] | None = None,
637
648
  limit: int = DEFAULT_SEARCH_LIMIT,
638
649
  ) -> list[EntityNode]:
639
650
  """
@@ -30,13 +30,17 @@ from .errors import RateLimitError
30
30
  logger = logging.getLogger(__name__)
31
31
 
32
32
  DEFAULT_MODEL = 'claude-3-5-sonnet-20240620'
33
+ DEFAULT_MAX_TOKENS = 8192
33
34
 
34
35
 
35
36
  class AnthropicClient(LLMClient):
36
37
  def __init__(self, config: LLMConfig | None = None, cache: bool = False):
37
38
  if config is None:
38
- config = LLMConfig()
39
+ config = LLMConfig(max_tokens=DEFAULT_MAX_TOKENS)
40
+ elif config.max_tokens is None:
41
+ config.max_tokens = DEFAULT_MAX_TOKENS
39
42
  super().__init__(config, cache)
43
+
40
44
  self.client = AsyncAnthropic(
41
45
  api_key=config.api_key,
42
46
  # we'll use tenacity to retry
@@ -35,7 +35,7 @@ logger = logging.getLogger(__name__)
35
35
 
36
36
 
37
37
  def is_server_or_retry_error(exception):
38
- if isinstance(exception, RateLimitError):
38
+ if isinstance(exception, (RateLimitError, json.decoder.JSONDecodeError)):
39
39
  return True
40
40
 
41
41
  return (
@@ -15,7 +15,7 @@ limitations under the License.
15
15
  """
16
16
 
17
17
  EMBEDDING_DIM = 1024
18
- DEFAULT_MAX_TOKENS = 4096
18
+ DEFAULT_MAX_TOKENS = 16384
19
19
  DEFAULT_TEMPERATURE = 0
20
20
 
21
21
 
@@ -31,13 +31,17 @@ from .errors import RateLimitError
31
31
  logger = logging.getLogger(__name__)
32
32
 
33
33
  DEFAULT_MODEL = 'llama-3.1-70b-versatile'
34
+ DEFAULT_MAX_TOKENS = 2048
34
35
 
35
36
 
36
37
  class GroqClient(LLMClient):
37
38
  def __init__(self, config: LLMConfig | None = None, cache: bool = False):
38
39
  if config is None:
39
- config = LLMConfig()
40
+ config = LLMConfig(max_tokens=DEFAULT_MAX_TOKENS)
41
+ elif config.max_tokens is None:
42
+ config.max_tokens = DEFAULT_MAX_TOKENS
40
43
  super().__init__(config, cache)
44
+
41
45
  self.client = AsyncGroq(api_key=config.api_key)
42
46
 
43
47
  def get_embedder(self) -> typing.Any: