graphiti-core 0.3.3__tar.gz → 0.3.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of graphiti-core might be problematic. Click here for more details.

Files changed (43) hide show
  1. {graphiti_core-0.3.3 → graphiti_core-0.3.4}/PKG-INFO +58 -38
  2. {graphiti_core-0.3.3 → graphiti_core-0.3.4}/README.md +57 -37
  3. {graphiti_core-0.3.3 → graphiti_core-0.3.4}/graphiti_core/edges.py +72 -7
  4. {graphiti_core-0.3.3 → graphiti_core-0.3.4}/graphiti_core/graphiti.py +16 -5
  5. {graphiti_core-0.3.3 → graphiti_core-0.3.4}/graphiti_core/llm_client/anthropic_client.py +5 -1
  6. {graphiti_core-0.3.3 → graphiti_core-0.3.4}/graphiti_core/llm_client/client.py +1 -1
  7. {graphiti_core-0.3.3 → graphiti_core-0.3.4}/graphiti_core/llm_client/config.py +1 -1
  8. {graphiti_core-0.3.3 → graphiti_core-0.3.4}/graphiti_core/llm_client/groq_client.py +5 -1
  9. {graphiti_core-0.3.3 → graphiti_core-0.3.4}/graphiti_core/llm_client/openai_client.py +39 -2
  10. {graphiti_core-0.3.3 → graphiti_core-0.3.4}/graphiti_core/nodes.py +61 -11
  11. {graphiti_core-0.3.3 → graphiti_core-0.3.4}/graphiti_core/utils/maintenance/community_operations.py +105 -34
  12. {graphiti_core-0.3.3 → graphiti_core-0.3.4}/graphiti_core/utils/maintenance/node_operations.py +6 -3
  13. {graphiti_core-0.3.3 → graphiti_core-0.3.4}/pyproject.toml +1 -1
  14. {graphiti_core-0.3.3 → graphiti_core-0.3.4}/LICENSE +0 -0
  15. {graphiti_core-0.3.3 → graphiti_core-0.3.4}/graphiti_core/__init__.py +0 -0
  16. {graphiti_core-0.3.3 → graphiti_core-0.3.4}/graphiti_core/errors.py +0 -0
  17. {graphiti_core-0.3.3 → graphiti_core-0.3.4}/graphiti_core/helpers.py +0 -0
  18. {graphiti_core-0.3.3 → graphiti_core-0.3.4}/graphiti_core/llm_client/__init__.py +0 -0
  19. {graphiti_core-0.3.3 → graphiti_core-0.3.4}/graphiti_core/llm_client/errors.py +0 -0
  20. {graphiti_core-0.3.3 → graphiti_core-0.3.4}/graphiti_core/llm_client/utils.py +0 -0
  21. {graphiti_core-0.3.3 → graphiti_core-0.3.4}/graphiti_core/prompts/__init__.py +0 -0
  22. {graphiti_core-0.3.3 → graphiti_core-0.3.4}/graphiti_core/prompts/dedupe_edges.py +0 -0
  23. {graphiti_core-0.3.3 → graphiti_core-0.3.4}/graphiti_core/prompts/dedupe_nodes.py +0 -0
  24. {graphiti_core-0.3.3 → graphiti_core-0.3.4}/graphiti_core/prompts/extract_edge_dates.py +0 -0
  25. {graphiti_core-0.3.3 → graphiti_core-0.3.4}/graphiti_core/prompts/extract_edges.py +0 -0
  26. {graphiti_core-0.3.3 → graphiti_core-0.3.4}/graphiti_core/prompts/extract_nodes.py +0 -0
  27. {graphiti_core-0.3.3 → graphiti_core-0.3.4}/graphiti_core/prompts/invalidate_edges.py +0 -0
  28. {graphiti_core-0.3.3 → graphiti_core-0.3.4}/graphiti_core/prompts/lib.py +0 -0
  29. {graphiti_core-0.3.3 → graphiti_core-0.3.4}/graphiti_core/prompts/models.py +0 -0
  30. {graphiti_core-0.3.3 → graphiti_core-0.3.4}/graphiti_core/prompts/summarize_nodes.py +0 -0
  31. {graphiti_core-0.3.3 → graphiti_core-0.3.4}/graphiti_core/py.typed +0 -0
  32. {graphiti_core-0.3.3 → graphiti_core-0.3.4}/graphiti_core/search/__init__.py +0 -0
  33. {graphiti_core-0.3.3 → graphiti_core-0.3.4}/graphiti_core/search/search.py +0 -0
  34. {graphiti_core-0.3.3 → graphiti_core-0.3.4}/graphiti_core/search/search_config.py +0 -0
  35. {graphiti_core-0.3.3 → graphiti_core-0.3.4}/graphiti_core/search/search_config_recipes.py +0 -0
  36. {graphiti_core-0.3.3 → graphiti_core-0.3.4}/graphiti_core/search/search_utils.py +0 -0
  37. {graphiti_core-0.3.3 → graphiti_core-0.3.4}/graphiti_core/utils/__init__.py +0 -0
  38. {graphiti_core-0.3.3 → graphiti_core-0.3.4}/graphiti_core/utils/bulk_utils.py +0 -0
  39. {graphiti_core-0.3.3 → graphiti_core-0.3.4}/graphiti_core/utils/maintenance/__init__.py +0 -0
  40. {graphiti_core-0.3.3 → graphiti_core-0.3.4}/graphiti_core/utils/maintenance/edge_operations.py +0 -0
  41. {graphiti_core-0.3.3 → graphiti_core-0.3.4}/graphiti_core/utils/maintenance/graph_data_operations.py +0 -0
  42. {graphiti_core-0.3.3 → graphiti_core-0.3.4}/graphiti_core/utils/maintenance/temporal_operations.py +0 -0
  43. {graphiti_core-0.3.3 → graphiti_core-0.3.4}/graphiti_core/utils/maintenance/utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: graphiti-core
3
- Version: 0.3.3
3
+ Version: 0.3.4
4
4
  Summary: A temporal graph building library
5
5
  License: Apache-2.0
6
6
  Author: Paul Paliychuk
@@ -21,7 +21,7 @@ Description-Content-Type: text/markdown
21
21
 
22
22
  <div align="center">
23
23
 
24
- # Graphiti
24
+ <img width="350" alt="Graphiti-ts-small" src="https://github.com/user-attachments/assets/bbd02947-e435-4a05-b25a-bbbac36d52c8">
25
25
 
26
26
  ## Temporal Knowledge Graphs for Agentic Applications
27
27
 
@@ -37,7 +37,9 @@ Description-Content-Type: text/markdown
37
37
 
38
38
  </div>
39
39
 
40
- Graphiti builds dynamic, temporally aware Knowledge Graphs that represent complex, evolving relationships between entities over time. Graphiti ingests both unstructured and structured data, and the resulting graph may be queried using a fusion of time, full-text, semantic, and graph algorithm approaches.
40
+ Graphiti builds dynamic, temporally aware Knowledge Graphs that represent complex, evolving relationships between
41
+ entities over time. Graphiti ingests both unstructured and structured data, and the resulting graph may be queried using
42
+ a fusion of time, full-text, semantic, and graph algorithm approaches.
41
43
 
42
44
  <br />
43
45
 
@@ -47,25 +49,39 @@ Graphiti builds dynamic, temporally aware Knowledge Graphs that represent comple
47
49
 
48
50
  <br />
49
51
 
50
- Graphiti helps you create and query Knowledge Graphs that evolve over time. A knowledge graph is a network of interconnected facts, such as _“Kendra loves Adidas shoes.”_ Each fact is a “triplet” represented by two entities, or nodes (_”Kendra”_, _“Adidas shoes”_), and their relationship, or edge (_”loves”_). Knowledge Graphs have been explored extensively for information retrieval. What makes Graphiti unique is its ability to autonomously build a knowledge graph while handling changing relationships and maintaining historical context.
52
+ Graphiti helps you create and query Knowledge Graphs that evolve over time. A knowledge graph is a network of
53
+ interconnected facts, such as _“Kendra loves Adidas shoes.”_ Each fact is a “triplet” represented by two entities, or
54
+ nodes (_“Kendra”_, _“Adidas shoes”_), and their relationship, or edge (_“loves”_). Knowledge Graphs have been explored
55
+ extensively for information retrieval. What makes Graphiti unique is its ability to autonomously build a knowledge graph
56
+ while handling changing relationships and maintaining historical context.
51
57
 
52
58
  With Graphiti, you can build LLM applications such as:
53
59
 
54
- - Assistants that learn from user interactions, fusing personal knowledge with dynamic data from business systems like CRMs and billing platforms.
60
+ - Assistants that learn from user interactions, fusing personal knowledge with dynamic data from business systems like
61
+ CRMs and billing platforms.
55
62
  - Agents that autonomously execute complex tasks, reasoning with state changes from multiple dynamic sources.
56
63
 
57
- Graphiti supports a wide range of applications in sales, customer service, health, finance, and more, enabling long-term recall and state-based reasoning for both assistants and agents.
64
+ Graphiti supports a wide range of applications in sales, customer service, health, finance, and more, enabling long-term
65
+ recall and state-based reasoning for both assistants and agents.
58
66
 
59
67
  ## Why Graphiti?
60
68
 
61
- We were intrigued by Microsoft’s GraphRAG, which expanded on RAG text chunking by using a graph to better model a document corpus and making this representation available via semantic and graph search techniques. However, GraphRAG did not address our core problem: It's primarily designed for static documents and doesn't inherently handle temporal aspects of data.
62
-
63
- Graphiti is designed from the ground up to handle constantly changing information, hybrid semantic and graph search, and scale:
64
-
65
- - **Temporal Awareness:** Tracks changes in facts and relationships over time, enabling point-in-time queries. Graph edges include temporal metadata to record relationship lifecycles.
66
- - **Episodic Processing:** Ingests data as discrete episodes, maintaining data provenance and allowing incremental entity and relationship extraction.
67
- - **Hybrid Search:** Combines semantic and BM25 full-text search, with the ability to rerank results by distance from a central node e.g. “Kendra”.
68
- - **Scalable:** Designed for processing large datasets, with parallelization of LLM calls for bulk processing while preserving the chronology of events.
69
+ We were intrigued by Microsoft’s GraphRAG, which expanded on RAG text chunking by using a graph to better model a
70
+ document corpus and making this representation available via semantic and graph search techniques. However, GraphRAG did
71
+ not address our core problem: It's primarily designed for static documents and doesn't inherently handle temporal
72
+ aspects of data.
73
+
74
+ Graphiti is designed from the ground up to handle constantly changing information, hybrid semantic and graph search, and
75
+ scale:
76
+
77
+ - **Temporal Awareness:** Tracks changes in facts and relationships over time, enabling point-in-time queries. Graph
78
+ edges include temporal metadata to record relationship lifecycles.
79
+ - **Episodic Processing:** Ingests data as discrete episodes, maintaining data provenance and allowing incremental
80
+ entity and relationship extraction.
81
+ - **Hybrid Search:** Combines semantic and BM25 full-text search, with the ability to rerank results by distance from a
82
+ central node e.g. “Kendra”.
83
+ - **Scalable:** Designed for processing large datasets, with parallelization of LLM calls for bulk processing while
84
+ preserving the chronology of events.
69
85
  - **Supports Varied Sources:** Can ingest both unstructured text and structured JSON data.
70
86
 
71
87
  <p align="center">
@@ -91,7 +107,8 @@ Optional:
91
107
  - Anthropic or Groq API key (for alternative LLM providers)
92
108
 
93
109
  > [!TIP]
94
- > The simplest way to install Neo4j is via [Neo4j Desktop](https://neo4j.com/download/). It provides a user-friendly interface to manage Neo4j instances and databases.
110
+ > The simplest way to install Neo4j is via [Neo4j Desktop](https://neo4j.com/download/). It provides a user-friendly
111
+ > interface to manage Neo4j instances and databases.
95
112
 
96
113
  ```bash
97
114
  pip install graphiti-core
@@ -106,7 +123,8 @@ poetry add graphiti-core
106
123
  ## Quick Start
107
124
 
108
125
  > [!IMPORTANT]
109
- > Graphiti uses OpenAI for LLM inference and embedding. Ensure that an `OPENAI_API_KEY` is set in your environment. Support for Anthropic and Groq LLM inferences is available, too.
126
+ > Graphiti uses OpenAI for LLM inference and embedding. Ensure that an `OPENAI_API_KEY` is set in your environment.
127
+ > Support for Anthropic and Groq LLM inferences is available, too.
110
128
 
111
129
  ```python
112
130
  from graphiti_core import Graphiti
@@ -140,25 +158,25 @@ for i, episode in enumerate(episodes):
140
158
  results = await graphiti.search('Who was the California Attorney General?')
141
159
  [
142
160
  EntityEdge(
143
- │ uuid='3133258f738e487383f07b04e15d4ac0',
144
- │ source_node_uuid='2a85789b318d4e418050506879906e62',
145
- │ target_node_uuid='baf7781f445945989d6e4f927f881556',
146
- │ created_at=datetime.datetime(2024, 8, 26, 13, 13, 24, 861097),
147
- │ name='HELD_POSITION',
148
- # the fact reflects the updated state that Harris is
149
- # no longer the AG of California
150
- │ fact='Kamala Harris was the Attorney General of California',
151
- │ fact_embedding=[
152
- │ │ -0.009955154731869698,
153
- │ ...
154
- │ │ 0.00784289836883545
155
- ],
156
- │ episodes=['b43e98ad0a904088a76c67985caecc22'],
157
- │ expired_at=datetime.datetime(2024, 8, 26, 20, 18, 1, 53812),
158
- # These dates represent the date this edge was true.
159
- │ valid_at=datetime.datetime(2011, 1, 3, 0, 0, tzinfo=<UTC>),
160
- │ invalid_at=datetime.datetime(2017, 1, 3, 0, 0, tzinfo=<UTC>)
161
- )
161
+ │ uuid = '3133258f738e487383f07b04e15d4ac0',
162
+ │ source_node_uuid = '2a85789b318d4e418050506879906e62',
163
+ │ target_node_uuid = 'baf7781f445945989d6e4f927f881556',
164
+ │ created_at = datetime.datetime(2024, 8, 26, 13, 13, 24, 861097),
165
+ │ name = 'HELD_POSITION',
166
+ # the fact reflects the updated state that Harris is
167
+ # no longer the AG of California
168
+ │ fact = 'Kamala Harris was the Attorney General of California',
169
+ │ fact_embedding = [
170
+ │ │ -0.009955154731869698,
171
+ │ ...
172
+ │ │ 0.00784289836883545
173
+ │],
174
+ │ episodes = ['b43e98ad0a904088a76c67985caecc22'],
175
+ │ expired_at = datetime.datetime(2024, 8, 26, 20, 18, 1, 53812),
176
+ # These dates represent the date this edge was true.
177
+ │ valid_at = datetime.datetime(2011, 1, 3, 0, 0, tzinfo=<UTC>),
178
+ │ invalid_at = datetime.datetime(2017, 1, 3, 0, 0, tzinfo=<UTC>)
179
+ )
162
180
  ]
163
181
 
164
182
  # Rerank search results based on graph distance
@@ -191,14 +209,16 @@ Graphiti is under active development. We aim to maintain API stability while wor
191
209
  - [ ] Achieving good performance with different LLM and embedding models
192
210
  - [ ] Creating a dedicated embedder interface
193
211
  - [ ] Supporting custom graph schemas:
194
- - Allow developers to provide their own defined node and edge classes when ingesting episodes
195
- - Enable more flexible knowledge representation tailored to specific use cases
212
+ - Allow developers to provide their own defined node and edge classes when ingesting episodes
213
+ - Enable more flexible knowledge representation tailored to specific use cases
196
214
  - [ ] Enhancing retrieval capabilities with more robust and configurable options
197
215
  - [ ] Expanding test coverage to ensure reliability and catch edge cases
198
216
 
199
217
  ## Contributing
200
218
 
201
- We encourage and appreciate all forms of contributions, whether it's code, documentation, addressing GitHub Issues, or answering questions in the Graphiti Discord channel. For detailed guidelines on code contributions, please refer to [CONTRIBUTING](CONTRIBUTING.md).
219
+ We encourage and appreciate all forms of contributions, whether it's code, documentation, addressing GitHub Issues, or
220
+ answering questions in the Graphiti Discord channel. For detailed guidelines on code contributions, please refer
221
+ to [CONTRIBUTING](CONTRIBUTING.md).
202
222
 
203
223
  ## Support
204
224
 
@@ -1,6 +1,6 @@
1
1
  <div align="center">
2
2
 
3
- # Graphiti
3
+ <img width="350" alt="Graphiti-ts-small" src="https://github.com/user-attachments/assets/bbd02947-e435-4a05-b25a-bbbac36d52c8">
4
4
 
5
5
  ## Temporal Knowledge Graphs for Agentic Applications
6
6
 
@@ -16,7 +16,9 @@
16
16
 
17
17
  </div>
18
18
 
19
- Graphiti builds dynamic, temporally aware Knowledge Graphs that represent complex, evolving relationships between entities over time. Graphiti ingests both unstructured and structured data, and the resulting graph may be queried using a fusion of time, full-text, semantic, and graph algorithm approaches.
19
+ Graphiti builds dynamic, temporally aware Knowledge Graphs that represent complex, evolving relationships between
20
+ entities over time. Graphiti ingests both unstructured and structured data, and the resulting graph may be queried using
21
+ a fusion of time, full-text, semantic, and graph algorithm approaches.
20
22
 
21
23
  <br />
22
24
 
@@ -26,25 +28,39 @@ Graphiti builds dynamic, temporally aware Knowledge Graphs that represent comple
26
28
 
27
29
  <br />
28
30
 
29
- Graphiti helps you create and query Knowledge Graphs that evolve over time. A knowledge graph is a network of interconnected facts, such as _“Kendra loves Adidas shoes.”_ Each fact is a “triplet” represented by two entities, or nodes (_”Kendra”_, _“Adidas shoes”_), and their relationship, or edge (_”loves”_). Knowledge Graphs have been explored extensively for information retrieval. What makes Graphiti unique is its ability to autonomously build a knowledge graph while handling changing relationships and maintaining historical context.
31
+ Graphiti helps you create and query Knowledge Graphs that evolve over time. A knowledge graph is a network of
32
+ interconnected facts, such as _“Kendra loves Adidas shoes.”_ Each fact is a “triplet” represented by two entities, or
33
+ nodes (_“Kendra”_, _“Adidas shoes”_), and their relationship, or edge (_“loves”_). Knowledge Graphs have been explored
34
+ extensively for information retrieval. What makes Graphiti unique is its ability to autonomously build a knowledge graph
35
+ while handling changing relationships and maintaining historical context.
30
36
 
31
37
  With Graphiti, you can build LLM applications such as:
32
38
 
33
- - Assistants that learn from user interactions, fusing personal knowledge with dynamic data from business systems like CRMs and billing platforms.
39
+ - Assistants that learn from user interactions, fusing personal knowledge with dynamic data from business systems like
40
+ CRMs and billing platforms.
34
41
  - Agents that autonomously execute complex tasks, reasoning with state changes from multiple dynamic sources.
35
42
 
36
- Graphiti supports a wide range of applications in sales, customer service, health, finance, and more, enabling long-term recall and state-based reasoning for both assistants and agents.
43
+ Graphiti supports a wide range of applications in sales, customer service, health, finance, and more, enabling long-term
44
+ recall and state-based reasoning for both assistants and agents.
37
45
 
38
46
  ## Why Graphiti?
39
47
 
40
- We were intrigued by Microsoft’s GraphRAG, which expanded on RAG text chunking by using a graph to better model a document corpus and making this representation available via semantic and graph search techniques. However, GraphRAG did not address our core problem: It's primarily designed for static documents and doesn't inherently handle temporal aspects of data.
41
-
42
- Graphiti is designed from the ground up to handle constantly changing information, hybrid semantic and graph search, and scale:
43
-
44
- - **Temporal Awareness:** Tracks changes in facts and relationships over time, enabling point-in-time queries. Graph edges include temporal metadata to record relationship lifecycles.
45
- - **Episodic Processing:** Ingests data as discrete episodes, maintaining data provenance and allowing incremental entity and relationship extraction.
46
- - **Hybrid Search:** Combines semantic and BM25 full-text search, with the ability to rerank results by distance from a central node e.g. “Kendra”.
47
- - **Scalable:** Designed for processing large datasets, with parallelization of LLM calls for bulk processing while preserving the chronology of events.
48
+ We were intrigued by Microsoft’s GraphRAG, which expanded on RAG text chunking by using a graph to better model a
49
+ document corpus and making this representation available via semantic and graph search techniques. However, GraphRAG did
50
+ not address our core problem: It's primarily designed for static documents and doesn't inherently handle temporal
51
+ aspects of data.
52
+
53
+ Graphiti is designed from the ground up to handle constantly changing information, hybrid semantic and graph search, and
54
+ scale:
55
+
56
+ - **Temporal Awareness:** Tracks changes in facts and relationships over time, enabling point-in-time queries. Graph
57
+ edges include temporal metadata to record relationship lifecycles.
58
+ - **Episodic Processing:** Ingests data as discrete episodes, maintaining data provenance and allowing incremental
59
+ entity and relationship extraction.
60
+ - **Hybrid Search:** Combines semantic and BM25 full-text search, with the ability to rerank results by distance from a
61
+ central node e.g. “Kendra”.
62
+ - **Scalable:** Designed for processing large datasets, with parallelization of LLM calls for bulk processing while
63
+ preserving the chronology of events.
48
64
  - **Supports Varied Sources:** Can ingest both unstructured text and structured JSON data.
49
65
 
50
66
  <p align="center">
@@ -70,7 +86,8 @@ Optional:
70
86
  - Anthropic or Groq API key (for alternative LLM providers)
71
87
 
72
88
  > [!TIP]
73
- > The simplest way to install Neo4j is via [Neo4j Desktop](https://neo4j.com/download/). It provides a user-friendly interface to manage Neo4j instances and databases.
89
+ > The simplest way to install Neo4j is via [Neo4j Desktop](https://neo4j.com/download/). It provides a user-friendly
90
+ > interface to manage Neo4j instances and databases.
74
91
 
75
92
  ```bash
76
93
  pip install graphiti-core
@@ -85,7 +102,8 @@ poetry add graphiti-core
85
102
  ## Quick Start
86
103
 
87
104
  > [!IMPORTANT]
88
- > Graphiti uses OpenAI for LLM inference and embedding. Ensure that an `OPENAI_API_KEY` is set in your environment. Support for Anthropic and Groq LLM inferences is available, too.
105
+ > Graphiti uses OpenAI for LLM inference and embedding. Ensure that an `OPENAI_API_KEY` is set in your environment.
106
+ > Support for Anthropic and Groq LLM inferences is available, too.
89
107
 
90
108
  ```python
91
109
  from graphiti_core import Graphiti
@@ -119,25 +137,25 @@ for i, episode in enumerate(episodes):
119
137
  results = await graphiti.search('Who was the California Attorney General?')
120
138
  [
121
139
  EntityEdge(
122
- │ uuid='3133258f738e487383f07b04e15d4ac0',
123
- │ source_node_uuid='2a85789b318d4e418050506879906e62',
124
- │ target_node_uuid='baf7781f445945989d6e4f927f881556',
125
- │ created_at=datetime.datetime(2024, 8, 26, 13, 13, 24, 861097),
126
- │ name='HELD_POSITION',
127
- # the fact reflects the updated state that Harris is
128
- # no longer the AG of California
129
- │ fact='Kamala Harris was the Attorney General of California',
130
- │ fact_embedding=[
131
- │ │ -0.009955154731869698,
132
- │ ...
133
- │ │ 0.00784289836883545
134
- ],
135
- │ episodes=['b43e98ad0a904088a76c67985caecc22'],
136
- │ expired_at=datetime.datetime(2024, 8, 26, 20, 18, 1, 53812),
137
- # These dates represent the date this edge was true.
138
- │ valid_at=datetime.datetime(2011, 1, 3, 0, 0, tzinfo=<UTC>),
139
- │ invalid_at=datetime.datetime(2017, 1, 3, 0, 0, tzinfo=<UTC>)
140
- )
140
+ │ uuid = '3133258f738e487383f07b04e15d4ac0',
141
+ │ source_node_uuid = '2a85789b318d4e418050506879906e62',
142
+ │ target_node_uuid = 'baf7781f445945989d6e4f927f881556',
143
+ │ created_at = datetime.datetime(2024, 8, 26, 13, 13, 24, 861097),
144
+ │ name = 'HELD_POSITION',
145
+ # the fact reflects the updated state that Harris is
146
+ # no longer the AG of California
147
+ │ fact = 'Kamala Harris was the Attorney General of California',
148
+ │ fact_embedding = [
149
+ │ │ -0.009955154731869698,
150
+ │ ...
151
+ │ │ 0.00784289836883545
152
+ │],
153
+ │ episodes = ['b43e98ad0a904088a76c67985caecc22'],
154
+ │ expired_at = datetime.datetime(2024, 8, 26, 20, 18, 1, 53812),
155
+ # These dates represent the date this edge was true.
156
+ │ valid_at = datetime.datetime(2011, 1, 3, 0, 0, tzinfo=<UTC>),
157
+ │ invalid_at = datetime.datetime(2017, 1, 3, 0, 0, tzinfo=<UTC>)
158
+ )
141
159
  ]
142
160
 
143
161
  # Rerank search results based on graph distance
@@ -170,14 +188,16 @@ Graphiti is under active development. We aim to maintain API stability while wor
170
188
  - [ ] Achieving good performance with different LLM and embedding models
171
189
  - [ ] Creating a dedicated embedder interface
172
190
  - [ ] Supporting custom graph schemas:
173
- - Allow developers to provide their own defined node and edge classes when ingesting episodes
174
- - Enable more flexible knowledge representation tailored to specific use cases
191
+ - Allow developers to provide their own defined node and edge classes when ingesting episodes
192
+ - Enable more flexible knowledge representation tailored to specific use cases
175
193
  - [ ] Enhancing retrieval capabilities with more robust and configurable options
176
194
  - [ ] Expanding test coverage to ensure reliability and catch edge cases
177
195
 
178
196
  ## Contributing
179
197
 
180
- We encourage and appreciate all forms of contributions, whether it's code, documentation, addressing GitHub Issues, or answering questions in the Graphiti Discord channel. For detailed guidelines on code contributions, please refer to [CONTRIBUTING](CONTRIBUTING.md).
198
+ We encourage and appreciate all forms of contributions, whether it's code, documentation, addressing GitHub Issues, or
199
+ answering questions in the Graphiti Discord channel. For detailed guidelines on code contributions, please refer
200
+ to [CONTRIBUTING](CONTRIBUTING.md).
181
201
 
182
202
  ## Support
183
203
 
@@ -104,7 +104,6 @@ class EpisodicEdge(Edge):
104
104
 
105
105
  edges = [get_episodic_edge_from_record(record) for record in records]
106
106
 
107
- logger.info(f'Found Edge: {uuid}')
108
107
  if len(edges) == 0:
109
108
  raise EdgeNotFoundError(uuid)
110
109
  return edges[0]
@@ -127,7 +126,29 @@ class EpisodicEdge(Edge):
127
126
 
128
127
  edges = [get_episodic_edge_from_record(record) for record in records]
129
128
 
130
- logger.info(f'Found Edges: {uuids}')
129
+ if len(edges) == 0:
130
+ raise EdgeNotFoundError(uuids[0])
131
+ return edges
132
+
133
+ @classmethod
134
+ async def get_by_group_ids(cls, driver: AsyncDriver, group_ids: list[str | None]):
135
+ records, _, _ = await driver.execute_query(
136
+ """
137
+ MATCH (n:Episodic)-[e:MENTIONS]->(m:Entity)
138
+ WHERE e.group_id IN $group_ids
139
+ RETURN
140
+ e.uuid As uuid,
141
+ e.group_id AS group_id,
142
+ n.uuid AS source_node_uuid,
143
+ m.uuid AS target_node_uuid,
144
+ e.created_at AS created_at
145
+ """,
146
+ group_ids=group_ids,
147
+ )
148
+
149
+ edges = [get_episodic_edge_from_record(record) for record in records]
150
+ uuids = [edge.uuid for edge in edges]
151
+
131
152
  if len(edges) == 0:
132
153
  raise EdgeNotFoundError(uuids[0])
133
154
  return edges
@@ -215,7 +236,6 @@ class EntityEdge(Edge):
215
236
 
216
237
  edges = [get_entity_edge_from_record(record) for record in records]
217
238
 
218
- logger.info(f'Found Edge: {uuid}')
219
239
  if len(edges) == 0:
220
240
  raise EdgeNotFoundError(uuid)
221
241
  return edges[0]
@@ -245,7 +265,36 @@ class EntityEdge(Edge):
245
265
 
246
266
  edges = [get_entity_edge_from_record(record) for record in records]
247
267
 
248
- logger.info(f'Found Edges: {uuids}')
268
+ if len(edges) == 0:
269
+ raise EdgeNotFoundError(uuids[0])
270
+ return edges
271
+
272
+ @classmethod
273
+ async def get_by_group_ids(cls, driver: AsyncDriver, group_ids: list[str | None]):
274
+ records, _, _ = await driver.execute_query(
275
+ """
276
+ MATCH (n:Entity)-[e:RELATES_TO]->(m:Entity)
277
+ WHERE e.group_id IN $group_ids
278
+ RETURN
279
+ e.uuid AS uuid,
280
+ n.uuid AS source_node_uuid,
281
+ m.uuid AS target_node_uuid,
282
+ e.created_at AS created_at,
283
+ e.name AS name,
284
+ e.group_id AS group_id,
285
+ e.fact AS fact,
286
+ e.fact_embedding AS fact_embedding,
287
+ e.episodes AS episodes,
288
+ e.expired_at AS expired_at,
289
+ e.valid_at AS valid_at,
290
+ e.invalid_at AS invalid_at
291
+ """,
292
+ group_ids=group_ids,
293
+ )
294
+
295
+ edges = [get_entity_edge_from_record(record) for record in records]
296
+ uuids = [edge.uuid for edge in edges]
297
+
249
298
  if len(edges) == 0:
250
299
  raise EdgeNotFoundError(uuids[0])
251
300
  return edges
@@ -288,8 +337,6 @@ class CommunityEdge(Edge):
288
337
 
289
338
  edges = [get_community_edge_from_record(record) for record in records]
290
339
 
291
- logger.info(f'Found Edge: {uuid}')
292
-
293
340
  return edges[0]
294
341
 
295
342
  @classmethod
@@ -310,7 +357,25 @@ class CommunityEdge(Edge):
310
357
 
311
358
  edges = [get_community_edge_from_record(record) for record in records]
312
359
 
313
- logger.info(f'Found Edges: {uuids}')
360
+ return edges
361
+
362
+ @classmethod
363
+ async def get_by_group_ids(cls, driver: AsyncDriver, group_ids: list[str | None]):
364
+ records, _, _ = await driver.execute_query(
365
+ """
366
+ MATCH (n:Community)-[e:HAS_MEMBER]->(m:Entity | Community)
367
+ WHERE e.group_id IN $group_ids
368
+ RETURN
369
+ e.uuid As uuid,
370
+ e.group_id AS group_id,
371
+ n.uuid AS source_node_uuid,
372
+ m.uuid AS target_node_uuid,
373
+ e.created_at AS created_at
374
+ """,
375
+ group_ids=group_ids,
376
+ )
377
+
378
+ edges = [get_community_edge_from_record(record) for record in records]
314
379
 
315
380
  return edges
316
381
 
@@ -77,7 +77,14 @@ load_dotenv()
77
77
 
78
78
 
79
79
  class Graphiti:
80
- def __init__(self, uri: str, user: str, password: str, llm_client: LLMClient | None = None):
80
+ def __init__(
81
+ self,
82
+ uri: str,
83
+ user: str,
84
+ password: str,
85
+ llm_client: LLMClient | None = None,
86
+ store_raw_episode_content: bool = True,
87
+ ):
81
88
  """
82
89
  Initialize a Graphiti instance.
83
90
 
@@ -116,6 +123,7 @@ class Graphiti:
116
123
  """
117
124
  self.driver = AsyncGraphDatabase.driver(uri, auth=(user, password))
118
125
  self.database = 'neo4j'
126
+ self.store_raw_episode_content = store_raw_episode_content
119
127
  if llm_client:
120
128
  self.llm_client = llm_client
121
129
  else:
@@ -150,8 +158,8 @@ class Graphiti:
150
158
  # Use graphiti...
151
159
  finally:
152
160
  graphiti.close()
153
- self.driver.close()
154
161
  """
162
+ self.driver.close()
155
163
 
156
164
  async def build_indices_and_constraints(self):
157
165
  """
@@ -251,6 +259,8 @@ class Graphiti:
251
259
  An id for the graph partition the episode is a part of.
252
260
  uuid : str | None
253
261
  Optional uuid of the episode.
262
+ update_communities : bool
263
+ Optional. Whether to update communities with new node information
254
264
 
255
265
  Returns
256
266
  -------
@@ -276,7 +286,6 @@ class Graphiti:
276
286
  try:
277
287
  start = time()
278
288
 
279
- nodes: list[EntityNode] = []
280
289
  entity_edges: list[EntityEdge] = []
281
290
  embedder = self.llm_client.get_embedder()
282
291
  now = datetime.now()
@@ -295,6 +304,8 @@ class Graphiti:
295
304
  valid_at=reference_time,
296
305
  )
297
306
  episode.uuid = uuid if uuid is not None else episode.uuid
307
+ if not self.store_raw_episode_content:
308
+ episode.content = ''
298
309
 
299
310
  # Extract entities as nodes
300
311
 
@@ -323,7 +334,7 @@ class Graphiti:
323
334
  ),
324
335
  )
325
336
  logger.info(f'Adjusted mentioned nodes: {[(n.name, n.uuid) for n in mentioned_nodes]}')
326
- nodes.extend(mentioned_nodes)
337
+ nodes = mentioned_nodes
327
338
 
328
339
  extracted_edges_with_resolved_pointers = resolve_edge_pointers(
329
340
  extracted_edges, uuid_map
@@ -568,7 +579,7 @@ class Graphiti:
568
579
  center_node_uuid: str | None = None,
569
580
  group_ids: list[str | None] | None = None,
570
581
  num_results=DEFAULT_SEARCH_LIMIT,
571
- ):
582
+ ) -> list[EntityEdge]:
572
583
  """
573
584
  Perform a hybrid search on the knowledge graph.
574
585
 
@@ -30,13 +30,17 @@ from .errors import RateLimitError
30
30
  logger = logging.getLogger(__name__)
31
31
 
32
32
  DEFAULT_MODEL = 'claude-3-5-sonnet-20240620'
33
+ DEFAULT_MAX_TOKENS = 8192
33
34
 
34
35
 
35
36
  class AnthropicClient(LLMClient):
36
37
  def __init__(self, config: LLMConfig | None = None, cache: bool = False):
37
38
  if config is None:
38
- config = LLMConfig()
39
+ config = LLMConfig(max_tokens=DEFAULT_MAX_TOKENS)
40
+ elif config.max_tokens is None:
41
+ config.max_tokens = DEFAULT_MAX_TOKENS
39
42
  super().__init__(config, cache)
43
+
40
44
  self.client = AsyncAnthropic(
41
45
  api_key=config.api_key,
42
46
  # we'll use tenacity to retry
@@ -35,7 +35,7 @@ logger = logging.getLogger(__name__)
35
35
 
36
36
 
37
37
  def is_server_or_retry_error(exception):
38
- if isinstance(exception, RateLimitError):
38
+ if isinstance(exception, (RateLimitError, json.decoder.JSONDecodeError)):
39
39
  return True
40
40
 
41
41
  return (
@@ -15,7 +15,7 @@ limitations under the License.
15
15
  """
16
16
 
17
17
  EMBEDDING_DIM = 1024
18
- DEFAULT_MAX_TOKENS = 4096
18
+ DEFAULT_MAX_TOKENS = 16384
19
19
  DEFAULT_TEMPERATURE = 0
20
20
 
21
21
 
@@ -31,13 +31,17 @@ from .errors import RateLimitError
31
31
  logger = logging.getLogger(__name__)
32
32
 
33
33
  DEFAULT_MODEL = 'llama-3.1-70b-versatile'
34
+ DEFAULT_MAX_TOKENS = 2048
34
35
 
35
36
 
36
37
  class GroqClient(LLMClient):
37
38
  def __init__(self, config: LLMConfig | None = None, cache: bool = False):
38
39
  if config is None:
39
- config = LLMConfig()
40
+ config = LLMConfig(max_tokens=DEFAULT_MAX_TOKENS)
41
+ elif config.max_tokens is None:
42
+ config.max_tokens = DEFAULT_MAX_TOKENS
40
43
  super().__init__(config, cache)
44
+
41
45
  self.client = AsyncGroq(api_key=config.api_key)
42
46
 
43
47
  def get_embedder(self) -> typing.Any:
@@ -33,13 +33,50 @@ DEFAULT_MODEL = 'gpt-4o-2024-08-06'
33
33
 
34
34
 
35
35
  class OpenAIClient(LLMClient):
36
- def __init__(self, config: LLMConfig | None = None, cache: bool = False):
36
+ """
37
+ OpenAIClient is a client class for interacting with OpenAI's language models.
38
+
39
+ This class extends the LLMClient and provides methods to initialize the client,
40
+ get an embedder, and generate responses from the language model.
41
+
42
+ Attributes:
43
+ client (AsyncOpenAI): The OpenAI client used to interact with the API.
44
+ model (str): The model name to use for generating responses.
45
+ temperature (float): The temperature to use for generating responses.
46
+ max_tokens (int): The maximum number of tokens to generate in a response.
47
+
48
+ Methods:
49
+ __init__(config: LLMConfig | None = None, cache: bool = False, client: typing.Any = None):
50
+ Initializes the OpenAIClient with the provided configuration, cache setting, and client.
51
+
52
+ get_embedder() -> typing.Any:
53
+ Returns the embedder from the OpenAI client.
54
+
55
+ _generate_response(messages: list[Message]) -> dict[str, typing.Any]:
56
+ Generates a response from the language model based on the provided messages.
57
+ """
58
+
59
+ def __init__(
60
+ self, config: LLMConfig | None = None, cache: bool = False, client: typing.Any = None
61
+ ):
62
+ """
63
+ Initialize the OpenAIClient with the provided configuration, cache setting, and client.
64
+
65
+ Args:
66
+ config (LLMConfig | None): The configuration for the LLM client, including API key, model, base URL, temperature, and max tokens.
67
+ cache (bool): Whether to use caching for responses. Defaults to False.
68
+ client (Any | None): An optional async client instance to use. If not provided, a new AsyncOpenAI client is created.
69
+
70
+ """
37
71
  if config is None:
38
72
  config = LLMConfig()
39
73
 
40
74
  super().__init__(config, cache)
41
75
 
42
- self.client = AsyncOpenAI(api_key=config.api_key, base_url=config.base_url)
76
+ if client is None:
77
+ self.client = AsyncOpenAI(api_key=config.api_key, base_url=config.base_url)
78
+ else:
79
+ self.client = client
43
80
 
44
81
  def get_embedder(self) -> typing.Any:
45
82
  return self.client.embeddings
@@ -158,8 +158,6 @@ class EpisodicNode(Node):
158
158
 
159
159
  episodes = [get_episodic_node_from_record(record) for record in records]
160
160
 
161
- logger.info(f'Found Node: {uuid}')
162
-
163
161
  if len(episodes) == 0:
164
162
  raise NodeNotFoundError(uuid)
165
163
 
@@ -185,7 +183,27 @@ class EpisodicNode(Node):
185
183
 
186
184
  episodes = [get_episodic_node_from_record(record) for record in records]
187
185
 
188
- logger.info(f'Found Nodes: {uuids}')
186
+ return episodes
187
+
188
+ @classmethod
189
+ async def get_by_group_ids(cls, driver: AsyncDriver, group_ids: list[str | None]):
190
+ records, _, _ = await driver.execute_query(
191
+ """
192
+ MATCH (e:Episodic) WHERE e.group_id IN $group_ids
193
+ RETURN DISTINCT
194
+ e.content AS content,
195
+ e.created_at AS created_at,
196
+ e.valid_at AS valid_at,
197
+ e.uuid AS uuid,
198
+ e.name AS name,
199
+ e.group_id AS group_id,
200
+ e.source_description AS source_description,
201
+ e.source AS source
202
+ """,
203
+ group_ids=group_ids,
204
+ )
205
+
206
+ episodes = [get_episodic_node_from_record(record) for record in records]
189
207
 
190
208
  return episodes
191
209
 
@@ -240,8 +258,6 @@ class EntityNode(Node):
240
258
 
241
259
  nodes = [get_entity_node_from_record(record) for record in records]
242
260
 
243
- logger.info(f'Found Node: {uuid}')
244
-
245
261
  return nodes[0]
246
262
 
247
263
  @classmethod
@@ -262,7 +278,25 @@ class EntityNode(Node):
262
278
 
263
279
  nodes = [get_entity_node_from_record(record) for record in records]
264
280
 
265
- logger.info(f'Found Nodes: {uuids}')
281
+ return nodes
282
+
283
+ @classmethod
284
+ async def get_by_group_ids(cls, driver: AsyncDriver, group_ids: list[str | None]):
285
+ records, _, _ = await driver.execute_query(
286
+ """
287
+ MATCH (n:Entity) WHERE n.group_id IN $group_ids
288
+ RETURN
289
+ n.uuid As uuid,
290
+ n.name AS name,
291
+ n.name_embedding AS name_embedding,
292
+ n.group_id AS group_id,
293
+ n.created_at AS created_at,
294
+ n.summary AS summary
295
+ """,
296
+ group_ids=group_ids,
297
+ )
298
+
299
+ nodes = [get_entity_node_from_record(record) for record in records]
266
300
 
267
301
  return nodes
268
302
 
@@ -317,8 +351,6 @@ class CommunityNode(Node):
317
351
 
318
352
  nodes = [get_community_node_from_record(record) for record in records]
319
353
 
320
- logger.info(f'Found Node: {uuid}')
321
-
322
354
  return nodes[0]
323
355
 
324
356
  @classmethod
@@ -337,11 +369,29 @@ class CommunityNode(Node):
337
369
  uuids=uuids,
338
370
  )
339
371
 
340
- nodes = [get_community_node_from_record(record) for record in records]
372
+ communities = [get_community_node_from_record(record) for record in records]
341
373
 
342
- logger.info(f'Found Nodes: {uuids}')
374
+ return communities
343
375
 
344
- return nodes
376
+ @classmethod
377
+ async def get_by_group_ids(cls, driver: AsyncDriver, group_ids: list[str | None]):
378
+ records, _, _ = await driver.execute_query(
379
+ """
380
+ MATCH (n:Community) WHERE n.group_id IN $group_ids
381
+ RETURN
382
+ n.uuid As uuid,
383
+ n.name AS name,
384
+ n.name_embedding AS name_embedding,
385
+ n.group_id AS group_id,
386
+ n.created_at AS created_at,
387
+ n.summary AS summary
388
+ """,
389
+ group_ids=group_ids,
390
+ )
391
+
392
+ communities = [get_community_node_from_record(record) for record in records]
393
+
394
+ return communities
345
395
 
346
396
 
347
397
  # Node helpers
@@ -4,6 +4,7 @@ from collections import defaultdict
4
4
  from datetime import datetime
5
5
 
6
6
  from neo4j import AsyncDriver
7
+ from pydantic import BaseModel
7
8
 
8
9
  from graphiti_core.edges import CommunityEdge
9
10
  from graphiti_core.llm_client import LLMClient
@@ -11,9 +12,17 @@ from graphiti_core.nodes import CommunityNode, EntityNode, get_community_node_fr
11
12
  from graphiti_core.prompts import prompt_library
12
13
  from graphiti_core.utils.maintenance.edge_operations import build_community_edges
13
14
 
15
+ MAX_COMMUNITY_BUILD_CONCURRENCY = 10
16
+
17
+
14
18
  logger = logging.getLogger(__name__)
15
19
 
16
20
 
21
+ class Neighbor(BaseModel):
22
+ node_uuid: str
23
+ edge_count: int
24
+
25
+
17
26
  async def build_community_projection(driver: AsyncDriver) -> str:
18
27
  records, _, _ = await driver.execute_query("""
19
28
  CALL gds.graph.project("communities", "Entity",
@@ -29,36 +38,96 @@ async def build_community_projection(driver: AsyncDriver) -> str:
29
38
  return records[0]['graph']
30
39
 
31
40
 
32
- async def destroy_projection(driver: AsyncDriver, projection_name: str):
33
- await driver.execute_query(
34
- """
35
- CALL gds.graph.drop($projection_name)
36
- """,
37
- projection_name=projection_name,
38
- )
41
+ async def get_community_clusters(driver: AsyncDriver) -> list[list[EntityNode]]:
42
+ community_clusters: list[list[EntityNode]] = []
39
43
 
40
-
41
- async def get_community_clusters(
42
- driver: AsyncDriver, projection_name: str
43
- ) -> list[list[EntityNode]]:
44
- records, _, _ = await driver.execute_query("""
45
- CALL gds.leiden.stream("communities")
46
- YIELD nodeId, communityId
47
- RETURN gds.util.asNode(nodeId).uuid AS entity_uuid, communityId
44
+ group_id_values, _, _ = await driver.execute_query("""
45
+ MATCH (n:Entity WHERE n.group_id IS NOT NULL)
46
+ RETURN
47
+ collect(DISTINCT n.group_id) AS group_ids
48
48
  """)
49
- community_map: dict[int, list[str]] = defaultdict(list)
50
- for record in records:
51
- community_map[record['communityId']].append(record['entity_uuid'])
52
49
 
53
- community_clusters: list[list[EntityNode]] = list(
54
- await asyncio.gather(
55
- *[EntityNode.get_by_uuids(driver, cluster) for cluster in community_map.values()]
50
+ group_ids = group_id_values[0]['group_ids']
51
+ for group_id in group_ids:
52
+ projection: dict[str, list[Neighbor]] = {}
53
+ nodes = await EntityNode.get_by_group_ids(driver, [group_id])
54
+ for node in nodes:
55
+ records, _, _ = await driver.execute_query(
56
+ """
57
+ MATCH (n:Entity {group_id: $group_id, uuid: $uuid})-[r:RELATES_TO]-(m: Entity {group_id: $group_id})
58
+ WITH count(r) AS count, m.uuid AS uuid
59
+ RETURN
60
+ uuid,
61
+ count
62
+ """,
63
+ uuid=node.uuid,
64
+ group_id=group_id,
65
+ )
66
+
67
+ projection[node.uuid] = [
68
+ Neighbor(node_uuid=record['uuid'], edge_count=record['count']) for record in records
69
+ ]
70
+
71
+ cluster_uuids = label_propagation(projection)
72
+
73
+ community_clusters.extend(
74
+ list(
75
+ await asyncio.gather(
76
+ *[EntityNode.get_by_uuids(driver, cluster) for cluster in cluster_uuids]
77
+ )
78
+ )
56
79
  )
57
- )
58
80
 
59
81
  return community_clusters
60
82
 
61
83
 
84
+ def label_propagation(projection: dict[str, list[Neighbor]]) -> list[list[str]]:
85
+ # Implement the label propagation community detection algorithm.
86
+ # 1. Start with each node being assigned its own community
87
+ # 2. Each node will take on the community of the plurality of its neighbors
88
+ # 3. Ties are broken by going to the largest community
89
+ # 4. Continue until no communities change during propagation
90
+
91
+ community_map = {uuid: i for i, uuid in enumerate(projection.keys())}
92
+
93
+ while True:
94
+ no_change = True
95
+ new_community_map: dict[str, int] = {}
96
+
97
+ for uuid, neighbors in projection.items():
98
+ curr_community = community_map[uuid]
99
+
100
+ community_candidates: dict[int, int] = defaultdict(int)
101
+ for neighbor in neighbors:
102
+ community_candidates[community_map[neighbor.node_uuid]] += neighbor.edge_count
103
+
104
+ community_lst = [
105
+ (count, community) for community, count in community_candidates.items()
106
+ ]
107
+
108
+ community_lst.sort(reverse=True)
109
+ community_candidate = community_lst[0][1] if len(community_lst) > 0 else -1
110
+
111
+ new_community = max(community_candidate, curr_community)
112
+
113
+ new_community_map[uuid] = new_community
114
+
115
+ if new_community != curr_community:
116
+ no_change = False
117
+
118
+ if no_change:
119
+ break
120
+
121
+ community_map = new_community_map
122
+
123
+ community_cluster_map = defaultdict(list)
124
+ for uuid, community in community_map.items():
125
+ community_cluster_map[community].append(uuid)
126
+
127
+ clusters = [cluster for cluster in community_cluster_map.values()]
128
+ return clusters
129
+
130
+
62
131
  async def summarize_pair(llm_client: LLMClient, summary_pair: tuple[str, str]) -> str:
63
132
  # Prepare context for LLM
64
133
  context = {'node_summaries': [{'summary': summary} for summary in summary_pair]}
@@ -85,7 +154,7 @@ async def generate_summary_description(llm_client: LLMClient, summary: str) -> s
85
154
 
86
155
 
87
156
  async def build_community(
88
- llm_client: LLMClient, community_cluster: list[EntityNode]
157
+ llm_client: LLMClient, community_cluster: list[EntityNode]
89
158
  ) -> tuple[CommunityNode, list[CommunityEdge]]:
90
159
  summaries = [entity.summary for entity in community_cluster]
91
160
  length = len(summaries)
@@ -99,7 +168,7 @@ async def build_community(
99
168
  *[
100
169
  summarize_pair(llm_client, (str(left_summary), str(right_summary)))
101
170
  for left_summary, right_summary in zip(
102
- summaries[: int(length / 2)], summaries[int(length / 2) :]
171
+ summaries[: int(length / 2)], summaries[int(length / 2):]
103
172
  )
104
173
  ]
105
174
  )
@@ -127,15 +196,18 @@ async def build_community(
127
196
 
128
197
 
129
198
  async def build_communities(
130
- driver: AsyncDriver, llm_client: LLMClient
199
+ driver: AsyncDriver, llm_client: LLMClient
131
200
  ) -> tuple[list[CommunityNode], list[CommunityEdge]]:
132
- projection = await build_community_projection(driver)
133
- community_clusters = await get_community_clusters(driver, projection)
201
+ community_clusters = await get_community_clusters(driver)
202
+
203
+ semaphore = asyncio.Semaphore(MAX_COMMUNITY_BUILD_CONCURRENCY)
204
+
205
+ async def limited_build_community(cluster):
206
+ async with semaphore:
207
+ return await build_community(llm_client, cluster)
134
208
 
135
209
  communities: list[tuple[CommunityNode, list[CommunityEdge]]] = list(
136
- await asyncio.gather(
137
- *[build_community(llm_client, cluster) for cluster in community_clusters]
138
- )
210
+ await asyncio.gather(*[limited_build_community(cluster) for cluster in community_clusters])
139
211
  )
140
212
 
141
213
  community_nodes: list[CommunityNode] = []
@@ -144,7 +216,6 @@ async def build_communities(
144
216
  community_nodes.append(community[0])
145
217
  community_edges.extend(community[1])
146
218
 
147
- await destroy_projection(driver, projection)
148
219
  return community_nodes, community_edges
149
220
 
150
221
 
@@ -156,7 +227,7 @@ async def remove_communities(driver: AsyncDriver):
156
227
 
157
228
 
158
229
  async def determine_entity_community(
159
- driver: AsyncDriver, entity: EntityNode
230
+ driver: AsyncDriver, entity: EntityNode
160
231
  ) -> tuple[CommunityNode | None, bool]:
161
232
  # Check if the node is already part of a community
162
233
  records, _, _ = await driver.execute_query(
@@ -217,7 +288,7 @@ async def determine_entity_community(
217
288
 
218
289
 
219
290
  async def update_community(
220
- driver: AsyncDriver, llm_client: LLMClient, embedder, entity: EntityNode
291
+ driver: AsyncDriver, llm_client: LLMClient, embedder, entity: EntityNode
221
292
  ):
222
293
  community, is_new = await determine_entity_community(driver, entity)
223
294
 
@@ -236,4 +307,4 @@ async def update_community(
236
307
 
237
308
  await community.generate_name_embedding(embedder)
238
309
 
239
- await community.save(driver)
310
+ await community.save(driver)
@@ -272,9 +272,12 @@ async def dedupe_node_list(
272
272
  unique_nodes = []
273
273
  uuid_map: dict[str, str] = {}
274
274
  for node_data in nodes_data:
275
- node = node_map[node_data['uuids'][0]]
276
- node.summary = node_data['summary']
277
- unique_nodes.append(node)
275
+ node_instance: EntityNode | None = node_map.get(node_data['uuids'][0])
276
+ if node_instance is None:
277
+ logger.warning(f'Node {node_data["uuids"][0]} not found in node map')
278
+ continue
279
+ node_instance.summary = node_data['summary']
280
+ unique_nodes.append(node_instance)
278
281
 
279
282
  for uuid in node_data['uuids'][1:]:
280
283
  uuid_value = node_map[node_data['uuids'][0]].uuid
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "graphiti-core"
3
- version = "0.3.3"
3
+ version = "0.3.4"
4
4
  description = "A temporal graph building library"
5
5
  authors = [
6
6
  "Paul Paliychuk <paul@getzep.com>",
File without changes