graphiti-core 0.3.3__tar.gz → 0.3.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of graphiti-core might be problematic. Click here for more details.
- {graphiti_core-0.3.3 → graphiti_core-0.3.4}/PKG-INFO +58 -38
- {graphiti_core-0.3.3 → graphiti_core-0.3.4}/README.md +57 -37
- {graphiti_core-0.3.3 → graphiti_core-0.3.4}/graphiti_core/edges.py +72 -7
- {graphiti_core-0.3.3 → graphiti_core-0.3.4}/graphiti_core/graphiti.py +16 -5
- {graphiti_core-0.3.3 → graphiti_core-0.3.4}/graphiti_core/llm_client/anthropic_client.py +5 -1
- {graphiti_core-0.3.3 → graphiti_core-0.3.4}/graphiti_core/llm_client/client.py +1 -1
- {graphiti_core-0.3.3 → graphiti_core-0.3.4}/graphiti_core/llm_client/config.py +1 -1
- {graphiti_core-0.3.3 → graphiti_core-0.3.4}/graphiti_core/llm_client/groq_client.py +5 -1
- {graphiti_core-0.3.3 → graphiti_core-0.3.4}/graphiti_core/llm_client/openai_client.py +39 -2
- {graphiti_core-0.3.3 → graphiti_core-0.3.4}/graphiti_core/nodes.py +61 -11
- {graphiti_core-0.3.3 → graphiti_core-0.3.4}/graphiti_core/utils/maintenance/community_operations.py +105 -34
- {graphiti_core-0.3.3 → graphiti_core-0.3.4}/graphiti_core/utils/maintenance/node_operations.py +6 -3
- {graphiti_core-0.3.3 → graphiti_core-0.3.4}/pyproject.toml +1 -1
- {graphiti_core-0.3.3 → graphiti_core-0.3.4}/LICENSE +0 -0
- {graphiti_core-0.3.3 → graphiti_core-0.3.4}/graphiti_core/__init__.py +0 -0
- {graphiti_core-0.3.3 → graphiti_core-0.3.4}/graphiti_core/errors.py +0 -0
- {graphiti_core-0.3.3 → graphiti_core-0.3.4}/graphiti_core/helpers.py +0 -0
- {graphiti_core-0.3.3 → graphiti_core-0.3.4}/graphiti_core/llm_client/__init__.py +0 -0
- {graphiti_core-0.3.3 → graphiti_core-0.3.4}/graphiti_core/llm_client/errors.py +0 -0
- {graphiti_core-0.3.3 → graphiti_core-0.3.4}/graphiti_core/llm_client/utils.py +0 -0
- {graphiti_core-0.3.3 → graphiti_core-0.3.4}/graphiti_core/prompts/__init__.py +0 -0
- {graphiti_core-0.3.3 → graphiti_core-0.3.4}/graphiti_core/prompts/dedupe_edges.py +0 -0
- {graphiti_core-0.3.3 → graphiti_core-0.3.4}/graphiti_core/prompts/dedupe_nodes.py +0 -0
- {graphiti_core-0.3.3 → graphiti_core-0.3.4}/graphiti_core/prompts/extract_edge_dates.py +0 -0
- {graphiti_core-0.3.3 → graphiti_core-0.3.4}/graphiti_core/prompts/extract_edges.py +0 -0
- {graphiti_core-0.3.3 → graphiti_core-0.3.4}/graphiti_core/prompts/extract_nodes.py +0 -0
- {graphiti_core-0.3.3 → graphiti_core-0.3.4}/graphiti_core/prompts/invalidate_edges.py +0 -0
- {graphiti_core-0.3.3 → graphiti_core-0.3.4}/graphiti_core/prompts/lib.py +0 -0
- {graphiti_core-0.3.3 → graphiti_core-0.3.4}/graphiti_core/prompts/models.py +0 -0
- {graphiti_core-0.3.3 → graphiti_core-0.3.4}/graphiti_core/prompts/summarize_nodes.py +0 -0
- {graphiti_core-0.3.3 → graphiti_core-0.3.4}/graphiti_core/py.typed +0 -0
- {graphiti_core-0.3.3 → graphiti_core-0.3.4}/graphiti_core/search/__init__.py +0 -0
- {graphiti_core-0.3.3 → graphiti_core-0.3.4}/graphiti_core/search/search.py +0 -0
- {graphiti_core-0.3.3 → graphiti_core-0.3.4}/graphiti_core/search/search_config.py +0 -0
- {graphiti_core-0.3.3 → graphiti_core-0.3.4}/graphiti_core/search/search_config_recipes.py +0 -0
- {graphiti_core-0.3.3 → graphiti_core-0.3.4}/graphiti_core/search/search_utils.py +0 -0
- {graphiti_core-0.3.3 → graphiti_core-0.3.4}/graphiti_core/utils/__init__.py +0 -0
- {graphiti_core-0.3.3 → graphiti_core-0.3.4}/graphiti_core/utils/bulk_utils.py +0 -0
- {graphiti_core-0.3.3 → graphiti_core-0.3.4}/graphiti_core/utils/maintenance/__init__.py +0 -0
- {graphiti_core-0.3.3 → graphiti_core-0.3.4}/graphiti_core/utils/maintenance/edge_operations.py +0 -0
- {graphiti_core-0.3.3 → graphiti_core-0.3.4}/graphiti_core/utils/maintenance/graph_data_operations.py +0 -0
- {graphiti_core-0.3.3 → graphiti_core-0.3.4}/graphiti_core/utils/maintenance/temporal_operations.py +0 -0
- {graphiti_core-0.3.3 → graphiti_core-0.3.4}/graphiti_core/utils/maintenance/utils.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: graphiti-core
|
|
3
|
-
Version: 0.3.3
|
|
3
|
+
Version: 0.3.4
|
|
4
4
|
Summary: A temporal graph building library
|
|
5
5
|
License: Apache-2.0
|
|
6
6
|
Author: Paul Paliychuk
|
|
@@ -21,7 +21,7 @@ Description-Content-Type: text/markdown
|
|
|
21
21
|
|
|
22
22
|
<div align="center">
|
|
23
23
|
|
|
24
|
-
|
|
24
|
+
<img width="350" alt="Graphiti-ts-small" src="https://github.com/user-attachments/assets/bbd02947-e435-4a05-b25a-bbbac36d52c8">
|
|
25
25
|
|
|
26
26
|
## Temporal Knowledge Graphs for Agentic Applications
|
|
27
27
|
|
|
@@ -37,7 +37,9 @@ Description-Content-Type: text/markdown
|
|
|
37
37
|
|
|
38
38
|
</div>
|
|
39
39
|
|
|
40
|
-
Graphiti builds dynamic, temporally aware Knowledge Graphs that represent complex, evolving relationships between
|
|
40
|
+
Graphiti builds dynamic, temporally aware Knowledge Graphs that represent complex, evolving relationships between
|
|
41
|
+
entities over time. Graphiti ingests both unstructured and structured data, and the resulting graph may be queried using
|
|
42
|
+
a fusion of time, full-text, semantic, and graph algorithm approaches.
|
|
41
43
|
|
|
42
44
|
<br />
|
|
43
45
|
|
|
@@ -47,25 +49,39 @@ Graphiti builds dynamic, temporally aware Knowledge Graphs that represent comple
|
|
|
47
49
|
|
|
48
50
|
<br />
|
|
49
51
|
|
|
50
|
-
Graphiti helps you create and query Knowledge Graphs that evolve over time. A knowledge graph is a network of
|
|
52
|
+
Graphiti helps you create and query Knowledge Graphs that evolve over time. A knowledge graph is a network of
|
|
53
|
+
interconnected facts, such as _“Kendra loves Adidas shoes.”_ Each fact is a “triplet” represented by two entities, or
|
|
54
|
+
nodes (_“Kendra”_, _“Adidas shoes”_), and their relationship, or edge (_“loves”_). Knowledge Graphs have been explored
|
|
55
|
+
extensively for information retrieval. What makes Graphiti unique is its ability to autonomously build a knowledge graph
|
|
56
|
+
while handling changing relationships and maintaining historical context.
|
|
51
57
|
|
|
52
58
|
With Graphiti, you can build LLM applications such as:
|
|
53
59
|
|
|
54
|
-
- Assistants that learn from user interactions, fusing personal knowledge with dynamic data from business systems like
|
|
60
|
+
- Assistants that learn from user interactions, fusing personal knowledge with dynamic data from business systems like
|
|
61
|
+
CRMs and billing platforms.
|
|
55
62
|
- Agents that autonomously execute complex tasks, reasoning with state changes from multiple dynamic sources.
|
|
56
63
|
|
|
57
|
-
Graphiti supports a wide range of applications in sales, customer service, health, finance, and more, enabling long-term
|
|
64
|
+
Graphiti supports a wide range of applications in sales, customer service, health, finance, and more, enabling long-term
|
|
65
|
+
recall and state-based reasoning for both assistants and agents.
|
|
58
66
|
|
|
59
67
|
## Why Graphiti?
|
|
60
68
|
|
|
61
|
-
We were intrigued by Microsoft’s GraphRAG, which expanded on RAG text chunking by using a graph to better model a
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
+
We were intrigued by Microsoft’s GraphRAG, which expanded on RAG text chunking by using a graph to better model a
|
|
70
|
+
document corpus and making this representation available via semantic and graph search techniques. However, GraphRAG did
|
|
71
|
+
not address our core problem: It's primarily designed for static documents and doesn't inherently handle temporal
|
|
72
|
+
aspects of data.
|
|
73
|
+
|
|
74
|
+
Graphiti is designed from the ground up to handle constantly changing information, hybrid semantic and graph search, and
|
|
75
|
+
scale:
|
|
76
|
+
|
|
77
|
+
- **Temporal Awareness:** Tracks changes in facts and relationships over time, enabling point-in-time queries. Graph
|
|
78
|
+
edges include temporal metadata to record relationship lifecycles.
|
|
79
|
+
- **Episodic Processing:** Ingests data as discrete episodes, maintaining data provenance and allowing incremental
|
|
80
|
+
entity and relationship extraction.
|
|
81
|
+
- **Hybrid Search:** Combines semantic and BM25 full-text search, with the ability to rerank results by distance from a
|
|
82
|
+
central node e.g. “Kendra”.
|
|
83
|
+
- **Scalable:** Designed for processing large datasets, with parallelization of LLM calls for bulk processing while
|
|
84
|
+
preserving the chronology of events.
|
|
69
85
|
- **Supports Varied Sources:** Can ingest both unstructured text and structured JSON data.
|
|
70
86
|
|
|
71
87
|
<p align="center">
|
|
@@ -91,7 +107,8 @@ Optional:
|
|
|
91
107
|
- Anthropic or Groq API key (for alternative LLM providers)
|
|
92
108
|
|
|
93
109
|
> [!TIP]
|
|
94
|
-
> The simplest way to install Neo4j is via [Neo4j Desktop](https://neo4j.com/download/). It provides a user-friendly
|
|
110
|
+
> The simplest way to install Neo4j is via [Neo4j Desktop](https://neo4j.com/download/). It provides a user-friendly
|
|
111
|
+
> interface to manage Neo4j instances and databases.
|
|
95
112
|
|
|
96
113
|
```bash
|
|
97
114
|
pip install graphiti-core
|
|
@@ -106,7 +123,8 @@ poetry add graphiti-core
|
|
|
106
123
|
## Quick Start
|
|
107
124
|
|
|
108
125
|
> [!IMPORTANT]
|
|
109
|
-
> Graphiti uses OpenAI for LLM inference and embedding. Ensure that an `OPENAI_API_KEY` is set in your environment.
|
|
126
|
+
> Graphiti uses OpenAI for LLM inference and embedding. Ensure that an `OPENAI_API_KEY` is set in your environment.
|
|
127
|
+
> Support for Anthropic and Groq LLM inference is available, too.
|
|
110
128
|
|
|
111
129
|
```python
|
|
112
130
|
from graphiti_core import Graphiti
|
|
@@ -140,25 +158,25 @@ for i, episode in enumerate(episodes):
|
|
|
140
158
|
results = await graphiti.search('Who was the California Attorney General?')
|
|
141
159
|
[
|
|
142
160
|
EntityEdge(
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
161
|
+
│ uuid = '3133258f738e487383f07b04e15d4ac0',
|
|
162
|
+
│ source_node_uuid = '2a85789b318d4e418050506879906e62',
|
|
163
|
+
│ target_node_uuid = 'baf7781f445945989d6e4f927f881556',
|
|
164
|
+
│ created_at = datetime.datetime(2024, 8, 26, 13, 13, 24, 861097),
|
|
165
|
+
│ name = 'HELD_POSITION',
|
|
166
|
+
# the fact reflects the updated state that Harris is
|
|
167
|
+
# no longer the AG of California
|
|
168
|
+
│ fact = 'Kamala Harris was the Attorney General of California',
|
|
169
|
+
│ fact_embedding = [
|
|
170
|
+
│ │ -0.009955154731869698,
|
|
171
|
+
│ ...
|
|
172
|
+
│ │ 0.00784289836883545
|
|
173
|
+
│],
|
|
174
|
+
│ episodes = ['b43e98ad0a904088a76c67985caecc22'],
|
|
175
|
+
│ expired_at = datetime.datetime(2024, 8, 26, 20, 18, 1, 53812),
|
|
176
|
+
# These dates represent the date this edge was true.
|
|
177
|
+
│ valid_at = datetime.datetime(2011, 1, 3, 0, 0, tzinfo= < UTC >),
|
|
178
|
+
│ invalid_at = datetime.datetime(2017, 1, 3, 0, 0, tzinfo= < UTC >)
|
|
179
|
+
)
|
|
162
180
|
]
|
|
163
181
|
|
|
164
182
|
# Rerank search results based on graph distance
|
|
@@ -191,14 +209,16 @@ Graphiti is under active development. We aim to maintain API stability while wor
|
|
|
191
209
|
- [ ] Achieving good performance with different LLM and embedding models
|
|
192
210
|
- [ ] Creating a dedicated embedder interface
|
|
193
211
|
- [ ] Supporting custom graph schemas:
|
|
194
|
-
|
|
195
|
-
|
|
212
|
+
- Allow developers to provide their own defined node and edge classes when ingesting episodes
|
|
213
|
+
- Enable more flexible knowledge representation tailored to specific use cases
|
|
196
214
|
- [ ] Enhancing retrieval capabilities with more robust and configurable options
|
|
197
215
|
- [ ] Expanding test coverage to ensure reliability and catch edge cases
|
|
198
216
|
|
|
199
217
|
## Contributing
|
|
200
218
|
|
|
201
|
-
We encourage and appreciate all forms of contributions, whether it's code, documentation, addressing GitHub Issues, or
|
|
219
|
+
We encourage and appreciate all forms of contributions, whether it's code, documentation, addressing GitHub Issues, or
|
|
220
|
+
answering questions in the Graphiti Discord channel. For detailed guidelines on code contributions, please refer
|
|
221
|
+
to [CONTRIBUTING](CONTRIBUTING.md).
|
|
202
222
|
|
|
203
223
|
## Support
|
|
204
224
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
<div align="center">
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
<img width="350" alt="Graphiti-ts-small" src="https://github.com/user-attachments/assets/bbd02947-e435-4a05-b25a-bbbac36d52c8">
|
|
4
4
|
|
|
5
5
|
## Temporal Knowledge Graphs for Agentic Applications
|
|
6
6
|
|
|
@@ -16,7 +16,9 @@
|
|
|
16
16
|
|
|
17
17
|
</div>
|
|
18
18
|
|
|
19
|
-
Graphiti builds dynamic, temporally aware Knowledge Graphs that represent complex, evolving relationships between
|
|
19
|
+
Graphiti builds dynamic, temporally aware Knowledge Graphs that represent complex, evolving relationships between
|
|
20
|
+
entities over time. Graphiti ingests both unstructured and structured data, and the resulting graph may be queried using
|
|
21
|
+
a fusion of time, full-text, semantic, and graph algorithm approaches.
|
|
20
22
|
|
|
21
23
|
<br />
|
|
22
24
|
|
|
@@ -26,25 +28,39 @@ Graphiti builds dynamic, temporally aware Knowledge Graphs that represent comple
|
|
|
26
28
|
|
|
27
29
|
<br />
|
|
28
30
|
|
|
29
|
-
Graphiti helps you create and query Knowledge Graphs that evolve over time. A knowledge graph is a network of
|
|
31
|
+
Graphiti helps you create and query Knowledge Graphs that evolve over time. A knowledge graph is a network of
|
|
32
|
+
interconnected facts, such as _“Kendra loves Adidas shoes.”_ Each fact is a “triplet” represented by two entities, or
|
|
33
|
+
nodes (_“Kendra”_, _“Adidas shoes”_), and their relationship, or edge (_“loves”_). Knowledge Graphs have been explored
|
|
34
|
+
extensively for information retrieval. What makes Graphiti unique is its ability to autonomously build a knowledge graph
|
|
35
|
+
while handling changing relationships and maintaining historical context.
|
|
30
36
|
|
|
31
37
|
With Graphiti, you can build LLM applications such as:
|
|
32
38
|
|
|
33
|
-
- Assistants that learn from user interactions, fusing personal knowledge with dynamic data from business systems like
|
|
39
|
+
- Assistants that learn from user interactions, fusing personal knowledge with dynamic data from business systems like
|
|
40
|
+
CRMs and billing platforms.
|
|
34
41
|
- Agents that autonomously execute complex tasks, reasoning with state changes from multiple dynamic sources.
|
|
35
42
|
|
|
36
|
-
Graphiti supports a wide range of applications in sales, customer service, health, finance, and more, enabling long-term
|
|
43
|
+
Graphiti supports a wide range of applications in sales, customer service, health, finance, and more, enabling long-term
|
|
44
|
+
recall and state-based reasoning for both assistants and agents.
|
|
37
45
|
|
|
38
46
|
## Why Graphiti?
|
|
39
47
|
|
|
40
|
-
We were intrigued by Microsoft’s GraphRAG, which expanded on RAG text chunking by using a graph to better model a
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
+
We were intrigued by Microsoft’s GraphRAG, which expanded on RAG text chunking by using a graph to better model a
|
|
49
|
+
document corpus and making this representation available via semantic and graph search techniques. However, GraphRAG did
|
|
50
|
+
not address our core problem: It's primarily designed for static documents and doesn't inherently handle temporal
|
|
51
|
+
aspects of data.
|
|
52
|
+
|
|
53
|
+
Graphiti is designed from the ground up to handle constantly changing information, hybrid semantic and graph search, and
|
|
54
|
+
scale:
|
|
55
|
+
|
|
56
|
+
- **Temporal Awareness:** Tracks changes in facts and relationships over time, enabling point-in-time queries. Graph
|
|
57
|
+
edges include temporal metadata to record relationship lifecycles.
|
|
58
|
+
- **Episodic Processing:** Ingests data as discrete episodes, maintaining data provenance and allowing incremental
|
|
59
|
+
entity and relationship extraction.
|
|
60
|
+
- **Hybrid Search:** Combines semantic and BM25 full-text search, with the ability to rerank results by distance from a
|
|
61
|
+
central node e.g. “Kendra”.
|
|
62
|
+
- **Scalable:** Designed for processing large datasets, with parallelization of LLM calls for bulk processing while
|
|
63
|
+
preserving the chronology of events.
|
|
48
64
|
- **Supports Varied Sources:** Can ingest both unstructured text and structured JSON data.
|
|
49
65
|
|
|
50
66
|
<p align="center">
|
|
@@ -70,7 +86,8 @@ Optional:
|
|
|
70
86
|
- Anthropic or Groq API key (for alternative LLM providers)
|
|
71
87
|
|
|
72
88
|
> [!TIP]
|
|
73
|
-
> The simplest way to install Neo4j is via [Neo4j Desktop](https://neo4j.com/download/). It provides a user-friendly
|
|
89
|
+
> The simplest way to install Neo4j is via [Neo4j Desktop](https://neo4j.com/download/). It provides a user-friendly
|
|
90
|
+
> interface to manage Neo4j instances and databases.
|
|
74
91
|
|
|
75
92
|
```bash
|
|
76
93
|
pip install graphiti-core
|
|
@@ -85,7 +102,8 @@ poetry add graphiti-core
|
|
|
85
102
|
## Quick Start
|
|
86
103
|
|
|
87
104
|
> [!IMPORTANT]
|
|
88
|
-
> Graphiti uses OpenAI for LLM inference and embedding. Ensure that an `OPENAI_API_KEY` is set in your environment.
|
|
105
|
+
> Graphiti uses OpenAI for LLM inference and embedding. Ensure that an `OPENAI_API_KEY` is set in your environment.
|
|
106
|
+
> Support for Anthropic and Groq LLM inference is available, too.
|
|
89
107
|
|
|
90
108
|
```python
|
|
91
109
|
from graphiti_core import Graphiti
|
|
@@ -119,25 +137,25 @@ for i, episode in enumerate(episodes):
|
|
|
119
137
|
results = await graphiti.search('Who was the California Attorney General?')
|
|
120
138
|
[
|
|
121
139
|
EntityEdge(
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
140
|
+
│ uuid = '3133258f738e487383f07b04e15d4ac0',
|
|
141
|
+
│ source_node_uuid = '2a85789b318d4e418050506879906e62',
|
|
142
|
+
│ target_node_uuid = 'baf7781f445945989d6e4f927f881556',
|
|
143
|
+
│ created_at = datetime.datetime(2024, 8, 26, 13, 13, 24, 861097),
|
|
144
|
+
│ name = 'HELD_POSITION',
|
|
145
|
+
# the fact reflects the updated state that Harris is
|
|
146
|
+
# no longer the AG of California
|
|
147
|
+
│ fact = 'Kamala Harris was the Attorney General of California',
|
|
148
|
+
│ fact_embedding = [
|
|
149
|
+
│ │ -0.009955154731869698,
|
|
150
|
+
│ ...
|
|
151
|
+
│ │ 0.00784289836883545
|
|
152
|
+
│],
|
|
153
|
+
│ episodes = ['b43e98ad0a904088a76c67985caecc22'],
|
|
154
|
+
│ expired_at = datetime.datetime(2024, 8, 26, 20, 18, 1, 53812),
|
|
155
|
+
# These dates represent the date this edge was true.
|
|
156
|
+
│ valid_at = datetime.datetime(2011, 1, 3, 0, 0, tzinfo= < UTC >),
|
|
157
|
+
│ invalid_at = datetime.datetime(2017, 1, 3, 0, 0, tzinfo= < UTC >)
|
|
158
|
+
)
|
|
141
159
|
]
|
|
142
160
|
|
|
143
161
|
# Rerank search results based on graph distance
|
|
@@ -170,14 +188,16 @@ Graphiti is under active development. We aim to maintain API stability while wor
|
|
|
170
188
|
- [ ] Achieving good performance with different LLM and embedding models
|
|
171
189
|
- [ ] Creating a dedicated embedder interface
|
|
172
190
|
- [ ] Supporting custom graph schemas:
|
|
173
|
-
|
|
174
|
-
|
|
191
|
+
- Allow developers to provide their own defined node and edge classes when ingesting episodes
|
|
192
|
+
- Enable more flexible knowledge representation tailored to specific use cases
|
|
175
193
|
- [ ] Enhancing retrieval capabilities with more robust and configurable options
|
|
176
194
|
- [ ] Expanding test coverage to ensure reliability and catch edge cases
|
|
177
195
|
|
|
178
196
|
## Contributing
|
|
179
197
|
|
|
180
|
-
We encourage and appreciate all forms of contributions, whether it's code, documentation, addressing GitHub Issues, or
|
|
198
|
+
We encourage and appreciate all forms of contributions, whether it's code, documentation, addressing GitHub Issues, or
|
|
199
|
+
answering questions in the Graphiti Discord channel. For detailed guidelines on code contributions, please refer
|
|
200
|
+
to [CONTRIBUTING](CONTRIBUTING.md).
|
|
181
201
|
|
|
182
202
|
## Support
|
|
183
203
|
|
|
@@ -104,7 +104,6 @@ class EpisodicEdge(Edge):
|
|
|
104
104
|
|
|
105
105
|
edges = [get_episodic_edge_from_record(record) for record in records]
|
|
106
106
|
|
|
107
|
-
logger.info(f'Found Edge: {uuid}')
|
|
108
107
|
if len(edges) == 0:
|
|
109
108
|
raise EdgeNotFoundError(uuid)
|
|
110
109
|
return edges[0]
|
|
@@ -127,7 +126,29 @@ class EpisodicEdge(Edge):
|
|
|
127
126
|
|
|
128
127
|
edges = [get_episodic_edge_from_record(record) for record in records]
|
|
129
128
|
|
|
130
|
-
|
|
129
|
+
if len(edges) == 0:
|
|
130
|
+
raise EdgeNotFoundError(uuids[0])
|
|
131
|
+
return edges
|
|
132
|
+
|
|
133
|
+
@classmethod
|
|
134
|
+
async def get_by_group_ids(cls, driver: AsyncDriver, group_ids: list[str | None]):
|
|
135
|
+
records, _, _ = await driver.execute_query(
|
|
136
|
+
"""
|
|
137
|
+
MATCH (n:Episodic)-[e:MENTIONS]->(m:Entity)
|
|
138
|
+
WHERE e.group_id IN $group_ids
|
|
139
|
+
RETURN
|
|
140
|
+
e.uuid As uuid,
|
|
141
|
+
e.group_id AS group_id,
|
|
142
|
+
n.uuid AS source_node_uuid,
|
|
143
|
+
m.uuid AS target_node_uuid,
|
|
144
|
+
e.created_at AS created_at
|
|
145
|
+
""",
|
|
146
|
+
group_ids=group_ids,
|
|
147
|
+
)
|
|
148
|
+
|
|
149
|
+
edges = [get_episodic_edge_from_record(record) for record in records]
|
|
150
|
+
uuids = [edge.uuid for edge in edges]
|
|
151
|
+
|
|
131
152
|
if len(edges) == 0:
|
|
132
153
|
raise EdgeNotFoundError(uuids[0])
|
|
133
154
|
return edges
|
|
@@ -215,7 +236,6 @@ class EntityEdge(Edge):
|
|
|
215
236
|
|
|
216
237
|
edges = [get_entity_edge_from_record(record) for record in records]
|
|
217
238
|
|
|
218
|
-
logger.info(f'Found Edge: {uuid}')
|
|
219
239
|
if len(edges) == 0:
|
|
220
240
|
raise EdgeNotFoundError(uuid)
|
|
221
241
|
return edges[0]
|
|
@@ -245,7 +265,36 @@ class EntityEdge(Edge):
|
|
|
245
265
|
|
|
246
266
|
edges = [get_entity_edge_from_record(record) for record in records]
|
|
247
267
|
|
|
248
|
-
|
|
268
|
+
if len(edges) == 0:
|
|
269
|
+
raise EdgeNotFoundError(uuids[0])
|
|
270
|
+
return edges
|
|
271
|
+
|
|
272
|
+
@classmethod
|
|
273
|
+
async def get_by_group_ids(cls, driver: AsyncDriver, group_ids: list[str | None]):
|
|
274
|
+
records, _, _ = await driver.execute_query(
|
|
275
|
+
"""
|
|
276
|
+
MATCH (n:Entity)-[e:RELATES_TO]->(m:Entity)
|
|
277
|
+
WHERE e.group_id IN $group_ids
|
|
278
|
+
RETURN
|
|
279
|
+
e.uuid AS uuid,
|
|
280
|
+
n.uuid AS source_node_uuid,
|
|
281
|
+
m.uuid AS target_node_uuid,
|
|
282
|
+
e.created_at AS created_at,
|
|
283
|
+
e.name AS name,
|
|
284
|
+
e.group_id AS group_id,
|
|
285
|
+
e.fact AS fact,
|
|
286
|
+
e.fact_embedding AS fact_embedding,
|
|
287
|
+
e.episodes AS episodes,
|
|
288
|
+
e.expired_at AS expired_at,
|
|
289
|
+
e.valid_at AS valid_at,
|
|
290
|
+
e.invalid_at AS invalid_at
|
|
291
|
+
""",
|
|
292
|
+
group_ids=group_ids,
|
|
293
|
+
)
|
|
294
|
+
|
|
295
|
+
edges = [get_entity_edge_from_record(record) for record in records]
|
|
296
|
+
uuids = [edge.uuid for edge in edges]
|
|
297
|
+
|
|
249
298
|
if len(edges) == 0:
|
|
250
299
|
raise EdgeNotFoundError(uuids[0])
|
|
251
300
|
return edges
|
|
@@ -288,8 +337,6 @@ class CommunityEdge(Edge):
|
|
|
288
337
|
|
|
289
338
|
edges = [get_community_edge_from_record(record) for record in records]
|
|
290
339
|
|
|
291
|
-
logger.info(f'Found Edge: {uuid}')
|
|
292
|
-
|
|
293
340
|
return edges[0]
|
|
294
341
|
|
|
295
342
|
@classmethod
|
|
@@ -310,7 +357,25 @@ class CommunityEdge(Edge):
|
|
|
310
357
|
|
|
311
358
|
edges = [get_community_edge_from_record(record) for record in records]
|
|
312
359
|
|
|
313
|
-
|
|
360
|
+
return edges
|
|
361
|
+
|
|
362
|
+
@classmethod
|
|
363
|
+
async def get_by_group_ids(cls, driver: AsyncDriver, group_ids: list[str | None]):
|
|
364
|
+
records, _, _ = await driver.execute_query(
|
|
365
|
+
"""
|
|
366
|
+
MATCH (n:Community)-[e:HAS_MEMBER]->(m:Entity | Community)
|
|
367
|
+
WHERE e.group_id IN $group_ids
|
|
368
|
+
RETURN
|
|
369
|
+
e.uuid As uuid,
|
|
370
|
+
e.group_id AS group_id,
|
|
371
|
+
n.uuid AS source_node_uuid,
|
|
372
|
+
m.uuid AS target_node_uuid,
|
|
373
|
+
e.created_at AS created_at
|
|
374
|
+
""",
|
|
375
|
+
group_ids=group_ids,
|
|
376
|
+
)
|
|
377
|
+
|
|
378
|
+
edges = [get_community_edge_from_record(record) for record in records]
|
|
314
379
|
|
|
315
380
|
return edges
|
|
316
381
|
|
|
@@ -77,7 +77,14 @@ load_dotenv()
|
|
|
77
77
|
|
|
78
78
|
|
|
79
79
|
class Graphiti:
|
|
80
|
-
def __init__(
|
|
80
|
+
def __init__(
|
|
81
|
+
self,
|
|
82
|
+
uri: str,
|
|
83
|
+
user: str,
|
|
84
|
+
password: str,
|
|
85
|
+
llm_client: LLMClient | None = None,
|
|
86
|
+
store_raw_episode_content: bool = True,
|
|
87
|
+
):
|
|
81
88
|
"""
|
|
82
89
|
Initialize a Graphiti instance.
|
|
83
90
|
|
|
@@ -116,6 +123,7 @@ class Graphiti:
|
|
|
116
123
|
"""
|
|
117
124
|
self.driver = AsyncGraphDatabase.driver(uri, auth=(user, password))
|
|
118
125
|
self.database = 'neo4j'
|
|
126
|
+
self.store_raw_episode_content = store_raw_episode_content
|
|
119
127
|
if llm_client:
|
|
120
128
|
self.llm_client = llm_client
|
|
121
129
|
else:
|
|
@@ -150,8 +158,8 @@ class Graphiti:
|
|
|
150
158
|
# Use graphiti...
|
|
151
159
|
finally:
|
|
152
160
|
graphiti.close()
|
|
153
|
-
self.driver.close()
|
|
154
161
|
"""
|
|
162
|
+
self.driver.close()
|
|
155
163
|
|
|
156
164
|
async def build_indices_and_constraints(self):
|
|
157
165
|
"""
|
|
@@ -251,6 +259,8 @@ class Graphiti:
|
|
|
251
259
|
An id for the graph partition the episode is a part of.
|
|
252
260
|
uuid : str | None
|
|
253
261
|
Optional uuid of the episode.
|
|
262
|
+
update_communities : bool
|
|
263
|
+
Optional. Whether to update communities with new node information
|
|
254
264
|
|
|
255
265
|
Returns
|
|
256
266
|
-------
|
|
@@ -276,7 +286,6 @@ class Graphiti:
|
|
|
276
286
|
try:
|
|
277
287
|
start = time()
|
|
278
288
|
|
|
279
|
-
nodes: list[EntityNode] = []
|
|
280
289
|
entity_edges: list[EntityEdge] = []
|
|
281
290
|
embedder = self.llm_client.get_embedder()
|
|
282
291
|
now = datetime.now()
|
|
@@ -295,6 +304,8 @@ class Graphiti:
|
|
|
295
304
|
valid_at=reference_time,
|
|
296
305
|
)
|
|
297
306
|
episode.uuid = uuid if uuid is not None else episode.uuid
|
|
307
|
+
if not self.store_raw_episode_content:
|
|
308
|
+
episode.content = ''
|
|
298
309
|
|
|
299
310
|
# Extract entities as nodes
|
|
300
311
|
|
|
@@ -323,7 +334,7 @@ class Graphiti:
|
|
|
323
334
|
),
|
|
324
335
|
)
|
|
325
336
|
logger.info(f'Adjusted mentioned nodes: {[(n.name, n.uuid) for n in mentioned_nodes]}')
|
|
326
|
-
nodes
|
|
337
|
+
nodes = mentioned_nodes
|
|
327
338
|
|
|
328
339
|
extracted_edges_with_resolved_pointers = resolve_edge_pointers(
|
|
329
340
|
extracted_edges, uuid_map
|
|
@@ -568,7 +579,7 @@ class Graphiti:
|
|
|
568
579
|
center_node_uuid: str | None = None,
|
|
569
580
|
group_ids: list[str | None] | None = None,
|
|
570
581
|
num_results=DEFAULT_SEARCH_LIMIT,
|
|
571
|
-
):
|
|
582
|
+
) -> list[EntityEdge]:
|
|
572
583
|
"""
|
|
573
584
|
Perform a hybrid search on the knowledge graph.
|
|
574
585
|
|
|
@@ -30,13 +30,17 @@ from .errors import RateLimitError
|
|
|
30
30
|
logger = logging.getLogger(__name__)
|
|
31
31
|
|
|
32
32
|
DEFAULT_MODEL = 'claude-3-5-sonnet-20240620'
|
|
33
|
+
DEFAULT_MAX_TOKENS = 8192
|
|
33
34
|
|
|
34
35
|
|
|
35
36
|
class AnthropicClient(LLMClient):
|
|
36
37
|
def __init__(self, config: LLMConfig | None = None, cache: bool = False):
|
|
37
38
|
if config is None:
|
|
38
|
-
config = LLMConfig()
|
|
39
|
+
config = LLMConfig(max_tokens=DEFAULT_MAX_TOKENS)
|
|
40
|
+
elif config.max_tokens is None:
|
|
41
|
+
config.max_tokens = DEFAULT_MAX_TOKENS
|
|
39
42
|
super().__init__(config, cache)
|
|
43
|
+
|
|
40
44
|
self.client = AsyncAnthropic(
|
|
41
45
|
api_key=config.api_key,
|
|
42
46
|
# we'll use tenacity to retry
|
|
@@ -31,13 +31,17 @@ from .errors import RateLimitError
|
|
|
31
31
|
logger = logging.getLogger(__name__)
|
|
32
32
|
|
|
33
33
|
DEFAULT_MODEL = 'llama-3.1-70b-versatile'
|
|
34
|
+
DEFAULT_MAX_TOKENS = 2048
|
|
34
35
|
|
|
35
36
|
|
|
36
37
|
class GroqClient(LLMClient):
|
|
37
38
|
def __init__(self, config: LLMConfig | None = None, cache: bool = False):
|
|
38
39
|
if config is None:
|
|
39
|
-
config = LLMConfig()
|
|
40
|
+
config = LLMConfig(max_tokens=DEFAULT_MAX_TOKENS)
|
|
41
|
+
elif config.max_tokens is None:
|
|
42
|
+
config.max_tokens = DEFAULT_MAX_TOKENS
|
|
40
43
|
super().__init__(config, cache)
|
|
44
|
+
|
|
41
45
|
self.client = AsyncGroq(api_key=config.api_key)
|
|
42
46
|
|
|
43
47
|
def get_embedder(self) -> typing.Any:
|
|
@@ -33,13 +33,50 @@ DEFAULT_MODEL = 'gpt-4o-2024-08-06'
|
|
|
33
33
|
|
|
34
34
|
|
|
35
35
|
class OpenAIClient(LLMClient):
|
|
36
|
-
|
|
36
|
+
"""
|
|
37
|
+
OpenAIClient is a client class for interacting with OpenAI's language models.
|
|
38
|
+
|
|
39
|
+
This class extends the LLMClient and provides methods to initialize the client,
|
|
40
|
+
get an embedder, and generate responses from the language model.
|
|
41
|
+
|
|
42
|
+
Attributes:
|
|
43
|
+
client (AsyncOpenAI): The OpenAI client used to interact with the API.
|
|
44
|
+
model (str): The model name to use for generating responses.
|
|
45
|
+
temperature (float): The temperature to use for generating responses.
|
|
46
|
+
max_tokens (int): The maximum number of tokens to generate in a response.
|
|
47
|
+
|
|
48
|
+
Methods:
|
|
49
|
+
__init__(config: LLMConfig | None = None, cache: bool = False, client: typing.Any = None):
|
|
50
|
+
Initializes the OpenAIClient with the provided configuration, cache setting, and client.
|
|
51
|
+
|
|
52
|
+
get_embedder() -> typing.Any:
|
|
53
|
+
Returns the embedder from the OpenAI client.
|
|
54
|
+
|
|
55
|
+
_generate_response(messages: list[Message]) -> dict[str, typing.Any]:
|
|
56
|
+
Generates a response from the language model based on the provided messages.
|
|
57
|
+
"""
|
|
58
|
+
|
|
59
|
+
def __init__(
|
|
60
|
+
self, config: LLMConfig | None = None, cache: bool = False, client: typing.Any = None
|
|
61
|
+
):
|
|
62
|
+
"""
|
|
63
|
+
Initialize the OpenAIClient with the provided configuration, cache setting, and client.
|
|
64
|
+
|
|
65
|
+
Args:
|
|
66
|
+
config (LLMConfig | None): The configuration for the LLM client, including API key, model, base URL, temperature, and max tokens.
|
|
67
|
+
cache (bool): Whether to use caching for responses. Defaults to False.
|
|
68
|
+
client (Any | None): An optional async client instance to use. If not provided, a new AsyncOpenAI client is created.
|
|
69
|
+
|
|
70
|
+
"""
|
|
37
71
|
if config is None:
|
|
38
72
|
config = LLMConfig()
|
|
39
73
|
|
|
40
74
|
super().__init__(config, cache)
|
|
41
75
|
|
|
42
|
-
|
|
76
|
+
if client is None:
|
|
77
|
+
self.client = AsyncOpenAI(api_key=config.api_key, base_url=config.base_url)
|
|
78
|
+
else:
|
|
79
|
+
self.client = client
|
|
43
80
|
|
|
44
81
|
def get_embedder(self) -> typing.Any:
|
|
45
82
|
return self.client.embeddings
|
|
@@ -158,8 +158,6 @@ class EpisodicNode(Node):
|
|
|
158
158
|
|
|
159
159
|
episodes = [get_episodic_node_from_record(record) for record in records]
|
|
160
160
|
|
|
161
|
-
logger.info(f'Found Node: {uuid}')
|
|
162
|
-
|
|
163
161
|
if len(episodes) == 0:
|
|
164
162
|
raise NodeNotFoundError(uuid)
|
|
165
163
|
|
|
@@ -185,7 +183,27 @@ class EpisodicNode(Node):
|
|
|
185
183
|
|
|
186
184
|
episodes = [get_episodic_node_from_record(record) for record in records]
|
|
187
185
|
|
|
188
|
-
|
|
186
|
+
return episodes
|
|
187
|
+
|
|
188
|
+
@classmethod
|
|
189
|
+
async def get_by_group_ids(cls, driver: AsyncDriver, group_ids: list[str | None]):
|
|
190
|
+
records, _, _ = await driver.execute_query(
|
|
191
|
+
"""
|
|
192
|
+
MATCH (e:Episodic) WHERE e.group_id IN $group_ids
|
|
193
|
+
RETURN DISTINCT
|
|
194
|
+
e.content AS content,
|
|
195
|
+
e.created_at AS created_at,
|
|
196
|
+
e.valid_at AS valid_at,
|
|
197
|
+
e.uuid AS uuid,
|
|
198
|
+
e.name AS name,
|
|
199
|
+
e.group_id AS group_id,
|
|
200
|
+
e.source_description AS source_description,
|
|
201
|
+
e.source AS source
|
|
202
|
+
""",
|
|
203
|
+
group_ids=group_ids,
|
|
204
|
+
)
|
|
205
|
+
|
|
206
|
+
episodes = [get_episodic_node_from_record(record) for record in records]
|
|
189
207
|
|
|
190
208
|
return episodes
|
|
191
209
|
|
|
@@ -240,8 +258,6 @@ class EntityNode(Node):
|
|
|
240
258
|
|
|
241
259
|
nodes = [get_entity_node_from_record(record) for record in records]
|
|
242
260
|
|
|
243
|
-
logger.info(f'Found Node: {uuid}')
|
|
244
|
-
|
|
245
261
|
return nodes[0]
|
|
246
262
|
|
|
247
263
|
@classmethod
|
|
@@ -262,7 +278,25 @@ class EntityNode(Node):
|
|
|
262
278
|
|
|
263
279
|
nodes = [get_entity_node_from_record(record) for record in records]
|
|
264
280
|
|
|
265
|
-
|
|
281
|
+
return nodes
|
|
282
|
+
|
|
283
|
+
@classmethod
|
|
284
|
+
async def get_by_group_ids(cls, driver: AsyncDriver, group_ids: list[str | None]):
|
|
285
|
+
records, _, _ = await driver.execute_query(
|
|
286
|
+
"""
|
|
287
|
+
MATCH (n:Entity) WHERE n.group_id IN $group_ids
|
|
288
|
+
RETURN
|
|
289
|
+
n.uuid As uuid,
|
|
290
|
+
n.name AS name,
|
|
291
|
+
n.name_embedding AS name_embedding,
|
|
292
|
+
n.group_id AS group_id,
|
|
293
|
+
n.created_at AS created_at,
|
|
294
|
+
n.summary AS summary
|
|
295
|
+
""",
|
|
296
|
+
group_ids=group_ids,
|
|
297
|
+
)
|
|
298
|
+
|
|
299
|
+
nodes = [get_entity_node_from_record(record) for record in records]
|
|
266
300
|
|
|
267
301
|
return nodes
|
|
268
302
|
|
|
@@ -317,8 +351,6 @@ class CommunityNode(Node):
|
|
|
317
351
|
|
|
318
352
|
nodes = [get_community_node_from_record(record) for record in records]
|
|
319
353
|
|
|
320
|
-
logger.info(f'Found Node: {uuid}')
|
|
321
|
-
|
|
322
354
|
return nodes[0]
|
|
323
355
|
|
|
324
356
|
@classmethod
|
|
@@ -337,11 +369,29 @@ class CommunityNode(Node):
|
|
|
337
369
|
uuids=uuids,
|
|
338
370
|
)
|
|
339
371
|
|
|
340
|
-
|
|
372
|
+
communities = [get_community_node_from_record(record) for record in records]
|
|
341
373
|
|
|
342
|
-
|
|
374
|
+
return communities
|
|
343
375
|
|
|
344
|
-
|
|
376
|
+
@classmethod
|
|
377
|
+
async def get_by_group_ids(cls, driver: AsyncDriver, group_ids: list[str | None]):
|
|
378
|
+
records, _, _ = await driver.execute_query(
|
|
379
|
+
"""
|
|
380
|
+
MATCH (n:Community) WHERE n.group_id IN $group_ids
|
|
381
|
+
RETURN
|
|
382
|
+
n.uuid As uuid,
|
|
383
|
+
n.name AS name,
|
|
384
|
+
n.name_embedding AS name_embedding,
|
|
385
|
+
n.group_id AS group_id,
|
|
386
|
+
n.created_at AS created_at,
|
|
387
|
+
n.summary AS summary
|
|
388
|
+
""",
|
|
389
|
+
group_ids=group_ids,
|
|
390
|
+
)
|
|
391
|
+
|
|
392
|
+
communities = [get_community_node_from_record(record) for record in records]
|
|
393
|
+
|
|
394
|
+
return communities
|
|
345
395
|
|
|
346
396
|
|
|
347
397
|
# Node helpers
|
{graphiti_core-0.3.3 → graphiti_core-0.3.4}/graphiti_core/utils/maintenance/community_operations.py
RENAMED
|
@@ -4,6 +4,7 @@ from collections import defaultdict
|
|
|
4
4
|
from datetime import datetime
|
|
5
5
|
|
|
6
6
|
from neo4j import AsyncDriver
|
|
7
|
+
from pydantic import BaseModel
|
|
7
8
|
|
|
8
9
|
from graphiti_core.edges import CommunityEdge
|
|
9
10
|
from graphiti_core.llm_client import LLMClient
|
|
@@ -11,9 +12,17 @@ from graphiti_core.nodes import CommunityNode, EntityNode, get_community_node_fr
|
|
|
11
12
|
from graphiti_core.prompts import prompt_library
|
|
12
13
|
from graphiti_core.utils.maintenance.edge_operations import build_community_edges
|
|
13
14
|
|
|
15
|
+
MAX_COMMUNITY_BUILD_CONCURRENCY = 10
|
|
16
|
+
|
|
17
|
+
|
|
14
18
|
logger = logging.getLogger(__name__)
|
|
15
19
|
|
|
16
20
|
|
|
21
|
+
class Neighbor(BaseModel):
|
|
22
|
+
node_uuid: str
|
|
23
|
+
edge_count: int
|
|
24
|
+
|
|
25
|
+
|
|
17
26
|
async def build_community_projection(driver: AsyncDriver) -> str:
|
|
18
27
|
records, _, _ = await driver.execute_query("""
|
|
19
28
|
CALL gds.graph.project("communities", "Entity",
|
|
@@ -29,36 +38,96 @@ async def build_community_projection(driver: AsyncDriver) -> str:
|
|
|
29
38
|
return records[0]['graph']
|
|
30
39
|
|
|
31
40
|
|
|
32
|
-
async def
|
|
33
|
-
|
|
34
|
-
"""
|
|
35
|
-
CALL gds.graph.drop($projection_name)
|
|
36
|
-
""",
|
|
37
|
-
projection_name=projection_name,
|
|
38
|
-
)
|
|
41
|
+
async def get_community_clusters(driver: AsyncDriver) -> list[list[EntityNode]]:
|
|
42
|
+
community_clusters: list[list[EntityNode]] = []
|
|
39
43
|
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
)
|
|
44
|
-
records, _, _ = await driver.execute_query("""
|
|
45
|
-
CALL gds.leiden.stream("communities")
|
|
46
|
-
YIELD nodeId, communityId
|
|
47
|
-
RETURN gds.util.asNode(nodeId).uuid AS entity_uuid, communityId
|
|
44
|
+
group_id_values, _, _ = await driver.execute_query("""
|
|
45
|
+
MATCH (n:Entity WHERE n.group_id IS NOT NULL)
|
|
46
|
+
RETURN
|
|
47
|
+
collect(DISTINCT n.group_id) AS group_ids
|
|
48
48
|
""")
|
|
49
|
-
community_map: dict[int, list[str]] = defaultdict(list)
|
|
50
|
-
for record in records:
|
|
51
|
-
community_map[record['communityId']].append(record['entity_uuid'])
|
|
52
49
|
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
50
|
+
group_ids = group_id_values[0]['group_ids']
|
|
51
|
+
for group_id in group_ids:
|
|
52
|
+
projection: dict[str, list[Neighbor]] = {}
|
|
53
|
+
nodes = await EntityNode.get_by_group_ids(driver, [group_id])
|
|
54
|
+
for node in nodes:
|
|
55
|
+
records, _, _ = await driver.execute_query(
|
|
56
|
+
"""
|
|
57
|
+
MATCH (n:Entity {group_id: $group_id, uuid: $uuid})-[r:RELATES_TO]-(m: Entity {group_id: $group_id})
|
|
58
|
+
WITH count(r) AS count, m.uuid AS uuid
|
|
59
|
+
RETURN
|
|
60
|
+
uuid,
|
|
61
|
+
count
|
|
62
|
+
""",
|
|
63
|
+
uuid=node.uuid,
|
|
64
|
+
group_id=group_id,
|
|
65
|
+
)
|
|
66
|
+
|
|
67
|
+
projection[node.uuid] = [
|
|
68
|
+
Neighbor(node_uuid=record['uuid'], edge_count=record['count']) for record in records
|
|
69
|
+
]
|
|
70
|
+
|
|
71
|
+
cluster_uuids = label_propagation(projection)
|
|
72
|
+
|
|
73
|
+
community_clusters.extend(
|
|
74
|
+
list(
|
|
75
|
+
await asyncio.gather(
|
|
76
|
+
*[EntityNode.get_by_uuids(driver, cluster) for cluster in cluster_uuids]
|
|
77
|
+
)
|
|
78
|
+
)
|
|
56
79
|
)
|
|
57
|
-
)
|
|
58
80
|
|
|
59
81
|
return community_clusters
|
|
60
82
|
|
|
61
83
|
|
|
84
|
+
def label_propagation(projection: dict[str, list[Neighbor]]) -> list[list[str]]:
|
|
85
|
+
# Implement the label propagation community detection algorithm.
|
|
86
|
+
# 1. Start with each node being assigned its own community
|
|
87
|
+
# 2. Each node will take on the community of the plurality of its neighbors
|
|
88
|
+
# 3. Ties are broken by going to the largest community
|
|
89
|
+
# 4. Continue until no communities change during propagation
|
|
90
|
+
|
|
91
|
+
community_map = {uuid: i for i, uuid in enumerate(projection.keys())}
|
|
92
|
+
|
|
93
|
+
while True:
|
|
94
|
+
no_change = True
|
|
95
|
+
new_community_map: dict[str, int] = {}
|
|
96
|
+
|
|
97
|
+
for uuid, neighbors in projection.items():
|
|
98
|
+
curr_community = community_map[uuid]
|
|
99
|
+
|
|
100
|
+
community_candidates: dict[int, int] = defaultdict(int)
|
|
101
|
+
for neighbor in neighbors:
|
|
102
|
+
community_candidates[community_map[neighbor.node_uuid]] += neighbor.edge_count
|
|
103
|
+
|
|
104
|
+
community_lst = [
|
|
105
|
+
(count, community) for community, count in community_candidates.items()
|
|
106
|
+
]
|
|
107
|
+
|
|
108
|
+
community_lst.sort(reverse=True)
|
|
109
|
+
community_candidate = community_lst[0][1] if len(community_lst) > 0 else -1
|
|
110
|
+
|
|
111
|
+
new_community = max(community_candidate, curr_community)
|
|
112
|
+
|
|
113
|
+
new_community_map[uuid] = new_community
|
|
114
|
+
|
|
115
|
+
if new_community != curr_community:
|
|
116
|
+
no_change = False
|
|
117
|
+
|
|
118
|
+
if no_change:
|
|
119
|
+
break
|
|
120
|
+
|
|
121
|
+
community_map = new_community_map
|
|
122
|
+
|
|
123
|
+
community_cluster_map = defaultdict(list)
|
|
124
|
+
for uuid, community in community_map.items():
|
|
125
|
+
community_cluster_map[community].append(uuid)
|
|
126
|
+
|
|
127
|
+
clusters = [cluster for cluster in community_cluster_map.values()]
|
|
128
|
+
return clusters
|
|
129
|
+
|
|
130
|
+
|
|
62
131
|
async def summarize_pair(llm_client: LLMClient, summary_pair: tuple[str, str]) -> str:
|
|
63
132
|
# Prepare context for LLM
|
|
64
133
|
context = {'node_summaries': [{'summary': summary} for summary in summary_pair]}
|
|
@@ -85,7 +154,7 @@ async def generate_summary_description(llm_client: LLMClient, summary: str) -> s
|
|
|
85
154
|
|
|
86
155
|
|
|
87
156
|
async def build_community(
|
|
88
|
-
|
|
157
|
+
llm_client: LLMClient, community_cluster: list[EntityNode]
|
|
89
158
|
) -> tuple[CommunityNode, list[CommunityEdge]]:
|
|
90
159
|
summaries = [entity.summary for entity in community_cluster]
|
|
91
160
|
length = len(summaries)
|
|
@@ -99,7 +168,7 @@ async def build_community(
|
|
|
99
168
|
*[
|
|
100
169
|
summarize_pair(llm_client, (str(left_summary), str(right_summary)))
|
|
101
170
|
for left_summary, right_summary in zip(
|
|
102
|
-
summaries[: int(length / 2)], summaries[int(length / 2)
|
|
171
|
+
summaries[: int(length / 2)], summaries[int(length / 2):]
|
|
103
172
|
)
|
|
104
173
|
]
|
|
105
174
|
)
|
|
@@ -127,15 +196,18 @@ async def build_community(
|
|
|
127
196
|
|
|
128
197
|
|
|
129
198
|
async def build_communities(
|
|
130
|
-
|
|
199
|
+
driver: AsyncDriver, llm_client: LLMClient
|
|
131
200
|
) -> tuple[list[CommunityNode], list[CommunityEdge]]:
|
|
132
|
-
|
|
133
|
-
|
|
201
|
+
community_clusters = await get_community_clusters(driver)
|
|
202
|
+
|
|
203
|
+
semaphore = asyncio.Semaphore(MAX_COMMUNITY_BUILD_CONCURRENCY)
|
|
204
|
+
|
|
205
|
+
async def limited_build_community(cluster):
|
|
206
|
+
async with semaphore:
|
|
207
|
+
return await build_community(llm_client, cluster)
|
|
134
208
|
|
|
135
209
|
communities: list[tuple[CommunityNode, list[CommunityEdge]]] = list(
|
|
136
|
-
await asyncio.gather(
|
|
137
|
-
*[build_community(llm_client, cluster) for cluster in community_clusters]
|
|
138
|
-
)
|
|
210
|
+
await asyncio.gather(*[limited_build_community(cluster) for cluster in community_clusters])
|
|
139
211
|
)
|
|
140
212
|
|
|
141
213
|
community_nodes: list[CommunityNode] = []
|
|
@@ -144,7 +216,6 @@ async def build_communities(
|
|
|
144
216
|
community_nodes.append(community[0])
|
|
145
217
|
community_edges.extend(community[1])
|
|
146
218
|
|
|
147
|
-
await destroy_projection(driver, projection)
|
|
148
219
|
return community_nodes, community_edges
|
|
149
220
|
|
|
150
221
|
|
|
@@ -156,7 +227,7 @@ async def remove_communities(driver: AsyncDriver):
|
|
|
156
227
|
|
|
157
228
|
|
|
158
229
|
async def determine_entity_community(
|
|
159
|
-
|
|
230
|
+
driver: AsyncDriver, entity: EntityNode
|
|
160
231
|
) -> tuple[CommunityNode | None, bool]:
|
|
161
232
|
# Check if the node is already part of a community
|
|
162
233
|
records, _, _ = await driver.execute_query(
|
|
@@ -217,7 +288,7 @@ async def determine_entity_community(
|
|
|
217
288
|
|
|
218
289
|
|
|
219
290
|
async def update_community(
|
|
220
|
-
|
|
291
|
+
driver: AsyncDriver, llm_client: LLMClient, embedder, entity: EntityNode
|
|
221
292
|
):
|
|
222
293
|
community, is_new = await determine_entity_community(driver, entity)
|
|
223
294
|
|
|
@@ -236,4 +307,4 @@ async def update_community(
|
|
|
236
307
|
|
|
237
308
|
await community.generate_name_embedding(embedder)
|
|
238
309
|
|
|
239
|
-
await community.save(driver)
|
|
310
|
+
await community.save(driver)
|
{graphiti_core-0.3.3 → graphiti_core-0.3.4}/graphiti_core/utils/maintenance/node_operations.py
RENAMED
|
@@ -272,9 +272,12 @@ async def dedupe_node_list(
|
|
|
272
272
|
unique_nodes = []
|
|
273
273
|
uuid_map: dict[str, str] = {}
|
|
274
274
|
for node_data in nodes_data:
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
275
|
+
node_instance: EntityNode | None = node_map.get(node_data['uuids'][0])
|
|
276
|
+
if node_instance is None:
|
|
277
|
+
logger.warning(f'Node {node_data["uuids"][0]} not found in node map')
|
|
278
|
+
continue
|
|
279
|
+
node_instance.summary = node_data['summary']
|
|
280
|
+
unique_nodes.append(node_instance)
|
|
278
281
|
|
|
279
282
|
for uuid in node_data['uuids'][1:]:
|
|
280
283
|
uuid_value = node_map[node_data['uuids'][0]].uuid
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{graphiti_core-0.3.3 → graphiti_core-0.3.4}/graphiti_core/utils/maintenance/edge_operations.py
RENAMED
|
File without changes
|
{graphiti_core-0.3.3 → graphiti_core-0.3.4}/graphiti_core/utils/maintenance/graph_data_operations.py
RENAMED
|
File without changes
|
{graphiti_core-0.3.3 → graphiti_core-0.3.4}/graphiti_core/utils/maintenance/temporal_operations.py
RENAMED
|
File without changes
|
|
File without changes
|