graphiti-core 0.3.3__py3-none-any.whl → 0.3.5__py3-none-any.whl

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.

Potentially problematic release.


This version of graphiti-core might be problematic. Click here for more details.

graphiti_core/edges.py CHANGED
@@ -34,7 +34,7 @@ logger = logging.getLogger(__name__)
34
34
 
35
35
  class Edge(BaseModel, ABC):
36
36
  uuid: str = Field(default_factory=lambda: str(uuid4()))
37
- group_id: str | None = Field(description='partition of the graph')
37
+ group_id: str = Field(description='partition of the graph')
38
38
  source_node_uuid: str
39
39
  target_node_uuid: str
40
40
  created_at: datetime
@@ -104,7 +104,6 @@ class EpisodicEdge(Edge):
104
104
 
105
105
  edges = [get_episodic_edge_from_record(record) for record in records]
106
106
 
107
- logger.info(f'Found Edge: {uuid}')
108
107
  if len(edges) == 0:
109
108
  raise EdgeNotFoundError(uuid)
110
109
  return edges[0]
@@ -127,7 +126,29 @@ class EpisodicEdge(Edge):
127
126
 
128
127
  edges = [get_episodic_edge_from_record(record) for record in records]
129
128
 
130
- logger.info(f'Found Edges: {uuids}')
129
+ if len(edges) == 0:
130
+ raise EdgeNotFoundError(uuids[0])
131
+ return edges
132
+
133
+ @classmethod
134
+ async def get_by_group_ids(cls, driver: AsyncDriver, group_ids: list[str]):
135
+ records, _, _ = await driver.execute_query(
136
+ """
137
+ MATCH (n:Episodic)-[e:MENTIONS]->(m:Entity)
138
+ WHERE e.group_id IN $group_ids
139
+ RETURN
140
+ e.uuid As uuid,
141
+ e.group_id AS group_id,
142
+ n.uuid AS source_node_uuid,
143
+ m.uuid AS target_node_uuid,
144
+ e.created_at AS created_at
145
+ """,
146
+ group_ids=group_ids,
147
+ )
148
+
149
+ edges = [get_episodic_edge_from_record(record) for record in records]
150
+ uuids = [edge.uuid for edge in edges]
151
+
131
152
  if len(edges) == 0:
132
153
  raise EdgeNotFoundError(uuids[0])
133
154
  return edges
@@ -215,7 +236,6 @@ class EntityEdge(Edge):
215
236
 
216
237
  edges = [get_entity_edge_from_record(record) for record in records]
217
238
 
218
- logger.info(f'Found Edge: {uuid}')
219
239
  if len(edges) == 0:
220
240
  raise EdgeNotFoundError(uuid)
221
241
  return edges[0]
@@ -245,7 +265,36 @@ class EntityEdge(Edge):
245
265
 
246
266
  edges = [get_entity_edge_from_record(record) for record in records]
247
267
 
248
- logger.info(f'Found Edges: {uuids}')
268
+ if len(edges) == 0:
269
+ raise EdgeNotFoundError(uuids[0])
270
+ return edges
271
+
272
+ @classmethod
273
+ async def get_by_group_ids(cls, driver: AsyncDriver, group_ids: list[str]):
274
+ records, _, _ = await driver.execute_query(
275
+ """
276
+ MATCH (n:Entity)-[e:RELATES_TO]->(m:Entity)
277
+ WHERE e.group_id IN $group_ids
278
+ RETURN
279
+ e.uuid AS uuid,
280
+ n.uuid AS source_node_uuid,
281
+ m.uuid AS target_node_uuid,
282
+ e.created_at AS created_at,
283
+ e.name AS name,
284
+ e.group_id AS group_id,
285
+ e.fact AS fact,
286
+ e.fact_embedding AS fact_embedding,
287
+ e.episodes AS episodes,
288
+ e.expired_at AS expired_at,
289
+ e.valid_at AS valid_at,
290
+ e.invalid_at AS invalid_at
291
+ """,
292
+ group_ids=group_ids,
293
+ )
294
+
295
+ edges = [get_entity_edge_from_record(record) for record in records]
296
+ uuids = [edge.uuid for edge in edges]
297
+
249
298
  if len(edges) == 0:
250
299
  raise EdgeNotFoundError(uuids[0])
251
300
  return edges
@@ -288,8 +337,6 @@ class CommunityEdge(Edge):
288
337
 
289
338
  edges = [get_community_edge_from_record(record) for record in records]
290
339
 
291
- logger.info(f'Found Edge: {uuid}')
292
-
293
340
  return edges[0]
294
341
 
295
342
  @classmethod
@@ -310,7 +357,25 @@ class CommunityEdge(Edge):
310
357
 
311
358
  edges = [get_community_edge_from_record(record) for record in records]
312
359
 
313
- logger.info(f'Found Edges: {uuids}')
360
+ return edges
361
+
362
+ @classmethod
363
+ async def get_by_group_ids(cls, driver: AsyncDriver, group_ids: list[str]):
364
+ records, _, _ = await driver.execute_query(
365
+ """
366
+ MATCH (n:Community)-[e:HAS_MEMBER]->(m:Entity | Community)
367
+ WHERE e.group_id IN $group_ids
368
+ RETURN
369
+ e.uuid As uuid,
370
+ e.group_id AS group_id,
371
+ n.uuid AS source_node_uuid,
372
+ m.uuid AS target_node_uuid,
373
+ e.created_at AS created_at
374
+ """,
375
+ group_ids=group_ids,
376
+ )
377
+
378
+ edges = [get_community_edge_from_record(record) for record in records]
314
379
 
315
380
  return edges
316
381
 
graphiti_core/graphiti.py CHANGED
@@ -77,7 +77,14 @@ load_dotenv()
77
77
 
78
78
 
79
79
  class Graphiti:
80
- def __init__(self, uri: str, user: str, password: str, llm_client: LLMClient | None = None):
80
+ def __init__(
81
+ self,
82
+ uri: str,
83
+ user: str,
84
+ password: str,
85
+ llm_client: LLMClient | None = None,
86
+ store_raw_episode_content: bool = True,
87
+ ):
81
88
  """
82
89
  Initialize a Graphiti instance.
83
90
 
@@ -116,12 +123,13 @@ class Graphiti:
116
123
  """
117
124
  self.driver = AsyncGraphDatabase.driver(uri, auth=(user, password))
118
125
  self.database = 'neo4j'
126
+ self.store_raw_episode_content = store_raw_episode_content
119
127
  if llm_client:
120
128
  self.llm_client = llm_client
121
129
  else:
122
130
  self.llm_client = OpenAIClient()
123
131
 
124
- def close(self):
132
+ async def close(self):
125
133
  """
126
134
  Close the connection to the Neo4j database.
127
135
 
@@ -150,8 +158,8 @@ class Graphiti:
150
158
  # Use graphiti...
151
159
  finally:
152
160
  graphiti.close()
153
- self.driver.close()
154
161
  """
162
+ await self.driver.close()
155
163
 
156
164
  async def build_indices_and_constraints(self):
157
165
  """
@@ -189,7 +197,7 @@ class Graphiti:
189
197
  self,
190
198
  reference_time: datetime,
191
199
  last_n: int = EPISODE_WINDOW_LEN,
192
- group_ids: list[str | None] | None = None,
200
+ group_ids: list[str] | None = None,
193
201
  ) -> list[EpisodicNode]:
194
202
  """
195
203
  Retrieve the last n episodic nodes from the graph.
@@ -225,7 +233,7 @@ class Graphiti:
225
233
  source_description: str,
226
234
  reference_time: datetime,
227
235
  source: EpisodeType = EpisodeType.message,
228
- group_id: str | None = None,
236
+ group_id: str = '',
229
237
  uuid: str | None = None,
230
238
  update_communities: bool = False,
231
239
  ):
@@ -251,6 +259,8 @@ class Graphiti:
251
259
  An id for the graph partition the episode is a part of.
252
260
  uuid : str | None
253
261
  Optional uuid of the episode.
262
+ update_communities : bool
263
+ Optional. Whether to update communities with new node information
254
264
 
255
265
  Returns
256
266
  -------
@@ -276,7 +286,6 @@ class Graphiti:
276
286
  try:
277
287
  start = time()
278
288
 
279
- nodes: list[EntityNode] = []
280
289
  entity_edges: list[EntityEdge] = []
281
290
  embedder = self.llm_client.get_embedder()
282
291
  now = datetime.now()
@@ -295,6 +304,8 @@ class Graphiti:
295
304
  valid_at=reference_time,
296
305
  )
297
306
  episode.uuid = uuid if uuid is not None else episode.uuid
307
+ if not self.store_raw_episode_content:
308
+ episode.content = ''
298
309
 
299
310
  # Extract entities as nodes
300
311
 
@@ -323,7 +334,7 @@ class Graphiti:
323
334
  ),
324
335
  )
325
336
  logger.info(f'Adjusted mentioned nodes: {[(n.name, n.uuid) for n in mentioned_nodes]}')
326
- nodes.extend(mentioned_nodes)
337
+ nodes = mentioned_nodes
327
338
 
328
339
  extracted_edges_with_resolved_pointers = resolve_edge_pointers(
329
340
  extracted_edges, uuid_map
@@ -435,7 +446,7 @@ class Graphiti:
435
446
  except Exception as e:
436
447
  raise e
437
448
 
438
- async def add_episode_bulk(self, bulk_episodes: list[RawEpisode], group_id: str | None = None):
449
+ async def add_episode_bulk(self, bulk_episodes: list[RawEpisode], group_id: str = ''):
439
450
  """
440
451
  Process multiple episodes in bulk and update the graph.
441
452
 
@@ -566,9 +577,9 @@ class Graphiti:
566
577
  self,
567
578
  query: str,
568
579
  center_node_uuid: str | None = None,
569
- group_ids: list[str | None] | None = None,
580
+ group_ids: list[str] | None = None,
570
581
  num_results=DEFAULT_SEARCH_LIMIT,
571
- ):
582
+ ) -> list[EntityEdge]:
572
583
  """
573
584
  Perform a hybrid search on the knowledge graph.
574
585
 
@@ -622,7 +633,7 @@ class Graphiti:
622
633
  self,
623
634
  query: str,
624
635
  config: SearchConfig,
625
- group_ids: list[str | None] | None = None,
636
+ group_ids: list[str] | None = None,
626
637
  center_node_uuid: str | None = None,
627
638
  ) -> SearchResults:
628
639
  return await search(
@@ -633,7 +644,7 @@ class Graphiti:
633
644
  self,
634
645
  query: str,
635
646
  center_node_uuid: str | None = None,
636
- group_ids: list[str | None] | None = None,
647
+ group_ids: list[str] | None = None,
637
648
  limit: int = DEFAULT_SEARCH_LIMIT,
638
649
  ) -> list[EntityNode]:
639
650
  """
@@ -30,13 +30,17 @@ from .errors import RateLimitError
30
30
  logger = logging.getLogger(__name__)
31
31
 
32
32
  DEFAULT_MODEL = 'claude-3-5-sonnet-20240620'
33
+ DEFAULT_MAX_TOKENS = 8192
33
34
 
34
35
 
35
36
  class AnthropicClient(LLMClient):
36
37
  def __init__(self, config: LLMConfig | None = None, cache: bool = False):
37
38
  if config is None:
38
- config = LLMConfig()
39
+ config = LLMConfig(max_tokens=DEFAULT_MAX_TOKENS)
40
+ elif config.max_tokens is None:
41
+ config.max_tokens = DEFAULT_MAX_TOKENS
39
42
  super().__init__(config, cache)
43
+
40
44
  self.client = AsyncAnthropic(
41
45
  api_key=config.api_key,
42
46
  # we'll use tenacity to retry
@@ -35,7 +35,7 @@ logger = logging.getLogger(__name__)
35
35
 
36
36
 
37
37
  def is_server_or_retry_error(exception):
38
- if isinstance(exception, RateLimitError):
38
+ if isinstance(exception, (RateLimitError, json.decoder.JSONDecodeError)):
39
39
  return True
40
40
 
41
41
  return (
@@ -15,7 +15,7 @@ limitations under the License.
15
15
  """
16
16
 
17
17
  EMBEDDING_DIM = 1024
18
- DEFAULT_MAX_TOKENS = 4096
18
+ DEFAULT_MAX_TOKENS = 16384
19
19
  DEFAULT_TEMPERATURE = 0
20
20
 
21
21
 
@@ -31,13 +31,17 @@ from .errors import RateLimitError
31
31
  logger = logging.getLogger(__name__)
32
32
 
33
33
  DEFAULT_MODEL = 'llama-3.1-70b-versatile'
34
+ DEFAULT_MAX_TOKENS = 2048
34
35
 
35
36
 
36
37
  class GroqClient(LLMClient):
37
38
  def __init__(self, config: LLMConfig | None = None, cache: bool = False):
38
39
  if config is None:
39
- config = LLMConfig()
40
+ config = LLMConfig(max_tokens=DEFAULT_MAX_TOKENS)
41
+ elif config.max_tokens is None:
42
+ config.max_tokens = DEFAULT_MAX_TOKENS
40
43
  super().__init__(config, cache)
44
+
41
45
  self.client = AsyncGroq(api_key=config.api_key)
42
46
 
43
47
  def get_embedder(self) -> typing.Any:
@@ -29,17 +29,54 @@ from .errors import RateLimitError
29
29
 
30
30
  logger = logging.getLogger(__name__)
31
31
 
32
- DEFAULT_MODEL = 'gpt-4o-2024-08-06'
32
+ DEFAULT_MODEL = 'gpt-4o-mini'
33
33
 
34
34
 
35
35
  class OpenAIClient(LLMClient):
36
- def __init__(self, config: LLMConfig | None = None, cache: bool = False):
36
+ """
37
+ OpenAIClient is a client class for interacting with OpenAI's language models.
38
+
39
+ This class extends the LLMClient and provides methods to initialize the client,
40
+ get an embedder, and generate responses from the language model.
41
+
42
+ Attributes:
43
+ client (AsyncOpenAI): The OpenAI client used to interact with the API.
44
+ model (str): The model name to use for generating responses.
45
+ temperature (float): The temperature to use for generating responses.
46
+ max_tokens (int): The maximum number of tokens to generate in a response.
47
+
48
+ Methods:
49
+ __init__(config: LLMConfig | None = None, cache: bool = False, client: typing.Any = None):
50
+ Initializes the OpenAIClient with the provided configuration, cache setting, and client.
51
+
52
+ get_embedder() -> typing.Any:
53
+ Returns the embedder from the OpenAI client.
54
+
55
+ _generate_response(messages: list[Message]) -> dict[str, typing.Any]:
56
+ Generates a response from the language model based on the provided messages.
57
+ """
58
+
59
+ def __init__(
60
+ self, config: LLMConfig | None = None, cache: bool = False, client: typing.Any = None
61
+ ):
62
+ """
63
+ Initialize the OpenAIClient with the provided configuration, cache setting, and client.
64
+
65
+ Args:
66
+ config (LLMConfig | None): The configuration for the LLM client, including API key, model, base URL, temperature, and max tokens.
67
+ cache (bool): Whether to use caching for responses. Defaults to False.
68
+ client (Any | None): An optional async client instance to use. If not provided, a new AsyncOpenAI client is created.
69
+
70
+ """
37
71
  if config is None:
38
72
  config = LLMConfig()
39
73
 
40
74
  super().__init__(config, cache)
41
75
 
42
- self.client = AsyncOpenAI(api_key=config.api_key, base_url=config.base_url)
76
+ if client is None:
77
+ self.client = AsyncOpenAI(api_key=config.api_key, base_url=config.base_url)
78
+ else:
79
+ self.client = client
43
80
 
44
81
  def get_embedder(self) -> typing.Any:
45
82
  return self.client.embeddings
graphiti_core/nodes.py CHANGED
@@ -70,7 +70,7 @@ class EpisodeType(Enum):
70
70
  class Node(BaseModel, ABC):
71
71
  uuid: str = Field(default_factory=lambda: str(uuid4()))
72
72
  name: str = Field(description='name of the node')
73
- group_id: str | None = Field(description='partition of the graph')
73
+ group_id: str = Field(description='partition of the graph')
74
74
  labels: list[str] = Field(default_factory=list)
75
75
  created_at: datetime = Field(default_factory=lambda: datetime.now())
76
76
 
@@ -158,8 +158,6 @@ class EpisodicNode(Node):
158
158
 
159
159
  episodes = [get_episodic_node_from_record(record) for record in records]
160
160
 
161
- logger.info(f'Found Node: {uuid}')
162
-
163
161
  if len(episodes) == 0:
164
162
  raise NodeNotFoundError(uuid)
165
163
 
@@ -185,7 +183,27 @@ class EpisodicNode(Node):
185
183
 
186
184
  episodes = [get_episodic_node_from_record(record) for record in records]
187
185
 
188
- logger.info(f'Found Nodes: {uuids}')
186
+ return episodes
187
+
188
+ @classmethod
189
+ async def get_by_group_ids(cls, driver: AsyncDriver, group_ids: list[str]):
190
+ records, _, _ = await driver.execute_query(
191
+ """
192
+ MATCH (e:Episodic) WHERE e.group_id IN $group_ids
193
+ RETURN DISTINCT
194
+ e.content AS content,
195
+ e.created_at AS created_at,
196
+ e.valid_at AS valid_at,
197
+ e.uuid AS uuid,
198
+ e.name AS name,
199
+ e.group_id AS group_id,
200
+ e.source_description AS source_description,
201
+ e.source AS source
202
+ """,
203
+ group_ids=group_ids,
204
+ )
205
+
206
+ episodes = [get_episodic_node_from_record(record) for record in records]
189
207
 
190
208
  return episodes
191
209
 
@@ -240,8 +258,6 @@ class EntityNode(Node):
240
258
 
241
259
  nodes = [get_entity_node_from_record(record) for record in records]
242
260
 
243
- logger.info(f'Found Node: {uuid}')
244
-
245
261
  return nodes[0]
246
262
 
247
263
  @classmethod
@@ -262,7 +278,25 @@ class EntityNode(Node):
262
278
 
263
279
  nodes = [get_entity_node_from_record(record) for record in records]
264
280
 
265
- logger.info(f'Found Nodes: {uuids}')
281
+ return nodes
282
+
283
+ @classmethod
284
+ async def get_by_group_ids(cls, driver: AsyncDriver, group_ids: list[str]):
285
+ records, _, _ = await driver.execute_query(
286
+ """
287
+ MATCH (n:Entity) WHERE n.group_id IN $group_ids
288
+ RETURN
289
+ n.uuid As uuid,
290
+ n.name AS name,
291
+ n.name_embedding AS name_embedding,
292
+ n.group_id AS group_id,
293
+ n.created_at AS created_at,
294
+ n.summary AS summary
295
+ """,
296
+ group_ids=group_ids,
297
+ )
298
+
299
+ nodes = [get_entity_node_from_record(record) for record in records]
266
300
 
267
301
  return nodes
268
302
 
@@ -317,8 +351,6 @@ class CommunityNode(Node):
317
351
 
318
352
  nodes = [get_community_node_from_record(record) for record in records]
319
353
 
320
- logger.info(f'Found Node: {uuid}')
321
-
322
354
  return nodes[0]
323
355
 
324
356
  @classmethod
@@ -337,11 +369,29 @@ class CommunityNode(Node):
337
369
  uuids=uuids,
338
370
  )
339
371
 
340
- nodes = [get_community_node_from_record(record) for record in records]
372
+ communities = [get_community_node_from_record(record) for record in records]
341
373
 
342
- logger.info(f'Found Nodes: {uuids}')
374
+ return communities
343
375
 
344
- return nodes
376
+ @classmethod
377
+ async def get_by_group_ids(cls, driver: AsyncDriver, group_ids: list[str]):
378
+ records, _, _ = await driver.execute_query(
379
+ """
380
+ MATCH (n:Community) WHERE n.group_id IN $group_ids
381
+ RETURN
382
+ n.uuid As uuid,
383
+ n.name AS name,
384
+ n.name_embedding AS name_embedding,
385
+ n.group_id AS group_id,
386
+ n.created_at AS created_at,
387
+ n.summary AS summary
388
+ """,
389
+ group_ids=group_ids,
390
+ )
391
+
392
+ communities = [get_community_node_from_record(record) for record in records]
393
+
394
+ return communities
345
395
 
346
396
 
347
397
  # Node helpers
@@ -15,6 +15,7 @@ limitations under the License.
15
15
  """
16
16
 
17
17
  import logging
18
+ from collections import defaultdict
18
19
  from time import time
19
20
 
20
21
  from neo4j import AsyncDriver
@@ -56,7 +57,7 @@ async def search(
56
57
  driver: AsyncDriver,
57
58
  embedder,
58
59
  query: str,
59
- group_ids: list[str | None] | None,
60
+ group_ids: list[str] | None,
60
61
  config: SearchConfig,
61
62
  center_node_uuid: str | None = None,
62
63
  ) -> SearchResults:
@@ -103,7 +104,7 @@ async def edge_search(
103
104
  driver: AsyncDriver,
104
105
  embedder,
105
106
  query: str,
106
- group_ids: list[str | None] | None,
107
+ group_ids: list[str] | None,
107
108
  config: EdgeSearchConfig,
108
109
  center_node_uuid: str | None = None,
109
110
  limit=DEFAULT_SEARCH_LIMIT,
@@ -140,14 +141,21 @@ async def edge_search(
140
141
  if center_node_uuid is None:
141
142
  raise SearchRerankerError('No center node provided for Node Distance reranker')
142
143
 
143
- source_to_edge_uuid_map = {
144
- edge.source_node_uuid: edge.uuid for result in search_results for edge in result
145
- }
146
- source_uuids = [[edge.source_node_uuid for edge in result] for result in search_results]
144
+ # use rrf as a preliminary sort
145
+ sorted_result_uuids = rrf([[edge.uuid for edge in result] for result in search_results])
146
+ sorted_results = [edge_uuid_map[uuid] for uuid in sorted_result_uuids]
147
+
148
+ # node distance reranking
149
+ source_to_edge_uuid_map = defaultdict(list)
150
+ for edge in sorted_results:
151
+ source_to_edge_uuid_map[edge.source_node_uuid].append(edge.uuid)
152
+
153
+ source_uuids = [edge.source_node_uuid for edge in sorted_results]
147
154
 
148
155
  reranked_node_uuids = await node_distance_reranker(driver, source_uuids, center_node_uuid)
149
156
 
150
- reranked_uuids = [source_to_edge_uuid_map[node_uuid] for node_uuid in reranked_node_uuids]
157
+ for node_uuid in reranked_node_uuids:
158
+ reranked_uuids.extend(source_to_edge_uuid_map[node_uuid])
151
159
 
152
160
  reranked_edges = [edge_uuid_map[uuid] for uuid in reranked_uuids]
153
161
 
@@ -161,7 +169,7 @@ async def node_search(
161
169
  driver: AsyncDriver,
162
170
  embedder,
163
171
  query: str,
164
- group_ids: list[str | None] | None,
172
+ group_ids: list[str] | None,
165
173
  config: NodeSearchConfig,
166
174
  center_node_uuid: str | None = None,
167
175
  limit=DEFAULT_SEARCH_LIMIT,
@@ -198,7 +206,9 @@ async def node_search(
198
206
  elif config.reranker == NodeReranker.node_distance:
199
207
  if center_node_uuid is None:
200
208
  raise SearchRerankerError('No center node provided for Node Distance reranker')
201
- reranked_uuids = await node_distance_reranker(driver, search_result_uuids, center_node_uuid)
209
+ reranked_uuids = await node_distance_reranker(
210
+ driver, rrf(search_result_uuids), center_node_uuid
211
+ )
202
212
 
203
213
  reranked_nodes = [node_uuid_map[uuid] for uuid in reranked_uuids]
204
214
 
@@ -209,7 +219,7 @@ async def community_search(
209
219
  driver: AsyncDriver,
210
220
  embedder,
211
221
  query: str,
212
- group_ids: list[str | None] | None,
222
+ group_ids: list[str] | None,
213
223
  config: CommunitySearchConfig,
214
224
  limit=DEFAULT_SEARCH_LIMIT,
215
225
  ) -> list[CommunityNode]:
@@ -87,7 +87,7 @@ async def edge_fulltext_search(
87
87
  query: str,
88
88
  source_node_uuid: str | None,
89
89
  target_node_uuid: str | None,
90
- group_ids: list[str | None] | None = None,
90
+ group_ids: list[str] | None = None,
91
91
  limit=RELEVANT_SCHEMA_LIMIT,
92
92
  ) -> list[EntityEdge]:
93
93
  # fulltext search over facts
@@ -95,10 +95,7 @@ async def edge_fulltext_search(
95
95
  CALL db.index.fulltext.queryRelationships("name_and_fact", $query)
96
96
  YIELD relationship AS rel, score
97
97
  MATCH (n:Entity {uuid: $source_uuid})-[r {uuid: rel.uuid}]-(m:Entity {uuid: $target_uuid})
98
- WHERE CASE
99
- WHEN $group_ids IS NULL THEN n.group_id IS NULL
100
- ELSE n.group_id IN $group_ids
101
- END
98
+ WHERE $group_ids IS NULL OR n.group_id IN $group_ids
102
99
  RETURN
103
100
  r.uuid AS uuid,
104
101
  r.group_id AS group_id,
@@ -120,10 +117,7 @@ async def edge_fulltext_search(
120
117
  CALL db.index.fulltext.queryRelationships("name_and_fact", $query)
121
118
  YIELD relationship AS rel, score
122
119
  MATCH (n:Entity)-[r {uuid: rel.uuid}]-(m:Entity)
123
- WHERE CASE
124
- WHEN $group_ids IS NULL THEN r.group_id IS NULL
125
- ELSE r.group_id IN $group_ids
126
- END
120
+ WHERE $group_ids IS NULL OR r.group_id IN $group_ids
127
121
  RETURN
128
122
  r.uuid AS uuid,
129
123
  r.group_id AS group_id,
@@ -144,10 +138,7 @@ async def edge_fulltext_search(
144
138
  CALL db.index.fulltext.queryRelationships("name_and_fact", $query)
145
139
  YIELD relationship AS rel, score
146
140
  MATCH (n:Entity)-[r {uuid: rel.uuid}]-(m:Entity {uuid: $target_uuid})
147
- WHERE CASE
148
- WHEN $group_ids IS NULL THEN r.group_id IS NULL
149
- ELSE r.group_id IN $group_ids
150
- END
141
+ WHERE $group_ids IS NULL OR r.group_id IN $group_ids
151
142
  RETURN
152
143
  r.uuid AS uuid,
153
144
  r.group_id AS group_id,
@@ -168,10 +159,7 @@ async def edge_fulltext_search(
168
159
  CALL db.index.fulltext.queryRelationships("name_and_fact", $query)
169
160
  YIELD relationship AS rel, score
170
161
  MATCH (n:Entity {uuid: $source_uuid})-[r {uuid: rel.uuid}]-(m:Entity)
171
- WHERE CASE
172
- WHEN $group_ids IS NULL THEN r.group_id IS NULL
173
- ELSE r.group_id IN $group_ids
174
- END
162
+ WHERE $group_ids IS NULL OR r.group_id IN $group_ids
175
163
  RETURN
176
164
  r.uuid AS uuid,
177
165
  r.group_id AS group_id,
@@ -209,7 +197,7 @@ async def edge_similarity_search(
209
197
  search_vector: list[float],
210
198
  source_node_uuid: str | None,
211
199
  target_node_uuid: str | None,
212
- group_ids: list[str | None] | None = None,
200
+ group_ids: list[str] | None = None,
213
201
  limit: int = RELEVANT_SCHEMA_LIMIT,
214
202
  ) -> list[EntityEdge]:
215
203
  # vector similarity search over embedded facts
@@ -217,10 +205,7 @@ async def edge_similarity_search(
217
205
  CALL db.index.vector.queryRelationships("fact_embedding", $limit, $search_vector)
218
206
  YIELD relationship AS rel, score
219
207
  MATCH (n:Entity {uuid: $source_uuid})-[r {uuid: rel.uuid}]-(m:Entity {uuid: $target_uuid})
220
- WHERE CASE
221
- WHEN $group_ids IS NULL THEN r.group_id IS NULL
222
- ELSE r.group_id IN $group_ids
223
- END
208
+ WHERE $group_ids IS NULL OR r.group_id IN $group_ids
224
209
  RETURN
225
210
  r.uuid AS uuid,
226
211
  r.group_id AS group_id,
@@ -242,10 +227,7 @@ async def edge_similarity_search(
242
227
  CALL db.index.vector.queryRelationships("fact_embedding", $limit, $search_vector)
243
228
  YIELD relationship AS rel, score
244
229
  MATCH (n:Entity)-[r {uuid: rel.uuid}]-(m:Entity)
245
- WHERE CASE
246
- WHEN $group_ids IS NULL THEN r.group_id IS NULL
247
- ELSE r.group_id IN $group_ids
248
- END
230
+ WHERE $group_ids IS NULL OR r.group_id IN $group_ids
249
231
  RETURN
250
232
  r.uuid AS uuid,
251
233
  r.group_id AS group_id,
@@ -266,10 +248,7 @@ async def edge_similarity_search(
266
248
  CALL db.index.vector.queryRelationships("fact_embedding", $limit, $search_vector)
267
249
  YIELD relationship AS rel, score
268
250
  MATCH (n:Entity)-[r {uuid: rel.uuid}]-(m:Entity {uuid: $target_uuid})
269
- WHERE CASE
270
- WHEN $group_ids IS NULL THEN r.group_id IS NULL
271
- ELSE r.group_id IN $group_ids
272
- END
251
+ WHERE $group_ids IS NULL OR r.group_id IN $group_ids
273
252
  RETURN
274
253
  r.uuid AS uuid,
275
254
  r.group_id AS group_id,
@@ -290,10 +269,7 @@ async def edge_similarity_search(
290
269
  CALL db.index.vector.queryRelationships("fact_embedding", $limit, $search_vector)
291
270
  YIELD relationship AS rel, score
292
271
  MATCH (n:Entity {uuid: $source_uuid})-[r {uuid: rel.uuid}]-(m:Entity)
293
- WHERE CASE
294
- WHEN $group_ids IS NULL THEN r.group_id IS NULL
295
- ELSE r.group_id IN $group_ids
296
- END
272
+ WHERE $group_ids IS NULL OR r.group_id IN $group_ids
297
273
  RETURN
298
274
  r.uuid AS uuid,
299
275
  r.group_id AS group_id,
@@ -327,7 +303,7 @@ async def edge_similarity_search(
327
303
  async def node_fulltext_search(
328
304
  driver: AsyncDriver,
329
305
  query: str,
330
- group_ids: list[str | None] | None = None,
306
+ group_ids: list[str] | None = None,
331
307
  limit=RELEVANT_SCHEMA_LIMIT,
332
308
  ) -> list[EntityNode]:
333
309
  # BM25 search to get top nodes
@@ -336,10 +312,7 @@ async def node_fulltext_search(
336
312
  """
337
313
  CALL db.index.fulltext.queryNodes("name_and_summary", $query)
338
314
  YIELD node AS n, score
339
- WHERE CASE
340
- WHEN $group_ids IS NULL THEN n.group_id IS NULL
341
- ELSE n.group_id IN $group_ids
342
- END
315
+ WHERE $group_ids IS NULL OR n.group_id IN $group_ids
343
316
  RETURN
344
317
  n.uuid AS uuid,
345
318
  n.group_id AS group_id,
@@ -362,17 +335,16 @@ async def node_fulltext_search(
362
335
  async def node_similarity_search(
363
336
  driver: AsyncDriver,
364
337
  search_vector: list[float],
365
- group_ids: list[str | None] | None = None,
338
+ group_ids: list[str] | None = None,
366
339
  limit=RELEVANT_SCHEMA_LIMIT,
367
340
  ) -> list[EntityNode]:
368
- group_ids = group_ids if group_ids is not None else [None]
369
-
370
341
  # vector similarity search over entity names
371
342
  records, _, _ = await driver.execute_query(
372
343
  """
373
344
  CALL db.index.vector.queryNodes("name_embedding", $limit, $search_vector)
374
345
  YIELD node AS n, score
375
- MATCH (n WHERE n.group_id IN $group_ids)
346
+ MATCH (n:Entity)
347
+ WHERE $group_ids IS NULL OR n.group_id IN $group_ids
376
348
  RETURN
377
349
  n.uuid As uuid,
378
350
  n.group_id AS group_id,
@@ -394,18 +366,17 @@ async def node_similarity_search(
394
366
  async def community_fulltext_search(
395
367
  driver: AsyncDriver,
396
368
  query: str,
397
- group_ids: list[str | None] | None = None,
369
+ group_ids: list[str] | None = None,
398
370
  limit=RELEVANT_SCHEMA_LIMIT,
399
371
  ) -> list[CommunityNode]:
400
- group_ids = group_ids if group_ids is not None else [None]
401
-
402
372
  # BM25 search to get top communities
403
373
  fuzzy_query = re.sub(r'[^\w\s]', '', query) + '~'
404
374
  records, _, _ = await driver.execute_query(
405
375
  """
406
376
  CALL db.index.fulltext.queryNodes("community_name", $query)
407
377
  YIELD node AS comm, score
408
- MATCH (comm WHERE comm.group_id in $group_ids)
378
+ MATCH (comm:Community)
379
+ WHERE $group_ids IS NULL OR comm.group_id in $group_ids
409
380
  RETURN
410
381
  comm.uuid AS uuid,
411
382
  comm.group_id AS group_id,
@@ -428,17 +399,16 @@ async def community_fulltext_search(
428
399
  async def community_similarity_search(
429
400
  driver: AsyncDriver,
430
401
  search_vector: list[float],
431
- group_ids: list[str | None] | None = None,
402
+ group_ids: list[str] | None = None,
432
403
  limit=RELEVANT_SCHEMA_LIMIT,
433
404
  ) -> list[CommunityNode]:
434
- group_ids = group_ids if group_ids is not None else [None]
435
-
436
405
  # vector similarity search over entity names
437
406
  records, _, _ = await driver.execute_query(
438
407
  """
439
408
  CALL db.index.vector.queryNodes("community_name_embedding", $limit, $search_vector)
440
409
  YIELD node AS comm, score
441
- MATCH (comm WHERE comm.group_id IN $group_ids)
410
+ MATCH (comm:Community)
411
+ WHERE $group_ids IS NULL OR comm.group_id IN $group_ids
442
412
  RETURN
443
413
  comm.uuid As uuid,
444
414
  comm.group_id AS group_id,
@@ -461,7 +431,7 @@ async def hybrid_node_search(
461
431
  queries: list[str],
462
432
  embeddings: list[list[float]],
463
433
  driver: AsyncDriver,
464
- group_ids: list[str | None] | None = None,
434
+ group_ids: list[str] | None = None,
465
435
  limit: int = RELEVANT_SCHEMA_LIMIT,
466
436
  ) -> list[EntityNode]:
467
437
  """
@@ -503,7 +473,6 @@ async def hybrid_node_search(
503
473
  """
504
474
 
505
475
  start = time()
506
-
507
476
  results: list[list[EntityNode]] = list(
508
477
  await asyncio.gather(
509
478
  *[node_fulltext_search(driver, q, group_ids, 2 * limit) for q in queries],
@@ -625,14 +594,14 @@ def rrf(results: list[list[str]], rank_const=1) -> list[str]:
625
594
 
626
595
 
627
596
  async def node_distance_reranker(
628
- driver: AsyncDriver, node_uuids: list[list[str]], center_node_uuid: str
597
+ driver: AsyncDriver, node_uuids: list[str], center_node_uuid: str
629
598
  ) -> list[str]:
630
- # use rrf as a preliminary ranker
631
- sorted_uuids = rrf(node_uuids)
599
+ # filter out node_uuid center node node uuid
600
+ filtered_uuids = list(filter(lambda uuid: uuid != center_node_uuid, node_uuids))
632
601
  scores: dict[str, float] = {}
633
602
 
634
603
  # Find the shortest path to center node
635
- query = Query("""
604
+ query = Query("""
636
605
  MATCH p = SHORTEST 1 (center:Entity {uuid: $center_uuid})-[:RELATES_TO]-+(n:Entity {uuid: $node_uuid})
637
606
  RETURN length(p) AS score
638
607
  """)
@@ -644,21 +613,23 @@ async def node_distance_reranker(
644
613
  node_uuid=uuid,
645
614
  center_uuid=center_node_uuid,
646
615
  )
647
- for uuid in sorted_uuids
616
+ for uuid in filtered_uuids
648
617
  ]
649
618
  )
650
619
 
651
- for uuid, result in zip(sorted_uuids, path_results):
620
+ for uuid, result in zip(filtered_uuids, path_results):
652
621
  records = result[0]
653
622
  record = records[0] if len(records) > 0 else None
654
623
  distance: float = record['score'] if record is not None else float('inf')
655
- distance = 0 if uuid == center_node_uuid else distance
656
624
  scores[uuid] = distance
657
625
 
658
626
  # rerank on shortest distance
659
- sorted_uuids.sort(key=lambda cur_uuid: scores[cur_uuid])
627
+ filtered_uuids.sort(key=lambda cur_uuid: scores[cur_uuid])
660
628
 
661
- return sorted_uuids
629
+ # add back in filtered center uuids
630
+ filtered_uuids = [center_node_uuid] + filtered_uuids
631
+
632
+ return filtered_uuids
662
633
 
663
634
 
664
635
  async def episode_mentions_reranker(driver: AsyncDriver, node_uuids: list[list[str]]) -> list[str]:
@@ -4,6 +4,7 @@ from collections import defaultdict
4
4
  from datetime import datetime
5
5
 
6
6
  from neo4j import AsyncDriver
7
+ from pydantic import BaseModel
7
8
 
8
9
  from graphiti_core.edges import CommunityEdge
9
10
  from graphiti_core.llm_client import LLMClient
@@ -11,9 +12,17 @@ from graphiti_core.nodes import CommunityNode, EntityNode, get_community_node_fr
11
12
  from graphiti_core.prompts import prompt_library
12
13
  from graphiti_core.utils.maintenance.edge_operations import build_community_edges
13
14
 
15
+ MAX_COMMUNITY_BUILD_CONCURRENCY = 10
16
+
17
+
14
18
  logger = logging.getLogger(__name__)
15
19
 
16
20
 
21
+ class Neighbor(BaseModel):
22
+ node_uuid: str
23
+ edge_count: int
24
+
25
+
17
26
  async def build_community_projection(driver: AsyncDriver) -> str:
18
27
  records, _, _ = await driver.execute_query("""
19
28
  CALL gds.graph.project("communities", "Entity",
@@ -29,36 +38,96 @@ async def build_community_projection(driver: AsyncDriver) -> str:
29
38
  return records[0]['graph']
30
39
 
31
40
 
32
- async def destroy_projection(driver: AsyncDriver, projection_name: str):
33
- await driver.execute_query(
34
- """
35
- CALL gds.graph.drop($projection_name)
36
- """,
37
- projection_name=projection_name,
38
- )
41
+ async def get_community_clusters(driver: AsyncDriver) -> list[list[EntityNode]]:
42
+ community_clusters: list[list[EntityNode]] = []
39
43
 
40
-
41
- async def get_community_clusters(
42
- driver: AsyncDriver, projection_name: str
43
- ) -> list[list[EntityNode]]:
44
- records, _, _ = await driver.execute_query("""
45
- CALL gds.leiden.stream("communities")
46
- YIELD nodeId, communityId
47
- RETURN gds.util.asNode(nodeId).uuid AS entity_uuid, communityId
44
+ group_id_values, _, _ = await driver.execute_query("""
45
+ MATCH (n:Entity WHERE n.group_id IS NOT NULL)
46
+ RETURN
47
+ collect(DISTINCT n.group_id) AS group_ids
48
48
  """)
49
- community_map: dict[int, list[str]] = defaultdict(list)
50
- for record in records:
51
- community_map[record['communityId']].append(record['entity_uuid'])
52
49
 
53
- community_clusters: list[list[EntityNode]] = list(
54
- await asyncio.gather(
55
- *[EntityNode.get_by_uuids(driver, cluster) for cluster in community_map.values()]
50
+ group_ids = group_id_values[0]['group_ids']
51
+ for group_id in group_ids:
52
+ projection: dict[str, list[Neighbor]] = {}
53
+ nodes = await EntityNode.get_by_group_ids(driver, [group_id])
54
+ for node in nodes:
55
+ records, _, _ = await driver.execute_query(
56
+ """
57
+ MATCH (n:Entity {group_id: $group_id, uuid: $uuid})-[r:RELATES_TO]-(m: Entity {group_id: $group_id})
58
+ WITH count(r) AS count, m.uuid AS uuid
59
+ RETURN
60
+ uuid,
61
+ count
62
+ """,
63
+ uuid=node.uuid,
64
+ group_id=group_id,
65
+ )
66
+
67
+ projection[node.uuid] = [
68
+ Neighbor(node_uuid=record['uuid'], edge_count=record['count']) for record in records
69
+ ]
70
+
71
+ cluster_uuids = label_propagation(projection)
72
+
73
+ community_clusters.extend(
74
+ list(
75
+ await asyncio.gather(
76
+ *[EntityNode.get_by_uuids(driver, cluster) for cluster in cluster_uuids]
77
+ )
78
+ )
56
79
  )
57
- )
58
80
 
59
81
  return community_clusters
60
82
 
61
83
 
84
def label_propagation(projection: dict[str, list['Neighbor']]) -> list[list[str]]:
    """Group the nodes of ``projection`` into communities by propagating labels.

    Every node starts in its own community, labelled with its position in
    ``projection``. Each round, every node looks at the labels its neighbors
    held at the start of the round (updates are synchronous), weighted by
    ``edge_count``:

    1. The candidate label is the one with the highest total edge count;
       equal weights are resolved in favour of the numerically larger label.
    2. The node adopts the candidate only when it is larger than the node's
       current label. Labels therefore never decrease, which bounds the
       number of rounds and guarantees the loop terminates.
    3. Rounds repeat until no node changes label.

    Args:
        projection: Maps a node uuid to its neighbors. Each neighbor exposes
            ``node_uuid`` and ``edge_count``. Neighbors whose uuid is not a key
            of ``projection`` are ignored instead of raising ``KeyError``.

    Returns:
        One list of node uuids per community, in order of first appearance
        when walking ``projection``. An empty projection yields ``[]``.
    """
    community_map: dict[str, int] = {uuid: i for i, uuid in enumerate(projection)}

    while True:
        changed = False
        new_community_map: dict[str, int] = {}

        for uuid, neighbors in projection.items():
            curr_community = community_map[uuid]

            # Total edge weight towards each neighboring community.
            community_candidates: dict[int, int] = defaultdict(int)
            for neighbor in neighbors:
                neighbor_community = community_map.get(neighbor.node_uuid)
                if neighbor_community is None:
                    # The neighbor is not part of this projection; it cannot vote.
                    continue
                community_candidates[neighbor_community] += neighbor.edge_count

            if community_candidates:
                # Highest weight wins; the larger label wins on equal weight.
                _, community_candidate = max(
                    (count, community) for community, count in community_candidates.items()
                )
            else:
                community_candidate = -1

            # Only ever move to a larger label so propagation cannot oscillate.
            new_community = max(community_candidate, curr_community)
            new_community_map[uuid] = new_community

            if new_community != curr_community:
                changed = True

        if not changed:
            break

        community_map = new_community_map

    community_cluster_map: dict[int, list[str]] = defaultdict(list)
    for uuid, community in community_map.items():
        community_cluster_map[community].append(uuid)

    return list(community_cluster_map.values())
129
+
130
+
62
131
  async def summarize_pair(llm_client: LLMClient, summary_pair: tuple[str, str]) -> str:
63
132
  # Prepare context for LLM
64
133
  context = {'node_summaries': [{'summary': summary} for summary in summary_pair]}
@@ -129,13 +198,16 @@ async def build_community(
129
198
  async def build_communities(
130
199
  driver: AsyncDriver, llm_client: LLMClient
131
200
  ) -> tuple[list[CommunityNode], list[CommunityEdge]]:
132
- projection = await build_community_projection(driver)
133
- community_clusters = await get_community_clusters(driver, projection)
201
+ community_clusters = await get_community_clusters(driver)
202
+
203
+ semaphore = asyncio.Semaphore(MAX_COMMUNITY_BUILD_CONCURRENCY)
204
+
205
+ async def limited_build_community(cluster):
206
+ async with semaphore:
207
+ return await build_community(llm_client, cluster)
134
208
 
135
209
  communities: list[tuple[CommunityNode, list[CommunityEdge]]] = list(
136
- await asyncio.gather(
137
- *[build_community(llm_client, cluster) for cluster in community_clusters]
138
- )
210
+ await asyncio.gather(*[limited_build_community(cluster) for cluster in community_clusters])
139
211
  )
140
212
 
141
213
  community_nodes: list[CommunityNode] = []
@@ -144,7 +216,6 @@ async def build_communities(
144
216
  community_nodes.append(community[0])
145
217
  community_edges.extend(community[1])
146
218
 
147
- await destroy_projection(driver, projection)
148
219
  return community_nodes, community_edges
149
220
 
150
221
 
@@ -73,7 +73,7 @@ async def extract_edges(
73
73
  episode: EpisodicNode,
74
74
  nodes: list[EntityNode],
75
75
  previous_episodes: list[EpisodicNode],
76
- group_id: str | None,
76
+ group_id: str = '',
77
77
  ) -> list[EntityEdge]:
78
78
  start = time()
79
79
 
@@ -101,7 +101,7 @@ async def retrieve_episodes(
101
101
  driver: AsyncDriver,
102
102
  reference_time: datetime,
103
103
  last_n: int = EPISODE_WINDOW_LEN,
104
- group_ids: list[str | None] | None = None,
104
+ group_ids: list[str] | None = None,
105
105
  ) -> list[EpisodicNode]:
106
106
  """
107
107
  Retrieve the last n episodic nodes from the graph.
@@ -119,7 +119,8 @@ async def retrieve_episodes(
119
119
  """
120
120
  result = await driver.execute_query(
121
121
  """
122
- MATCH (e:Episodic) WHERE e.valid_at <= $reference_time AND e.group_id in $group_ids
122
+ MATCH (e:Episodic) WHERE e.valid_at <= $reference_time
123
+ AND ($group_ids IS NULL) OR e.group_id in $group_ids
123
124
  RETURN e.content AS content,
124
125
  e.created_at AS created_at,
125
126
  e.valid_at AS valid_at,
@@ -272,9 +272,12 @@ async def dedupe_node_list(
272
272
  unique_nodes = []
273
273
  uuid_map: dict[str, str] = {}
274
274
  for node_data in nodes_data:
275
- node = node_map[node_data['uuids'][0]]
276
- node.summary = node_data['summary']
277
- unique_nodes.append(node)
275
+ node_instance: EntityNode | None = node_map.get(node_data['uuids'][0])
276
+ if node_instance is None:
277
+ logger.warning(f'Node {node_data["uuids"][0]} not found in node map')
278
+ continue
279
+ node_instance.summary = node_data['summary']
280
+ unique_nodes.append(node_instance)
278
281
 
279
282
  for uuid in node_data['uuids'][1:]:
280
283
  uuid_value = node_map[node_data['uuids'][0]].uuid
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: graphiti-core
3
- Version: 0.3.3
3
+ Version: 0.3.5
4
4
  Summary: A temporal graph building library
5
5
  License: Apache-2.0
6
6
  Author: Paul Paliychuk
@@ -21,7 +21,7 @@ Description-Content-Type: text/markdown
21
21
 
22
22
  <div align="center">
23
23
 
24
- # Graphiti
24
+ <img width="350" alt="Graphiti-ts-small" src="https://github.com/user-attachments/assets/bbd02947-e435-4a05-b25a-bbbac36d52c8">
25
25
 
26
26
  ## Temporal Knowledge Graphs for Agentic Applications
27
27
 
@@ -37,7 +37,9 @@ Description-Content-Type: text/markdown
37
37
 
38
38
  </div>
39
39
 
40
- Graphiti builds dynamic, temporally aware Knowledge Graphs that represent complex, evolving relationships between entities over time. Graphiti ingests both unstructured and structured data, and the resulting graph may be queried using a fusion of time, full-text, semantic, and graph algorithm approaches.
40
+ Graphiti builds dynamic, temporally aware Knowledge Graphs that represent complex, evolving relationships between
41
+ entities over time. Graphiti ingests both unstructured and structured data, and the resulting graph may be queried using
42
+ a fusion of time, full-text, semantic, and graph algorithm approaches.
41
43
 
42
44
  <br />
43
45
 
@@ -47,25 +49,39 @@ Graphiti builds dynamic, temporally aware Knowledge Graphs that represent comple
47
49
 
48
50
  <br />
49
51
 
50
- Graphiti helps you create and query Knowledge Graphs that evolve over time. A knowledge graph is a network of interconnected facts, such as _“Kendra loves Adidas shoes.”_ Each fact is a “triplet” represented by two entities, or nodes (_”Kendra”_, _“Adidas shoes”_), and their relationship, or edge (_”loves”_). Knowledge Graphs have been explored extensively for information retrieval. What makes Graphiti unique is its ability to autonomously build a knowledge graph while handling changing relationships and maintaining historical context.
52
+ Graphiti helps you create and query Knowledge Graphs that evolve over time. A knowledge graph is a network of
53
+ interconnected facts, such as _“Kendra loves Adidas shoes.”_ Each fact is a “triplet” represented by two entities, or
54
+ nodes (_“Kendra”_, _“Adidas shoes”_), and their relationship, or edge (_“loves”_). Knowledge Graphs have been explored
55
+ extensively for information retrieval. What makes Graphiti unique is its ability to autonomously build a knowledge graph
56
+ while handling changing relationships and maintaining historical context.
51
57
 
52
58
  With Graphiti, you can build LLM applications such as:
53
59
 
54
- - Assistants that learn from user interactions, fusing personal knowledge with dynamic data from business systems like CRMs and billing platforms.
60
+ - Assistants that learn from user interactions, fusing personal knowledge with dynamic data from business systems like
61
+ CRMs and billing platforms.
55
62
  - Agents that autonomously execute complex tasks, reasoning with state changes from multiple dynamic sources.
56
63
 
57
- Graphiti supports a wide range of applications in sales, customer service, health, finance, and more, enabling long-term recall and state-based reasoning for both assistants and agents.
64
+ Graphiti supports a wide range of applications in sales, customer service, health, finance, and more, enabling long-term
65
+ recall and state-based reasoning for both assistants and agents.
58
66
 
59
67
  ## Why Graphiti?
60
68
 
61
- We were intrigued by Microsoft’s GraphRAG, which expanded on RAG text chunking by using a graph to better model a document corpus and making this representation available via semantic and graph search techniques. However, GraphRAG did not address our core problem: It's primarily designed for static documents and doesn't inherently handle temporal aspects of data.
62
-
63
- Graphiti is designed from the ground up to handle constantly changing information, hybrid semantic and graph search, and scale:
64
-
65
- - **Temporal Awareness:** Tracks changes in facts and relationships over time, enabling point-in-time queries. Graph edges include temporal metadata to record relationship lifecycles.
66
- - **Episodic Processing:** Ingests data as discrete episodes, maintaining data provenance and allowing incremental entity and relationship extraction.
67
- - **Hybrid Search:** Combines semantic and BM25 full-text search, with the ability to rerank results by distance from a central node e.g. “Kendra”.
68
- - **Scalable:** Designed for processing large datasets, with parallelization of LLM calls for bulk processing while preserving the chronology of events.
69
+ We were intrigued by Microsoft’s GraphRAG, which expanded on RAG text chunking by using a graph to better model a
70
+ document corpus and making this representation available via semantic and graph search techniques. However, GraphRAG did
71
+ not address our core problem: It's primarily designed for static documents and doesn't inherently handle temporal
72
+ aspects of data.
73
+
74
+ Graphiti is designed from the ground up to handle constantly changing information, hybrid semantic and graph search, and
75
+ scale:
76
+
77
+ - **Temporal Awareness:** Tracks changes in facts and relationships over time, enabling point-in-time queries. Graph
78
+ edges include temporal metadata to record relationship lifecycles.
79
+ - **Episodic Processing:** Ingests data as discrete episodes, maintaining data provenance and allowing incremental
80
+ entity and relationship extraction.
81
+ - **Hybrid Search:** Combines semantic and BM25 full-text search, with the ability to rerank results by distance from a
82
+ central node e.g. “Kendra”.
83
+ - **Scalable:** Designed for processing large datasets, with parallelization of LLM calls for bulk processing while
84
+ preserving the chronology of events.
69
85
  - **Supports Varied Sources:** Can ingest both unstructured text and structured JSON data.
70
86
 
71
87
  <p align="center">
@@ -91,7 +107,8 @@ Optional:
91
107
  - Anthropic or Groq API key (for alternative LLM providers)
92
108
 
93
109
  > [!TIP]
94
- > The simplest way to install Neo4j is via [Neo4j Desktop](https://neo4j.com/download/). It provides a user-friendly interface to manage Neo4j instances and databases.
110
+ > The simplest way to install Neo4j is via [Neo4j Desktop](https://neo4j.com/download/). It provides a user-friendly
111
+ > interface to manage Neo4j instances and databases.
95
112
 
96
113
  ```bash
97
114
  pip install graphiti-core
@@ -106,7 +123,8 @@ poetry add graphiti-core
106
123
  ## Quick Start
107
124
 
108
125
  > [!IMPORTANT]
109
- > Graphiti uses OpenAI for LLM inference and embedding. Ensure that an `OPENAI_API_KEY` is set in your environment. Support for Anthropic and Groq LLM inferences is available, too.
126
+ > Graphiti uses OpenAI for LLM inference and embedding. Ensure that an `OPENAI_API_KEY` is set in your environment.
127
+ > Support for Anthropic and Groq LLM inferences is available, too.
110
128
 
111
129
  ```python
112
130
  from graphiti_core import Graphiti
@@ -140,25 +158,25 @@ for i, episode in enumerate(episodes):
140
158
  results = await graphiti.search('Who was the California Attorney General?')
141
159
  [
142
160
  EntityEdge(
143
- │ uuid='3133258f738e487383f07b04e15d4ac0',
144
- │ source_node_uuid='2a85789b318d4e418050506879906e62',
145
- │ target_node_uuid='baf7781f445945989d6e4f927f881556',
146
- │ created_at=datetime.datetime(2024, 8, 26, 13, 13, 24, 861097),
147
- │ name='HELD_POSITION',
148
- # the fact reflects the updated state that Harris is
149
- # no longer the AG of California
150
- │ fact='Kamala Harris was the Attorney General of California',
151
- │ fact_embedding=[
152
- │ │ -0.009955154731869698,
153
- │ ...
154
- │ │ 0.00784289836883545
155
- ],
156
- │ episodes=['b43e98ad0a904088a76c67985caecc22'],
157
- │ expired_at=datetime.datetime(2024, 8, 26, 20, 18, 1, 53812),
158
- # These dates represent the date this edge was true.
159
- │ valid_at=datetime.datetime(2011, 1, 3, 0, 0, tzinfo=<UTC>),
160
- │ invalid_at=datetime.datetime(2017, 1, 3, 0, 0, tzinfo=<UTC>)
161
- )
161
+ │ uuid = '3133258f738e487383f07b04e15d4ac0',
162
+ │ source_node_uuid = '2a85789b318d4e418050506879906e62',
163
+ │ target_node_uuid = 'baf7781f445945989d6e4f927f881556',
164
+ │ created_at = datetime.datetime(2024, 8, 26, 13, 13, 24, 861097),
165
+ │ name = 'HELD_POSITION',
166
+ # the fact reflects the updated state that Harris is
167
+ # no longer the AG of California
168
+ │ fact = 'Kamala Harris was the Attorney General of California',
169
+ │ fact_embedding = [
170
+ │ │ -0.009955154731869698,
171
+ │ ...
172
+ │ │ 0.00784289836883545
173
+ │],
174
+ │ episodes = ['b43e98ad0a904088a76c67985caecc22'],
175
+ │ expired_at = datetime.datetime(2024, 8, 26, 20, 18, 1, 53812),
176
+ # These dates represent the date this edge was true.
177
+ │ valid_at = datetime.datetime(2011, 1, 3, 0, 0, tzinfo= < UTC >),
178
+ │ invalid_at = datetime.datetime(2017, 1, 3, 0, 0, tzinfo= < UTC >)
179
+ )
162
180
  ]
163
181
 
164
182
  # Rerank search results based on graph distance
@@ -191,14 +209,16 @@ Graphiti is under active development. We aim to maintain API stability while wor
191
209
  - [ ] Achieving good performance with different LLM and embedding models
192
210
  - [ ] Creating a dedicated embedder interface
193
211
  - [ ] Supporting custom graph schemas:
194
- - Allow developers to provide their own defined node and edge classes when ingesting episodes
195
- - Enable more flexible knowledge representation tailored to specific use cases
212
+ - Allow developers to provide their own defined node and edge classes when ingesting episodes
213
+ - Enable more flexible knowledge representation tailored to specific use cases
196
214
  - [ ] Enhancing retrieval capabilities with more robust and configurable options
197
215
  - [ ] Expanding test coverage to ensure reliability and catch edge cases
198
216
 
199
217
  ## Contributing
200
218
 
201
- We encourage and appreciate all forms of contributions, whether it's code, documentation, addressing GitHub Issues, or answering questions in the Graphiti Discord channel. For detailed guidelines on code contributions, please refer to [CONTRIBUTING](CONTRIBUTING.md).
219
+ We encourage and appreciate all forms of contributions, whether it's code, documentation, addressing GitHub Issues, or
220
+ answering questions in the Graphiti Discord channel. For detailed guidelines on code contributions, please refer
221
+ to [CONTRIBUTING](CONTRIBUTING.md).
202
222
 
203
223
  ## Support
204
224
 
@@ -1,17 +1,17 @@
1
1
  graphiti_core/__init__.py,sha256=e5SWFkRiaUwfprYIeIgVIh7JDedNiloZvd3roU-0aDY,55
2
- graphiti_core/edges.py,sha256=fnzxqbf2CT0fMEIOZvlpKSqdWWGXNUq3EwIN8kkuqEI,11444
2
+ graphiti_core/edges.py,sha256=ueibtKrSGgThIL9WucOgdGeqHSORRY5BIjf--4OgvQI,13521
3
3
  graphiti_core/errors.py,sha256=BOwL0VVnoUuMjK3EUYKvqefsbsYhRhcKcVWXaX9hanw,1259
4
- graphiti_core/graphiti.py,sha256=5o3W8iVP4P4On0KbPJmuHHfhJP-nrHRbPbaJI4jG-0U,25646
4
+ graphiti_core/graphiti.py,sha256=z6a4tCyDID_o6gloXDuUFmbL22bRiUE7A22JPYGVIyI,25947
5
5
  graphiti_core/helpers.py,sha256=qQqZJBkc_z5f3x5axPfCKK_QHLRybvWNFb57WXNENfQ,769
6
6
  graphiti_core/llm_client/__init__.py,sha256=PA80TSMeX-sUXITXEAxMDEt3gtfZgcJrGJUcyds1mSo,207
7
- graphiti_core/llm_client/anthropic_client.py,sha256=3zsOkewLFxBhKe90OkmpfkvrcwykgGwRoqII05Jno_Q,2410
8
- graphiti_core/llm_client/client.py,sha256=7-gEhOKxjdkllV_xS2Ikn-a4QzK9NE63CANnZgdn3VY,3438
9
- graphiti_core/llm_client/config.py,sha256=d1oZ9tt7QBQlbph7v-0HjItb6otK9_-IwF8kkRYL2rc,2359
7
+ graphiti_core/llm_client/anthropic_client.py,sha256=16cWm_fQSUvJTSmgISBcNF8vytIhk-c5mmMK0Xd7SPE,2557
8
+ graphiti_core/llm_client/client.py,sha256=g3vEBNV0E24HdKR3DmqjY8cqqr1CDlvrdh7SaiCUkDc,3470
9
+ graphiti_core/llm_client/config.py,sha256=YIuR5XTINvxsEGDcpPXCqDWfWXGHTB4GB0k5DSRD7Rg,2360
10
10
  graphiti_core/llm_client/errors.py,sha256=-qlWwv1X-UjfsFIiNl-7yJIYvPwi7z8srVRfX4-s6uk,814
11
- graphiti_core/llm_client/groq_client.py,sha256=clQvQ9-zCRoqK9NGMx9Icyl4lUXmM70lZgVquXikxBo,2334
12
- graphiti_core/llm_client/openai_client.py,sha256=VqzWdSrHuNfF2l1aRDua00NHhtP9UR7VNtLcu8h9vLc,2343
11
+ graphiti_core/llm_client/groq_client.py,sha256=j467rL2tNaKplpTOP9pZNUCxG3rrHAEE26CBDk24jzw,2481
12
+ graphiti_core/llm_client/openai_client.py,sha256=LlvhAI5nfrLUWehQ0TSPkeqzgV4wJMDiK56XUQxR21A,4002
13
13
  graphiti_core/llm_client/utils.py,sha256=0KT4XxTVw3c0__HLDj3F8kNR4K_qY0hT0TH-pQZ_IZw,1126
14
- graphiti_core/nodes.py,sha256=QqLduy5DHso3xGLClvmue2YbHLw5oCB0zGEyveh0jXM,12176
14
+ graphiti_core/nodes.py,sha256=4RdkzvaiqEIqppkaYq53JLZ3tr6AskeKVOHOCJLR2BA,13847
15
15
  graphiti_core/prompts/__init__.py,sha256=EA-x9xUki9l8wnu2l8ek_oNf75-do5tq5hVq7Zbv8Kw,101
16
16
  graphiti_core/prompts/dedupe_edges.py,sha256=DUNHdIudj50FAjkla4nc68tSFSD2yjmYHBw-Bb7ph20,6529
17
17
  graphiti_core/prompts/dedupe_nodes.py,sha256=BZ9S-PB9SSGjc5Oo8ivdgA6rZx3OGOFhKtwrBlQ0bm0,7269
@@ -24,20 +24,20 @@ graphiti_core/prompts/models.py,sha256=cvx_Bv5RMFUD_5IUawYrbpOKLPHogai7_bm7YXrSz
24
24
  graphiti_core/prompts/summarize_nodes.py,sha256=FLuZpGTABgcxuIDkx_IKH115nHEw0rIaFhcGlWveAMc,2357
25
25
  graphiti_core/py.typed,sha256=vlmmzQOt7bmeQl9L3XJP4W6Ry0iiELepnOrinKz5KQg,79
26
26
  graphiti_core/search/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
27
- graphiti_core/search/search.py,sha256=ktsPp_moKRtrt7HGoTMSzW22LI1LYB_hpP6ARGyqIRE,8466
27
+ graphiti_core/search/search.py,sha256=QDgxWihEQuQ9H9BNGfWOEEDH0aGXUtwfpb7inhN88fk,8773
28
28
  graphiti_core/search/search_config.py,sha256=d8w9RDO55G2bwbjYQBaD6gXqEWK1-NsDANrNibYB6t8,2165
29
29
  graphiti_core/search/search_config_recipes.py,sha256=_VJqvYB70e8Jke3hsbeQF3Bdogn2MubpYeAQe15M2Jo,3450
30
- graphiti_core/search/search_utils.py,sha256=4seDnQct4dc2RDGIZDluG8A7UlMKcFHFv4SrE741Ogc,24374
30
+ graphiti_core/search/search_utils.py,sha256=LKFskMPqgRoxFpn5cdNYjAGTMy6z-FybNRBhNVtpJZM,23497
31
31
  graphiti_core/utils/__init__.py,sha256=cJAcMnBZdHBQmWrZdU1PQ1YmaL75bhVUkyVpIPuOyns,260
32
32
  graphiti_core/utils/bulk_utils.py,sha256=JtoYTZPCigPa3n2E43Oe7QhFZRTA_QKNGy1jVgklHag,12614
33
33
  graphiti_core/utils/maintenance/__init__.py,sha256=4b9sfxqyFZMLwxxS2lnQ6_wBr3xrJRIqfAWOidK8EK0,388
34
- graphiti_core/utils/maintenance/community_operations.py,sha256=qvWFViMdjM46XO20riTYYuvi9Ftl8_ToHYu3NHUIoHU,7425
35
- graphiti_core/utils/maintenance/edge_operations.py,sha256=s6Uuu8k_VhFx89cUUD7iRQCZ6get5NrTxeNEqVj9Duw,11440
36
- graphiti_core/utils/maintenance/graph_data_operations.py,sha256=d27efEVLvQTmoKE7Hq21wAWSmfqkKzw7jMbVo1zKggE,6489
37
- graphiti_core/utils/maintenance/node_operations.py,sha256=WXJFU1AprYjmHSq6rZhTIX4JFHtF5W9LbzA2Tfksp5Q,8838
34
+ graphiti_core/utils/maintenance/community_operations.py,sha256=2jtA0ZwjwZyDiC1Es8d4p0KafT98w1fSkQOvi1IdT80,9809
35
+ graphiti_core/utils/maintenance/edge_operations.py,sha256=lSeesSnWQ3vpeD2dIY0tSiHEHRMK6fiirEhNNT-s5os,11438
36
+ graphiti_core/utils/maintenance/graph_data_operations.py,sha256=zk-Ir7msJIbdQj-8KTl0As9a8zYUG-e-dVdbVacxlf8,6515
37
+ graphiti_core/utils/maintenance/node_operations.py,sha256=QAg4KQkSAOXx9QRaUp7t6DCaztZlzeOBC3__57FCs_o,9025
38
38
  graphiti_core/utils/maintenance/temporal_operations.py,sha256=BzfGDm96w4HcUEsaWTHUBt5S8dNmDQL1eX6AuBL-XFM,8135
39
39
  graphiti_core/utils/maintenance/utils.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
40
- graphiti_core-0.3.3.dist-info/LICENSE,sha256=KCUwCyDXuVEgmDWkozHyniRyWjnWUWjkuDHfU6o3JlA,11325
41
- graphiti_core-0.3.3.dist-info/METADATA,sha256=s3l1gh77El04LZRBjza7CTfzG1pLSz1JkM15Tiirl80,9323
42
- graphiti_core-0.3.3.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
43
- graphiti_core-0.3.3.dist-info/RECORD,,
40
+ graphiti_core-0.3.5.dist-info/LICENSE,sha256=KCUwCyDXuVEgmDWkozHyniRyWjnWUWjkuDHfU6o3JlA,11325
41
+ graphiti_core-0.3.5.dist-info/METADATA,sha256=HtvWtTlWBd-JoGXTOwrw2FV7d_UOPaJZ-brSsuZbhuY,9395
42
+ graphiti_core-0.3.5.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
43
+ graphiti_core-0.3.5.dist-info/RECORD,,