graphiti-core 0.3.2-py3-none-any.whl → 0.3.4-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of graphiti-core has been flagged by the registry as possibly problematic.

graphiti_core/edges.py CHANGED
@@ -33,7 +33,7 @@ logger = logging.getLogger(__name__)
33
33
 
34
34
 
35
35
  class Edge(BaseModel, ABC):
36
- uuid: str = Field(default_factory=lambda: uuid4().hex)
36
+ uuid: str = Field(default_factory=lambda: str(uuid4()))
37
37
  group_id: str | None = Field(description='partition of the graph')
38
38
  source_node_uuid: str
39
39
  target_node_uuid: str
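The default uuid factory changes from `uuid4().hex` to `str(uuid4())`, so newly created edge (and, further down, node) uuids are hyphenated UUID strings rather than bare 32-character hex. A quick standalone illustration of the difference:

```python
from uuid import uuid4

u = uuid4()
print(u.hex)   # e.g. '3133258f738e487383f07b04e15d4ac0'   (old default: 32 hex chars)
print(str(u))  # e.g. '3133258f-738e-4873-83f0-7b04e15d4ac0' (new default: hyphenated)
```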
@@ -104,18 +104,62 @@ class EpisodicEdge(Edge):
104
104
 
105
105
  edges = [get_episodic_edge_from_record(record) for record in records]
106
106
 
107
- logger.info(f'Found Edge: {uuid}')
108
107
  if len(edges) == 0:
109
108
  raise EdgeNotFoundError(uuid)
110
109
  return edges[0]
111
110
 
111
+ @classmethod
112
+ async def get_by_uuids(cls, driver: AsyncDriver, uuids: list[str]):
113
+ records, _, _ = await driver.execute_query(
114
+ """
115
+ MATCH (n:Episodic)-[e:MENTIONS]->(m:Entity)
116
+ WHERE e.uuid IN $uuids
117
+ RETURN
118
+ e.uuid As uuid,
119
+ e.group_id AS group_id,
120
+ n.uuid AS source_node_uuid,
121
+ m.uuid AS target_node_uuid,
122
+ e.created_at AS created_at
123
+ """,
124
+ uuids=uuids,
125
+ )
126
+
127
+ edges = [get_episodic_edge_from_record(record) for record in records]
128
+
129
+ if len(edges) == 0:
130
+ raise EdgeNotFoundError(uuids[0])
131
+ return edges
132
+
133
+ @classmethod
134
+ async def get_by_group_ids(cls, driver: AsyncDriver, group_ids: list[str | None]):
135
+ records, _, _ = await driver.execute_query(
136
+ """
137
+ MATCH (n:Episodic)-[e:MENTIONS]->(m:Entity)
138
+ WHERE e.group_id IN $group_ids
139
+ RETURN
140
+ e.uuid As uuid,
141
+ e.group_id AS group_id,
142
+ n.uuid AS source_node_uuid,
143
+ m.uuid AS target_node_uuid,
144
+ e.created_at AS created_at
145
+ """,
146
+ group_ids=group_ids,
147
+ )
148
+
149
+ edges = [get_episodic_edge_from_record(record) for record in records]
150
+ uuids = [edge.uuid for edge in edges]
151
+
152
+ if len(edges) == 0:
153
+ raise EdgeNotFoundError(uuids[0])
154
+ return edges
155
+
112
156
 
113
157
  class EntityEdge(Edge):
114
158
  name: str = Field(description='name of the edge, relation name')
115
159
  fact: str = Field(description='fact representing the edge and nodes that it connects')
116
160
  fact_embedding: list[float] | None = Field(default=None, description='embedding of the fact')
117
- episodes: list[str] | None = Field(
118
- default=None,
161
+ episodes: list[str] = Field(
162
+ default=[],
119
163
  description='list of episode ids that reference these entity edges',
120
164
  )
121
165
  expired_at: datetime | None = Field(
@@ -192,11 +236,69 @@ class EntityEdge(Edge):
192
236
 
193
237
  edges = [get_entity_edge_from_record(record) for record in records]
194
238
 
195
- logger.info(f'Found Edge: {uuid}')
196
239
  if len(edges) == 0:
197
240
  raise EdgeNotFoundError(uuid)
198
241
  return edges[0]
199
242
 
243
+ @classmethod
244
+ async def get_by_uuids(cls, driver: AsyncDriver, uuids: list[str]):
245
+ records, _, _ = await driver.execute_query(
246
+ """
247
+ MATCH (n:Entity)-[e:RELATES_TO]->(m:Entity)
248
+ WHERE e.uuid IN $uuids
249
+ RETURN
250
+ e.uuid AS uuid,
251
+ n.uuid AS source_node_uuid,
252
+ m.uuid AS target_node_uuid,
253
+ e.created_at AS created_at,
254
+ e.name AS name,
255
+ e.group_id AS group_id,
256
+ e.fact AS fact,
257
+ e.fact_embedding AS fact_embedding,
258
+ e.episodes AS episodes,
259
+ e.expired_at AS expired_at,
260
+ e.valid_at AS valid_at,
261
+ e.invalid_at AS invalid_at
262
+ """,
263
+ uuids=uuids,
264
+ )
265
+
266
+ edges = [get_entity_edge_from_record(record) for record in records]
267
+
268
+ if len(edges) == 0:
269
+ raise EdgeNotFoundError(uuids[0])
270
+ return edges
271
+
272
+ @classmethod
273
+ async def get_by_group_ids(cls, driver: AsyncDriver, group_ids: list[str | None]):
274
+ records, _, _ = await driver.execute_query(
275
+ """
276
+ MATCH (n:Entity)-[e:RELATES_TO]->(m:Entity)
277
+ WHERE e.group_id IN $group_ids
278
+ RETURN
279
+ e.uuid AS uuid,
280
+ n.uuid AS source_node_uuid,
281
+ m.uuid AS target_node_uuid,
282
+ e.created_at AS created_at,
283
+ e.name AS name,
284
+ e.group_id AS group_id,
285
+ e.fact AS fact,
286
+ e.fact_embedding AS fact_embedding,
287
+ e.episodes AS episodes,
288
+ e.expired_at AS expired_at,
289
+ e.valid_at AS valid_at,
290
+ e.invalid_at AS invalid_at
291
+ """,
292
+ group_ids=group_ids,
293
+ )
294
+
295
+ edges = [get_entity_edge_from_record(record) for record in records]
296
+ uuids = [edge.uuid for edge in edges]
297
+
298
+ if len(edges) == 0:
299
+ raise EdgeNotFoundError(uuids[0])
300
+ return edges
301
+
200
302
 
201
303
  class CommunityEdge(Edge):
202
304
  async def save(self, driver: AsyncDriver):
@@ -235,10 +337,48 @@ class CommunityEdge(Edge):
235
337
 
236
338
  edges = [get_community_edge_from_record(record) for record in records]
237
339
 
238
- logger.info(f'Found Edge: {uuid}')
239
-
240
340
  return edges[0]
241
341
 
342
+ @classmethod
343
+ async def get_by_uuids(cls, driver: AsyncDriver, uuids: list[str]):
344
+ records, _, _ = await driver.execute_query(
345
+ """
346
+ MATCH (n:Community)-[e:HAS_MEMBER]->(m:Entity | Community)
347
+ WHERE e.uuid IN $uuids
348
+ RETURN
349
+ e.uuid As uuid,
350
+ e.group_id AS group_id,
351
+ n.uuid AS source_node_uuid,
352
+ m.uuid AS target_node_uuid,
353
+ e.created_at AS created_at
354
+ """,
355
+ uuids=uuids,
356
+ )
357
+
358
+ edges = [get_community_edge_from_record(record) for record in records]
359
+
360
+ return edges
361
+
362
+ @classmethod
363
+ async def get_by_group_ids(cls, driver: AsyncDriver, group_ids: list[str | None]):
364
+ records, _, _ = await driver.execute_query(
365
+ """
366
+ MATCH (n:Community)-[e:HAS_MEMBER]->(m:Entity | Community)
367
+ WHERE e.group_id IN $group_ids
368
+ RETURN
369
+ e.uuid As uuid,
370
+ e.group_id AS group_id,
371
+ n.uuid AS source_node_uuid,
372
+ m.uuid AS target_node_uuid,
373
+ e.created_at AS created_at
374
+ """,
375
+ group_ids=group_ids,
376
+ )
377
+
378
+ edges = [get_community_edge_from_record(record) for record in records]
379
+
380
+ return edges
381
+
242
382
 
243
383
  # Edge helpers
244
384
  def get_episodic_edge_from_record(record: Any) -> EpisodicEdge:
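The new `get_by_uuids` and `get_by_group_ids` classmethods added in this file give bulk lookups for all three edge types. A minimal usage sketch, assuming graphiti-core 0.3.4 and a local Neo4j instance; the URI, credentials, and group id are placeholders:

```python
import asyncio

from neo4j import AsyncGraphDatabase

from graphiti_core.edges import EntityEdge


async def main():
    # Placeholder connection details for a local Neo4j instance.
    driver = AsyncGraphDatabase.driver('bolt://localhost:7687', auth=('neo4j', 'password'))

    # Fetch every RELATES_TO edge in one graph partition.
    edges = await EntityEdge.get_by_group_ids(driver, ['my-group'])

    # Re-fetch a subset of the same edges by uuid; both helpers raise
    # EdgeNotFoundError when nothing matches.
    subset = await EntityEdge.get_by_uuids(driver, [edge.uuid for edge in edges[:3]])

    print(len(edges), len(subset))
    await driver.close()


asyncio.run(main())
```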
graphiti_core/graphiti.py CHANGED
@@ -35,6 +35,8 @@ from graphiti_core.search.search_config_recipes import (
35
35
  )
36
36
  from graphiti_core.search.search_utils import (
37
37
  RELEVANT_SCHEMA_LIMIT,
38
+ get_communities_by_nodes,
39
+ get_mentioned_nodes,
38
40
  get_relevant_edges,
39
41
  get_relevant_nodes,
40
42
  )
@@ -54,6 +56,7 @@ from graphiti_core.utils.bulk_utils import (
54
56
  from graphiti_core.utils.maintenance.community_operations import (
55
57
  build_communities,
56
58
  remove_communities,
59
+ update_community,
57
60
  )
58
61
  from graphiti_core.utils.maintenance.edge_operations import (
59
62
  extract_edges,
@@ -74,7 +77,14 @@ load_dotenv()
74
77
 
75
78
 
76
79
  class Graphiti:
77
- def __init__(self, uri: str, user: str, password: str, llm_client: LLMClient | None = None):
80
+ def __init__(
81
+ self,
82
+ uri: str,
83
+ user: str,
84
+ password: str,
85
+ llm_client: LLMClient | None = None,
86
+ store_raw_episode_content: bool = True,
87
+ ):
78
88
  """
79
89
  Initialize a Graphiti instance.
80
90
 
@@ -113,6 +123,7 @@ class Graphiti:
113
123
  """
114
124
  self.driver = AsyncGraphDatabase.driver(uri, auth=(user, password))
115
125
  self.database = 'neo4j'
126
+ self.store_raw_episode_content = store_raw_episode_content
116
127
  if llm_client:
117
128
  self.llm_client = llm_client
118
129
  else:
@@ -147,8 +158,8 @@ class Graphiti:
147
158
  # Use graphiti...
148
159
  finally:
149
160
  graphiti.close()
150
- self.driver.close()
151
161
  """
162
+ self.driver.close()
152
163
 
153
164
  async def build_indices_and_constraints(self):
154
165
  """
@@ -224,6 +235,7 @@ class Graphiti:
224
235
  source: EpisodeType = EpisodeType.message,
225
236
  group_id: str | None = None,
226
237
  uuid: str | None = None,
238
+ update_communities: bool = False,
227
239
  ):
228
240
  """
229
241
  Process an episode and update the graph.
@@ -247,6 +259,8 @@ class Graphiti:
247
259
  An id for the graph partition the episode is a part of.
248
260
  uuid : str | None
249
261
  Optional uuid of the episode.
262
+ update_communities : bool
263
+ Optional. Whether to update communities with new node information
250
264
 
251
265
  Returns
252
266
  -------
@@ -272,7 +286,6 @@ class Graphiti:
272
286
  try:
273
287
  start = time()
274
288
 
275
- nodes: list[EntityNode] = []
276
289
  entity_edges: list[EntityEdge] = []
277
290
  embedder = self.llm_client.get_embedder()
278
291
  now = datetime.now()
@@ -291,6 +304,8 @@ class Graphiti:
291
304
  valid_at=reference_time,
292
305
  )
293
306
  episode.uuid = uuid if uuid is not None else episode.uuid
307
+ if not self.store_raw_episode_content:
308
+ episode.content = ''
294
309
 
295
310
  # Extract entities as nodes
296
311
 
@@ -319,7 +334,7 @@ class Graphiti:
319
334
  ),
320
335
  )
321
336
  logger.info(f'Adjusted mentioned nodes: {[(n.name, n.uuid) for n in mentioned_nodes]}')
322
- nodes.extend(mentioned_nodes)
337
+ nodes = mentioned_nodes
323
338
 
324
339
  extracted_edges_with_resolved_pointers = resolve_edge_pointers(
325
340
  extracted_edges, uuid_map
@@ -409,12 +424,22 @@ class Graphiti:
409
424
 
410
425
  logger.info(f'Built episodic edges: {episodic_edges}')
411
426
 
427
+ episode.entity_edges = [edge.uuid for edge in entity_edges]
428
+
412
429
  # Future optimization would be using batch operations to save nodes and edges
413
430
  await episode.save(self.driver)
414
431
  await asyncio.gather(*[node.save(self.driver) for node in nodes])
415
432
  await asyncio.gather(*[edge.save(self.driver) for edge in episodic_edges])
416
433
  await asyncio.gather(*[edge.save(self.driver) for edge in entity_edges])
417
434
 
435
+ # Update any communities
436
+ if update_communities:
437
+ await asyncio.gather(
438
+ *[
439
+ update_community(self.driver, self.llm_client, embedder, node)
440
+ for node in nodes
441
+ ]
442
+ )
418
443
  end = time()
419
444
  logger.info(f'Completed add_episode in {(end - start) * 1000} ms')
420
445
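Putting the new `store_raw_episode_content` constructor flag and the `update_communities` option together, a usage sketch against a local Neo4j instance. Connection details and the episode text are placeholders, and it assumes `OPENAI_API_KEY` is set for the default OpenAI client; keyword names for the other `add_episode` parameters follow the docstring above.

```python
import asyncio
from datetime import datetime

from graphiti_core import Graphiti
from graphiti_core.nodes import EpisodeType


async def main():
    # store_raw_episode_content=False blanks episode.content before saving,
    # so only the extracted nodes and edges are persisted.
    graphiti = Graphiti(
        'bolt://localhost:7687',
        'neo4j',
        'password',
        store_raw_episode_content=False,
    )
    try:
        await graphiti.add_episode(
            name='support-chat-42',
            episode_body='Kendra said she now prefers Nike over Adidas.',
            source=EpisodeType.message,
            source_description='customer support transcript',
            reference_time=datetime.now(),
            group_id='customer-kendra',
            # update_communities=True folds each touched entity into an existing
            # community (or adds a new membership edge) after the episode is saved.
            update_communities=True,
        )
    finally:
        graphiti.close()


asyncio.run(main())
```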
 
@@ -554,7 +579,7 @@ class Graphiti:
554
579
  center_node_uuid: str | None = None,
555
580
  group_ids: list[str | None] | None = None,
556
581
  num_results=DEFAULT_SEARCH_LIMIT,
557
- ):
582
+ ) -> list[EntityEdge]:
558
583
  """
559
584
  Perform a hybrid search on the knowledge graph.
560
585
 
@@ -569,7 +594,7 @@ class Graphiti:
569
594
  Facts will be reranked based on proximity to this node
570
595
  group_ids : list[str | None] | None, optional
571
596
  The graph partitions to return data from.
572
- limit : int, optional
597
+ num_results : int, optional
573
598
  The maximum number of results to return. Defaults to 10.
574
599
 
575
600
  Returns
@@ -668,3 +693,19 @@ class Graphiti:
668
693
  await search(self.driver, embedder, query, group_ids, search_config, center_node_uuid)
669
694
  ).nodes
670
695
  return nodes
696
+
697
+
698
+ async def get_episode_mentions(self, episode_uuids: list[str]) -> SearchResults:
699
+ episodes = await EpisodicNode.get_by_uuids(self.driver, episode_uuids)
700
+
701
+ edges_list = await asyncio.gather(
702
+ *[EntityEdge.get_by_uuids(self.driver, episode.entity_edges) for episode in episodes]
703
+ )
704
+
705
+ edges: list[EntityEdge] = [edge for lst in edges_list for edge in lst]
706
+
707
+ nodes = await get_mentioned_nodes(self.driver, episodes)
708
+
709
+ communities = await get_communities_by_nodes(self.driver, nodes)
710
+
711
+ return SearchResults(edges=edges, nodes=nodes, communities=communities)
graphiti_core/llm_client/anthropic_client.py CHANGED
@@ -30,13 +30,17 @@ from .errors import RateLimitError
30
30
  logger = logging.getLogger(__name__)
31
31
 
32
32
  DEFAULT_MODEL = 'claude-3-5-sonnet-20240620'
33
+ DEFAULT_MAX_TOKENS = 8192
33
34
 
34
35
 
35
36
  class AnthropicClient(LLMClient):
36
37
  def __init__(self, config: LLMConfig | None = None, cache: bool = False):
37
38
  if config is None:
38
- config = LLMConfig()
39
+ config = LLMConfig(max_tokens=DEFAULT_MAX_TOKENS)
40
+ elif config.max_tokens is None:
41
+ config.max_tokens = DEFAULT_MAX_TOKENS
39
42
  super().__init__(config, cache)
43
+
40
44
  self.client = AsyncAnthropic(
41
45
  api_key=config.api_key,
42
46
  # we'll use tenacity to retry
graphiti_core/llm_client/client.py CHANGED
@@ -35,7 +35,7 @@ logger = logging.getLogger(__name__)
35
35
 
36
36
 
37
37
  def is_server_or_retry_error(exception):
38
- if isinstance(exception, RateLimitError):
38
+ if isinstance(exception, (RateLimitError, json.decoder.JSONDecodeError)):
39
39
  return True
40
40
 
41
41
  return (
graphiti_core/llm_client/config.py CHANGED
@@ -15,7 +15,7 @@ limitations under the License.
15
15
  """
16
16
 
17
17
  EMBEDDING_DIM = 1024
18
- DEFAULT_MAX_TOKENS = 4096
18
+ DEFAULT_MAX_TOKENS = 16384
19
19
  DEFAULT_TEMPERATURE = 0
20
20
 
21
21
graphiti_core/llm_client/groq_client.py CHANGED
 
@@ -31,13 +31,17 @@ from .errors import RateLimitError
31
31
  logger = logging.getLogger(__name__)
32
32
 
33
33
  DEFAULT_MODEL = 'llama-3.1-70b-versatile'
34
+ DEFAULT_MAX_TOKENS = 2048
34
35
 
35
36
 
36
37
  class GroqClient(LLMClient):
37
38
  def __init__(self, config: LLMConfig | None = None, cache: bool = False):
38
39
  if config is None:
39
- config = LLMConfig()
40
+ config = LLMConfig(max_tokens=DEFAULT_MAX_TOKENS)
41
+ elif config.max_tokens is None:
42
+ config.max_tokens = DEFAULT_MAX_TOKENS
40
43
  super().__init__(config, cache)
44
+
41
45
  self.client = AsyncGroq(api_key=config.api_key)
42
46
 
43
47
  def get_embedder(self) -> typing.Any:
graphiti_core/llm_client/openai_client.py CHANGED
@@ -33,13 +33,50 @@ DEFAULT_MODEL = 'gpt-4o-2024-08-06'
33
33
 
34
34
 
35
35
  class OpenAIClient(LLMClient):
36
- def __init__(self, config: LLMConfig | None = None, cache: bool = False):
36
+ """
37
+ OpenAIClient is a client class for interacting with OpenAI's language models.
38
+
39
+ This class extends the LLMClient and provides methods to initialize the client,
40
+ get an embedder, and generate responses from the language model.
41
+
42
+ Attributes:
43
+ client (AsyncOpenAI): The OpenAI client used to interact with the API.
44
+ model (str): The model name to use for generating responses.
45
+ temperature (float): The temperature to use for generating responses.
46
+ max_tokens (int): The maximum number of tokens to generate in a response.
47
+
48
+ Methods:
49
+ __init__(config: LLMConfig | None = None, cache: bool = False, client: typing.Any = None):
50
+ Initializes the OpenAIClient with the provided configuration, cache setting, and client.
51
+
52
+ get_embedder() -> typing.Any:
53
+ Returns the embedder from the OpenAI client.
54
+
55
+ _generate_response(messages: list[Message]) -> dict[str, typing.Any]:
56
+ Generates a response from the language model based on the provided messages.
57
+ """
58
+
59
+ def __init__(
60
+ self, config: LLMConfig | None = None, cache: bool = False, client: typing.Any = None
61
+ ):
62
+ """
63
+ Initialize the OpenAIClient with the provided configuration, cache setting, and client.
64
+
65
+ Args:
66
+ config (LLMConfig | None): The configuration for the LLM client, including API key, model, base URL, temperature, and max tokens.
67
+ cache (bool): Whether to use caching for responses. Defaults to False.
68
+ client (Any | None): An optional async client instance to use. If not provided, a new AsyncOpenAI client is created.
69
+
70
+ """
37
71
  if config is None:
38
72
  config = LLMConfig()
39
73
 
40
74
  super().__init__(config, cache)
41
75
 
42
- self.client = AsyncOpenAI(api_key=config.api_key, base_url=config.base_url)
76
+ if client is None:
77
+ self.client = AsyncOpenAI(api_key=config.api_key, base_url=config.base_url)
78
+ else:
79
+ self.client = client
43
80
 
44
81
  def get_embedder(self) -> typing.Any:
45
82
  return self.client.embeddings
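The `OpenAIClient` constructor now accepts a pre-built client, which is handy for pointing at an OpenAI-compatible endpoint or injecting a stub in tests. A sketch with a placeholder key and base URL; the `LLMConfig` field names follow the docstring above:

```python
from openai import AsyncOpenAI

from graphiti_core.llm_client.config import LLMConfig
from graphiti_core.llm_client.openai_client import OpenAIClient

# Any AsyncOpenAI-compatible async client can be injected, e.g. a test stub or a
# client pointed at a proxy. Key and base URL here are placeholders.
custom_client = AsyncOpenAI(api_key='sk-placeholder', base_url='https://example.invalid/v1')

llm_client = OpenAIClient(
    config=LLMConfig(model='gpt-4o-2024-08-06'),
    client=custom_client,  # when omitted, the class builds its own AsyncOpenAI as before
)
```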
graphiti_core/nodes.py CHANGED
@@ -68,7 +68,7 @@ class EpisodeType(Enum):
68
68
 
69
69
 
70
70
  class Node(BaseModel, ABC):
71
- uuid: str = Field(default_factory=lambda: uuid4().hex)
71
+ uuid: str = Field(default_factory=lambda: str(uuid4()))
72
72
  name: str = Field(description='name of the node')
73
73
  group_id: str | None = Field(description='partition of the graph')
74
74
  labels: list[str] = Field(default_factory=list)
@@ -158,8 +158,6 @@ class EpisodicNode(Node):
158
158
 
159
159
  episodes = [get_episodic_node_from_record(record) for record in records]
160
160
 
161
- logger.info(f'Found Node: {uuid}')
162
-
163
161
  if len(episodes) == 0:
164
162
  raise NodeNotFoundError(uuid)
165
163
 
@@ -170,7 +168,8 @@ class EpisodicNode(Node):
170
168
  records, _, _ = await driver.execute_query(
171
169
  """
172
170
  MATCH (e:Episodic) WHERE e.uuid IN $uuids
173
- RETURN e.content AS content,
171
+ RETURN DISTINCT
172
+ e.content AS content,
174
173
  e.created_at AS created_at,
175
174
  e.valid_at AS valid_at,
176
175
  e.uuid AS uuid,
@@ -184,7 +183,27 @@ class EpisodicNode(Node):
184
183
 
185
184
  episodes = [get_episodic_node_from_record(record) for record in records]
186
185
 
187
- logger.info(f'Found Nodes: {uuids}')
186
+ return episodes
187
+
188
+ @classmethod
189
+ async def get_by_group_ids(cls, driver: AsyncDriver, group_ids: list[str | None]):
190
+ records, _, _ = await driver.execute_query(
191
+ """
192
+ MATCH (e:Episodic) WHERE e.group_id IN $group_ids
193
+ RETURN DISTINCT
194
+ e.content AS content,
195
+ e.created_at AS created_at,
196
+ e.valid_at AS valid_at,
197
+ e.uuid AS uuid,
198
+ e.name AS name,
199
+ e.group_id AS group_id,
200
+ e.source_description AS source_description,
201
+ e.source AS source
202
+ """,
203
+ group_ids=group_ids,
204
+ )
205
+
206
+ episodes = [get_episodic_node_from_record(record) for record in records]
188
207
 
189
208
  return episodes
190
209
 
@@ -239,8 +258,6 @@ class EntityNode(Node):
239
258
 
240
259
  nodes = [get_entity_node_from_record(record) for record in records]
241
260
 
242
- logger.info(f'Found Node: {uuid}')
243
-
244
261
  return nodes[0]
245
262
 
246
263
  @classmethod
@@ -261,7 +278,25 @@ class EntityNode(Node):
261
278
 
262
279
  nodes = [get_entity_node_from_record(record) for record in records]
263
280
 
264
- logger.info(f'Found Nodes: {uuids}')
281
+ return nodes
282
+
283
+ @classmethod
284
+ async def get_by_group_ids(cls, driver: AsyncDriver, group_ids: list[str | None]):
285
+ records, _, _ = await driver.execute_query(
286
+ """
287
+ MATCH (n:Entity) WHERE n.group_id IN $group_ids
288
+ RETURN
289
+ n.uuid As uuid,
290
+ n.name AS name,
291
+ n.name_embedding AS name_embedding,
292
+ n.group_id AS group_id,
293
+ n.created_at AS created_at,
294
+ n.summary AS summary
295
+ """,
296
+ group_ids=group_ids,
297
+ )
298
+
299
+ nodes = [get_entity_node_from_record(record) for record in records]
265
300
 
266
301
  return nodes
267
302
 
@@ -316,8 +351,6 @@ class CommunityNode(Node):
316
351
 
317
352
  nodes = [get_community_node_from_record(record) for record in records]
318
353
 
319
- logger.info(f'Found Node: {uuid}')
320
-
321
354
  return nodes[0]
322
355
 
323
356
  @classmethod
@@ -336,11 +369,29 @@ class CommunityNode(Node):
336
369
  uuids=uuids,
337
370
  )
338
371
 
339
- nodes = [get_community_node_from_record(record) for record in records]
372
+ communities = [get_community_node_from_record(record) for record in records]
340
373
 
341
- logger.info(f'Found Nodes: {uuids}')
374
+ return communities
342
375
 
343
- return nodes
376
+ @classmethod
377
+ async def get_by_group_ids(cls, driver: AsyncDriver, group_ids: list[str | None]):
378
+ records, _, _ = await driver.execute_query(
379
+ """
380
+ MATCH (n:Community) WHERE n.group_id IN $group_ids
381
+ RETURN
382
+ n.uuid As uuid,
383
+ n.name AS name,
384
+ n.name_embedding AS name_embedding,
385
+ n.group_id AS group_id,
386
+ n.created_at AS created_at,
387
+ n.summary AS summary
388
+ """,
389
+ group_ids=group_ids,
390
+ )
391
+
392
+ communities = [get_community_node_from_record(record) for record in records]
393
+
394
+ return communities
344
395
 
345
396
 
346
397
  # Node helpers
graphiti_core/search/search.py CHANGED
@@ -42,6 +42,7 @@ from graphiti_core.search.search_utils import (
42
42
  community_similarity_search,
43
43
  edge_fulltext_search,
44
44
  edge_similarity_search,
45
+ episode_mentions_reranker,
45
46
  node_distance_reranker,
46
47
  node_fulltext_search,
47
48
  node_similarity_search,
@@ -131,7 +132,7 @@ async def edge_search(
131
132
  edge_uuid_map = {edge.uuid: edge for result in search_results for edge in result}
132
133
 
133
134
  reranked_uuids: list[str] = []
134
- if config.reranker == EdgeReranker.rrf:
135
+ if config.reranker == EdgeReranker.rrf or config.reranker == EdgeReranker.episode_mentions:
135
136
  search_result_uuids = [[edge.uuid for edge in result] for result in search_results]
136
137
 
137
138
  reranked_uuids = rrf(search_result_uuids)
@@ -150,6 +151,9 @@ async def edge_search(
150
151
 
151
152
  reranked_edges = [edge_uuid_map[uuid] for uuid in reranked_uuids]
152
153
 
154
+ if config.reranker == EdgeReranker.episode_mentions:
155
+ reranked_edges.sort(reverse=True, key=lambda edge: len(edge.episodes))
156
+
153
157
  return reranked_edges
154
158
 
155
159
 
@@ -189,6 +193,8 @@ async def node_search(
189
193
  reranked_uuids: list[str] = []
190
194
  if config.reranker == NodeReranker.rrf:
191
195
  reranked_uuids = rrf(search_result_uuids)
196
+ elif config.reranker == NodeReranker.episode_mentions:
197
+ reranked_uuids = await episode_mentions_reranker(driver, search_result_uuids)
192
198
  elif config.reranker == NodeReranker.node_distance:
193
199
  if center_node_uuid is None:
194
200
  raise SearchRerankerError('No center node provided for Node Distance reranker')
graphiti_core/search/search_config.py CHANGED
@@ -42,11 +42,13 @@ class CommunitySearchMethod(Enum):
42
42
  class EdgeReranker(Enum):
43
43
  rrf = 'reciprocal_rank_fusion'
44
44
  node_distance = 'node_distance'
45
+ episode_mentions = 'episode_mentions'
45
46
 
46
47
 
47
48
  class NodeReranker(Enum):
48
49
  rrf = 'reciprocal_rank_fusion'
49
50
  node_distance = 'node_distance'
51
+ episode_mentions = 'episode_mentions'
50
52
 
51
53
 
52
54
  class CommunityReranker(Enum):
graphiti_core/search/search_config_recipes.py CHANGED
@@ -59,6 +59,14 @@ EDGE_HYBRID_SEARCH_NODE_DISTANCE = SearchConfig(
59
59
  )
60
60
  )
61
61
 
62
+ # performs a hybrid search over edges with episode mention reranking
63
+ EDGE_HYBRID_SEARCH_EPISODE_MENTIONS = SearchConfig(
64
+ edge_config=EdgeSearchConfig(
65
+ search_methods=[EdgeSearchMethod.bm25, EdgeSearchMethod.cosine_similarity],
66
+ reranker=EdgeReranker.episode_mentions,
67
+ )
68
+ )
69
+
62
70
  # performs a hybrid search over nodes with rrf reranking
63
71
  NODE_HYBRID_SEARCH_RRF = SearchConfig(
64
72
  node_config=NodeSearchConfig(
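The new `EDGE_HYBRID_SEARCH_EPISODE_MENTIONS` recipe (and the node-level counterpart added in the next hunk) plugs into the same low-level `search` coroutine that `graphiti.py` calls above. A sketch, assuming an already-initialized `Graphiti` instance and that `search` is importable from `graphiti_core.search.search` with the positional arguments used there:

```python
from graphiti_core.search.search import search
from graphiti_core.search.search_config_recipes import EDGE_HYBRID_SEARCH_EPISODE_MENTIONS


async def most_mentioned_facts(graphiti, query: str):
    # `graphiti` is assumed to be an initialized Graphiti instance.
    embedder = graphiti.llm_client.get_embedder()

    # Positional arguments mirror the call in graphiti.py above:
    # (driver, embedder, query, group_ids, config, center_node_uuid).
    results = await search(
        graphiti.driver, embedder, query, None, EDGE_HYBRID_SEARCH_EPISODE_MENTIONS, None
    )

    # With this reranker, edges referenced by the most episodes come first.
    return [(len(edge.episodes), edge.fact) for edge in results.edges]
```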
@@ -75,6 +83,14 @@ NODE_HYBRID_SEARCH_NODE_DISTANCE = SearchConfig(
75
83
  )
76
84
  )
77
85
 
86
+ # performs a hybrid search over nodes with episode mentions reranking
87
+ NODE_HYBRID_SEARCH_EPISODE_MENTIONS = SearchConfig(
88
+ node_config=NodeSearchConfig(
89
+ search_methods=[NodeSearchMethod.bm25, NodeSearchMethod.cosine_similarity],
90
+ reranker=NodeReranker.episode_mentions,
91
+ )
92
+ )
93
+
78
94
  # performs a hybrid search over communities with rrf reranking
79
95
  COMMUNITY_HYBRID_SEARCH_RRF = SearchConfig(
80
96
  community_config=CommunitySearchConfig(
graphiti_core/search/search_utils.py CHANGED
@@ -36,7 +36,9 @@ logger = logging.getLogger(__name__)
36
36
  RELEVANT_SCHEMA_LIMIT = 3
37
37
 
38
38
 
39
- async def get_mentioned_nodes(driver: AsyncDriver, episodes: list[EpisodicNode]):
39
+ async def get_mentioned_nodes(
40
+ driver: AsyncDriver, episodes: list[EpisodicNode]
41
+ ) -> list[EntityNode]:
40
42
  episode_uuids = [episode.uuid for episode in episodes]
41
43
  records, _, _ = await driver.execute_query(
42
44
  """
@@ -57,6 +59,29 @@ async def get_mentioned_nodes(driver: AsyncDriver, episodes: list[EpisodicNode])
57
59
  return nodes
58
60
 
59
61
 
62
+ async def get_communities_by_nodes(
63
+ driver: AsyncDriver, nodes: list[EntityNode]
64
+ ) -> list[CommunityNode]:
65
+ node_uuids = [node.uuid for node in nodes]
66
+ records, _, _ = await driver.execute_query(
67
+ """
68
+ MATCH (c:Community)-[:HAS_MEMBER]->(n:Entity) WHERE n.uuid IN $uuids
69
+ RETURN DISTINCT
70
+ c.uuid As uuid,
71
+ c.group_id AS group_id,
72
+ c.name AS name,
73
+ c.name_embedding AS name_embedding
74
+ c.created_at AS created_at,
75
+ c.summary AS summary
76
+ """,
77
+ uuids=node_uuids,
78
+ )
79
+
80
+ communities = [get_community_node_from_record(record) for record in records]
81
+
82
+ return communities
83
+
84
+
60
85
  async def edge_fulltext_search(
61
86
  driver: AsyncDriver,
62
87
  query: str,
@@ -634,3 +659,34 @@ async def node_distance_reranker(
634
659
  sorted_uuids.sort(key=lambda cur_uuid: scores[cur_uuid])
635
660
 
636
661
  return sorted_uuids
662
+
663
+
664
+ async def episode_mentions_reranker(driver: AsyncDriver, node_uuids: list[list[str]]) -> list[str]:
665
+ # use rrf as a preliminary ranker
666
+ sorted_uuids = rrf(node_uuids)
667
+ scores: dict[str, float] = {}
668
+
669
+ # Find the shortest path to center node
670
+ query = Query("""
671
+ MATCH (episode:Episodic)-[r:MENTIONS]->(n:Entity {uuid: $node_uuid})
672
+ RETURN count(*) AS score
673
+ """)
674
+
675
+ result_scores = await asyncio.gather(
676
+ *[
677
+ driver.execute_query(
678
+ query,
679
+ node_uuid=uuid,
680
+ )
681
+ for uuid in sorted_uuids
682
+ ]
683
+ )
684
+
685
+ for uuid, result in zip(sorted_uuids, result_scores):
686
+ record = result[0][0]
687
+ scores[uuid] = record['score']
688
+
689
+ # rerank on shortest distance
690
+ sorted_uuids.sort(key=lambda cur_uuid: scores[cur_uuid])
691
+
692
+ return sorted_uuids
graphiti_core/utils/maintenance/community_operations.py CHANGED
@@ -4,16 +4,25 @@ from collections import defaultdict
4
4
  from datetime import datetime
5
5
 
6
6
  from neo4j import AsyncDriver
7
+ from pydantic import BaseModel
7
8
 
8
9
  from graphiti_core.edges import CommunityEdge
9
10
  from graphiti_core.llm_client import LLMClient
10
- from graphiti_core.nodes import CommunityNode, EntityNode
11
+ from graphiti_core.nodes import CommunityNode, EntityNode, get_community_node_from_record
11
12
  from graphiti_core.prompts import prompt_library
12
13
  from graphiti_core.utils.maintenance.edge_operations import build_community_edges
13
14
 
15
+ MAX_COMMUNITY_BUILD_CONCURRENCY = 10
16
+
17
+
14
18
  logger = logging.getLogger(__name__)
15
19
 
16
20
 
21
+ class Neighbor(BaseModel):
22
+ node_uuid: str
23
+ edge_count: int
24
+
25
+
17
26
  async def build_community_projection(driver: AsyncDriver) -> str:
18
27
  records, _, _ = await driver.execute_query("""
19
28
  CALL gds.graph.project("communities", "Entity",
@@ -29,36 +38,96 @@ async def build_community_projection(driver: AsyncDriver) -> str:
29
38
  return records[0]['graph']
30
39
 
31
40
 
32
- async def destroy_projection(driver: AsyncDriver, projection_name: str):
33
- await driver.execute_query(
34
- """
35
- CALL gds.graph.drop($projection_name)
36
- """,
37
- projection_name=projection_name,
38
- )
39
-
41
+ async def get_community_clusters(driver: AsyncDriver) -> list[list[EntityNode]]:
42
+ community_clusters: list[list[EntityNode]] = []
40
43
 
41
- async def get_community_clusters(
42
- driver: AsyncDriver, projection_name: str
43
- ) -> list[list[EntityNode]]:
44
- records, _, _ = await driver.execute_query("""
45
- CALL gds.leiden.stream("communities")
46
- YIELD nodeId, communityId
47
- RETURN gds.util.asNode(nodeId).uuid AS entity_uuid, communityId
44
+ group_id_values, _, _ = await driver.execute_query("""
45
+ MATCH (n:Entity WHERE n.group_id IS NOT NULL)
46
+ RETURN
47
+ collect(DISTINCT n.group_id) AS group_ids
48
48
  """)
49
- community_map: dict[int, list[str]] = defaultdict(list)
50
- for record in records:
51
- community_map[record['communityId']].append(record['entity_uuid'])
52
49
 
53
- community_clusters: list[list[EntityNode]] = list(
54
- await asyncio.gather(
55
- *[EntityNode.get_by_uuids(driver, cluster) for cluster in community_map.values()]
50
+ group_ids = group_id_values[0]['group_ids']
51
+ for group_id in group_ids:
52
+ projection: dict[str, list[Neighbor]] = {}
53
+ nodes = await EntityNode.get_by_group_ids(driver, [group_id])
54
+ for node in nodes:
55
+ records, _, _ = await driver.execute_query(
56
+ """
57
+ MATCH (n:Entity {group_id: $group_id, uuid: $uuid})-[r:RELATES_TO]-(m: Entity {group_id: $group_id})
58
+ WITH count(r) AS count, m.uuid AS uuid
59
+ RETURN
60
+ uuid,
61
+ count
62
+ """,
63
+ uuid=node.uuid,
64
+ group_id=group_id,
65
+ )
66
+
67
+ projection[node.uuid] = [
68
+ Neighbor(node_uuid=record['uuid'], edge_count=record['count']) for record in records
69
+ ]
70
+
71
+ cluster_uuids = label_propagation(projection)
72
+
73
+ community_clusters.extend(
74
+ list(
75
+ await asyncio.gather(
76
+ *[EntityNode.get_by_uuids(driver, cluster) for cluster in cluster_uuids]
77
+ )
78
+ )
56
79
  )
57
- )
58
80
 
59
81
  return community_clusters
60
82
 
61
83
 
84
+ def label_propagation(projection: dict[str, list[Neighbor]]) -> list[list[str]]:
85
+ # Implement the label propagation community detection algorithm.
86
+ # 1. Start with each node being assigned its own community
87
+ # 2. Each node will take on the community of the plurality of its neighbors
88
+ # 3. Ties are broken by going to the largest community
89
+ # 4. Continue until no communities change during propagation
90
+
91
+ community_map = {uuid: i for i, uuid in enumerate(projection.keys())}
92
+
93
+ while True:
94
+ no_change = True
95
+ new_community_map: dict[str, int] = {}
96
+
97
+ for uuid, neighbors in projection.items():
98
+ curr_community = community_map[uuid]
99
+
100
+ community_candidates: dict[int, int] = defaultdict(int)
101
+ for neighbor in neighbors:
102
+ community_candidates[community_map[neighbor.node_uuid]] += neighbor.edge_count
103
+
104
+ community_lst = [
105
+ (count, community) for community, count in community_candidates.items()
106
+ ]
107
+
108
+ community_lst.sort(reverse=True)
109
+ community_candidate = community_lst[0][1] if len(community_lst) > 0 else -1
110
+
111
+ new_community = max(community_candidate, curr_community)
112
+
113
+ new_community_map[uuid] = new_community
114
+
115
+ if new_community != curr_community:
116
+ no_change = False
117
+
118
+ if no_change:
119
+ break
120
+
121
+ community_map = new_community_map
122
+
123
+ community_cluster_map = defaultdict(list)
124
+ for uuid, community in community_map.items():
125
+ community_cluster_map[community].append(uuid)
126
+
127
+ clusters = [cluster for cluster in community_cluster_map.values()]
128
+ return clusters
129
+
130
+
62
131
  async def summarize_pair(llm_client: LLMClient, summary_pair: tuple[str, str]) -> str:
63
132
  # Prepare context for LLM
64
133
  context = {'node_summaries': [{'summary': summary} for summary in summary_pair]}
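The GDS/Leiden projection is replaced by the pure-Python `label_propagation` helper above, which clusters entities by repeatedly adopting the dominant community of their neighbors. A toy illustration of that helper on a hand-built projection, needing no database; `Neighbor` and `label_propagation` are the names added in this release:

```python
from graphiti_core.utils.maintenance.community_operations import Neighbor, label_propagation

# Two connected pairs, a—b and c—d, with one RELATES_TO edge each.
projection = {
    'a': [Neighbor(node_uuid='b', edge_count=1)],
    'b': [Neighbor(node_uuid='a', edge_count=1)],
    'c': [Neighbor(node_uuid='d', edge_count=1)],
    'd': [Neighbor(node_uuid='c', edge_count=1)],
}

clusters = label_propagation(projection)
# Expected, per the propagation rules above: two clusters, ['a', 'b'] and ['c', 'd'].
print(clusters)
```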
@@ -85,7 +154,7 @@ async def generate_summary_description(llm_client: LLMClient, summary: str) -> s
85
154
 
86
155
 
87
156
  async def build_community(
88
- llm_client: LLMClient, community_cluster: list[EntityNode]
157
+ llm_client: LLMClient, community_cluster: list[EntityNode]
89
158
  ) -> tuple[CommunityNode, list[CommunityEdge]]:
90
159
  summaries = [entity.summary for entity in community_cluster]
91
160
  length = len(summaries)
@@ -99,7 +168,7 @@ async def build_community(
99
168
  *[
100
169
  summarize_pair(llm_client, (str(left_summary), str(right_summary)))
101
170
  for left_summary, right_summary in zip(
102
- summaries[: int(length / 2)], summaries[int(length / 2) :]
171
+ summaries[: int(length / 2)], summaries[int(length / 2):]
103
172
  )
104
173
  ]
105
174
  )
@@ -127,15 +196,18 @@ async def build_community(
127
196
 
128
197
 
129
198
  async def build_communities(
130
- driver: AsyncDriver, llm_client: LLMClient
199
+ driver: AsyncDriver, llm_client: LLMClient
131
200
  ) -> tuple[list[CommunityNode], list[CommunityEdge]]:
132
- projection = await build_community_projection(driver)
133
- community_clusters = await get_community_clusters(driver, projection)
201
+ community_clusters = await get_community_clusters(driver)
202
+
203
+ semaphore = asyncio.Semaphore(MAX_COMMUNITY_BUILD_CONCURRENCY)
204
+
205
+ async def limited_build_community(cluster):
206
+ async with semaphore:
207
+ return await build_community(llm_client, cluster)
134
208
 
135
209
  communities: list[tuple[CommunityNode, list[CommunityEdge]]] = list(
136
- await asyncio.gather(
137
- *[build_community(llm_client, cluster) for cluster in community_clusters]
138
- )
210
+ await asyncio.gather(*[limited_build_community(cluster) for cluster in community_clusters])
139
211
  )
140
212
 
141
213
  community_nodes: list[CommunityNode] = []
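`build_communities` now throttles its LLM summarization fan-out with `MAX_COMMUNITY_BUILD_CONCURRENCY` via a semaphore-wrapped coroutine. The same pattern in isolation, with a short sleep standing in for the `build_community` call:

```python
import asyncio

MAX_CONCURRENCY = 10  # mirrors MAX_COMMUNITY_BUILD_CONCURRENCY


async def build_one(cluster_id: int) -> str:
    # Stand-in for an expensive call such as build_community().
    await asyncio.sleep(0.1)
    return f'community-{cluster_id}'


async def build_all(cluster_ids: list[int]) -> list[str]:
    semaphore = asyncio.Semaphore(MAX_CONCURRENCY)

    async def limited(cluster_id: int) -> str:
        # At most MAX_CONCURRENCY coroutines execute the body at once.
        async with semaphore:
            return await build_one(cluster_id)

    return list(await asyncio.gather(*[limited(c) for c in cluster_ids]))


print(asyncio.run(build_all(list(range(25)))))
```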
@@ -144,7 +216,6 @@ async def build_communities(
144
216
  community_nodes.append(community[0])
145
217
  community_edges.extend(community[1])
146
218
 
147
- await destroy_projection(driver, projection)
148
219
  return community_nodes, community_edges
149
220
 
150
221
 
@@ -153,3 +224,87 @@ async def remove_communities(driver: AsyncDriver):
153
224
  MATCH (c:Community)
154
225
  DETACH DELETE c
155
226
  """)
227
+
228
+
229
+ async def determine_entity_community(
230
+ driver: AsyncDriver, entity: EntityNode
231
+ ) -> tuple[CommunityNode | None, bool]:
232
+ # Check if the node is already part of a community
233
+ records, _, _ = await driver.execute_query(
234
+ """
235
+ MATCH (c:Community)-[:HAS_MEMBER]->(n:Entity {uuid: $entity_uuid})
236
+ RETURN
237
+ c.uuid As uuid,
238
+ c.name AS name,
239
+ c.name_embedding AS name_embedding,
240
+ c.group_id AS group_id,
241
+ c.created_at AS created_at,
242
+ c.summary AS summary
243
+ """,
244
+ entity_uuid=entity.uuid,
245
+ )
246
+
247
+ if len(records) > 0:
248
+ return get_community_node_from_record(records[0]), False
249
+
250
+ # If the node has no community, add it to the mode community of surrounding entities
251
+ records, _, _ = await driver.execute_query(
252
+ """
253
+ MATCH (c:Community)-[:HAS_MEMBER]->(m:Entity)-[:RELATES_TO]-(n:Entity {uuid: $entity_uuid})
254
+ RETURN
255
+ c.uuid As uuid,
256
+ c.name AS name,
257
+ c.name_embedding AS name_embedding,
258
+ c.group_id AS group_id,
259
+ c.created_at AS created_at,
260
+ c.summary AS summary
261
+ """,
262
+ entity_uuid=entity.uuid,
263
+ )
264
+
265
+ communities: list[CommunityNode] = [
266
+ get_community_node_from_record(record) for record in records
267
+ ]
268
+
269
+ community_map: dict[str, int] = defaultdict(int)
270
+ for community in communities:
271
+ community_map[community.uuid] += 1
272
+
273
+ community_uuid = None
274
+ max_count = 0
275
+ for uuid, count in community_map.items():
276
+ if count > max_count:
277
+ community_uuid = uuid
278
+ max_count = count
279
+
280
+ if max_count == 0:
281
+ return None, False
282
+
283
+ for community in communities:
284
+ if community.uuid == community_uuid:
285
+ return community, True
286
+
287
+ return None, False
288
+
289
+
290
+ async def update_community(
291
+ driver: AsyncDriver, llm_client: LLMClient, embedder, entity: EntityNode
292
+ ):
293
+ community, is_new = await determine_entity_community(driver, entity)
294
+
295
+ if community is None:
296
+ return
297
+
298
+ new_summary = await summarize_pair(llm_client, (entity.summary, community.summary))
299
+ new_name = await generate_summary_description(llm_client, new_summary)
300
+
301
+ community.summary = new_summary
302
+ community.name = new_name
303
+
304
+ if is_new:
305
+ community_edge = (build_community_edges([entity], community, datetime.now()))[0]
306
+ await community_edge.save(driver)
307
+
308
+ await community.generate_name_embedding(embedder)
309
+
310
+ await community.save(driver)
graphiti_core/utils/maintenance/edge_operations.py CHANGED
@@ -163,6 +163,8 @@ async def dedupe_extracted_edges(
163
163
  if edge.uuid in duplicate_uuid_map:
164
164
  existing_uuid = duplicate_uuid_map[edge.uuid]
165
165
  existing_edge = edge_map[existing_uuid]
166
+ # Add current episode to the episodes list
167
+ existing_edge.episodes += edge.episodes
166
168
  edges.append(existing_edge)
167
169
  else:
168
170
  edges.append(edge)
graphiti_core/utils/maintenance/node_operations.py CHANGED
@@ -272,9 +272,12 @@ async def dedupe_node_list(
272
272
  unique_nodes = []
273
273
  uuid_map: dict[str, str] = {}
274
274
  for node_data in nodes_data:
275
- node = node_map[node_data['uuids'][0]]
276
- node.summary = node_data['summary']
277
- unique_nodes.append(node)
275
+ node_instance: EntityNode | None = node_map.get(node_data['uuids'][0])
276
+ if node_instance is None:
277
+ logger.warning(f'Node {node_data["uuids"][0]} not found in node map')
278
+ continue
279
+ node_instance.summary = node_data['summary']
280
+ unique_nodes.append(node_instance)
278
281
 
279
282
  for uuid in node_data['uuids'][1:]:
280
283
  uuid_value = node_map[node_data['uuids'][0]].uuid
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: graphiti-core
3
- Version: 0.3.2
3
+ Version: 0.3.4
4
4
  Summary: A temporal graph building library
5
5
  License: Apache-2.0
6
6
  Author: Paul Paliychuk
@@ -21,7 +21,7 @@ Description-Content-Type: text/markdown
21
21
 
22
22
  <div align="center">
23
23
 
24
- # Graphiti
24
+ <img width="350" alt="Graphiti-ts-small" src="https://github.com/user-attachments/assets/bbd02947-e435-4a05-b25a-bbbac36d52c8">
25
25
 
26
26
  ## Temporal Knowledge Graphs for Agentic Applications
27
27
 
@@ -37,7 +37,9 @@ Description-Content-Type: text/markdown
37
37
 
38
38
  </div>
39
39
 
40
- Graphiti builds dynamic, temporally aware Knowledge Graphs that represent complex, evolving relationships between entities over time. Graphiti ingests both unstructured and structured data, and the resulting graph may be queried using a fusion of time, full-text, semantic, and graph algorithm approaches.
40
+ Graphiti builds dynamic, temporally aware Knowledge Graphs that represent complex, evolving relationships between
41
+ entities over time. Graphiti ingests both unstructured and structured data, and the resulting graph may be queried using
42
+ a fusion of time, full-text, semantic, and graph algorithm approaches.
41
43
 
42
44
  <br />
43
45
 
@@ -47,25 +49,39 @@ Graphiti builds dynamic, temporally aware Knowledge Graphs that represent comple
47
49
 
48
50
  <br />
49
51
 
50
- Graphiti helps you create and query Knowledge Graphs that evolve over time. A knowledge graph is a network of interconnected facts, such as _“Kendra loves Adidas shoes.”_ Each fact is a “triplet” represented by two entities, or nodes (_”Kendra”_, _“Adidas shoes”_), and their relationship, or edge (_”loves”_). Knowledge Graphs have been explored extensively for information retrieval. What makes Graphiti unique is its ability to autonomously build a knowledge graph while handling changing relationships and maintaining historical context.
52
+ Graphiti helps you create and query Knowledge Graphs that evolve over time. A knowledge graph is a network of
53
+ interconnected facts, such as _“Kendra loves Adidas shoes.”_ Each fact is a “triplet” represented by two entities, or
54
+ nodes (_”Kendra”_, _“Adidas shoes”_), and their relationship, or edge (_”loves”_). Knowledge Graphs have been explored
55
+ extensively for information retrieval. What makes Graphiti unique is its ability to autonomously build a knowledge graph
56
+ while handling changing relationships and maintaining historical context.
51
57
 
52
58
  With Graphiti, you can build LLM applications such as:
53
59
 
54
- - Assistants that learn from user interactions, fusing personal knowledge with dynamic data from business systems like CRMs and billing platforms.
60
+ - Assistants that learn from user interactions, fusing personal knowledge with dynamic data from business systems like
61
+ CRMs and billing platforms.
55
62
  - Agents that autonomously execute complex tasks, reasoning with state changes from multiple dynamic sources.
56
63
 
57
- Graphiti supports a wide range of applications in sales, customer service, health, finance, and more, enabling long-term recall and state-based reasoning for both assistants and agents.
64
+ Graphiti supports a wide range of applications in sales, customer service, health, finance, and more, enabling long-term
65
+ recall and state-based reasoning for both assistants and agents.
58
66
 
59
67
  ## Why Graphiti?
60
68
 
61
- We were intrigued by Microsoft’s GraphRAG, which expanded on RAG text chunking by using a graph to better model a document corpus and making this representation available via semantic and graph search techniques. However, GraphRAG did not address our core problem: It's primarily designed for static documents and doesn't inherently handle temporal aspects of data.
62
-
63
- Graphiti is designed from the ground up to handle constantly changing information, hybrid semantic and graph search, and scale:
64
-
65
- - **Temporal Awareness:** Tracks changes in facts and relationships over time, enabling point-in-time queries. Graph edges include temporal metadata to record relationship lifecycles.
66
- - **Episodic Processing:** Ingests data as discrete episodes, maintaining data provenance and allowing incremental entity and relationship extraction.
67
- - **Hybrid Search:** Combines semantic and BM25 full-text search, with the ability to rerank results by distance from a central node e.g. “Kendra”.
68
- - **Scalable:** Designed for processing large datasets, with parallelization of LLM calls for bulk processing while preserving the chronology of events.
69
+ We were intrigued by Microsoft’s GraphRAG, which expanded on RAG text chunking by using a graph to better model a
70
+ document corpus and making this representation available via semantic and graph search techniques. However, GraphRAG did
71
+ not address our core problem: It's primarily designed for static documents and doesn't inherently handle temporal
72
+ aspects of data.
73
+
74
+ Graphiti is designed from the ground up to handle constantly changing information, hybrid semantic and graph search, and
75
+ scale:
76
+
77
+ - **Temporal Awareness:** Tracks changes in facts and relationships over time, enabling point-in-time queries. Graph
78
+ edges include temporal metadata to record relationship lifecycles.
79
+ - **Episodic Processing:** Ingests data as discrete episodes, maintaining data provenance and allowing incremental
80
+ entity and relationship extraction.
81
+ - **Hybrid Search:** Combines semantic and BM25 full-text search, with the ability to rerank results by distance from a
82
+ central node e.g. “Kendra”.
83
+ - **Scalable:** Designed for processing large datasets, with parallelization of LLM calls for bulk processing while
84
+ preserving the chronology of events.
69
85
  - **Supports Varied Sources:** Can ingest both unstructured text and structured JSON data.
70
86
 
71
87
  <p align="center">
@@ -91,7 +107,8 @@ Optional:
91
107
  - Anthropic or Groq API key (for alternative LLM providers)
92
108
 
93
109
  > [!TIP]
94
- > The simplest way to install Neo4j is via [Neo4j Desktop](https://neo4j.com/download/). It provides a user-friendly interface to manage Neo4j instances and databases.
110
+ > The simplest way to install Neo4j is via [Neo4j Desktop](https://neo4j.com/download/). It provides a user-friendly
111
+ > interface to manage Neo4j instances and databases.
95
112
 
96
113
  ```bash
97
114
  pip install graphiti-core
@@ -106,7 +123,8 @@ poetry add graphiti-core
106
123
  ## Quick Start
107
124
 
108
125
  > [!IMPORTANT]
109
- > Graphiti uses OpenAI for LLM inference and embedding. Ensure that an `OPENAI_API_KEY` is set in your environment. Support for Anthropic and Groq LLM inferences is available, too.
126
+ > Graphiti uses OpenAI for LLM inference and embedding. Ensure that an `OPENAI_API_KEY` is set in your environment.
127
+ > Support for Anthropic and Groq LLM inferences is available, too.
110
128
 
111
129
  ```python
112
130
  from graphiti_core import Graphiti
@@ -140,25 +158,25 @@ for i, episode in enumerate(episodes):
140
158
  results = await graphiti.search('Who was the California Attorney General?')
141
159
  [
142
160
  EntityEdge(
143
- │ uuid='3133258f738e487383f07b04e15d4ac0',
144
- │ source_node_uuid='2a85789b318d4e418050506879906e62',
145
- │ target_node_uuid='baf7781f445945989d6e4f927f881556',
146
- │ created_at=datetime.datetime(2024, 8, 26, 13, 13, 24, 861097),
147
- │ name='HELD_POSITION',
148
- # the fact reflects the updated state that Harris is
149
- # no longer the AG of California
150
- │ fact='Kamala Harris was the Attorney General of California',
151
- │ fact_embedding=[
152
- │ │ -0.009955154731869698,
153
- │ ...
154
- │ │ 0.00784289836883545
155
- ],
156
- │ episodes=['b43e98ad0a904088a76c67985caecc22'],
157
- │ expired_at=datetime.datetime(2024, 8, 26, 20, 18, 1, 53812),
158
- # These dates represent the date this edge was true.
159
- │ valid_at=datetime.datetime(2011, 1, 3, 0, 0, tzinfo=<UTC>),
160
- │ invalid_at=datetime.datetime(2017, 1, 3, 0, 0, tzinfo=<UTC>)
161
- )
161
+ │ uuid = '3133258f738e487383f07b04e15d4ac0',
162
+ │ source_node_uuid = '2a85789b318d4e418050506879906e62',
163
+ │ target_node_uuid = 'baf7781f445945989d6e4f927f881556',
164
+ │ created_at = datetime.datetime(2024, 8, 26, 13, 13, 24, 861097),
165
+ │ name = 'HELD_POSITION',
166
+ # the fact reflects the updated state that Harris is
167
+ # no longer the AG of California
168
+ │ fact = 'Kamala Harris was the Attorney General of California',
169
+ │ fact_embedding = [
170
+ │ │ -0.009955154731869698,
171
+ │ ...
172
+ │ │ 0.00784289836883545
173
+ │],
174
+ │ episodes = ['b43e98ad0a904088a76c67985caecc22'],
175
+ │ expired_at = datetime.datetime(2024, 8, 26, 20, 18, 1, 53812),
176
+ # These dates represent the date this edge was true.
177
+ │ valid_at = datetime.datetime(2011, 1, 3, 0, 0, tzinfo= < UTC >),
178
+ │ invalid_at = datetime.datetime(2017, 1, 3, 0, 0, tzinfo= < UTC >)
179
+ )
162
180
  ]
163
181
 
164
182
  # Rerank search results based on graph distance
@@ -191,14 +209,16 @@ Graphiti is under active development. We aim to maintain API stability while wor
191
209
  - [ ] Achieving good performance with different LLM and embedding models
192
210
  - [ ] Creating a dedicated embedder interface
193
211
  - [ ] Supporting custom graph schemas:
194
- - Allow developers to provide their own defined node and edge classes when ingesting episodes
195
- - Enable more flexible knowledge representation tailored to specific use cases
212
+ - Allow developers to provide their own defined node and edge classes when ingesting episodes
213
+ - Enable more flexible knowledge representation tailored to specific use cases
196
214
  - [ ] Enhancing retrieval capabilities with more robust and configurable options
197
215
  - [ ] Expanding test coverage to ensure reliability and catch edge cases
198
216
 
199
217
  ## Contributing
200
218
 
201
- We encourage and appreciate all forms of contributions, whether it's code, documentation, addressing GitHub Issues, or answering questions in the Graphiti Discord channel. For detailed guidelines on code contributions, please refer to [CONTRIBUTING](CONTRIBUTING.md).
219
+ We encourage and appreciate all forms of contributions, whether it's code, documentation, addressing GitHub Issues, or
220
+ answering questions in the Graphiti Discord channel. For detailed guidelines on code contributions, please refer
221
+ to [CONTRIBUTING](CONTRIBUTING.md).
202
222
 
203
223
  ## Support
204
224
 
@@ -1,17 +1,17 @@
1
1
  graphiti_core/__init__.py,sha256=e5SWFkRiaUwfprYIeIgVIh7JDedNiloZvd3roU-0aDY,55
2
- graphiti_core/edges.py,sha256=bKzlrIrzofggRckgL3RA3MKLTgCKwkPVMB-tVA6Vd_A,9130
2
+ graphiti_core/edges.py,sha256=IZWpf2pMYFAXQ5-h0wQozMqXLRDb2iUKH91MeeJpttY,13549
3
3
  graphiti_core/errors.py,sha256=BOwL0VVnoUuMjK3EUYKvqefsbsYhRhcKcVWXaX9hanw,1259
4
- graphiti_core/graphiti.py,sha256=ViKKrF84VENTIR6WFtGpZ3FCZqC9B9__lKVsiXPCjV8,24563
4
+ graphiti_core/graphiti.py,sha256=FWfPOlP58_lSR_uO7S7-xGVCI2qc5iAKIxJczzjl980,25981
5
5
  graphiti_core/helpers.py,sha256=qQqZJBkc_z5f3x5axPfCKK_QHLRybvWNFb57WXNENfQ,769
6
6
  graphiti_core/llm_client/__init__.py,sha256=PA80TSMeX-sUXITXEAxMDEt3gtfZgcJrGJUcyds1mSo,207
7
- graphiti_core/llm_client/anthropic_client.py,sha256=3zsOkewLFxBhKe90OkmpfkvrcwykgGwRoqII05Jno_Q,2410
8
- graphiti_core/llm_client/client.py,sha256=7-gEhOKxjdkllV_xS2Ikn-a4QzK9NE63CANnZgdn3VY,3438
9
- graphiti_core/llm_client/config.py,sha256=d1oZ9tt7QBQlbph7v-0HjItb6otK9_-IwF8kkRYL2rc,2359
7
+ graphiti_core/llm_client/anthropic_client.py,sha256=16cWm_fQSUvJTSmgISBcNF8vytIhk-c5mmMK0Xd7SPE,2557
8
+ graphiti_core/llm_client/client.py,sha256=g3vEBNV0E24HdKR3DmqjY8cqqr1CDlvrdh7SaiCUkDc,3470
9
+ graphiti_core/llm_client/config.py,sha256=YIuR5XTINvxsEGDcpPXCqDWfWXGHTB4GB0k5DSRD7Rg,2360
10
10
  graphiti_core/llm_client/errors.py,sha256=-qlWwv1X-UjfsFIiNl-7yJIYvPwi7z8srVRfX4-s6uk,814
11
- graphiti_core/llm_client/groq_client.py,sha256=clQvQ9-zCRoqK9NGMx9Icyl4lUXmM70lZgVquXikxBo,2334
12
- graphiti_core/llm_client/openai_client.py,sha256=VqzWdSrHuNfF2l1aRDua00NHhtP9UR7VNtLcu8h9vLc,2343
11
+ graphiti_core/llm_client/groq_client.py,sha256=j467rL2tNaKplpTOP9pZNUCxG3rrHAEE26CBDk24jzw,2481
12
+ graphiti_core/llm_client/openai_client.py,sha256=rfiCTRdL-I5AtnNpCbNTiiMjVb1ZwJ9dvp2OKrbycWU,4008
13
13
  graphiti_core/llm_client/utils.py,sha256=0KT4XxTVw3c0__HLDj3F8kNR4K_qY0hT0TH-pQZ_IZw,1126
14
- graphiti_core/nodes.py,sha256=w2cbyA7g_0eSm7axFWraG4opYxQz7-mPCxkcNHdefJY,12154
14
+ graphiti_core/nodes.py,sha256=cdFC9619oGp4KbIUgqL8O_Raji74-FabGcYP6-mLrW4,13875
15
15
  graphiti_core/prompts/__init__.py,sha256=EA-x9xUki9l8wnu2l8ek_oNf75-do5tq5hVq7Zbv8Kw,101
16
16
  graphiti_core/prompts/dedupe_edges.py,sha256=DUNHdIudj50FAjkla4nc68tSFSD2yjmYHBw-Bb7ph20,6529
17
17
  graphiti_core/prompts/dedupe_nodes.py,sha256=BZ9S-PB9SSGjc5Oo8ivdgA6rZx3OGOFhKtwrBlQ0bm0,7269
@@ -24,20 +24,20 @@ graphiti_core/prompts/models.py,sha256=cvx_Bv5RMFUD_5IUawYrbpOKLPHogai7_bm7YXrSz
24
24
  graphiti_core/prompts/summarize_nodes.py,sha256=FLuZpGTABgcxuIDkx_IKH115nHEw0rIaFhcGlWveAMc,2357
25
25
  graphiti_core/py.typed,sha256=vlmmzQOt7bmeQl9L3XJP4W6Ry0iiELepnOrinKz5KQg,79
26
26
  graphiti_core/search/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
27
- graphiti_core/search/search.py,sha256=BtyZBhwAt_IbU8dqm-DeRAIovkFDTdFly5IBGxs4yy8,8101
28
- graphiti_core/search/search_config.py,sha256=nOLU_k2p_sM0-JBYci8rWhc-mERv8uWkDn0GOYqZjL8,2081
29
- graphiti_core/search/search_config_recipes.py,sha256=CJIhYjXPgSm20cY9IkXQxArCgwLvjz-4xB7mr4NylWg,2857
30
- graphiti_core/search/search_utils.py,sha256=vFxLMt0CB_1Avn32d1PFsJPtJ26MCEdoq-BSBx2uCGQ,22802
27
+ graphiti_core/search/search.py,sha256=ktsPp_moKRtrt7HGoTMSzW22LI1LYB_hpP6ARGyqIRE,8466
28
+ graphiti_core/search/search_config.py,sha256=d8w9RDO55G2bwbjYQBaD6gXqEWK1-NsDANrNibYB6t8,2165
29
+ graphiti_core/search/search_config_recipes.py,sha256=_VJqvYB70e8Jke3hsbeQF3Bdogn2MubpYeAQe15M2Jo,3450
30
+ graphiti_core/search/search_utils.py,sha256=4seDnQct4dc2RDGIZDluG8A7UlMKcFHFv4SrE741Ogc,24374
31
31
  graphiti_core/utils/__init__.py,sha256=cJAcMnBZdHBQmWrZdU1PQ1YmaL75bhVUkyVpIPuOyns,260
32
32
  graphiti_core/utils/bulk_utils.py,sha256=JtoYTZPCigPa3n2E43Oe7QhFZRTA_QKNGy1jVgklHag,12614
33
33
  graphiti_core/utils/maintenance/__init__.py,sha256=4b9sfxqyFZMLwxxS2lnQ6_wBr3xrJRIqfAWOidK8EK0,388
34
- graphiti_core/utils/maintenance/community_operations.py,sha256=vbMKY_BfgsrsL-strsK-853MJSXhLeLsSVSWS79OaYo,4931
35
- graphiti_core/utils/maintenance/edge_operations.py,sha256=nXsCwB4YEbXWyNQaII31QNbCrABB7lguuZ3i1M0fTas,11333
34
+ graphiti_core/utils/maintenance/community_operations.py,sha256=2edxME6vk5yn4bwLjz7l_6-8QKE9aMK0fUXMA_FLyWI,9823
35
+ graphiti_core/utils/maintenance/edge_operations.py,sha256=s6Uuu8k_VhFx89cUUD7iRQCZ6get5NrTxeNEqVj9Duw,11440
36
36
  graphiti_core/utils/maintenance/graph_data_operations.py,sha256=d27efEVLvQTmoKE7Hq21wAWSmfqkKzw7jMbVo1zKggE,6489
37
- graphiti_core/utils/maintenance/node_operations.py,sha256=WXJFU1AprYjmHSq6rZhTIX4JFHtF5W9LbzA2Tfksp5Q,8838
37
+ graphiti_core/utils/maintenance/node_operations.py,sha256=QAg4KQkSAOXx9QRaUp7t6DCaztZlzeOBC3__57FCs_o,9025
38
38
  graphiti_core/utils/maintenance/temporal_operations.py,sha256=BzfGDm96w4HcUEsaWTHUBt5S8dNmDQL1eX6AuBL-XFM,8135
39
39
  graphiti_core/utils/maintenance/utils.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
40
- graphiti_core-0.3.2.dist-info/LICENSE,sha256=KCUwCyDXuVEgmDWkozHyniRyWjnWUWjkuDHfU6o3JlA,11325
41
- graphiti_core-0.3.2.dist-info/METADATA,sha256=zmcAQu2r7J1odYWWcYQ91fpahTbTu4YZnxEXN-1Qge0,9323
42
- graphiti_core-0.3.2.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
43
- graphiti_core-0.3.2.dist-info/RECORD,,
40
+ graphiti_core-0.3.4.dist-info/LICENSE,sha256=KCUwCyDXuVEgmDWkozHyniRyWjnWUWjkuDHfU6o3JlA,11325
41
+ graphiti_core-0.3.4.dist-info/METADATA,sha256=vwNQn0BpKaXxH6PdgL_UdUJOw6Ejdjn2Xz9r6Pnq9NA,9395
42
+ graphiti_core-0.3.4.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
43
+ graphiti_core-0.3.4.dist-info/RECORD,,