graphiti-core 0.17.4__py3-none-any.whl → 0.17.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of graphiti-core might be problematic. Click here for more details.

graphiti_core/edges.py CHANGED
@@ -50,8 +50,7 @@ ENTITY_EDGE_RETURN: LiteralString = """
50
50
  e.expired_at AS expired_at,
51
51
  e.valid_at AS valid_at,
52
52
  e.invalid_at AS invalid_at,
53
- properties(e) AS attributes
54
- """
53
+ properties(e) AS attributes"""
55
54
 
56
55
 
57
56
  class Edge(BaseModel, ABC):
@@ -303,21 +302,34 @@ class EntityEdge(Edge):
303
302
  group_ids: list[str],
304
303
  limit: int | None = None,
305
304
  uuid_cursor: str | None = None,
305
+ with_embeddings: bool = False,
306
306
  ):
307
307
  cursor_query: LiteralString = 'AND e.uuid < $uuid' if uuid_cursor else ''
308
308
  limit_query: LiteralString = 'LIMIT $limit' if limit is not None else ''
309
+ with_embeddings_query: LiteralString = (
310
+ """,
311
+ e.fact_embedding AS fact_embedding
312
+ """
313
+ if with_embeddings
314
+ else ''
315
+ )
309
316
 
310
- records, _, _ = await driver.execute_query(
317
+ query: LiteralString = (
318
+ """
319
+ MATCH (n:Entity)-[e:RELATES_TO]->(m:Entity)
320
+ WHERE e.group_id IN $group_ids
311
321
  """
312
- MATCH (n:Entity)-[e:RELATES_TO]->(m:Entity)
313
- WHERE e.group_id IN $group_ids
314
- """
315
322
  + cursor_query
316
323
  + ENTITY_EDGE_RETURN
324
+ + with_embeddings_query
317
325
  + """
318
326
  ORDER BY e.uuid DESC
319
327
  """
320
- + limit_query,
328
+ + limit_query
329
+ )
330
+
331
+ records, _, _ = await driver.execute_query(
332
+ query,
321
333
  group_ids=group_ids,
322
334
  uuid=uuid_cursor,
323
335
  limit=limit,
@@ -334,8 +346,8 @@ class EntityEdge(Edge):
334
346
  async def get_by_node_uuid(cls, driver: GraphDriver, node_uuid: str):
335
347
  query: LiteralString = (
336
348
  """
337
- MATCH (n:Entity {uuid: $node_uuid})-[e:RELATES_TO]-(m:Entity)
338
- """
349
+ MATCH (n:Entity {uuid: $node_uuid})-[e:RELATES_TO]-(m:Entity)
350
+ """
339
351
  + ENTITY_EDGE_RETURN
340
352
  )
341
353
  records, _, _ = await driver.execute_query(query, node_uuid=node_uuid, routing_='r')
@@ -456,6 +468,7 @@ def get_entity_edge_from_record(record: Any) -> EntityEdge:
456
468
  source_node_uuid=record['source_node_uuid'],
457
469
  target_node_uuid=record['target_node_uuid'],
458
470
  fact=record['fact'],
471
+ fact_embedding=record.get('fact_embedding'),
459
472
  name=record['name'],
460
473
  group_id=record['group_id'],
461
474
  episodes=record['episodes'],
graphiti_core/graphiti.py CHANGED
@@ -640,6 +640,7 @@ class Graphiti:
640
640
  self.clients, extracted_nodes_bulk, episode_context, entity_types
641
641
  )
642
642
 
643
+ # Create Episodic Edges
643
644
  episodic_edges: list[EpisodicEdge] = []
644
645
  for episode_uuid, nodes in nodes_by_episode.items():
645
646
  episodic_edges.extend(build_episodic_edges(nodes, episode_uuid, now))
@@ -695,18 +696,112 @@ class Graphiti:
695
696
 
696
697
  hydrated_nodes = [node for nodes in new_hydrated_nodes for node in nodes]
697
698
 
698
- # TODO: Resolve nodes and edges against the existing graph
699
- edges_by_uuid: dict[str, EntityEdge] = {
700
- edge.uuid: edge for edges in edges_by_episode.values() for edge in edges
701
- }
699
+ # Update nodes_by_uuid map with the hydrated nodes
700
+ for hydrated_node in hydrated_nodes:
701
+ nodes_by_uuid[hydrated_node.uuid] = hydrated_node
702
+
703
+ # Resolve nodes and edges against the existing graph
704
+ nodes_by_episode_unique: dict[str, list[EntityNode]] = {}
705
+ nodes_uuid_set: set[str] = set()
706
+ for episode, _ in episode_context:
707
+ nodes_by_episode_unique[episode.uuid] = []
708
+ nodes = [nodes_by_uuid[node.uuid] for node in nodes_by_episode[episode.uuid]]
709
+ for node in nodes:
710
+ if node.uuid not in nodes_uuid_set:
711
+ nodes_by_episode_unique[episode.uuid].append(node)
712
+ nodes_uuid_set.add(node.uuid)
713
+
714
+ node_results = await semaphore_gather(
715
+ *[
716
+ resolve_extracted_nodes(
717
+ self.clients,
718
+ nodes_by_episode_unique[episode.uuid],
719
+ episode,
720
+ previous_episodes,
721
+ entity_types,
722
+ )
723
+ for episode, previous_episodes in episode_context
724
+ ]
725
+ )
726
+
727
+ resolved_nodes: list[EntityNode] = []
728
+ uuid_map: dict[str, str] = {}
729
+ node_duplicates: list[tuple[EntityNode, EntityNode]] = []
730
+ for result in node_results:
731
+ resolved_nodes.extend(result[0])
732
+ uuid_map.update(result[1])
733
+ node_duplicates.extend(result[2])
734
+
735
+ # Update nodes_by_uuid map with the resolved nodes
736
+ for resolved_node in resolved_nodes:
737
+ nodes_by_uuid[resolved_node.uuid] = resolved_node
738
+
739
+ # update nodes_by_episode_unique mapping
740
+ for episode_uuid, nodes in nodes_by_episode_unique.items():
741
+ updated_nodes: list[EntityNode] = []
742
+ for node in nodes:
743
+ updated_node_uuid = uuid_map.get(node.uuid, node.uuid)
744
+ updated_node = nodes_by_uuid[updated_node_uuid]
745
+ updated_nodes.append(updated_node)
746
+
747
+ nodes_by_episode_unique[episode_uuid] = updated_nodes
748
+
749
+ hydrated_nodes_results: list[list[EntityNode]] = await semaphore_gather(
750
+ *[
751
+ extract_attributes_from_nodes(
752
+ self.clients,
753
+ nodes_by_episode_unique[episode.uuid],
754
+ episode,
755
+ previous_episodes,
756
+ entity_types,
757
+ )
758
+ for episode, previous_episodes in episode_context
759
+ ]
760
+ )
761
+
762
+ final_hydrated_nodes = [node for nodes in hydrated_nodes_results for node in nodes]
763
+
764
+ edges_by_episode_unique: dict[str, list[EntityEdge]] = {}
765
+ edges_uuid_set: set[str] = set()
766
+ for episode_uuid, edges in edges_by_episode.items():
767
+ edges_with_updated_pointers = resolve_edge_pointers(edges, uuid_map)
768
+ edges_by_episode_unique[episode_uuid] = []
769
+
770
+ for edge in edges_with_updated_pointers:
771
+ if edge.uuid not in edges_uuid_set:
772
+ edges_by_episode_unique[episode_uuid].append(edge)
773
+ edges_uuid_set.add(edge.uuid)
774
+
775
+ edge_results = await semaphore_gather(
776
+ *[
777
+ resolve_extracted_edges(
778
+ self.clients,
779
+ edges_by_episode_unique[episode.uuid],
780
+ episode,
781
+ hydrated_nodes,
782
+ edge_types or {},
783
+ edge_type_map or edge_type_map_default,
784
+ )
785
+ for episode in episodes
786
+ ]
787
+ )
788
+
789
+ resolved_edges: list[EntityEdge] = []
790
+ invalidated_edges: list[EntityEdge] = []
791
+ for result in edge_results:
792
+ resolved_edges.extend(result[0])
793
+ invalidated_edges.extend(result[1])
794
+
795
+ # Resolved pointers for episodic edges
796
+ resolved_episodic_edges = resolve_edge_pointers(episodic_edges, uuid_map)
702
797
 
703
798
  # save data to KG
704
799
  await add_nodes_and_edges_bulk(
705
800
  self.driver,
706
801
  episodes,
707
- episodic_edges,
708
- hydrated_nodes,
709
- list(edges_by_uuid.values()),
802
+ resolved_episodic_edges,
803
+ final_hydrated_nodes,
804
+ resolved_edges + invalidated_edges,
710
805
  self.embedder,
711
806
  )
712
807
 
graphiti_core/nodes.py CHANGED
@@ -46,8 +46,7 @@ ENTITY_NODE_RETURN: LiteralString = """
46
46
  n.created_at AS created_at,
47
47
  n.summary AS summary,
48
48
  labels(n) AS labels,
49
- properties(n) AS attributes
50
- """
49
+ properties(n) AS attributes"""
51
50
 
52
51
 
53
52
  class EpisodeType(Enum):
@@ -335,8 +334,8 @@ class EntityNode(Node):
335
334
  async def get_by_uuid(cls, driver: GraphDriver, uuid: str):
336
335
  query = (
337
336
  """
338
- MATCH (n:Entity {uuid: $uuid})
339
- """
337
+ MATCH (n:Entity {uuid: $uuid})
338
+ """
340
339
  + ENTITY_NODE_RETURN
341
340
  )
342
341
  records, _, _ = await driver.execute_query(
@@ -374,9 +373,17 @@ class EntityNode(Node):
374
373
  group_ids: list[str],
375
374
  limit: int | None = None,
376
375
  uuid_cursor: str | None = None,
376
+ with_embeddings: bool = False,
377
377
  ):
378
378
  cursor_query: LiteralString = 'AND n.uuid < $uuid' if uuid_cursor else ''
379
379
  limit_query: LiteralString = 'LIMIT $limit' if limit is not None else ''
380
+ with_embeddings_query: LiteralString = (
381
+ """,
382
+ n.name_embedding AS name_embedding
383
+ """
384
+ if with_embeddings
385
+ else ''
386
+ )
380
387
 
381
388
  records, _, _ = await driver.execute_query(
382
389
  """
@@ -384,6 +391,7 @@ class EntityNode(Node):
384
391
  """
385
392
  + cursor_query
386
393
  + ENTITY_NODE_RETURN
394
+ + with_embeddings_query
387
395
  + """
388
396
  ORDER BY n.uuid DESC
389
397
  """
@@ -546,6 +554,7 @@ def get_entity_node_from_record(record: Any) -> EntityNode:
546
554
  entity_node = EntityNode(
547
555
  uuid=record['uuid'],
548
556
  name=record['name'],
557
+ name_embedding=record.get('name_embedding'),
549
558
  group_id=record['group_id'],
550
559
  labels=record['labels'],
551
560
  created_at=parse_db_date(record['created_at']), # type: ignore
graphiti_core/prompts/dedupe_edges.py CHANGED
@@ -141,9 +141,9 @@ def resolve_edge(context: dict[str, Any]) -> list[Message]:
141
141
 
142
142
 
143
143
  Task:
144
- If the NEW FACT represents the same factual information as any fact in EXISTING FACTS, return the idx of the duplicate fact.
144
+ If the NEW FACT represents identical factual information of one or more in EXISTING FACTS, return the idx of the duplicate facts.
145
145
  Facts with similar information that contain key differences should not be marked as duplicates.
146
- If the NEW FACT is not a duplicate of any of the EXISTING FACTS, return -1.
146
+ If the NEW FACT is not a duplicate of any of the EXISTING FACTS, return an empty list.
147
147
 
148
148
  Given the predefined FACT TYPES, determine if the NEW FACT should be classified as one of these types.
149
149
  Return the fact type as fact_type or DEFAULT if NEW FACT is not one of the FACT TYPES.
@@ -153,8 +153,8 @@ def resolve_edge(context: dict[str, Any]) -> list[Message]:
153
153
  If there are no contradicted facts, return an empty list.
154
154
 
155
155
  Guidelines:
156
- 1. The facts do not need to be completely identical to be duplicates, they just need to express the same information.
157
- 2. Some facts may be very similar but will have key differences, particularly around numeric values in the facts.
156
+ 1. Some facts may be very similar but will have key differences, particularly around numeric values in the facts.
157
+ Do not mark these facts as duplicates.
158
158
  """,
159
159
  ),
160
160
  ]
graphiti_core/utils/bulk_utils.py CHANGED
@@ -317,6 +317,12 @@ async def dedupe_edges_bulk(
317
317
  for existing_edge in existing_edges:
318
318
  # Approximate BM25 by checking for word overlaps (this is faster than creating many in-memory indices)
319
319
  # This approach will cast a wider net than BM25, which is ideal for this use case
320
+ if (
321
+ edge.source_node_uuid != existing_edge.source_node_uuid
322
+ or edge.target_node_uuid != existing_edge.target_node_uuid
323
+ ):
324
+ continue
325
+
320
326
  edge_words = set(edge.fact.lower().split())
321
327
  existing_edge_words = set(existing_edge.fact.lower().split())
322
328
  has_overlap = not edge_words.isdisjoint(existing_edge_words)
@@ -345,6 +351,7 @@ async def dedupe_edges_bulk(
345
351
  ]
346
352
  )
347
353
 
354
+ # For now we won't track edge invalidation
348
355
  duplicate_pairs: list[tuple[str, str]] = []
349
356
  for i, (_, _, duplicates) in enumerate(bulk_edge_resolutions):
350
357
  episode, edge, candidates = dedupe_tuples[i]
graphiti_core-0.17.5.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: graphiti-core
3
- Version: 0.17.4
3
+ Version: 0.17.5
4
4
  Summary: A temporal graph building library
5
5
  Project-URL: Homepage, https://help.getzep.com/graphiti/graphiti/overview
6
6
  Project-URL: Repository, https://github.com/getzep/graphiti
graphiti_core-0.17.5.dist-info/RECORD CHANGED
@@ -1,11 +1,11 @@
1
1
  graphiti_core/__init__.py,sha256=e5SWFkRiaUwfprYIeIgVIh7JDedNiloZvd3roU-0aDY,55
2
- graphiti_core/edges.py,sha256=oo9PUxH8QJ09IByl91bbmez4lXIVTD7pvZvIEg6jOgg,15619
2
+ graphiti_core/edges.py,sha256=-SSP6rhk8Dl8LwUZ08GHymJTT5pNDtzb3BV-6z1fBYY,16030
3
3
  graphiti_core/errors.py,sha256=cH_v9TPgEPeQE6GFOHIg5TvejpUCBddGarMY2Whxbwc,2707
4
4
  graphiti_core/graph_queries.py,sha256=KfWDp8xDnPa9bcHskw8NeMpeeHBtZWBCosVdu1Iwv34,7076
5
- graphiti_core/graphiti.py,sha256=xRtP6ko7OSByBwgMQcx2X4WFoW5zr_ciiCz2xMhlfrk,35136
5
+ graphiti_core/graphiti.py,sha256=rlz1Q2zF6vcFqfLoFP7jbxZvHNKgy8eVcTHDqoIykjs,39244
6
6
  graphiti_core/graphiti_types.py,sha256=rL-9bvnLobunJfXU4hkD6mAj14pofKp_wq8QsFDZwDU,1035
7
7
  graphiti_core/helpers.py,sha256=b4998WOrqgzesNyMlw7UKIdo3x3uo_BwheztqMblwzE,4885
8
- graphiti_core/nodes.py,sha256=X1mv0PQ5DKPWAzodc1Xiafv-zDDhNQSuF6wpfB3I7vE,18276
8
+ graphiti_core/nodes.py,sha256=AcqHvhNWyapQwBSuziMvPJ-HnOr4Pv1-OiYsEodJcAA,18613
9
9
  graphiti_core/py.typed,sha256=vlmmzQOt7bmeQl9L3XJP4W6Ry0iiELepnOrinKz5KQg,79
10
10
  graphiti_core/cross_encoder/__init__.py,sha256=hry59vz21x-AtGZ0MJ7ugw0HTwJkXiddpp_Yqnwsen0,723
11
11
  graphiti_core/cross_encoder/bge_reranker_client.py,sha256=y3TfFxZh0Yvj6HUShmfUm6MC7OPXwWUlv1Qe5HF3S3I,1797
@@ -40,7 +40,7 @@ graphiti_core/models/edges/edge_db_queries.py,sha256=YyGc0UT4eeOHQrYuXfAGIwxiX4x
40
40
  graphiti_core/models/nodes/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
41
41
  graphiti_core/models/nodes/node_db_queries.py,sha256=AQgRGVO-GgFWfLq1G6k8s86WItwpXruy3Mj4DBli-vM,2145
42
42
  graphiti_core/prompts/__init__.py,sha256=EA-x9xUki9l8wnu2l8ek_oNf75-do5tq5hVq7Zbv8Kw,101
43
- graphiti_core/prompts/dedupe_edges.py,sha256=_26fV_iLufmt6iWpJ7QduhrNVW3G5dGrx4EbVLuvxNk,5783
43
+ graphiti_core/prompts/dedupe_edges.py,sha256=WlW18HjhuKOXjxSbvrmwk0yH0tqA4t89ntG7LOTq_k4,5724
44
44
  graphiti_core/prompts/dedupe_nodes.py,sha256=GBHSFfkumiQQU8qDnO-kUoSefzDNSzYUpzUl6hPcakc,7740
45
45
  graphiti_core/prompts/eval.py,sha256=gnBQTmwsCl3Qvwpcm7aieVszzo6y1sMCUT8jQiKTvvE,5317
46
46
  graphiti_core/prompts/extract_edge_dates.py,sha256=3Drs3CmvP0gJN5BidWSxrNvLet3HPoTybU3BUIAoc0Y,4218
@@ -61,7 +61,7 @@ graphiti_core/search/search_utils.py,sha256=616pGqC95PgW7DljgP5TDybzDKyO6IqrSPEE
61
61
  graphiti_core/telemetry/__init__.py,sha256=5kALLDlU9bb2v19CdN7qVANsJWyfnL9E60J6FFgzm3o,226
62
62
  graphiti_core/telemetry/telemetry.py,sha256=47LrzOVBCcZxsYPsnSxWFiztHoxYKKxPwyRX0hnbDGc,3230
63
63
  graphiti_core/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
64
- graphiti_core/utils/bulk_utils.py,sha256=ovJrbT21RBz9RFIsyaVzMw0vLRAuCua2nhqPP7FTsO8,14797
64
+ graphiti_core/utils/bulk_utils.py,sha256=jMpGEyiAd1uCllGzxEpsV8pwLOq0DbWK4Ph_QhdAsz4,15069
65
65
  graphiti_core/utils/datetime_utils.py,sha256=Ti-2tnrDFRzBsbfblzsHybsM3jaDLP4-VT2t0VhpIzU,1357
66
66
  graphiti_core/utils/maintenance/__init__.py,sha256=vW4H1KyapTl-OOz578uZABYcpND4wPx3Vt6aAPaXh78,301
67
67
  graphiti_core/utils/maintenance/community_operations.py,sha256=ROKo9_5Jj3RqfTrD9wJjlDRSF6iUyXUY4czkc9RGVdw,9905
@@ -71,7 +71,7 @@ graphiti_core/utils/maintenance/node_operations.py,sha256=ZnopNRTNdBjBotQ2uQiI7E
71
71
  graphiti_core/utils/maintenance/temporal_operations.py,sha256=mJkw9xLB4W2BsLfC5POr0r-PHWL9SIfNj_l_xu0B5ug,3410
72
72
  graphiti_core/utils/maintenance/utils.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
73
73
  graphiti_core/utils/ontology_utils/entity_types_utils.py,sha256=QJX5cG0GSSNF_Mm_yrldr69wjVAbN_MxLhOSznz85Hk,1279
74
- graphiti_core-0.17.4.dist-info/METADATA,sha256=kTDMAVw9rynMn8XP5YFA65WHoqMQHe1Y1IgivFQnygs,23812
75
- graphiti_core-0.17.4.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
76
- graphiti_core-0.17.4.dist-info/licenses/LICENSE,sha256=KCUwCyDXuVEgmDWkozHyniRyWjnWUWjkuDHfU6o3JlA,11325
77
- graphiti_core-0.17.4.dist-info/RECORD,,
74
+ graphiti_core-0.17.5.dist-info/METADATA,sha256=WGz5dFLlBQ5foRMl7gnecaQzjvJlngqhcFEkLexJAmo,23812
75
+ graphiti_core-0.17.5.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
76
+ graphiti_core-0.17.5.dist-info/licenses/LICENSE,sha256=KCUwCyDXuVEgmDWkozHyniRyWjnWUWjkuDHfU6o3JlA,11325
77
+ graphiti_core-0.17.5.dist-info/RECORD,,