graphiti-core 0.12.1__py3-none-any.whl → 0.12.3__py3-none-any.whl

This diff shows the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in the public registry.

graphiti_core/graphiti.py CHANGED

@@ -63,6 +63,7 @@ from graphiti_core.utils.maintenance.community_operations import (
     update_community,
 )
 from graphiti_core.utils.maintenance.edge_operations import (
+    build_duplicate_of_edges,
     build_episodic_edges,
     extract_edges,
     resolve_extracted_edge,
@@ -375,7 +376,7 @@ class Graphiti:
         )
 
         # Extract edges and resolve nodes
-        (nodes, uuid_map), extracted_edges = await semaphore_gather(
+        (nodes, uuid_map, node_duplicates), extracted_edges = await semaphore_gather(
             resolve_extracted_nodes(
                 self.clients,
                 extracted_nodes,
@@ -384,7 +385,13 @@ class Graphiti:
                 entity_types,
             ),
             extract_edges(
-                self.clients, episode, extracted_nodes, previous_episodes, group_id, edge_types
+                self.clients,
+                episode,
+                extracted_nodes,
+                previous_episodes,
+                edge_type_map or edge_type_map_default,
+                group_id,
+                edge_types,
             ),
         )
@@ -404,7 +411,9 @@ class Graphiti:
             ),
         )
 
-        entity_edges = resolved_edges + invalidated_edges
+        duplicate_of_edges = build_duplicate_of_edges(episode, now, node_duplicates)
+
+        entity_edges = resolved_edges + invalidated_edges + duplicate_of_edges
 
         episodic_edges = build_episodic_edges(nodes, episode, now)
 
@@ -691,7 +700,7 @@ class Graphiti:
         if edge.fact_embedding is None:
             await edge.generate_embedding(self.embedder)
 
-        resolved_nodes, uuid_map = await resolve_extracted_nodes(
+        resolved_nodes, uuid_map, _ = await resolve_extracted_nodes(
             self.clients,
             [source_node, target_node],
         )
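
Note (not part of the diff): resolve_extracted_nodes now returns a three-element tuple, so direct callers outside add_episode need to adapt. A minimal sketch under the names shown above, assuming clients, extracted_nodes, episode, and previous_episodes are already set up as in add_episode:

# Sketch only: adapt a caller to the widened return value in 0.12.3.
nodes, uuid_map, node_duplicates = await resolve_extracted_nodes(
    clients,
    extracted_nodes,
    episode,
    previous_episodes,
)

# The third element feeds the new IS_DUPLICATE_OF edge builder; callers that do
# not need it (as add_triplet above) can discard it with `_`.
duplicate_of_edges = build_duplicate_of_edges(episode, utc_now(), node_duplicates)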
graphiti_core/prompts/dedupe_nodes.py CHANGED

@@ -26,12 +26,16 @@ class NodeDuplicate(BaseModel):
     id: int = Field(..., description='integer id of the entity')
     duplicate_idx: int = Field(
         ...,
-        description='idx of the duplicate node. If no duplicate nodes are found, default to -1.',
+        description='idx of the duplicate entity. If no duplicate entities are found, default to -1.',
     )
     name: str = Field(
         ...,
         description='Name of the entity. Should be the most complete and descriptive name possible.',
     )
+    additional_duplicates: list[int] = Field(
+        ...,
+        description='idx of additional duplicate entities. Use this list if the entity has multiple duplicates among existing entities.',
+    )
 
 
 class NodeResolutions(BaseModel):
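
Note (not part of the diff): the new field lets the dedupe LLM report several matches per extracted entity instead of a single duplicate_idx. An illustrative payload under the updated schema; all values below are hypothetical:

# Illustrative only: a NodeDuplicate under the 0.12.3 schema.
resolution = NodeDuplicate(
    id=0,                          # index of the extracted entity being resolved
    duplicate_idx=2,               # primary duplicate among existing entities, -1 if none
    name='Acme Corporation',       # most complete name for the entity
    additional_duplicates=[5, 7],  # further duplicate indices, new in this release
)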
graphiti_core/prompts/extract_edges.py CHANGED

@@ -97,6 +97,8 @@ Only extract facts that:
 - The FACT TYPES provide a list of the most important types of facts, make sure to extract facts of these types
 - The FACT TYPES are not an exhaustive list, extract all facts from the message even if they do not fit into one
 of the FACT TYPES
+- The FACT TYPES each contain their fact_type_signature which represents the entity types which that fact_type is defined for.
+A Type of Entity in the signature represents any extracted entity (it is a generic universal type for all entities).
 
 You may use information from the PREVIOUS MESSAGES only to disambiguate references or support continuity.
 
graphiti_core/prompts/extract_nodes.py CHANGED

@@ -90,6 +90,8 @@ def extract_message(context: dict[str, Any]) -> list[Message]:
 Instructions:
 
 You are given a conversation context and a CURRENT MESSAGE. Your task is to extract **entity nodes** mentioned **explicitly or implicitly** in the CURRENT MESSAGE.
+Pronoun references such as he/she/they or this/that/those should be disambiguated to the names of the
+reference entities.
 
 1. **Speaker Extraction**: Always extract the speaker (the part before the colon `:` in each dialogue line) as the first entity node.
    - If the speaker is mentioned again in the message, treat both mentions as a **single entity**.
graphiti_core/utils/bulk_utils.py CHANGED

@@ -198,6 +198,7 @@ async def extract_nodes_and_edges_bulk(
                 episode,
                 extracted_nodes_bulk[i],
                 previous_episodes_list[i],
+                {},
                 episode.group_id,
             )
             for i, episode in enumerate(episodes)
graphiti_core/utils/maintenance/edge_operations.py CHANGED

@@ -19,7 +19,9 @@ from datetime import datetime
 from time import time
 
 from pydantic import BaseModel
+from typing_extensions import LiteralString
 
+from graphiti_core.driver.driver import GraphDriver
 from graphiti_core.edges import (
     CommunityEdge,
     EntityEdge,
@@ -27,7 +29,7 @@ from graphiti_core.edges import (
     create_entity_edge_embeddings,
 )
 from graphiti_core.graphiti_types import GraphitiClients
-from graphiti_core.helpers import MAX_REFLEXION_ITERATIONS, semaphore_gather
+from graphiti_core.helpers import DEFAULT_DATABASE, MAX_REFLEXION_ITERATIONS, semaphore_gather
 from graphiti_core.llm_client import LLMClient
 from graphiti_core.llm_client.config import ModelSize
 from graphiti_core.nodes import CommunityNode, EntityNode, EpisodicNode
@@ -61,6 +63,28 @@ def build_episodic_edges(
     return episodic_edges
 
 
+def build_duplicate_of_edges(
+    episode: EpisodicNode,
+    created_at: datetime,
+    duplicate_nodes: list[tuple[EntityNode, EntityNode]],
+) -> list[EntityEdge]:
+    is_duplicate_of_edges: list[EntityEdge] = [
+        EntityEdge(
+            source_node_uuid=source_node.uuid,
+            target_node_uuid=target_node.uuid,
+            name='IS_DUPLICATE_OF',
+            group_id=episode.group_id,
+            fact=f'{source_node.name} is a duplicate of {target_node.name}',
+            episodes=[episode.uuid],
+            created_at=created_at,
+            valid_at=created_at,
+        )
+        for source_node, target_node in duplicate_nodes
+    ]
+
+    return is_duplicate_of_edges
+
+
 def build_community_edges(
     entity_nodes: list[EntityNode],
     community_node: CommunityNode,
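
Note (not part of the diff): the new helper simply maps each (source, target) pair to an EntityEdge named IS_DUPLICATE_OF, scoped to the episode's group_id. A short usage sketch; alice_node, alice_dup_node, and episode are placeholders, not values from the package:

# Sketch only: build an IS_DUPLICATE_OF edge for one duplicate pair.
edges = build_duplicate_of_edges(
    episode,
    utc_now(),
    [(alice_node, alice_dup_node)],
)
assert edges[0].name == 'IS_DUPLICATE_OF'
assert edges[0].fact == f'{alice_node.name} is a duplicate of {alice_dup_node.name}'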
@@ -84,6 +108,7 @@ async def extract_edges(
     episode: EpisodicNode,
     nodes: list[EntityNode],
     previous_episodes: list[EpisodicNode],
+    edge_type_map: dict[tuple[str, str], list[str]],
     group_id: str = '',
     edge_types: dict[str, BaseModel] | None = None,
 ) -> list[EntityEdge]:
@@ -92,10 +117,17 @@
     extract_edges_max_tokens = 16384
     llm_client = clients.llm_client
 
+    edge_type_signature_map: dict[str, tuple[str, str]] = {
+        edge_type: signature
+        for signature, edge_types in edge_type_map.items()
+        for edge_type in edge_types
+    }
+
     edge_types_context = (
         [
             {
                 'fact_type_name': type_name,
+                'fact_type_signature': edge_type_signature_map.get(type_name, ('Entity', 'Entity')),
                 'fact_type_description': type_model.__doc__,
             }
             for type_name, type_model in edge_types.items()
@@ -107,7 +139,10 @@
     # Prepare context for LLM
     context = {
         'episode_content': episode.content,
-        'nodes': [{'id': idx, 'name': node.name} for idx, node in enumerate(nodes)],
+        'nodes': [
+            {'id': idx, 'name': node.name, 'entity_types': node.labels}
+            for idx, node in enumerate(nodes)
+        ],
         'previous_episodes': [ep.content for ep in previous_episodes],
         'reference_time': episode.valid_at,
         'edge_types': edge_types_context,
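
Note (not part of the diff): edge_type_map keys lists of fact type names by a (source entity type, target entity type) signature, and the inversion above yields a per-fact-type lookup. A hypothetical map for illustration; 'Person', 'Company', and 'WORKS_AT' are example names, not identifiers shipped with graphiti-core:

# Hypothetical custom ontology passed as edge_type_map.
edge_type_map = {
    ('Person', 'Company'): ['WORKS_AT'],
    ('Entity', 'Entity'): ['IS_RELATED_TO'],
}

# Inverted as in extract_edges above:
# {'WORKS_AT': ('Person', 'Company'), 'IS_RELATED_TO': ('Entity', 'Entity')}
# Fact types missing from the map fall back to the generic ('Entity', 'Entity') signature.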
@@ -570,3 +605,34 @@ async def dedupe_edge_list(
             unique_edges.append(edge)
 
     return unique_edges
+
+
+async def filter_existing_duplicate_of_edges(
+    driver: GraphDriver, duplicates_node_tuples: list[tuple[EntityNode, EntityNode]]
+) -> list[tuple[EntityNode, EntityNode]]:
+    query: LiteralString = """
+        UNWIND $duplicate_node_uuids AS duplicate_tuple
+        MATCH (n:Entity {uuid: duplicate_tuple[0]})-[r:RELATES_TO {name: 'IS_DUPLICATE_OF'}]->(m:Entity {uuid: duplicate_tuple[1]})
+        RETURN DISTINCT
+            n.uuid AS source_uuid,
+            m.uuid AS target_uuid
+    """
+
+    duplicate_nodes_map = {
+        (source.uuid, target.uuid): (source, target) for source, target in duplicates_node_tuples
+    }
+
+    records, _, _ = await driver.execute_query(
+        query,
+        duplicate_node_uuids=list(duplicate_nodes_map.keys()),
+        database_=DEFAULT_DATABASE,
+        routing_='r',
+    )
+
+    # Remove duplicates that already have the IS_DUPLICATE_OF edge
+    for record in records:
+        duplicate_tuple = (record.get('source_uuid'), record.get('target_uuid'))
+        if duplicate_nodes_map.get(duplicate_tuple):
+            duplicate_nodes_map.pop(duplicate_tuple)
+
+    return list(duplicate_nodes_map.values())
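
Note (not part of the diff): the filter keeps IS_DUPLICATE_OF edges from being re-created on every episode. A call sketch, assuming an initialized GraphDriver and placeholder node objects:

# Sketch only: drop pairs whose IS_DUPLICATE_OF edge is already persisted.
candidate_pairs = [(resolved_node, existing_node)]
new_pairs = await filter_existing_duplicate_of_edges(driver, candidate_pairs)
# new_pairs contains only tuples with no existing IS_DUPLICATE_OF edge in the graph.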
graphiti_core/utils/maintenance/node_operations.py CHANGED

@@ -40,6 +40,7 @@ from graphiti_core.search.search_config import SearchResults
 from graphiti_core.search.search_config_recipes import NODE_HYBRID_SEARCH_RRF
 from graphiti_core.search.search_filters import SearchFilters
 from graphiti_core.utils.datetime_utils import utc_now
+from graphiti_core.utils.maintenance.edge_operations import filter_existing_duplicate_of_edges
 
 logger = logging.getLogger(__name__)
 
@@ -225,8 +226,9 @@ async def resolve_extracted_nodes(
     episode: EpisodicNode | None = None,
     previous_episodes: list[EpisodicNode] | None = None,
     entity_types: dict[str, BaseModel] | None = None,
-) -> tuple[list[EntityNode], dict[str, str]]:
+) -> tuple[list[EntityNode], dict[str, str], list[tuple[EntityNode, EntityNode]]]:
     llm_client = clients.llm_client
+    driver = clients.driver
 
     search_results: list[SearchResults] = await semaphore_gather(
         *[
@@ -295,9 +297,10 @@
 
     resolved_nodes: list[EntityNode] = []
     uuid_map: dict[str, str] = {}
+    node_duplicates: list[tuple[EntityNode, EntityNode]] = []
     for resolution in node_resolutions:
-        resolution_id = resolution.get('id', -1)
-        duplicate_idx = resolution.get('duplicate_idx', -1)
+        resolution_id: int = resolution.get('id', -1)
+        duplicate_idx: int = resolution.get('duplicate_idx', -1)
 
         extracted_node = extracted_nodes[resolution_id]
 
@@ -312,9 +315,21 @@
         resolved_nodes.append(resolved_node)
         uuid_map[extracted_node.uuid] = resolved_node.uuid
 
+        additional_duplicates: list[int] = resolution.get('additional_duplicates', [])
+        for idx in additional_duplicates:
+            existing_node = existing_nodes[idx] if idx < len(existing_nodes) else resolved_node
+            if existing_node == resolved_node:
+                continue
+
+            node_duplicates.append((resolved_node, existing_nodes[idx]))
+
     logger.debug(f'Resolved nodes: {[(n.name, n.uuid) for n in resolved_nodes]}')
 
-    return resolved_nodes, uuid_map
+    new_node_duplicates: list[
+        tuple[EntityNode, EntityNode]
+    ] = await filter_existing_duplicate_of_edges(driver, node_duplicates)
+
+    return resolved_nodes, uuid_map, new_node_duplicates
 
 
 async def extract_attributes_from_nodes(
graphiti_core-0.12.1.dist-info/METADATA → graphiti_core-0.12.3.dist-info/METADATA

@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: graphiti-core
-Version: 0.12.1
+Version: 0.12.3
 Summary: A temporal graph building library
 License: Apache-2.0
 Author: Paul Paliychuk
@@ -13,11 +13,12 @@ Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
 Classifier: Programming Language :: Python :: 3.13
 Provides-Extra: anthropic
+Provides-Extra: falkord-db
 Provides-Extra: google-genai
 Provides-Extra: groq
 Requires-Dist: anthropic (>=0.49.0) ; extra == "anthropic"
 Requires-Dist: diskcache (>=5.6.3)
-Requires-Dist: falkordb (>=1.1.2,<2.0.0)
+Requires-Dist: falkordb (>=1.1.2,<2.0.0) ; extra == "falkord-db"
 Requires-Dist: google-genai (>=1.8.0) ; extra == "google-genai"
 Requires-Dist: groq (>=0.2.0) ; extra == "groq"
 Requires-Dist: neo4j (>=5.26.0)
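
Note (not part of the diff): per this metadata change, falkordb is no longer a default dependency. Users of the FalkorDB backend would need to request the new extra explicitly (spelled falkord-db in this release), for example:

pip install "graphiti-core[falkord-db]==0.12.3"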
graphiti_core-0.12.1.dist-info/RECORD → graphiti_core-0.12.3.dist-info/RECORD

@@ -16,7 +16,7 @@ graphiti_core/embedder/openai.py,sha256=bIThUoLMeGlHG2-3VikzK6JZfOHKn4PKvUMx5sHx
 graphiti_core/embedder/voyage.py,sha256=gQhdcz2IYPSyOcDn3w8aHToVS3KQhyZrUBm4vqr3WcE,2224
 graphiti_core/errors.py,sha256=Nib1uQx2cO_VOizupmRjpFfmuRg-hFAVqTtZAuBehR8,2405
 graphiti_core/graph_queries.py,sha256=KfWDp8xDnPa9bcHskw8NeMpeeHBtZWBCosVdu1Iwv34,7076
-graphiti_core/graphiti.py,sha256=oMLoKs87aoT3WhPVPuKCyd9H0i_ZTXl7FleuCNnBVms,28013
+graphiti_core/graphiti.py,sha256=cCUYxwNoQiEtzbPNvTEwfYOrK9UePFpRCUeFxl1-vg0,28335
 graphiti_core/graphiti_types.py,sha256=rL-9bvnLobunJfXU4hkD6mAj14pofKp_wq8QsFDZwDU,1035
 graphiti_core/helpers.py,sha256=sfC1M6KefKaZll6FQcpcNvWnCN6iCvBSJksAkGTItT4,3059
 graphiti_core/llm_client/__init__.py,sha256=QgBWUiCeBp6YiA_xqyrDvJ9jIyy1hngH8g7FWahN3nw,776
@@ -38,11 +38,11 @@ graphiti_core/models/nodes/node_db_queries.py,sha256=AQgRGVO-GgFWfLq1G6k8s86WItw
 graphiti_core/nodes.py,sha256=kdJY-Ugyk6J2x70w4EF_EoFNgy7D3TMOMVSUfEth6rE,18665
 graphiti_core/prompts/__init__.py,sha256=EA-x9xUki9l8wnu2l8ek_oNf75-do5tq5hVq7Zbv8Kw,101
 graphiti_core/prompts/dedupe_edges.py,sha256=AFVC1EQ0TvNkSp0G7QZmIh3YpGg9FVXo1_sT3TlRqA8,5473
-graphiti_core/prompts/dedupe_nodes.py,sha256=OIhMkKexRpQQ0dEr4NW_WE1ta7wLO3RibJA7Ge41uDg,7407
+graphiti_core/prompts/dedupe_nodes.py,sha256=F_TNygUfAf0R9Md5aHnD-VQb58_0MTMN9UQjopcWJvY,7615
 graphiti_core/prompts/eval.py,sha256=gnBQTmwsCl3Qvwpcm7aieVszzo6y1sMCUT8jQiKTvvE,5317
 graphiti_core/prompts/extract_edge_dates.py,sha256=3Drs3CmvP0gJN5BidWSxrNvLet3HPoTybU3BUIAoc0Y,4218
-graphiti_core/prompts/extract_edges.py,sha256=9NdxAKyXHiFOSuyAzzxRM38BmqtynGEbtmMUr3VTrtM,6513
-graphiti_core/prompts/extract_nodes.py,sha256=EYuX99P8ly7pSOBz87ZA9fJF8V6g6epbVj5Cq0YM8h8,9624
+graphiti_core/prompts/extract_edges.py,sha256=1zP-Xo70UbHm1uSJIcVHhHrXIpH9ILU6ieunLPaRdnA,6761
+graphiti_core/prompts/extract_nodes.py,sha256=bJ3Inp4tMDsXgTPHKOyAtasVHEeLjbY7wfV3LwdvCsc,9747
 graphiti_core/prompts/invalidate_edges.py,sha256=yfpcs_pyctnoM77ULPZXEtKW0oHr1MeLsJzC5yrE-o4,3547
 graphiti_core/prompts/lib.py,sha256=DCyHePM4_q-CptTpEXGO_dBv9k7xDtclEaB1dGu7EcI,4092
 graphiti_core/prompts/models.py,sha256=NgxdbPHJpBEcpbXovKyScgpBc73Q-GIW-CBDlBtDjto,894
@@ -57,17 +57,17 @@ graphiti_core/search/search_filters.py,sha256=jG30nMWX03xoT9ohgyHNu_Xes8GwjIF2eT
 graphiti_core/search/search_helpers.py,sha256=G5Ceaq5Pfgx0Weelqgeylp_pUHwiBnINaUYsDbURJbE,2636
 graphiti_core/search/search_utils.py,sha256=k9KKN4sYde0Hqw9BKb5T-8q-3hInIPwq9aYoGfheq6E,34877
 graphiti_core/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-graphiti_core/utils/bulk_utils.py,sha256=8JgxHnp8whiNJEJjd6Pp4wVeNOquymbi4rczV1ygsBs,16176
+graphiti_core/utils/bulk_utils.py,sha256=RPPTAqBRg6iR1T6g6TDpfUGvkYmTEyVNrVPz_y91f-s,16196
 graphiti_core/utils/datetime_utils.py,sha256=Ti-2tnrDFRzBsbfblzsHybsM3jaDLP4-VT2t0VhpIzU,1357
 graphiti_core/utils/maintenance/__init__.py,sha256=vW4H1KyapTl-OOz578uZABYcpND4wPx3Vt6aAPaXh78,301
 graphiti_core/utils/maintenance/community_operations.py,sha256=2rhRqtL9gDbjXKO4-S0nGpaWvS4ck5rFiazZiogIJao,10088
-graphiti_core/utils/maintenance/edge_operations.py,sha256=74HcCWaq7rfQuUH8RsWPr-OYE-5qe8s2BaiBYfkMNT0,19272
+graphiti_core/utils/maintenance/edge_operations.py,sha256=Fwu2TLmQF_9EVcA-uUlt1ZiGC6RILIfKDr9W7R4gAno,21633
 graphiti_core/utils/maintenance/graph_data_operations.py,sha256=OHuiAyP1Z7dfR90dWVQ87TJQO83P0sQihJyr4WIhOhk,5362
-graphiti_core/utils/maintenance/node_operations.py,sha256=5kouyCkzAzIGYUBtViMhNWYL95tA1__TsSWuLj0_RdY,14795
+graphiti_core/utils/maintenance/node_operations.py,sha256=cqSZ9CmyaH-WeZlzZEmR8Q-GlaQFR31sjw_qcObCyrw,15564
 graphiti_core/utils/maintenance/temporal_operations.py,sha256=mJkw9xLB4W2BsLfC5POr0r-PHWL9SIfNj_l_xu0B5ug,3410
 graphiti_core/utils/maintenance/utils.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 graphiti_core/utils/ontology_utils/entity_types_utils.py,sha256=QJX5cG0GSSNF_Mm_yrldr69wjVAbN_MxLhOSznz85Hk,1279
-graphiti_core-0.12.1.dist-info/LICENSE,sha256=KCUwCyDXuVEgmDWkozHyniRyWjnWUWjkuDHfU6o3JlA,11325
-graphiti_core-0.12.1.dist-info/METADATA,sha256=fnlE-myapsbDDM_d23H4FbZ88RBrChm-iF9jhIHGhMo,15590
-graphiti_core-0.12.1.dist-info/WHEEL,sha256=fGIA9gx4Qxk2KDKeNJCbOEwSrmLtjWCwzBz351GyrPQ,88
-graphiti_core-0.12.1.dist-info/RECORD,,
+graphiti_core-0.12.3.dist-info/LICENSE,sha256=KCUwCyDXuVEgmDWkozHyniRyWjnWUWjkuDHfU6o3JlA,11325
+graphiti_core-0.12.3.dist-info/METADATA,sha256=47AFbvfxgjSo0FNl3n33NPJrJlCJjGGd9-PKeg9SIIw,15641
+graphiti_core-0.12.3.dist-info/WHEEL,sha256=fGIA9gx4Qxk2KDKeNJCbOEwSrmLtjWCwzBz351GyrPQ,88
+graphiti_core-0.12.3.dist-info/RECORD,,