graphiti-core 0.3.20__tar.gz → 0.4.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of graphiti-core might be problematic. (The source page linked to further details here; the link target is not preserved in this text.)

Files changed (60)
  1. {graphiti_core-0.3.20 → graphiti_core-0.4.0}/PKG-INFO +6 -5
  2. {graphiti_core-0.3.20 → graphiti_core-0.4.0}/README.md +2 -2
  3. {graphiti_core-0.3.20 → graphiti_core-0.4.0}/graphiti_core/graphiti.py +50 -71
  4. {graphiti_core-0.3.20 → graphiti_core-0.4.0}/graphiti_core/helpers.py +1 -0
  5. {graphiti_core-0.3.20 → graphiti_core-0.4.0}/graphiti_core/models/edges/edge_db_queries.py +16 -0
  6. {graphiti_core-0.3.20 → graphiti_core-0.4.0}/graphiti_core/models/nodes/node_db_queries.py +16 -0
  7. {graphiti_core-0.3.20 → graphiti_core-0.4.0}/graphiti_core/nodes.py +2 -2
  8. {graphiti_core-0.3.20 → graphiti_core-0.4.0}/graphiti_core/prompts/dedupe_edges.py +9 -93
  9. graphiti_core-0.4.0/graphiti_core/prompts/dedupe_nodes.py +117 -0
  10. {graphiti_core-0.3.20 → graphiti_core-0.4.0}/graphiti_core/prompts/extract_edge_dates.py +14 -7
  11. graphiti_core-0.4.0/graphiti_core/prompts/extract_edges.py +114 -0
  12. graphiti_core-0.4.0/graphiti_core/prompts/extract_nodes.py +165 -0
  13. {graphiti_core-0.3.20 → graphiti_core-0.4.0}/graphiti_core/prompts/summarize_nodes.py +40 -1
  14. {graphiti_core-0.3.20 → graphiti_core-0.4.0}/graphiti_core/search/search.py +20 -0
  15. {graphiti_core-0.3.20 → graphiti_core-0.4.0}/graphiti_core/search/search_config_recipes.py +35 -0
  16. {graphiti_core-0.3.20 → graphiti_core-0.4.0}/graphiti_core/search/search_utils.py +5 -4
  17. {graphiti_core-0.3.20 → graphiti_core-0.4.0}/graphiti_core/utils/bulk_utils.py +3 -3
  18. {graphiti_core-0.3.20 → graphiti_core-0.4.0}/graphiti_core/utils/maintenance/community_operations.py +3 -3
  19. {graphiti_core-0.3.20 → graphiti_core-0.4.0}/graphiti_core/utils/maintenance/edge_operations.py +87 -55
  20. {graphiti_core-0.3.20 → graphiti_core-0.4.0}/graphiti_core/utils/maintenance/node_operations.py +122 -52
  21. {graphiti_core-0.3.20 → graphiti_core-0.4.0}/pyproject.toml +6 -6
  22. graphiti_core-0.3.20/graphiti_core/prompts/dedupe_nodes.py +0 -199
  23. graphiti_core-0.3.20/graphiti_core/prompts/extract_edges.py +0 -140
  24. graphiti_core-0.3.20/graphiti_core/prompts/extract_nodes.py +0 -189
  25. {graphiti_core-0.3.20 → graphiti_core-0.4.0}/LICENSE +0 -0
  26. {graphiti_core-0.3.20 → graphiti_core-0.4.0}/graphiti_core/__init__.py +0 -0
  27. {graphiti_core-0.3.20 → graphiti_core-0.4.0}/graphiti_core/cross_encoder/__init__.py +0 -0
  28. {graphiti_core-0.3.20 → graphiti_core-0.4.0}/graphiti_core/cross_encoder/bge_reranker_client.py +0 -0
  29. {graphiti_core-0.3.20 → graphiti_core-0.4.0}/graphiti_core/cross_encoder/client.py +0 -0
  30. {graphiti_core-0.3.20 → graphiti_core-0.4.0}/graphiti_core/cross_encoder/openai_reranker_client.py +2 -2
  31. {graphiti_core-0.3.20 → graphiti_core-0.4.0}/graphiti_core/edges.py +0 -0
  32. {graphiti_core-0.3.20 → graphiti_core-0.4.0}/graphiti_core/embedder/__init__.py +0 -0
  33. {graphiti_core-0.3.20 → graphiti_core-0.4.0}/graphiti_core/embedder/client.py +0 -0
  34. {graphiti_core-0.3.20 → graphiti_core-0.4.0}/graphiti_core/embedder/openai.py +0 -0
  35. {graphiti_core-0.3.20 → graphiti_core-0.4.0}/graphiti_core/embedder/voyage.py +0 -0
  36. {graphiti_core-0.3.20 → graphiti_core-0.4.0}/graphiti_core/errors.py +0 -0
  37. {graphiti_core-0.3.20 → graphiti_core-0.4.0}/graphiti_core/llm_client/__init__.py +0 -0
  38. {graphiti_core-0.3.20 → graphiti_core-0.4.0}/graphiti_core/llm_client/anthropic_client.py +0 -0
  39. {graphiti_core-0.3.20 → graphiti_core-0.4.0}/graphiti_core/llm_client/client.py +0 -0
  40. {graphiti_core-0.3.20 → graphiti_core-0.4.0}/graphiti_core/llm_client/config.py +0 -0
  41. {graphiti_core-0.3.20 → graphiti_core-0.4.0}/graphiti_core/llm_client/errors.py +0 -0
  42. {graphiti_core-0.3.20 → graphiti_core-0.4.0}/graphiti_core/llm_client/groq_client.py +0 -0
  43. {graphiti_core-0.3.20 → graphiti_core-0.4.0}/graphiti_core/llm_client/openai_client.py +0 -0
  44. {graphiti_core-0.3.20 → graphiti_core-0.4.0}/graphiti_core/llm_client/utils.py +0 -0
  45. {graphiti_core-0.3.20 → graphiti_core-0.4.0}/graphiti_core/models/__init__.py +0 -0
  46. {graphiti_core-0.3.20 → graphiti_core-0.4.0}/graphiti_core/models/edges/__init__.py +0 -0
  47. {graphiti_core-0.3.20 → graphiti_core-0.4.0}/graphiti_core/models/nodes/__init__.py +0 -0
  48. {graphiti_core-0.3.20 → graphiti_core-0.4.0}/graphiti_core/prompts/__init__.py +0 -0
  49. {graphiti_core-0.3.20 → graphiti_core-0.4.0}/graphiti_core/prompts/eval.py +0 -0
  50. {graphiti_core-0.3.20 → graphiti_core-0.4.0}/graphiti_core/prompts/invalidate_edges.py +0 -0
  51. {graphiti_core-0.3.20 → graphiti_core-0.4.0}/graphiti_core/prompts/lib.py +0 -0
  52. {graphiti_core-0.3.20 → graphiti_core-0.4.0}/graphiti_core/prompts/models.py +0 -0
  53. {graphiti_core-0.3.20 → graphiti_core-0.4.0}/graphiti_core/py.typed +0 -0
  54. {graphiti_core-0.3.20 → graphiti_core-0.4.0}/graphiti_core/search/__init__.py +0 -0
  55. {graphiti_core-0.3.20 → graphiti_core-0.4.0}/graphiti_core/search/search_config.py +0 -0
  56. {graphiti_core-0.3.20 → graphiti_core-0.4.0}/graphiti_core/utils/__init__.py +0 -0
  57. {graphiti_core-0.3.20 → graphiti_core-0.4.0}/graphiti_core/utils/maintenance/__init__.py +0 -0
  58. {graphiti_core-0.3.20 → graphiti_core-0.4.0}/graphiti_core/utils/maintenance/graph_data_operations.py +0 -0
  59. {graphiti_core-0.3.20 → graphiti_core-0.4.0}/graphiti_core/utils/maintenance/temporal_operations.py +0 -0
  60. {graphiti_core-0.3.20 → graphiti_core-0.4.0}/graphiti_core/utils/maintenance/utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: graphiti-core
3
- Version: 0.3.20
3
+ Version: 0.4.0
4
4
  Summary: A temporal graph building library
5
5
  License: Apache-2.0
6
6
  Author: Paul Paliychuk
@@ -14,9 +14,10 @@ Classifier: Programming Language :: Python :: 3.12
14
14
  Requires-Dist: diskcache (>=5.6.3,<6.0.0)
15
15
  Requires-Dist: neo4j (>=5.23.0,<6.0.0)
16
16
  Requires-Dist: numpy (>=1.0.0)
17
- Requires-Dist: openai (>=1.52.2,<2.0.0)
17
+ Requires-Dist: openai (>=1.53.0,<2.0.0)
18
18
  Requires-Dist: pydantic (>=2.8.2,<3.0.0)
19
- Requires-Dist: tenacity (<9.0.0)
19
+ Requires-Dist: python-dotenv (>=1.0.1,<2.0.0)
20
+ Requires-Dist: tenacity (==9.0.0)
20
21
  Description-Content-Type: text/markdown
21
22
 
22
23
  <div align="center">
@@ -129,7 +130,7 @@ poetry add graphiti-core
129
130
  ```python
130
131
  from graphiti_core import Graphiti
131
132
  from graphiti_core.nodes import EpisodeType
132
- from datetime import datetime
133
+ from datetime import datetime, timezone
133
134
 
134
135
  # Initialize Graphiti
135
136
  graphiti = Graphiti("bolt://localhost:7687", "neo4j", "password")
@@ -149,7 +150,7 @@ for i, episode in enumerate(episodes):
149
150
  episode_body=episode,
150
151
  source=EpisodeType.text,
151
152
  source_description="podcast",
152
- reference_time=datetime.now()
153
+ reference_time=datetime.now(timezone.utc)
153
154
  )
154
155
 
155
156
  # Search the graph
@@ -108,7 +108,7 @@ poetry add graphiti-core
108
108
  ```python
109
109
  from graphiti_core import Graphiti
110
110
  from graphiti_core.nodes import EpisodeType
111
- from datetime import datetime
111
+ from datetime import datetime, timezone
112
112
 
113
113
  # Initialize Graphiti
114
114
  graphiti = Graphiti("bolt://localhost:7687", "neo4j", "password")
@@ -128,7 +128,7 @@ for i, episode in enumerate(episodes):
128
128
  episode_body=episode,
129
129
  source=EpisodeType.text,
130
130
  source_description="podcast",
131
- reference_time=datetime.now()
131
+ reference_time=datetime.now(timezone.utc)
132
132
  )
133
133
 
134
134
  # Search the graph
@@ -16,7 +16,7 @@ limitations under the License.
16
16
 
17
17
  import asyncio
18
18
  import logging
19
- from datetime import datetime
19
+ from datetime import datetime, timezone
20
20
  from time import time
21
21
 
22
22
  from dotenv import load_dotenv
@@ -35,8 +35,6 @@ from graphiti_core.search.search_config import DEFAULT_SEARCH_LIMIT, SearchResul
35
35
  from graphiti_core.search.search_config_recipes import (
36
36
  EDGE_HYBRID_SEARCH_NODE_DISTANCE,
37
37
  EDGE_HYBRID_SEARCH_RRF,
38
- NODE_HYBRID_SEARCH_NODE_DISTANCE,
39
- NODE_HYBRID_SEARCH_RRF,
40
38
  )
41
39
  from graphiti_core.search.search_utils import (
42
40
  RELEVANT_SCHEMA_LIMIT,
@@ -65,7 +63,9 @@ from graphiti_core.utils.maintenance.community_operations import (
65
63
  update_community,
66
64
  )
67
65
  from graphiti_core.utils.maintenance.edge_operations import (
66
+ dedupe_extracted_edge,
68
67
  extract_edges,
68
+ resolve_edge_contradictions,
69
69
  resolve_extracted_edges,
70
70
  )
71
71
  from graphiti_core.utils.maintenance.graph_data_operations import (
@@ -76,6 +76,7 @@ from graphiti_core.utils.maintenance.node_operations import (
76
76
  extract_nodes,
77
77
  resolve_extracted_nodes,
78
78
  )
79
+ from graphiti_core.utils.maintenance.temporal_operations import get_edge_contradictions
79
80
 
80
81
  logger = logging.getLogger(__name__)
81
82
 
@@ -312,10 +313,10 @@ class Graphiti:
312
313
  start = time()
313
314
 
314
315
  entity_edges: list[EntityEdge] = []
315
- now = datetime.now()
316
+ now = datetime.now(timezone.utc)
316
317
 
317
318
  previous_episodes = await self.retrieve_episodes(
318
- reference_time, last_n=3, group_ids=[group_id]
319
+ reference_time, last_n=RELEVANT_SCHEMA_LIMIT, group_ids=[group_id]
319
320
  )
320
321
  episode = EpisodicNode(
321
322
  name=name,
@@ -340,17 +341,24 @@ class Graphiti:
340
341
  *[node.generate_name_embedding(self.embedder) for node in extracted_nodes]
341
342
  )
342
343
 
343
- # Resolve extracted nodes with nodes already in the graph and extract facts
344
+ # Find relevant nodes already in the graph
344
345
  existing_nodes_lists: list[list[EntityNode]] = list(
345
346
  await asyncio.gather(
346
- *[get_relevant_nodes([node], self.driver) for node in extracted_nodes]
347
+ *[get_relevant_nodes(self.driver, [node]) for node in extracted_nodes]
347
348
  )
348
349
  )
349
350
 
351
+ # Resolve extracted nodes with nodes already in the graph and extract facts
350
352
  logger.debug(f'Extracted nodes: {[(n.name, n.uuid) for n in extracted_nodes]}')
351
353
 
352
354
  (mentioned_nodes, uuid_map), extracted_edges = await asyncio.gather(
353
- resolve_extracted_nodes(self.llm_client, extracted_nodes, existing_nodes_lists),
355
+ resolve_extracted_nodes(
356
+ self.llm_client,
357
+ extracted_nodes,
358
+ existing_nodes_lists,
359
+ episode,
360
+ previous_episodes,
361
+ ),
354
362
  extract_edges(
355
363
  self.llm_client, episode, extracted_nodes, previous_episodes, group_id
356
364
  ),
@@ -448,7 +456,6 @@ class Graphiti:
448
456
 
449
457
  episode.entity_edges = [edge.uuid for edge in entity_edges]
450
458
 
451
- # Future optimization would be using batch operations to save nodes and edges
452
459
  if not self.store_raw_episode_content:
453
460
  episode.content = ''
454
461
 
@@ -511,7 +518,7 @@ class Graphiti:
511
518
  """
512
519
  try:
513
520
  start = time()
514
- now = datetime.now()
521
+ now = datetime.now(timezone.utc)
515
522
 
516
523
  episodes = [
517
524
  EpisodicNode(
@@ -685,67 +692,6 @@ class Graphiti:
685
692
  bfs_origin_node_uuids,
686
693
  )
687
694
 
688
- async def get_nodes_by_query(
689
- self,
690
- query: str,
691
- center_node_uuid: str | None = None,
692
- group_ids: list[str] | None = None,
693
- limit: int = DEFAULT_SEARCH_LIMIT,
694
- ) -> list[EntityNode]:
695
- """
696
- Retrieve nodes from the graph database based on a text query.
697
-
698
- This method performs a hybrid search using both text-based and
699
- embedding-based approaches to find relevant nodes.
700
-
701
- Parameters
702
- ----------
703
- query : str
704
- The text query to search for in the graph
705
- center_node_uuid: str, optional
706
- Facts will be reranked based on proximity to this node.
707
- group_ids : list[str | None] | None, optional
708
- The graph partitions to return data from.
709
- limit : int | None, optional
710
- The maximum number of results to return per search method.
711
- If None, a default limit will be applied.
712
-
713
- Returns
714
- -------
715
- list[EntityNode]
716
- A list of EntityNode objects that match the search criteria.
717
-
718
- Notes
719
- -----
720
- This method uses the following steps:
721
- 1. Generates an embedding for the input query using the LLM client's embedder.
722
- 2. Calls the hybrid_node_search function with both the text query and its embedding.
723
- 3. The hybrid search combines fulltext search and vector similarity search
724
- to find the most relevant nodes.
725
-
726
- The method leverages the LLM client's embedding capabilities to enhance
727
- the search with semantic similarity matching. The 'limit' parameter is applied
728
- to each individual search method before results are combined and deduplicated.
729
- If not specified, a default limit (defined in the search functions) will be used.
730
- """
731
- search_config = (
732
- NODE_HYBRID_SEARCH_RRF if center_node_uuid is None else NODE_HYBRID_SEARCH_NODE_DISTANCE
733
- )
734
- search_config.limit = limit
735
-
736
- nodes = (
737
- await search(
738
- self.driver,
739
- self.embedder,
740
- self.cross_encoder,
741
- query,
742
- group_ids,
743
- search_config,
744
- center_node_uuid,
745
- )
746
- ).nodes
747
- return nodes
748
-
749
695
  async def get_episode_mentions(self, episode_uuids: list[str]) -> SearchResults:
750
696
  episodes = await EpisodicNode.get_by_uuids(self.driver, episode_uuids)
751
697
 
@@ -760,3 +706,36 @@ class Graphiti:
760
706
  communities = await get_communities_by_nodes(self.driver, nodes)
761
707
 
762
708
  return SearchResults(edges=edges, nodes=nodes, communities=communities)
709
+
710
+ async def add_triplet(self, source_node: EntityNode, edge: EntityEdge, target_node: EntityNode):
711
+ if source_node.name_embedding is None:
712
+ await source_node.generate_name_embedding(self.embedder)
713
+ if target_node.name_embedding is None:
714
+ await target_node.generate_name_embedding(self.embedder)
715
+ if edge.fact_embedding is None:
716
+ await edge.generate_embedding(self.embedder)
717
+
718
+ resolved_nodes, _ = await resolve_extracted_nodes(
719
+ self.llm_client,
720
+ [source_node, target_node],
721
+ [
722
+ await get_relevant_nodes(self.driver, [source_node]),
723
+ await get_relevant_nodes(self.driver, [target_node]),
724
+ ],
725
+ )
726
+
727
+ related_edges = await get_relevant_edges(
728
+ self.driver,
729
+ [edge],
730
+ source_node_uuid=resolved_nodes[0].uuid,
731
+ target_node_uuid=resolved_nodes[1].uuid,
732
+ )
733
+
734
+ resolved_edge = await dedupe_extracted_edge(self.llm_client, edge, related_edges)
735
+
736
+ contradicting_edges = await get_edge_contradictions(self.llm_client, edge, related_edges)
737
+ invalidated_edges = resolve_edge_contradictions(resolved_edge, contradicting_edges)
738
+
739
+ await add_nodes_and_edges_bulk(
740
+ self.driver, [], [], resolved_nodes, [resolved_edge] + invalidated_edges
741
+ )
@@ -25,6 +25,7 @@ load_dotenv()
25
25
 
26
26
  DEFAULT_DATABASE = os.getenv('DEFAULT_DATABASE', None)
27
27
  USE_PARALLEL_RUNTIME = bool(os.getenv('USE_PARALLEL_RUNTIME', False))
28
+ MAX_REFLEXION_ITERATIONS = 2
28
29
 
29
30
 
30
31
  def parse_db_date(neo_date: neo4j_time.DateTime | None) -> datetime | None:
@@ -1,3 +1,19 @@
1
+ """
2
+ Copyright 2024, Zep Software, Inc.
3
+
4
+ Licensed under the Apache License, Version 2.0 (the "License");
5
+ you may not use this file except in compliance with the License.
6
+ You may obtain a copy of the License at
7
+
8
+ http://www.apache.org/licenses/LICENSE-2.0
9
+
10
+ Unless required by applicable law or agreed to in writing, software
11
+ distributed under the License is distributed on an "AS IS" BASIS,
12
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ See the License for the specific language governing permissions and
14
+ limitations under the License.
15
+ """
16
+
1
17
  EPISODIC_EDGE_SAVE = """
2
18
  MATCH (episode:Episodic {uuid: $episode_uuid})
3
19
  MATCH (node:Entity {uuid: $entity_uuid})
@@ -1,3 +1,19 @@
1
+ """
2
+ Copyright 2024, Zep Software, Inc.
3
+
4
+ Licensed under the Apache License, Version 2.0 (the "License");
5
+ you may not use this file except in compliance with the License.
6
+ You may obtain a copy of the License at
7
+
8
+ http://www.apache.org/licenses/LICENSE-2.0
9
+
10
+ Unless required by applicable law or agreed to in writing, software
11
+ distributed under the License is distributed on an "AS IS" BASIS,
12
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ See the License for the specific language governing permissions and
14
+ limitations under the License.
15
+ """
16
+
1
17
  EPISODIC_NODE_SAVE = """
2
18
  MERGE (n:Episodic {uuid: $uuid})
3
19
  SET n = {uuid: $uuid, name: $name, group_id: $group_id, source_description: $source_description, source: $source, content: $content,
@@ -16,7 +16,7 @@ limitations under the License.
16
16
 
17
17
  import logging
18
18
  from abc import ABC, abstractmethod
19
- from datetime import datetime
19
+ from datetime import datetime, timezone
20
20
  from enum import Enum
21
21
  from time import time
22
22
  from typing import Any
@@ -78,7 +78,7 @@ class Node(BaseModel, ABC):
78
78
  name: str = Field(description='name of the node')
79
79
  group_id: str = Field(description='partition of the graph')
80
80
  labels: list[str] = Field(default_factory=list)
81
- created_at: datetime = Field(default_factory=lambda: datetime.now())
81
+ created_at: datetime = Field(default_factory=lambda: datetime.now(timezone.utc))
82
82
 
83
83
  @abstractmethod
84
84
  async def save(self, driver: AsyncDriver): ...
@@ -21,103 +21,16 @@ from .models import Message, PromptFunction, PromptVersion
21
21
 
22
22
 
23
23
  class Prompt(Protocol):
24
- v1: PromptVersion
25
- v2: PromptVersion
26
- v3: PromptVersion
24
+ edge: PromptVersion
27
25
  edge_list: PromptVersion
28
26
 
29
27
 
30
28
  class Versions(TypedDict):
31
- v1: PromptFunction
32
- v2: PromptFunction
33
- v3: PromptFunction
29
+ edge: PromptFunction
34
30
  edge_list: PromptFunction
35
31
 
36
32
 
37
- def v1(context: dict[str, Any]) -> list[Message]:
38
- return [
39
- Message(
40
- role='system',
41
- content='You are a helpful assistant that de-duplicates relationship from edge lists.',
42
- ),
43
- Message(
44
- role='user',
45
- content=f"""
46
- Given the following context, deduplicate facts from a list of new facts given a list of existing edges:
47
-
48
- Existing Edges:
49
- {json.dumps(context['existing_edges'], indent=2)}
50
-
51
- New Edges:
52
- {json.dumps(context['extracted_edges'], indent=2)}
53
-
54
- Task:
55
- If any edge in New Edges is a duplicate of an edge in Existing Edges, add their uuids to the output list.
56
- When finding duplicates edges, synthesize their facts into a short new fact.
57
-
58
- Guidelines:
59
- 1. identical or near identical facts are duplicates
60
- 2. Facts are also duplicates if they are represented by similar sentences
61
- 3. Facts will often discuss the same or similar relation between identical entities
62
-
63
- Respond with a JSON object in the following format:
64
- {{
65
- "duplicates": [
66
- {{
67
- "uuid": "uuid of the new node like 5d643020624c42fa9de13f97b1b3fa39",
68
- "duplicate_of": "uuid of the existing node",
69
- "fact": "one sentence description of the fact"
70
- }}
71
- ]
72
- }}
73
- """,
74
- ),
75
- ]
76
-
77
-
78
- def v2(context: dict[str, Any]) -> list[Message]:
79
- return [
80
- Message(
81
- role='system',
82
- content='You are a helpful assistant that de-duplicates relationship from edge lists.',
83
- ),
84
- Message(
85
- role='user',
86
- content=f"""
87
- Given the following context, deduplicate edges from a list of new edges given a list of existing edges:
88
-
89
- Existing Edges:
90
- {json.dumps(context['existing_edges'], indent=2)}
91
-
92
- New Edges:
93
- {json.dumps(context['extracted_edges'], indent=2)}
94
-
95
- Task:
96
- 1. start with the list of edges from New Edges
97
- 2. If any edge in New Edges is a duplicate of an edge in Existing Edges, replace the new edge with the existing
98
- edge in the list
99
- 3. Respond with the resulting list of edges
100
-
101
- Guidelines:
102
- 1. Use both the triplet name and fact of edges to determine if they are duplicates,
103
- duplicate edges may have different names meaning the same thing and slight variations in the facts.
104
- 2. If you encounter facts that are semantically equivalent or very similar, keep the original edge
105
-
106
- Respond with a JSON object in the following format:
107
- {{
108
- "new_edges": [
109
- {{
110
- "triplet": "source_node_name-edge_name-target_node_name",
111
- "fact": "one sentence description of the fact"
112
- }}
113
- ]
114
- }}
115
- """,
116
- ),
117
- ]
118
-
119
-
120
- def v3(context: dict[str, Any]) -> list[Message]:
33
+ def edge(context: dict[str, Any]) -> list[Message]:
121
34
  return [
122
35
  Message(
123
36
  role='system',
@@ -128,11 +41,14 @@ def v3(context: dict[str, Any]) -> list[Message]:
128
41
  content=f"""
129
42
  Given the following context, determine whether the New Edge represents any of the edges in the list of Existing Edges.
130
43
 
131
- Existing Edges:
44
+ <EXISTING EDGES>
132
45
  {json.dumps(context['related_edges'], indent=2)}
46
+ </EXISTING EDGES>
133
47
 
134
- New Edge:
48
+ <NEW EDGE>
135
49
  {json.dumps(context['extracted_edges'], indent=2)}
50
+ </NEW EDGE>
51
+
136
52
  Task:
137
53
  1. If the New Edges represents the same factual information as any edge in Existing Edges, return 'is_duplicate: true' in the
138
54
  response. Otherwise, return 'is_duplicate: false'
@@ -189,4 +105,4 @@ def edge_list(context: dict[str, Any]) -> list[Message]:
189
105
  ]
190
106
 
191
107
 
192
- versions: Versions = {'v1': v1, 'v2': v2, 'v3': v3, 'edge_list': edge_list}
108
+ versions: Versions = {'edge': edge, 'edge_list': edge_list}
@@ -0,0 +1,117 @@
1
+ """
2
+ Copyright 2024, Zep Software, Inc.
3
+
4
+ Licensed under the Apache License, Version 2.0 (the "License");
5
+ you may not use this file except in compliance with the License.
6
+ You may obtain a copy of the License at
7
+
8
+ http://www.apache.org/licenses/LICENSE-2.0
9
+
10
+ Unless required by applicable law or agreed to in writing, software
11
+ distributed under the License is distributed on an "AS IS" BASIS,
12
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ See the License for the specific language governing permissions and
14
+ limitations under the License.
15
+ """
16
+
17
+ import json
18
+ from typing import Any, Protocol, TypedDict
19
+
20
+ from .models import Message, PromptFunction, PromptVersion
21
+
22
+
23
+ class Prompt(Protocol):
24
+ node: PromptVersion
25
+ node_list: PromptVersion
26
+
27
+
28
+ class Versions(TypedDict):
29
+ node: PromptFunction
30
+ node_list: PromptFunction
31
+
32
+
33
+ def node(context: dict[str, Any]) -> list[Message]:
34
+ return [
35
+ Message(
36
+ role='system',
37
+ content='You are a helpful assistant that de-duplicates nodes from node lists.',
38
+ ),
39
+ Message(
40
+ role='user',
41
+ content=f"""
42
+ <PREVIOUS MESSAGES>
43
+ {json.dumps([ep for ep in context['previous_episodes']], indent=2)}
44
+ </PREVIOUS MESSAGES>
45
+ <CURRENT MESSAGE>
46
+ {context["episode_content"]}
47
+ </CURRENT MESSAGE>
48
+
49
+ <EXISTING NODES>
50
+ {json.dumps(context['existing_nodes'], indent=2)}
51
+ </EXISTING NODES>
52
+
53
+ Given the above EXISTING NODES, MESSAGE, and PREVIOUS MESSAGES. Determine if the NEW NODE extracted from the conversation
54
+ is a duplicate entity of one of the EXISTING NODES.
55
+
56
+ <NEW NODE>
57
+ {json.dumps(context['extracted_nodes'], indent=2)}
58
+ </NEW NODE>
59
+ Task:
60
+ 1. If the New Node represents the same entity as any node in Existing Nodes, return 'is_duplicate: true' in the
61
+ response. Otherwise, return 'is_duplicate: false'
62
+ 2. If is_duplicate is true, also return the uuid of the existing node in the response
63
+ 3. If is_duplicate is true, return a name for the node that is the most complete full name.
64
+
65
+ Guidelines:
66
+ 1. Use both the name and summary of nodes to determine if the entities are duplicates,
67
+ duplicate nodes may have different names
68
+
69
+ Respond with a JSON object in the following format:
70
+ {{
71
+ "is_duplicate": true or false,
72
+ "uuid": "uuid of the existing node like 5d643020624c42fa9de13f97b1b3fa39 or null",
73
+ "name": "Updated name of the new node (use the best name between the new node's name, an existing duplicate name, or a combination of both)"
74
+ }}
75
+ """,
76
+ ),
77
+ ]
78
+
79
+
80
+ def node_list(context: dict[str, Any]) -> list[Message]:
81
+ return [
82
+ Message(
83
+ role='system',
84
+ content='You are a helpful assistant that de-duplicates nodes from node lists.',
85
+ ),
86
+ Message(
87
+ role='user',
88
+ content=f"""
89
+ Given the following context, deduplicate a list of nodes:
90
+
91
+ Nodes:
92
+ {json.dumps(context['nodes'], indent=2)}
93
+
94
+ Task:
95
+ 1. Group nodes together such that all duplicate nodes are in the same list of uuids
96
+ 2. All duplicate uuids should be grouped together in the same list
97
+ 3. Also return a new summary that synthesizes the summary into a new short summary
98
+
99
+ Guidelines:
100
+ 1. Each uuid from the list of nodes should appear EXACTLY once in your response
101
+ 2. If a node has no duplicates, it should appear in the response in a list of only one uuid
102
+
103
+ Respond with a JSON object in the following format:
104
+ {{
105
+ "nodes": [
106
+ {{
107
+ "uuids": ["5d643020624c42fa9de13f97b1b3fa39", "node that is a duplicate of 5d643020624c42fa9de13f97b1b3fa39"],
108
+ "summary": "Brief summary of the node summaries that appear in the list of names."
109
+ }}
110
+ ]
111
+ }}
112
+ """,
113
+ ),
114
+ ]
115
+
116
+
117
+ versions: Versions = {'node': node, 'node_list': node_list}
@@ -36,12 +36,19 @@ def v1(context: dict[str, Any]) -> list[Message]:
36
36
  Message(
37
37
  role='user',
38
38
  content=f"""
39
- Edge:
40
- Fact: {context['edge_fact']}
41
-
42
- Current Episode: {context['current_episode']}
43
- Previous Episodes: {context['previous_episodes']}
44
- Reference Timestamp: {context['reference_timestamp']}
39
+ <PREVIOUS MESSAGES>
40
+ {context['previous_episodes']}
41
+ </PREVIOUS MESSAGES>
42
+ <CURRENT MESSAGE>
43
+ {context["current_episode"]}
44
+ </CURRENT MESSAGE>
45
+ <REFERENCE TIMESTAMP>
46
+ {context['reference_timestamp']}
47
+ </REFERENCE TIMESTAMP>
48
+
49
+ <FACT>
50
+ {context['edge_fact']}
51
+ </FACT>
45
52
 
46
53
  IMPORTANT: Only extract time information if it is part of the provided fact. Otherwise ignore the time mentioned. Make sure to do your best to determine the dates if only the relative time is mentioned. (eg 10 years ago, 2 mins ago) based on the provided reference timestamp
47
54
  If the relationship is not of spanning nature, but you are still able to determine the dates, set the valid_at only.
@@ -60,7 +67,7 @@ def v1(context: dict[str, Any]) -> list[Message]:
60
67
  5. Do not infer dates from related events. Only use dates that are directly stated to establish or change the relationship.
61
68
  6. For relative time mentions directly related to the relationship, calculate the actual datetime based on the reference timestamp.
62
69
  7. If only a date is mentioned without a specific time, use 00:00:00 (midnight) for that date.
63
- 8. If only a year is mentioned, use January 1st of that year at 00:00:00.
70
+ 8. If only year is mentioned, use January 1st of that year at 00:00:00.
64
71
  9. Always include the time zone offset (use Z for UTC if no specific time zone is mentioned).
65
72
  Respond with a JSON object:
66
73
  {{