graphiti-core 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of graphiti-core might be problematic. Click here for more details.
- graphiti_core/__init__.py +3 -0
- graphiti_core/edges.py +232 -0
- graphiti_core/graphiti.py +618 -0
- graphiti_core/helpers.py +7 -0
- graphiti_core/llm_client/__init__.py +5 -0
- graphiti_core/llm_client/anthropic_client.py +63 -0
- graphiti_core/llm_client/client.py +96 -0
- graphiti_core/llm_client/config.py +58 -0
- graphiti_core/llm_client/groq_client.py +64 -0
- graphiti_core/llm_client/openai_client.py +65 -0
- graphiti_core/llm_client/utils.py +22 -0
- graphiti_core/nodes.py +250 -0
- graphiti_core/prompts/__init__.py +4 -0
- graphiti_core/prompts/dedupe_edges.py +154 -0
- graphiti_core/prompts/dedupe_nodes.py +151 -0
- graphiti_core/prompts/extract_edge_dates.py +60 -0
- graphiti_core/prompts/extract_edges.py +138 -0
- graphiti_core/prompts/extract_nodes.py +145 -0
- graphiti_core/prompts/invalidate_edges.py +74 -0
- graphiti_core/prompts/lib.py +122 -0
- graphiti_core/prompts/models.py +31 -0
- graphiti_core/search/__init__.py +0 -0
- graphiti_core/search/search.py +142 -0
- graphiti_core/search/search_utils.py +454 -0
- graphiti_core/utils/__init__.py +15 -0
- graphiti_core/utils/bulk_utils.py +227 -0
- graphiti_core/utils/maintenance/__init__.py +16 -0
- graphiti_core/utils/maintenance/edge_operations.py +170 -0
- graphiti_core/utils/maintenance/graph_data_operations.py +133 -0
- graphiti_core/utils/maintenance/node_operations.py +199 -0
- graphiti_core/utils/maintenance/temporal_operations.py +184 -0
- graphiti_core/utils/maintenance/utils.py +0 -0
- graphiti_core/utils/utils.py +39 -0
- graphiti_core-0.1.0.dist-info/LICENSE +201 -0
- graphiti_core-0.1.0.dist-info/METADATA +199 -0
- graphiti_core-0.1.0.dist-info/RECORD +37 -0
- graphiti_core-0.1.0.dist-info/WHEEL +4 -0
|
@@ -0,0 +1,154 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Copyright 2024, Zep Software, Inc.
|
|
3
|
+
|
|
4
|
+
Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
you may not use this file except in compliance with the License.
|
|
6
|
+
You may obtain a copy of the License at
|
|
7
|
+
|
|
8
|
+
http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
|
|
10
|
+
Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
See the License for the specific language governing permissions and
|
|
14
|
+
limitations under the License.
|
|
15
|
+
"""
|
|
16
|
+
|
|
17
|
+
import json
|
|
18
|
+
from typing import Any, Protocol, TypedDict
|
|
19
|
+
|
|
20
|
+
from .models import Message, PromptFunction, PromptVersion
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class Prompt(Protocol):
    """Structural interface for the edge de-duplication prompt set.

    One attribute per prompt variant defined in this module; each is
    annotated with ``PromptVersion`` from ``.models``.
    """

    v1: PromptVersion
    v2: PromptVersion
    edge_list: PromptVersion
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class Versions(TypedDict):
    """Typed mapping from prompt-variant name to its builder function.

    The keys mirror the attributes of ``Prompt``; the values are the
    module-level functions ``v1``, ``v2`` and ``edge_list``.
    """

    v1: PromptFunction
    v2: PromptFunction
    edge_list: PromptFunction
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def v1(context: dict[str, Any]) -> list[Message]:
    """Build the messages asking the model to drop already-known facts.

    Args:
        context: Must contain ``'existing_edges'`` and ``'extracted_edges'``;
            both are serialised with ``json.dumps(..., indent=2)``.

    Returns:
        A two-element list: the system message followed by the user message.

    Raises:
        KeyError: If either required key is missing from ``context``.
    """
    system_message = Message(
        role='system',
        content='You are a helpful assistant that de-duplicates relationship from edge lists.',
    )

    # Serialise the two fact lists first so the template below stays readable.
    existing_json = json.dumps(context['existing_edges'], indent=2)
    extracted_json = json.dumps(context['extracted_edges'], indent=2)

    user_prompt = f"""
Given the following context, deduplicate facts from a list of new facts given a list of existing facts:

Existing Facts:
{existing_json}

New Facts:
{extracted_json}

Task:
If any facts in New Facts is a duplicate of a fact in Existing Facts,
do not return it in the list of unique facts.

Guidelines:
1. identical or near identical facts are duplicates
2. Facts are also duplicates if they are represented by similar sentences
3. Facts will often discuss the same or similar relation between identical entities

Respond with a JSON object in the following format:
{{
"unique_facts": [
{{
"uuid": "unique identifier of the fact"
}}
]
}}
"""
    return [system_message, Message(role='user', content=user_prompt)]
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def v2(context: dict[str, Any]) -> list[Message]:
    """Build the messages asking the model to swap duplicate new edges for existing ones.

    Args:
        context: Must contain ``'existing_edges'`` and ``'extracted_edges'``;
            both are serialised with ``json.dumps(..., indent=2)``.

    Returns:
        A two-element list: the system message followed by the user message.

    Raises:
        KeyError: If either required key is missing from ``context``.
    """
    system_message = Message(
        role='system',
        content='You are a helpful assistant that de-duplicates relationship from edge lists.',
    )

    existing_json = json.dumps(context['existing_edges'], indent=2)
    extracted_json = json.dumps(context['extracted_edges'], indent=2)

    user_prompt = f"""
Given the following context, deduplicate edges from a list of new edges given a list of existing edges:

Existing Edges:
{existing_json}

New Edges:
{extracted_json}

Task:
1. start with the list of edges from New Edges
2. If any edge in New Edges is a duplicate of an edge in Existing Edges, replace the new edge with the existing
edge in the list
3. Respond with the resulting list of edges

Guidelines:
1. Use both the triplet name and fact of edges to determine if they are duplicates,
duplicate edges may have different names meaning the same thing and slight variations in the facts.
2. If you encounter facts that are semantically equivalent or very similar, keep the original edge

Respond with a JSON object in the following format:
{{
"new_edges": [
{{
"triplet": "source_node_name-edge_name-target_node_name",
"fact": "one sentence description of the fact"
}}
]
}}
"""
    return [system_message, Message(role='user', content=user_prompt)]
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
def edge_list(context: dict[str, Any]) -> list[Message]:
    """Build the messages asking the model to collapse duplicates within one fact list.

    Args:
        context: Must contain ``'edges'``, serialised with
            ``json.dumps(..., indent=2)``.

    Returns:
        A two-element list: the system message followed by the user message.

    Raises:
        KeyError: If ``'edges'`` is missing from ``context``.
    """
    system_message = Message(
        role='system',
        content='You are a helpful assistant that de-duplicates edges from edge lists.',
    )

    facts_json = json.dumps(context['edges'], indent=2)

    user_prompt = f"""
Given the following context, find all of the duplicates in a list of facts:

Facts:
{facts_json}

Task:
If any facts in Facts is a duplicate of another fact, return a new fact with one of their uuid's.

Guidelines:
1. identical or near identical facts are duplicates
2. Facts are also duplicates if they are represented by similar sentences
3. Facts will often discuss the same or similar relation between identical entities
4. The final list should have only unique facts. If 3 facts are all duplicates of each other, only one of their
facts should be in the response

Respond with a JSON object in the following format:
{{
"unique_facts": [
{{
"uuid": "unique identifier of the fact",
"fact": "fact of a unique edge"
}}
]
}}
"""
    return [system_message, Message(role='user', content=user_prompt)]
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
# Registry of the edge de-duplication prompt builders, keyed by variant name.
versions: Versions = {'v1': v1, 'v2': v2, 'edge_list': edge_list}
|
|
@@ -0,0 +1,151 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Copyright 2024, Zep Software, Inc.
|
|
3
|
+
|
|
4
|
+
Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
you may not use this file except in compliance with the License.
|
|
6
|
+
You may obtain a copy of the License at
|
|
7
|
+
|
|
8
|
+
http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
|
|
10
|
+
Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
See the License for the specific language governing permissions and
|
|
14
|
+
limitations under the License.
|
|
15
|
+
"""
|
|
16
|
+
|
|
17
|
+
import json
|
|
18
|
+
from typing import Any, Protocol, TypedDict
|
|
19
|
+
|
|
20
|
+
from .models import Message, PromptFunction, PromptVersion
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class Prompt(Protocol):
    """Structural interface for the node de-duplication prompt set.

    One attribute per prompt variant defined in this module; each is
    annotated with ``PromptVersion`` from ``.models``.
    """

    v1: PromptVersion
    v2: PromptVersion
    node_list: PromptVersion
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class Versions(TypedDict):
    """Typed mapping from prompt-variant name to its builder function.

    The keys mirror the attributes of ``Prompt``; the values are the
    module-level functions ``v1``, ``v2`` and ``node_list``.
    """

    v1: PromptFunction
    v2: PromptFunction
    # The value stored under 'node_list' is the plain function ``node_list``,
    # so it is a PromptFunction like its siblings (it was annotated as
    # PromptVersion).
    node_list: PromptFunction
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def v1(context: dict[str, Any]) -> list[Message]:
    """Build the messages asking the model to swap duplicate new nodes for existing ones.

    Args:
        context: Must contain ``'existing_nodes'`` and ``'extracted_nodes'``;
            both are serialised with ``json.dumps(..., indent=2)``.

    Returns:
        A two-element list: the system message followed by the user message.

    Raises:
        KeyError: If either required key is missing from ``context``.
    """
    # The response-format example must itself be valid JSON, so the single
    # "name" member carries no trailing comma.
    return [
        Message(
            role='system',
            content='You are a helpful assistant that de-duplicates nodes from node lists.',
        ),
        Message(
            role='user',
            content=f"""
Given the following context, deduplicate nodes from a list of new nodes given a list of existing nodes:

Existing Nodes:
{json.dumps(context['existing_nodes'], indent=2)}

New Nodes:
{json.dumps(context['extracted_nodes'], indent=2)}

Task:
1. start with the list of nodes from New Nodes
2. If any node in New Nodes is a duplicate of a node in Existing Nodes, replace the new node with the existing
node in the list
3. Respond with the resulting list of nodes

Guidelines:
1. Use both the name and summary of nodes to determine if they are duplicates,
duplicate nodes may have different names

Respond with a JSON object in the following format:
{{
"new_nodes": [
{{
"name": "Unique identifier for the node"
}}
]
}}
""",
        ),
    ]
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def v2(context: dict[str, Any]) -> list[Message]:
    """Build the messages asking the model to list new nodes that duplicate existing ones.

    Args:
        context: Must contain ``'existing_nodes'`` and ``'extracted_nodes'``;
            both are serialised with ``json.dumps(..., indent=2)``.

    Returns:
        A two-element list: the system message followed by the user message.

    Raises:
        KeyError: If either required key is missing from ``context``.
    """
    system_message = Message(
        role='system',
        content='You are a helpful assistant that de-duplicates nodes from node lists.',
    )

    existing_json = json.dumps(context['existing_nodes'], indent=2)
    extracted_json = json.dumps(context['extracted_nodes'], indent=2)

    user_prompt = f"""
Given the following context, deduplicate nodes from a list of new nodes given a list of existing nodes:

Existing Nodes:
{existing_json}

New Nodes:
{extracted_json}
Important:
If a node in the new nodes is describing the same entity as a node in the existing nodes, mark it as a duplicate!!!
Task:
If any node in New Nodes is a duplicate of a node in Existing Nodes, add their names to the output list

Guidelines:
1. Use both the name and summary of nodes to determine if they are duplicates,
duplicate nodes may have different names
2. In the output, name should always be the name of the New Node that is a duplicate. duplicate_of should be
the name of the Existing Node.

Respond with a JSON object in the following format:
{{
"duplicates": [
{{
"name": "name of the new node",
"duplicate_of": "name of the existing node"
}}
]
}}
"""
    return [system_message, Message(role='user', content=user_prompt)]
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
def node_list(context: dict[str, Any]) -> list[Message]:
    """Build the messages asking the model to group duplicate nodes within one list.

    Args:
        context: Must contain ``'nodes'``, serialised with
            ``json.dumps(..., indent=2)``.

    Returns:
        A two-element list: the system message followed by the user message.

    Raises:
        KeyError: If ``'nodes'`` is missing from ``context``.
    """
    # The response-format example must itself be valid JSON, so the single
    # "names" member carries no trailing comma.
    return [
        Message(
            role='system',
            content='You are a helpful assistant that de-duplicates nodes from node lists.',
        ),
        Message(
            role='user',
            content=f"""
Given the following context, deduplicate a list of nodes:

Nodes:
{json.dumps(context['nodes'], indent=2)}

Task:
1. Group nodes together such that all duplicate nodes are in the same list of names
2. All duplicate names should be grouped together in the same list

Guidelines:
1. Each name from the list of nodes should appear EXACTLY once in your response
2. If a node has no duplicates, it should appear in the response in a list of only one name

Respond with a JSON object in the following format:
{{
"nodes": [
{{
"names": ["myNode", "node that is a duplicate of myNode"]
}}
]
}}
""",
        ),
    ]
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
# Registry of the node de-duplication prompt builders, keyed by variant name.
versions: Versions = {'v1': v1, 'v2': v2, 'node_list': node_list}
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
from typing import Any, Protocol, TypedDict
|
|
2
|
+
|
|
3
|
+
from .models import Message, PromptFunction, PromptVersion
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class Prompt(Protocol):
    """Structural interface for the edge-date extraction prompt set.

    Exposes the single variant defined in this module, annotated with
    ``PromptVersion`` from ``.models``.
    """

    v1: PromptVersion
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class Versions(TypedDict):
    """Typed mapping from prompt-variant name to its builder function.

    Only ``'v1'`` exists for edge-date extraction.
    """

    v1: PromptFunction
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def v1(context: dict[str, Any]) -> list[Message]:
    """Build the messages asking the model for an edge's valid_at / invalid_at dates.

    Args:
        context: Must contain ``'edge_name'``, ``'edge_fact'``,
            ``'current_episode'``, ``'previous_episodes'`` and
            ``'reference_timestamp'``. Each value is interpolated into the
            prompt with default f-string formatting (no JSON serialisation).

    Returns:
        A two-element list: the system message followed by the user message.

    Raises:
        KeyError: If any required key is missing from ``context``.
    """
    # Guidelines are numbered 1-8 consecutively (the last item was labelled 9).
    return [
        Message(
            role='system',
            content='You are an AI assistant that extracts datetime information for graph edges, focusing only on dates directly related to the establishment or change of the relationship described in the edge fact.',
        ),
        Message(
            role='user',
            content=f"""
Edge:
Edge Name: {context['edge_name']}
Fact: {context['edge_fact']}

Current Episode: {context['current_episode']}
Previous Episodes: {context['previous_episodes']}
Reference Timestamp: {context['reference_timestamp']}

IMPORTANT: Only extract time information if it is part of the provided fact. Otherwise ignore the time mentioned. Make sure to do your best to determine the dates if only the relative time is mentioned. (eg 10 years ago, 2 mins ago) based on the provided reference timestamp
If the relationship is not of spanning nature, but you are still able to determine the dates, set the valid_at only.
Definitions:
- valid_at: The date and time when the relationship described by the edge fact became true or was established.
- invalid_at: The date and time when the relationship described by the edge fact stopped being true or ended.

Task:
Analyze the conversation and determine if there are dates that are part of the edge fact. Only set dates if they explicitly relate to the formation or alteration of the relationship itself.

Guidelines:
1. Use ISO 8601 format (YYYY-MM-DDTHH:MM:SSZ) for datetimes.
2. Use the reference timestamp as the current time when determining the valid_at and invalid_at dates.
3. If no temporal information is found that establishes or changes the relationship, leave the fields as null.
4. Do not infer dates from related events. Only use dates that are directly stated to establish or change the relationship.
5. For relative time mentions directly related to the relationship, calculate the actual datetime based on the reference timestamp.
6. If only a date is mentioned without a specific time, use 00:00:00 (midnight) for that date.
7. If only a year is mentioned, use January 1st of that year at 00:00:00.
8. Always include the time zone offset (use Z for UTC if no specific time zone is mentioned).
Respond with a JSON object:
{{
"valid_at": "YYYY-MM-DDTHH:MM:SSZ or null",
"invalid_at": "YYYY-MM-DDTHH:MM:SSZ or null",
"explanation": "Brief explanation of why these dates were chosen or why they were set to null"
}}
""",
        ),
    ]
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
# Registry of the edge-date extraction prompt builders, keyed by variant name.
versions: Versions = {'v1': v1}
|
|
@@ -0,0 +1,138 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Copyright 2024, Zep Software, Inc.
|
|
3
|
+
|
|
4
|
+
Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
you may not use this file except in compliance with the License.
|
|
6
|
+
You may obtain a copy of the License at
|
|
7
|
+
|
|
8
|
+
http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
|
|
10
|
+
Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
See the License for the specific language governing permissions and
|
|
14
|
+
limitations under the License.
|
|
15
|
+
"""
|
|
16
|
+
|
|
17
|
+
import json
|
|
18
|
+
from typing import Any, Protocol, TypedDict
|
|
19
|
+
|
|
20
|
+
from .models import Message, PromptFunction, PromptVersion
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class Prompt(Protocol):
    """Structural interface for the edge extraction prompt set.

    One attribute per prompt variant defined in this module; each is
    annotated with ``PromptVersion`` from ``.models``.
    """

    v1: PromptVersion
    v2: PromptVersion
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class Versions(TypedDict):
    """Typed mapping from prompt-variant name to its builder function.

    The keys mirror the attributes of ``Prompt``; the values are the
    module-level functions ``v1`` and ``v2``.
    """

    v1: PromptFunction
    v2: PromptFunction
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def v1(context: dict[str, Any]) -> list[Message]:
    """Build the messages asking the model for new semantic edges, given the graph schema.

    Args:
        context: Must contain ``'relevant_schema'``, ``'new_nodes'``,
            ``'episode_content'``, ``'episode_timestamp'`` and
            ``'previous_episodes'``. ``'new_nodes'`` is JSON-serialised;
            ``'previous_episodes'`` is iterated and each item's
            ``'content'`` entry is JSON-serialised; the remaining values
            are interpolated with default f-string formatting.

    Returns:
        A two-element list: the system message followed by the user message.

    Raises:
        KeyError: If a required key is missing from ``context`` or an
            episode has no ``'content'`` entry.
    """
    system_message = Message(
        role='system',
        content='You are a helpful assistant that extracts graph edges from provided context.',
    )

    # Pull the pieces out in the same order the template consumes them.
    schema = context['relevant_schema']
    new_nodes_json = json.dumps(context['new_nodes'], indent=2)
    episode_content = context['episode_content']
    episode_timestamp = context['episode_timestamp']
    history_json = json.dumps(
        [episode['content'] for episode in context['previous_episodes']], indent=2
    )

    user_prompt = f"""
Given the following context, extract new semantic edges (relationships) that need to be added to the knowledge graph:

Current Graph Structure:
{schema}

New Nodes:
{new_nodes_json}

New Episode:
Content: {episode_content}
Timestamp: {episode_timestamp}

Previous Episodes:
{history_json}

Extract new semantic edges based on the content of the current episode, considering the existing graph structure, new nodes, and context from previous episodes.

Guidelines:
1. Create edges only between semantic nodes (not episodic nodes like messages).
2. Each edge should represent a clear relationship between two semantic nodes.
3. The relation_type should be a concise, all-caps description of the relationship (e.g., LOVES, IS_FRIENDS_WITH, WORKS_FOR).
4. Provide a more detailed fact describing the relationship.
5. If a relationship seems to update an existing one, create a new edge with the updated information.
6. Consider temporal aspects of relationships when relevant.
7. Do not create edges involving episodic nodes (like Message 1 or Message 2).
8. Use existing nodes from the current graph structure when appropriate.

Respond with a JSON object in the following format:
{{
"new_edges": [
{{
"relation_type": "RELATION_TYPE_IN_CAPS",
"source_node": "Name of the source semantic node",
"target_node": "Name of the target semantic node",
"fact": "Detailed description of the relationship",
"valid_at": "YYYY-MM-DDTHH:MM:SSZ or null if not explicitly mentioned",
"invalid_at": "YYYY-MM-DDTHH:MM:SSZ or null if ongoing or not explicitly mentioned"
}}
]
}}

If no new edges need to be added, return an empty list for "new_edges".
"""
    return [system_message, Message(role='user', content=user_prompt)]
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def v2(context: dict[str, Any]) -> list[Message]:
    """Build the messages asking the model for edges between the supplied nodes.

    Args:
        context: Must contain ``'nodes'``, ``'previous_episodes'`` and
            ``'episode_content'``. ``'nodes'`` is JSON-serialised;
            ``'previous_episodes'`` is iterated and each item's
            ``'content'`` entry is JSON-serialised; ``'episode_content'``
            is interpolated with default f-string formatting.

    Returns:
        A two-element list: the system message followed by the user message.

    Raises:
        KeyError: If a required key is missing from ``context`` or an
            episode has no ``'content'`` entry.
    """
    system_message = Message(
        role='system',
        content='You are a helpful assistant that extracts graph edges from provided context.',
    )

    nodes_json = json.dumps(context['nodes'], indent=2)
    history_json = json.dumps(
        [episode['content'] for episode in context['previous_episodes']], indent=2
    )
    episode_content = context['episode_content']

    user_prompt = f"""
Given the following context, extract edges (relationships) that need to be added to the knowledge graph:
Nodes:
{nodes_json}



Episodes:
{history_json}
{episode_content} <-- New Episode


Extract entity edges based on the content of the current episode, the given nodes, and context from previous episodes.

Guidelines:
1. Create edges only between the provided nodes.
2. Each edge should represent a clear relationship between two nodes.
3. The relation_type should be a concise, all-caps description of the relationship (e.g., LOVES, IS_FRIENDS_WITH, WORKS_FOR).
4. Provide a more detailed fact describing the relationship.
5. Consider temporal aspects of relationships when relevant.

Respond with a JSON object in the following format:
{{
"edges": [
{{
"relation_type": "RELATION_TYPE_IN_CAPS",
"source_node_uuid": "uuid of the source entity node",
"target_node_uuid": "uuid of the target entity node",
"fact": "brief description of the relationship",
"valid_at": "YYYY-MM-DDTHH:MM:SSZ or null if not explicitly mentioned",
"invalid_at": "YYYY-MM-DDTHH:MM:SSZ or null if ongoing or not explicitly mentioned"
}}
]
}}

If no edges need to be added, return an empty list for "edges".
"""
    return [system_message, Message(role='user', content=user_prompt)]
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
# Registry of the edge extraction prompt builders, keyed by variant name.
versions: Versions = {'v1': v1, 'v2': v2}
|
|
@@ -0,0 +1,145 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Copyright 2024, Zep Software, Inc.
|
|
3
|
+
|
|
4
|
+
Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
you may not use this file except in compliance with the License.
|
|
6
|
+
You may obtain a copy of the License at
|
|
7
|
+
|
|
8
|
+
http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
|
|
10
|
+
Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
See the License for the specific language governing permissions and
|
|
14
|
+
limitations under the License.
|
|
15
|
+
"""
|
|
16
|
+
|
|
17
|
+
import json
|
|
18
|
+
from typing import Any, Protocol, TypedDict
|
|
19
|
+
|
|
20
|
+
from .models import Message, PromptFunction, PromptVersion
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class Prompt(Protocol):
    """Structural interface for the node extraction prompt set.

    One attribute per prompt variant defined in this module; each is
    annotated with ``PromptVersion`` from ``.models``.
    """

    v1: PromptVersion
    v2: PromptVersion
    extract_json: PromptVersion
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class Versions(TypedDict):
    """Typed mapping from prompt-variant name to its builder function.

    The keys mirror the attributes of ``Prompt``; the values are the
    module-level functions ``v1``, ``v2`` and ``extract_json``.
    """

    v1: PromptFunction
    v2: PromptFunction
    extract_json: PromptFunction
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def v1(context: dict[str, Any]) -> list[Message]:
    """Build the messages asking the model for new entity nodes in an episode.

    Args:
        context: Must contain ``'previous_episodes'`` and
            ``'episode_content'``. ``'previous_episodes'`` is iterated and
            each item's ``'content'`` entry is JSON-serialised;
            ``'episode_content'`` is interpolated with default f-string
            formatting.

    Returns:
        A two-element list: the system message followed by the user message.

    Raises:
        KeyError: If a required key is missing from ``context`` or an
            episode has no ``'content'`` entry.
    """
    system_message = Message(
        role='system',
        content='You are a helpful assistant that extracts graph nodes from provided context.',
    )

    history_json = json.dumps(
        [episode['content'] for episode in context['previous_episodes']], indent=2
    )
    episode_content = context['episode_content']

    user_prompt = f"""
Given the following context, extract new entity nodes that need to be added to the knowledge graph:

Previous Episodes:
{history_json}

New Episode:
Content: {episode_content}

Extract new entity nodes based on the content of the current episode, while considering the context from previous episodes.

Guidelines:
1. Focus on entities, concepts, or actors that are central to the current episode.
2. Avoid creating nodes for relationships or actions (these will be handled as edges later).
3. Provide a brief but informative summary for each node.

Respond with a JSON object in the following format:
{{
"new_nodes": [
{{
"name": "Unique identifier for the node",
"labels": ["Entity", "OptionalAdditionalLabel"],
"summary": "Brief summary of the node's role or significance"
}}
]
}}

If no new nodes need to be added, return an empty list for "new_nodes".
"""
    return [system_message, Message(role='user', content=user_prompt)]
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def v2(context: dict[str, Any]) -> list[Message]:
    """Build the messages asking the model for entity nodes in the current chat message.

    Args:
        context: Must contain ``'previous_episodes'`` and
            ``'episode_content'``. ``'previous_episodes'`` is iterated and
            each item's ``'content'`` entry is JSON-serialised;
            ``'episode_content'`` is interpolated with default f-string
            formatting.

    Returns:
        A two-element list: the system message followed by the user message.

    Raises:
        KeyError: If a required key is missing from ``context`` or an
            episode has no ``'content'`` entry.
    """
    system_text = (
        'You are an AI assistant that extracts entity nodes from conversational text. '
        'Your primary task is to identify and extract the speaker and other significant '
        'entities mentioned in the conversation.'
    )

    conversation_json = json.dumps(
        [episode['content'] for episode in context['previous_episodes']], indent=2
    )
    current_message = context['episode_content']

    user_text = f"""
Given the following conversation, extract entity nodes from the CURRENT MESSAGE that are explicitly or implicitly mentioned:

Conversation:
{conversation_json}
<CURRENT MESSAGE>
{current_message}

Guidelines:
1. ALWAYS extract the speaker/actor as the first node. The speaker is the part before the colon in each line of dialogue.
2. Extract other significant entities, concepts, or actors mentioned in the conversation.
3. Provide concise but informative summaries for each extracted node.
4. Avoid creating nodes for relationships or actions.
5. Avoid creating nodes for temporal information like dates, times or years (these will be added to edges later).

Respond with a JSON object in the following format:
{{
"extracted_nodes": [
{{
"name": "Unique identifier for the node (use the speaker's name for speaker nodes)",
"labels": ["Entity", "Speaker" for speaker nodes, "OptionalAdditionalLabel"],
"summary": "Brief summary of the node's role or significance"
}}
]
}}
"""
    system_message = Message(role='system', content=system_text)
    user_message = Message(role='user', content=user_text)
    return [system_message, user_message]
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
def extract_json(context: dict[str, Any]) -> list[Message]:
    """Build the messages asking the model for entity nodes in a JSON episode.

    Args:
        context: Must contain ``'source_description'`` and
            ``'episode_content'``; both are interpolated into the prompt
            with default f-string formatting.

    Returns:
        A two-element list: the system message followed by the user message.

    Raises:
        KeyError: If either required key is missing from ``context``.
    """
    sys_prompt = """You are an AI assistant that extracts entity nodes from conversational text.
Your primary task is to identify and extract relevant entities from JSON files"""

    # Guideline 1 had an unbalanced quote ("user field) and "an entities";
    # both are corrected so the instruction reads unambiguously.
    # NOTE(review): the response-format example still mentions speaker nodes,
    # which looks copied from the conversational prompt -- confirm whether
    # that is intended for JSON input.
    user_prompt = f"""
Given the following source description, extract relevant entity nodes from the provided JSON:

Source Description:
{context["source_description"]}

JSON:
{context["episode_content"]}

Guidelines:
1. Always try to extract an entity that the JSON represents. This will often be something like a "name" or "user" field
2. Do NOT extract any properties that contain dates

Respond with a JSON object in the following format:
{{
"extracted_nodes": [
{{
"name": "Unique identifier for the node (use the speaker's name for speaker nodes)",
"labels": ["Entity", "Speaker" for speaker nodes, "OptionalAdditionalLabel"],
"summary": "Brief summary of the node's role or significance"
}}
]
}}
"""
    return [
        Message(role='system', content=sys_prompt),
        Message(role='user', content=user_prompt),
    ]
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
# Registry of the node extraction prompt builders, keyed by variant name.
versions: Versions = {'v1': v1, 'v2': v2, 'extract_json': extract_json}
|