graphiti-core 0.3.21__py3-none-any.whl → 0.4.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of graphiti-core might be problematic. Click here for more details.
- graphiti_core/cross_encoder/openai_reranker_client.py +2 -2
- graphiti_core/graphiti.py +50 -71
- graphiti_core/helpers.py +1 -0
- graphiti_core/models/edges/edge_db_queries.py +16 -0
- graphiti_core/models/nodes/node_db_queries.py +16 -0
- graphiti_core/nodes.py +2 -2
- graphiti_core/prompts/dedupe_edges.py +9 -93
- graphiti_core/prompts/dedupe_nodes.py +19 -101
- graphiti_core/prompts/extract_edge_dates.py +14 -7
- graphiti_core/prompts/extract_edges.py +55 -81
- graphiti_core/prompts/extract_nodes.py +72 -96
- graphiti_core/prompts/summarize_nodes.py +40 -1
- graphiti_core/search/search.py +15 -1
- graphiti_core/search/search_config_recipes.py +35 -0
- graphiti_core/search/search_utils.py +8 -19
- graphiti_core/utils/bulk_utils.py +3 -3
- graphiti_core/utils/maintenance/community_operations.py +3 -3
- graphiti_core/utils/maintenance/edge_operations.py +87 -55
- graphiti_core/utils/maintenance/node_operations.py +122 -52
- {graphiti_core-0.3.21.dist-info → graphiti_core-0.4.1.dist-info}/METADATA +5 -4
- {graphiti_core-0.3.21.dist-info → graphiti_core-0.4.1.dist-info}/RECORD +23 -23
- {graphiti_core-0.3.21.dist-info → graphiti_core-0.4.1.dist-info}/LICENSE +0 -0
- {graphiti_core-0.3.21.dist-info → graphiti_core-0.4.1.dist-info}/WHEEL +0 -0
|
@@ -21,120 +21,94 @@ from .models import Message, PromptFunction, PromptVersion
|
|
|
21
21
|
|
|
22
22
|
|
|
23
23
|
class Prompt(Protocol):
|
|
24
|
-
|
|
25
|
-
|
|
24
|
+
edge: PromptVersion
|
|
25
|
+
reflexion: PromptVersion
|
|
26
26
|
|
|
27
27
|
|
|
28
28
|
class Versions(TypedDict):
|
|
29
|
-
|
|
30
|
-
|
|
29
|
+
edge: PromptFunction
|
|
30
|
+
reflexion: PromptFunction
|
|
31
31
|
|
|
32
32
|
|
|
33
|
-
def
|
|
33
|
+
def edge(context: dict[str, Any]) -> list[Message]:
|
|
34
34
|
return [
|
|
35
35
|
Message(
|
|
36
36
|
role='system',
|
|
37
|
-
content='You are
|
|
37
|
+
content='You are an expert fact extractor that extracts fact triples from text.',
|
|
38
38
|
),
|
|
39
39
|
Message(
|
|
40
40
|
role='user',
|
|
41
41
|
content=f"""
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
Previous Episodes:
|
|
55
|
-
{json.dumps([ep['content'] for ep in context['previous_episodes']], indent=2)}
|
|
42
|
+
<PREVIOUS MESSAGES>
|
|
43
|
+
{json.dumps([ep for ep in context['previous_episodes']], indent=2)}
|
|
44
|
+
</PREVIOUS MESSAGES>
|
|
45
|
+
<CURRENT MESSAGE>
|
|
46
|
+
{context["episode_content"]}
|
|
47
|
+
</CURRENT MESSAGE>
|
|
48
|
+
|
|
49
|
+
<ENTITIES>
|
|
50
|
+
{context["nodes"]}
|
|
51
|
+
</ENTITIES>
|
|
52
|
+
|
|
53
|
+
{context['custom_prompt']}
|
|
56
54
|
|
|
57
|
-
|
|
55
|
+
Given the above MESSAGES and ENTITIES, extract all facts pertaining to the listed ENTITIES from the CURRENT MESSAGE.
|
|
56
|
+
|
|
58
57
|
|
|
59
58
|
Guidelines:
|
|
60
|
-
1.
|
|
61
|
-
2. Each
|
|
62
|
-
3. The relation_type should be a concise, all-caps description of the
|
|
63
|
-
4. Provide a more detailed fact
|
|
64
|
-
5.
|
|
65
|
-
6. Consider temporal aspects of relationships when relevant.
|
|
66
|
-
7. Do not create edges involving episodic nodes (like Message 1 or Message 2).
|
|
67
|
-
8. Use existing nodes from the current graph structure when appropriate.
|
|
59
|
+
1. Extract facts only between the provided entities.
|
|
60
|
+
2. Each fact should represent a clear relationship between two DISTINCT nodes.
|
|
61
|
+
3. The relation_type should be a concise, all-caps description of the fact (e.g., LOVES, IS_FRIENDS_WITH, WORKS_FOR).
|
|
62
|
+
4. Provide a more detailed fact containing all relevant information.
|
|
63
|
+
5. Consider temporal aspects of relationships when relevant.
|
|
68
64
|
|
|
69
65
|
Respond with a JSON object in the following format:
|
|
70
66
|
{{
|
|
71
|
-
"
|
|
67
|
+
"edges": [
|
|
72
68
|
{{
|
|
73
69
|
"relation_type": "RELATION_TYPE_IN_CAPS",
|
|
74
|
-
"
|
|
75
|
-
"
|
|
76
|
-
"fact": "
|
|
77
|
-
"valid_at": "YYYY-MM-DDTHH:MM:SSZ or null if not explicitly mentioned",
|
|
78
|
-
"invalid_at": "YYYY-MM-DDTHH:MM:SSZ or null if ongoing or not explicitly mentioned"
|
|
70
|
+
"source_entity_name": "name of the source entity",
|
|
71
|
+
"target_entity_name": "name of the target entity",
|
|
72
|
+
"fact": "extracted factual information",
|
|
79
73
|
}}
|
|
80
74
|
]
|
|
81
75
|
}}
|
|
82
|
-
|
|
83
|
-
If no new edges need to be added, return an empty list for "new_edges".
|
|
84
76
|
""",
|
|
85
77
|
),
|
|
86
78
|
]
|
|
87
79
|
|
|
88
80
|
|
|
89
|
-
def
|
|
90
|
-
|
|
91
|
-
Message(
|
|
92
|
-
role='system',
|
|
93
|
-
content='You are a helpful assistant that extracts graph edges from provided context.',
|
|
94
|
-
),
|
|
95
|
-
Message(
|
|
96
|
-
role='user',
|
|
97
|
-
content=f"""
|
|
98
|
-
Given the following context, extract edges (relationships) that need to be added to the knowledge graph:
|
|
99
|
-
Nodes:
|
|
100
|
-
{json.dumps(context['nodes'], indent=2)}
|
|
101
|
-
|
|
102
|
-
|
|
81
|
+
def reflexion(context: dict[str, Any]) -> list[Message]:
|
|
82
|
+
sys_prompt = """You are an AI assistant that determines which facts have not been extracted from the given context"""
|
|
103
83
|
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
84
|
+
user_prompt = f"""
|
|
85
|
+
<PREVIOUS MESSAGES>
|
|
86
|
+
{json.dumps([ep for ep in context['previous_episodes']], indent=2)}
|
|
87
|
+
</PREVIOUS MESSAGES>
|
|
88
|
+
<CURRENT MESSAGE>
|
|
89
|
+
{context["episode_content"]}
|
|
90
|
+
</CURRENT MESSAGE>
|
|
108
91
|
|
|
109
|
-
|
|
92
|
+
<EXTRACTED ENTITIES>
|
|
93
|
+
{context["nodes"]}
|
|
94
|
+
</EXTRACTED ENTITIES>
|
|
110
95
|
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
3. The relation_type should be a concise, all-caps description of the relationship (e.g., LOVES, IS_FRIENDS_WITH, WORKS_FOR).
|
|
115
|
-
4. Provide a more detailed fact describing the relationship.
|
|
116
|
-
5. The fact should include any specific relevant information, including numeric information
|
|
117
|
-
6. Consider temporal aspects of relationships when relevant.
|
|
118
|
-
7. Avoid using the same node as the source and target of a relationship
|
|
96
|
+
<EXTRACTED FACTS>
|
|
97
|
+
{context["extracted_facts"]}
|
|
98
|
+
</EXTRACTED FACTS>
|
|
119
99
|
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
"edges": [
|
|
123
|
-
{{
|
|
124
|
-
"relation_type": "RELATION_TYPE_IN_CAPS",
|
|
125
|
-
"source_node_uuid": "uuid of the source entity node",
|
|
126
|
-
"target_node_uuid": "uuid of the target entity node",
|
|
127
|
-
"fact": "brief description of the relationship",
|
|
128
|
-
"valid_at": "YYYY-MM-DDTHH:MM:SSZ or null if not explicitly mentioned",
|
|
129
|
-
"invalid_at": "YYYY-MM-DDTHH:MM:SSZ or null if ongoing or not explicitly mentioned"
|
|
130
|
-
}}
|
|
131
|
-
]
|
|
132
|
-
}}
|
|
100
|
+
Given the above MESSAGES, list of EXTRACTED ENTITIES entities, and list of EXTRACTED FACTS;
|
|
101
|
+
determine if any facts haven't been extracted:
|
|
133
102
|
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
103
|
+
Respond with a JSON object in the following format:
|
|
104
|
+
{{
|
|
105
|
+
"missing_facts": [ "facts that weren't extracted", ...]
|
|
106
|
+
}}
|
|
107
|
+
"""
|
|
108
|
+
return [
|
|
109
|
+
Message(role='system', content=sys_prompt),
|
|
110
|
+
Message(role='user', content=user_prompt),
|
|
137
111
|
]
|
|
138
112
|
|
|
139
113
|
|
|
140
|
-
versions: Versions = {'
|
|
114
|
+
versions: Versions = {'edge': edge, 'reflexion': reflexion}
|
|
@@ -21,89 +21,45 @@ from .models import Message, PromptFunction, PromptVersion
|
|
|
21
21
|
|
|
22
22
|
|
|
23
23
|
class Prompt(Protocol):
|
|
24
|
-
|
|
25
|
-
v2: PromptVersion
|
|
24
|
+
extract_message: PromptVersion
|
|
26
25
|
extract_json: PromptVersion
|
|
27
26
|
extract_text: PromptVersion
|
|
27
|
+
reflexion: PromptVersion
|
|
28
28
|
|
|
29
29
|
|
|
30
30
|
class Versions(TypedDict):
|
|
31
|
-
|
|
32
|
-
v2: PromptFunction
|
|
31
|
+
extract_message: PromptFunction
|
|
33
32
|
extract_json: PromptFunction
|
|
34
33
|
extract_text: PromptFunction
|
|
34
|
+
reflexion: PromptFunction
|
|
35
35
|
|
|
36
36
|
|
|
37
|
-
def
|
|
38
|
-
|
|
39
|
-
Message(
|
|
40
|
-
role='system',
|
|
41
|
-
content='You are a helpful assistant that extracts graph nodes from provided context.',
|
|
42
|
-
),
|
|
43
|
-
Message(
|
|
44
|
-
role='user',
|
|
45
|
-
content=f"""
|
|
46
|
-
Given the following context, extract new entity nodes that need to be added to the knowledge graph:
|
|
47
|
-
|
|
48
|
-
Previous Episodes:
|
|
49
|
-
{json.dumps([ep['content'] for ep in context['previous_episodes']], indent=2)}
|
|
50
|
-
|
|
51
|
-
New Episode:
|
|
52
|
-
Content: {context["episode_content"]}
|
|
53
|
-
|
|
54
|
-
Extract new entity nodes based on the content of the current episode, while considering the context from previous episodes.
|
|
55
|
-
|
|
56
|
-
Guidelines:
|
|
57
|
-
1. Focus on entities, concepts, or actors that are central to the current episode.
|
|
58
|
-
2. Avoid creating nodes for relationships or actions (these will be handled as edges later).
|
|
59
|
-
3. Provide a brief but informative summary for each node.
|
|
60
|
-
4. Be as explicit as possible in your node names, using full names and avoiding abbreviations.
|
|
61
|
-
|
|
62
|
-
Respond with a JSON object in the following format:
|
|
63
|
-
{{
|
|
64
|
-
"new_nodes": [
|
|
65
|
-
{{
|
|
66
|
-
"name": "Unique identifier for the node",
|
|
67
|
-
"labels": ["Entity", "OptionalAdditionalLabel"],
|
|
68
|
-
"summary": "Brief summary of the node's role or significance"
|
|
69
|
-
}}
|
|
70
|
-
]
|
|
71
|
-
}}
|
|
72
|
-
|
|
73
|
-
If no new nodes need to be added, return an empty list for "new_nodes".
|
|
74
|
-
""",
|
|
75
|
-
),
|
|
76
|
-
]
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
def v2(context: dict[str, Any]) -> list[Message]:
|
|
80
|
-
sys_prompt = """You are an AI assistant that extracts entity nodes from conversational text. Your primary task is to identify and extract the speaker and other significant entities mentioned in the conversation."""
|
|
37
|
+
def extract_message(context: dict[str, Any]) -> list[Message]:
|
|
38
|
+
sys_prompt = """You are an AI assistant that extracts entity nodes from conversational messages. Your primary task is to identify and extract the speaker and other significant entities mentioned in the conversation."""
|
|
81
39
|
|
|
82
40
|
user_prompt = f"""
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
{json.dumps([ep['content'] for ep in context['previous_episodes']], indent=2)}
|
|
41
|
+
<PREVIOUS MESSAGES>
|
|
42
|
+
{json.dumps([ep for ep in context['previous_episodes']], indent=2)}
|
|
43
|
+
</PREVIOUS MESSAGES>
|
|
87
44
|
<CURRENT MESSAGE>
|
|
88
45
|
{context["episode_content"]}
|
|
46
|
+
</CURRENT MESSAGE>
|
|
47
|
+
|
|
48
|
+
{context['custom_prompt']}
|
|
49
|
+
|
|
50
|
+
Given the above conversation, extract entity nodes from the CURRENT MESSAGE that are explicitly or implicitly mentioned:
|
|
89
51
|
|
|
90
52
|
Guidelines:
|
|
91
53
|
1. ALWAYS extract the speaker/actor as the first node. The speaker is the part before the colon in each line of dialogue.
|
|
92
|
-
2. Extract other significant entities, concepts, or actors mentioned in the
|
|
93
|
-
3.
|
|
94
|
-
4.
|
|
95
|
-
5.
|
|
96
|
-
6.
|
|
54
|
+
2. Extract other significant entities, concepts, or actors mentioned in the CURRENT MESSAGE.
|
|
55
|
+
3. DO NOT create nodes for relationships or actions.
|
|
56
|
+
4. DO NOT create nodes for temporal information like dates, times or years (these will be added to edges later).
|
|
57
|
+
5. Be as explicit as possible in your node names, using full names.
|
|
58
|
+
6. DO NOT extract entities mentioned only in PREVIOUS MESSAGES, those messages are only to provide context.
|
|
97
59
|
|
|
98
60
|
Respond with a JSON object in the following format:
|
|
99
61
|
{{
|
|
100
|
-
"
|
|
101
|
-
{{
|
|
102
|
-
"name": "Unique identifier for the node (use the speaker's name for speaker nodes)",
|
|
103
|
-
"labels": ["Entity", "Speaker" for speaker nodes, "OptionalAdditionalLabel"],
|
|
104
|
-
"summary": "Brief summary of the node's role or significance"
|
|
105
|
-
}}
|
|
106
|
-
]
|
|
62
|
+
"extracted_node_names": ["Name of the extracted entity", ...],
|
|
107
63
|
}}
|
|
108
64
|
"""
|
|
109
65
|
return [
|
|
@@ -113,17 +69,20 @@ Respond with a JSON object in the following format:
|
|
|
113
69
|
|
|
114
70
|
|
|
115
71
|
def extract_json(context: dict[str, Any]) -> list[Message]:
|
|
116
|
-
sys_prompt = """You are an AI assistant that extracts entity nodes from
|
|
72
|
+
sys_prompt = """You are an AI assistant that extracts entity nodes from JSON.
|
|
117
73
|
Your primary task is to identify and extract relevant entities from JSON files"""
|
|
118
74
|
|
|
119
75
|
user_prompt = f"""
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
Source Description:
|
|
76
|
+
<SOURCE DESCRIPTION>:
|
|
123
77
|
{context["source_description"]}
|
|
124
|
-
|
|
125
|
-
JSON
|
|
78
|
+
</SOURCE DESCRIPTION>
|
|
79
|
+
<JSON>
|
|
126
80
|
{context["episode_content"]}
|
|
81
|
+
</JSON>
|
|
82
|
+
|
|
83
|
+
{context['custom_prompt']}
|
|
84
|
+
|
|
85
|
+
Given the above source description and JSON, extract relevant entity nodes from the provided JSON:
|
|
127
86
|
|
|
128
87
|
Guidelines:
|
|
129
88
|
1. Always try to extract an entities that the JSON represents. This will often be something like a "name" or "user field
|
|
@@ -131,13 +90,7 @@ Guidelines:
|
|
|
131
90
|
|
|
132
91
|
Respond with a JSON object in the following format:
|
|
133
92
|
{{
|
|
134
|
-
"
|
|
135
|
-
{{
|
|
136
|
-
"name": "Unique identifier for the node (use the speaker's name for speaker nodes)",
|
|
137
|
-
"labels": ["Entity", "Speaker" for speaker nodes, "OptionalAdditionalLabel"],
|
|
138
|
-
"summary": "Brief summary of the node's role or significance"
|
|
139
|
-
}}
|
|
140
|
-
]
|
|
93
|
+
"extracted_node_names": ["Name of the extracted entity", ...],
|
|
141
94
|
}}
|
|
142
95
|
"""
|
|
143
96
|
return [
|
|
@@ -147,32 +100,55 @@ Respond with a JSON object in the following format:
|
|
|
147
100
|
|
|
148
101
|
|
|
149
102
|
def extract_text(context: dict[str, Any]) -> list[Message]:
|
|
150
|
-
sys_prompt = """You are an AI assistant that extracts entity nodes from
|
|
103
|
+
sys_prompt = """You are an AI assistant that extracts entity nodes from text. Your primary task is to identify and extract the speaker and other significant entities mentioned in the provided text."""
|
|
151
104
|
|
|
152
105
|
user_prompt = f"""
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
Conversation:
|
|
156
|
-
{json.dumps([ep['content'] for ep in context['previous_episodes']], indent=2)}
|
|
157
|
-
<CURRENT MESSAGE>
|
|
106
|
+
<TEXT>
|
|
158
107
|
{context["episode_content"]}
|
|
108
|
+
</TEXT>
|
|
109
|
+
|
|
110
|
+
{context['custom_prompt']}
|
|
111
|
+
|
|
112
|
+
Given the following text, extract entity nodes from the TEXT that are explicitly or implicitly mentioned:
|
|
159
113
|
|
|
160
114
|
Guidelines:
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
6. Be as explicit as possible in your node names, using full names and avoiding abbreviations.
|
|
115
|
+
1. Extract significant entities, concepts, or actors mentioned in the conversation.
|
|
116
|
+
2. Avoid creating nodes for relationships or actions.
|
|
117
|
+
3. Avoid creating nodes for temporal information like dates, times or years (these will be added to edges later).
|
|
118
|
+
4. Be as explicit as possible in your node names, using full names and avoiding abbreviations.
|
|
166
119
|
|
|
167
120
|
Respond with a JSON object in the following format:
|
|
168
121
|
{{
|
|
169
|
-
"
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
122
|
+
"extracted_node_names": ["Name of the extracted entity", ...],
|
|
123
|
+
}}
|
|
124
|
+
"""
|
|
125
|
+
return [
|
|
126
|
+
Message(role='system', content=sys_prompt),
|
|
127
|
+
Message(role='user', content=user_prompt),
|
|
175
128
|
]
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
def reflexion(context: dict[str, Any]) -> list[Message]:
|
|
132
|
+
sys_prompt = """You are an AI assistant that determines which entities have not been extracted from the given context"""
|
|
133
|
+
|
|
134
|
+
user_prompt = f"""
|
|
135
|
+
<PREVIOUS MESSAGES>
|
|
136
|
+
{json.dumps([ep for ep in context['previous_episodes']], indent=2)}
|
|
137
|
+
</PREVIOUS MESSAGES>
|
|
138
|
+
<CURRENT MESSAGE>
|
|
139
|
+
{context["episode_content"]}
|
|
140
|
+
</CURRENT MESSAGE>
|
|
141
|
+
|
|
142
|
+
<EXTRACTED ENTITIES>
|
|
143
|
+
{context["extracted_entities"]}
|
|
144
|
+
</EXTRACTED ENTITIES>
|
|
145
|
+
|
|
146
|
+
Given the above previous messages, current message, and list of extracted entities; determine if any entities haven't been
|
|
147
|
+
extracted:
|
|
148
|
+
|
|
149
|
+
Respond with a JSON object in the following format:
|
|
150
|
+
{{
|
|
151
|
+
"missed_entities": [ "name of entity that wasn't extracted", ...]
|
|
176
152
|
}}
|
|
177
153
|
"""
|
|
178
154
|
return [
|
|
@@ -182,8 +158,8 @@ Respond with a JSON object in the following format:
|
|
|
182
158
|
|
|
183
159
|
|
|
184
160
|
versions: Versions = {
|
|
185
|
-
'
|
|
186
|
-
'v2': v2,
|
|
161
|
+
'extract_message': extract_message,
|
|
187
162
|
'extract_json': extract_json,
|
|
188
163
|
'extract_text': extract_text,
|
|
164
|
+
'reflexion': reflexion,
|
|
189
165
|
}
|
|
@@ -22,11 +22,13 @@ from .models import Message, PromptFunction, PromptVersion
|
|
|
22
22
|
|
|
23
23
|
class Prompt(Protocol):
|
|
24
24
|
summarize_pair: PromptVersion
|
|
25
|
+
summarize_context: PromptVersion
|
|
25
26
|
summary_description: PromptVersion
|
|
26
27
|
|
|
27
28
|
|
|
28
29
|
class Versions(TypedDict):
|
|
29
30
|
summarize_pair: PromptFunction
|
|
31
|
+
summarize_context: PromptFunction
|
|
30
32
|
summary_description: PromptFunction
|
|
31
33
|
|
|
32
34
|
|
|
@@ -53,6 +55,39 @@ def summarize_pair(context: dict[str, Any]) -> list[Message]:
|
|
|
53
55
|
]
|
|
54
56
|
|
|
55
57
|
|
|
58
|
+
def summarize_context(context: dict[str, Any]) -> list[Message]:
|
|
59
|
+
return [
|
|
60
|
+
Message(
|
|
61
|
+
role='system',
|
|
62
|
+
content='You are a helpful assistant that combines summaries with new conversation context.',
|
|
63
|
+
),
|
|
64
|
+
Message(
|
|
65
|
+
role='user',
|
|
66
|
+
content=f"""
|
|
67
|
+
|
|
68
|
+
<MESSAGES>
|
|
69
|
+
{json.dumps(context['previous_episodes'], indent=2)}
|
|
70
|
+
{json.dumps(context['episode_content'], indent=2)}
|
|
71
|
+
</MESSAGES>
|
|
72
|
+
|
|
73
|
+
Given the above MESSAGES and the following ENTITY name, create a summary for the ENTITY. Your summary must only use
|
|
74
|
+
information from the provided MESSAGES. Your summary should also only contain information relevant to the
|
|
75
|
+
provided ENTITY.
|
|
76
|
+
|
|
77
|
+
<ENTITY>
|
|
78
|
+
{context['node_name']}
|
|
79
|
+
</ENTITY>
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
Respond with a JSON object in the following format:
|
|
83
|
+
{{
|
|
84
|
+
"summary": "Entity summary"
|
|
85
|
+
}}
|
|
86
|
+
""",
|
|
87
|
+
),
|
|
88
|
+
]
|
|
89
|
+
|
|
90
|
+
|
|
56
91
|
def summary_description(context: dict[str, Any]) -> list[Message]:
|
|
57
92
|
return [
|
|
58
93
|
Message(
|
|
@@ -76,4 +111,8 @@ def summary_description(context: dict[str, Any]) -> list[Message]:
|
|
|
76
111
|
]
|
|
77
112
|
|
|
78
113
|
|
|
79
|
-
versions: Versions = {
|
|
114
|
+
versions: Versions = {
|
|
115
|
+
'summarize_pair': summarize_pair,
|
|
116
|
+
'summarize_context': summarize_context,
|
|
117
|
+
'summary_description': summary_description,
|
|
118
|
+
}
|
graphiti_core/search/search.py
CHANGED
|
@@ -32,8 +32,10 @@ from graphiti_core.search.search_config import (
|
|
|
32
32
|
CommunitySearchConfig,
|
|
33
33
|
EdgeReranker,
|
|
34
34
|
EdgeSearchConfig,
|
|
35
|
+
EdgeSearchMethod,
|
|
35
36
|
NodeReranker,
|
|
36
37
|
NodeSearchConfig,
|
|
38
|
+
NodeSearchMethod,
|
|
37
39
|
SearchConfig,
|
|
38
40
|
SearchResults,
|
|
39
41
|
)
|
|
@@ -141,7 +143,7 @@ async def edge_search(
|
|
|
141
143
|
search_results: list[list[EntityEdge]] = list(
|
|
142
144
|
await asyncio.gather(
|
|
143
145
|
*[
|
|
144
|
-
edge_fulltext_search(driver, query,
|
|
146
|
+
edge_fulltext_search(driver, query, group_ids, 2 * limit),
|
|
145
147
|
edge_similarity_search(
|
|
146
148
|
driver, query_vector, None, None, group_ids, 2 * limit, config.sim_min_score
|
|
147
149
|
),
|
|
@@ -150,6 +152,12 @@ async def edge_search(
|
|
|
150
152
|
)
|
|
151
153
|
)
|
|
152
154
|
|
|
155
|
+
if EdgeSearchMethod.bfs in config.search_methods and bfs_origin_node_uuids is None:
|
|
156
|
+
source_node_uuids = [edge.source_node_uuid for result in search_results for edge in result]
|
|
157
|
+
search_results.append(
|
|
158
|
+
await edge_bfs_search(driver, source_node_uuids, config.bfs_max_depth, 2 * limit)
|
|
159
|
+
)
|
|
160
|
+
|
|
153
161
|
edge_uuid_map = {edge.uuid: edge for result in search_results for edge in result}
|
|
154
162
|
|
|
155
163
|
reranked_uuids: list[str] = []
|
|
@@ -229,6 +237,12 @@ async def node_search(
|
|
|
229
237
|
)
|
|
230
238
|
)
|
|
231
239
|
|
|
240
|
+
if NodeSearchMethod.bfs in config.search_methods and bfs_origin_node_uuids is None:
|
|
241
|
+
origin_node_uuids = [node.uuid for result in search_results for node in result]
|
|
242
|
+
search_results.append(
|
|
243
|
+
await node_bfs_search(driver, origin_node_uuids, config.bfs_max_depth, 2 * limit)
|
|
244
|
+
)
|
|
245
|
+
|
|
232
246
|
search_result_uuids = [[node.uuid for node in result] for result in search_results]
|
|
233
247
|
node_uuid_map = {node.uuid: node for result in search_results for node in result}
|
|
234
248
|
|
|
@@ -118,6 +118,19 @@ EDGE_HYBRID_SEARCH_EPISODE_MENTIONS = SearchConfig(
|
|
|
118
118
|
)
|
|
119
119
|
)
|
|
120
120
|
|
|
121
|
+
# performs a hybrid search over edges with cross encoder reranking
|
|
122
|
+
EDGE_HYBRID_SEARCH_CROSS_ENCODER = SearchConfig(
|
|
123
|
+
edge_config=EdgeSearchConfig(
|
|
124
|
+
search_methods=[
|
|
125
|
+
EdgeSearchMethod.bm25,
|
|
126
|
+
EdgeSearchMethod.cosine_similarity,
|
|
127
|
+
EdgeSearchMethod.bfs,
|
|
128
|
+
],
|
|
129
|
+
reranker=EdgeReranker.cross_encoder,
|
|
130
|
+
),
|
|
131
|
+
limit=10,
|
|
132
|
+
)
|
|
133
|
+
|
|
121
134
|
# performs a hybrid search over nodes with rrf reranking
|
|
122
135
|
NODE_HYBRID_SEARCH_RRF = SearchConfig(
|
|
123
136
|
node_config=NodeSearchConfig(
|
|
@@ -150,6 +163,19 @@ NODE_HYBRID_SEARCH_EPISODE_MENTIONS = SearchConfig(
|
|
|
150
163
|
)
|
|
151
164
|
)
|
|
152
165
|
|
|
166
|
+
# performs a hybrid search over nodes with cross encoder reranking
|
|
167
|
+
NODE_HYBRID_SEARCH_CROSS_ENCODER = SearchConfig(
|
|
168
|
+
node_config=NodeSearchConfig(
|
|
169
|
+
search_methods=[
|
|
170
|
+
NodeSearchMethod.bm25,
|
|
171
|
+
NodeSearchMethod.cosine_similarity,
|
|
172
|
+
NodeSearchMethod.bfs,
|
|
173
|
+
],
|
|
174
|
+
reranker=NodeReranker.cross_encoder,
|
|
175
|
+
),
|
|
176
|
+
limit=10,
|
|
177
|
+
)
|
|
178
|
+
|
|
153
179
|
# performs a hybrid search over communities with rrf reranking
|
|
154
180
|
COMMUNITY_HYBRID_SEARCH_RRF = SearchConfig(
|
|
155
181
|
community_config=CommunitySearchConfig(
|
|
@@ -165,3 +191,12 @@ COMMUNITY_HYBRID_SEARCH_MMR = SearchConfig(
|
|
|
165
191
|
reranker=CommunityReranker.mmr,
|
|
166
192
|
)
|
|
167
193
|
)
|
|
194
|
+
|
|
195
|
+
# performs a hybrid search over communities with cross encoder reranking
|
|
196
|
+
COMMUNITY_HYBRID_SEARCH_CROSS_ENCODER = SearchConfig(
|
|
197
|
+
community_config=CommunitySearchConfig(
|
|
198
|
+
search_methods=[CommunitySearchMethod.bm25, CommunitySearchMethod.cosine_similarity],
|
|
199
|
+
reranker=CommunityReranker.cross_encoder,
|
|
200
|
+
),
|
|
201
|
+
limit=3,
|
|
202
|
+
)
|
|
@@ -135,8 +135,6 @@ async def get_communities_by_nodes(
|
|
|
135
135
|
async def edge_fulltext_search(
|
|
136
136
|
driver: AsyncDriver,
|
|
137
137
|
query: str,
|
|
138
|
-
source_node_uuid: str | None,
|
|
139
|
-
target_node_uuid: str | None,
|
|
140
138
|
group_ids: list[str] | None = None,
|
|
141
139
|
limit=RELEVANT_SCHEMA_LIMIT,
|
|
142
140
|
) -> list[EntityEdge]:
|
|
@@ -146,11 +144,9 @@ async def edge_fulltext_search(
|
|
|
146
144
|
return []
|
|
147
145
|
|
|
148
146
|
cypher_query = Query("""
|
|
149
|
-
CALL db.index.fulltext.queryRelationships("edge_name_and_fact", $query)
|
|
150
|
-
YIELD relationship AS
|
|
151
|
-
|
|
152
|
-
WHERE ($source_uuid IS NULL OR n.uuid IN [$source_uuid, $target_uuid])
|
|
153
|
-
AND ($target_uuid IS NULL OR m.uuid IN [$source_uuid, $target_uuid])
|
|
147
|
+
CALL db.index.fulltext.queryRelationships("edge_name_and_fact", $query, {limit: $limit})
|
|
148
|
+
YIELD relationship AS r, score
|
|
149
|
+
WITH r, score, startNode(r) AS n, endNode(r) AS m
|
|
154
150
|
RETURN
|
|
155
151
|
r.uuid AS uuid,
|
|
156
152
|
r.group_id AS group_id,
|
|
@@ -170,8 +166,6 @@ async def edge_fulltext_search(
|
|
|
170
166
|
records, _, _ = await driver.execute_query(
|
|
171
167
|
cypher_query,
|
|
172
168
|
query=fuzzy_query,
|
|
173
|
-
source_uuid=source_node_uuid,
|
|
174
|
-
target_uuid=target_node_uuid,
|
|
175
169
|
group_ids=group_ids,
|
|
176
170
|
limit=limit,
|
|
177
171
|
database_=DEFAULT_DATABASE,
|
|
@@ -296,7 +290,7 @@ async def node_fulltext_search(
|
|
|
296
290
|
|
|
297
291
|
records, _, _ = await driver.execute_query(
|
|
298
292
|
"""
|
|
299
|
-
CALL db.index.fulltext.queryNodes("node_name_and_summary", $query)
|
|
293
|
+
CALL db.index.fulltext.queryNodes("node_name_and_summary", $query, {limit: $limit})
|
|
300
294
|
YIELD node AS n, score
|
|
301
295
|
RETURN
|
|
302
296
|
n.uuid AS uuid,
|
|
@@ -407,7 +401,7 @@ async def community_fulltext_search(
|
|
|
407
401
|
|
|
408
402
|
records, _, _ = await driver.execute_query(
|
|
409
403
|
"""
|
|
410
|
-
CALL db.index.fulltext.queryNodes("community_name", $query)
|
|
404
|
+
CALL db.index.fulltext.queryNodes("community_name", $query, {limit: $limit})
|
|
411
405
|
YIELD node AS comm, score
|
|
412
406
|
RETURN
|
|
413
407
|
comm.uuid AS uuid,
|
|
@@ -539,8 +533,8 @@ async def hybrid_node_search(
|
|
|
539
533
|
|
|
540
534
|
|
|
541
535
|
async def get_relevant_nodes(
|
|
542
|
-
nodes: list[EntityNode],
|
|
543
536
|
driver: AsyncDriver,
|
|
537
|
+
nodes: list[EntityNode],
|
|
544
538
|
) -> list[EntityNode]:
|
|
545
539
|
"""
|
|
546
540
|
Retrieve relevant nodes based on the provided list of EntityNodes.
|
|
@@ -573,6 +567,7 @@ async def get_relevant_nodes(
|
|
|
573
567
|
driver,
|
|
574
568
|
[node.group_id for node in nodes],
|
|
575
569
|
)
|
|
570
|
+
|
|
576
571
|
return relevant_nodes
|
|
577
572
|
|
|
578
573
|
|
|
@@ -599,13 +594,7 @@ async def get_relevant_edges(
|
|
|
599
594
|
)
|
|
600
595
|
for edge in edges
|
|
601
596
|
if edge.fact_embedding is not None
|
|
602
|
-
]
|
|
603
|
-
*[
|
|
604
|
-
edge_fulltext_search(
|
|
605
|
-
driver, edge.fact, source_node_uuid, target_node_uuid, [edge.group_id], limit
|
|
606
|
-
)
|
|
607
|
-
for edge in edges
|
|
608
|
-
],
|
|
597
|
+
]
|
|
609
598
|
)
|
|
610
599
|
|
|
611
600
|
for result in results:
|