graphiti-core 0.17.4__py3-none-any.whl → 0.24.3__py3-none-any.whl
This diff reflects the publicly released contents of the two package versions as published to a supported registry and is provided for informational purposes only.
- graphiti_core/cross_encoder/gemini_reranker_client.py +1 -1
- graphiti_core/cross_encoder/openai_reranker_client.py +1 -1
- graphiti_core/decorators.py +110 -0
- graphiti_core/driver/driver.py +62 -2
- graphiti_core/driver/falkordb_driver.py +215 -23
- graphiti_core/driver/graph_operations/graph_operations.py +191 -0
- graphiti_core/driver/kuzu_driver.py +182 -0
- graphiti_core/driver/neo4j_driver.py +61 -8
- graphiti_core/driver/neptune_driver.py +305 -0
- graphiti_core/driver/search_interface/search_interface.py +89 -0
- graphiti_core/edges.py +264 -132
- graphiti_core/embedder/azure_openai.py +10 -3
- graphiti_core/embedder/client.py +2 -1
- graphiti_core/graph_queries.py +114 -101
- graphiti_core/graphiti.py +582 -255
- graphiti_core/graphiti_types.py +2 -0
- graphiti_core/helpers.py +21 -14
- graphiti_core/llm_client/anthropic_client.py +142 -52
- graphiti_core/llm_client/azure_openai_client.py +57 -19
- graphiti_core/llm_client/client.py +83 -21
- graphiti_core/llm_client/config.py +1 -1
- graphiti_core/llm_client/gemini_client.py +75 -57
- graphiti_core/llm_client/openai_base_client.py +94 -50
- graphiti_core/llm_client/openai_client.py +28 -8
- graphiti_core/llm_client/openai_generic_client.py +91 -56
- graphiti_core/models/edges/edge_db_queries.py +259 -35
- graphiti_core/models/nodes/node_db_queries.py +311 -32
- graphiti_core/nodes.py +388 -164
- graphiti_core/prompts/dedupe_edges.py +42 -31
- graphiti_core/prompts/dedupe_nodes.py +56 -39
- graphiti_core/prompts/eval.py +4 -4
- graphiti_core/prompts/extract_edges.py +23 -14
- graphiti_core/prompts/extract_nodes.py +73 -32
- graphiti_core/prompts/prompt_helpers.py +39 -0
- graphiti_core/prompts/snippets.py +29 -0
- graphiti_core/prompts/summarize_nodes.py +23 -25
- graphiti_core/search/search.py +154 -74
- graphiti_core/search/search_config.py +39 -4
- graphiti_core/search/search_filters.py +109 -31
- graphiti_core/search/search_helpers.py +5 -6
- graphiti_core/search/search_utils.py +1360 -473
- graphiti_core/tracer.py +193 -0
- graphiti_core/utils/bulk_utils.py +216 -90
- graphiti_core/utils/datetime_utils.py +13 -0
- graphiti_core/utils/maintenance/community_operations.py +62 -38
- graphiti_core/utils/maintenance/dedup_helpers.py +262 -0
- graphiti_core/utils/maintenance/edge_operations.py +286 -126
- graphiti_core/utils/maintenance/graph_data_operations.py +44 -74
- graphiti_core/utils/maintenance/node_operations.py +320 -158
- graphiti_core/utils/maintenance/temporal_operations.py +11 -3
- graphiti_core/utils/ontology_utils/entity_types_utils.py +1 -1
- graphiti_core/utils/text_utils.py +53 -0
- {graphiti_core-0.17.4.dist-info → graphiti_core-0.24.3.dist-info}/METADATA +221 -87
- graphiti_core-0.24.3.dist-info/RECORD +86 -0
- {graphiti_core-0.17.4.dist-info → graphiti_core-0.24.3.dist-info}/WHEEL +1 -1
- graphiti_core-0.17.4.dist-info/RECORD +0 -77
- /graphiti_core/{utils/maintenance/utils.py → migrations/__init__.py} +0 -0
- {graphiti_core-0.17.4.dist-info → graphiti_core-0.24.3.dist-info}/licenses/LICENSE +0 -0
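Almost every prompt-module hunk below swaps a direct `json.dumps(..., indent=2)` call for a new `to_prompt_json` helper imported from the added `graphiti_core/prompts/prompt_helpers.py` (+39 lines). The helper's body is not part of this diff, so the following is only a minimal sketch of what such a wrapper might look like, assuming it is a thin layer over `json.dumps`; the shipped implementation may differ.

```python
# Hypothetical sketch of prompt_helpers.to_prompt_json; the real helper in
# graphiti-core 0.24.3 is not shown in this diff and may behave differently.
import json
from typing import Any


def to_prompt_json(data: Any, indent: int = 2) -> str:
    """Serialize prompt context (episodes, nodes, edges) into JSON text for an LLM prompt."""
    # default=str keeps datetimes and UUIDs from raising TypeError during serialization.
    return json.dumps(data, indent=indent, default=str)
```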
graphiti_core/prompts/dedupe_edges.py CHANGED

@@ -14,22 +14,22 @@ See the License for the specific language governing permissions and
 limitations under the License.
 """

-import json
 from typing import Any, Protocol, TypedDict

 from pydantic import BaseModel, Field

 from .models import Message, PromptFunction, PromptVersion
+from .prompt_helpers import to_prompt_json


 class EdgeDuplicate(BaseModel):
 duplicate_facts: list[int] = Field(
 ...,
-description='List of
+description='List of idx values of any duplicate facts. If no duplicate facts are found, default to empty list.',
 )
 contradicted_facts: list[int] = Field(
 ...,
-description='List of
+description='List of idx values of facts that should be invalidated. If no facts should be invalidated, the list should be empty.',
 )
 fact_type: str = Field(..., description='One of the provided fact types or DEFAULT')

@@ -67,13 +67,13 @@ def edge(context: dict[str, Any]) -> list[Message]:
 Given the following context, determine whether the New Edge represents any of the edges in the list of Existing Edges.

 <EXISTING EDGES>
-{
+{to_prompt_json(context['related_edges'])}
 </EXISTING EDGES>

 <NEW EDGE>
-{
+{to_prompt_json(context['extracted_edges'])}
 </NEW EDGE>
-
+
 Task:
 If the New Edges represents the same factual information as any edge in Existing Edges, return the id of the duplicate fact
 as part of the list of duplicate_facts.

@@ -98,7 +98,7 @@ def edge_list(context: dict[str, Any]) -> list[Message]:
 Given the following context, find all of the duplicates in a list of facts:

 Facts:
-{
+{to_prompt_json(context['edges'])}

 Task:
 If any facts in Facts is a duplicate of another fact, return a new fact with one of their uuid's.

@@ -124,37 +124,48 @@ def resolve_edge(context: dict[str, Any]) -> list[Message]:
 Message(
 role='user',
 content=f"""
-
-
-
-
+Task:
+You will receive TWO separate lists of facts. Each list uses 'idx' as its index field, starting from 0.
+
+1. DUPLICATE DETECTION:
+- If the NEW FACT represents identical factual information as any fact in EXISTING FACTS, return those idx values in duplicate_facts.
+- Facts with similar information that contain key differences should NOT be marked as duplicates.
+- Return idx values from EXISTING FACTS.
+- If no duplicates, return an empty list for duplicate_facts.
+
+2. FACT TYPE CLASSIFICATION:
+- Given the predefined FACT TYPES, determine if the NEW FACT should be classified as one of these types.
+- Return the fact type as fact_type or DEFAULT if NEW FACT is not one of the FACT TYPES.
+
+3. CONTRADICTION DETECTION:
+- Based on FACT INVALIDATION CANDIDATES and NEW FACT, determine which facts the new fact contradicts.
+- Return idx values from FACT INVALIDATION CANDIDATES.
+- If no contradictions, return an empty list for contradicted_facts.
+
+IMPORTANT:
+- duplicate_facts: Use ONLY 'idx' values from EXISTING FACTS
+- contradicted_facts: Use ONLY 'idx' values from FACT INVALIDATION CANDIDATES
+- These are two separate lists with independent idx ranges starting from 0
+
+Guidelines:
+1. Some facts may be very similar but will have key differences, particularly around numeric values in the facts.
+Do not mark these facts as duplicates.
+
+<FACT TYPES>
+{context['edge_types']}
+</FACT TYPES>
+
 <EXISTING FACTS>
 {context['existing_edges']}
 </EXISTING FACTS>
+
 <FACT INVALIDATION CANDIDATES>
 {context['edge_invalidation_candidates']}
 </FACT INVALIDATION CANDIDATES>
-
-<FACT TYPES>
-{context['edge_types']}
-</FACT TYPES>
-

-
-
-
-If the NEW FACT is not a duplicate of any of the EXISTING FACTS, return -1.
-
-Given the predefined FACT TYPES, determine if the NEW FACT should be classified as one of these types.
-Return the fact type as fact_type or DEFAULT if NEW FACT is not one of the FACT TYPES.
-
-Based on the provided FACT INVALIDATION CANDIDATES and NEW FACT, determine which existing facts the new fact contradicts.
-Return a list containing all idx's of the facts that are contradicted by the NEW FACT.
-If there are no contradicted facts, return an empty list.
-
-Guidelines:
-1. The facts do not need to be completely identical to be duplicates, they just need to express the same information.
-2. Some facts may be very similar but will have key differences, particularly around numeric values in the facts.
+<NEW FACT>
+{context['new_edge']}
+</NEW FACT>
 """,
 ),
 ]
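The reworked `resolve_edge` prompt pairs with the updated `EdgeDuplicate` response model from the first hunk: `duplicate_facts` and `contradicted_facts` now carry `idx` values drawn from two independently numbered lists. A response conforming to that contract could look like the following; the field definitions are copied from the hunks above, while the example values are invented.

```python
from pydantic import BaseModel, Field


class EdgeDuplicate(BaseModel):
    # Field definitions as shown in the dedupe_edges.py hunks above.
    duplicate_facts: list[int] = Field(
        ...,
        description='List of idx values of any duplicate facts. If no duplicate facts are found, default to empty list.',
    )
    contradicted_facts: list[int] = Field(
        ...,
        description='List of idx values of facts that should be invalidated. If no facts should be invalidated, the list should be empty.',
    )
    fact_type: str = Field(..., description='One of the provided fact types or DEFAULT')


# Illustrative LLM response: the NEW FACT duplicates EXISTING FACTS idx 2 and
# contradicts FACT INVALIDATION CANDIDATES idx 0 and 3 (all values invented).
resolution = EdgeDuplicate(duplicate_facts=[2], contradicted_facts=[0, 3], fact_type='DEFAULT')
print(resolution)
```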
graphiti_core/prompts/dedupe_nodes.py CHANGED

@@ -14,12 +14,12 @@ See the License for the specific language governing permissions and
 limitations under the License.
 """

-import json
 from typing import Any, Protocol, TypedDict

 from pydantic import BaseModel, Field

 from .models import Message, PromptFunction, PromptVersion
+from .prompt_helpers import to_prompt_json


 class NodeDuplicate(BaseModel):

@@ -34,7 +34,7 @@ class NodeDuplicate(BaseModel):
 )
 duplicates: list[int] = Field(
 ...,
-description='idx of all duplicate
+description='idx of all entities that are a duplicate of the entity with the above id.',
 )


@@ -64,20 +64,20 @@ def node(context: dict[str, Any]) -> list[Message]:
 role='user',
 content=f"""
 <PREVIOUS MESSAGES>
-{
+{to_prompt_json([ep for ep in context['previous_episodes']])}
 </PREVIOUS MESSAGES>
 <CURRENT MESSAGE>
 {context['episode_content']}
 </CURRENT MESSAGE>
 <NEW ENTITY>
-{
+{to_prompt_json(context['extracted_node'])}
 </NEW ENTITY>
 <ENTITY TYPE DESCRIPTION>
-{
+{to_prompt_json(context['entity_type_description'])}
 </ENTITY TYPE DESCRIPTION>

 <EXISTING ENTITIES>
-{
+{to_prompt_json(context['existing_nodes'])}
 </EXISTING ENTITIES>

 Given the above EXISTING ENTITIES and their attributes, MESSAGE, and PREVIOUS MESSAGES; Determine if the NEW ENTITY extracted from the conversation

@@ -92,12 +92,23 @@ def node(context: dict[str, Any]) -> list[Message]:

 TASK:
 1. Compare `new_entity` against each item in `existing_entities`.
-2. If it refers to the same real
-3. Let `duplicate_idx` = the
-4. Let `duplicates` = the list of
-
-
-
+2. If it refers to the same real-world object or concept, collect its index.
+3. Let `duplicate_idx` = the smallest collected index, or -1 if none.
+4. Let `duplicates` = the sorted list of all collected indices (empty list if none).
+
+Respond with a JSON object containing an "entity_resolutions" array with a single entry:
+{{
+"entity_resolutions": [
+{{
+"id": integer id from NEW ENTITY,
+"name": the best full name for the entity,
+"duplicate_idx": integer index of the best duplicate in EXISTING ENTITIES, or -1 if none,
+"duplicates": sorted list of all duplicate indices you collected (deduplicate the list, use [] when none)
+}}
+]
+}}
+
+Only reference indices that appear in EXISTING ENTITIES, and return [] / -1 when unsure.
 """,
 ),
 ]

@@ -108,44 +119,44 @@ def nodes(context: dict[str, Any]) -> list[Message]:
 Message(
 role='system',
 content='You are a helpful assistant that determines whether or not ENTITIES extracted from a conversation are duplicates'
-'of existing entities.',
+' of existing entities.',
 ),
 Message(
 role='user',
 content=f"""
 <PREVIOUS MESSAGES>
-{
+{to_prompt_json([ep for ep in context['previous_episodes']])}
 </PREVIOUS MESSAGES>
 <CURRENT MESSAGE>
 {context['episode_content']}
 </CURRENT MESSAGE>
-
-
+
+
 Each of the following ENTITIES were extracted from the CURRENT MESSAGE.
 Each entity in ENTITIES is represented as a JSON object with the following structure:
 {{
 id: integer id of the entity,
 name: "name of the entity",
-entity_type: "
-entity_type_description: "Description of what the entity type represents"
-duplication_candidates: [
-{{
-idx: integer index of the candidate entity,
-name: "name of the candidate entity",
-entity_type: "ontological classification of the candidate entity",
-...<additional attributes>
-}}
-]
+entity_type: ["Entity", "<optional additional label>", ...],
+entity_type_description: "Description of what the entity type represents"
 }}
-
+
 <ENTITIES>
-{
+{to_prompt_json(context['extracted_nodes'])}
 </ENTITIES>
-
+
 <EXISTING ENTITIES>
-{
+{to_prompt_json(context['existing_nodes'])}
 </EXISTING ENTITIES>

+Each entry in EXISTING ENTITIES is an object with the following structure:
+{{
+idx: integer index of the candidate entity (use this when referencing a duplicate),
+name: "name of the candidate entity",
+entity_types: ["Entity", "<optional additional label>", ...],
+...<additional attributes such as summaries or metadata>
+}}
+
 For each of the above ENTITIES, determine if the entity is a duplicate of any of the EXISTING ENTITIES.

 Entities should only be considered duplicates if they refer to the *same real-world object or concept*.

@@ -155,14 +166,20 @@ def nodes(context: dict[str, Any]) -> list[Message]:
 - They have similar names or purposes but refer to separate instances or concepts.

 Task:
-
-
-
-
-
-
-
-
+ENTITIES contains {len(context['extracted_nodes'])} entities with IDs 0 through {len(context['extracted_nodes']) - 1}.
+Your response MUST include EXACTLY {len(context['extracted_nodes'])} resolutions with IDs 0 through {len(context['extracted_nodes']) - 1}. Do not skip or add IDs.
+
+For every entity, return an object with the following keys:
+{{
+"id": integer id from ENTITIES,
+"name": the best full name for the entity (preserve the original name unless a duplicate has a more complete name),
+"duplicate_idx": the idx of the EXISTING ENTITY that is the best duplicate match, or -1 if there is no duplicate,
+"duplicates": a sorted list of all idx values from EXISTING ENTITIES that refer to duplicates (deduplicate the list, use [] when none or unsure)
+}}
+
+- Only use idx values that appear in EXISTING ENTITIES.
+- Set duplicate_idx to the smallest idx you collected for that entity, or -1 if duplicates is empty.
+- Never fabricate entities or indices.
 """,
 ),
 ]

@@ -180,7 +197,7 @@ def node_list(context: dict[str, Any]) -> list[Message]:
 Given the following context, deduplicate a list of nodes:

 Nodes:
-{
+{to_prompt_json(context['nodes'])}

 Task:
 1. Group nodes together such that all duplicate nodes are in the same list of uuids
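The rewritten `node` and `nodes` prompts above now pin the response shape to one resolution per extracted entity, with `duplicate_idx` and `duplicates` referring only to `idx` values from EXISTING ENTITIES. Purely as an illustration of that schema (all ids and names invented), a response for two extracted entities where the first matches existing entity idx 4 and the second has no match might be:

```python
# Example payload matching the entity_resolutions schema spelled out in the
# prompts above; every value here is invented for illustration.
example_response = {
    'entity_resolutions': [
        {'id': 0, 'name': 'Acme Corporation', 'duplicate_idx': 4, 'duplicates': [4]},
        {'id': 1, 'name': 'Berlin office', 'duplicate_idx': -1, 'duplicates': []},
    ]
}
```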
graphiti_core/prompts/eval.py CHANGED

@@ -14,12 +14,12 @@ See the License for the specific language governing permissions and
 limitations under the License.
 """

-import json
 from typing import Any, Protocol, TypedDict

 from pydantic import BaseModel, Field

 from .models import Message, PromptFunction, PromptVersion
+from .prompt_helpers import to_prompt_json


 class QueryExpansion(BaseModel):

@@ -68,7 +68,7 @@ def query_expansion(context: dict[str, Any]) -> list[Message]:
 Bob is asking Alice a question, are you able to rephrase the question into a simpler one about Alice in the third person
 that maintains the relevant context?
 <QUESTION>
-{
+{to_prompt_json(context['query'])}
 </QUESTION>
 """
 return [

@@ -84,10 +84,10 @@ def qa_prompt(context: dict[str, Any]) -> list[Message]:
 Your task is to briefly answer the question in the way that you think Alice would answer the question.
 You are given the following entity summaries and facts to help you determine the answer to your question.
 <ENTITY_SUMMARIES>
-{
+{to_prompt_json(context['entity_summaries'])}
 </ENTITY_SUMMARIES>
 <FACTS>
-{
+{to_prompt_json(context['facts'])}
 </FACTS>
 <QUESTION>
 {context['query']}
graphiti_core/prompts/extract_edges.py CHANGED

@@ -14,19 +14,26 @@ See the License for the specific language governing permissions and
 limitations under the License.
 """

-import json
 from typing import Any, Protocol, TypedDict

 from pydantic import BaseModel, Field

 from .models import Message, PromptFunction, PromptVersion
+from .prompt_helpers import to_prompt_json


 class Edge(BaseModel):
 relation_type: str = Field(..., description='FACT_PREDICATE_IN_SCREAMING_SNAKE_CASE')
-source_entity_id: int = Field(
-
-
+source_entity_id: int = Field(
+..., description='The id of the source entity from the ENTITIES list'
+)
+target_entity_id: int = Field(
+..., description='The id of the target entity from the ENTITIES list'
+)
+fact: str = Field(
+...,
+description='A natural language description of the relationship between the entities, paraphrased from the source text',
+)
 valid_at: str | None = Field(
 None,
 description='The date and time when the relationship described by the edge fact became true or was established. Use ISO 8601 format (YYYY-MM-DDTHH:MM:SS.SSSSSSZ)',
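The expanded `Edge` model above makes `source_entity_id` and `target_entity_id` explicit references into the ENTITIES list and asks for a paraphrased `fact` plus an ISO 8601 `valid_at`. As a sketch of what one extracted edge could look like under those rules (field names taken from the hunk, all values invented, fields not shown in the hunk omitted):

```python
# Illustrative extracted edge; all concrete values are invented.
example_edge = {
    'relation_type': 'WORKS_AT',  # SCREAMING_SNAKE_CASE predicate
    'source_entity_id': 0,        # id of the source entity in the ENTITIES list
    'target_entity_id': 3,        # id of the target entity in the ENTITIES list
    'fact': 'Alice works at Acme Corporation as of early 2024.',
    'valid_at': '2024-01-15T00:00:00.000000Z',  # resolved against REFERENCE_TIME
}
```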
@@ -68,8 +75,12 @@ def edge(context: dict[str, Any]) -> list[Message]:
 Message(
 role='user',
 content=f"""
+<FACT TYPES>
+{context['edge_types']}
+</FACT TYPES>
+
 <PREVIOUS_MESSAGES>
-{
+{to_prompt_json([ep for ep in context['previous_episodes']])}
 </PREVIOUS_MESSAGES>

 <CURRENT_MESSAGE>

@@ -77,23 +88,20 @@ def edge(context: dict[str, Any]) -> list[Message]:
 </CURRENT_MESSAGE>

 <ENTITIES>
-{context['nodes']}
+{to_prompt_json(context['nodes'])}
 </ENTITIES>

 <REFERENCE_TIME>
 {context['reference_time']} # ISO 8601 (UTC); used to resolve relative time mentions
 </REFERENCE_TIME>

-<FACT TYPES>
-{context['edge_types']}
-</FACT TYPES>
-
 # TASK
 Extract all factual relationships between the given ENTITIES based on the CURRENT MESSAGE.
 Only extract facts that:
 - involve two DISTINCT ENTITIES from the ENTITIES list,
 - are clearly stated or unambiguously implied in the CURRENT MESSAGE,
 and can be represented as edges in a knowledge graph.
+- Facts should include entity names rather than pronouns whenever possible.
 - The FACT TYPES provide a list of the most important types of facts, make sure to extract facts of these types
 - The FACT TYPES are not an exhaustive list, extract all facts from the message even if they do not fit into one
 of the FACT TYPES

@@ -106,11 +114,12 @@ You may use information from the PREVIOUS MESSAGES only to disambiguate referenc

 # EXTRACTION RULES

-1.
+1. **Entity ID Validation**: `source_entity_id` and `target_entity_id` must use only the `id` values from the ENTITIES list provided above.
+- **CRITICAL**: Using IDs not in the list will cause the edge to be rejected
 2. Each fact must involve two **distinct** entities.
 3. Use a SCREAMING_SNAKE_CASE string as the `relation_type` (e.g., FOUNDED, WORKS_AT).
 4. Do not emit duplicate or semantically redundant facts.
-5. The `
+5. The `fact` should closely paraphrase the original source sentence(s). Do not verbatim quote the original text.
 6. Use `REFERENCE_TIME` to resolve vague or relative temporal expressions (e.g., "last week").
 7. Do **not** hallucinate or infer temporal bounds from unrelated events.


@@ -132,7 +141,7 @@ def reflexion(context: dict[str, Any]) -> list[Message]:

 user_prompt = f"""
 <PREVIOUS MESSAGES>
-{
+{to_prompt_json([ep for ep in context['previous_episodes']])}
 </PREVIOUS MESSAGES>
 <CURRENT MESSAGE>
 {context['episode_content']}

@@ -166,7 +175,7 @@ def extract_attributes(context: dict[str, Any]) -> list[Message]:
 content=f"""

 <MESSAGE>
-{
+{to_prompt_json(context['episode_content'])}
 </MESSAGE>
 <REFERENCE TIME>
 {context['reference_time']}
graphiti_core/prompts/extract_nodes.py CHANGED

@@ -14,12 +14,15 @@ See the License for the specific language governing permissions and
 limitations under the License.
 """

-import json
 from typing import Any, Protocol, TypedDict

 from pydantic import BaseModel, Field

+from graphiti_core.utils.text_utils import MAX_SUMMARY_CHARS
+
 from .models import Message, PromptFunction, PromptVersion
+from .prompt_helpers import to_prompt_json
+from .snippets import summary_instructions


 class ExtractedEntity(BaseModel):

@@ -42,7 +45,8 @@ class EntityClassificationTriple(BaseModel):
 uuid: str = Field(description='UUID of the entity')
 name: str = Field(description='Name of the entity')
 entity_type: str | None = Field(
-default=None,
+default=None,
+description='Type of the entity. Must be one of the provided types or None',
 )


@@ -52,6 +56,13 @@ class EntityClassification(BaseModel):
 )


+class EntitySummary(BaseModel):
+summary: str = Field(
+...,
+description=f'Summary containing the important information about the entity. Under {MAX_SUMMARY_CHARS} characters.',
+)
+
+
 class Prompt(Protocol):
 extract_message: PromptVersion
 extract_json: PromptVersion

@@ -59,6 +70,7 @@ class Prompt(Protocol):
 reflexion: PromptVersion
 classify_nodes: PromptVersion
 extract_attributes: PromptVersion
+extract_summary: PromptVersion


 class Versions(TypedDict):

@@ -68,6 +80,7 @@ class Versions(TypedDict):
 reflexion: PromptFunction
 classify_nodes: PromptFunction
 extract_attributes: PromptFunction
+extract_summary: PromptFunction


 def extract_message(context: dict[str, Any]) -> list[Message]:

@@ -75,23 +88,23 @@ def extract_message(context: dict[str, Any]) -> list[Message]:
 Your primary task is to extract and classify the speaker and other significant entities mentioned in the conversation."""

 user_prompt = f"""
+<ENTITY TYPES>
+{context['entity_types']}
+</ENTITY TYPES>
+
 <PREVIOUS MESSAGES>
-{
+{to_prompt_json([ep for ep in context['previous_episodes']])}
 </PREVIOUS MESSAGES>

 <CURRENT MESSAGE>
 {context['episode_content']}
 </CURRENT MESSAGE>

-<ENTITY TYPES>
-{context['entity_types']}
-</ENTITY TYPES>
-
 Instructions:

 You are given a conversation context and a CURRENT MESSAGE. Your task is to extract **entity nodes** mentioned **explicitly or implicitly** in the CURRENT MESSAGE.
 Pronoun references such as he/she/they or this/that/those should be disambiguated to the names of the
-reference entities.
+reference entities. Only extract distinct entities from the CURRENT MESSAGE. Don't extract pronouns like you, me, he/she/they, we/us as entities.

 1. **Speaker Extraction**: Always extract the speaker (the part before the colon `:` in each dialogue line) as the first entity node.
 - If the speaker is mentioned again in the message, treat both mentions as a **single entity**.

@@ -124,15 +137,16 @@ def extract_json(context: dict[str, Any]) -> list[Message]:
 Your primary task is to extract and classify relevant entities from JSON files"""

 user_prompt = f"""
+<ENTITY TYPES>
+{context['entity_types']}
+</ENTITY TYPES>
+
 <SOURCE DESCRIPTION>:
 {context['source_description']}
 </SOURCE DESCRIPTION>
 <JSON>
 {context['episode_content']}
 </JSON>
-<ENTITY TYPES>
-{context['entity_types']}
-</ENTITY TYPES>

 {context['custom_prompt']}


@@ -141,8 +155,9 @@ For each entity extracted, also determine its entity type based on the provided
 Indicate the classified entity type by providing its entity_type_id.

 Guidelines:
-1.
-2.
+1. Extract all entities that the JSON represents. This will often be something like a "name" or "user" field
+2. Extract all entities mentioned in all other properties throughout the JSON structure
+3. Do NOT extract any properties that contain dates
 """
 return [
 Message(role='system', content=sys_prompt),

@@ -155,13 +170,14 @@ def extract_text(context: dict[str, Any]) -> list[Message]:
 Your primary task is to extract and classify the speaker and other significant entities mentioned in the provided text."""

 user_prompt = f"""
-<TEXT>
-{context['episode_content']}
-</TEXT>
 <ENTITY TYPES>
 {context['entity_types']}
 </ENTITY TYPES>

+<TEXT>
+{context['episode_content']}
+</TEXT>
+
 Given the above text, extract entities from the TEXT that are explicitly or implicitly mentioned.
 For each entity extracted, also determine its entity type based on the provided ENTITY TYPES and their descriptions.
 Indicate the classified entity type by providing its entity_type_id.

@@ -185,7 +201,7 @@ def reflexion(context: dict[str, Any]) -> list[Message]:

 user_prompt = f"""
 <PREVIOUS MESSAGES>
-{
+{to_prompt_json([ep for ep in context['previous_episodes']])}
 </PREVIOUS MESSAGES>
 <CURRENT MESSAGE>
 {context['episode_content']}

@@ -209,22 +225,22 @@ def classify_nodes(context: dict[str, Any]) -> list[Message]:

 user_prompt = f"""
 <PREVIOUS MESSAGES>
-{
+{to_prompt_json([ep for ep in context['previous_episodes']])}
 </PREVIOUS MESSAGES>
 <CURRENT MESSAGE>
 {context['episode_content']}
 </CURRENT MESSAGE>
-
+
 <EXTRACTED ENTITIES>
 {context['extracted_entities']}
 </EXTRACTED ENTITIES>
-
+
 <ENTITY TYPES>
 {context['entity_types']}
 </ENTITY TYPES>
-
+
 Given the above conversation, extracted entities, and provided entity types and their descriptions, classify the extracted entities.
-
+
 Guidelines:
 1. Each entity must have exactly one type
 2. Only use the provided ENTITY TYPES as types, do not use additional types to classify entities.

@@ -245,21 +261,45 @@ def extract_attributes(context: dict[str, Any]) -> list[Message]:
 Message(
 role='user',
 content=f"""
-
-<MESSAGES>
-{json.dumps(context['previous_episodes'], indent=2)}
-{json.dumps(context['episode_content'], indent=2)}
-</MESSAGES>
-
-Given the above MESSAGES and the following ENTITY, update any of its attributes based on the information provided
+Given the MESSAGES and the following ENTITY, update any of its attributes based on the information provided
 in MESSAGES. Use the provided attribute descriptions to better understand how each attribute should be determined.

 Guidelines:
 1. Do not hallucinate entity property values if they cannot be found in the current context.
 2. Only use the provided MESSAGES and ENTITY to set attribute values.
-
-
-
+
+<MESSAGES>
+{to_prompt_json(context['previous_episodes'])}
+{to_prompt_json(context['episode_content'])}
+</MESSAGES>
+
+<ENTITY>
+{context['node']}
+</ENTITY>
+""",
+),
+]
+
+
+def extract_summary(context: dict[str, Any]) -> list[Message]:
+return [
+Message(
+role='system',
+content='You are a helpful assistant that extracts entity summaries from the provided text.',
+),
+Message(
+role='user',
+content=f"""
+Given the MESSAGES and the ENTITY, update the summary that combines relevant information about the entity
+from the messages and relevant information from the existing summary.
+
+{summary_instructions}
+
+<MESSAGES>
+{to_prompt_json(context['previous_episodes'])}
+{to_prompt_json(context['episode_content'])}
+</MESSAGES>
+
 <ENTITY>
 {context['node']}
 </ENTITY>

@@ -273,6 +313,7 @@ versions: Versions = {
 'extract_json': extract_json,
 'extract_text': extract_text,
 'reflexion': reflexion,
+'extract_summary': extract_summary,
 'classify_nodes': classify_nodes,
 'extract_attributes': extract_attributes,
 }
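The new `extract_summary` entry rounds out the `versions` registry for this module. A minimal usage sketch follows, assuming the context keys shown in the hunk (`previous_episodes`, `episode_content`, `node`) are sufficient and that the module is imported as it appears in the wheel; the real graphiti-core call sites may pass richer context.

```python
# Sketch only: build the extract_summary message list through the versions registry.
from graphiti_core.prompts.extract_nodes import versions

context = {
    'previous_episodes': ['Alice: I moved to Berlin last year.'],       # invented sample data
    'episode_content': 'Alice: I just started at a robotics startup.',  # invented sample data
    'node': {'name': 'Alice', 'summary': ''},
}

messages = versions['extract_summary'](context)
for message in messages:
    print(message.role, message.content[:80])
```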