graphiti-core 0.17.4__py3-none-any.whl → 0.25.3__py3-none-any.whl
This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- graphiti_core/cross_encoder/gemini_reranker_client.py +1 -1
- graphiti_core/cross_encoder/openai_reranker_client.py +1 -1
- graphiti_core/decorators.py +110 -0
- graphiti_core/driver/driver.py +62 -2
- graphiti_core/driver/falkordb_driver.py +215 -23
- graphiti_core/driver/graph_operations/graph_operations.py +191 -0
- graphiti_core/driver/kuzu_driver.py +182 -0
- graphiti_core/driver/neo4j_driver.py +70 -8
- graphiti_core/driver/neptune_driver.py +305 -0
- graphiti_core/driver/search_interface/search_interface.py +89 -0
- graphiti_core/edges.py +264 -132
- graphiti_core/embedder/azure_openai.py +10 -3
- graphiti_core/embedder/client.py +2 -1
- graphiti_core/graph_queries.py +114 -101
- graphiti_core/graphiti.py +635 -260
- graphiti_core/graphiti_types.py +2 -0
- graphiti_core/helpers.py +37 -15
- graphiti_core/llm_client/anthropic_client.py +142 -52
- graphiti_core/llm_client/azure_openai_client.py +57 -19
- graphiti_core/llm_client/client.py +83 -21
- graphiti_core/llm_client/config.py +1 -1
- graphiti_core/llm_client/gemini_client.py +75 -57
- graphiti_core/llm_client/openai_base_client.py +92 -48
- graphiti_core/llm_client/openai_client.py +39 -9
- graphiti_core/llm_client/openai_generic_client.py +91 -56
- graphiti_core/models/edges/edge_db_queries.py +259 -35
- graphiti_core/models/nodes/node_db_queries.py +311 -32
- graphiti_core/nodes.py +388 -164
- graphiti_core/prompts/dedupe_edges.py +42 -31
- graphiti_core/prompts/dedupe_nodes.py +56 -39
- graphiti_core/prompts/eval.py +4 -4
- graphiti_core/prompts/extract_edges.py +24 -15
- graphiti_core/prompts/extract_nodes.py +76 -35
- graphiti_core/prompts/prompt_helpers.py +39 -0
- graphiti_core/prompts/snippets.py +29 -0
- graphiti_core/prompts/summarize_nodes.py +23 -25
- graphiti_core/search/search.py +154 -74
- graphiti_core/search/search_config.py +39 -4
- graphiti_core/search/search_filters.py +110 -31
- graphiti_core/search/search_helpers.py +5 -6
- graphiti_core/search/search_utils.py +1360 -473
- graphiti_core/tracer.py +193 -0
- graphiti_core/utils/bulk_utils.py +216 -90
- graphiti_core/utils/content_chunking.py +702 -0
- graphiti_core/utils/datetime_utils.py +13 -0
- graphiti_core/utils/maintenance/community_operations.py +62 -38
- graphiti_core/utils/maintenance/dedup_helpers.py +262 -0
- graphiti_core/utils/maintenance/edge_operations.py +306 -156
- graphiti_core/utils/maintenance/graph_data_operations.py +44 -74
- graphiti_core/utils/maintenance/node_operations.py +466 -206
- graphiti_core/utils/maintenance/temporal_operations.py +11 -3
- graphiti_core/utils/ontology_utils/entity_types_utils.py +1 -1
- graphiti_core/utils/text_utils.py +53 -0
- {graphiti_core-0.17.4.dist-info → graphiti_core-0.25.3.dist-info}/METADATA +221 -87
- graphiti_core-0.25.3.dist-info/RECORD +87 -0
- {graphiti_core-0.17.4.dist-info → graphiti_core-0.25.3.dist-info}/WHEEL +1 -1
- graphiti_core-0.17.4.dist-info/RECORD +0 -77
- /graphiti_core/{utils/maintenance/utils.py → migrations/__init__.py} +0 -0
- {graphiti_core-0.17.4.dist-info → graphiti_core-0.25.3.dist-info}/licenses/LICENSE +0 -0
|
@@ -14,22 +14,22 @@ See the License for the specific language governing permissions and
|
|
|
14
14
|
limitations under the License.
|
|
15
15
|
"""
|
|
16
16
|
|
|
17
|
-
import json
|
|
18
17
|
from typing import Any, Protocol, TypedDict
|
|
19
18
|
|
|
20
19
|
from pydantic import BaseModel, Field
|
|
21
20
|
|
|
22
21
|
from .models import Message, PromptFunction, PromptVersion
|
|
22
|
+
from .prompt_helpers import to_prompt_json
|
|
23
23
|
|
|
24
24
|
|
|
25
25
|
class EdgeDuplicate(BaseModel):
|
|
26
26
|
duplicate_facts: list[int] = Field(
|
|
27
27
|
...,
|
|
28
|
-
description='List of
|
|
28
|
+
description='List of idx values of any duplicate facts. If no duplicate facts are found, default to empty list.',
|
|
29
29
|
)
|
|
30
30
|
contradicted_facts: list[int] = Field(
|
|
31
31
|
...,
|
|
32
|
-
description='List of
|
|
32
|
+
description='List of idx values of facts that should be invalidated. If no facts should be invalidated, the list should be empty.',
|
|
33
33
|
)
|
|
34
34
|
fact_type: str = Field(..., description='One of the provided fact types or DEFAULT')
|
|
35
35
|
|
|
@@ -67,13 +67,13 @@ def edge(context: dict[str, Any]) -> list[Message]:
|
|
|
67
67
|
Given the following context, determine whether the New Edge represents any of the edges in the list of Existing Edges.
|
|
68
68
|
|
|
69
69
|
<EXISTING EDGES>
|
|
70
|
-
{
|
|
70
|
+
{to_prompt_json(context['related_edges'])}
|
|
71
71
|
</EXISTING EDGES>
|
|
72
72
|
|
|
73
73
|
<NEW EDGE>
|
|
74
|
-
{
|
|
74
|
+
{to_prompt_json(context['extracted_edges'])}
|
|
75
75
|
</NEW EDGE>
|
|
76
|
-
|
|
76
|
+
|
|
77
77
|
Task:
|
|
78
78
|
If the New Edges represents the same factual information as any edge in Existing Edges, return the id of the duplicate fact
|
|
79
79
|
as part of the list of duplicate_facts.
|
|
@@ -98,7 +98,7 @@ def edge_list(context: dict[str, Any]) -> list[Message]:
|
|
|
98
98
|
Given the following context, find all of the duplicates in a list of facts:
|
|
99
99
|
|
|
100
100
|
Facts:
|
|
101
|
-
{
|
|
101
|
+
{to_prompt_json(context['edges'])}
|
|
102
102
|
|
|
103
103
|
Task:
|
|
104
104
|
If any facts in Facts is a duplicate of another fact, return a new fact with one of their uuid's.
|
|
@@ -124,37 +124,48 @@ def resolve_edge(context: dict[str, Any]) -> list[Message]:
|
|
|
124
124
|
Message(
|
|
125
125
|
role='user',
|
|
126
126
|
content=f"""
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
127
|
+
Task:
|
|
128
|
+
You will receive TWO separate lists of facts. Each list uses 'idx' as its index field, starting from 0.
|
|
129
|
+
|
|
130
|
+
1. DUPLICATE DETECTION:
|
|
131
|
+
- If the NEW FACT represents identical factual information as any fact in EXISTING FACTS, return those idx values in duplicate_facts.
|
|
132
|
+
- Facts with similar information that contain key differences should NOT be marked as duplicates.
|
|
133
|
+
- Return idx values from EXISTING FACTS.
|
|
134
|
+
- If no duplicates, return an empty list for duplicate_facts.
|
|
135
|
+
|
|
136
|
+
2. FACT TYPE CLASSIFICATION:
|
|
137
|
+
- Given the predefined FACT TYPES, determine if the NEW FACT should be classified as one of these types.
|
|
138
|
+
- Return the fact type as fact_type or DEFAULT if NEW FACT is not one of the FACT TYPES.
|
|
139
|
+
|
|
140
|
+
3. CONTRADICTION DETECTION:
|
|
141
|
+
- Based on FACT INVALIDATION CANDIDATES and NEW FACT, determine which facts the new fact contradicts.
|
|
142
|
+
- Return idx values from FACT INVALIDATION CANDIDATES.
|
|
143
|
+
- If no contradictions, return an empty list for contradicted_facts.
|
|
144
|
+
|
|
145
|
+
IMPORTANT:
|
|
146
|
+
- duplicate_facts: Use ONLY 'idx' values from EXISTING FACTS
|
|
147
|
+
- contradicted_facts: Use ONLY 'idx' values from FACT INVALIDATION CANDIDATES
|
|
148
|
+
- These are two separate lists with independent idx ranges starting from 0
|
|
149
|
+
|
|
150
|
+
Guidelines:
|
|
151
|
+
1. Some facts may be very similar but will have key differences, particularly around numeric values in the facts.
|
|
152
|
+
Do not mark these facts as duplicates.
|
|
153
|
+
|
|
154
|
+
<FACT TYPES>
|
|
155
|
+
{context['edge_types']}
|
|
156
|
+
</FACT TYPES>
|
|
157
|
+
|
|
131
158
|
<EXISTING FACTS>
|
|
132
159
|
{context['existing_edges']}
|
|
133
160
|
</EXISTING FACTS>
|
|
161
|
+
|
|
134
162
|
<FACT INVALIDATION CANDIDATES>
|
|
135
163
|
{context['edge_invalidation_candidates']}
|
|
136
164
|
</FACT INVALIDATION CANDIDATES>
|
|
137
|
-
|
|
138
|
-
<FACT TYPES>
|
|
139
|
-
{context['edge_types']}
|
|
140
|
-
</FACT TYPES>
|
|
141
|
-
|
|
142
165
|
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
If the NEW FACT is not a duplicate of any of the EXISTING FACTS, return -1.
|
|
147
|
-
|
|
148
|
-
Given the predefined FACT TYPES, determine if the NEW FACT should be classified as one of these types.
|
|
149
|
-
Return the fact type as fact_type or DEFAULT if NEW FACT is not one of the FACT TYPES.
|
|
150
|
-
|
|
151
|
-
Based on the provided FACT INVALIDATION CANDIDATES and NEW FACT, determine which existing facts the new fact contradicts.
|
|
152
|
-
Return a list containing all idx's of the facts that are contradicted by the NEW FACT.
|
|
153
|
-
If there are no contradicted facts, return an empty list.
|
|
154
|
-
|
|
155
|
-
Guidelines:
|
|
156
|
-
1. The facts do not need to be completely identical to be duplicates, they just need to express the same information.
|
|
157
|
-
2. Some facts may be very similar but will have key differences, particularly around numeric values in the facts.
|
|
166
|
+
<NEW FACT>
|
|
167
|
+
{context['new_edge']}
|
|
168
|
+
</NEW FACT>
|
|
158
169
|
""",
|
|
159
170
|
),
|
|
160
171
|
]
|
|
@@ -14,12 +14,12 @@ See the License for the specific language governing permissions and
|
|
|
14
14
|
limitations under the License.
|
|
15
15
|
"""
|
|
16
16
|
|
|
17
|
-
import json
|
|
18
17
|
from typing import Any, Protocol, TypedDict
|
|
19
18
|
|
|
20
19
|
from pydantic import BaseModel, Field
|
|
21
20
|
|
|
22
21
|
from .models import Message, PromptFunction, PromptVersion
|
|
22
|
+
from .prompt_helpers import to_prompt_json
|
|
23
23
|
|
|
24
24
|
|
|
25
25
|
class NodeDuplicate(BaseModel):
|
|
@@ -34,7 +34,7 @@ class NodeDuplicate(BaseModel):
|
|
|
34
34
|
)
|
|
35
35
|
duplicates: list[int] = Field(
|
|
36
36
|
...,
|
|
37
|
-
description='idx of all duplicate
|
|
37
|
+
description='idx of all entities that are a duplicate of the entity with the above id.',
|
|
38
38
|
)
|
|
39
39
|
|
|
40
40
|
|
|
@@ -64,20 +64,20 @@ def node(context: dict[str, Any]) -> list[Message]:
|
|
|
64
64
|
role='user',
|
|
65
65
|
content=f"""
|
|
66
66
|
<PREVIOUS MESSAGES>
|
|
67
|
-
{
|
|
67
|
+
{to_prompt_json([ep for ep in context['previous_episodes']])}
|
|
68
68
|
</PREVIOUS MESSAGES>
|
|
69
69
|
<CURRENT MESSAGE>
|
|
70
70
|
{context['episode_content']}
|
|
71
71
|
</CURRENT MESSAGE>
|
|
72
72
|
<NEW ENTITY>
|
|
73
|
-
{
|
|
73
|
+
{to_prompt_json(context['extracted_node'])}
|
|
74
74
|
</NEW ENTITY>
|
|
75
75
|
<ENTITY TYPE DESCRIPTION>
|
|
76
|
-
{
|
|
76
|
+
{to_prompt_json(context['entity_type_description'])}
|
|
77
77
|
</ENTITY TYPE DESCRIPTION>
|
|
78
78
|
|
|
79
79
|
<EXISTING ENTITIES>
|
|
80
|
-
{
|
|
80
|
+
{to_prompt_json(context['existing_nodes'])}
|
|
81
81
|
</EXISTING ENTITIES>
|
|
82
82
|
|
|
83
83
|
Given the above EXISTING ENTITIES and their attributes, MESSAGE, and PREVIOUS MESSAGES; Determine if the NEW ENTITY extracted from the conversation
|
|
@@ -92,12 +92,23 @@ def node(context: dict[str, Any]) -> list[Message]:
|
|
|
92
92
|
|
|
93
93
|
TASK:
|
|
94
94
|
1. Compare `new_entity` against each item in `existing_entities`.
|
|
95
|
-
2. If it refers to the same real
|
|
96
|
-
3. Let `duplicate_idx` = the
|
|
97
|
-
4. Let `duplicates` = the list of
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
95
|
+
2. If it refers to the same real-world object or concept, collect its index.
|
|
96
|
+
3. Let `duplicate_idx` = the smallest collected index, or -1 if none.
|
|
97
|
+
4. Let `duplicates` = the sorted list of all collected indices (empty list if none).
|
|
98
|
+
|
|
99
|
+
Respond with a JSON object containing an "entity_resolutions" array with a single entry:
|
|
100
|
+
{{
|
|
101
|
+
"entity_resolutions": [
|
|
102
|
+
{{
|
|
103
|
+
"id": integer id from NEW ENTITY,
|
|
104
|
+
"name": the best full name for the entity,
|
|
105
|
+
"duplicate_idx": integer index of the best duplicate in EXISTING ENTITIES, or -1 if none,
|
|
106
|
+
"duplicates": sorted list of all duplicate indices you collected (deduplicate the list, use [] when none)
|
|
107
|
+
}}
|
|
108
|
+
]
|
|
109
|
+
}}
|
|
110
|
+
|
|
111
|
+
Only reference indices that appear in EXISTING ENTITIES, and return [] / -1 when unsure.
|
|
101
112
|
""",
|
|
102
113
|
),
|
|
103
114
|
]
|
|
@@ -108,44 +119,44 @@ def nodes(context: dict[str, Any]) -> list[Message]:
|
|
|
108
119
|
Message(
|
|
109
120
|
role='system',
|
|
110
121
|
content='You are a helpful assistant that determines whether or not ENTITIES extracted from a conversation are duplicates'
|
|
111
|
-
'of existing entities.',
|
|
122
|
+
' of existing entities.',
|
|
112
123
|
),
|
|
113
124
|
Message(
|
|
114
125
|
role='user',
|
|
115
126
|
content=f"""
|
|
116
127
|
<PREVIOUS MESSAGES>
|
|
117
|
-
{
|
|
128
|
+
{to_prompt_json([ep for ep in context['previous_episodes']])}
|
|
118
129
|
</PREVIOUS MESSAGES>
|
|
119
130
|
<CURRENT MESSAGE>
|
|
120
131
|
{context['episode_content']}
|
|
121
132
|
</CURRENT MESSAGE>
|
|
122
|
-
|
|
123
|
-
|
|
133
|
+
|
|
134
|
+
|
|
124
135
|
Each of the following ENTITIES were extracted from the CURRENT MESSAGE.
|
|
125
136
|
Each entity in ENTITIES is represented as a JSON object with the following structure:
|
|
126
137
|
{{
|
|
127
138
|
id: integer id of the entity,
|
|
128
139
|
name: "name of the entity",
|
|
129
|
-
entity_type: "
|
|
130
|
-
entity_type_description: "Description of what the entity type represents"
|
|
131
|
-
duplication_candidates: [
|
|
132
|
-
{{
|
|
133
|
-
idx: integer index of the candidate entity,
|
|
134
|
-
name: "name of the candidate entity",
|
|
135
|
-
entity_type: "ontological classification of the candidate entity",
|
|
136
|
-
...<additional attributes>
|
|
137
|
-
}}
|
|
138
|
-
]
|
|
140
|
+
entity_type: ["Entity", "<optional additional label>", ...],
|
|
141
|
+
entity_type_description: "Description of what the entity type represents"
|
|
139
142
|
}}
|
|
140
|
-
|
|
143
|
+
|
|
141
144
|
<ENTITIES>
|
|
142
|
-
{
|
|
145
|
+
{to_prompt_json(context['extracted_nodes'])}
|
|
143
146
|
</ENTITIES>
|
|
144
|
-
|
|
147
|
+
|
|
145
148
|
<EXISTING ENTITIES>
|
|
146
|
-
{
|
|
149
|
+
{to_prompt_json(context['existing_nodes'])}
|
|
147
150
|
</EXISTING ENTITIES>
|
|
148
151
|
|
|
152
|
+
Each entry in EXISTING ENTITIES is an object with the following structure:
|
|
153
|
+
{{
|
|
154
|
+
idx: integer index of the candidate entity (use this when referencing a duplicate),
|
|
155
|
+
name: "name of the candidate entity",
|
|
156
|
+
entity_types: ["Entity", "<optional additional label>", ...],
|
|
157
|
+
...<additional attributes such as summaries or metadata>
|
|
158
|
+
}}
|
|
159
|
+
|
|
149
160
|
For each of the above ENTITIES, determine if the entity is a duplicate of any of the EXISTING ENTITIES.
|
|
150
161
|
|
|
151
162
|
Entities should only be considered duplicates if they refer to the *same real-world object or concept*.
|
|
@@ -155,14 +166,20 @@ def nodes(context: dict[str, Any]) -> list[Message]:
|
|
|
155
166
|
- They have similar names or purposes but refer to separate instances or concepts.
|
|
156
167
|
|
|
157
168
|
Task:
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
169
|
+
ENTITIES contains {len(context['extracted_nodes'])} entities with IDs 0 through {len(context['extracted_nodes']) - 1}.
|
|
170
|
+
Your response MUST include EXACTLY {len(context['extracted_nodes'])} resolutions with IDs 0 through {len(context['extracted_nodes']) - 1}. Do not skip or add IDs.
|
|
171
|
+
|
|
172
|
+
For every entity, return an object with the following keys:
|
|
173
|
+
{{
|
|
174
|
+
"id": integer id from ENTITIES,
|
|
175
|
+
"name": the best full name for the entity (preserve the original name unless a duplicate has a more complete name),
|
|
176
|
+
"duplicate_idx": the idx of the EXISTING ENTITY that is the best duplicate match, or -1 if there is no duplicate,
|
|
177
|
+
"duplicates": a sorted list of all idx values from EXISTING ENTITIES that refer to duplicates (deduplicate the list, use [] when none or unsure)
|
|
178
|
+
}}
|
|
179
|
+
|
|
180
|
+
- Only use idx values that appear in EXISTING ENTITIES.
|
|
181
|
+
- Set duplicate_idx to the smallest idx you collected for that entity, or -1 if duplicates is empty.
|
|
182
|
+
- Never fabricate entities or indices.
|
|
166
183
|
""",
|
|
167
184
|
),
|
|
168
185
|
]
|
|
@@ -180,7 +197,7 @@ def node_list(context: dict[str, Any]) -> list[Message]:
|
|
|
180
197
|
Given the following context, deduplicate a list of nodes:
|
|
181
198
|
|
|
182
199
|
Nodes:
|
|
183
|
-
{
|
|
200
|
+
{to_prompt_json(context['nodes'])}
|
|
184
201
|
|
|
185
202
|
Task:
|
|
186
203
|
1. Group nodes together such that all duplicate nodes are in the same list of uuids
|
graphiti_core/prompts/eval.py
CHANGED
|
@@ -14,12 +14,12 @@ See the License for the specific language governing permissions and
|
|
|
14
14
|
limitations under the License.
|
|
15
15
|
"""
|
|
16
16
|
|
|
17
|
-
import json
|
|
18
17
|
from typing import Any, Protocol, TypedDict
|
|
19
18
|
|
|
20
19
|
from pydantic import BaseModel, Field
|
|
21
20
|
|
|
22
21
|
from .models import Message, PromptFunction, PromptVersion
|
|
22
|
+
from .prompt_helpers import to_prompt_json
|
|
23
23
|
|
|
24
24
|
|
|
25
25
|
class QueryExpansion(BaseModel):
|
|
@@ -68,7 +68,7 @@ def query_expansion(context: dict[str, Any]) -> list[Message]:
|
|
|
68
68
|
Bob is asking Alice a question, are you able to rephrase the question into a simpler one about Alice in the third person
|
|
69
69
|
that maintains the relevant context?
|
|
70
70
|
<QUESTION>
|
|
71
|
-
{
|
|
71
|
+
{to_prompt_json(context['query'])}
|
|
72
72
|
</QUESTION>
|
|
73
73
|
"""
|
|
74
74
|
return [
|
|
@@ -84,10 +84,10 @@ def qa_prompt(context: dict[str, Any]) -> list[Message]:
|
|
|
84
84
|
Your task is to briefly answer the question in the way that you think Alice would answer the question.
|
|
85
85
|
You are given the following entity summaries and facts to help you determine the answer to your question.
|
|
86
86
|
<ENTITY_SUMMARIES>
|
|
87
|
-
{
|
|
87
|
+
{to_prompt_json(context['entity_summaries'])}
|
|
88
88
|
</ENTITY_SUMMARIES>
|
|
89
89
|
<FACTS>
|
|
90
|
-
{
|
|
90
|
+
{to_prompt_json(context['facts'])}
|
|
91
91
|
</FACTS>
|
|
92
92
|
<QUESTION>
|
|
93
93
|
{context['query']}
|
|
@@ -14,19 +14,26 @@ See the License for the specific language governing permissions and
|
|
|
14
14
|
limitations under the License.
|
|
15
15
|
"""
|
|
16
16
|
|
|
17
|
-
import json
|
|
18
17
|
from typing import Any, Protocol, TypedDict
|
|
19
18
|
|
|
20
19
|
from pydantic import BaseModel, Field
|
|
21
20
|
|
|
22
21
|
from .models import Message, PromptFunction, PromptVersion
|
|
22
|
+
from .prompt_helpers import to_prompt_json
|
|
23
23
|
|
|
24
24
|
|
|
25
25
|
class Edge(BaseModel):
|
|
26
26
|
relation_type: str = Field(..., description='FACT_PREDICATE_IN_SCREAMING_SNAKE_CASE')
|
|
27
|
-
source_entity_id: int = Field(
|
|
28
|
-
|
|
29
|
-
|
|
27
|
+
source_entity_id: int = Field(
|
|
28
|
+
..., description='The id of the source entity from the ENTITIES list'
|
|
29
|
+
)
|
|
30
|
+
target_entity_id: int = Field(
|
|
31
|
+
..., description='The id of the target entity from the ENTITIES list'
|
|
32
|
+
)
|
|
33
|
+
fact: str = Field(
|
|
34
|
+
...,
|
|
35
|
+
description='A natural language description of the relationship between the entities, paraphrased from the source text',
|
|
36
|
+
)
|
|
30
37
|
valid_at: str | None = Field(
|
|
31
38
|
None,
|
|
32
39
|
description='The date and time when the relationship described by the edge fact became true or was established. Use ISO 8601 format (YYYY-MM-DDTHH:MM:SS.SSSSSSZ)',
|
|
@@ -68,8 +75,12 @@ def edge(context: dict[str, Any]) -> list[Message]:
|
|
|
68
75
|
Message(
|
|
69
76
|
role='user',
|
|
70
77
|
content=f"""
|
|
78
|
+
<FACT TYPES>
|
|
79
|
+
{context['edge_types']}
|
|
80
|
+
</FACT TYPES>
|
|
81
|
+
|
|
71
82
|
<PREVIOUS_MESSAGES>
|
|
72
|
-
{
|
|
83
|
+
{to_prompt_json([ep for ep in context['previous_episodes']])}
|
|
73
84
|
</PREVIOUS_MESSAGES>
|
|
74
85
|
|
|
75
86
|
<CURRENT_MESSAGE>
|
|
@@ -77,23 +88,20 @@ def edge(context: dict[str, Any]) -> list[Message]:
|
|
|
77
88
|
</CURRENT_MESSAGE>
|
|
78
89
|
|
|
79
90
|
<ENTITIES>
|
|
80
|
-
{context['nodes']}
|
|
91
|
+
{to_prompt_json(context['nodes'])}
|
|
81
92
|
</ENTITIES>
|
|
82
93
|
|
|
83
94
|
<REFERENCE_TIME>
|
|
84
95
|
{context['reference_time']} # ISO 8601 (UTC); used to resolve relative time mentions
|
|
85
96
|
</REFERENCE_TIME>
|
|
86
97
|
|
|
87
|
-
<FACT TYPES>
|
|
88
|
-
{context['edge_types']}
|
|
89
|
-
</FACT TYPES>
|
|
90
|
-
|
|
91
98
|
# TASK
|
|
92
99
|
Extract all factual relationships between the given ENTITIES based on the CURRENT MESSAGE.
|
|
93
100
|
Only extract facts that:
|
|
94
101
|
- involve two DISTINCT ENTITIES from the ENTITIES list,
|
|
95
102
|
- are clearly stated or unambiguously implied in the CURRENT MESSAGE,
|
|
96
103
|
and can be represented as edges in a knowledge graph.
|
|
104
|
+
- Facts should include entity names rather than pronouns whenever possible.
|
|
97
105
|
- The FACT TYPES provide a list of the most important types of facts, make sure to extract facts of these types
|
|
98
106
|
- The FACT TYPES are not an exhaustive list, extract all facts from the message even if they do not fit into one
|
|
99
107
|
of the FACT TYPES
|
|
@@ -102,15 +110,16 @@ Only extract facts that:
|
|
|
102
110
|
You may use information from the PREVIOUS MESSAGES only to disambiguate references or support continuity.
|
|
103
111
|
|
|
104
112
|
|
|
105
|
-
{context['
|
|
113
|
+
{context['custom_extraction_instructions']}
|
|
106
114
|
|
|
107
115
|
# EXTRACTION RULES
|
|
108
116
|
|
|
109
|
-
1.
|
|
117
|
+
1. **Entity ID Validation**: `source_entity_id` and `target_entity_id` must use only the `id` values from the ENTITIES list provided above.
|
|
118
|
+
- **CRITICAL**: Using IDs not in the list will cause the edge to be rejected
|
|
110
119
|
2. Each fact must involve two **distinct** entities.
|
|
111
120
|
3. Use a SCREAMING_SNAKE_CASE string as the `relation_type` (e.g., FOUNDED, WORKS_AT).
|
|
112
121
|
4. Do not emit duplicate or semantically redundant facts.
|
|
113
|
-
5. The `
|
|
122
|
+
5. The `fact` should closely paraphrase the original source sentence(s). Do not verbatim quote the original text.
|
|
114
123
|
6. Use `REFERENCE_TIME` to resolve vague or relative temporal expressions (e.g., "last week").
|
|
115
124
|
7. Do **not** hallucinate or infer temporal bounds from unrelated events.
|
|
116
125
|
|
|
@@ -132,7 +141,7 @@ def reflexion(context: dict[str, Any]) -> list[Message]:
|
|
|
132
141
|
|
|
133
142
|
user_prompt = f"""
|
|
134
143
|
<PREVIOUS MESSAGES>
|
|
135
|
-
{
|
|
144
|
+
{to_prompt_json([ep for ep in context['previous_episodes']])}
|
|
136
145
|
</PREVIOUS MESSAGES>
|
|
137
146
|
<CURRENT MESSAGE>
|
|
138
147
|
{context['episode_content']}
|
|
@@ -166,7 +175,7 @@ def extract_attributes(context: dict[str, Any]) -> list[Message]:
|
|
|
166
175
|
content=f"""
|
|
167
176
|
|
|
168
177
|
<MESSAGE>
|
|
169
|
-
{
|
|
178
|
+
{to_prompt_json(context['episode_content'])}
|
|
170
179
|
</MESSAGE>
|
|
171
180
|
<REFERENCE TIME>
|
|
172
181
|
{context['reference_time']}
|